diff --git a/.github/workflows/bot-label-lgtm.yaml b/.github/workflows/bot-label-lgtm.yaml deleted file mode 100644 index 27f50375cdc..00000000000 --- a/.github/workflows/bot-label-lgtm.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# This workflow adds the community approval label ("lgtm") to pull requests. It -# does *not* indicate maintainer approval. This a way to visually highlight that -# someone in the world thinks the pull request is ready for further review. This -# event is triggered by a pull request approval, or simply a comment that -# contains the text "lgtm". -# Webhook events: Issue comments, Pull request reviews -name: Community approval -on: - repository_dispatch: - # From: issue_comment, pull_request_review - types: [created, edited, submitted] - -jobs: - lgtm-comment: - # Check the comment. contains() is case-insensitive. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.comment.body, 'LGTM') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.comment.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' - - review-approval: - # Check the pull request review. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.review.state, 'approved') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.pull_request.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' diff --git a/.github/workflows/bot-nightly.yaml b/.github/workflows/bot-nightly.yaml deleted file mode 100644 index a0595c74a0b..00000000000 --- a/.github/workflows/bot-nightly.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Nightly jobs run by a bot collaborator. -name: Nightly jobs -on: - repository_dispatch: - types: [nightly] - -jobs: - snapshot-source: - name: Update Keras guides - if : ${{ github.actor == 'tfdocsbot' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - repository: keras-team/keras-io - path: keras-io - - uses: actions/checkout@v2 - with: - # tensorflow/docs branch to save generated notebooks. - ref: snapshot-keras - path: docs - - name: Set up repo - run: | - # Set commit author. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Install requirements - run: | - python3 -m pip install -U pip - python3 -m pip install -U -r keras-io/requirements.txt - python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Generate Keras notebooks - run: | - # Autogen requires formated code - echo "[${GITHUB_WORKFLOW}] Format Python files ..." - python -m black keras-io/guides/ - mkdir -p keras-io/tf # Make sure output dir exists. - cd keras-io/scripts/ - echo "[${GITHUB_WORKFLOW}] Generate Keras guides ..." - python3 autogen.py generate_tf_guides - echo "[${GITHUB_WORKFLOW}] Format notebooks ..." 
- python3 -m tensorflow_docs.tools.nbfmt ../tf/ - - name: Sync docs repo - env: - KERAS_GUIDES_DIR: site/en/guide/keras/ - run: | - rsync --archive --del --checksum ./keras-io/tf/ "./docs/${KERAS_GUIDES_DIR}" - cd docs - if [[ -z $(git status -s | grep "$KERAS_GUIDES_DIR") ]]; then - echo "[${GITHUB_WORKFLOW}] No Keras guides updated, exiting." - exit 0 - fi - # Match timestamp format to other snapshot messages. - fmt_rfc7231="%a, %d %b %Y %H:%M:%S %Z" - TIMESTAMP_STR=$(TZ=GMT date +"$fmt_rfc7231") - - git add "./${KERAS_GUIDES_DIR}" - git commit -m "Keras guides snapshot: ${TIMESTAMP_STR}" - # Push to current branch. - echo "[${GITHUB_WORKFLOW}] Push changes to repo ..." - git push origin diff --git a/.github/workflows/bot-pr-fix.yaml b/.github/workflows/bot-pr-fix.yaml deleted file mode 100644 index a8ead3aa7ae..00000000000 --- a/.github/workflows/bot-pr-fix.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Automatically add commits to fix pull requests. This workflow must initiate -# from an authenticated bot repo collaborator. Check for opt-out label. -# Webhook events: Pull requests -name: Auto-fix pull request -on: - repository_dispatch: - types: [opened, synchronize] - -jobs: - nbfmt: - # Check for opt-out label. - if: >- - ${{ github.actor == 'tfdocsbot' && - !contains(github.event.client_payload.pull_request.labels.*.name, 'nbfmt-disable') }} - runs-on: ubuntu-latest - steps: - - name: Set up Python - uses: actions/setup-python@v2 - - name: Install tensorflow-docs - run: python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Fetch pull request branch - uses: actions/checkout@v2 - with: - # Head repo is the user's fork. Ref is the branch name. - repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} - - name: Fetch base master branch - run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - - name: Format notebooks - run: | - # Only want notebooks modified in this pull request. - readarray -t changed_files < <(git diff --name-only master | grep '\.ipynb$' || true) - if [[ ${#changed_files[@]} == 0 ]]; then - echo "No notebooks modified in this pull request." - exit 0 - fi - python3 -m tensorflow_docs.tools.nbfmt "${changed_files[@]}" - - if [[ -z $(git ls-files --modified) ]]; then - echo "Notebooks already formatted." - exit 0 - fi - # Set author and commit. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - git commit -am "nbfmt" - # Push to the pull request branch submitted by head. 
- git push diff --git a/.github/workflows/bot-pr-new.yaml b/.github/workflows/bot-pr-new.yaml index 7f2c6164832..13724cc14f0 100644 --- a/.github/workflows/bot-pr-new.yaml +++ b/.github/workflows/bot-pr-new.yaml @@ -6,8 +6,15 @@ on: repository_dispatch: types: [opened, reopened] +permissions: + contents: read # to fetch code (actions/checkout) + jobs: comment-welcome: + permissions: + contents: read # to fetch code (actions/checkout) + pull-requests: write # to comment on pull-request + if: ${{ github.actor == 'tfdocsbot' }} runs-on: ubuntu-latest steps: @@ -15,7 +22,7 @@ jobs: uses: actions/checkout@v2 with: repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} + ref: ${{ github.event.client_payload.pull_request.head.sha }} - name: Fetch base master branch run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - name: Create message diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000000..0ca76b0677e --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,49 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + # Scheduled to run at 1.30 UTC everyday + - cron: '30 1 * * *' + workflow_dispatch: + +jobs: + stale: + + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-issue-stale: 14 + days-before-issue-close: 14 + stale-issue-label: "status:stale" + close-issue-reason: not_planned + any-of-labels: "awaiting-contributor-response,cla:no" + stale-issue-message: > + Marking this issue as stale since it has been open for 14 days with no activity. + This issue will be closed if no further activity occurs. + close-issue-message: > + This issue was closed because it has been inactive for 28 days. + Please post a new issue if you need further assistance. Thanks! + days-before-pr-stale: 14 + days-before-pr-close: 14 + stale-pr-label: "status:stale" + stale-pr-message: > + Marking this pull request as stale since it has been open for 14 days with no activity. + This PR will be closed if no further activity occurs. + close-pr-message: > + This pull request was closed because it has been inactive for 28 days. + Please open a new pull request if you need further assistance. Thanks! + # Label that can be assigned to issues to exclude them from being marked as stale + exempt-issue-labels: 'override-stale' + # Label that can be assigned to PRs to exclude them from being marked as stale + exempt-pr-labels: "override-stale" diff --git a/CODEOWNERS b/CODEOWNERS index 42fd773cab2..d4d2932d8bc 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,11 +1,14 @@ # https://help.github.com/articles/about-codeowners/ -# Last matching pattern takes preecedence. +# Last matching pattern takes precedence. # Default owners for everything in repo. 
-* @lamberta @MarkDaoust @8bitmp3 +* @tensorflow/docs-team -# Docs -/site/en/guide/keras/ @fchollet @lamberta @MarkDaoust @8bitmp3 +# Install +/site/en/install/ @haifeng-jin @MarkDaoust @8bitmp3 # Community -/site/en/community/ @ewilderj @lamberta @theadactyl @joanafilipa +/site/en/community/ @ewilderj @theadactyl @joanafilipa + +# Hub +/site/en/hub @gustheman \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1559b721f51..6f301eab782 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,9 +6,7 @@ This guide shows how to make contributions to [tensorflow.org](https://www.tenso See the [TensorFlow docs contributor guide](https://www.tensorflow.org/community/contribute/docs) -for guidance. For questions, the -[docs@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs) -mailing list is available. +for guidance. For questions, check out [TensorFlow Forum](https://discuss.tensorflow.org/). Questions about TensorFlow usage are better addressed on [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) or the diff --git a/LICENSE b/LICENSE index 4862420c023..08026f1ac8e 100644 --- a/LICENSE +++ b/LICENSE @@ -201,3 +201,28 @@ Copyright 2018 The TensorFlow Authors. All rights reserved. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +--------------------------- + +Where indicated, some files are also distributed under the MIT License: + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index fa4266dd2dc..c3f79ccabe2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -recursive-include tools/tensorflow_docs/api_generator/gen_java/ * \ No newline at end of file +global-include **/templates/* +global-include *.sh \ No newline at end of file diff --git a/OWNERS_METADATA b/OWNERS_METADATA deleted file mode 100644 index 1460eaaba9e..00000000000 --- a/OWNERS_METADATA +++ /dev/null @@ -1,20 +0,0 @@ -# Text proto format: devtools_piper.OwnersMetadataFile (go/owners_metadata) -# See go/silos-lite for details on how to use this file for read access control. -silo_acl { - # Acknowledgement that silo won't contain privacy sensitive data, like PII. - silo_will_contain_privacy_sensitive_data: false - - # This retains normal access for regular engineers and other accounts. - access_type: OPEN - - # This is equivalent to above. 
Currently, default_ganpati_group is a - # mandatory field in SiloAcl. We plan to make it optional in case - # access_type is OPEN. - default_ganpati_group: "piper-group-default-access" - - # Ganpati1 groups that are granted *read* access in addition to the default - # group above. - allowed_ganpati_groups: "restricted-dev-build" - allowed_ganpati_groups: "job-0000396906033-webdevelopmentdevelopermarketingwebprojects" - allowed_ganpati_groups: "job-0000551608589-smetechnicalwriterii" -} diff --git a/README.md b/README.md index 7b94ce5f90f..66b6d3fb065 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To file a docs issue, use the issue tracker in the [tensorflow/tensorflow](https://github.com/tensorflow/tensorflow/issues/new?template=20-documentation-issue.md) repo. And join the TensorFlow documentation contributors on the -[docs@tensorflow.org mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs). +[TensorFlow Forum](https://discuss.tensorflow.org/). ## Community translations diff --git a/setup.py b/setup.py index 2077516bf4c..404479668b3 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ # ============================================================================== """tensorflow_docs is a package for generating python api-reference docs.""" +import datetime import subprocess import sys @@ -21,22 +22,35 @@ from setuptools import setup project_name = 'tensorflow-docs' -version = '0.0.0.dev0' + + +def get_version() -> str: + ts = int( + subprocess.check_output(['git', 'log', '-1', '--format=%ct', 'tools']) + .decode('utf-8') + .strip() + ) + dt = datetime.datetime.utcfromtimestamp(ts) + sec = 60 * 60 * dt.hour + 60 * dt.minute + dt.second + + # calver.org + return f'{dt.year}.{dt.month}.{dt.day}.{sec}' + + +version = get_version() DOCLINES = __doc__.split('\n') REQUIRED_PKGS = [ 'astor', 'absl-py', - 'protobuf>=3.14', + 'jinja2', + 'nbformat', + 'protobuf>=3.12', 'pyyaml', ] -# Dataclasses is in-built from py >=3.7. This version is a backport for py 3.6. -if (sys.version_info.major, sys.version_info.minor) == (3, 6): - REQUIRED_PKGS.append('dataclasses') - -VIS_REQURE = [ +VIS_REQUIRE = [ 'numpy', 'PILLOW', 'webp', @@ -45,6 +59,7 @@ # https://setuptools.readthedocs.io/en/latest/setuptools.html#new-and-changed-setup-keywords setup( name=project_name, + python_requires='>=3.9', version=version, description=DOCLINES[0], long_description='\n'.join(DOCLINES[2:]), @@ -57,7 +72,7 @@ package_dir={'': 'tools'}, scripts=[], install_requires=REQUIRED_PKGS, - extras_require={'vis': VIS_REQURE}, + extras_require={'vis': VIS_REQUIRE}, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -65,7 +80,7 @@ 'Topic :: Scientific/Engineering :: Artificial Intelligence', ], keywords='tensorflow api reference', - # Include_package_data is required for setup.py to recognize the MAINFEST.in + # Include_package_data is required for setup.py to recognize the MANIFEST.in # https://python-packaging.readthedocs.io/en/latest/non-code-files.html include_package_data=True, ) diff --git a/site/en/README.md b/site/en/README.md new file mode 100644 index 00000000000..28dc0cce7d4 --- /dev/null +++ b/site/en/README.md @@ -0,0 +1,50 @@ +# TensorFlow docs + +These are the source files for the core TensorFlow +[guide](https://www.tensorflow.org/guide), +[tutorials](https://www.tensorflow.org/tutorials), and other technical docs. 
+Please read the +[contributor guide](https://www.tensorflow.org/community/contribute) +to submit patches to the TensorFlow documentation and code. + +## TensorFlow ecosystem projects + +In addition to the core TensorFlow docs, +[tensorflow.org](https://www.tensorflow.org) hosts documentation for many +[libraries and extensions](https://www.tensorflow.org/resources/libraries-extensions). +These docs are sourced from separate project repos and where pull requests can +be sent. The following is a list of TensorFlow documentation projects published +on the website and a link to their source files: + +tensorflow.org project | GitHub docs location +-----------------------|--------------------- +[/addons](https://www.tensorflow.org/addons) | https://github.com/tensorflow/addons/tree/master/docs +[/agents](https://www.tensorflow.org/agents) | https://github.com/tensorflow/agents/tree/master/docs +[/cloud](https://www.tensorflow.org/cloud) | https://github.com/tensorflow/cloud/tree/master/g3doc +[/datasets](https://www.tensorflow.org/datasets) | https://github.com/tensorflow/datasets/tree/master/docs +[/decision_forests](https://www.tensorflow.org/decision_forests) | https://github.com/tensorflow/decision-forests/tree/main/documentation +[/federated](https://www.tensorflow.org/federated) | https://github.com/tensorflow/federated/tree/main/docs +[/graphics](https://www.tensorflow.org/graphics) | https://github.com/tensorflow/graphics/tree/master/tensorflow_graphics/g3doc +[/hub](https://www.tensorflow.org/hub) | https://github.com/tensorflow/hub/tree/master/docs +[/io](https://www.tensorflow.org/io) | https://github.com/tensorflow/io/tree/master/docs/ +[/js](https://www.tensorflow.org/js) | https://github.com/tensorflow/tfjs-website/tree/master/docs +[/jvm](https://www.tensorflow.org/jvm) | https://github.com/tensorflow/java/tree/master/docs +[/lattice](https://www.tensorflow.org/lattice) | https://github.com/tensorflow/lattice/tree/master/docs +[/lite](https://www.tensorflow.org/lite) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/g3doc +[/mlir](https://www.tensorflow.org/mlir) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/mlir/g3doc +[/model_optimization](https://www.tensorflow.org/model_optimization) | https://github.com/tensorflow/model-optimization/tree/master/tensorflow_model_optimization/g3doc +[/neural_structured_learning](https://www.tensorflow.org/neural_structured_learning) | https://github.com/tensorflow/neural-structured-learning/tree/master/g3doc +[/probability](https://www.tensorflow.org/probability) | https://github.com/tensorflow/probability/tree/main/tensorflow_probability/g3doc +[/quantum](https://www.tensorflow.org/quantum) | https://github.com/tensorflow/quantum/tree/master/docs +[/ranking](https://www.tensorflow.org/ranking) | https://github.com/tensorflow/ranking/tree/master/docs +[/recommenders](https://www.tensorflow.org/recommenders) | https://github.com/tensorflow/recommenders/tree/main/docs +[/responsible_ai/fairness_indicators](https://www.tensorflow.org/responsible_ai/fairness_indicators/guide) | https://github.com/tensorflow/fairness-indicators/tree/master/g3doc 
+[/responsible_ai/model_card_toolkit](https://www.tensorflow.org/responsible_ai/model_card_toolkit/guide) | https://github.com/tensorflow/model-card-toolkit/tree/main/model_card_toolkit/documentation +[/responsible_ai/model_remediation](https://www.tensorflow.org/responsible_ai/model_remediation) | https://github.com/tensorflow/model-remediation/tree/master/docs +[/responsible_ai/privacy](https://www.tensorflow.org/responsible_ai/privacy/guide) | https://github.com/tensorflow/privacy/tree/master/g3doc +[/tensorboard](https://www.tensorflow.org/tensorboard) | https://github.com/tensorflow/tensorboard/tree/master/docs +[/guide/keras](https://www.tensorflow.org/guide/keras/) | https://github.com/keras-team/keras-io/tree/master/guides +[/text](https://www.tensorflow.org/text) | https://github.com/tensorflow/text/tree/master/docs +[/tfx](https://www.tensorflow.org/tfx) | https://github.com/tensorflow/tfx/tree/master/docs +[/tfx/guide/serving](https://www.tensorflow.org/tfx/guide/serving) | https://github.com/tensorflow/serving/tree/master/tensorflow_serving/g3doc +[/xla](https://www.tensorflow.org/xla) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/xla/g3doc diff --git a/site/en/about/_menu_toc.yaml b/site/en/about/_menu_toc.yaml index d5fe88d3745..8f34ca8089d 100644 --- a/site/en/about/_menu_toc.yaml +++ b/site/en/about/_menu_toc.yaml @@ -5,5 +5,3 @@ toc: path: /about/ - label: "Case studies" path: /about/case-studies/ - - label: "AI Service Partners" - path: /partners/ diff --git a/site/en/about/bib.md b/site/en/about/bib.md index a45eec3a40c..16da75adc3e 100644 --- a/site/en/about/bib.md +++ b/site/en/about/bib.md @@ -43,7 +43,7 @@ title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems}, url={https://www.tensorflow.org/}, note={Software available from tensorflow.org}, author={ - Mart\'{\i}n~Abadi and + Mart\'{i}n~Abadi and Ashish~Agarwal and Paul~Barham and Eugene~Brevdo and diff --git a/site/en/addons/README.md b/site/en/addons/README.md deleted file mode 100644 index 3f0ca09e73f..00000000000 --- a/site/en/addons/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow SIG Addons - -These docs are available here: https://github.com/tensorflow/addons/tree/master/docs diff --git a/site/en/agents/README.md b/site/en/agents/README.md deleted file mode 100644 index 468efd02dcd..00000000000 --- a/site/en/agents/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! 
- -# TensorFlow Agents - -These docs are available here: https://github.com/tensorflow/agents/tree/master/docs diff --git a/site/en/community/_toc.yaml b/site/en/community/_toc.yaml index 074ecf72531..1a81d38cb01 100644 --- a/site/en/community/_toc.yaml +++ b/site/en/community/_toc.yaml @@ -34,5 +34,7 @@ toc: - heading: "Community" - title: "Contribute to the community" path: /community/contribute/community + - title: "Contribute to SIGs" + path: /community/contribute/sigs - title: "RFC process" path: /community/contribute/rfc_process diff --git a/site/en/community/contribute/code.md b/site/en/community/contribute/code.md index 35f2417046a..2f71f12d7fe 100644 --- a/site/en/community/contribute/code.md +++ b/site/en/community/contribute/code.md @@ -2,14 +2,24 @@ Whether you are adding a loss function, improving test coverage, or writing an RFC for a major design change, this portion of the contributor guide will help -you get started. Thank you for work and interest in improving TensorFlow. +you get started. Thank you for your work and interest in improving TensorFlow. ## Before you get started -Before you contribute source code to a TensorFlow project, please review the `CONTRIBUTING.md` file in the GitHub repo of the project. (For example, see the -[CONTRIBUTING.md file for the core TensorFlow repo](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md).) All code contributors are required to sign a [Contributor License Agreement](https://cla.developers.google.com/clas) (CLA). - -To avoid duplicating work, please review [current](https://github.com/tensorflow/community/tree/master/rfcs) or [proposed](https://github.com/tensorflow/community/labels/RFC%3A%20Proposed) RFCs and contact the developers on the TensorFlow forums ([developers@tensorflow.org](https://groups.google.com/u/1/a/tensorflow.org/g/developers)) before you start work on a non-trivial feature. We are somewhat selective when deciding to add new functionality, and the best way to contribute and help the project is to work on known issues. +Before you contribute source code to a TensorFlow project, please review the +`CONTRIBUTING.md` file in the GitHub repo of the project. For example, see the +[CONTRIBUTING.md](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) +file in the core TensorFlow repo. All code contributors are required to sign a +[Contributor License Agreement](https://cla.developers.google.com/clas) (CLA). + +To avoid duplicating work, please review +[current](https://github.com/tensorflow/community/tree/master/rfcs) or +[proposed](https://github.com/tensorflow/community/labels/RFC%3A%20Proposed) +RFCs and contact the developers on the TensorFlow forums +([developers@tensorflow.org](https://groups.google.com/u/1/a/tensorflow.org/g/developers)) +before you start work on a non-trivial feature. We are somewhat selective when +deciding to add new functionality, and the best way to contribute and help the +project is to work on known issues. ## Issues for new contributors @@ -22,14 +32,20 @@ workflow, and for the core devs to become acquainted with the contributor. 
- [good first issue](https://github.com/tensorflow/tensorflow/labels/good%20first%20issue) - [contributions welcome](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome) -If you are interested in recruiting a team to help tackle a large-scale problem or a new feature, please email the [developers@ group](https://groups.google.com/a/tensorflow.org/forum/#!forum/developers) and review our current list of RFCs. - +If you are interested in recruiting a team to help tackle a large-scale problem +or a new feature, please email the +[developers@ group](https://groups.google.com/a/tensorflow.org/g/developers) +and review our current list of RFCs. ## Code review -New features, bug fixes, and any other changes to the code base are subject to code review. +New features, bug fixes, and any other changes to the code base are subject to +code review. -Reviewing code contributed to the project as pull requests is a crucial component of TensorFlow development. We encourage anyone to start reviewing code submitted by other developers, especially if the feature is something that you are likely to use. +Reviewing code contributed to the project as pull requests is a crucial +component of TensorFlow development. We encourage anyone to start reviewing code +submitted by other developers, especially if the feature is something that you +are likely to use. Here are some questions to keep in mind during the code review process: @@ -44,36 +60,47 @@ Here are some questions to keep in mind during the code review process: ## Test and improve test coverage -High-quality unit testing is a corner-stone of the TensorFlow development process. For this purpose, we use Docker images. The test functions are appropriately named, and are responsible for checking the validity of algorithms as well as different options of the code. +High-quality unit testing is a corner-stone of the TensorFlow development +process. For this purpose, we use Docker images. The test functions are +appropriately named, and are responsible for checking the validity of algorithms +as well as different options of the code. -All new features and bug fixes *must* include adequate test coverage. We also welcome contributions of new test cases or improvements to existing tests. If you discover that our existing tests are not complete — even if that is not currently causing a bug — please file an issue and, if possible, a pull request. +All new features and bug fixes *must* include adequate test coverage. We also +welcome contributions of new test cases or improvements to existing tests. If +you discover that our existing tests are not complete — even if that is not +currently causing a bug — please file an issue and, if possible, a pull request. -For the specific details of testing procedures in each TensorFlow project, see the `README.md` and `CONTRIBUTING.md` files in the project repo on GitHub. +For the specific details of testing procedures in each TensorFlow project, see +the `README.md` and `CONTRIBUTING.md` files in the project repo on GitHub. Of particular concerns in *adequate testing*: -* Is *every public function and class* tested? -* Are a *reasonable set of parameters*, their values, value types, and combinations tested? -* Do the tests validate that the *code is correct*, and that it is *doing what the documentation says* the code is intended to do? +* Is *every public function and class* tested? 
+* Are a *reasonable set of parameters*, their values, value types, and + combinations tested? +* Do the tests validate that the *code is correct*, and that it is *doing what + the documentation says* the code is intended to do? * If the change is a bug fix, is a *non-regression test* included? * Do the tests *pass the continuous integration* build? -* Do the tests *cover every line of code?* If not, are the exceptions reasonable and explicit? - -If you find any problems, please consider helping the contributor understand those problems and resolve them. +* Do the tests *cover every line of code?* If not, are the exceptions + reasonable and explicit? +If you find any problems, please consider helping the contributor understand +those problems and resolve them. ## Improve error messages or logs -We welcome contributions that improve error messages and logging. - +We welcome contributions that improve error messages and logging. ## Contribution workflow -Code contributions—bug fixes, new development, test improvement—all follow a GitHub-centered workflow. To participate in TensorFlow development, set up a GitHub account. Then: +Code contributions—bug fixes, new development, test improvement—all follow a +GitHub-centered workflow. To participate in TensorFlow development, set up a +GitHub account. Then: -1. Fork the repo you plan to work on. - Go to the project repo page and use the *Fork* button. This will create a copy of the - repo, under your username. (For more details on how to fork a repository see +1. Fork the repo you plan to work on. Go to the project repo page and use the + *Fork* button. This will create a copy of the repo, under your username. + (For more details on how to fork a repository see [this guide](https://help.github.com/articles/fork-a-repo/).) 2. Clone down the repo to your local system. @@ -96,36 +123,46 @@ Code contributions—bug fixes, new development, test improvement—all follow a `$ git push origin branch-name` -7. Open a *Pull Request* (PR). Go to the original project repo on GitHub. There will be a message about your recently pushed branch, asking if you would like to open a pull request. Follow the prompts, *compare across repositories*, and submit the PR. This will send an email to the committers. You may want to consider sending an email to the mailing list for more visibility. (For more details, see the [GitHub guide on PRs](https://help.github.com/articles/creating-a-pull-request-from-a-fork). +7. Open a *Pull Request* (PR). Go to the original project repo on GitHub. There + will be a message about your recently pushed branch, asking if you would + like to open a pull request. Follow the prompts, *compare across + repositories*, and submit the PR. This will send an email to the committers. + You may want to consider sending an email to the mailing list for more + visibility. (For more details, see the + [GitHub guide on PRs](https://help.github.com/articles/creating-a-pull-request-from-a-fork). -8. Maintainers and other contributors will *review your PR*. Please participate in the conversation, and try to *make any requested changes*. Once the PR is approved, the code will be merged. +8. Maintainers and other contributors will *review your PR*. Please participate + in the conversation, and try to *make any requested changes*. Once the PR is + approved, the code will be merged. -*Before working on your next contribution*, make sure your local repository is up to date. 
+*Before working on your next contribution*, make sure your local repository is +up to date. -1. Set the upstream remote. (You only have to do this once per project, not every time.) +1. Set the upstream remote. (You only have to do this once per project, not + every time.) `$ git remote add upstream git@github.com:tensorflow/project-repo-name` -2. Switch to the local master branch. +2. Switch to the local master branch. `$ git checkout master` -3. Pull down the changes from upstream. +3. Pull down the changes from upstream. `$ git pull upstream master` -4. Push the changes to your GitHub account. (Optional, but a good practice.) +4. Push the changes to your GitHub account. (Optional, but a good practice.) `$ git push origin master` -5. Create a new branch if you are starting new work. +5. Create a new branch if you are starting new work. `$ git checkout -b branch-name` Additional `git` and GitHub resources: * [Git documentation](https://git-scm.com/documentation) -* [Git development workflow](https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html) +* [Git development workflow](https://docs.scipy.org/doc/numpy/dev/development_workflow.html) * [Resolving merge conflicts](https://help.github.com/articles/resolving-a-merge-conflict-using-the-command-line/). diff --git a/site/en/community/contribute/community.md b/site/en/community/contribute/community.md index 26cb66e6a40..bb2b2035d1b 100644 --- a/site/en/community/contribute/community.md +++ b/site/en/community/contribute/community.md @@ -2,14 +2,28 @@ An open source project isn't just about the code, it's also about the community of users, developers, writers, researchers, and other contributors. You can help grow and support this community. -Please read the TensorFlow [Code and Collaboration governance](https://github.com/tensorflow/community/blob/master/governance/code-and-collaboration.md) +Please read the TensorFlow [Code and Collaboration governance](https://github.com/tensorflow/community/blob/master/governance/code-and-collaboration.md). ## Community support -Many people [ask questions about TensorFlow on Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). Answering those questions and pointing people to the relevant documentation is a great service to the community. +Many people [ask questions about TensorFlow on the TensorFlow Forum](https://discuss.tensorflow.org/). Answering those questions and pointing people to the relevant documentation is a great service to the community. Some users also ask support questions as GitHub issues. We try to discourage this, as GitHub issues are not the best place to ask for technical support. However, if you notice these issues, you are encouraged to answer them and point people to the relevant documentation. +### TensorFlow Forum + +The [TensorFlow Forum](https://discuss.tensorflow.org/) is a central platform for community discussion and support. It brings our community together to share ideas, best practices and use cases related to TensorFlow. We foster an open and welcoming environment according to the [TensorFlow Code of Conduct](https://discuss.tensorflow.org/faq). + +The TensorFlow Forum is organized by categories, subcategories and tags. We encourage you to create an account and follow categories and tags of interest. 
When you create a new post, select the most appropriate [category or subcategory](https://discuss.tensorflow.org/categories) and [tags](https://discuss.tensorflow.org/tags) to help other users find your topic. + +For more information on Discourse features, read the [Discourse New User Guide](https://meta.discourse.org/t/discourse-new-user-guide/96331). + +### Become a Forum expert + +Discourse uses [trust levels](https://blog.discourse.org/2018/06/understanding-discourse-trust-levels/) to reward increasing levels of participation in the forum. The Forum facilitates learning by doing, letting you to collect [badges](https://discuss.tensorflow.org/badges) that are displayed on your profile. This is a great way to be recognized for helping fellow community members. The more you invest in helping community members, the more badges and forum tools you will unlock. + +Certain groups, such as TensorFlow Team members and Machine Learning GDEs, display a special icon for easier identification. + ## Communication The TensorFlow community has a number of formal and informal ways of keeping in touch. @@ -18,95 +32,33 @@ The TensorFlow community has a number of formal and informal ways of keeping in The primary communication about work on TensorFlow happens in the [TensorFlow repositories on GitHub](https://github.com/tensorflow). This is the place to discuss bugs, new features, and in-progress work. - - ### Mailing lists -Mailing lists are reserved for announcements and contributor conversation. They are not intended to provide technical support. - -#### General TensorFlow lists +Most communication happens on the TensorFlow Forum. The following mailing lists are still used for announcements and contributor conversations. Note that they are not intended to provide technical support. * [announce@tensorflow.org](mailto:announce@tensorflow.org) — All major releases and important announcements are sent to this mailing group. We recommend that you join this list if you depend on TensorFlow in any way. -* [discuss@tensorflow.org](mailto:discuss@tensorflow.org) — General discussion about TensorFlow development and direction. * [developers@tensorflow.org](mailto:developers@tensorflow.org) — Discussion for developers who are contributing to TensorFlow. +For more information on project-specific communication, visit the [Contribute to SIGs](https://tensorflow.org/community/contribute/sigs) page. + +### Blog and social media + +The [TensorFlow Blog](http://blog.tensorflow.org/) is full of great content both from our team at Google and the broader community. We'd love to hear what you have to say, so if you would like to submit an article for review, please contact us at tensorflow-blog@google.com. Note that we receive many great submissions, and setting expectations, we can only publish a few. + +On [Twitter](https://twitter.com/tensorflow) we share the latest and greatest from our community, and our [YouTube channel](https://www.youtube.com/tensorflow) has free educational content to help you create, understand and deploy models for a variety of applications. + +## TensorFlow Community Spotlight + +The TensorFlow Community Spotlight Program provides an opportunity to showcase your passion projects using TensorFlow. [Submit your project](https://services.google.com/fb/forms/tensorflowprojectrecognitionform/) for a chance to be featured and recognized on TensorFlow’s Twitter account. 
+ +Follow the [#TFCommunitySpotlight](https://twitter.com/hashtag/TFCommunitySpotlight?src=hashtag_click) hashtag and find out more about past winners [here](https://blog.tensorflow.org/2020/11/tensorflow-community-spotlight-program-update.html). + +## User groups + +[TensorFlow User Groups](https://www.tensorflow.org/community/groups) (or TFUGs, for short) are local communities of developers and researchers around the world. If you don’t have a TFUG in your country or city, we encourage you to start one by reaching out to [tfug-help@tensorflow.org](mailto:tfug-help@tensorflow.org). + +## Events + +The TensorFlow team hosts and supports events all around the world! If your TFUG is planning an upcoming event or meetup, please let our Community know by posting about it on the TensorFlow Forum under the [Events category](https://discuss.tensorflow.org/c/events/27). -#### Project-specific lists - -* [docs@tensorflow.org](mailto:docs@tensorflow.org) — If you are interested in contributing to the TensorFlow documentation, join this mailing list. -* [hub@tensorflow.org](mailto:hub@tensorflow.org) — Discussion and collaboration around TensorFlow Hub. -* [magenta-discuss@tensorflow.org](mailto:magenta-discuss@tensorflow.org) — General discussion about Magenta development and direction. -* [swift@tensorflow.org](mailto:swift@tensorflow.org) — Community and collaboration around Swift for TensorFlow. -* [tensor2tensor@tensorflow.org](mailto:tensor2tensor@tensorflow.org) — Discussion and peer support for Tensor2Tensor. -* [tfjs-announce@tensorflow.org](mailto:tfjs-announce@tensorflow.org) — Announcements of new TensorFlow.js releases. -* [tfjs@tensorflow.org](mailto:tfjs@tensorflow.org) — Discussion and peer support for TensorFlow.js. -* [tflite@tensorflow.org](mailto:tflite@tensorflow.org) — Discussion and peer support for TensorFlow Lite. -* [tfprobability@tensorflow.org](mailto:tfprobability@tensorflow.org) — Discussion and peer support for TensorFlow Probability. -* [tpu-users@tensorflow.org](mailto:tpu-users@tensorflow.org) — Community discussion and support for TPU users. - - -### Blog - -We post regularly to the [TensorFlow Blog](http://blog.tensorflow.org/), with content sourced from both TensorFlow developers and the broader community. If you would like to submit an article for review, please contact the TensorFlow Developer Relations team. - -### Social media - -For news and updates from around the universe of TensorFlow projects, follow [@tensorflow](https://twitter.com/tensorflow) on Twitter. To watch TensorFlow-related content, check out our [YouTube](http://youtube.com/tensorflow/) channel. - -### User groups - -TensorFlow has many communities all over the world! For a complete listing, please refer to the [Community](https://www.tensorflow.org/community/groups) section on the TensorFlow website. - -### Special Interest Groups (SIGs) - -To enable focused collaboration on particular areas of TensorFlow, we host Special Interest Groups (SIGs). SIGs do their work in public. If you want to join and contribute, review the work of the group, and get in touch with the relevant SIG leader. Membership policies vary on a per-SIG basis. - - -#### Current SIGs - -As of January 2019, the current TF-SIGs are: - - - - - - - - - - - - - - - - - - - - - - - - - - -
SIG Addons - Maintains a repository of contributions that conform to well-established API patterns, but implement new functionality not available in core TensorFlow.
SIG Build - Focuses on issues surrounding building, packaging, and distribution of TensorFlow.
SIG IO - Works on support for file systems and formats other than those in core TensorFlow (such as Apache Ignite FS, or Apache Hadoop SequenceFile), as subclasses of tf.data.Dataset and TensorFlow filesystems.
SIG Networking - Maintains network fabrics and protocols not available in core TensorFlow.
SIG TensorBoard - Furthers the development and direction of TensorBoard and its plug-ins.
SIG Rust - Collaborates on the development of TensorFlow's Rust bindings.
- -If you believe there is a strong need for a new SIG, -please read the [SIG playbook]() and get in touch with -the TensorFlow Developer Relations Team. +If you’ve already held your event, please share feedback with us [here](https://docs.google.com/forms/d/e/1FAIpQLSdvb8c2ZFXxS05aX6dpUVZlfYA0WsFFq-sUAzjiohVKAQ1RLw/viewform)! Feel free to share recaps and recordings on the Forum as well. diff --git a/site/en/community/contribute/docs.md b/site/en/community/contribute/docs.md index e055b3601b8..34b1619ca5d 100644 --- a/site/en/community/contribute/docs.md +++ b/site/en/community/contribute/docs.md @@ -24,12 +24,15 @@ To participate in the TensorFlow docs community: * Watch the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. -* Subscribe to [docs@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs). +* Follow the [docs](https://discuss.tensorflow.org/tag/docs) tag on the + [TensorFlow Forum](https://discuss.tensorflow.org/). + ## API reference -To update reference documentation, find the -[source file](https://www.tensorflow.org/code/tensorflow/python/) +For details, use the [TensorFlow API docs contributor guide](docs_ref.md). This +shows you how to find the +[source file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/) and edit the symbol's docstring. Many API reference pages on tensorflow.org include a link to the source file @@ -38,9 +41,6 @@ where the symbol is defined. Docstrings support and can be (approximately) previewed using any Markdown previewer. -For reference documentation quality and how to get involved with doc sprints and -the community, see the -[TensorFlow 2 API Docs advice](https://docs.google.com/document/d/1e20k9CuaZ_-hp25-sSd8E8qldxKPKQR-SkwojYr_r-U/preview). ### Versions and branches @@ -53,9 +53,9 @@ main tensorflow/tensorflow repo. The reference documentation is generated from code comments and docstrings in the source code for -Python, -C++, and -Java. +Python, +C++, and +Java. Previous versions of the TensorFlow documentation are available as [rX.x branches](https://github.com/tensorflow/docs/branches) in the TensorFlow @@ -167,21 +167,21 @@ when you submit your pull request. Add a remote:
-git remote add upstream git@github.com:tensorflow/docs.git
+git remote add upstream git@github.com:tensorflow/docs.git
 
 # View remote repos
 git remote -v
 origin    git@github.com:username/docs.git (fetch)
 origin    git@github.com:username/docs.git (push)
-upstream  git@github.com:tensorflow/docs.git (fetch)
-upstream  git@github.com:tensorflow/docs.git (push)
+upstream  git@github.com:tensorflow/docs.git (fetch)
+upstream  git@github.com:tensorflow/docs.git (push)
 
To update:
 git checkout master
-git pull upstream master
+git pull upstream master
 
 git push  # Push changes to your GitHub account (defaults to origin)
 
diff --git a/site/en/community/contribute/docs_ref.md b/site/en/community/contribute/docs_ref.md index bf4bf8ef2f3..41fce4dde40 100644 --- a/site/en/community/contribute/docs_ref.md +++ b/site/en/community/contribute/docs_ref.md @@ -8,7 +8,7 @@ TensorFlow uses [DocTest](https://docs.python.org/3/library/doctest.html) to test code snippets in Python docstrings. The snippet must be executable Python code. To enable testing, prepend the line with `>>>` (three left-angle brackets). For example, here's a excerpt from the `tf.concat` function in the -[array_ops.py](https://www.tensorflow.org/code/tensorflow/python/ops/array_ops.py) +[array_ops.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/array_ops.py) source file: ``` @@ -45,6 +45,11 @@ def concat(values, axis, name="concat"): Note: TensorFlow DocTest uses TensorFlow 2 and Python 3. +To assess reference documentation quality, see the example section of the +[TensorFlow 2 API Docs advice](https://docs.google.com/document/d/1e20k9CuaZ_-hp25-sSd8E8qldxKPKQR-SkwojYr_r-U/preview). +(Be aware that the Task Tracker on this sheet is no longer in use.) + + ### Make the code testable with DocTest Currently, many docstrings use backticks (```) to identify code. To make the @@ -95,7 +100,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: ``` def NewLayer(): - “””This layer does cool stuff. + """This layer does cool stuff. Example usage: @@ -103,7 +108,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: >>> new_layer = NewLayer(x) >>> new_layer - “”” + """ ``` * *Floating point values*: The TensorFlow doctest extracts float values from @@ -173,7 +178,7 @@ There are two ways to test the code in the docstring locally: * If you are only changing the docstring of a class/function/method, then you can test it by passing that file's path to - [tf_doctest.py](https://www.tensorflow.org/code/tensorflow/tools/docs/tf_doctest.py). + [tf_doctest.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docs/tf_doctest.py). For example:
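A minimal sketch of that invocation, assuming `tf_doctest.py` accepts the target file via a `--file` flag (the flag name and the example path are assumptions for illustration, not taken from this patch):

```bash
# Run TensorFlow DocTest for a single source file, executing the
# docstring snippets with your installed version of TensorFlow.
python tf_doctest.py --file=tensorflow/python/ops/array_ops.py
```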
diff --git a/site/en/community/contribute/docs_style.md b/site/en/community/contribute/docs_style.md
index 9c730c7f100..10f18e52699 100644
--- a/site/en/community/contribute/docs_style.md
+++ b/site/en/community/contribute/docs_style.md
@@ -48,50 +48,94 @@ language after the first backtick group, for example:
 ```
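As an illustration (not part of this patch), a fenced block that names its language right after the opening backticks looks like this:

```bash
# The "bash" tag after the opening backticks tells the site which
# syntax highlighting to apply to this block.
echo "Hello, TensorFlow docs"
```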
 
-### Links in Markdown +### Links in Markdown and notebooks -#### Links between files in this repository +#### Links between files in a repository -Use relative links between files in a repository. This works on -[tensorflow.org](https://www.tensorflow.org) and -[GitHub](https://github.com/tensorflow/docs/tree/master/site/en):
-\[Custom layers\]\(../tutorials/eager/custom_layers.ipynb\) produces -[Custom layers](https://www.tensorflow.org/tutorials/eager/custom_layers) on the -site. +Use relative links between files in a single GitHub repository. Include the file +extension. -#### Links to API documentation - -API links are converted when the site is published. To link to a symbol's API -reference page, enclose the full symbol path in backticks: +For example, **this file you're reading** is from the +[https://github.com/tensorflow/docs](https://github.com/tensorflow/docs) +repository. Therefore, it can use relative paths to link to other files in the same +repository like this: -* `tf.data.Dataset` produces - [`tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) +* \[Basics\]\(../../guide/basics.ipynb\) produces +[Basics](../../guide/basics.ipynb). -For the C++ API, use the namespace path: +This is the preferred approach because this way the links on +[tensorflow.org](https://www.tensorflow.org), +[GitHub](https://github.com/tensorflow/docs) and +[Colab](https://github.com/tensorflow/docs/tree/master/site/en/guide/bazics.ipynb) +all work. Also, the reader stays in the same site when they click a link. -* `tensorflow::Tensor` produces - [tensorflow::Tensor](https://www.tensorflow.org/api_docs/cc/class/tensorflow/tensor) +Note: You should include the file extension—such as `.ipynb` or `.md`—for +relative links. It will rendered on `tensorflow.org` without an extension. #### External links -For external links, including files on https://www.tensorflow.org -that are not in the `tensorflow/docs` repository, use standard Markdown links -with the full URI. +For links to files that are not in the current repository, use standard Markdown +links with the full URI. Prefer to link to the +[tensorflow.org](https://www.tensorflow.org) URI if it's available. To link to source code, use a link starting with https://www.github.com/tensorflow/tensorflow/blob/master/, followed by the file name starting at the GitHub root. -This URI naming scheme ensures that https://www.tensorflow.org can -forward the link to the branch of the code corresponding to the version of the -documentation you're viewing. +When linking off of [tensorflow.org](https://www.tensorflow.org), include a +`` on the Markdown link so that the "external link" symbol is shown. + +* `[GitHub](https://github.com/tensorflow/docs)` produces + [GitHub](https://github.com/tensorflow/docs) + +Do not include URI query parameters in the link: + +* Use: `https://www.tensorflow.org/guide/data` +* Not: `https://www.tensorflow.org/guide/data?hl=en` + + +#### Images + +The advice in the previous section is for links to pages. Images are handled +differently. + +Generally, you should not check in images, and instead add the +[TensorFlow-Docs team](https://github.com/tensorflow/docs) to your PR, and ask +them to host the images on [tensorflow.org](https://www.tensorflow.org). +This helps keep the size of your repository down. + +If you do submit images to your repository, note that some systems do not handle +relative paths to images. Prefer to use a full URL pointing to the image's +eventual location on [tensorflow.org](https://www.tensorflow.org). + +#### Links to API documentation + +API links are converted when the site is published. 
To link to a symbol's API +reference page, enclose the symbol path in backticks: + +* `tf.data.Dataset` produces + [`tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) + +Full paths are slightly preferred except for long paths. Paths +can be abbreviated by dropping the leading path components. Partial paths will +be converted to links if: + +* There is at least one `.` in the path, and +* The partial path is unique within the project. + +API paths are linked **for every project** with a Python API published on +[tensorflow.org](https://www.tensorflow.org). You can easily link to multiple +subprojects from a single file by wrapping the API names with backticks. +For example: -Do not include URI query parameters in the link. +* `tf.metrics`, `tf_agents.metrics`, + `text.metrics` produces: `tf.metrics`, + `tf_agents.metrics`, `text.metrics`. -File paths use underscores for spaces, for example, `custom_layers.ipynb`. +For symbols with multiple path aliases there is a slight preference for the +path that matches the API-page on [tensorflow.org](https://www.tensorflow.org). +All aliases will redirect to the correct page. -Include the file extension in links to use on the site *and* GitHub, for example,
-\[Custom layers\]\(../tutorials/eager/custom_layers.ipynb\).
 
 ### Math in Markdown
 
diff --git a/site/en/community/contribute/sigs.md b/site/en/community/contribute/sigs.md
new file mode 100644
index 00000000000..b736ec5919a
--- /dev/null
+++ b/site/en/community/contribute/sigs.md
@@ -0,0 +1,97 @@
+# Contribute to TensorFlow Special Interest Groups (SIGs)
+
+The TensorFlow Special Interest Groups (TF SIGs) organize community contributions to key parts of the TensorFlow ecosystem. SIG leads and members work together to build and support important TensorFlow use cases.
+
+SIGs are led by members of the open source community, including industry collaborators and [Machine Learning Google Developer Experts](https://developers.google.com/community/experts) (ML GDEs). TensorFlow's success is due in large part to their hard work and contributions.
+
+We encourage you to join a SIG working on the area of TensorFlow's ecosystem you care most about. Not all SIGs will have the same level of energy, breadth of scope, or governance models — browse our [SIG charters](https://github.com/tensorflow/community/tree/master/sigs) to learn more. Stay connected with SIG leads and members on the [TensorFlow Forum](https://discuss.tensorflow.org/c/special-interest-groups/8), where you can subscribe to preferred [tags](https://discuss.tensorflow.org/tags) and learn more about the regular SIG meetings.
+
+## SIG Addons
+
+SIG Addons builds and maintains a repository of community contributions that conform to well-established API patterns, but implement new functionality not available in core TensorFlow.
+
+TensorFlow natively supports a large number of operators, layers, metrics, losses, optimizers, and more. However, in a fast-moving field like ML, there are many new developments that cannot be integrated into core TensorFlow (because their broad applicability is not yet clear, or they are mostly used by a smaller subset of the community). SIG Addons enables users to introduce new extensions to the TensorFlow ecosystem in a sustainable manner.
+
+SIG Addons on GitHub Contributing Discuss on the Forum
+
+## SIG Build
+
+SIG Build improves and extends the TensorFlow build process. SIG Build maintains a repository showcasing resources, guides, tools, and builds contributed by the community, for the community.
+
+SIG Build on GitHub Contributing Discuss on the Forum
+
+## SIG IO
+
+SIG IO maintains TensorFlow I/O, a collection of file systems and file formats that are not available in TensorFlow's built-in support.
+
+SIG IO on GitHub Contributing Discuss on the Forum
+
+## SIG JVM
+
+SIG JVM maintains the TF Java bindings to let users use the JVM for building, training and running machine learning models.
+
+Java and other JVM languages, such as Scala or Kotlin, are frequently used in small-to-large enterprises all over the world, which makes TensorFlow a strategic choice for adopting machine learning at a large scale.
+
+SIG JVM on GitHub Contributing Discuss on the Forum
+
+## SIG Models
+
+SIG Models focuses on enabling contributions to state-of-the-art model implementations in TensorFlow 2, and sharing best practices of using TensorFlow 2 for state-of-the-art research. Subgroups orient around different machine learning applications (Vision, NLP, etc.).
+
+SIG Models hosts discussions and collaborations around the [TensorFlow Model Garden](https://github.com/tensorflow/models) and [TensorFlow Hub](https://tfhub.dev). Learn how to contribute on GitHub below, or discuss [Research & Models](https://discuss.tensorflow.org/c/research-models/26) on the Forum.
+
+TensorFlow Model Garden on GitHub Contributing
+
+TensorFlow Hub on GitHub Contributing
+
+## SIG Micro
+
+SIG Micro discusses and shares updates on [TensorFlow Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers), a port of TensorFlow Lite designed to run machine learning models on DSPs, microcontrollers and other devices with limited memory.
+
+TensorFlow Lite Micro on GitHub Contributing Discuss on the Forum
+
+## SIG MLIR
+
+SIG MLIR maintains [MLIR](https://mlir.llvm.org/) dialects and utilities for TensorFlow, XLA and TF Lite, providing high performance compilers and optimization techniques that can be applied to TensorFlow graphs and code generation. Their overarching goal is to create a common intermediate representation (IR) that reduces the cost of bringing up new hardware and improves usability for existing TensorFlow users.
+
+SIG MLIR on GitHub Contributing Discuss on the Forum
+
+## SIG Networking
+
+SIG Networking maintains the TensorFlow Networking repository for platform-specific networking extensions to core TensorFlow and related utilities.
+
+SIG Networking on GitHub Discuss on the Forum
+
+## SIG Recommenders
+
+SIG Recommenders maintains a collection of projects related to large-scale recommendation systems built upon TensorFlow, contributed and maintained by the community. Those contributions are complementary to [TensorFlow Core](https://www.tensorflow.org/overview) and [TensorFlow Recommenders](https://www.tensorflow.org/recommenders).
+
+SIG Recommenders on GitHub Contributing Discuss on the Forum
+
+## SIG Rust
+
+SIG Rust maintains idiomatic Rust language bindings for TensorFlow.
+
+SIG Rust on GitHub Contributing Discuss on the Forum
+
+## SIG TensorBoard
+
+SIG TensorBoard facilitates discussion around [TensorBoard](https://www.tensorflow.org/tensorboard)—a suite of tools for inspecting, debugging and optimizing TensorFlow programs.
+
+TensorBoard on GitHub Contributing Discuss on the Forum
+
+## SIG TF.js
+
+SIG TF.js facilitates community-contributed components to [TensorFlow.js](https://www.tensorflow.org/js) and offers project support through the SIG.
+
+TensorFlow.js on GitHub Contributing Discuss on the Forum
+
+## SIG TFX-Addons
+
+SIG TFX-Addons accelerates the sharing of customizations and additions to meet the needs of production ML, expand the vision, and help drive new directions for [TensorFlow Extended (TFX)](https://www.tensorflow.org/tfx) and the ML community.
+
+SIG TFX-Addons on GitHub Contributing Discuss on the Forum
+
+## New SIGs
+
+Didn't find what you were looking for? If you believe there is a strong need for a new TensorFlow SIG, please read the [SIG playbook](https://www.tensorflow.org/community/sig_playbook) and follow the instructions on how to propose it to our contributor community.
diff --git a/site/en/community/mailing-lists.md b/site/en/community/mailing-lists.md
index a33f758cdbf..35bfb218ba1 100644
--- a/site/en/community/mailing-lists.md
+++ b/site/en/community/mailing-lists.md
@@ -2,11 +2,12 @@ As a community, we do much of our collaboration on public mailing lists.
Please note that if you're looking for help using TensorFlow, -[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[TensorFlow Forum](https://discuss.tensorflow.org/), +[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow), and [GitHub issues](https://github.com/tensorflow/tensorflow/issues) are the best -initial places to look. +initial places to look. To receive a roundup of updates from the TensorFlow team each quarter, subscribe to the [TensorFlow newsletter](https://services.google.com/fb/forms/tensorflow/). -## General TensorFlow lists +## General TensorFlow lists and forums * [announce](https://groups.google.com/a/tensorflow.org/d/forum/announce) - Low-volume announcements of new releases. @@ -14,7 +15,7 @@ initial places to look. General community discussion around TensorFlow. * [developers](https://groups.google.com/a/tensorflow.org/d/forum/developers) - Discussion for developers contributing to TensorFlow. -* [documentation](https://groups.google.com/a/tensorflow.org/d/forum/docs) - +* [documentation](https://discuss.tensorflow.org/tag/docs) - Discussion for contributing to TensorFlow documentation. See [community translations](https://www.tensorflow.org/community/contribute/docs#community_translations) for language-specific docs lists. diff --git a/site/en/community/sig_playbook.md b/site/en/community/sig_playbook.md index 75e277d3d96..6ec7a554a5b 100644 --- a/site/en/community/sig_playbook.md +++ b/site/en/community/sig_playbook.md @@ -55,7 +55,7 @@ must demonstrate: application area) * Two or more contributors willing to act as group leads, existence of other contributors, and evidence of demand for the group -* Resources it will initially require (usually, mailing list and regular VC +* Resources it will initially require (usually, mailing list and regular video conference call.) Approval for the group will be given by a decision of the TF Community Team, diff --git a/site/en/datasets/README.md b/site/en/datasets/README.md deleted file mode 100644 index 422d0b5c9a3..00000000000 --- a/site/en/datasets/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Datasets - -These docs are available here: https://github.com/tensorflow/datasets/tree/master/docs diff --git a/site/en/federated/README.md b/site/en/federated/README.md deleted file mode 100644 index 518fde837cb..00000000000 --- a/site/en/federated/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Federated - -These docs are available here: -https://github.com/tensorflow/federated/tree/main/docs diff --git a/site/en/graphics/README.md b/site/en/graphics/README.md deleted file mode 100644 index 9182fae0c12..00000000000 --- a/site/en/graphics/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Graphics - -These docs are available here: https://github.com/tensorflow/graphics/tree/master/tensorflow_graphics/g3doc diff --git a/site/en/guide/_index.yaml b/site/en/guide/_index.yaml index f9d873dbf2a..e39dd37ead5 100644 --- a/site/en/guide/_index.yaml +++ b/site/en/guide/_index.yaml @@ -95,10 +95,11 @@ landing_page: items: - list: - description: > - - A suite of visualization tools to understand, debug, and optimize - TensorFlow programs. - path: /tensorboard + + A library to train, run and interpret decision forest models (e.g., Random Forests, + Gradient Boosted Trees) in TensorFlow. 
+ path: /decision_forests icon: icon_name: chevron_right foreground: theme @@ -113,10 +114,10 @@ landing_page: foreground: theme background: grey - description: > - - The TensorFlow Model Optimization Toolkit is a suite of tools for - optimizing ML models for deployment and execution. - path: /model_optimization + + A TFX serving system for ML models, designed for high-performance in + production environments. + path: /tfx/guide/serving icon: icon_name: chevron_right foreground: theme @@ -147,7 +148,24 @@ landing_page: icon_name: chevron_right foreground: theme background: grey + - description: > + + Extra functionality for TensorFlow, maintained by SIG Addons. + path: https://github.com/tensorflow/addons + icon: + icon_name: chevron_right + foreground: theme + background: grey - list: + - description: > + + A suite of visualization tools to understand, debug, and optimize + TensorFlow programs. + path: /tensorboard + icon: + icon_name: chevron_right + foreground: theme + background: grey - description: > A collection of datasets ready to use with TensorFlow. @@ -157,10 +175,10 @@ landing_page: foreground: theme background: grey - description: > - - A TFX serving system for ML models, designed for high-performance in - production environments. - path: /tfx/guide/serving + + The TensorFlow Model Optimization Toolkit is a suite of tools for + optimizing ML models for deployment and execution. + path: /model_optimization icon: icon_name: chevron_right foreground: theme @@ -192,14 +210,6 @@ landing_page: icon_name: chevron_right foreground: theme background: grey - - description: > - - Extra functionality for TensorFlow, maintained by SIG Addons. - path: https://github.com/tensorflow/addons - icon: - icon_name: chevron_right - foreground: theme - background: grey - description: > Dataset, streaming, and file system extensions, maintained by SIG IO. 
diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index 62410bfcde6..92e5d6a80c3 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -3,41 +3,73 @@ toc: path: /guide/ - heading: "TensorFlow basics" -- title: "Eager execution" - path: /guide/eager -- title: "Tensor" +- title: "Overview" + path: /guide/basics +- title: "Tensors" path: /guide/tensor -- title: "Variable" +- title: "Variables" path: /guide/variable - title: "Automatic differentiation" path: /guide/autodiff -- title: "Intro to graphs and functions" +- title: "Graphs and functions" path: /guide/intro_to_graphs -- title: "Intro to modules, layers, and models" +- title: "Modules, layers, and models" path: /guide/intro_to_modules - title: "Training loops" path: /guide/basic_training_loops + +- heading: "Keras" +- title: "Overview" + path: /guide/keras +- include: /guide/keras/_toc.yaml + +- heading: "Build with Core" + status: new +- title: "Overview" + path: /guide/core/index +- title: "Quickstart for Core" + path: /guide/core/quickstart_core +- title: "Logistic regression" + path: /guide/core/logistic_regression_core +- title: "Multilayer perceptrons" + path: /guide/core/mlp_core +- title: "Matrix approximation" + path: /guide/core/matrix_core +- title: "Custom optimizers" + path: /guide/core/optimizers_core +- title: "DTensor with Core APIs" + path: /guide/core/distribution + status: experimental + +- heading: "TensorFlow in depth" +- title: "Tensor slicing" + path: /guide/tensor_slicing - title: "Advanced autodiff" path: /guide/advanced_autodiff - title: "Ragged tensor" path: /guide/ragged_tensor - title: "Sparse tensor" path: /guide/sparse_tensor +- title: "Random number generation" + path: /guide/random_numbers - title: "NumPy API" + status: experimental path: /guide/tf_numpy -- title: "Tensor slicing" - path: /guide/tensor_slicing +- title: "NumPy API Type Promotion" + status: nightly + path: /guide/tf_numpy_type_promotion +- title: "DTensor concepts" + path: /guide/dtensor_overview + status: experimental - title: "Thinking in TensorFlow 2" path: /guide/effective_tf2 -- heading: "Keras" -- include: /guide/keras/_toc.yaml - - heading: "Customization" - title: "Create an op" path: /guide/create_op -- title: "Random number generation" - path: /guide/random_numbers +- title: "Extension types" + path: /guide/extension_type + status: experimental - heading: "Data input pipelines" - title: "tf.data" @@ -47,11 +79,14 @@ toc: - title: "Analyze pipeline performance" path: /guide/data_performance_analysis -- heading: "Save a model" +- heading: "Import and export" - title: "Checkpoint" path: /guide/checkpoint - title: "SavedModel" path: /guide/saved_model +- title: "Import a JAX model using JAX2TF" + status: new + path: /guide/jax2tf - heading: "Accelerators" - title: "Distributed training" @@ -73,7 +108,19 @@ toc: - title: "Mixed precision" path: /guide/mixed_precision +- heading: "Model Garden" + status: new +- title: "Overview" + path: /tfmodels +- title: "Training with Orbit" + path: /tfmodels/orbit +- title: "TFModels - NLP" + path: /tfmodels/nlp + status: external +- include: /tfmodels/vision/_toc.yaml + - heading: "Estimators" + status: deprecated - title: "Estimator overview" path: /guide/estimator diff --git a/site/en/guide/advanced_autodiff.ipynb b/site/en/guide/advanced_autodiff.ipynb index 7da53d8ee48..e04b9db4d77 100644 --- a/site/en/guide/advanced_autodiff.ipynb +++ b/site/en/guide/advanced_autodiff.ipynb @@ -1101,7 +1101,7 @@ "id": "M_x7ih5sarvG" }, "source": [ - "In this case, 
`batch_jacobian` still runs and returns _something_ with the expected shape, but it's contents have an unclear meaning:" + "In this case, `batch_jacobian` still runs and returns _something_ with the expected shape, but its contents have an unclear meaning:" ] }, { diff --git a/site/en/guide/autodiff.ipynb b/site/en/guide/autodiff.ipynb index c9c57cd4e69..237a224569b 100644 --- a/site/en/guide/autodiff.ipynb +++ b/site/en/guide/autodiff.ipynb @@ -75,7 +75,7 @@ "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training\n", "neural networks.\n", "\n", - "In this guide, you will explore ways to compute gradients with TensorFlow, especially in [eager execution](eager.ipynb)." + "In this guide, you will explore ways to compute gradients with TensorFlow, especially in eager execution." ] }, { @@ -746,9 +746,9 @@ "id": "egypBxISAHhx" }, "source": [ - "## Getting a gradient of `None`\n", + "## Cases where `gradient` returns `None`\n", "\n", - "When a target is not connected to a source you will get a gradient of `None`.\n" + "When a target is not connected to a source, `gradient` will return `None`.\n" ] }, { diff --git a/site/en/guide/basic_training_loops.ipynb b/site/en/guide/basic_training_loops.ipynb index f2c2a61afa6..a1558b1903e 100644 --- a/site/en/guide/basic_training_loops.ipynb +++ b/site/en/guide/basic_training_loops.ipynb @@ -70,7 +70,7 @@ "source": [ "In the previous guides, you have learned about [tensors](./tensor.ipynb), [variables](./variable.ipynb), [gradient tape](autodiff.ipynb), and [modules](./intro_to_modules.ipynb). In this guide, you will fit these all together to train models.\n", "\n", - "TensorFlow also includes the [tf.Keras API](keras/overview.ipynb), a high-level neural network API that provides useful abstractions to reduce boilerplate. However, in this guide, you will use basic classes." + "TensorFlow also includes the [tf.Keras API](https://www.tensorflow.org/guide/keras/overview), a high-level neural network API that provides useful abstractions to reduce boilerplate. However, in this guide, you will use basic classes." ] }, { @@ -90,7 +90,11 @@ }, "outputs": [], "source": [ - "import tensorflow as tf" + "import tensorflow as tf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']" ] }, { @@ -142,16 +146,20 @@ "TRUE_W = 3.0\n", "TRUE_B = 2.0\n", "\n", - "NUM_EXAMPLES = 1000\n", + "NUM_EXAMPLES = 201\n", "\n", "# A vector of random x values\n", - "x = tf.random.normal(shape=[NUM_EXAMPLES])\n", + "x = tf.linspace(-2,2, NUM_EXAMPLES)\n", + "x = tf.cast(x, tf.float32)\n", + "\n", + "def f(x):\n", + " return x * TRUE_W + TRUE_B\n", "\n", "# Generate some noise\n", "noise = tf.random.normal(shape=[NUM_EXAMPLES])\n", "\n", "# Calculate y\n", - "y = x * TRUE_W + TRUE_B + noise" + "y = f(x) + noise" ] }, { @@ -163,9 +171,7 @@ "outputs": [], "source": [ "# Plot all the data\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plt.scatter(x, y, c=\"b\")\n", + "plt.plot(x, y, '.')\n", "plt.show()" ] }, @@ -227,7 +233,7 @@ "id": "rdpN_3ssG9D5" }, "source": [ - "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initalizers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras." 
+ "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initializers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras." ] }, { @@ -271,8 +277,10 @@ }, "outputs": [], "source": [ - "plt.scatter(x, y, c=\"b\")\n", - "plt.scatter(x, model(x), c=\"r\")\n", + "plt.plot(x, y, '.', label=\"Data\")\n", + "plt.plot(x, f(x), label=\"Ground truth\")\n", + "plt.plot(x, model(x), label=\"Predictions\")\n", + "plt.legend()\n", "plt.show()\n", "\n", "print(\"Current loss: %1.6f\" % loss(y, model(x)).numpy())" @@ -341,10 +349,15 @@ "model = MyModel()\n", "\n", "# Collect the history of W-values and b-values to plot later\n", - "Ws, bs = [], []\n", + "weights = []\n", + "biases = []\n", "epochs = range(10)\n", "\n", "# Define a training loop\n", + "def report(model, loss):\n", + " return f\"W = {model.w.numpy():1.2f}, b = {model.b.numpy():1.2f}, loss={loss:2.5f}\"\n", + "\n", + "\n", "def training_loop(model, x, y):\n", "\n", " for epoch in epochs:\n", @@ -352,12 +365,21 @@ " train(model, x, y, learning_rate=0.1)\n", "\n", " # Track this before I update\n", - " Ws.append(model.w.numpy())\n", - " bs.append(model.b.numpy())\n", + " weights.append(model.w.numpy())\n", + " biases.append(model.b.numpy())\n", " current_loss = loss(y, model(x))\n", "\n", - " print(\"Epoch %2d: W=%1.2f b=%1.2f, loss=%2.5f\" %\n", - " (epoch, Ws[-1], bs[-1], current_loss))\n" + " print(f\"Epoch {epoch:2d}:\")\n", + " print(\" \", report(model, current_loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8dKKLU4KkQEq" + }, + "source": [ + "Do the training" ] }, { @@ -368,21 +390,50 @@ }, "outputs": [], "source": [ - "print(\"Starting: W=%1.2f b=%1.2f, loss=%2.5f\" %\n", - " (model.w, model.b, loss(y, model(x))))\n", + "current_loss = loss(y, model(x))\n", "\n", - "# Do the training\n", - "training_loop(model, x, y)\n", + "print(f\"Starting:\")\n", + "print(\" \", report(model, current_loss))\n", "\n", - "# Plot it\n", - "plt.plot(epochs, Ws, \"r\",\n", - " epochs, bs, \"b\")\n", + "training_loop(model, x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JPJgimg8kSA4" + }, + "source": [ + "Plot the evolution of the weights over time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ND1fQw8sbTNr" + }, + "outputs": [], + "source": [ + "plt.plot(epochs, weights, label='Weights', color=colors[0])\n", + "plt.plot(epochs, [TRUE_W] * len(epochs), '--',\n", + " label = \"True weight\", color=colors[0])\n", "\n", - "plt.plot([TRUE_W] * len(epochs), \"r--\",\n", - " [TRUE_B] * len(epochs), \"b--\")\n", + "plt.plot(epochs, biases, label='bias', color=colors[1])\n", + "plt.plot(epochs, [TRUE_B] * len(epochs), \"--\",\n", + " label=\"True bias\", color=colors[1])\n", "\n", - "plt.legend([\"W\", \"b\", \"True W\", \"True b\"])\n", - "plt.show()\n" + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zhlwj1ojkcUP" + }, + "source": [ + "Visualize how the trained model performs" ] }, { @@ -393,9 +444,10 @@ }, "outputs": [], "source": [ - "# Visualize how the trained model performs\n", - "plt.scatter(x, y, c=\"b\")\n", - "plt.scatter(x, model(x), c=\"r\")\n", + "plt.plot(x, y, '.', label=\"Data\")\n", + "plt.plot(x, f(x), label=\"Ground truth\")\n", + "plt.plot(x, model(x), label=\"Predictions\")\n", + "plt.legend()\n", "plt.show()\n", "\n", "print(\"Current loss: %1.6f\" % loss(model(x), y).numpy())" @@ -523,7 +575,7 @@ 
"\n", "This is, however, an extremely simple problem. For a more practical introduction, see [Custom training walkthrough](../tutorials/customization/custom_training_walkthrough.ipynb).\n", "\n", - "For more on using built-in Keras training loops, see [this guide](keras/train_and_evaluate.ipynb). For more on training loops and Keras, see [this guide](keras/writing_a_training_loop_from_scratch.ipynb). For writing custom distributed training loops, see [this guide](distributed_training.ipynb#using_tfdistributestrategy_with_basic_training_loops_loops)." + "For more on using built-in Keras training loops, see [this guide](https://www.tensorflow.org/guide/keras/train_and_evaluate). For more on training loops and Keras, see [this guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch). For writing custom distributed training loops, see [this guide](distributed_training.ipynb#using_tfdistributestrategy_with_basic_training_loops_loops)." ] } ], @@ -531,8 +583,7 @@ "colab": { "collapsed_sections": [ "5rmpybwysXGV", - "iKD__8kFCKNt", - "vPnIVuaSJwWz" + "iKD__8kFCKNt" ], "name": "basic_training_loops.ipynb", "toc_visible": true diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb new file mode 100644 index 00000000000..5457f162a0e --- /dev/null +++ b/site/en/guide/basics.ipynb @@ -0,0 +1,968 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5rmpybwysXGV" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "m8y3rGtQsYP2" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hrXv0rU9sIma" + }, + "source": [ + "# TensorFlow basics" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7S0BwJ_8sLu7" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iJyZUDbzBTIG" + }, + "source": [ + "This guide provides a quick overview of _TensorFlow basics_. Each section of this doc is an overview of a larger topic—you can find links to full guides at the end of each section.\n", + "\n", + "TensorFlow is an end-to-end platform for machine learning. It supports the following:\n", + "\n", + "* Multidimensional-array based numeric computation (similar to NumPy.)\n", + "* GPU and distributed processing\n", + "* Automatic differentiation\n", + "* Model construction, training, and export\n", + "* And more" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gvLegMMvBZYg" + }, + "source": [ + "## Tensors\n", + "\n", + "TensorFlow operates on multidimensional arrays or _tensors_ represented as `tf.Tensor` objects. Here is a two-dimensional tensor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ZqX5RnbBS1f" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "x = tf.constant([[1., 2., 3.],\n", + " [4., 5., 6.]])\n", + "\n", + "print(x)\n", + "print(x.shape)\n", + "print(x.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k-AOMqevQGN4" + }, + "source": [ + "The most important attributes of a `tf.Tensor` are its `shape` and `dtype`:\n", + "\n", + "* `Tensor.shape`: tells you the size of the tensor along each of its axes.\n", + "* `Tensor.dtype`: tells you the type of all the elements in the tensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bUkKeNWZCIJO" + }, + "source": [ + "TensorFlow implements standard mathematical operations on tensors, as well as many operations specialized for machine learning.\n", + "\n", + "For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BM7xXNDsBfN5" + }, + "outputs": [], + "source": [ + "x + x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZLGqscTxB61v" + }, + "outputs": [], + "source": [ + "5 * x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ImJHd8VfnWq" + }, + "outputs": [], + "source": [ + "x @ tf.transpose(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U9JZD6TYCZWu" + }, + "outputs": [], + "source": [ + "tf.concat([x, x, x], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "seGBLeD9P_PI" + }, + "outputs": [], + "source": [ + "tf.nn.softmax(x, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YZNZRv1ECjf8" + }, + "outputs": [], + "source": [ + "tf.reduce_sum(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TNHnIjOVLJfA" + }, + "source": [ + "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i_XKgjDsL4GE" + }, + "outputs": [], + "source": [ + "tf.convert_to_tensor([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTBt-JUqLJDJ" + }, + "outputs": [], + "source": [ + "tf.reduce_sum([1,2,3])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8-mi5031DVxz" + }, + "source": [ + "Running large calculations on CPU can be slow. 
When properly configured, TensorFlow can use accelerator hardware like GPUs to execute operations very quickly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m97Gv5H6Dz0G" + }, + "outputs": [], + "source": [ + "if tf.config.list_physical_devices('GPU'):\n", + " print(\"TensorFlow **IS** using the GPU\")\n", + "else:\n", + " print(\"TensorFlow **IS NOT** using the GPU\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ln2FkLOqMX92" + }, + "source": [ + "Refer to the [Tensor guide](tensor.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oVbomvMyEIVF" + }, + "source": [ + "## Variables\n", + "\n", + "Normal `tf.Tensor` objects are immutable. To store model weights (or other mutable state) in TensorFlow use a `tf.Variable`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SO8_bP4UEzxS" + }, + "outputs": [], + "source": [ + "var = tf.Variable([0.0, 0.0, 0.0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aDLYFvu5FAFa" + }, + "outputs": [], + "source": [ + "var.assign([1, 2, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9EpiOmxXFDSS" + }, + "outputs": [], + "source": [ + "var.assign_add([1, 1, 1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tlvTpi1CMedC" + }, + "source": [ + "Refer to the [Variables guide](variable.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rG1Dhv2QFkV3" + }, + "source": [ + "## Automatic differentiation\n", + "\n", + "_Gradient descent_ and related algorithms are a cornerstone of modern machine learning.\n", + "\n", + "To enable this, TensorFlow implements automatic differentiation (autodiff), which uses calculus to compute gradients. Typically you'll use this to calculate the gradient of a model's _error_ or _loss_ with respect to its weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cYKOi-z4GY9Y" + }, + "outputs": [], + "source": [ + "x = tf.Variable(1.0)\n", + "\n", + "def f(x):\n", + " y = x**2 + 2*x - 5\n", + " return y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IQz99cxMGoF_" + }, + "outputs": [], + "source": [ + "f(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ozLLop0cHeYl" + }, + "source": [ + "At `x = 1.0`, `y = f(x) = (1**2 + 2*1 - 5) = -2`.\n", + "\n", + "The derivative of `y` is `y' = f'(x) = (2*x + 2) = 4`. TensorFlow can calculate this automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N02NfWpHGvw8" + }, + "outputs": [], + "source": [ + "with tf.GradientTape() as tape:\n", + " y = f(x)\n", + "\n", + "g_x = tape.gradient(y, x) # g(x) = dy/dx\n", + "\n", + "g_x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s-DVYJfcIRPd" + }, + "source": [ + "This simplified example only takes the derivative with respect to a single scalar (`x`), but TensorFlow can compute the gradient with respect to any number of non-scalar tensors simultaneously." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ECK3I9bUMk_r" + }, + "source": [ + "Refer to the [Autodiff guide](autodiff.ipynb) for details." 
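+    "\n",
+    "As a quick illustration of that last point, here is a minimal sketch (not part of the original guide; the variable names are only illustrative) of taking gradients with respect to several tensors at once:\n",
+    "\n",
+    "```python\n",
+    "w = tf.Variable(tf.random.normal((3, 2)), name='w')\n",
+    "b = tf.Variable(tf.zeros(2), name='b')\n",
+    "x = tf.constant([[1., 2., 3.]])\n",
+    "\n",
+    "with tf.GradientTape() as tape:\n",
+    "  y = x @ w + b\n",
+    "  loss = tf.reduce_mean(y**2)\n",
+    "\n",
+    "# Gradients come back in the same structure as the sources passed in.\n",
+    "dl_dw, dl_db = tape.gradient(loss, [w, b])\n",
+    "print(dl_dw.shape, dl_db.shape)  # (3, 2) and (2,)\n",
+    "```"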
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VglUM4M3KhNz" + }, + "source": [ + "## Graphs and tf.function\n", + "\n", + "While you can use TensorFlow interactively like any Python library, TensorFlow also provides tools for:\n", + "\n", + "* **Performance optimization**: to speed up training and inference.\n", + "* **Export**: so you can save your model when it's done training.\n", + "\n", + "These require that you use `tf.function` to separate your pure-TensorFlow code from Python." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VitACyZWKJD_" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def my_func(x):\n", + " print('Tracing.\\n')\n", + " return tf.reduce_sum(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBYDh-huNUBZ" + }, + "source": [ + "The first time you run the `tf.function`, although it executes in Python, it captures a complete, optimized graph representing the TensorFlow computations done within the function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vkOFSEkoM1bd" + }, + "outputs": [], + "source": [ + "x = tf.constant([1, 2, 3])\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a3aWzt-rNsBa" + }, + "source": [ + "On subsequent calls TensorFlow only executes the optimized graph, skipping any non-TensorFlow steps. Below, note that `my_func` doesn't print _tracing_ since `print` is a Python function, not a TensorFlow function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23dMHWwwNIoa" + }, + "outputs": [], + "source": [ + "x = tf.constant([10, 9, 8])\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nSeTti6zki0n" + }, + "source": [ + "A graph may not be reusable for inputs with a different _signature_ (`shape` and `dtype`), so a new graph is generated instead:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OWffqyhqlVPf" + }, + "outputs": [], + "source": [ + "x = tf.constant([10.0, 9.1, 8.2], dtype=tf.float32)\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UWknAA_zNTOa" + }, + "source": [ + "These captured graphs provide two benefits:\n", + "\n", + "* In many cases they provide a significant speedup in execution (though not this trivial example).\n", + "* You can export these graphs, using `tf.saved_model`, to run on other systems like a [server](https://www.tensorflow.org/tfx/serving/docker) or a [mobile device](https://www.tensorflow.org/lite/guide), no Python installation required." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hLUJ6f2eMsA8" + }, + "source": [ + "Refer to [Intro to graphs](intro_to_graphs.ipynb) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t_36xPDPPBqp" + }, + "source": [ + "## Modules, layers, and models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oDaT7kCpUgnJ" + }, + "source": [ + "`tf.Module` is a class for managing your `tf.Variable` objects, and the `tf.function` objects that operate on them. The `tf.Module` class is necessary to support two significant features:\n", + "\n", + "1. You can save and restore the values of your variables using `tf.train.Checkpoint`. This is useful during training as it is quick to save and restore a model's state.\n", + "2. 
You can import and export the `tf.Variable` values _and_ the `tf.function` graphs using `tf.saved_model`. This allows you to run your model independently of the Python program that created it.\n", + "\n", + "Here is a complete example exporting a simple `tf.Module` object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1MqEcZOqPBDV" + }, + "outputs": [], + "source": [ + "class MyModule(tf.Module):\n", + " def __init__(self, value):\n", + " self.weight = tf.Variable(value)\n", + "\n", + " @tf.function\n", + " def multiply(self, x):\n", + " return x * self.weight" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "la2G82HfVfU0" + }, + "outputs": [], + "source": [ + "mod = MyModule(3)\n", + "mod.multiply(tf.constant([1, 2, 3]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GaSJX7zQXCm4" + }, + "source": [ + "Save the `Module`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1MlfbEMjVzG4" + }, + "outputs": [], + "source": [ + "save_path = './saved'\n", + "tf.saved_model.save(mod, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LgfoftD4XGJW" + }, + "source": [ + "The resulting SavedModel is independent of the code that created it. You can load a SavedModel from Python, other language bindings, or [TensorFlow Serving](https://www.tensorflow.org/tfx/serving/docker). You can also convert it to run with [TensorFlow Lite](https://www.tensorflow.org/lite/guide) or [TensorFlow JS](https://www.tensorflow.org/js/guide)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pWuLOIKBWZYG" + }, + "outputs": [], + "source": [ + "reloaded = tf.saved_model.load(save_path)\n", + "reloaded.multiply(tf.constant([1, 2, 3]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nxU6P1RGwHyC" + }, + "source": [ + "The `tf.keras.layers.Layer` and `tf.keras.Model` classes build on `tf.Module` providing additional functionality and convenience methods for building, training, and saving models. Some of these are demonstrated in the next section." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tQzt3yaWMzLf" + }, + "source": [ + "Refer to [Intro to modules](intro_to_modules.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rk1IEG5aav7X" + }, + "source": [ + "## Training loops\n", + "\n", + "Now put this all together to build a basic model and train it from scratch.\n", + "\n", + "First, create some example data. 
This generates a cloud of points that loosely follows a quadratic curve:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VcuFr7KPRPzn" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sXN9E_xf-GiP" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "\n", + "def f(x):\n", + " y = x**2 + 2*x - 5\n", + " return y\n", + "\n", + "y = f(x) + tf.random.normal(shape=[201])\n", + "\n", + "plt.plot(x.numpy(), y.numpy(), '.', label='Data')\n", + "plt.plot(x, f(x), label='Ground truth')\n", + "plt.legend();" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "De5LldboSWcW" + }, + "source": [ + "Create a quadratic model with randomly initialized weights and a bias:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Pypd0GB4SRhf" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + "\n", + " def __init__(self):\n", + " # Randomly generate weight and bias terms\n", + " rand_init = tf.random.uniform(shape=[3], minval=0., maxval=5., seed=22)\n", + " # Initialize model parameters\n", + " self.w_q = tf.Variable(rand_init[0])\n", + " self.w_l = tf.Variable(rand_init[1])\n", + " self.b = tf.Variable(rand_init[2])\n", + " \n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Quadratic Model : quadratic_weight * x^2 + linear_weight * x + bias\n", + " return self.w_q * (x**2) + self.w_l * x + self.b" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "36o7VjaesScg" + }, + "source": [ + "First, observe your model's performance before training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GkwToC5BWV1c" + }, + "outputs": [], + "source": [ + "quad_model = Model()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ReWhH40wTY5F" + }, + "outputs": [], + "source": [ + "def plot_preds(x, y, f, model, title):\n", + " plt.figure()\n", + " plt.plot(x, y, '.', label='Data')\n", + " plt.plot(x, f(x), label='Ground truth')\n", + " plt.plot(x, model(x), label='Predictions')\n", + " plt.title(title)\n", + " plt.legend()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y0JtXQat-nlk" + }, + "outputs": [], + "source": [ + "plot_preds(x, y, f, quad_model, 'Before training')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hLzwD0-ascGf" + }, + "source": [ + "Now, define a loss for your model:\n", + "\n", + "Given that this model is intended to predict continuous values, the mean squared error (MSE) is a good choice for the loss function. Given a vector of predictions, $\\hat{y}$, and a vector of true targets, $y$, the MSE is defined as the mean of the squared differences between the predicted values and the ground truth.\n", + "\n", + "$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eCtJ1uuCseZd" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7EWyDu3zot2w" + }, + "source": [ + "Write a basic training loop for the model. 
The loop will make use of the MSE loss function and its gradients with respect to the model's variables in order to iteratively update the model's parameters. Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "8kX_-zily2Ia"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
+    "dataset = dataset.shuffle(buffer_size=x.shape[0]).batch(batch_size)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nOaES5gyTDtG"
+   },
+   "outputs": [],
+   "source": [
+    "# Set training parameters\n",
+    "epochs = 100\n",
+    "learning_rate = 0.01\n",
+    "losses = []\n",
+    "\n",
+    "# Format training loop\n",
+    "for epoch in range(epochs):\n",
+    "  for x_batch, y_batch in dataset:\n",
+    "    with tf.GradientTape() as tape:\n",
+    "      batch_loss = mse_loss(quad_model(x_batch), y_batch)\n",
+    "    # Update parameters with respect to the gradient calculations\n",
+    "    grads = tape.gradient(batch_loss, quad_model.variables)\n",
+    "    for g, v in zip(grads, quad_model.variables):\n",
+    "      v.assign_sub(learning_rate*g)\n",
+    "  # Keep track of model loss per epoch\n",
+    "  loss = mse_loss(quad_model(x), y)\n",
+    "  losses.append(loss)\n",
+    "  if epoch % 10 == 0:\n",
+    "    print(f'Mean squared error for step {epoch}: {loss.numpy():0.3f}')\n",
+    "\n",
+    "# Plot model results\n",
+    "print(\"\\n\")\n",
+    "plt.plot(range(epochs), losses)\n",
+    "plt.xlabel(\"Epoch\")\n",
+    "plt.ylabel(\"Mean Squared Error (MSE)\")\n",
+    "plt.title('MSE loss vs training iterations');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "dW5B2TTRsvxE"
+   },
+   "source": [
+    "Now, observe your model's performance after training:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Qcvzyg3eYLh8"
+   },
+   "outputs": [],
+   "source": [
+    "plot_preds(x, y, f, quad_model, 'After training')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hbtmFJIXb6qm"
+   },
+   "source": [
+    "That's working, but remember that implementations of common training utilities are available in the `tf.keras` module. So, consider using those before writing your own. To start with, the `Model.compile` and `Model.fit` methods implement a training loop for you:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "cjx23MiztFmT"
+   },
+   "source": [
+    "Begin by creating a Sequential model in Keras using `tf.keras.Sequential`. One of the simplest Keras layers is the dense layer, which can be instantiated with `tf.keras.layers.Dense`. The dense layer is able to learn multidimensional linear relationships of the form $\mathrm{Y} = \mathrm{W}\mathrm{X} + \vec{b}$. In order to learn a nonlinear equation of the form $w_1x^2 + w_2x + b$, the dense layer's input should be a data matrix with $x^2$ and $x$ as features. The lambda layer, `tf.keras.layers.Lambda`, can be used to perform this stacking transformation."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5rt8HP2TZhEM" + }, + "outputs": [], + "source": [ + "new_model = tf.keras.Sequential([\n", + " tf.keras.layers.Lambda(lambda x: tf.stack([x, x**2], axis=1)),\n", + " tf.keras.layers.Dense(units=1, kernel_initializer=tf.random.normal)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "73kCo1BtP3rQ" + }, + "outputs": [], + "source": [ + "new_model.compile(\n", + " loss=tf.keras.losses.MSE,\n", + " optimizer=tf.keras.optimizers.SGD(learning_rate=0.01))\n", + "\n", + "history = new_model.fit(x, y,\n", + " epochs=100,\n", + " batch_size=32,\n", + " verbose=0)\n", + "\n", + "new_model.save('./my_new_model.keras')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u3q5d1SzvzTq" + }, + "source": [ + "Observe your Keras model's performance after training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mo7zRV7XZjv7" + }, + "outputs": [], + "source": [ + "plt.plot(history.history['loss'])\n", + "plt.xlabel('Epoch')\n", + "plt.ylim([0, max(plt.ylim())])\n", + "plt.ylabel('Loss [Mean Squared Error]')\n", + "plt.title('Keras training progress');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bB44a9YsvnfK" + }, + "outputs": [], + "source": [ + "plot_preds(x, y, f, new_model, 'After Training: Keras')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ng-BY_eGS0bn" + }, + "source": [ + "Refer to [Basic training loops](basic_training_loops.ipynb) and the [Keras guide](https://www.tensorflow.org/guide/keras) for more details." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "basics.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/checkpoint.ipynb b/site/en/guide/checkpoint.ipynb index 59fae1257af..fb3b45437f7 100644 --- a/site/en/guide/checkpoint.ipynb +++ b/site/en/guide/checkpoint.ipynb @@ -139,7 +139,7 @@ "## Saving from `tf.keras` training APIs\n", "\n", "See the [`tf.keras` guide on saving and\n", - "restoring](./keras/overview.ipynb#save_and_restore).\n", + "restoring](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "\n", "`tf.keras.Model.save_weights` saves a TensorFlow checkpoint. " ] @@ -423,7 +423,7 @@ "\n", "The optimizer is in red, regular variables are in blue, and the optimizer slot variables are in orange. The other nodes—for example, representing the `tf.train.Checkpoint`—are in black.\n", "\n", - "Slot variables are part of the optimizer's state, but are created for a specific variable. For example the `'m'` edges above correspond to momentum, which the Adam optimizer tracks for each variable. Slot variables are only saved in a checkpoint if the variable and the optimizer would both be saved, thus the dashed edges." + "Slot variables are part of the optimizer's state, but are created for a specific variable. For example, the `'m'` edges above correspond to momentum, which the Adam optimizer tracks for each variable. Slot variables are only saved in a checkpoint if the variable and the optimizer would both be saved, thus the dashed edges." 
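+    "\n",
+    "As a minimal sketch of how such a graph comes about (the object names here are illustrative, not from the code above), attaching both a model and its optimizer to one checkpoint is what makes the slot variables reachable:\n",
+    "\n",
+    "```python\n",
+    "net = tf.keras.layers.Dense(5)\n",
+    "opt = tf.keras.optimizers.Adam(0.1)\n",
+    "ckpt = tf.train.Checkpoint(model=net, optimizer=opt)\n",
+    "\n",
+    "# Once the optimizer has applied gradients, its slot variables (the 'm'\n",
+    "# edges above) exist and are written too, because both the variables and\n",
+    "# the optimizer are reachable from this checkpoint.\n",
+    "save_path = ckpt.save('./tf_ckpts/example')\n",
+    "```"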
] }, { @@ -491,11 +491,11 @@ "id": "KCcmJ-2j9RUP" }, "source": [ - "### Delayed restorations\n", + "### Deferred restorations\n", "\n", - "`Layer` objects in TensorFlow may delay the creation of variables to their first call, when input shapes are available. For example the shape of a `Dense` layer's kernel depends on both the layer's input and output shapes, and so the output shape required as a constructor argument is not enough information to create the variable on its own. Since calling a `Layer` also reads the variable's value, a restore must happen between the variable's creation and its first use.\n", + "`Layer` objects in TensorFlow may defer the creation of variables to their first call, when input shapes are available. For example, the shape of a `Dense` layer's kernel depends on both the layer's input and output shapes, and so the output shape required as a constructor argument is not enough information to create the variable on its own. Since calling a `Layer` also reads the variable's value, a restore must happen between the variable's creation and its first use.\n", "\n", - "To support this idiom, `tf.train.Checkpoint` queues restores which don't yet have a matching variable." + "To support this idiom, `tf.train.Checkpoint` defers restores which don't yet have a matching variable." ] }, { @@ -506,10 +506,10 @@ }, "outputs": [], "source": [ - "delayed_restore = tf.Variable(tf.zeros([1, 5]))\n", - "print(delayed_restore.numpy()) # Not restored; still zeros\n", - "fake_layer.kernel = delayed_restore\n", - "print(delayed_restore.numpy()) # Restored" + "deferred_restore = tf.Variable(tf.zeros([1, 5]))\n", + "print(deferred_restore.numpy()) # Not restored; still zeros\n", + "fake_layer.kernel = deferred_restore\n", + "print(deferred_restore.numpy()) # Restored" ] }, { @@ -589,7 +589,9 @@ "id": "5fxk_BnZ4W1b" }, "source": [ - "### List and dictionary tracking\n", + "### Object tracking\n", + "\n", + "Checkpoints save and restore the values of `tf.Variable` objects by \"tracking\" any variable or trackable object set in one of its attributes. When executing a save, variables are gathered recursively from all of the reachable tracked objects.\n", "\n", "As with direct attribute assignments like `self.l1 = tf.keras.layers.Dense(5)`, assigning lists and dictionaries to attributes will track their contents." ] @@ -647,7 +649,22 @@ "id": "OxCIf2J6JyQ8" }, "source": [ - "The same tracking is automatically applied to subclasses of `tf.keras.Model`, and may be used for example to track lists of layers." + "Trackable objects include `tf.train.Checkpoint`, `tf.Module` and its subclasses (e.g. `keras.layers.Layer` and `keras.Model`), and recognized Python containers:\n", + "\n", + " * `dict` (and `collections.OrderedDict`)\n", + " * `list`\n", + " * `tuple` (and `collections.namedtuple`, `typing.NamedTuple`)\n", + "\n", + "Other container types are **not supported**, including:\n", + "\n", + " * `collections.defaultdict`\n", + " * `set`\n", + "\n", + "All other Python objects are **ignored**, including:\n", + "\n", + " * `int`\n", + " * `string`\n", + " * `float`\n" ] }, { diff --git a/site/en/guide/core/distribution.ipynb b/site/en/guide/core/distribution.ipynb new file mode 100644 index 00000000000..c7f13b2f4db --- /dev/null +++ b/site/en/guide/core/distribution.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Distributed training with Core APIs and DTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) and [DTensor](https://www.tensorflow.org/guide/dtensor_overview) to demonstrate a data parallel distributed training example. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. Refer to the [DTensor Overview](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed Training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial to learn more about DTensor.\n", + "\n", + "This example uses the same model and optimizer shown in the [multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial. See this tutorial first to get comfortable with writing an end-to-end machine learning workflow with the Core APIs.\n", + "\n", + "Note: DTensor is still an experimental TensorFlow API which means that its features are available for testing, and it is intended for use in test environments only." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d_OFkG0dyWCp" + }, + "source": [ + "## Overview of data parallel training with DTensor\n", + "\n", + "Before building an MLP that supports distribution, take a moment to explore the fundamentals of DTensor for data parallel training.\n", + "\n", + "DTensor allows you to run distributed training across devices to improve efficiency, reliability and scalability. DTensor distributes the program and tensors according to the sharding directives through a procedure called Single program, multiple data (SPMD) expansion. A variable of a `DTensor` aware layer is created as `dtensor.DVariable`, and the constructors of `DTensor` aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "The main ideas for data parallel training are as follows:\n", + " - Model variables are replicated on N devices each.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - The gradient is reduced before weight up data is collectively performed on all replicas.\n", + " - Data parallel training provides nearly linear speed with respect to the number of devices" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor is part of TensorFlow 2.9.0 release." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "latuqlI_Yvoo" + }, + "outputs": [], + "source": [ + "#!pip install --quiet --upgrade --pre tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vDH9-sy4sfPf" + }, + "source": [ + "Configure 8 virtual CPUs for this experiment. DTensor can also be used with GPU or TPU devices. Given that this notebook uses virtual devices, the speedup gained from distributed training is not noticeable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H2iM-6J4s2D6" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "devices = tf.config.list_logical_devices('CPU')\n", + "device_names = [d.name for d in devices]\n", + "device_names" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## The MNIST Dataset\n", + "\n", + "The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist). Split the data into training and testing sets. Only use 5000 examples for training and testing to save time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8h4fV_JCfPIX" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(\"mnist\", split=['train[:5000]', 'test[:5000]'], batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "twkJ35YB6tSi" + }, + "source": [ + "### Preprocessing the data\n", + "\n", + "Preprocess the data by reshaping it to be 2-dimensional and by rescaling it to fit into the unit interval, [0,1]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6Cmjhg0xCqbz" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, test_data = train_data.map(preprocess), test_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Build an MLP model with DTensor aware layers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Start by creating a dense layer module that supports DTensor. The `dtensor.call_with_layout` function can be used to call a function that takes in a DTensor input and produces a DTensor output. 
This is useful for initializing a DTensor variable, `dtensor.DVariable`, with a TensorFlow supported function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, in_dim, out_dim, weight_layout, activation=tf.identity):\n", + " super().__init__()\n", + " # Initialize dimensions and the activation function\n", + " self.in_dim, self.out_dim = in_dim, out_dim\n", + " self.activation = activation\n", + "\n", + " # Initialize the DTensor weights using the Xavier scheme\n", + " uniform_initializer = tf.function(tf.random.stateless_uniform)\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(self.in_dim + self.out_dim, tf.float32))\n", + " self.w = dtensor.DVariable(\n", + " dtensor.call_with_layout(\n", + " uniform_initializer, weight_layout,\n", + " shape=(self.in_dim, self.out_dim), seed=(22, 23),\n", + " minval=-xavier_lim, maxval=xavier_lim))\n", + " \n", + " # Initialize the bias with the zeros\n", + " bias_layout = weight_layout.delete([0])\n", + " self.b = dtensor.DVariable(\n", + " dtensor.call_with_layout(tf.zeros, bias_layout, shape=[out_dim]))\n", + "\n", + " def __call__(self, x):\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "### The MLP sequential model\n", + "\n", + "Now create an MLP module that executes the dense layers sequentially." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r5HZJ0kv-V3v" + }, + "source": [ + "Performing \"data-parallel\" training with DTensor is equivalent to `tf.distribute.MirroredStrategy`. To do this each device will run the same model on a shard of the data batch. So you'll need the following:\n", + "\n", + "* A `dtensor.Mesh` with a single `\"batch\"` dimension\n", + "* A `dtensor.Layout` for all the weights that replicates them across the mesh (using `dtensor.UNSHARDED` for each axis)\n", + "* A `dtensor.Layout` for the data that splits the batch dimension across the mesh\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Create a DTensor mesh that consists of a single batch dimension, where each device becomes a replica that receives a shard from the global batch. 
Use this mesh to instantiate an MLP model with the following architecture:\n",
+    "\n",
+    "Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "VmlACuki3oPi"
+   },
+   "outputs": [],
+   "source": [
+    "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n",
+    "weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n",
+    "\n",
+    "input_size = 784\n",
+    "hidden_layer_1_size = 700\n",
+    "hidden_layer_2_size = 500\n",
+    "output_size = 10\n",
+    "\n",
+    "mlp_model = MLP([\n",
+    "    DenseLayer(in_dim=input_size, out_dim=hidden_layer_1_size,\n",
+    "               weight_layout=weight_layout,\n",
+    "               activation=tf.nn.relu),\n",
+    "    DenseLayer(in_dim=hidden_layer_1_size, out_dim=hidden_layer_2_size,\n",
+    "               weight_layout=weight_layout,\n",
+    "               activation=tf.nn.relu),\n",
+    "    DenseLayer(in_dim=hidden_layer_2_size, out_dim=output_size,\n",
+    "               weight_layout=weight_layout)])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "tyBATDoRmDkg"
+   },
+   "source": [
+    "### Training metrics\n",
+    "\n",
+    "Use the cross-entropy loss function and accuracy metric for training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rskOYA7FVCwg"
+   },
+   "outputs": [],
+   "source": [
+    "def cross_entropy_loss(y_pred, y):\n",
+    "  # Compute cross entropy loss with a sparse operation\n",
+    "  sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n",
+    "  return tf.reduce_mean(sparse_ce)\n",
+    "\n",
+    "def accuracy(y_pred, y):\n",
+    "  # Compute accuracy after extracting class predictions\n",
+    "  class_preds = tf.argmax(y_pred, axis=1)\n",
+    "  is_equal = tf.equal(y, class_preds)\n",
+    "  return tf.reduce_mean(tf.cast(is_equal, tf.float32))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JSiNRhTOnKZr"
+   },
+   "source": [
+    "### Optimizer\n",
+    "\n",
+    "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below and has been configured to be compatible with DTensor. In order to use Keras optimizers with DTensor, refer to the experimental `tf.keras.dtensor.experimental.optimizers` module."
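+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "keras-dtensor-optimizer-note"
+   },
+   "source": [
+    "As a brief, hedged sketch of that alternative (the exact signature below is an assumption based on the experimental module named above, and it is not used in the rest of this tutorial), a Keras DTensor optimizer can be constructed directly from the mesh:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "keras-dtensor-optimizer-sketch"
+   },
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch only: the experimental Keras DTensor optimizers accept the mesh directly.\n",
+    "# The rest of this tutorial uses the hand-written Adam module defined in the next cell.\n",
+    "keras_adam = tf.keras.dtensor.experimental.optimizers.Adam(learning_rate=1e-3, mesh=mesh)"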
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-9kIAI_lfXDS" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + "\n", + " def __init__(self, model_vars, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.model_vars = model_vars\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " # Initialize optimizer variable slots\n", + " for var in model_vars:\n", + " v = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " s = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + "\n", + " def apply_gradients(self, grads):\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, self.model_vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w54b7GtLfn1j" + }, + "source": [ + "### Data packing\n", + "\n", + "Start by writing a helper function for transferring data to the device. This function should use `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica. For simplicity, assume a single-client application.\n", + "\n", + "Next, write a function that uses this helper function to pack the training data batches into DTensors sharded along the batch (first) axis. This ensures that DTensor evenly distributes the training data to the 'batch' mesh dimension. Note that in DTensor, the batch size always refers to the global batch size; therefore, the batch size should be chosen such that it can be divided evenly by the size of the batch mesh dimension. Additional DTensor APIs to simplify `tf.data` integration are planned, so please stay tuned." 
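+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "global-batch-size-note"
+   },
+   "source": [
+    "As a small illustrative check (an addition, not part of the original packing logic), confirm that the global batch size of 128 used later divides evenly across the 8 devices on the `'batch'` mesh dimension:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "global-batch-size-check"
+   },
+   "outputs": [],
+   "source": [
+    "# The global batch size must be divisible by the size of the 'batch' mesh dimension.\n",
+    "global_batch_size = 128  # Matches the batch size used in the training loop below.\n",
+    "num_batch_shards = len(DEVICES)\n",
+    "assert global_batch_size % num_batch_shards == 0\n",
+    "print(\"Per-replica batch size:\", global_batch_size // num_batch_shards)"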
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Rx82djZ6ITm" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + " # Repacks a local Tensor-like to a DTensor with layout\n", + " # This function assumes a single-client application\n", + " x = tf.convert_to_tensor(x)\n", + " sharded_dims = []\n", + "\n", + " # For every sharded dimension, use tf.split to split the along the dimension.\n", + " # The result is a nested list of split-tensors in queue[0].\n", + " queue = [x]\n", + " for axis, dim in enumerate(layout.sharding_specs):\n", + " if dim == dtensor.UNSHARDED:\n", + " continue\n", + " num_splits = layout.shape[axis]\n", + " queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + " sharded_dims.append(dim)\n", + "\n", + " # Now you can build the list of component tensors by looking up the location in\n", + " # the nested list of split-tensors created in queue[0].\n", + " components = []\n", + " for locations in layout.mesh.local_device_locations():\n", + " t = queue[0]\n", + " for dim in sharded_dims:\n", + " split_index = locations[dim] # Only valid on single-client mesh.\n", + " t = t[split_index]\n", + " components.append(t)\n", + "\n", + " return dtensor.pack(components, layout)\n", + "\n", + "def repack_batch(x, y, mesh):\n", + " # Pack training data batches into DTensors along the batch axis\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "### Training\n", + "\n", + "Write a traceable function that executes a single training step given a batch of data. This function does not require any special DTensor annotations. Also write a function that executes a test step and returns the appropriate performance metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZICEsDGuSbDD" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x_batch, y_batch, loss, metric, optimizer):\n", + " # Execute a single training step\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " # Compute gradients and update the model's parameters\n", + " grads = tape.gradient(batch_loss, model.trainable_variables)\n", + " optimizer.apply_gradients(grads)\n", + " # Return batch loss and accuracy\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc\n", + "\n", + "@tf.function\n", + "def test_step(model, x_batch, y_batch, loss, metric):\n", + " # Execute a single testing step\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RjIDVTwwX-Mr" + }, + "source": [ + "Now, train the MLP model for 3 epochs with a batch size of 128." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "# Initialize the training loop parameters and structures\n", + "epochs = 3\n", + "batch_size = 128\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "optimizer = Adam(mlp_model.trainable_variables)\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate through training data\n", + " for x_batch, y_batch in train_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = train_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate through testing data\n", + " for x_batch, y_batch in test_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = test_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy)\n", + " # Keep track of batch-level testing\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + "# Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss.numpy():.3f}, Training accuracy: {train_acc.numpy():.3f}\")\n", + " print(f\"Testing loss: {test_loss.numpy():.3f}, Testing accuracy: {test_acc.numpy():.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, test_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(test_metric)), test_metric, label = f\"Testing {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training Epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "407qok7q2JIO" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, test_losses, \"Cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8H_TgxV92NfX" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, test_accs, \"Accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHO_u-3w4YRF" + }, + "source": [ + "## Saving your model\n", + "\n", + "The integration of `tf.saved_model` and DTensor is still under development. As of TensorFlow 2.9.0, tf.saved_model only accepts DTensor models with fully replicated variables. 
As a workaround, you can convert a DTensor model to a fully replicated one by reloading a checkpoint. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors. This tutorial will be updated to showcase the integration once it is solidified.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook provided an overview of distributed training with DTensor and the TensorFlow Core APIs. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build highly-configurable machine learning workflows with support for distributed training.\n", + "- The [DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial contain the most up-to-date information about DTensor and its integrations.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "distribution.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/index.md b/site/en/guide/core/index.md new file mode 100644 index 00000000000..7f44e11b018 --- /dev/null +++ b/site/en/guide/core/index.md @@ -0,0 +1,112 @@ +# TensorFlow Core APIs overview + +The TensorFlow Core APIs provide a set of comprehensive, composable, and +extensible low-level APIs for high-performance (distributed and accelerated) +computation, primarily aimed at building machine learning (ML) models as well as +authoring ML workflow tools and frameworks within the TensorFlow platform. These +APIs provide a foundation for creating highly configurable models with +fine-grained control and new frameworks from the ground up. + +The Core APIs can be used as an alternative to high-level machine learning APIs +like Keras. These high-level APIs are best suited for general machine learning +needs. They offer a variety of modules that abstract away the complexities of ML +while also offering functionalities for customization through subclassing. If +you are looking for an overview of TensorFlow using Keras, see the Quickstarts +and Keras sections in the [tutorials](https://www.tensorflow.org/tutorials). 
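+
+As a quick, illustrative sketch of the level these APIs operate at (this
+snippet is an addition for illustration and is not drawn from a specific
+guide), the loop below fits a single parameter using nothing but a
+`tf.Variable` and a `tf.GradientTape`:
+
+```python
+import tensorflow as tf
+
+# Minimize (w - 3)^2 with plain gradient descent using only Core APIs.
+w = tf.Variable(0.0)
+for _ in range(50):
+  with tf.GradientTape() as tape:
+    loss = (w - 3.0) ** 2
+  grad = tape.gradient(loss, w)
+  w.assign_sub(0.1 * grad)
+print(w.numpy())  # Approaches 3.0
+```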
+
+## Who should use the Core APIs
+
+The TensorFlow Core low-level APIs are designed with the following ML developers
+in mind:
+
+* Researchers building complex models with high levels of configurability
+* Developers interested in using TensorFlow as a high-performance scientific
+  computing platform
+* Framework authors building tools on top of the TensorFlow platform
+* High-level API users interested in:
+    * Adding additional functionalities to their machine learning workflows
+      such as custom layers, losses, models, and optimizers
+    * Learning more about the inner workings of their models
+
+## Core API applications
+
+The TensorFlow Core APIs provide access to low-level functionality within the
+TensorFlow ecosystem. These APIs provide more flexibility and control for
+building ML models, applications, and tools, compared to high-level APIs, such
+as Keras.
+
+### Build models and workflows
+
+The Core APIs are most commonly used to build highly customizable and optimized
+machine learning models and workflows. Here are some of the ways that the
+TensorFlow Core APIs can improve your machine learning models and workflow
+development:
+
+* Building non-traditional models or layers that do not fully fit the
+  structures supported by high-level APIs
+* Building custom layers, losses, models, and optimizers within Keras
+* Implementing new optimization techniques to expedite convergence during
+  training
+* Creating custom metrics for performance evaluation
+* Designing highly configurable training loops with support for features like
+  batching, cross-validation, and distribution strategies
+
+### Build frameworks and tools
+
+The TensorFlow Core APIs can also serve as the building blocks for new
+high-level frameworks. Here are some examples of tools and frameworks that are
+created with the low-level APIs:
+
+* [Keras](https://keras.io): deep learning for humans
+* [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization):
+  a suite of tools to optimize ML models for deployment and execution
+* [TensorFlow Graphics](https://www.tensorflow.org/graphics): a library for
+  making useful graphics functions widely accessible
+
+### Build for scientific computing
+
+The TensorFlow Core APIs can also be applied outside the realm of machine
+learning. Here are a few general-purpose use cases of TensorFlow for scientific
+computing:
+
+* Physics simulations for solid mechanics and
+  [fluid dynamics](https://arxiv.org/abs/2108.11076) problems
+* Graphics rendering applications like
+  [ray tracing](https://github.com/BachiLi/redner)
+* Solving
+  [constrained optimization problems](https://github.com/google-research/tensorflow_constrained_optimization/blob/master/README.md)
+
+## Core API components
+
+Here are some of the fundamental components that comprise TensorFlow Core’s
+low-level APIs. Note that this is not an all-encompassing list:
+
+* Data structures: `tf.Tensor`, `tf.Variable`, `tf.TensorArray`
+* Primitive APIs: `tf.shape`,
+  [slicing](https://www.tensorflow.org/guide/tensor_slicing), `tf.concat`,
+  `tf.bitwise`
+* Numerical: `tf.math`, `tf.linalg`, `tf.random`
+* Functional components: `tf.function`, `tf.GradientTape`
+* Distribution: [DTensor](https://www.tensorflow.org/guide/dtensor_overview)
+* Export: `tf.saved_model`
+
+## Next steps
+
+The *Build with Core* documentation provides tutorials of basic machine learning
+concepts from scratch.
The tutorials in this section help you get comfortable +with writing low-level code with Core APIs that you can then apply to more +complex use cases of your own. + +Note: You should not use the Core APIs to simply re-implement high-level APIs, +and it is possible to use high-level APIs, such as Keras, with the Core APIs. + +To get started using and learning more about the Core APIs, check out the +[Quickstart for TensorFlow Core](https://www.tensorflow.org/guide/core/quickstart_core). diff --git a/site/en/guide/core/logistic_regression_core.ipynb b/site/en/guide/core/logistic_regression_core.ipynb new file mode 100644 index 00000000000..5a9af324ad5 --- /dev/null +++ b/site/en/guide/core/logistic_regression_core.ipynb @@ -0,0 +1,935 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Logistic regression for binary classification with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DauaqJ7WhIhO" + }, + "source": [ + "This guide demonstrates how to use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to perform [binary classification](https://developers.google.com/machine-learning/glossary#binary_classification) with [logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/). It uses the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) for tumor classification.\n", + "\n", + "[Logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/) is one of the most popular algorithms for binary classification. Given a set of examples with features, the goal of logistic regression is to output values between 0 and 1, which can be interpreted as the probabilities of each example belonging to a particular class. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "This tutorial uses [pandas](https://pandas.pydata.org) for reading a CSV file into a [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html), [seaborn](https://seaborn.pydata.org) for plotting a pairwise relationship in a dataset, [Scikit-learn](https://scikit-learn.org/) for computing a confusion matrix, and [matplotlib](https://matplotlib.org/) for creating visualizations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5lZoUK6AVTos" + }, + "outputs": [], + "source": [ + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import tempfile\n", + "import os\n", + "\n", + "# Preset matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "print(tf.__version__)\n", + "# To make the results reproducible, set the random seed value.\n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFh9ne3FZ-On" + }, + "source": [ + "## Load the data\n", + "\n", + "Next, load the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset contains various features such as a tumor's radius, texture, and concavity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiX2FI4gZtTt" + }, + "outputs": [], + "source": [ + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'\n", + "\n", + "features = ['radius', 'texture', 'perimeter', 'area', 'smoothness', 'compactness',\n", + " 'concavity', 'concave_poinits', 'symmetry', 'fractal_dimension']\n", + "column_names = ['id', 'diagnosis']\n", + "\n", + "for attr in ['mean', 'ste', 'largest']:\n", + " for feature in features:\n", + " column_names.append(feature + \"_\" + attr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3VR1aTP92nV" + }, + "source": [ + "Read the dataset into a pandas [DataFrame]() using [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uvR2Bzb691lJ" + }, + "outputs": [], + "source": [ + "dataset = pd.read_csv(url, names=column_names)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YB9eq6Zq-IZ4" + }, + "outputs": [], + "source": [ + "dataset.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0_Z1V6Dg-La_" + }, + "source": [ + "Display the first five rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWxktwbv-KPp" + }, + "outputs": [], + "source": [ + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s4-Wn2jzVC1W" + }, + "source": [ + "Split the dataset into training and test sets using [`pandas.DataFrame.sample`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html), [`pandas.DataFrame.drop`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) and [`pandas.DataFrame.iloc`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html). Make sure to split the features from the target labels. The test set is used to evaluate your model's generalizability to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m2O60B-IVG9Q" + }, + "outputs": [], + "source": [ + "train_dataset = dataset.sample(frac=0.75, random_state=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i06vHFv_QB24" + }, + "outputs": [], + "source": [ + "len(train_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "19JaochhaQ3m" + }, + "outputs": [], + "source": [ + "test_dataset = dataset.drop(train_dataset.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LmHRcbAfaSag" + }, + "outputs": [], + "source": [ + "len(test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w6JxBhBc_wwO" + }, + "outputs": [], + "source": [ + "# The `id` column can be dropped since each row is unique\n", + "x_train, y_train = train_dataset.iloc[:, 2:], train_dataset.iloc[:, 1]\n", + "x_test, y_test = test_dataset.iloc[:, 2:], test_dataset.iloc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3MWuJTKEDM-f" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "This dataset contains the mean, standard error, and largest values for each of the 10 tumor measurements collected per example. The `\"diagnosis\"` target column is a categorical variable with `'M'` indicating a malignant tumor and `'B'` indicating a benign tumor diagnosis. 
This column needs to be converted into a numerical binary format for model training.\n", + "\n", + "The [`pandas.Series.map`](https://pandas.pydata.org/docs/reference/api/pandas.Series.map.html) function is useful for mapping binary values to the categories.\n", + "\n", + "The dataset should also be converted to a tensor with the `tf.convert_to_tensor` function after the preprocessing is complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JEJHhN65a2VV" + }, + "outputs": [], + "source": [ + "y_train, y_test = y_train.map({'B': 0, 'M': 1}), y_test.map({'B': 0, 'M': 1})\n", + "x_train, y_train = tf.convert_to_tensor(x_train, dtype=tf.float32), tf.convert_to_tensor(y_train, dtype=tf.float32)\n", + "x_test, y_test = tf.convert_to_tensor(x_test, dtype=tf.float32), tf.convert_to_tensor(y_test, dtype=tf.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J4ubs136WLNp" + }, + "source": [ + "Use [`seaborn.pairplot`](https://seaborn.pydata.org/generated/seaborn.pairplot.html) to review the joint distribution of a few pairs of mean-based features from the training set and observe how they relate to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oRKO_x8gWKv-" + }, + "outputs": [], + "source": [ + "sns.pairplot(train_dataset.iloc[:, 1:6], hue = 'diagnosis', diag_kind='kde');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YOG5iKYKW_3" + }, + "source": [ + "This pairplot demonstrates that certain features such as radius, perimeter and area are highly correlated. This is expected since the tumor radius is directly involved in the computation of both perimeter and area. Additionally, note that malignant diagnoses seem to be more right-skewed for many of the features.\n", + "\n", + "Make sure to also check the overall statistics. Note how each feature covers a vastly different range of values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yi2FzC3T21jR" + }, + "outputs": [], + "source": [ + "train_dataset.describe().transpose()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_8pDCIFjMla8" + }, + "source": [ + "Given the inconsistent ranges, it is beneficial to standardize the data such that each feature has a zero mean and unit variance. This process is called [normalization](https://developers.google.com/machine-learning/glossary#normalization)." 
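+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "standardization-toy-note"
+   },
+   "source": [
+    "To make the transformation concrete before wrapping it in a module (the toy tensor below is purely illustrative and not part of the dataset), subtract the per-column mean and divide by the per-column standard deviation; each column then has zero mean and unit variance:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "standardization-toy-example"
+   },
+   "outputs": [],
+   "source": [
+    "toy = tf.constant([[1., 10.], [2., 20.], [3., 30.]])\n",
+    "toy_norm = (toy - tf.reduce_mean(toy, axis=0)) / tf.math.reduce_std(toy, axis=0)\n",
+    "print(toy_norm)  # Each column now has mean 0 and standard deviation 1."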
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FrzKNFNjLQDl" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.Variable(tf.math.reduce_mean(x, axis=0))\n", + " self.std = tf.Variable(tf.math.reduce_std(x, axis=0))\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean\n", + "\n", + "norm_x = Normalize(x_train)\n", + "x_train_norm, x_test_norm = norm_x.norm(x_train), norm_x.norm(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Logistic regression\n", + "\n", + "Before building a logistic regression model, it is crucial to understand the method's differences compared to traditional linear regression.\n", + "\n", + "### Logistic regression fundamentals\n", + "\n", + "Linear regression returns a linear combination of its inputs; this output is unbounded. The output of a [logistic regression](https://developers.google.com/machine-learning/glossary#logistic_regression) is in the `(0, 1)` range. For each example, it represents the probability that the example belongs to the _positive_ class.\n", + "\n", + "Logistic regression maps the continuous outputs of traditional linear regression, `(-∞, ∞)`, to probabilities, `(0, 1)`. This transformation is also symmetric so that flipping the sign of the linear output results in the inverse of the original probability.\n", + "\n", + "Let $Y$ denote the probability of being in class `1` (the tumor is malignant). The desired mapping can be achieved by interpreting the linear regression output as the [log odds](https://developers.google.com/machine-learning/glossary#log-odds) ratio of being in class `1` as opposed to class `0`:\n", + "\n", + "$$\\ln(\\frac{Y}{1-Y}) = wX + b$$\n", + "\n", + "By setting $wX + b = z$, this equation can then be solved for $Y$:\n", + "\n", + "$$Y = \\frac{e^{z}}{1 + e^{z}} = \\frac{1}{1 + e^{-z}}$$\n", + "\n", + "The expression $\\frac{1}{1 + e^{-z}}$ is known as the [sigmoid function](https://developers.google.com/machine-learning/glossary#sigmoid_function) $\\sigma(z)$. Hence, the equation for logistic regression can be written as $Y = \\sigma(wX + b)$.\n", + "\n", + "The dataset in this tutorial deals with a high-dimensional feature matrix. Therefore, the above equation must be rewritten in a matrix vector form as follows:\n", + "\n", + "$${\\mathrm{Y}} = \\sigma({\\mathrm{X}}w + b)$$\n", + "\n", + "where:\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: a target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: a feature matrix\n", + "* $\\underset{n\\times 1}w$: a weight vector\n", + "* $b$: a bias\n", + "* $\\sigma$: a sigmoid function applied to each element of the output vector\n", + "\n", + "Start by visualizing the sigmoid function, which transforms the linear output, `(-∞, ∞)`, to fall between `0` and `1`. The sigmoid function is available in `tf.math.sigmoid`." 
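+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "sigmoid-symmetry-note"
+   },
+   "source": [
+    "Before plotting it, you can verify the symmetry property mentioned above with a quick check (an illustrative addition): flipping the sign of the linear output inverts the probability, so `sigmoid(z) + sigmoid(-z)` should always equal 1."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "sigmoid-symmetry-check"
+   },
+   "outputs": [],
+   "source": [
+    "z = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])\n",
+    "# sigmoid(-z) = 1 - sigmoid(z), so every entry below is 1.\n",
+    "print(tf.math.sigmoid(z) + tf.math.sigmoid(-z))"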
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThHaV_RmucZl" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-10, 10, 500)\n", + "x = tf.cast(x, tf.float32)\n", + "f = lambda x : (1/20)*x + 0.6\n", + "plt.plot(x, tf.math.sigmoid(x))\n", + "plt.ylim((-0.1,1.1))\n", + "plt.title(\"Sigmoid function\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VMXEhrZuKECV" + }, + "source": [ + "### The log loss function\n", + "\n", + "The [log loss](https://developers.google.com/machine-learning/glossary#Log_Loss), or binary cross-entropy loss, is the ideal loss function for a binary classification problem with logistic regression. For each example, the log loss quantifies the similarity between a predicted probability and the example's true value. It is determined by the following equation:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\hat{y}_i) + (1- y_i)\\cdot\\log(1 - \\hat{y}_i)$$\n", + "\n", + "where:\n", + "\n", + "* $\\hat{y}$: a vector of predicted probabilities\n", + "* $y$: a vector of true targets\n", + "\n", + "You can use the `tf.nn.sigmoid_cross_entropy_with_logits` function to compute the log loss. This function automatically applies the sigmoid activation to the regression output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JVBInnSqS36W" + }, + "outputs": [], + "source": [ + "def log_loss(y_pred, y):\n", + " # Compute the log loss function\n", + " ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q_mutLj0KNUb" + }, + "source": [ + "### The gradient descent update rule\n", + "\n", + "The TensorFlow Core APIs support automatic differentiation with `tf.GradientTape`. If you are curious about the mathematics behind the logistic regression [gradient updates](https://developers.google.com/machine-learning/glossary#gradient_descent), here is a short explanation:\n", + "\n", + "In the above equation for the log loss, recall that each $\\hat{y}_i$ can be rewritten in terms of the inputs as $\\sigma({\\mathrm{X_i}}w + b)$.\n", + "\n", + "The goal is to find a $w^*$ and $b^*$ that minimize the log loss:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\sigma({\\mathrm{X_i}}w + b)) + (1- y_i)\\cdot\\log(1 - \\sigma({\\mathrm{X_i}}w + b))$$\n", + "\n", + "By taking the gradient $L$ with respect to $w$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial w} = \\frac{1}{m}(\\sigma({\\mathrm{X}}w + b) - y)X$$\n", + "\n", + "By taking the gradient $L$ with respect to $b$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial b} = \\frac{1}{m}\\sum_{i=1}^{m}\\sigma({\\mathrm{X_i}}w + b) - y_i$$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uTCndUecKZho" + }, + "source": [ + "Now, build the logistic regression model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0sXM7qLlKfZ" + }, + "outputs": [], + "source": [ + "class LogisticRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + " \n", + " def __call__(self, x, train=True):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weights and the bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1], seed=22)\n", + " rand_b = tf.random.uniform(shape=[], seed=22)\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " # Compute the model output\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " z = tf.squeeze(z, axis=1)\n", + " if train:\n", + " return z\n", + " return tf.sigmoid(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eObQu9fDnXGL" + }, + "source": [ + "To validate, make sure the untrained model outputs values in the range of `(0, 1)` for a small subset of the training data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bIovC0Z4QHJ" + }, + "outputs": [], + "source": [ + "log_reg = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QJ2ievISyf0p" + }, + "outputs": [], + "source": [ + "y_pred = log_reg(x_train_norm[:5], train=False)\n", + "y_pred.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PribnwDHUksC" + }, + "source": [ + "Next, write an accuracy function to calculate the proportion of correct classifications during training. In order to retrieve the classifications from the predicted probabilities, set a threshold for which all probabilities higher than the threshold belong to class `1`. This is a configurable hyperparameter that can be set to `0.5` as a default." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssnVcKg7oMe6" + }, + "outputs": [], + "source": [ + "def predict_class(y_pred, thresh=0.5):\n", + " # Return a tensor with `1` if `y_pred` > `0.5`, and `0` otherwise\n", + " return tf.cast(y_pred > thresh, tf.float32)\n", + "\n", + "def accuracy(y_pred, y):\n", + " # Return the proportion of matches between `y_pred` and `y`\n", + " y_pred = tf.math.sigmoid(y_pred)\n", + " y_pred_class = predict_class(y_pred)\n", + " check_equal = tf.cast(y_pred_class == y,tf.float32)\n", + " acc_val = tf.reduce_mean(check_equal)\n", + " return acc_val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J_0KHQ25_2dF" + }, + "source": [ + "### Train the model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vJD7-4U0etqa" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sLiWZZPBSDip" + }, + "source": [ + "Now write a training loop for the logistic regression model. The loop utilizes the log loss function and its gradients with respect to the input in order to iteratively update the model's parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jNC3D1DGsGgK" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 200\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "\n", + "# Set up the training loop and begin training\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Update the parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, log_reg.variables)\n", + " for g,v in zip(grads, log_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " if epoch % 20 == 0:\n", + " print(f\"Epoch: {epoch}, Training log loss: {train_loss:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NoLiAg7fYft7" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Observe the changes in your model's loss and accuracy over time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mv3oCQPvWhr0" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Log loss\")\n", + "plt.legend()\n", + "plt.title(\"Log loss vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D2HDVGLPODIE" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_accs, label = \"Training accuracy\")\n", + "plt.plot(range(epochs), test_accs, label = \"Testing accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Accuracy (%)\")\n", + "plt.legend()\n", + "plt.title(\"Accuracy vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jonKhUzuPyfa" + }, + "outputs": [], + "source": [ + "print(f\"Final training log loss: {train_losses[-1]:.3f}\")\n", + "print(f\"Final testing log Loss: {test_losses[-1]:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3DF4qyrPyke" + }, + "outputs": [], + "source": [ + "print(f\"Final training accuracy: {train_accs[-1]:.3f}\")\n", + "print(f\"Final testing accuracy: {test_accs[-1]:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrj1TbOJasjA" + }, + "source": [ + "The model demonstrates a high accuracy and a low loss when it comes to classifying tumors in the training dataset and also generalizes well to the unseen test data. To go one step further, you can explore error rates that give more insight beyond the overall accuracy score. The two most popular error rates for binary classification problems are the false positive rate (FPR) and the false negative rate (FNR).\n", + "\n", + "For this problem, the FPR is the proportion of malignant tumor predictions amongst tumors that are actually benign. 
Conversely, the FNR is the proportion of benign tumor predictions among tumors that are actually malignant.\n", + "\n", + "Compute a confusion matrix using [`sklearn.metrics.confusion_matrix`](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html#sklearn.metrics.confusion_matrix), which evaluates the accuracy of the classification, and use matplotlib to display the matrix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJO7YkA8ZDMU" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(y, y_classes, typ):\n", + " # Compute the confusion matrix and normalize it\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(y.numpy(), y_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(2)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(f\"Confusion matrix: {typ}\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "y_pred_train, y_pred_test = log_reg(x_train_norm, train=False), log_reg(x_test_norm, train=False)\n", + "train_classes, test_classes = predict_class(y_pred_train), predict_class(y_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OQ5DFcleiDFm" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_train, train_classes, 'Training')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtfcsAp_iCNR" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_test, test_classes, 'Testing')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DlivxaDmTnGq" + }, + "source": [ + "Observe the error rate measurements and interpret their significance in the context of this example. In many medical testing studies such as cancer detection, having a high false positive rate to ensure a low false negative rate is perfectly acceptable and in fact encouraged since the risk of missing a malignant tumor diagnosis (false negative) is a lot worse than misclassifying a benign tumor as malignant (false positive).\n", + "\n", + "In order to control for the FPR and FNR, try changing the threshold hyperparameter before classifying the probability predictions. A lower threshold increases the model's overall chances of making a malignant tumor classification. This inevitably increases the number of false positives and the FPR but it also helps to decrease the number of false negatives and the FNR." 
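+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "threshold-example-note"
+   },
+   "source": [
+    "For example (the value of `0.3` here is purely illustrative), re-classify the test predictions with a lower threshold and compare the resulting confusion matrix with the default threshold of `0.5`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "threshold-example-code"
+   },
+   "outputs": [],
+   "source": [
+    "# Classify the test predictions again with a lower, illustrative threshold.\n",
+    "test_classes_low_thresh = predict_class(y_pred_test, thresh=0.3)\n",
+    "show_confusion_matrix(y_test, test_classes_low_thresh, 'Testing (threshold = 0.3)')"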
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7ADEN2rb4Nhj" + }, + "source": [ + "## Save the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Normalization\n", + "- Probability prediction\n", + "- Class prediction\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6KPRHCzg4ZxH" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, norm_x, class_pred):\n", + " # Initialize pre- and post-processing functions\n", + " self.model = model\n", + " self.norm_x = norm_x\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)])\n", + " def __call__(self, x):\n", + " # Run the `ExportModule` for new data points\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x, train=False)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2YzRclo5-yjO" + }, + "outputs": [], + "source": [ + "log_reg_export = ExportModule(model=log_reg,\n", + " norm_x=norm_x,\n", + " class_pred=predict_class)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtofGIBN_qFd" + }, + "source": [ + "If you want to save the model at its current state, you can do so with the `tf.saved_model.save` function. To load a saved model and make predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a4Qum1Ts_pmF" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'log_reg_export')\n", + "tf.saved_model.save(log_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3KPILr1i_M_c" + }, + "outputs": [], + "source": [ + "log_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = log_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a logistic regression problem. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Analyzing error rates is a great way to gain more insight about a classification model's performance beyond its overall accuracy score.\n", + "- Overfitting is another common problem for logistic regression models, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](../../tutorials/keras/overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](../../tutorials/load_data/images.ipynb) or [CSV data loading](../../tutorials/load_data/csv.ipynb)." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "logistic_regression_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/matrix_core.ipynb b/site/en/guide/core/matrix_core.ipynb new file mode 100644 index 00000000000..1d7d35ed047 --- /dev/null +++ b/site/en/guide/core/matrix_core.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Matrix approximation with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGw8TF2vtzru" + }, + "source": [ + "## Introduction \n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to showcase TensorFlow's capabilities as a high-performance scientific computing platform. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases.\n", + "\n", + "This tutorial explores the technique of [singular value decomposition](https://developers.google.com/machine-learning/recommendation/collaborative/matrix) (SVD) and its applications for low-rank approximation problems. The SVD is used to factorize real or complex matrices and has a variety of use cases in data science such as image compression. The images for this tutorial come from Google Brain's [Imagen](https://imagen.research.google/) project. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5_FdwaovEkCC" + }, + "source": [ + ">![svd_intro](http://tensorflow.org/images/core/svd_intro.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib.image import imread\n", + "from matplotlib import pyplot as plt\n", + "import requests\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [16, 9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "so_ewq3gAoEI" + }, + "source": [ + "## SVD fundamentals\n", + "\n", + "The singular value decomposition of a matrix, ${\\mathrm{A}}$, is determined by the following factorization:\n", + "\n", + "$${\\mathrm{A}} = {\\mathrm{U}} \\Sigma {\\mathrm{V}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times n}{\\mathrm{A}}$: input matrix where $m \\geq n$\n", + "* $\\underset{m \\times n}{\\mathrm{U}}$: orthogonal matrix, ${\\mathrm{U}}^T{\\mathrm{U}} = {\\mathrm{I}}$, with each column, $u_i$, denoting a left singular vector of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{\\Sigma}$: diagonal matrix with each diagonal entry, $\\sigma_i$, denoting a singular value of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{{\\mathrm{V}}^T}$: orthogonal matrix, ${\\mathrm{V}}^T{\\mathrm{V}} = {\\mathrm{I}}$, with each row, $v_i$, denoting a right singular vector of ${\\mathrm{A}}$\n", + "\n", + "When $m < n$, ${\\mathrm{U}}$ and $\\Sigma$ both have dimension $(m \\times m)$, and ${\\mathrm{V}}^T$ has dimension $(m \\times n)$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "enGGGXCQKNv8" + }, + "source": [ + ">![svd_full](http://tensorflow.org/images/core/svd_full.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlP-cBdSKLtc" + }, + "source": [ + "TensorFlow's linear algebra package has a function, `tf.linalg.svd`, which can be used to compute the singular value decomposition of one or more matrices. 
Start by defining a simple matrix and computing its SVD factorization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C3QAcgyoeIpv" + }, + "outputs": [], + "source": [ + "A = tf.random.uniform(shape=[40,30])\n", + "# Compute the SVD factorization\n", + "s, U, V = tf.linalg.svd(A)\n", + "# Define Sigma and V Transpose\n", + "S = tf.linalg.diag(s)\n", + "V_T = tf.transpose(V)\n", + "# Reconstruct the original matrix\n", + "A_svd = U@S@V_T\n", + "# Visualize \n", + "plt.bar(range(len(s)), s);\n", + "plt.xlabel(\"Singular value rank\")\n", + "plt.ylabel(\"Singular value\")\n", + "plt.title(\"Bar graph of singular values\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6H_C9WhFACm4" + }, + "source": [ + "The `tf.einsum` function can be used to directly compute the matrix reconstruction from the outputs of `tf.linalg.svd`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPE6QeMtADUn" + }, + "outputs": [], + "source": [ + "A_svd = tf.einsum('s,us,vs -> uv',s,U,V)\n", + "print('\\nReconstructed Matrix, A_svd', A_svd)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x1m6JIsM9DLP" + }, + "source": [ + "## Low rank approximation with the SVD\n", + "\n", + "The rank of a matrix, ${\\mathrm{A}}$, is determined by the dimension of the vector space spanned by its columns. \n", + "The SVD can be used to approximate a matrix with a lower rank, which ultimately decreases the dimensionality of data required to store the information represented by the matrix.\n", + "\n", + "The rank-r approximation of ${\\mathrm{A}}$ in terms of the SVD is defined by the formula:\n", + "\n", + "$${\\mathrm{A_r}} = {\\mathrm{U_r}} \\Sigma_r {\\mathrm{V_r}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times r}{\\mathrm{U_r}}$: matrix consisting of the first $r$ columns of ${\\mathrm{U}}$\n", + "* $\\underset{r \\times r}{\\Sigma_r}$: diagonal matrix consisting of the first $r$ singular values in $\\Sigma$\n", + "* $\\underset{r \\times n}{\\mathrm{V_r}}^T$: matrix consisting of the first $r$ rows of ${\\mathrm{V}}^T$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJWMJu36QyUV" + }, + "source": [ + ">![svd_approx](http://tensorflow.org/images/core/svd_approx.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkiVUxeaQybq" + }, + "source": [ + "Start by writing a function to compute the rank-r approximation of a given matrix. This low-rank approximation procedure is used for image compression; therefore, it is also helpful to compute the physical data sizes for each approximation. For simplicity, assume that data size for an rank-r approximated matrix is equal to the total number of elements required to compute the approximation. Next, write a function to visualize the original matrix, $\\mathrm{A}$ its rank-r approximation, $\\mathrm{A}_r$ and the error matrix, $|\\mathrm{A} - \\mathrm{A}_r|$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2oY3pMPagJrO" + }, + "outputs": [], + "source": [ + "def rank_r_approx(s, U, V, r, verbose=False):\n", + " # Compute the matrices necessary for a rank-r approximation\n", + " s_r, U_r, V_r = s[..., :r], U[..., :, :r], V[..., :, :r] # ... 
implies any number of extra batch axes\n", + " # Compute the low-rank approximation and its size\n", + " A_r = tf.einsum('...s,...us,...vs->...uv',s_r,U_r,V_r)\n", + " A_r_size = tf.size(U_r) + tf.size(s_r) + tf.size(V_r)\n", + " if verbose:\n", + " print(f\"Approximation Size: {A_r_size}\")\n", + " return A_r, A_r_size\n", + "\n", + "def viz_approx(A, A_r):\n", + " # Plot A, A_r, and A - A_r\n", + " vmin, vmax = 0, tf.reduce_max(A)\n", + " fig, ax = plt.subplots(1,3)\n", + " mats = [A, A_r, abs(A - A_r)]\n", + " titles = ['Original A', 'Approximated A_r', 'Error |A - A_r|']\n", + " for i, (mat, title) in enumerate(zip(mats, titles)):\n", + " ax[i].pcolormesh(mat, vmin=vmin, vmax=vmax)\n", + " ax[i].set_title(title)\n", + " ax[i].axis('off')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O3ZRkYCkX2FQ" + }, + "outputs": [], + "source": [ + "print(f\"Original Size of A: {tf.size(A)}\")\n", + "s, U, V = tf.linalg.svd(A)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S1DR83VMX4cM" + }, + "outputs": [], + "source": [ + "# Rank-15 approximation\n", + "A_15, A_15_size = rank_r_approx(s, U, V, 15, verbose = True)\n", + "viz_approx(A, A_15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KgFT70XFX57E" + }, + "outputs": [], + "source": [ + "# Rank-3 approximation\n", + "A_3, A_3_size = rank_r_approx(s, U, V, 3, verbose = True)\n", + "viz_approx(A, A_3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DS4XoSlTJgX0" + }, + "source": [ + "As expected, using lower ranks results in less-accurate approximations. However, the quality of these low-rank approximations are often good enough in real world scenarios. Also note that the main goal of low-rank approximation with SVD \n", + "is to reduce the dimensionality of the data but not to reduce the disk space of the data itself. However, as the input matrices become higher-dimensional, many low-rank approximations also end up benefiting from reduced data size. This reduction benefit is why the process is applicable for image compression problems." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhsaiOnnZs6M" + }, + "source": [ + "## Image loading\n", + "\n", + "The following image is available on the [Imagen](https://imagen.research.google/) home page. Imagen is a text-to-image diffusion model developed by Google Research's Brain team. An AI created this image based on the prompt: \"A photo of a Corgi dog riding a bike in Times Square. It is wearing sunglasses and a beach hat.\" How cool is that! You can also change the url below to any .jpg link to load in a custom image of choice. \n", + "\n", + "Start by reading in and visualizing the image. After reading a JPEG file, Matplotlib outputs a matrix, ${\\mathrm{I}}$, of shape $(m \\times n \\times 3)$ which represents a 2-dimensional image with 3 color channels for red, green and blue respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OVsZOQUAZ2C7" + }, + "outputs": [], + "source": [ + "img_link = \"https://imagen.research.google/main_gallery_images/a-photo-of-a-corgi-dog-riding-a-bike-in-times-square.jpg\"\n", + "img_path = requests.get(img_link, stream=True).raw\n", + "I = imread(img_path, 0)\n", + "print(\"Input Image Shape:\", I.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qvs7uftcZ54x" + }, + "outputs": [], + "source": [ + "def show_img(I):\n", + "  # Display the image in matplotlib\n", + "  img = plt.imshow(I)\n", + "  plt.axis('off')\n", + "  return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbesXO3HZ6Qs" + }, + "outputs": [], + "source": [ + "show_img(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tdnUBVg_JoOa" + }, + "source": [ + "## The image compression algorithm\n", + "\n", + "Now, use the SVD to compute low-rank approximations of the sample image. Recall that the image is of shape $(1024 \times 1024 \times 3)$ and that the SVD is only defined for 2-dimensional matrices. This means that the sample image has to be batched into 3 equal-size matrices that correspond to each of the 3 color channels. This can be done by transposing the matrix to be of shape $(3 \times 1024 \times 1024)$. In order to clearly visualize the approximation error, rescale the RGB values of the image from $[0,255]$ to $[0,1]$. Remember to clip the approximated values to fall within this interval before visualizing them. The `tf.clip_by_value` function is useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i7DDp0h7oSIk" + }, + "outputs": [], + "source": [ + "def compress_image(I, r, verbose=False):\n", + "  # Compress an image with the SVD given a rank \n", + "  I_size = tf.size(I)\n", + "  print(f\"Original size of image: {I_size}\")\n", + "  # Compute SVD of image\n", + "  I = tf.convert_to_tensor(I)/255\n", + "  I_batched = tf.transpose(I, [2, 0, 1]) # einops.rearrange(I, 'h w c -> c h w')\n", + "  s, U, V = tf.linalg.svd(I_batched)\n", + "  # Compute low-rank approximation of image across each RGB channel\n", + "  I_r, I_r_size = rank_r_approx(s, U, V, r)\n", + "  I_r = tf.transpose(I_r, [1, 2, 0]) # einops.rearrange(I_r, 'c h w -> h w c')\n", + "  I_r_prop = (I_r_size / I_size)\n", + "  if verbose:\n", + "    # Display compressed image and attributes\n", + "    print(f\"Number of singular values used in compression: {r}\")\n", + "    print(f\"Compressed image size: {I_r_size}\")\n", + "    print(f\"Proportion of original size: {I_r_prop:.3f}\")\n", + "    ax_1 = plt.subplot(1,2,1)\n", + "    show_img(tf.clip_by_value(I_r,0.,1.))\n", + "    ax_1.set_title(\"Approximated image\")\n", + "    ax_2 = plt.subplot(1,2,2)\n", + "    show_img(tf.clip_by_value(0.5+abs(I-I_r),0.,1.))\n", + "    ax_2.set_title(\"Error\")\n", + "  return I_r, I_r_prop" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGQ_rTyKDX9F" + }, + "source": [ + "Now, compute rank-r approximations for the following ranks: 100, 50, 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GlKkVLGDjre" + }, + "outputs": [], + "source": [ + "I_100, I_100_prop = compress_image(I, 100, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XdvUkF5_E75D" + }, + "outputs": [], + "source": [ + "I_50, I_50_prop = compress_image(I, 50, verbose=True)" + ] + }, + {
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MsCNZ8416Sbk" + }, + "outputs": [], + "source": [ + "I_10, I_10_prop = compress_image(I, 10, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfYYBhcuNkvH" + }, + "source": [ + "## Evaluating approximations\n", + "\n", + "There are a variety of interesting methods to measure the effectiveness and have more control over matrix approximations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D2Lotde9Zg7v" + }, + "source": [ + "### Compression factor vs rank\n", + "\n", + "For each of the above approximations, observe how the data sizes change with the rank." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O1ariNQe6Wbl" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,6))\n", + "plt.plot([100, 50, 10], [I_100_prop, I_50_prop, I_10_prop])\n", + "plt.xlabel(\"Rank\")\n", + "plt.ylabel(\"Proportion of original image size\")\n", + "plt.title(\"Compression factor vs rank\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dvHcLRj2QoDg" + }, + "source": [ + "Based on this plot, there is a linear relationship between an approximated image's compression factor and its rank. To explore this further, recall that the data size of an approximated matrix, ${\\mathrm{A}}_r$, is defined as the total number of elements required for its computation. The following equations can be used to find the relationship between compression factor and rank:\n", + "\n", + "$$x = (m \\times r) + r + (r \\times n) = r \\times (m + n + 1)$$\n", + "\n", + "$$c = \\large \\frac{x}{y} = \\frac{r \\times (m + n + 1)}{m \\times n}$$\n", + "\n", + "where\n", + "\n", + "* $x$: size of ${\\mathrm{A_r}}$\n", + "* $y$: size of ${\\mathrm{A}}$\n", + "* $c = \\frac{x}{y}$: compression factor\n", + "* $r$: rank of the approximation\n", + "* $m$ and $n$: row and column dimensions of ${\\mathrm{A}}$\n", + "\n", + "In order to find the rank, $r$, that is necessary to compress an image to a desired factor, $c$, the above equation can be rearranged to solve for $r$:\n", + "\n", + "$$r = ⌊{\\large\\frac{c \\times m \\times n}{m + n + 1}}⌋$$\n", + "\n", + "Note that this formula is independent of the color channel dimension since each of the RGB approximations do not affect each other. Now, write a function to compress an input image given a desired compression factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "viVO-I60QynI" + }, + "outputs": [], + "source": [ + "def compress_image_with_factor(I, compression_factor, verbose=False):\n", + " # Returns a compressed image based on a desired compression factor\n", + " m,n,o = I.shape\n", + " r = int((compression_factor * m * n)/(m + n + 1))\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gWSv58J6LSRQ" + }, + "source": [ + "Compress an image to 15% of its original size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HVeeloIwQ1b6" + }, + "outputs": [], + "source": [ + "compression_factor = 0.15\n", + "I_r_img = compress_image_with_factor(I, compression_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkeRyms7jZMd" + }, + "source": [ + "### Cumulative sum of singular values\n", + "\n", + "The cumulative sum of singular values can be a useful indicator for the amount of energy captured by a rank-r approximation. Visualize the RGB-averaged cumulative proportion of singular values in the sample image. The `tf.cumsum` function can be useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CteJ6VbKlndu" + }, + "outputs": [], + "source": [ + "def viz_energy(I):\n", + " # Visualize the energy captured based on rank\n", + " # Computing SVD\n", + " I = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Plotting average proportion across RGB channels \n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " plt.figure(figsize=(11,6))\n", + " plt.plot(range(len(I)), props_rgb_mean, color='k')\n", + " plt.xlabel(\"Rank / singular value number\")\n", + " plt.ylabel(\"Cumulative proportion of singular values\")\n", + " plt.title(\"RGB-averaged proportion of energy captured by the first 'r' singular values\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vl9PKow-GgCp" + }, + "outputs": [], + "source": [ + "viz_energy(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vQtwimKuQP19" + }, + "source": [ + "It looks like over 90% of the energy in this image is captured within the first 100 singular values. Now, write a function to compress an input image given a desired energy retention factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fum5Cvm7R5vH" + }, + "outputs": [], + "source": [ + "def compress_image_with_energy(I, energy_factor, verbose=False):\n", + " # Returns a compressed image based on a desired energy factor\n", + " # Computing SVD\n", + " I_rescaled = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I_rescaled, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Extracting singular values\n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " # Find closest r that corresponds to the energy factor\n", + " r = tf.argmin(tf.abs(props_rgb_mean - energy_factor)) + 1\n", + " actual_ef = props_rgb_mean[r]\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " print(f\"Proportion of energy captured by the first {r} singular values: {actual_ef:.3f}\")\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y_rChG0OLby1" + }, + "source": [ + "Compress an image to retain 75% of its energy." 
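Note that `tf.argmin` over the absolute difference selects the rank whose cumulative energy is *closest* to the target, which can land slightly below it. If the smallest rank that meets or exceeds the target is preferred, one possible alternative (a sketch, assuming a monotonically increasing `props_rgb_mean` computed as in `viz_energy` above) is `tf.searchsorted`:

```python
# Hypothetical alternative: smallest rank whose cumulative energy >= the target.
energy_factor = 0.75
r = int(tf.searchsorted(props_rgb_mean, [energy_factor], side='left')[0]) + 1
print(f"Smallest rank with at least {energy_factor:.0%} of the energy: {r}")
```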
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xDXBaZQ4c5jF" + }, + "outputs": [], + "source": [ + "energy_factor = 0.75\n", + "I_r_img = compress_image_with_energy(I, energy_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2tmqTW0CYX-v" + }, + "source": [ + "### Error and singular values\n", + "\n", + "There is also an interesting relationship between the approximation error and the singular values. It turns out that the squared Frobenius norm of the approximation error is equal to the sum of the squares of the singular values that were left out:\n", + "\n", + "$${||A - A_r||}^2 = \sum_{i=r+1}^{R}σ_i^2$$\n", + "\n", + "Test out this relationship with a rank-10 approximation of the example matrix in the beginning of this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hctOvN8BckiS" + }, + "outputs": [], + "source": [ + "s, U, V = tf.linalg.svd(A)\n", + "A_10, A_10_size = rank_r_approx(s, U, V, 10)\n", + "squared_norm = tf.norm(A - A_10)**2\n", + "s_squared_sum = tf.reduce_sum(s[10:]**2)\n", + "print(f\"Squared Frobenius norm: {squared_norm:.3f}\")\n", + "print(f\"Sum of squared singular values left out: {s_squared_sum:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the process of implementing the singular value decomposition with TensorFlow and applying it to write an image compression algorithm. Here are a few more tips that may help:\n", + "\n", + "* The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be utilized for a variety of high-performance scientific computing use cases.\n", + "* To learn more about TensorFlow's linear algebra functionalities, visit the docs for the [linalg module](https://www.tensorflow.org/api_docs/python/tf/linalg).\n", + "* The SVD can also be applied to build [recommendation systems](https://developers.google.com/machine-learning/recommendation/labs/movie-rec-programming-exercise).\n", + "\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "matrix_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/mlp_core.ipynb b/site/en/guide/core/mlp_core.ipynb new file mode 100644 index 00000000000..a5975c20c6e --- /dev/null +++ b/site/en/guide/core/mlp_core.ipynb @@ -0,0 +1,964 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Multilayer perceptrons for digit recognition with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build an end-to-end machine learning workflow for handwritten digit classification with [multilayer perceptrons](https://developers.google.com/machine-learning/crash-course/introduction-to-neural-networks/anatomy) and the [MNIST dataset](http://yann.lecun.com/exdb/mnist). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GHVMVIFHSzl1" + }, + "source": [ + "## Multilayer perceptron (MLP) overview\n", + "\n", + "The Multilayer Perceptron (MLP) is a type of feedforward neural network used to approach [multiclass classification](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/video-lecture) problems. Before building an MLP, it is crucial to understand the concepts of perceptrons, layers, and activation functions.\n", + "\n", + "Multilayer Perceptrons are made up of functional units called perceptrons. The equation of a perceptron is as follows:\n", + "\n", + "$$Z = \\vec{w}⋅\\mathrm{X} + b$$\n", + "\n", + "where\n", + "\n", + "* $Z$: perceptron output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\vec{w}$: weight vector\n", + "* $b$: bias\n", + "\n", + "When these perceptrons are stacked, they form structures called dense layers which can then be connected to build a neural network. A dense layer's equation is similar to that of a perceptron's but uses a weight matrix and a bias vector instead: \n", + "\n", + "$$Z = \\mathrm{W}⋅\\mathrm{X} + \\vec{b}$$\n", + "\n", + "where\n", + "\n", + "* $Z$: dense layer output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\mathrm{W}$: weight matrix\n", + "* $\\vec{b}$: bias vector\n", + "\n", + "\n", + "In an MLP, multiple dense layers are connected in such a way that the outputs of one layer are fully connected to the inputs of the next layer. Adding non-linear activation functions to the outputs of dense layers can help the MLP classifier learn complex decision boundaries and generalize well to unseen data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow, [pandas](https://pandas.pydata.org), [Matplotlib](https://matplotlib.org) and [seaborn](https://seaborn.pydata.org) to get started." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mSfgqmwBagw_" + }, + "outputs": [], + "source": [ + "# Use seaborn for countplot.\n", + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import tempfile\n", + "import os\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## Load the data\n", + "\n", + "This tutorial uses the [MNIST dataset](http://yann.lecun.com/exdb/mnist), and demonstrates how to build an MLP model that can classify handwritten digits. The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist).\n", + "\n", + "Split the MNIST dataset into training, validation, and testing sets. The validation set can be used to gauge the model's generalizability during training so that the test set can serve as a final unbiased estimator for the model's performance.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uiuh0B098_3p" + }, + "outputs": [], + "source": [ + "train_data, val_data, test_data = tfds.load(\"mnist\", \n", + " split=['train[10000:]', 'train[0:10000]', 'test'],\n", + " batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X9uN3Lf6ANtn" + }, + "source": [ + "The MNIST dataset consists of handwritten digits and their corresponding true labels. Visualize a couple of examples below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6V8hSqJ7AMjQ" + }, + "outputs": [], + "source": [ + "x_viz, y_viz = tfds.load(\"mnist\", split=['train[:1500]'], batch_size=-1, as_supervised=True)[0]\n", + "x_viz = tf.squeeze(x_viz, axis=3)\n", + "\n", + "for i in range(9):\n", + " plt.subplot(3,3,1+i)\n", + " plt.axis('off')\n", + " plt.imshow(x_viz[i], cmap='gray')\n", + " plt.title(f\"True Label: {y_viz[i]}\")\n", + " plt.subplots_adjust(hspace=.5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRald9dSE4qS" + }, + "source": [ + "Also review the distribution of digits in the training data to verify that each class is well represented in the dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rj3K4XgQE7qR" + }, + "outputs": [], + "source": [ + "sns.countplot(x=y_viz.numpy());\n", + "plt.xlabel('Digits')\n", + "plt.title(\"MNIST Digit Distribution\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x_Wt4bDx_BRV" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "First, reshape the feature matrices to be 2-dimensional by flattening the images. Next, rescale the data so that the pixel values of [0,255] fit into the range of [0,1]. This step ensures that the input pixels have similar distributions and helps with training convergence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JSyCm2V2_AvI" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, val_data = train_data.map(preprocess), val_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Start by visualizing the [ReLU](https://developers.google.com/machine-learning/glossary#ReLU) and [Softmax](https://developers.google.com/machine-learning/glossary#softmax) activation functions. Both functions are available in `tf.nn.relu` and `tf.nn.softmax` respectively. The ReLU is a non-linear activation function that outputs the input if it is positive and 0 otherwise: \n", + "\n", + "$$\\text{ReLU}(X) = max(0, X)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hYunzt3UyT9G" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.relu(x));\n", + "plt.xlabel('x')\n", + "plt.ylabel('ReLU(x)')\n", + "plt.title('ReLU activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fuGrM9jMwsRM" + }, + "source": [ + "The softmax activation function is a normalized exponential function that converts $m$ real numbers into a probability distribution with $m$ outcomes/classes. This is useful for predicting class probabilities from a neural network's output:\n", + "\n", + "$$\\text{Softmax}(X) = \\frac{e^{X}}{\\sum_{i=1}^{m}e^{X_i}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fVM8pvhWwuwI" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-4, 4, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.softmax(x, axis=0));\n", + "plt.xlabel('x')\n", + "plt.ylabel('Softmax(x)')\n", + "plt.title('Softmax activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Create a class for the dense layer. By definition, the outputs of one layer are fully connected to the inputs of the next layer in an MLP. Therefore, the input dimension for a dense layer can be inferred based on the output dimension of its previous layer and does not need to be specified upfront during its initialization. The weights should also be initialized properly to prevent activation outputs from becoming too large or small. One of the most popular weight initialization methods is the Xavier scheme, where each element of the weight matrix is sampled in the following manner:\n", + "\n", + "$$W_{ij} \\sim \\text{Uniform}(-\\frac{\\sqrt{6}}{\\sqrt{n + m}},\\frac{\\sqrt{6}}{\\sqrt{n + m}})$$\n", + "\n", + "The bias vector can be initialized to zeros." 
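For a concrete sense of scale (a quick numeric check using this tutorial's first hidden layer dimensions, not part of the original text), a $784 \times 700$ weight matrix gets a Xavier limit of about 0.064, so every weight starts close to zero:

```python
# Hypothetical numeric check of the Xavier limit for a 784 -> 700 dense layer.
in_dim, out_dim = 784, 700
xavier_lim = (6 / (in_dim + out_dim)) ** 0.5
print(f"Weights drawn from Uniform(-{xavier_lim:.4f}, {xavier_lim:.4f})")  # ~0.0636
```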
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "re1SSFyBdMrS" + }, + "outputs": [], + "source": [ + "def xavier_init(shape):\n", + " # Computes the xavier initialization values for a weight matrix\n", + " in_dim, out_dim = shape\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))\n", + " weight_vals = tf.random.uniform(shape=(in_dim, out_dim), \n", + " minval=-xavier_lim, maxval=xavier_lim, seed=22)\n", + " return weight_vals" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "otDFX4u6e6ml" + }, + "source": [ + "The Xavier initialization method can also be implemented with `tf.keras.initializers.GlorotUniform`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, out_dim, weight_init=xavier_init, activation=tf.identity):\n", + " # Initialize the dimensions and activation functions\n", + " self.out_dim = out_dim\n", + " self.weight_init = weight_init\n", + " self.activation = activation\n", + " self.built = False\n", + "\n", + " def __call__(self, x):\n", + " if not self.built:\n", + " # Infer the input dimension based on first call\n", + " self.in_dim = x.shape[1]\n", + " # Initialize the weights and biases\n", + " self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))\n", + " self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))\n", + " self.built = True\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "Next, build a class for the MLP model that executes layers sequentially.\n", + "Remember that the model variables are only available after the first sequence of dense layer calls due to dimension inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " @tf.function\n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luXKup-43nd7" + }, + "source": [ + "Initialize a MLP model with the following architecture:\n", + "\n", + "- Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n", + "\n", + "The softmax activation function does not need to be applied by the MLP. It is computed separately in the loss and prediction functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VmlACuki3oPi" + }, + "outputs": [], + "source": [ + "hidden_layer_1_size = 700\n", + "hidden_layer_2_size = 500\n", + "output_size = 10\n", + "\n", + "mlp_model = MLP([\n", + "    DenseLayer(out_dim=hidden_layer_1_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=hidden_layer_2_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=output_size)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tyBATDoRmDkg" + }, + "source": [ + "### Define the loss function\n", + "\n", + "The cross-entropy loss function is a great choice for multiclass classification problems since it measures the negative log-likelihood of the data according to the model's probability predictions. The higher the probability assigned to the true class, the lower the loss. The equation for the cross-entropy loss is as follows:\n", + "\n", + "$$L = -\frac{1}{n}\sum_{i=1}^{n}\sum_{j=1}^{m} {y_j}^{[i]}⋅\log(\hat{{y_j}}^{[i]})$$\n", + "\n", + "where\n", + "\n", + "* $\underset{n\times m}{\hat{y}}$: a matrix of predicted class distributions\n", + "* $\underset{n\times m}{y}$: a one hot encoded matrix of true classes\n", + "\n", + "The `tf.nn.sparse_softmax_cross_entropy_with_logits` function can be used to compute the cross-entropy loss. This function does not require the model's last layer to apply the softmax activation function nor does it require the class labels to be one hot encoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rskOYA7FVCwg" + }, + "outputs": [], + "source": [ + "def cross_entropy_loss(y_pred, y):\n", + "  # Compute cross entropy loss with a sparse operation\n", + "  sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + "  return tf.reduce_mean(sparse_ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BvWxED1km8jh" + }, + "source": [ + "Write a basic accuracy function that calculates the proportion of correct classifications during training. In order to generate class predictions from softmax outputs, return the index that corresponds to the largest class probability. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jPJMWx2UgiBm" + }, + "outputs": [], + "source": [ + "def accuracy(y_pred, y):\n", + "  # Compute accuracy after extracting class predictions\n", + "  class_preds = tf.argmax(tf.nn.softmax(y_pred), axis=1)\n", + "  is_equal = tf.equal(y, class_preds)\n", + "  return tf.reduce_mean(tf.cast(is_equal, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JSiNRhTOnKZr" + }, + "source": [ + "### Train the model\n", + "\n", + "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below. Visit the [Optimizers](https://www.tensorflow.org/guide/core/optimizers_core) guide to learn more about designing custom optimizers with TensorFlow Core."
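Before implementing the optimizer, here is a quick sanity check of the cross-entropy loss defined above (a small example with made-up logits, not part of the original tutorial): the sparse operation on raw logits matches a manual `-log(softmax)` computation, which is why no one-hot encoding or final softmax layer is needed.

```python
# Hypothetical sanity check: sparse cross entropy on raw logits vs. manual computation.
logits = tf.constant([[2.0, 1.0, 0.5]])   # one example, three classes
label = tf.constant([2])                  # integer class label, not one-hot
sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
manual_ce = -tf.math.log(tf.nn.softmax(logits)[0, 2])
print(sparse_ce.numpy()[0], manual_ce.numpy())  # both ~1.96
```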
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGIBDk3cAv6a" + }, + "outputs": [], + "source": [ + "class Adam:\n", + "\n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.built = False\n", + " \n", + " def apply_gradients(self, grads, vars):\n", + " # Initialize variables on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "Now, write a custom training loop that updates the MLP parameters with mini-batch gradient descent. Using mini-batches for training provides both memory efficiency and faster convergence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CJLeY2ao1aw6" + }, + "outputs": [], + "source": [ + "def train_step(x_batch, y_batch, loss, acc, model, optimizer):\n", + " # Update the model state given a batch of data\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " grads = tape.gradient(batch_loss, model.variables)\n", + " optimizer.apply_gradients(grads, model.variables)\n", + " return batch_loss, batch_acc\n", + "\n", + "def val_step(x_batch, y_batch, loss, acc, model):\n", + " # Evaluate the model on given a batch of validation data\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "def train_model(mlp, train_data, val_data, loss, acc, optimizer, epochs):\n", + " # Initialize data structures\n", + " train_losses, train_accs = [], []\n", + " val_losses, val_accs = [], []\n", + "\n", + " # Format training loop and begin training\n", + " for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_val, batch_accs_val = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_data:\n", + " # Compute gradients and update the model's parameters\n", + " batch_loss, batch_acc = train_step(x_batch, y_batch, loss, acc, mlp, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the validation data\n", + " for x_batch, y_batch in 
val_data:\n", + " batch_loss, batch_acc = val_step(x_batch, y_batch, loss, acc, mlp)\n", + " batch_losses_val.append(batch_loss)\n", + " batch_accs_val.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " val_loss, val_acc = tf.reduce_mean(batch_losses_val), tf.reduce_mean(batch_accs_val)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " val_losses.append(val_loss)\n", + " val_accs.append(val_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss:.3f}, Training accuracy: {train_acc:.3f}\")\n", + " print(f\"Validation loss: {val_loss:.3f}, Validation accuracy: {val_acc:.3f}\")\n", + " return train_losses, train_accs, val_losses, val_accs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FvbfXlN5lwwB" + }, + "source": [ + "Train the MLP model for 10 epochs with batch size of 128. Hardware accelerators like GPUs or TPUs can also help speed up training time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPlT8QfxptYl" + }, + "outputs": [], + "source": [ + "train_losses, train_accs, val_losses, val_accs = train_model(mlp_model, train_data, val_data, \n", + " loss=cross_entropy_loss, acc=accuracy,\n", + " optimizer=Adam(), epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, val_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(val_metric)), val_metric, label = f\"Validation {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DC-qIvZbHo0G" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, val_losses, \"cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P-w2xk2PIDve" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, val_accs, \"accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tbrJJaFrD_XR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Data preprocessing \n", + "- Probability prediction\n", + "- Class prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1sszfWuJJZoo" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, preprocess, class_pred):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.preprocess = preprocess\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, None], dtype=tf.uint8)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x 
= self.preprocess(x)\n", + " y = self.model(x)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8x6gjTDVi5d" + }, + "outputs": [], + "source": [ + "def preprocess_test(x):\n", + " # The export module takes in unprocessed and unlabeled data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " x = x/255\n", + " return x\n", + "\n", + "def class_pred_test(y):\n", + " # Generate class predictions from MLP output\n", + " return tf.argmax(tf.nn.softmax(y), axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vu9H5STrJzdo" + }, + "source": [ + "This export module can now be saved with the `tf.saved_model.save` function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fN9pPBQTKTe3" + }, + "outputs": [], + "source": [ + "mlp_model_export = ExportModule(model=mlp_model,\n", + " preprocess=preprocess_test,\n", + " class_pred=class_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "idS7rQKbKwRS" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'mlp_model_export')\n", + "tf.saved_model.save(mlp_model_export, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_zZxO8iqBGZ-" + }, + "source": [ + "Load the saved model with `tf.saved_model.load` and examine its performance on the unseen test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W5cwBTUqxldW" + }, + "outputs": [], + "source": [ + "mlp_loaded = tf.saved_model.load(save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bmv0u6j_b5OC" + }, + "outputs": [], + "source": [ + "def accuracy_score(y_pred, y):\n", + " # Generic accuracy function\n", + " is_equal = tf.equal(y_pred, y)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))\n", + "\n", + "x_test, y_test = tfds.load(\"mnist\", split=['test'], batch_size=-1, as_supervised=True)[0]\n", + "test_classes = mlp_loaded(x_test)\n", + "test_acc = accuracy_score(test_classes, y_test)\n", + "print(f\"Test Accuracy: {test_acc:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5t9vgv_ciQ_" + }, + "source": [ + "The model does a great job of classifying handwritten digits in the training dataset and also generalizes well to unseen data. Now, examine the model's class-wise accuracy to ensure good performance for each digit. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UD8YiC1Vfeyp" + }, + "outputs": [], + "source": [ + "print(\"Accuracy breakdown by digit:\")\n", + "print(\"---------------------------\")\n", + "label_accs = {}\n", + "for label in range(10):\n", + " label_ind = (y_test == label)\n", + " # extract predictions for specific true label\n", + " pred_label = test_classes[label_ind]\n", + " labels = y_test[label_ind]\n", + " # compute class-wise accuracy\n", + " label_accs[accuracy_score(pred_label, labels).numpy()] = label\n", + "for key in sorted(label_accs):\n", + " print(f\"Digit {label_accs[key]}: {key:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rcykuJFhdGb0" + }, + "source": [ + "It looks like the model struggles with some digits a little more than others which is quite common in many multiclass classification problems. 
As a final exercise, plot a confusion matrix of the model's predictions and its corresponding true labels to gather more class-level insights. Sklearn and seaborn have functions for generating and visualizing confusion matrices. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JqCaqPwwh1tN" + }, + "outputs": [], + "source": [ + "import sklearn.metrics as sk_metrics\n", + "\n", + "def show_confusion_matrix(test_labels, test_classes):\n", + " # Compute confusion matrix and normalize\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(test_labels.numpy(), \n", + " test_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(10)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(y_test, test_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JT-WA7GVda6d" + }, + "source": [ + "Class-level insights can help identify reasons for misclassifications and improve model performance in future training cycles." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a multiclass classification problem with an [MLP](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax). Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Initialization schemes can help prevent model parameters from vanishing or exploding during training.\n", + "- Overfitting is another common problem for neural networks, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "mlp_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/optimizers_core.ipynb b/site/en/guide/core/optimizers_core.ipynb new file mode 100644 index 00000000000..e22f0327419 --- /dev/null +++ b/site/en/guide/core/optimizers_core.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Optimizers with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook introduces the process of creating custom optimizers with the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. \n", + "\n", + "The [Keras optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers) module is the recommended optimization toolkit for many general training purposes. It includes a variety of prebuilt optimiziers as well as subclassing functionality for customization. The Keras optimizers are also compatible with custom layers, models, and training loops built with the Core APIs. These prebuilt and customizable optimizers are suitable for most cases, but the Core APIs allow for complete control over the optimization process. For example, techniques such as Sharpness-Aware Minimization (SAM) require the model and optimizer to be coupled, which does not fit the traditional definition of ML optimizers. This guide walks through the process of building custom optimizers from scratch with the Core APIs, giving you the power to have full control over the structure, implementation, and behavior of your optimizers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nBmqYyodNRd_" + }, + "source": [ + "## Optimizers overview\n", + "\n", + "An optimizer is an algorithm used to minimize a loss function with respect to a model's trainable parameters. The most straightforward optimization technique is gradient descent, which iteratively updates a model's parameters by taking a step in the direction of its loss function's steepest descent. Its step size is directly proportional to the size of the gradient, which can be problematic when the gradient is either too large or too small. There are many other gradient-based optimizers such as Adam, Adagrad, and RMSprop that leverage various mathematical properties of gradients for memory efficiency and fast convergence." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d9idwpXCltUl" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)\n", + "# set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UmF5aU3MnwX" + }, + "source": [ + "## Gradient descent\n", + "\n", + "The basic optimizer class should have an initialization method and a function to update a list of variables given a list of gradients. Start by implementing the basic gradient descent optimizer which updates each variable by subtracting its gradient scaled by a learning rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MWjmUmeOQFFN" + }, + "outputs": [], + "source": [ + "class GradientDescent(tf.Module):\n", + "\n", + " def __init__(self, learning_rate=1e-3):\n", + " # Initialize parameters\n", + " self.learning_rate = learning_rate\n", + " self.title = f\"Gradient descent optimizer: learning rate={self.learning_rate}\"\n", + "\n", + " def apply_gradients(self, grads, vars):\n", + " # Update variables\n", + " for grad, var in zip(grads, vars):\n", + " var.assign_sub(self.learning_rate*grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZSekgBHDRzmp" + }, + "source": [ + "To test this optimizer, create a sample loss function to minimize with respect to a single variable, $x$. Compute its gradient function and solve for its minimizing parameter value:\n", + "\n", + "$$L = 2x^4 + 3x^3 + 2$$\n", + "\n", + "$$\\frac{dL}{dx} = 8x^3 + 9x^2$$\n", + "\n", + "$\\frac{dL}{dx}$ is 0 at $x = 0$, which is a saddle point and at $x = - \\frac{9}{8}$, which is the global minimum. Therefore, the loss function is optimized at $x^\\star = - \\frac{9}{8}$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VCtJaUo6Ry8V" + }, + "outputs": [], + "source": [ + "x_vals = tf.linspace(-2, 2, 201)\n", + "x_vals = tf.cast(x_vals, tf.float32)\n", + "\n", + "def loss(x):\n", + " return 2*(x**4) + 3*(x**3) + 2\n", + "\n", + "def grad(f, x):\n", + " with tf.GradientTape() as tape:\n", + " tape.watch(x)\n", + " result = f(x)\n", + " return tape.gradient(result, x)\n", + "\n", + "plt.plot(x_vals, loss(x_vals), c='k', label = \"Loss function\")\n", + "plt.plot(x_vals, grad(loss, x_vals), c='tab:blue', label = \"Gradient function\")\n", + "plt.plot(0, loss(0), marker=\"o\", c='g', label = \"Inflection point\")\n", + "plt.plot(-9/8, loss(-9/8), marker=\"o\", c='r', label = \"Global minimum\")\n", + "plt.legend()\n", + "plt.ylim(0,5)\n", + "plt.xlabel(\"x\")\n", + "plt.ylabel(\"loss\")\n", + "plt.title(\"Sample loss function and gradient\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fLlIBJ9yuwhE" + }, + "source": [ + "Write a function to test the convergence of an optimizer with a single variable loss function. Assume that convergence has been achieved when the updated parameter's value at timestep $t$ is the same as its value held at timestep $t-1$. Terminate the test after a set number of iterations and also keep track of any exploding gradients during the process. In order to truly challenge the optimization algorithm, initialize the parameter poorly. In the above example, $x = 2$ is a good choice since it involves an steep gradient and also leads into an inflection point." 
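To see why $x = 2$ is a challenging starting point (a quick manual check reusing the `loss` and `grad` functions defined above, not part of the original tutorial), note that the gradient there is already 100, so a learning rate of 1e-1 overshoots the minimum on the very first step:

```python
# Hypothetical manual check: two plain gradient descent steps starting from x = 2.
x = tf.Variable(2.)
lr = 1e-1
for step in range(2):
  g = grad(loss, x)   # first step: 8*(2**3) + 9*(2**2) = 100
  x.assign_sub(lr * g)
  print(f"step {step}: gradient = {g.numpy():.1f}, new x = {x.numpy():.1f}")
# x jumps from 2.0 to -8.0, where the gradient is -3520, so the next step
# overshoots even further -- the exploding-gradient case the test below checks for.
```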
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SLQTc41ouv0F" + }, + "outputs": [], + "source": [ + "def convergence_test(optimizer, loss_fn, grad_fn=grad, init_val=2., max_iters=2000):\n", + " # Function for optimizer convergence test\n", + " print(optimizer.title)\n", + " print(\"-------------------------------\")\n", + " # Initializing variables and structures\n", + " x_star = tf.Variable(init_val)\n", + " param_path = []\n", + " converged = False\n", + "\n", + " for iter in range(1, max_iters + 1):\n", + " x_grad = grad_fn(loss_fn, x_star)\n", + "\n", + " # Case for exploding gradient\n", + " if tf.math.is_nan(x_grad):\n", + " print(f\"Gradient exploded at iteration {iter}\\n\")\n", + " return []\n", + "\n", + " # Updating the variable and storing its old-version\n", + " x_old = x_star.numpy()\n", + " optimizer.apply_gradients([x_grad], [x_star])\n", + " param_path.append(x_star.numpy())\n", + "\n", + " # Checking for convergence\n", + " if x_star == x_old:\n", + " print(f\"Converged in {iter} iterations\\n\")\n", + " converged = True\n", + " break\n", + " \n", + " # Print early termination message\n", + " if not converged:\n", + " print(f\"Exceeded maximum of {max_iters} iterations. Test terminated.\\n\")\n", + " return param_path" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vK-7_TsmyAgI" + }, + "source": [ + "Test the convergence of the gradient descent optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lWRn8c91mqB0" + }, + "outputs": [], + "source": [ + "param_map_gd = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_gd[learning_rate] = (convergence_test(\n", + " GradientDescent(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TydrGHF5y6iI" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "piffzGHI_u5G" + }, + "outputs": [], + "source": [ + "def viz_paths(param_map, x_vals, loss_fn, title, max_iters=2000):\n", + " # Creating a controur plot of the loss function\n", + " t_vals = tf.range(1., max_iters + 100.)\n", + " t_grid, x_grid = tf.meshgrid(t_vals, x_vals)\n", + " loss_grid = tf.math.log(loss_fn(x_grid))\n", + " plt.pcolormesh(t_vals, x_vals, loss_grid, vmin=0, shading='nearest')\n", + " colors = ['r', 'w', 'c']\n", + " # Plotting the parameter paths over the contour plot\n", + " for i, learning_rate in enumerate(param_map):\n", + " param_path = param_map[learning_rate]\n", + " if len(param_path) > 0:\n", + " x_star = param_path[-1]\n", + " plt.plot(t_vals[:len(param_path)], param_path, c=colors[i])\n", + " plt.plot(len(param_path), x_star, marker='o', c=colors[i], \n", + " label = f\"x*: learning rate={learning_rate}\")\n", + " plt.xlabel(\"Iterations\")\n", + " plt.ylabel(\"Parameter value\")\n", + " plt.legend()\n", + " plt.title(f\"{title} parameter paths\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ssyj2sO4BcNY" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_gd, x_vals, loss, \"Gradient descent\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MmM-5eDLFnmC" + }, + "source": [ + "Gradient descent seems to get stuck at the inflection point when using smaller learning rates. 
Increasing the learning rate can encourage faster movement around the plateau region due to a larger step size; however, this comes at the risk of having exploding gradients in early iterations when the loss function is extremely steep." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m5CDeXN8S1SF" + }, + "source": [ + "## Gradient descent with momentum\n", + "\n", + "Gradient descent with momentum not only uses the gradient to update a variable but also involves the change in position of a variable based on its previous update. The momentum parameter determines the level of influence the update at timestep $t-1$ has on the update at timestep $t$. Accumulating momentum helps to move variables past plateau regions faster than basic gradient descent. The momentum update rule is as follows:\n", + "\n", + "$$\Delta_x^{[t]} = lr \cdot L^\prime(x^{[t-1]}) + p \cdot \Delta_x^{[t-1]}$$\n", + "\n", + "$$x^{[t]} = x^{[t-1]} - \Delta_x^{[t]}$$\n", + "\n", + "where\n", + "\n", + "* $x$: the variable being optimized\n", + "* $\Delta_x$: change in $x$ \n", + "* $lr$: learning rate\n", + "* $L^\prime(x)$: gradient of the loss function with respect to x\n", + "* $p$: momentum parameter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rOBY8Tz4S0dX" + }, + "outputs": [], + "source": [ + "class Momentum(tf.Module):\n", + "\n", + "  def __init__(self, learning_rate=1e-3, momentum=0.7):\n", + "    # Initialize parameters\n", + "    self.learning_rate = learning_rate\n", + "    self.momentum = momentum\n", + "    self.change = 0.\n", + "    self.title = f\"Momentum optimizer: learning rate={self.learning_rate}\"\n", + "\n", + "  def apply_gradients(self, grads, vars):\n", + "    # Update variables \n", + "    for grad, var in zip(grads, vars):\n", + "      curr_change = self.learning_rate*grad + self.momentum*self.change\n", + "      var.assign_sub(curr_change)\n", + "      self.change = curr_change" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t_nDu38gW6Fu" + }, + "source": [ + "Test the convergence of the momentum optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tA6oQL-sW2xg" + }, + "outputs": [], + "source": [ + "param_map_mtm = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + "  param_map_mtm[learning_rate] = (convergence_test(\n", + "      Momentum(learning_rate=learning_rate),\n", + "      loss_fn=loss, grad_fn=grad))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wz_LV0EPYE6k" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qbW1eEKaX3T9" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_mtm, x_vals, loss, \"Momentum\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4bEFnhPRTBXh" + }, + "source": [ + "## Adaptive moment estimation (Adam)\n", + "\n", + "The Adaptive Moment Estimation (Adam) algorithm is an efficient and highly generalizable optimization technique that leverages two key gradient descent methodologies: momentum and root mean square propagation (RMSP). Momentum helps accelerate gradient descent by using the first moment (sum of gradients) along with a decay parameter. RMSP is similar; however, it leverages the second moment (sum of gradients squared). 
\n", + "\n", + "The Adam algorithm combines both the first and second moment to provide a more generalizable update rule. The sign of a variable, $x$, can be determined by computing $\\frac{x}{\\sqrt{x^2}}$. The Adam optimizer uses this fact to calculate an update step which is effectively a smoothed sign. Instead of calculating $\\frac{x}{\\sqrt{x^2}}$, the optimizer calculates a smoothed version of $x$ (first moment) and $x^2$ (second moment) for each variable update. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WjgyqRiZ7XhA" + }, + "source": [ + "**Adam algorithm**\n", + "\n", + "$\\beta_1 \\gets 0.9 \\; \\triangleright \\text{literature value}$\n", + "\n", + "$\\beta_2 \\gets 0.999 \\; \\triangleright \\text{literature value}$\n", + "\n", + "$lr \\gets \\text{1e-3} \\; \\triangleright \\text{configurable learning rate}$\n", + "\n", + "$\\epsilon \\gets \\text{1e-7} \\; \\triangleright \\text{prevents divide by 0 error}$\n", + "\n", + "$V_{dv} \\gets \\vec {\\underset{n\\times1}{0}} \\;\\triangleright \\text{stores momentum updates for each variable}$\n", + "\n", + "$S_{dv} \\gets \\vec {\\underset{n\\times1}{0}} \\; \\triangleright \\text{stores RMSP updates for each variable}$\n", + "\n", + "$t \\gets 1$\n", + "\n", + "$\\text{On iteration } t:$\n", + "\n", + "$\\;\\;\\;\\; \\text{For} (\\frac{dL}{dv}, v) \\text{ in gradient variable pairs}:$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; V_{dv\\_i} = \\beta_1V_{dv\\_i} + (1 - \\beta_1)\\frac{dL}{dv} \\; \\triangleright \\text{momentum update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; S_{dv\\_i} = \\beta_2V_{dv\\_i} + (1 - \\beta_2)(\\frac{dL}{dv})^2 \\; \\triangleright \\text{RMSP update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; v_{dv}^{bc} = \\frac{V_{dv\\_i}}{(1-\\beta_1)^t} \\; \\triangleright \\text{momentum bias correction}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; s_{dv}^{bc} = \\frac{S_{dv\\_i}}{(1-\\beta_2)^t} \\; \\triangleright \\text{RMSP bias correction}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; v = v - lr\\frac{v_{dv}^{bc}}{\\sqrt{s_{dv}^{bc}} + \\epsilon} \\; \\triangleright \\text{parameter update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; t = t + 1$\n", + "\n", + "**End of algorithm**\n", + "\n", + "Given that $V_{dv}$ and $S_{dv}$ are initialized to 0 and that $\\beta_1$ and $\\beta_2$ are close to 1, the momentum and RMSP updates are naturally biased towards 0; therefore, the variables can benefit from bias correction. Bias correction also helps to control the osccilation of weights as they approach the global minimum." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hm5vffRJRsEc" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + " \n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize the Adam parameters\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.title = f\"Adam: learning rate={self.learning_rate}\"\n", + " self.built = False\n", + "\n", + " def apply_gradients(self, grads, vars):\n", + " # Set up moment and RMSprop slots for each variable on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Perform Adam updates\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " # Moment calculation\n", + " self.v_dvar[i] = self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var\n", + " # RMSprop calculation\n", + " self.s_dvar[i] = self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var)\n", + " # Bias correction\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " # Update model variables\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " # Increment the iteration counter\n", + " self.t += 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UWN4Qus7flUO" + }, + "source": [ + "Test the performance of the Adam optimizer with the same learning rates used in the gradient descent examples. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GXHCxtemFBpR" + }, + "outputs": [], + "source": [ + "param_map_adam = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_adam[learning_rate] = (convergence_test(\n", + " Adam(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgpUcs_xXEjX" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ctvOUmlzFK8s" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_adam, x_vals, loss, \"Adam\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_oGScF8zJcY4" + }, + "source": [ + "In this particular example, the Adam optimizer has slower convergence compared to traditional gradient descent when using small learning rates. However, the algorithm successfully moves past the plateau region and converges to the global minimum when using a larger learning rate. Exploding gradients are no longer an issue due to Adam's dynamic scaling of learning rates when encountering large gradients." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the basics of writing and comparing optimizers with the [TensorFlow Core APIs](https://www.tensorflow.org/guide/core). Although prebuilt optimizers like Adam are generalizable, they may not always be the best choice for every model or dataset. 
Having fine-grained control over the optimization process can help streamline ML training workflows and improve overall performance. Refer to the following documentation for more examples of custom optimizers:\n", + "\n", + "* This Adam optimizer is used in the [Multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial and the [Distributed training]()\n", + "* [Model Garden](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) has a variety of [custom optimizers](https://github.com/tensorflow/models/tree/master/official/modeling/optimization) written with the Core APIs.\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "optimizers_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/quickstart_core.ipynb b/site/en/guide/core/quickstart_core.ipynb new file mode 100644 index 00000000000..70586fd3f0c --- /dev/null +++ b/site/en/guide/core/quickstart_core.ipynb @@ -0,0 +1,591 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rX8mhOLljYeM" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BZSlp3DAjdYf" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3wF5wszaj97Y" + }, + "source": [ + "# Quickstart for the TensorFlow Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DUNzJc4jTj6G" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04QgGZc9bF5D" + }, + "source": [ + "This quickstart tutorial demonstrates how you can use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build and train a multiple linear regression model that predicts fuel efficiency. It uses the [Auto MPG](https://archive.ics.uci.edu/ml/datasets/auto+mpg) dataset which contains fuel efficiency data for late-1970s and early 1980s automobiles.\n", + "\n", + "You will follow the typical stages of a machine learning process:\n", + "\n", + "1. Load the dataset.\n", + "2. Build an [input pipeline](../data.ipynb).\n", + "3. Build a multiple [linear regression](https://developers.google.com/machine-learning/glossary#linear-regression) model.\n", + "4. Evaluate the performance of the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nnrWf3PCEzXL" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow and other necessary libraries to get started:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0trJmd6DjqBZ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "# Set a random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NAbSZiaoJ4z" + }, + "source": [ + "## Load and preprocess the dataset\n", + "\n", + "Next, you need to load and preprocess the [Auto MPG dataset](https://archive.ics.uci.edu/ml/datasets/auto+mpg) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset uses a variety of quantitative and categorical features such as cylinders, displacement, horsepower and weight to predict the fuel efficiencies of automobiles in the late-1970s and early 1980s.\n", + "\n", + "The dataset contains a few unknown values. Make sure to drop any missing values with `pandas.DataFrame.dropna`, and convert the dataset to a `tf.float32` tensor type with the `tf.convert_to_tensor` and `tf.cast` functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HglhDsUfrJ98" + }, + "outputs": [], + "source": [ + "url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'\n", + "column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',\n", + " 'Acceleration', 'Model Year', 'Origin']\n", + "\n", + "dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\\t',\n", + " sep=' ', skipinitialspace=True)\n", + "\n", + "dataset = dataset.dropna()\n", + "dataset_tf = tf.convert_to_tensor(dataset, dtype=tf.float32)\n", + "dataset.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vgoDL3hYesB" + }, + "source": [ + "Next, split the dataset into training and test sets. Make sure to shuffle the dataset with `tf.random.shuffle` to avoid biased splits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0mJU4kt6YiAp" + }, + "outputs": [], + "source": [ + "dataset_shuffled = tf.random.shuffle(dataset_tf, seed=22)\n", + "train_data, test_data = dataset_shuffled[100:], dataset_shuffled[:100]\n", + "x_train, y_train = train_data[:, 1:], train_data[:, 0]\n", + "x_test, y_test = test_data[:, 1:], test_data[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bscb2Vsbi3TE" + }, + "source": [ + "Perform basic feature engineering by one-hot-encoding the `\"Origin\"` feature. The `tf.one_hot` function is useful for transforming this categorical column into 3 separate binary columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_B8N9IV1i6IV" + }, + "outputs": [], + "source": [ + "def onehot_origin(x):\n", + " origin = tf.cast(x[:, -1], tf.int32)\n", + " # Use `origin - 1` to account for 1-indexed feature\n", + " origin_oh = tf.one_hot(origin - 1, 3)\n", + " x_ohe = tf.concat([x[:, :-1], origin_oh], axis = 1)\n", + " return x_ohe\n", + "\n", + "x_train_ohe, x_test_ohe = onehot_origin(x_train), onehot_origin(x_test)\n", + "x_train_ohe.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qnoCDzzedite" + }, + "source": [ + "This example shows a multiple regression problem with predictors or features on vastly different scales. Therefore, it is beneficial to standardize the data so that each feature has zero mean and unit variance. Use the `tf.reduce_mean` and `tf.math.reduce_std` functions for standardization. The regression model's prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dJJFdvqydhyp" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.math.reduce_mean(x, axis=0)\n", + " self.std = tf.math.reduce_std(x, axis=0)\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BONV6fYYwZb" + }, + "outputs": [], + "source": [ + "norm_x = Normalize(x_train_ohe)\n", + "norm_y = Normalize(y_train)\n", + "x_train_norm, y_train_norm = norm_x.norm(x_train_ohe), norm_y.norm(y_train)\n", + "x_test_norm, y_test_norm = norm_x.norm(x_test_ohe), norm_y.norm(y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BPZ68wASog_I" + }, + "source": [ + "## Build a machine learning model\n", + "\n", + "Build a linear regression model with the TensorFlow Core APIs. The equation for multiple linear regression is as follows:\n", + "\n", + "$${\\mathrm{Y}} = {\\mathrm{X}}w + b$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: feature matrix\n", + "* $\\underset{n\\times 1}w$: weight vector\n", + "* $b$: bias\n", + "\n", + "By using the `@tf.function` decorator, the corresponding Python code is traced to generate a callable TensorFlow graph. This approach is beneficial for saving and loading the model after training. It can also provide a performance boost for models with many layers and complex operations. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h3IKyzTCDNGo" + }, + "outputs": [], + "source": [ + "class LinearRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weight vector and bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1])\n", + " rand_b = tf.random.uniform(shape=[])\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " y = tf.add(tf.matmul(x, self.w), self.b)\n", + " return tf.squeeze(y, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l2hiez2eIUz8" + }, + "source": [ + "For each example, the model returns a prediction for the input automobile's MPG by computing the weighted sum of its features plus a bias term. This prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OeOrNdnkEEcR" + }, + "outputs": [], + "source": [ + "lin_reg = LinearRegression()\n", + "prediction = lin_reg(x_train_norm[:1])\n", + "prediction_unnorm = norm_y.unnorm(prediction)\n", + "prediction_unnorm.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FIHANxNSvWr9" + }, + "source": [ + "## Define a loss function\n", + "\n", + "Now, define a loss function to evaluate the model's performance during the training process.\n", + "\n", + "Since regression problems deal with continuous outputs, the mean squared error (MSE) is an ideal choice for the loss function. The MSE is defined by the following equation:\n", + "\n", + "$$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$$\n", + "\n", + "where\n", + "\n", + "* $\\hat{y}$: vector of predictions\n", + "* $y$: vector of true targets\n", + "\n", + "The goal of this regression problem is to find the optimal weight vector, $w$, and bias, $b$, that minimizes the MSE loss function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8tYNVUkmw35s" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "htI-7aJPqclK" + }, + "source": [ + "## Train and evaluate your model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. Learn more about building TensorFlow input pipelines in [this guide](https://www.tensorflow.org/guide/data)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kxST2w_Nq0C5" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train_norm))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test_norm))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C9haUW8Yq3xD" + }, + "source": [ + "Next, write a training loop to iteratively update your model's parameters by making use of the MSE loss function and its gradients with respect to the input parameters.\n", + "\n", + "This iterative method is referred to as [gradient descent](https://developers.google.com/machine-learning/glossary#gradient-descent). At each iteration, the model's parameters are updated by taking a step in the opposite direction of their computed gradients. The size of this step is determined by the learning rate, which is a configurable hyperparameter. Recall that the gradient of a function indicates the direction of its steepest ascent; therefore, taking a step in the opposite direction indicates the direction of steepest descent, which ultimately helps to minimize the MSE loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y7suUbJXVLqP" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 100\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_losses_test = [], []\n", + "\n", + " # Iterate through the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Update parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, lin_reg.variables)\n", + " for g,v in zip(grads, lin_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance \n", + " batch_losses_train.append(batch_loss)\n", + " \n", + " # Iterate through the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance \n", + " batch_losses_test.append(batch_loss)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss = tf.reduce_mean(batch_losses_train)\n", + " test_loss = tf.reduce_mean(batch_losses_test)\n", + " train_losses.append(train_loss)\n", + " test_losses.append(test_loss)\n", + " if epoch % 10 == 0:\n", + " print(f'Mean squared error for step {epoch}: {train_loss.numpy():0.3f}')\n", + "\n", + "# Output final losses\n", + "print(f\"\\nFinal train loss: {train_loss:0.3f}\")\n", + "print(f\"Final test loss: {test_loss:0.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mDAAPFqVVgn" + }, + "source": [ + "Plot the changes in MSE loss over time. 
Calculating performance metrics on a designated [validation set](https://developers.google.com/machine-learning/glossary#validation-set) or [test set](https://developers.google.com/machine-learning/glossary#test-set) ensures the model does not overfit to the training dataset and can generalize well to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F7dTAzgHDUh7" + }, + "outputs": [], + "source": [ + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Mean squared error loss\")\n", + "plt.legend()\n", + "plt.title(\"MSE loss vs training iterations\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aj8NrlzlJqDG" + }, + "source": [ + "It seems like the model does a good job of fitting the training data while also generalizing well to the unseen test data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AUNIPubuPYDR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Feature extraction \n", + "- Normalization \n", + "- Prediction\n", + "- Unnormalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g-uOrGa9ZehG" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, extract_features, norm_x, norm_y):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.extract_features = extract_features\n", + " self.norm_x = norm_x\n", + " self.norm_y = norm_y\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.extract_features(x)\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x)\n", + " y = self.norm_y.unnorm(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YPYYLQ8EZiU8" + }, + "outputs": [], + "source": [ + "lin_reg_export = ExportModule(model=lin_reg,\n", + " extract_features=onehot_origin,\n", + " norm_x=norm_x,\n", + " norm_y=norm_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6v8xi06XZWiC" + }, + "source": [ + "If you want to save the model at its current state, use the `tf.saved_model.save` function. To load a saved model for making predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1IvMoHbptht" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import os\n", + "\n", + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'lin_reg_export')\n", + "tf.saved_model.save(lin_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYb6DrEH0GMv" + }, + "outputs": [], + "source": [ + "lin_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = lin_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-47O6_GLdRuT" + }, + "source": [ + "## Conclusion\n", + "\n", + "Congratulations! 
You have trained a regression model using the TensorFlow Core low-level APIs.\n", + "\n", + "For more examples of using TensorFlow Core APIs, check out the following guides:\n", + "* [Logistic regression](./logistic_regression_core.ipynb) for binary classification\n", + "* [Multi-layer perceptrons](./mlp_core.ipynb) for hand-written digit recognition\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "rX8mhOLljYeM" + ], + "name": "quickstart_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/create_op.md b/site/en/guide/create_op.md index 90d7fb1ddff..fa4f573fa32 100644 --- a/site/en/guide/create_op.md +++ b/site/en/guide/create_op.md @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to: test the op in C++. If you define gradients, you can verify them with the Python `tf.test.compute_gradient_error`. See - [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as + [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/kernel_tests/nn_ops/relu_op_test.py) as an example that tests the forward functions of Relu-like operators and their gradients. @@ -55,8 +55,8 @@ To incorporate your custom op you'll need to: * Some familiarity with C++. * Must have installed the - [TensorFlow binary](../../install), or must have - [downloaded TensorFlow source](../../install/source.md), + [TensorFlow binary](https://www.tensorflow.org/install), or must have + [downloaded TensorFlow source](https://www.tensorflow.org/install/source), and be able to build it. ## Define the op interface @@ -152,17 +152,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); > Important: Instances of your OpKernel may be accessed concurrently. > Your `Compute` method must be thread-safe. Guard any access to class > members with a mutex. Or better yet, don't share state via class members! -> Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h) +> Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/resource_mgr.h) > to keep track of op state. ### Multi-threaded CPU kernels To write a multi-threaded CPU kernel, the Shard function in -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h) +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/work_sharder.h) can be used. This function shards a computation function across the threads configured to be used for intra-op threading (see intra_op_parallelism_threads in -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)). +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)). ### GPU kernels @@ -354,18 +354,19 @@ to compile your op into a dynamic library. 
```bash TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) -g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 +g++ -std=c++14 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 ``` On macOS, the additional flag "-undefined dynamic_lookup" is required when building the `.so` file. -> Note on `gcc` version `>=5`: gcc uses the new C++ -> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip -> packages available on the TensorFlow website are built with `gcc4` that uses -> the older ABI. If you compile your op library with `gcc>=5`, add -> `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library -> compatible with the older abi. +> Note on `gcc` version `>=5`: gcc uses the new C++ +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. +> TensorFlow 2.8 and earlier were built with `gcc4` that uses the older ABI. If +> you are using these versions of TensorFlow and are trying to compile your op +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are +> compatible with the newer ABI by default. ### Compile the op using bazel (TensorFlow source installation) @@ -518,16 +519,16 @@ This asserts that the input is a vector, and returns having set the * The `context`, which can either be an `OpKernelContext` or `OpKernelConstruction` pointer (see - [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)), + [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_kernel.h)), for its `SetStatus()` method. * The condition. For example, there are functions for validating the shape of a tensor in - [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h) + [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.h) * The error itself, which is represented by a `Status` object, see - [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A + [`tensorflow/core/platform/status.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/status.h). A `Status` has both a type (frequently `InvalidArgument`, but see the list of types) and a message. Functions for constructing an error may be found in - [`tensorflow/core/lib/core/errors.h`][validation-macros]. + [`tensorflow/core/platform/errors.h`][validation-macros]. Alternatively, if you want to test whether a `Status` object returned from some function is an error, and if so return it, use @@ -667,7 +668,7 @@ There are shortcuts for common type constraints: The specific lists of types allowed by these are defined by the functions (like `NumberTypes()`) in -[`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h). +[`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.h). 
In this example the attr `t` must be one of the numeric types: ```c++ @@ -1225,7 +1226,7 @@ There are several ways to preserve backwards-compatibility. type into a list of varying types). The full list of safe and unsafe changes can be found in -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc). +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_compatibility_test.cc). If you cannot make your change to an operation backwards compatible, then create a new operation with a new name with the new semantics. @@ -1242,16 +1243,16 @@ made when TensorFlow changes major versions, and must conform to the You can implement different OpKernels and register one for CPU and another for GPU, just like you can [register kernels for different types](#polymorphism). There are several examples of kernels with GPU support in -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/). +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/). Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file. For example, the `tf.pad` has everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op]. The GPU kernel is in -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc), +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op_gpu.cu.cc), and the shared code is a templated class defined in -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h). +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.h). We organize the code this way for two reasons: it allows you to share common code among the CPU and GPU implementations, and it puts the GPU implementation into a separate file so that it can be compiled only by the GPU compiler. @@ -1272,23 +1273,23 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.: #### Compiling the kernel for the GPU device Look at -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) for an example that uses a CUDA kernel to implement an op. The `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use with a binary installation of TensorFlow, the CUDA kernels have to be compiled with NVIDIA's `nvcc` compiler. 
Here is the sequence of commands you can use to compile the -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) and -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) into a single dynamically loadable library: ```bash -nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ +nvcc -std=c++14 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ +g++ -std=c++14 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} ``` @@ -1379,6 +1380,13 @@ Note that at the time the gradient function is called, only the data flow graph of ops is available, not the tensor data itself. Thus, all computation must be performed using other tensorflow ops, to be run at graph execution time. +Add type hints when registering the custom gradient for an op type to make the +code more readable, debuggable, easier to maintain, and more robust through data +validation. For example, when taking an `op` as a parameter in a function, +specify that the gradient function will take an +tf.Operation +as its parameter type. + ### Shape functions in C++ The TensorFlow API has a feature called "shape inference" that provides @@ -1404,7 +1412,7 @@ be set to the first input's shape. If the output is selected by its index as in There are a number of common shape functions that apply to many ops, such as `shape_inference::UnchangedShape` which can be -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows: +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/common_shape_fns.h) and used as follows: ```c++ REGISTER_OP("ZeroOut") @@ -1451,7 +1459,7 @@ provides access to the attributes of the op). Since shape inference is an optional feature, and the shapes of tensors may vary dynamically, shape functions must be robust to incomplete shape information for -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h) +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/shape_inference.h) allows the caller to assert that two shapes are the same, even if either or both of them do not have complete information. Shape functions are defined for all of the core TensorFlow ops and provide many different usage examples. @@ -1476,7 +1484,7 @@ If you have a complicated shape function, you should consider adding a test for validating that various input shape combinations produce the expected output shape combinations. You can see examples of how to write these tests in some our -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc). +[core ops tests](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops_test.cc). 
(The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be compact in representing input and output shape specifications in tests. For now, see the surrounding comments in those tests to get a sense of the shape @@ -1489,20 +1497,20 @@ To build a `pip` package for your op, see the guide shows how to build custom ops from the TensorFlow pip package instead of building TensorFlow from source. -[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/ -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/ -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops.cc +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/user_ops/user_ops.py +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/ +[user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/user_ops/ +[pad_op]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.cc +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/standard_ops.py +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/ops/standard_ops.h +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/errors.h +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.h +[register_types]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/register_types.h +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.cc +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.cc +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD 
+[types-proto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto diff --git a/site/en/guide/data.ipynb b/site/en/guide/data.ipynb index 42146ac9a01..739ef131005 100644 --- a/site/en/guide/data.ipynb +++ b/site/en/guide/data.ipynb @@ -15,7 +15,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "cellView": "form", "id": "llMNufAK7nfK" }, "outputs": [], @@ -139,8 +138,8 @@ "\n", "Once you have a `Dataset` object, you can *transform* it into a new `Dataset` by\n", "chaining method calls on the `tf.data.Dataset` object. For example, you can\n", - "apply per-element transformations such as `Dataset.map()`, and multi-element\n", - "transformations such as `Dataset.batch()`. See the documentation for\n", + "apply per-element transformations such as `Dataset.map`, and multi-element\n", + "transformations such as `Dataset.batch`. Refer to the documentation for\n", "`tf.data.Dataset` for a complete list of transformations.\n", "\n", "The `Dataset` object is a Python iterable. This makes it possible to consume its\n", @@ -238,9 +237,9 @@ "structure of elements include `tuple`, `dict`, `NamedTuple`, and\n", "`OrderedDict`. In particular, `list` is not a valid construct for\n", "expressing the structure of dataset elements. This is because\n", - "early tf.data users felt strongly about `list` inputs (e.g. passed\n", + "early `tf.data` users felt strongly about `list` inputs (for example, when passed\n", "to `tf.data.Dataset.from_tensors`) being automatically packed as\n", - "tensors and `list` outputs (e.g. return values of user-defined\n", + "tensors and `list` outputs (for example, return values of user-defined\n", "functions) being coerced into a `tuple`. As a consequence, if you\n", "would like a `list` input to be treated as a structure, you need\n", "to convert it into `tuple` and if you would like a `list` output\n", @@ -328,7 +327,7 @@ }, "source": [ "The `Dataset` transformations support datasets of any structure. When using the\n", - "`Dataset.map()`, and `Dataset.filter()` transformations,\n", + "`Dataset.map`, and `Dataset.filter` transformations,\n", "which apply a function to each element, the element structure determines the\n", "arguments of the function:" ] @@ -416,11 +415,11 @@ "source": [ "### Consuming NumPy arrays\n", "\n", - "See [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) for more examples.\n", + "Refer to the [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) tutorial for more examples.\n", "\n", "If all of your input data fits in memory, the simplest way to create a `Dataset`\n", "from them is to convert them to `tf.Tensor` objects and use\n", - "`Dataset.from_tensor_slices()`." + "`Dataset.from_tensor_slices`." ] }, { @@ -472,7 +471,7 @@ "\n", "Another common data source that can easily be ingested as a `tf.data.Dataset` is the python generator.\n", "\n", - "Caution: While this is a convienient approach it has limited portability and scalibility. It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." + "Caution: While this is a convenient approach it has limited portability and scalability. 
It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." ] }, { @@ -544,11 +543,11 @@ "id": "wxy9hDMTq1zD" }, "source": [ - "The `output_shapes` argument is not *required* but is highly recomended as many tensorflow operations do not support tensors with unknown rank. If the length of a particular axis is unknown or variable, set it as `None` in the `output_shapes`.\n", + "The `output_shapes` argument is not *required* but is highly recommended as many TensorFlow operations do not support tensors with an unknown rank. If the length of a particular axis is unknown or variable, set it as `None` in the `output_shapes`.\n", "\n", "It's also important to note that the `output_shapes` and `output_types` follow the same nesting rules as other dataset methods.\n", "\n", - "Here is an example generator that demonstrates both aspects, it returns tuples of arrays, where the second array is a vector with unknown length." + "Here is an example generator that demonstrates both aspects: it returns tuples of arrays, where the second array is a vector with unknown length." ] }, { @@ -589,7 +588,7 @@ "source": [ "The first output is an `int32` the second is a `float32`.\n", "\n", - "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)` " + "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)`" ] }, { @@ -601,8 +600,8 @@ "outputs": [], "source": [ "ds_series = tf.data.Dataset.from_generator(\n", - " gen_series, \n", - " output_types=(tf.int32, tf.float32), \n", + " gen_series,\n", + " output_types=(tf.int32, tf.float32),\n", " output_shapes=((), (None,)))\n", "\n", "ds_series" @@ -710,8 +709,8 @@ "outputs": [], "source": [ "ds = tf.data.Dataset.from_generator(\n", - " lambda: img_gen.flow_from_directory(flowers), \n", - " output_types=(tf.float32, tf.float32), \n", + " lambda: img_gen.flow_from_directory(flowers),\n", + " output_types=(tf.float32, tf.float32),\n", " output_shapes=([32,256,256,3], [32,5])\n", ")\n", "\n", @@ -726,7 +725,7 @@ }, "outputs": [], "source": [ - "for images, label in ds.take(1):\n", + "for images, labels in ds.take(1):\n", " print('images.shape: ', images.shape)\n", " print('labels.shape: ', labels.shape)\n" ] @@ -739,7 +738,7 @@ "source": [ "### Consuming TFRecord data\n", "\n", - "See [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) for an end-to-end example.\n", + "Refer to the [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) tutorial for an end-to-end example.\n", "\n", "The `tf.data` API supports a variety of file formats so that you can process\n", "large datasets that do not fit in memory. For example, the TFRecord file format\n", @@ -825,7 +824,7 @@ "source": [ "### Consuming text data\n", "\n", - "See [Loading Text](../tutorials/load_data/text.ipynb) for an end to end example.\n", + "Refer to the [Load text](../tutorials/load_data/text.ipynb) tutorial for an end-to-end example.\n", "\n", "Many datasets are distributed as one or more text files. The\n", "`tf.data.TextLineDataset` provides an easy way to extract lines from one or more\n", @@ -916,7 +915,7 @@ "source": [ "By default, a `TextLineDataset` yields *every* line of each file, which may\n", "not be desirable, for example, if the file starts with a header line, or contains comments. These lines can be removed using the `Dataset.skip()` or\n", - "`Dataset.filter()` transformations. 
Here, you skip the first line, then filter to\n", + "`Dataset.filter` transformations. Here, you skip the first line, then filter to\n", "find only survivors." ] }, @@ -985,7 +984,7 @@ "id": "ChDHNi3qbDch" }, "source": [ - "See [Loading CSV Files](../tutorials/load_data/csv.ipynb), and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) for more examples. \n", + "Refer to the [Loading CSV Files](../tutorials/load_data/csv.ipynb) and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) tutorials for more examples.\n", "\n", "The CSV file format is a popular format for storing tabular data in plain text.\n", "\n", @@ -1045,11 +1044,11 @@ "id": "47yippqaHFk6" }, "source": [ - "A more scalable approach is to load from disk as necessary. \n", + "A more scalable approach is to load from disk as necessary.\n", "\n", "The `tf.data` module provides methods to extract records from one or more CSV files that comply with [RFC 4180](https://tools.ietf.org/html/rfc4180).\n", "\n", - "The `experimental.make_csv_dataset` function is the high level interface for reading sets of csv files. It supports column type inference and many other features, like batching and shuffling, to make usage simple." + "The `tf.data.experimental.make_csv_dataset` function is the high-level interface for reading sets of CSV files. It supports column type inference and many other features, like batching and shuffling, to make usage simple." ] }, { @@ -1122,7 +1121,7 @@ "id": "TSVgJJ1HJD6M" }, "source": [ - "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column. " + "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column." ] }, { @@ -1133,7 +1132,7 @@ }, "outputs": [], "source": [ - "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string] \n", + "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string]\n", "dataset = tf.data.experimental.CsvDataset(titanic_file, titanic_types , header=True)\n", "\n", "for line in dataset.take(10):\n", @@ -1386,7 +1385,7 @@ "The simplest form of batching stacks `n` consecutive elements of a dataset into\n", "a single element. The `Dataset.batch()` transformation does exactly this, with\n", "the same constraints as the `tf.stack()` operator, applied to each component\n", - "of the elements: i.e. for each component *i*, all elements must have a tensor\n", + "of the elements: i.e., for each component *i*, all elements must have a tensor\n", "of the exact same shape." ] }, @@ -1457,10 +1456,10 @@ "### Batching tensors with padding\n", "\n", "The above recipe works for tensors that all have the same size. However, many\n", - "models (e.g. sequence models) work with input data that can have varying size\n", - "(e.g. sequences of different lengths). To handle this case, the\n", + "models (including sequence models) work with input data that can have varying size\n", + "(for example, sequences of different lengths). 
To handle this case, the\n", "`Dataset.padded_batch` transformation enables you to batch tensors of\n", - "different shape by specifying one or more dimensions in which they may be\n", + "different shapes by specifying one or more dimensions in which they may be\n", "padded." ] }, @@ -1604,7 +1603,7 @@ "id": "DlEM5f9loSHR" }, "source": [ - "If you would like to perform a custom computation (e.g. to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" + "If you would like to perform a custom computation (for example, to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" ] }, { @@ -1693,7 +1692,7 @@ "source": [ "As with `Dataset.batch` the order relative to `Dataset.repeat` matters.\n", "\n", - "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next: " + "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next:" ] }, { @@ -1838,7 +1837,7 @@ " label = parts[-2]\n", "\n", " image = tf.io.read_file(filename)\n", - " image = tf.image.decode_jpeg(image)\n", + " image = tf.io.decode_jpeg(image)\n", " image = tf.image.convert_image_dtype(image, tf.float32)\n", " image = tf.image.resize(image, [128, 128])\n", " return image, label" @@ -1906,7 +1905,7 @@ "\n", "For performance reasons, use TensorFlow operations for\n", "preprocessing your data whenever possible. However, it is sometimes useful to\n", - "call external Python libraries when parsing your input data. You can use the `tf.py_function()` operation in a `Dataset.map()` transformation." + "call external Python libraries when parsing your input data. You can use the `tf.py_function` operation in a `Dataset.map` transformation." ] }, { @@ -1915,7 +1914,7 @@ "id": "R2u7CeA67DU8" }, "source": [ - "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation. \n", + "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation.\n", "\n", "Note: `tensorflow_addons` has a TensorFlow compatible `rotate` in `tensorflow_addons.image.rotate`.\n", "\n", @@ -1932,6 +1931,7 @@ "source": [ "import scipy.ndimage as ndimage\n", "\n", + "@tf.py_function(Tout=tf.float32)\n", "def random_rotate_image(image):\n", " image = ndimage.rotate(image, np.random.uniform(-30, 30), reshape=False)\n", " return image" @@ -1969,7 +1969,7 @@ "source": [ "def tf_random_rotate_image(image, label):\n", " im_shape = image.shape\n", - " [image,] = tf.py_function(random_rotate_image, [image], [tf.float32])\n", + " image = random_rotate_image(image)\n", " image.set_shape(im_shape)\n", " return image, label" ] @@ -2124,7 +2124,7 @@ "id": "t0JMgvXEz9y1" }, "source": [ - "For an end to end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." + "For an end-to-end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." ] }, { @@ -2155,7 +2155,7 @@ "id": "o6GLGhxgpazJ" }, "source": [ - "Typically, models based on this sort of data will want a contiguous time slice. 
\n", + "Typically, models based on this sort of data will want a contiguous time slice.\n", "\n", "The simplest approach would be to batch the data:" ] @@ -2283,7 +2283,7 @@ "id": "fF6pEdlduq8E" }, "source": [ - "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. See [Dataset structure](#dataset_structure) for details." + "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. Go to the [Dataset structure](#dataset_structure) section for details." ] }, { @@ -2328,7 +2328,7 @@ "id": "sgLIwq9Anc34" }, "source": [ - "In nearly all cases, you will want to `.batch` the dataset first:" + "In nearly all cases, you will want to `Dataset.batch` the dataset first:" ] }, { @@ -2422,7 +2422,7 @@ "\n", "When working with a dataset that is very class-imbalanced, you may want to resample the dataset. `tf.data` provides two methods to do this. The credit card fraud dataset is a good example of this sort of problem.\n", "\n", - "Note: See [Imbalanced Data](../tutorials/keras/imbalanced_data.ipynb) for a full tutorial.\n" + "Note: Go to [Classification on imbalanced data](../tutorials/structured_data/imbalanced_data.ipynb) for a full tutorial.\n" ] }, { @@ -2529,7 +2529,7 @@ "id": "ov14SRrQyQE3" }, "source": [ - "One approach to resampling a dataset is to use `sample_from_datasets`. This is more applicable when you have a separate `data.Dataset` for each class.\n", + "One approach to resampling a dataset is to use `sample_from_datasets`. This is more applicable when you have a separate `tf.data.Dataset` for each class.\n", "\n", "Here, just use filter to generate them from the credit card fraud data:" ] @@ -2572,7 +2572,7 @@ "id": "GxLAr-7p0ATX" }, "source": [ - "To use `tf.data.experimental.sample_from_datasets` pass the datasets, and the weight for each:" + "To use `tf.data.Dataset.sample_from_datasets` pass the datasets, and the weight for each:" ] }, { @@ -2583,7 +2583,7 @@ }, "outputs": [], "source": [ - "balanced_ds = tf.data.experimental.sample_from_datasets(\n", + "balanced_ds = tf.data.Dataset.sample_from_datasets(\n", " [negative_ds, positive_ds], [0.5, 0.5]).batch(10)" ] }, @@ -2593,7 +2593,7 @@ "id": "2K4ObOms082B" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2623,15 +2623,15 @@ "id": "kZ9ezkK6irMD" }, "source": [ - "One problem with the above `experimental.sample_from_datasets` approach is that\n", - "it needs a separate `tf.data.Dataset` per class. Using `Dataset.filter`\n", - "works, but results in all the data being loaded twice.\n", + "One problem with the above `Dataset.sample_from_datasets` approach is that\n", + "it needs a separate `tf.data.Dataset` per class. You could use `Dataset.filter`\n", + "to create those two datasets, but that results in all the data being loaded twice.\n", "\n", - "The `data.experimental.rejection_resample` function can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped from the dataset to achieve balance.\n", + "The `tf.data.Dataset.rejection_resample` method can be applied to a dataset to rebalance it, while only loading it once. 
Elements will be dropped or repeated to achieve balance.\n", "\n", - "`data.experimental.rejection_resample` takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", + "The `rejection_resample` method takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", "\n", - "The elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" + "The goal here is to balance the label distribution, and the elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" ] }, { @@ -2646,34 +2646,15 @@ " return label" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "DdKmE8Jumlp0" - }, - "source": [ - "The resampler also needs a target distribution, and optionally an initial distribution estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9tv0tWNxmkzM" - }, - "outputs": [], - "source": [ - "resampler = tf.data.experimental.rejection_resample(\n", - " class_func, target_dist=[0.5, 0.5], initial_dist=fractions)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "YxJrOZVToGuE" }, "source": [ - "The resampler deals with individual examples, so you must `unbatch` the dataset before applying the resampler:" + "The resampling method deals with individual examples, so in this case you must `unbatch` the dataset before applying that method.\n", + "\n", + "The method needs a target distribution, and optionally an initial distribution estimate as inputs." ] }, { @@ -2684,7 +2665,12 @@ }, "outputs": [], "source": [ - "resample_ds = creditcard_ds.unbatch().apply(resampler).batch(10)" + "resample_ds = (\n", + " creditcard_ds\n", + " .unbatch()\n", + " .rejection_resample(class_func, target_dist=[0.5,0.5],\n", + " initial_dist=fractions)\n", + " .batch(10))" ] }, { @@ -2693,7 +2679,7 @@ "id": "L-HnC1s8idqV" }, "source": [ - "The resampler returns creates `(class, example)` pairs from the output of the `class_func`. In this case, the `example` was already a `(feature, label)` pair, so use `map` to drop the extra copy of the labels:" + "The `rejection_resample` method returns `(class, example)` pairs where the `class` is the output of the `class_func`. In this case, the `example` was already a `(feature, label)` pair, so use `map` to drop the extra copy of the labels:" ] }, { @@ -2713,7 +2699,7 @@ "id": "j3d2jyEhx9kD" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2743,7 +2729,7 @@ "id": "SOGg1UFhUE4z" }, "source": [ - "Tensorflow supports [taking checkpoints](https://www.tensorflow.org/guide/checkpoint) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `shuffle` and `prefetch` require buffering elements within the iterator. 
\n", + "Tensorflow supports [taking checkpoints](./checkpoint.ipynb) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `Dataset.shuffle` and `Dataset.prefetch` require buffering elements within the iterator.\n", "\n", "To include your iterator in a checkpoint, pass the iterator to the `tf.train.Checkpoint` constructor." ] @@ -2779,7 +2765,7 @@ "id": "gxWglTwX9Fex" }, "source": [ - "Note: It is not possible to checkpoint an iterator which relies on external state such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." + "Note: It is not possible to checkpoint an iterator which relies on an external state, such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." ] }, { @@ -2788,7 +2774,7 @@ "id": "uLRdedPpbDdD" }, "source": [ - "## Using tf.data with tf.keras" + "## Using `tf.data` with `tf.keras`" ] }, { @@ -2798,7 +2784,7 @@ }, "source": [ "The `tf.keras` API simplifies many aspects of creating and executing machine\n", - "learning models. Its `.fit()` and `.evaluate()` and `.predict()` APIs support datasets as inputs. Here is a quick dataset and model setup:" + "learning models. Its `Model.fit` and `Model.evaluate` and `Model.predict` APIs support datasets as inputs. Here is a quick dataset and model setup:" ] }, { @@ -2833,7 +2819,7 @@ "])\n", "\n", "model.compile(optimizer='adam',\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", " metrics=['accuracy'])" ] }, @@ -2863,7 +2849,7 @@ "id": "FzpAQfJMJF41" }, "source": [ - "If you pass an infinite dataset, for example by calling `Dataset.repeat()`, you just need to also pass the `steps_per_epoch` argument:" + "If you pass an infinite dataset, for example by calling `Dataset.repeat`, you just need to also pass the `steps_per_epoch` argument:" ] }, { @@ -2927,7 +2913,7 @@ "id": "aZYhJ_YSIl6w" }, "source": [ - "The labels are not required in when calling `Model.predict`. " + "The labels are not required when calling `Model.predict`." 
] }, { @@ -2967,8 +2953,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "data.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/data_performance.ipynb b/site/en/guide/data_performance.ipynb index 78427505020..81d8b3fd5b3 100644 --- a/site/en/guide/data_performance.ipynb +++ b/site/en/guide/data_performance.ipynb @@ -274,6 +274,8 @@ "source": [ "### Prefetching\n", "\n", + "\n", + "\n", "Prefetching overlaps the preprocessing and model execution of a training step.\n", "While the model is executing training step `s`, the input pipeline is reading the data for step `s+1`.\n", "Doing so reduces the step time to the maximum (as opposed to the sum) of the training and the time it takes to extract the data.\n", @@ -321,6 +323,8 @@ "source": [ "### Parallelizing data extraction\n", "\n", + "\n", + "\n", "In a real-world setting, the input data may be stored remotely (for example, on Google Cloud Storage or HDFS).\n", "A dataset pipeline that works well when reading data locally might become bottlenecked on I/O when reading data remotely because of the following differences between local and remote storage:\n", "\n", @@ -420,6 +424,8 @@ "source": [ "### Parallelizing data transformation\n", "\n", + "\n", + "\n", "When preparing data, input elements may need to be pre-processed.\n", "To this end, the `tf.data` API offers the `tf.data.Dataset.map` transformation, which applies a user-defined function to each element of the input dataset.\n", "Because input elements are independent of one another, the pre-processing can be parallelized across multiple CPU cores.\n", @@ -527,6 +533,8 @@ "source": [ "### Caching\n", "\n", + "\n", + "\n", "The `tf.data.Dataset.cache` transformation can cache a dataset, either in memory or on local storage.\n", "This will save some operations (like file opening and data reading) from being executed during each epoch." ] @@ -572,6 +580,8 @@ "source": [ "### Vectorizing mapping\n", "\n", + "\n", + "\n", "Invoking a user-defined function passed into the `map` transformation has overhead related to scheduling and executing the user-defined function.\n", "Vectorize the user-defined function (that is, have it operate over a batch of inputs at once) and apply the `batch` transformation _before_ the `map` transformation.\n", "\n", @@ -687,6 +697,8 @@ "source": [ "### Reducing memory footprint\n", "\n", + "\n", + "\n", "A number of transformations, including `interleave`, `prefetch`, and `shuffle`, maintain an internal buffer of elements. If the user-defined function passed into the `map` transformation changes the size of the elements, then the ordering of the map transformation and the transformations that buffer elements affects the memory usage. 
In general, choose the order that results in lower memory footprint, unless different ordering is desirable for performance.\n", "\n", "#### Caching partial computations\n", @@ -713,12 +725,12 @@ "Here is a summary of the best practices for designing performant TensorFlow\n", "input pipelines:\n", "\n", - "* [Use the `prefetch` transformation](#Pipelining) to overlap the work of a producer and consumer\n", - "* [Parallelize the data reading transformation](#Parallelizing-data-extraction) using the `interleave` transformation\n", - "* [Parallelize the `map` transformation](#Parallelizing-data-transformation) by setting the `num_parallel_calls` argument\n", - "* [Use the `cache` transformation](#Caching) to cache data in memory during the first epoch\n", - "* [Vectorize user-defined functions](#Map-and-batch) passed in to the `map` transformation\n", - "* [Reduce memory usage](#Reducing-memory-footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" + "* [Use the `prefetch` transformation](#prefetching) to overlap the work of a producer and consumer\n", + "* [Parallelize the data reading transformation](#parallelizing_data_extraction) using the `interleave` transformation\n", + "* [Parallelize the `map` transformation](#parallelizing_data_transformation) by setting the `num_parallel_calls` argument\n", + "* [Use the `cache` transformation](#caching) to cache data in memory during the first epoch\n", + "* [Vectorize user-defined functions](#vectorizing_mapping) passed in to the `map` transformation\n", + "* [Reduce memory usage](#reducing_memory_footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" ] }, { @@ -1153,7 +1165,6 @@ "colab": { "collapsed_sections": [], "name": "data_performance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/distributed_training.ipynb b/site/en/guide/distributed_training.ipynb index 6a0c72f3207..04b7118b1f2 100644 --- a/site/en/guide/distributed_training.ipynb +++ b/site/en/guide/distributed_training.ipynb @@ -78,7 +78,7 @@ "* Provide good performance out of the box.\n", "* Easy switching between strategies.\n", "\n", - "You can distribute training using `tf.distribute.Strategy` with a high-level API like Keras `Model.fit`, as well as [custom training loops](keras/writing_a_training_loop_from_scratch.ipynb) (and, in general, any computation using TensorFlow).\n", + "You can distribute training using `tf.distribute.Strategy` with a high-level API like Keras `Model.fit`, as well as [custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) (and, in general, any computation using TensorFlow).\n", "\n", "In TensorFlow 2.x, you can execute your programs eagerly, or in a graph using [`tf.function`](function.ipynb). `tf.distribute.Strategy` intends to support both these modes of execution, but works best with `tf.function`. Eager mode is only recommended for debugging purposes and not supported for `tf.distribute.TPUStrategy`. 
Although training is the focus of this guide, this API can also be used for distributing evaluation and prediction on different platforms.\n", "\n", @@ -130,7 +130,7 @@ "| **Custom training loop** | Supported | Supported | Supported | Experimental support | Experimental support |\n", "| **Estimator API** | Limited Support | Not supported | Limited Support | Limited Support | Limited Support |\n", "\n", - "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibilities guarantees.\n", + "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibility guarantees.\n", "\n", "Warning: Estimator support is limited. Basic training and evaluation are experimental, and advanced features—such as scaffold—are not implemented. You should be using Keras or custom training loops if a use case is not covered. Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. Go to the [migration guide](https://tensorflow.org/guide/migrate) for details." ] @@ -421,7 +421,7 @@ "source": [ "This strategy serves two main purposes:\n", "\n", - "* It allows writing distribution-aware library code unconditionally. For example, in `tf.optimizer`s you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" + "* It allows writing distribution-aware library code unconditionally. For example, in `tf.keras.optimizers` you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" ] }, { @@ -503,14 +503,14 @@ "source": [ "## Use tf.distribute.Strategy with Keras Model.fit\n", "\n", - "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](/keras/customizing_what_happens_in_fit.ipynb).\n", + "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io/api/). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).\n", "\n", "Here's what you need to change in your code:\n", "\n", "1. Create an instance of the appropriate `tf.distribute.Strategy`.\n", - "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`.\n", + "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`. 
Thus the code in the model's `call()`, `train_step()`, and `test_step()` methods will all be distributed and executed on the accelerator(s).\n", "\n", - "TensorFlow distribution strategies support all types of Keras models—[Sequential](/keras/sequential_model.ipynb), [Functional](/keras/functional.ipynb), and [subclassed](/keras/custom_layers_and_models.ipynb).\n", + "TensorFlow distribution strategies support all types of Keras models—[Sequential](https://www.tensorflow.org/guide/keras/sequential_model), [Functional](https://www.tensorflow.org/guide/keras/functional), and [subclassed](https://www.tensorflow.org/guide/keras/custom_layers_and_models)\n", "\n", "Here is a snippet of code to do this for a very simple Keras model with one `Dense` layer:" ] @@ -526,9 +526,10 @@ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", - "\n", - "model.compile(loss='mse', optimizer='sgd')" + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", + " model.compile(loss='mse', optimizer='sgd')" ] }, { @@ -585,6 +586,17 @@ "In both cases—with `Dataset` or NumPy—each batch of the given input is divided equally among the multiple replicas. For instance, if you are using the `MirroredStrategy` with 2 GPUs, each batch of size 10 will be divided among the 2 GPUs, with each receiving 5 input examples in each step. Each epoch will then train faster as you add more GPUs. Typically, you would want to increase your batch size as you add more accelerators, so as to make effective use of the extra computing power. You will also need to re-tune your learning rate, depending on the model. You can use `strategy.num_replicas_in_sync` to get the number of replicas." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8ZmJqErtS4A1" + }, + "outputs": [], + "source": [ + "mirrored_strategy.num_replicas_in_sync" + ] + }, { "cell_type": "code", "execution_count": null, @@ -600,7 +612,7 @@ "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100)\n", "dataset = dataset.batch(global_batch_size)\n", "\n", - "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15}\n", + "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15, 20:0.175}\n", "learning_rate = LEARNING_RATES_BY_BATCH_SIZE[global_batch_size]" ] }, @@ -636,7 +648,7 @@ "source": [ "## Use tf.distribute.Strategy with custom training loops\n", "\n", - "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](/keras/writing_a_training_loop_from_scratch.ipynb).\n", + "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", "\n", "If you need more flexibility and control over your training loops than is possible with Estimator or Keras, you can write custom training loops. For instance, when using a GAN, you may want to take a different number of generator or discriminator steps each round. 
Similarly, the high level frameworks are not very suitable for Reinforcement Learning training.\n", "\n", @@ -663,7 +675,9 @@ "outputs": [], "source": [ "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", " optimizer = tf.keras.optimizers.SGD()" ] }, @@ -684,7 +698,7 @@ }, "outputs": [], "source": [ - "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(1000).batch(\n", " global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)" ] @@ -706,20 +720,21 @@ }, "outputs": [], "source": [ + "# Sets `reduction=NONE` to leave it to tf.nn.compute_average_loss() below.\n", "loss_object = tf.keras.losses.BinaryCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", "\n", - "def compute_loss(labels, predictions):\n", - " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=global_batch_size)\n", - "\n", "def train_step(inputs):\n", " features, labels = inputs\n", "\n", " with tf.GradientTape() as tape:\n", " predictions = model(features, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " per_example_loss = loss_object(labels, predictions)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", @@ -740,9 +755,16 @@ "source": [ "A few other things to note in the code above:\n", "\n", - "1. You used `tf.nn.compute_average_loss` to compute the loss. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the `global_batch_size`. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", - "2. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run`. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", - "3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n" + " 1. You used `tf.nn.compute_average_loss` to reduce the per-example prediction losses to a scalar. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the global batch size. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", + "\n", + " By default, the global batch size is taken to be `tf.get_strategy().num_replicas_in_sync * tf.shape(per_example_loss)[0]`. It can also be specified explicitly as a keyword argument `global_batch_size=`. 
Without short batches, the default is equivalent to `tf.nn.compute_average_loss(..., global_batch_size=global_batch_size)` with the `global_batch_size` defined above. (For more on short batches and how to avoid or handle them, see the [Custom Training tutorial](../tutorials/distribute/custom_training.ipynb).)\n", + "\n", + " 2. You used `tf.nn.scale_regularization_loss` to scale regularization losses registered with the `Model` object, if any, by `1/num_replicas_in_sync` as well. For those regularization losses that are input-dependent, it falls on the modeling code, not the custom training loop, to perform the averaging over the per-replica(!) batch size; that way the modeling code can remain agnostic of replication while the training loop remains agnostic of how regularization losses are computed.\n", + "\n", + " 3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n", + "\n", + " 4. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run` for reporting. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", + "\n" ] }, { diff --git a/site/en/guide/dtensor_overview.ipynb b/site/en/guide/dtensor_overview.ipynb new file mode 100644 index 00000000000..1b55ee0283f --- /dev/null +++ b/site/en/guide/dtensor_overview.ipynb @@ -0,0 +1,1082 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "1ljvLya59ep5" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VcQIa1uG86Wh" + }, + "source": [ + "# DTensor concepts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6dWNQEum9AfY" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGZuakHVlVQf" + }, + "source": [ + "## Overview\n", + "\n", + "This colab introduces DTensor, an extension to TensorFlow for synchronous distributed computing.\n", + "\n", + "DTensor provides a global programming model that allows developers to compose applications that operate on Tensors globally while managing the distribution across devices internally. DTensor distributes the program and tensors according to the sharding directives through a procedure called *[Single program, multiple data (SPMD)](https://en.wikipedia.org/wiki/SPMD) expansion*.\n", + "\n", + "By decoupling the application from sharding directives, DTensor enables running the same application on a single device, multiple devices, or even multiple clients, while preserving its global semantics.\n", + "\n", + "This guide introduces DTensor concepts for distributed computing, and how DTensor integrates with TensorFlow. For a demo of using DTensor in model training, refer to the [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb) tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h7ZTDq7KngwA" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "Begin by importing TensorFlow, `dtensor`, and configure TensorFlow to use 6 virtual CPUs. Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q92lo0zjwej8" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)\n", + "\n", + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(6)\n", + "DEVICES = [f'CPU:{i}' for i in range(6)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O-lsrxUnlsCC" + }, + "source": [ + "## DTensor's model of distributed tensors\n", + "\n", + "DTensor introduces two concepts: `dtensor.Mesh` and `dtensor.Layout`. They are abstractions to model the sharding of tensors across topologically related devices.\n", + "\n", + "- `Mesh` defines the device list for computation.\n", + "- `Layout` defines how to shard the Tensor dimension on a `Mesh`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JjiHaH0ql9yo" + }, + "source": [ + "### Mesh\n", + "\n", + "`Mesh` represents a logical Cartisian topology of a set of devices. Each dimension of the Cartisian grid is called a **Mesh dimension**, and referred to with a name. Names of mesh dimension within the same `Mesh` must be unique.\n", + "\n", + "Names of mesh dimensions are referenced by `Layout` to describe the sharding behavior of a `tf.Tensor` along each of its axes. This is described in more detail later in the section on `Layout`.\n", + "\n", + "`Mesh` can be thought of as a multi-dimensional array of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_J6cOieEbaUw" + }, + "source": [ + "In a 1 dimensional `Mesh`, all devices form a list in a single mesh dimension. 
The following example uses `dtensor.create_mesh` to create a mesh from 6 CPU devices along a mesh dimension `'x'` with a size of 6 devices:\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QLH5fgdBmA58" + }, + "outputs": [], + "source": [ + "mesh_1d = dtensor.create_mesh([('x', 6)], devices=DEVICES)\n", + "print(mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hSZwaUwnEgXB" + }, + "source": [ + "A `Mesh` can be multi-dimensional as well. In the following example, 6 CPU devices form a `3x2` mesh, where the `'x'` mesh dimension has a size of 3 devices, and the `'y'` mesh dimension has a size of 2 devices:\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "op6TmKUQE-sZ" + }, + "outputs": [], + "source": [ + "mesh_2d = dtensor.create_mesh([('x', 3), ('y', 2)], devices=DEVICES)\n", + "print(mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "deAqdrDPFn2f" + }, + "source": [ + "### Layout\n", + "\n", + "**`Layout`** specifies how a tensor is distributed, or sharded, on a `Mesh`.\n", + "\n", + "Note: To avoid confusion between `Mesh` and `Layout`, the term *dimension* is always associated with `Mesh`, and the term *axis* with `Tensor` and `Layout` in this guide.\n", + "\n", + "The rank of `Layout` should be the same as the rank of the `Tensor` to which the `Layout` is applied. For each of the `Tensor`'s axes the `Layout` may specify a mesh dimension to shard the tensor across, or specify the axis as \"unsharded\".\n", + "The tensor is replicated across any mesh dimensions that it is not sharded across.\n", + "\n", + "The rank of a `Layout` and the number of dimensions of a `Mesh` do not need to match. The `unsharded` axes of a `Layout` do not need to be associated with a mesh dimension, and `unsharded` mesh dimensions do not need to be associated with a `Layout` axis.\n", + "\n", + "\"Diagram" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Px_bF1c-bQ7e" + }, + "source": [ + "Let's analyze a few examples of `Layout` for the `Mesh`es created in the previous section."
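Before working through those examples, here is a quick sketch of how a `Layout` object itself can be inspected (hedged: it assumes the `mesh_2d` object created above; the variable names are illustrative only):

```python
# Sketch: build and inspect a Layout on the 3x2 mesh created above (assumed name: mesh_2d).
example_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh_2d)
print(example_layout.rank)            # 2 -- must match the rank of the tensors it annotates
print(example_layout.sharding_specs)  # ['x', 'unsharded']
print(example_layout.mesh)            # the Mesh this layout shards over
```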
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqzCNlWAbm-c" + }, + "source": [ + "On a 1-dimensional mesh such as `[(\"x\", 6)]` (`mesh_1d` in the previous section), `Layout([\"unsharded\", \"unsharded\"], mesh_1d)` is a layout for a rank-2 tensor replicated across 6 devices.\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-a3EnmZag6x1" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ywRJwuLDt2Qq" + }, + "source": [ + "Using the same tensor and mesh, the layout `Layout(['unsharded', 'x'])` would shard the second axis of the tensor across the 6 devices.\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BgqL0jUvV5a" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgciDNmK76l9" + }, + "source": [ + "Given a 2-dimensional 3x2 mesh such as `[(\"x\", 3), (\"y\", 2)]` (`mesh_2d` from the previous section), `Layout([\"y\", \"x\"], mesh_2d)` is a layout for a rank-2 `Tensor` whose first axis is sharded across mesh dimension `\"y\"`, and whose second axis is sharded across mesh dimension `\"x\"`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eyp_qOSyvieo" + }, + "source": [ + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8OrehEuhPbS" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout(['y', 'x'], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Kyg0V3ehMNJ" + }, + "source": [ + "For the same `mesh_2d`, the layout `Layout([\"x\", dtensor.UNSHARDED], mesh_2d)` is a layout for a rank-2 `Tensor` that is replicated across `\"y\"`, and whose first axis is sharded on mesh dimension `x`.\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IkWe6mVl7uRb" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([\"x\", dtensor.UNSHARDED], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTalu6M-ISYb" + }, + "source": [ + "### Single-client and multi-client applications\n", + "\n", + "DTensor supports both single-client and multi-client applications. The colab Python kernel is an example of a single-client DTensor application, where there is a single Python process.\n", + "\n", + "In a multi-client DTensor application, multiple Python processes collectively perform as a coherent application. The Cartesian grid of a `Mesh` in a multi-client DTensor application can span across devices regardless of whether they are attached locally to the current client or attached remotely to another client. The set of all devices used by a `Mesh` is called the *global device list*.\n", + "\n", + "The creation of a `Mesh` in a multi-client DTensor application is a collective operation where the *global device list* is identical for all of the participating clients, and the creation of the `Mesh` serves as a global barrier.\n", + "\n", + "During `Mesh` creation, each client provides its *local device list* together with the expected *global device list*. DTensor validates that both lists are consistent. 
Please refer to the API documentation for `dtensor.create_mesh` and `dtensor.create_distributed_mesh`\n", + " for more information on multi-client mesh creation and the *global device list*.\n", + "\n", + "A single-client application can be thought of as a special case of a multi-client application, with 1 client. In a single-client application, the *global device list* is identical to the *local device list*.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P_F7DWkXkB4w" + }, + "source": [ + "## DTensor as a sharded tensor\n", + "\n", + "Now, start coding with `DTensor`. The helper function, `dtensor_from_array`, demonstrates creating DTensors from something that looks like a `tf.Tensor`. The function performs two steps:\n", + "\n", + " - Replicates the tensor to every device on the mesh.\n", + " - Shards the copy according to the layout requested in its arguments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s6aws-b8dN9L" + }, + "outputs": [], + "source": [ + "def dtensor_from_array(arr, layout, shape=None, dtype=None):\n", + " \"\"\"Create a DTensor from something that looks like an array or Tensor.\n", + "\n", + " This function is convenient for quickly doodling DTensors from a known,\n", + " unsharded data object in a single-client environment. This is not the\n", + " most efficient way of creating a DTensor, but it will do for this\n", + " tutorial.\n", + " \"\"\"\n", + " if shape is not None or dtype is not None:\n", + " arr = tf.constant(arr, shape=shape, dtype=dtype)\n", + "\n", + " # replicate the input to the mesh\n", + " a = dtensor.copy_to_mesh(arr,\n", + " layout=dtensor.Layout.replicated(layout.mesh, rank=layout.rank))\n", + " # shard the copy to the desired layout\n", + " return dtensor.relayout(a, layout=layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r3o6IysrlGMu" + }, + "source": [ + "### Anatomy of a DTensor\n", + "\n", + "A DTensor is a `tf.Tensor` object, but augmented with the `Layout` annotation that defines its sharding behavior. A DTensor consists of the following:\n", + "\n", + " - Global tensor meta-data, including the global shape and dtype of the tensor.\n", + " - A `Layout`, which defines the `Mesh` the `Tensor` belongs to, and how the `Tensor` is sharded onto the `Mesh`.\n", + " - A list of **component tensors**, one item per local device in the `Mesh`.\n", + "\n", + "With `dtensor_from_array`, you can create your first DTensor, `my_first_dtensor`, and examine its contents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mQu_nScGUvYH" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED], mesh)\n", + "\n", + "my_first_dtensor = dtensor_from_array([0, 1], layout)\n", + "\n", + "# Examine the DTensor content\n", + "print(my_first_dtensor)\n", + "print(\"global shape:\", my_first_dtensor.shape)\n", + "print(\"dtype:\", my_first_dtensor.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r8LQy1nqmvFy" + }, + "source": [ + "#### Layout and `fetch_layout`\n", + "\n", + "The layout of a DTensor is not a regular attribute of `tf.Tensor`. 
Instead, DTensor provides a function, `dtensor.fetch_layout` to access the layout of a DTensor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dCSFyaAjmzGu" + }, + "outputs": [], + "source": [ + "print(dtensor.fetch_layout(my_first_dtensor))\n", + "assert layout == dtensor.fetch_layout(my_first_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ed7i3l2lmatm" + }, + "source": [ + "#### Component tensors, `pack` and `unpack`\n", + "\n", + "A DTensor consists of a list of **component tensors**. The component tensor for a device in the `Mesh` is the `Tensor` object representing the piece of the global DTensor that is stored on this device.\n", + "\n", + "A DTensor can be unpacked into component tensors through `dtensor.unpack`. You can make use of `dtensor.unpack` to inspect the components of the DTensor, and confirm they are on all devices of the `Mesh`.\n", + "\n", + "Note that the positions of component tensors in the global view may overlap each other. For example, in the case of a fully replicated layout, all components are identical replicas of the global tensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BGbjqVAOnXMk" + }, + "outputs": [], + "source": [ + "for component_tensor in dtensor.unpack(my_first_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-tqIQM52k788" + }, + "source": [ + "As shown, `my_first_dtensor` is a tensor of `[0, 1]` replicated to all 6 devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6By3k-CGn3yv" + }, + "source": [ + "The inverse operation of `dtensor.unpack` is `dtensor.pack`. Component tensors can be packed back into a DTensor.\n", + "\n", + "The components must have the same rank and dtype, which will be the rank and dtype of the returned DTensor. However, there is no strict requirement on the device placement of component tensors as inputs of `dtensor.unpack`: the function will automatically copy the component tensors to their respective corresponding devices.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9lT-6qQwxOgf" + }, + "outputs": [], + "source": [ + "packed_dtensor = dtensor.pack(\n", + " [[0, 1], [0, 1], [0, 1],\n", + " [0, 1], [0, 1], [0, 1]],\n", + " layout=layout\n", + ")\n", + "print(packed_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zvS3autrpK2U" + }, + "source": [ + "### Sharding a DTensor to a Mesh\n", + "\n", + "So far you've worked with the `my_first_dtensor`, which is a rank-1 DTensor fully replicated across a dim-1 `Mesh`.\n", + "\n", + "Next, create and inspect DTensors that are sharded across a dim-2 `Mesh`. 
The following example does this with a 3x2 `Mesh` on 6 CPU devices, where size of mesh dimension `'x'` is 3 devices, and size of mesh dimension`'y'` is 2 devices:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KWb9Ae0VJ-Rc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndSeQSFWKQk9" + }, + "source": [ + "#### Fully sharded rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "Create a 3x2 rank-2 DTensor, sharding its first axis along the `'x'` mesh dimension, and its second axis along the `'y'` mesh dimension.\n", + "\n", + "- Because the tensor shape equals to the mesh dimension along all of the sharded axes, each device receives a single element of the DTensor.\n", + "- The rank of the component tensor is always the same as the rank of the global shape. DTensor adopts this convention as a simple way to preserve information for locating the relation between a component tensor and the global DTensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ax_ZHouJp1MX" + }, + "outputs": [], + "source": [ + "fully_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([\"x\", \"y\"], mesh))\n", + "\n", + "for raw_component in dtensor.unpack(fully_sharded_dtensor):\n", + " print(\"Device:\", raw_component.device, \",\", raw_component)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zhsLC-NgrC2p" + }, + "source": [ + "#### Fully replicated rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "For comparison, create a 3x2 rank-2 DTensor, fully replicated to the same dim-2 Mesh.\n", + "\n", + " - Because the DTensor is fully replicated, each device receives a full replica of the 3x2 DTensor.\n", + " - The rank of the component tensors are the same as the rank of the global shape -- this fact is trivial, because in this case, the shape of the component tensors are the same as the global shape anyway." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xmyC6H6Ec90P" + }, + "outputs": [], + "source": [ + "fully_replicated_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh))\n", + "# Or, layout=tensor.Layout.fully_replicated(mesh, rank=2)\n", + "\n", + "for component_tensor in dtensor.unpack(fully_replicated_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KWoyv_oHMzk1" + }, + "source": [ + "#### Hybrid rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "What about somewhere between fully sharded and fully replicated?\n", + "\n", + "DTensor allows a `Layout` to be a hybrid, sharded along some axes, but replicated along others.\n", + "\n", + "For example, you can shard the same 3x2 rank-2 DTensor in the following way:\n", + "\n", + " - 1st axis sharded along the `'x'` mesh dimension.\n", + " - 2nd axis replicated along the `'y'` mesh dimension.\n", + "\n", + "To achieve this sharding scheme, you just need to replace the sharding spec of the 2nd axis from `'y'` to `dtensor.UNSHARDED`, to indicate your intention of replicating along the 2nd axis. 
The layout object will look like `Layout(['x', dtensor.UNSHARDED], mesh)`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DygnbkQ1Lu42" + }, + "outputs": [], + "source": [ + "hybrid_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout(['x', dtensor.UNSHARDED], mesh))\n", + "\n", + "for component_tensor in dtensor.unpack(hybrid_sharded_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T7FtZ9kQRZgE" + }, + "source": [ + "You can inspect the component tensors of the created DTensor and verify they are indeed sharded according to your scheme. It may be helpful to illustrate the situation with a chart:\n", + "\n", + " \"A\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "auAkA38XjL-q" + }, + "source": [ + "#### Tensor.numpy() and sharded DTensor\n", + "\n", + "Be aware that calling the `.numpy()` method on a sharded DTensor raises an error. The rationale for erroring is to protect against unintended gathering of data from multiple computing devices to the host CPU device backing the returned NumPy array:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hNdwmnL0jAXS" + }, + "outputs": [], + "source": [ + "print(fully_replicated_dtensor.numpy())\n", + "\n", + "try:\n", + " fully_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for fully_sharded_dtensor\")\n", + "\n", + "try:\n", + " hybrid_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for hybrid_sharded_dtensor\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8WcMkiagPF_6" + }, + "source": [ + "## TensorFlow API on DTensor\n", + "\n", + "DTensor strives to be a drop-in replacement for tensor in your program. The TensorFlow Python API that consume `tf.Tensor`, such as the Ops library functions, `tf.function`, `tf.GradientTape`, also work with DTensor.\n", + "\n", + "To accomplish this, for each [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs), DTensor produces and executes an equivalent [SPMD](https://en.wikipedia.org/wiki/SPMD) graph in a procedure called *SPMD expansion*. A few critical steps in DTensor SPMD expansion are:\n", + "\n", + " - Propagating the sharding `Layout` of DTensor in the TensorFlow graph\n", + " - Rewriting TensorFlow Ops on the global DTensor with equivalent TensorFlow Ops on the component tensors, inserting collective and communication Ops when necessary\n", + " - Lowering backend neutral TensorFlow Ops to backend specific TensorFlow Ops.\n", + "\n", + "The final result is that **DTensor is a drop-in replacement for Tensor**.\n", + "\n", + "Note: DTensor is still an experimental API which means you will be exploring and pushing the boundaries and limits of the DTensor programming model.\n", + "\n", + "There are 2 ways of triggering DTensor execution:\n", + "\n", + " - DTensor as operands of a Python function, such as `tf.matmul(a, b)`, will run through DTensor if `a`, `b`, or both are DTensors.\n", + " - Requesting the result of a Python function to be a DTensor, such as `dtensor.call_with_layout(tf.ones, layout, shape=(3, 2))`, will run through DTensor because we requested the output of `tf.ones` to be sharded according to a `layout`." 
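A compact sketch of both triggers side by side (hedged: it reuses the `dtensor_from_array` helper and `DEVICES` list defined earlier in this guide; the variable names are illustrative only):

```python
# Sketch of the two ways DTensor execution is triggered.
mesh = dtensor.create_mesh([("x", 6)], devices=DEVICES)
layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)

# 1. DTensor operands: tf.matmul runs through DTensor because `a` and `b` are DTensors.
a = dtensor_from_array([[1., 2.], [3., 4.]], layout)
b = dtensor_from_array([[1., 0.], [0., 1.]], layout)
c = tf.matmul(a, b)

# 2. DTensor results: the output of tf.ones is produced directly with the requested layout.
ones = dtensor.call_with_layout(tf.ones, layout, shape=(2, 2))

print(dtensor.fetch_layout(c).sharding_specs)
print(dtensor.fetch_layout(ones).sharding_specs)
```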
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "urKzmqAoPssT" + }, + "source": [ + "### DTensor as operands\n", + "\n", + "Many TensorFlow API functions take `tf.Tensor` as their operands and return `tf.Tensor` as their results. For these functions, you can express the intention to run a function through DTensor by passing in DTensors as operands. This section uses `tf.matmul(a, b)` as an example." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7LO8ZT7iWVga" + }, + "source": [ + "#### Fully replicated input and output\n", + "\n", + "In this case, the DTensors are fully replicated. On each of the devices of the `Mesh`,\n", + " - the component tensor for operand `a` is `[[1, 2, 3], [4, 5, 6]]` (2x3)\n", + " - the component tensor for operand `b` is `[[6, 5], [4, 3], [2, 1]]` (3x2)\n", + " - the computation consists of a single `MatMul` of `(2x3, 3x2) -> 2x2`,\n", + " - the component tensor for result `c` is `[[20, 14], [56, 41]]` (2x2)\n", + "\n", + "Total number of floating point mul operations is `6 devices * 4 result * 3 mul = 72`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TiZf2J9JNd2D" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=layout)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=layout)\n", + "\n", + "c = tf.matmul(a, b) # runs 6 identical matmuls in parallel on 6 devices\n", + "\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QXtR9qgKWgWV" + }, + "source": [ + "#### Sharding operands along the contracted axis\n", + "\n", + "You can reduce the amount of computation per device by sharding the operands `a` and `b`. A popular sharding scheme for `tf.matmul` is to shard the operands along the axis of the contraction, which means sharding `a` along the second axis, and `b` along the first axis.\n", + "\n", + "The global matrix product sharded under this scheme can be performed efficiently by local matmuls that run concurrently, followed by a collective reduction to aggregate the local results. This is also the [canonical way](https://github.com/open-mpi/ompi/blob/ee87ec391f48512d3718fc7c8b13596403a09056/docs/man-openmpi/man3/MPI_Reduce.3.rst?plain=1#L265) of implementing a distributed matrix dot product.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 4 result * 1 = 24`, a factor of 3 reduction compared to the fully replicated case (72) above. The factor of 3 is due to the sharding along the `x` mesh dimension with a size of `3` devices.\n", + "\n", + "The reduction of the number of operations run sequentially is the main mechanism with which synchronous model parallelism accelerates training."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EyVAUvMePbms" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "a_layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhD8yYgJiCEh" + }, + "source": [ + "#### Additional sharding\n", + "\n", + "You can perform additional sharding on the inputs, and they are appropriately carried over to the results. For example, you can apply additional sharding of operand `a` along its first axis to the `'y'` mesh dimension. The additional sharding will be carried over to the first axis of the result `c`.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 2 result * 1 = 12`, an additional factor of 2 reduction compared to the case (24) above. The factor of 2 is due to the sharding along the `y` mesh dimension with a size of `2` devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0PYqe0neiOpR" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "\n", + "a_layout = dtensor.Layout(['y', 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# The sharding of `a` on the first axis is carried to `c`\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c-1NazCVmLWZ" + }, + "source": [ + "### DTensor as output\n", + "\n", + "What about Python functions that do not take operands, but return a Tensor result that can be sharded? Examples of such functions are:\n", + "\n", + " - `tf.ones`, `tf.zeros`, `tf.random.stateless_normal`\n", + "\n", + "For these Python functions, DTensor provides `dtensor.call_with_layout`, which eagerly executes a Python function with DTensor, and ensures that the returned Tensor is a DTensor with the requested `Layout`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J0jo_8NPtJiO" + }, + "outputs": [], + "source": [ + "help(dtensor.call_with_layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V-YdLvfytM7g" + }, + "source": [ + "The eagerly executed Python function usually only contains a single non-trivial TensorFlow Op.\n", + "\n", + "To use a Python function that emits multiple TensorFlow Ops with `dtensor.call_with_layout`, the function should be converted to a `tf.function`. Calling a `tf.function` is a single TensorFlow Op. When the `tf.function` is called, DTensor can perform layout propagation when it analyzes the computing graph of the `tf.function`, before any of the intermediate tensors are materialized."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DLrksgFjqRLS" + }, + "source": [ + "#### APIs that emit a single TensorFlow Op\n", + "\n", + "If a function emits a single TensorFlow Op, you can directly apply `dtensor.call_with_layout` to the function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G1CuKYSFtFeM" + }, + "outputs": [], + "source": [ + "help(tf.ones)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2m_EAwy-ozOh" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "ones = dtensor.call_with_layout(tf.ones, dtensor.Layout(['x', 'y'], mesh), shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bx-7Xo8Cpb8S" + }, + "source": [ + "#### APIs that emit multiple TensorFlow Ops\n", + "\n", + "If the API emits multiple TensorFlow Ops, convert the function into a single Op through `tf.function`. For example, `tf.random.stateless_normal`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H8BQSTRFtCih" + }, + "outputs": [], + "source": [ + "help(tf.random.stateless_normal)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TvP81eYopSPm" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4),\n", + " seed=(1, 1))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKoojp9ZyWzW" + }, + "source": [ + "Wrapping a Python function that emits a single TensorFlow Op with `tf.function` is allowed. The only caveat is paying the associated cost and complexity of creating a `tf.function` from a Python function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbAtKrSkpOaq" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.ones),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D-m1816JP3CE" + }, + "source": [ + "### From `tf.Variable` to `dtensor.DVariable`\n", + "\n", + "In TensorFlow, `tf.Variable` is the holder for a mutable `Tensor` value.\n", + "With DTensor, the corresponding variable semantics is provided by `dtensor.DVariable`.\n", + "\n", + "A new type, `DVariable`, was introduced for DTensor variables because DVariables have the additional requirement that the layout cannot change from its initial value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "awRPuR26P0Sc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "\n", + "v = dtensor.DVariable(\n", + " initial_value=dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " layout=layout,\n", + " shape=tf.TensorShape([64, 32]),\n", + " seed=[1, 1],\n", + " dtype=tf.float32))\n", + "\n", + "print(v.handle)\n", + "assert layout == dtensor.fetch_layout(v)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pb9jn473prC_" + }, + "source": [ + "Other than the requirement on matching the `layout`, a `DVariable` behaves the same as a `tf.Variable`. 
For example, you can add a DVariable to a DTensor,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "adxFw9wJpqQQ" + }, + "outputs": [], + "source": [ + "a = dtensor.call_with_layout(tf.ones, layout=layout, shape=(64, 32))\n", + "b = v + a # add DVariable and DTensor\n", + "print(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QxBdNHWSu-kV" + }, + "source": [ + "You can also assign a DTensor to a DVariable:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oYwfiyw5P94U" + }, + "outputs": [], + "source": [ + "v.assign(a) # assign a DTensor to a DVariable\n", + "print(a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4fvSk_VUvGnj" + }, + "source": [ + "Attempting to mutate the layout of a `DVariable`, by assigning a DTensor with an incompatible layout produces an error:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3pckUugYP_r-" + }, + "outputs": [], + "source": [ + "# variable's layout is immutable.\n", + "another_mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "b = dtensor.call_with_layout(tf.ones,\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], another_mesh),\n", + " shape=(64, 32))\n", + "try:\n", + " v.assign(b)\n", + "except:\n", + " print(\"exception raised\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LadIcwRvR6f" + }, + "source": [ + "## What's next?\n", + "\n", + "In this colab, you learned about DTensor, an extension to TensorFlow for distributed computing. To try out these concepts in a tutorial, check out [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_overview.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/eager.ipynb b/site/en/guide/eager.ipynb deleted file mode 100644 index 44e2e624d43..00000000000 --- a/site/en/guide/eager.ipynb +++ /dev/null @@ -1,1146 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "CCQY7jpBfMur" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "z6X9omPnfO_h" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2QQJJyDzqGRb" - }, - "source": [ - "# Eager execution\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B1xdylywqUSX" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EGjDcGxIqEfX" - }, - "source": [ - "TensorFlow's eager execution is an imperative programming environment that\n", - "evaluates operations immediately, without building graphs: operations return\n", - "concrete values instead of constructing a computational graph to run later. This\n", - "makes it easy to get started with TensorFlow and debug models, and it\n", - "reduces boilerplate as well. To follow along with this guide, run the code\n", - "samples below in an interactive `python` interpreter.\n", - "\n", - "Eager execution is a flexible machine learning platform for research and\n", - "experimentation, providing:\n", - "\n", - "* *An intuitive interface*—Structure your code naturally and use Python data\n", - " structures. Quickly iterate on small models and small data.\n", - "* *Easier debugging*—Call ops directly to inspect running models and test\n", - " changes. Use standard Python debugging tools for immediate error reporting.\n", - "* *Natural control flow*—Use Python control flow instead of graph control\n", - " flow, simplifying the specification of dynamic models.\n", - "\n", - "Eager execution supports most TensorFlow operations and GPU acceleration.\n", - "\n", - "Note: Some models may experience increased overhead with eager execution\n", - "enabled. Performance improvements are ongoing, but please\n", - "[file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a\n", - "problem and share your benchmarks." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RBAeIwOMrYk8" - }, - "source": [ - "## Setup and basic usage" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ByNsp4VqqEfa" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import tensorflow as tf\n", - "\n", - "import cProfile" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "48P3-8q4qEfe" - }, - "source": [ - "In Tensorflow 2.0, eager execution is enabled by default." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7aFsD8csqEff" - }, - "outputs": [], - "source": [ - "tf.executing_eagerly()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x_G1zZT5qEfh" - }, - "source": [ - "Now you can run TensorFlow operations and the results will return immediately:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9gsI54pbqEfj" - }, - "outputs": [], - "source": [ - "x = [[2.]]\n", - "m = tf.matmul(x, x)\n", - "print(\"hello, {}\".format(m))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ajFn6qsdqEfl" - }, - "source": [ - "Enabling eager execution changes how TensorFlow operations behave—now they\n", - "immediately evaluate and return their values to Python. `tf.Tensor` objects\n", - "reference concrete values instead of symbolic handles to nodes in a computational\n", - "graph. Since there isn't a computational graph to build and run later in a\n", - "session, it's easy to inspect results using `print()` or a debugger. Evaluating,\n", - "printing, and checking tensor values does not break the flow for computing\n", - "gradients.\n", - "\n", - "Eager execution works nicely with [NumPy](http://www.numpy.org/). NumPy\n", - "operations accept `tf.Tensor` arguments. The TensorFlow\n", - "`tf.math` operations convert\n", - "Python objects and NumPy arrays to `tf.Tensor` objects. 
The\n", - "`tf.Tensor.numpy` method returns the object's value as a NumPy `ndarray`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sTO0_5TYqz1n" - }, - "outputs": [], - "source": [ - "a = tf.constant([[1, 2],\n", - " [3, 4]])\n", - "print(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Dp14YT8Gq4r1" - }, - "outputs": [], - "source": [ - "# Broadcasting support\n", - "b = tf.add(a, 1)\n", - "print(b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "69p3waMfq8cQ" - }, - "outputs": [], - "source": [ - "# Operator overloading is supported\n", - "print(a * b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ui025t1qqEfm" - }, - "outputs": [], - "source": [ - "# Use NumPy values\n", - "import numpy as np\n", - "\n", - "c = np.multiply(a, b)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Tq_aFRzWrCua" - }, - "outputs": [], - "source": [ - "# Obtain numpy value from a tensor:\n", - "print(a.numpy())\n", - "# => [[1 2]\n", - "# [3 4]]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H08f9ss9qEft" - }, - "source": [ - "## Dynamic control flow\n", - "\n", - "A major benefit of eager execution is that all the functionality of the host\n", - "language is available while your model is executing. So, for example,\n", - "it is easy to write [fizzbuzz](https://en.wikipedia.org/wiki/Fizz_buzz):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0fudRMeUqEfu" - }, - "outputs": [], - "source": [ - "def fizzbuzz(max_num):\n", - " counter = tf.constant(0)\n", - " max_num = tf.convert_to_tensor(max_num)\n", - " for num in range(1, max_num.numpy()+1):\n", - " num = tf.constant(num)\n", - " if int(num % 3) == 0 and int(num % 5) == 0:\n", - " print('FizzBuzz')\n", - " elif int(num % 3) == 0:\n", - " print('Fizz')\n", - " elif int(num % 5) == 0:\n", - " print('Buzz')\n", - " else:\n", - " print(num.numpy())\n", - " counter += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P2cKknQWrJLB" - }, - "outputs": [], - "source": [ - "fizzbuzz(15)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7kA-aC3BqEfy" - }, - "source": [ - "This has conditionals that depend on tensor values and it prints these values\n", - "at runtime." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8huKpuuAwICq" - }, - "source": [ - "## Eager training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mp2lCCZYrxHd" - }, - "source": [ - "### Computing gradients\n", - "\n", - "[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation)\n", - "is useful for implementing machine learning algorithms such as\n", - "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training\n", - "neural networks. During eager execution, use `tf.GradientTape` to trace\n", - "operations for computing gradients later.\n", - "\n", - "You can use `tf.GradientTape` to train and/or compute gradients in eager. It is especially useful for complicated training loops. \n", - "\n", - "Since different operations can occur during each call, all\n", - "forward-pass operations get recorded to a \"tape\". To compute the gradient, play\n", - "the tape backwards and then discard. 
A particular `tf.GradientTape` can only\n", - "compute one gradient; subsequent calls throw a runtime error." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7g1yWiSXqEf-" - }, - "outputs": [], - "source": [ - "w = tf.Variable([[1.0]])\n", - "with tf.GradientTape() as tape:\n", - " loss = w * w\n", - "\n", - "grad = tape.gradient(loss, w)\n", - "print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vkHs32GqweYS" - }, - "source": [ - "### Train a model\n", - "\n", - "The following example creates a multi-layer model that classifies the standard\n", - "MNIST handwritten digits. It demonstrates the optimizer and layer APIs to build\n", - "trainable graphs in an eager execution environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "38kymXZowhhz" - }, - "outputs": [], - "source": [ - "# Fetch and format the mnist data\n", - "(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()\n", - "\n", - "dataset = tf.data.Dataset.from_tensor_slices(\n", - " (tf.cast(mnist_images[...,tf.newaxis]/255, tf.float32),\n", - " tf.cast(mnist_labels,tf.int64)))\n", - "dataset = dataset.shuffle(1000).batch(32)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rl1K8rOowmwT" - }, - "outputs": [], - "source": [ - "# Build the model\n", - "mnist_model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu',\n", - " input_shape=(None, None, 1)),\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu'),\n", - " tf.keras.layers.GlobalAveragePooling2D(),\n", - " tf.keras.layers.Dense(10)\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fvyk-HgGwxwl" - }, - "source": [ - "Even without training, call the model and inspect the output in eager execution:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BsxystjBwxLS" - }, - "outputs": [], - "source": [ - "for images,labels in dataset.take(1):\n", - " print(\"Logits: \", mnist_model(images[0:1]).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y3PGa8G7qEgB" - }, - "source": [ - "While keras models have a builtin training loop (using the `fit` method), sometimes you need more customization. Here's an example, of a training loop implemented with eager:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bzRhM7JDnaEG" - }, - "outputs": [], - "source": [ - "optimizer = tf.keras.optimizers.Adam()\n", - "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "loss_history = []" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tXaupYXRI2YM" - }, - "source": [ - "Note: Use the assert functions in `tf.debugging` to check if a condition holds up. This works in eager and graph execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DDHrigtiCIA4" - }, - "outputs": [], - "source": [ - "def train_step(images, labels):\n", - " with tf.GradientTape() as tape:\n", - " logits = mnist_model(images, training=True)\n", - " \n", - " # Add asserts to check the shape of the output.\n", - " tf.debugging.assert_equal(logits.shape, (32, 10))\n", - " \n", - " loss_value = loss_object(labels, logits)\n", - "\n", - " loss_history.append(loss_value.numpy().mean())\n", - " grads = tape.gradient(loss_value, mnist_model.trainable_variables)\n", - " optimizer.apply_gradients(zip(grads, mnist_model.trainable_variables))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0m1xAXrmqEgJ" - }, - "outputs": [], - "source": [ - "def train(epochs):\n", - " for epoch in range(epochs):\n", - " for (batch, (images, labels)) in enumerate(dataset):\n", - " train_step(images, labels)\n", - " print ('Epoch {} finished'.format(epoch))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C5dGz0p_nf4W" - }, - "outputs": [], - "source": [ - "train(epochs = 3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5vG5ql_2vYB5" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "plt.plot(loss_history)\n", - "plt.xlabel('Batch #')\n", - "plt.ylabel('Loss [entropy]')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kKpOlHPLqEgl" - }, - "source": [ - "### Variables and optimizers\n", - "\n", - "`tf.Variable` objects store mutable `tf.Tensor`-like values accessed during\n", - "training to make automatic differentiation easier. \n", - "\n", - "The collections of variables can be encapsulated into layers or models, along with methods that operate on them. See [Custom Keras layers and models](./keras/custom_layers_and_models.ipynb) for details. The main difference between layers and models is that models add methods like `Model.fit`, `Model.evaluate`, and `Model.save`.\n", - "\n", - "For example, the automatic differentiation example above\n", - "can be rewritten:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2qXcPngYk8dN" - }, - "outputs": [], - "source": [ - "class Linear(tf.keras.Model):\n", - " def __init__(self):\n", - " super(Linear, self).__init__()\n", - " self.W = tf.Variable(5., name='weight')\n", - " self.B = tf.Variable(10., name='bias')\n", - " def call(self, inputs):\n", - " return inputs * self.W + self.B" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nnQLBYmEqEgm" - }, - "outputs": [], - "source": [ - "# A toy dataset of points around 3 * x + 2\n", - "NUM_EXAMPLES = 2000\n", - "training_inputs = tf.random.normal([NUM_EXAMPLES])\n", - "noise = tf.random.normal([NUM_EXAMPLES])\n", - "training_outputs = training_inputs * 3 + 2 + noise\n", - "\n", - "# The loss function to be optimized\n", - "def loss(model, inputs, targets):\n", - " error = model(inputs) - targets\n", - " return tf.reduce_mean(tf.square(error))\n", - "\n", - "def grad(model, inputs, targets):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets)\n", - " return tape.gradient(loss_value, [model.W, model.B])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q7x1CDurl3IG" - }, - "source": [ - "Next:\n", - "\n", - "1. Create the model.\n", - "2. 
The Derivatives of a loss function with respect to model parameters.\n", - "3. A strategy for updating the variables based on the derivatives." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SbXJk0f2lztg" - }, - "outputs": [], - "source": [ - "model = Linear()\n", - "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n", - "\n", - "print(\"Initial loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))\n", - "\n", - "steps = 300\n", - "for i in range(steps):\n", - " grads = grad(model, training_inputs, training_outputs)\n", - " optimizer.apply_gradients(zip(grads, [model.W, model.B]))\n", - " if i % 20 == 0:\n", - " print(\"Loss at step {:03d}: {:.3f}\".format(i, loss(model, training_inputs, training_outputs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PV_dqer7pzSH" - }, - "outputs": [], - "source": [ - "print(\"Final loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rvt_Wj3Tp0hm" - }, - "outputs": [], - "source": [ - "print(\"W = {}, B = {}\".format(model.W.numpy(), model.B.numpy()))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rPjb8nRWqEgr" - }, - "source": [ - "Note: Variables persist until the last reference to the python object\n", - "is removed, and is the variable is deleted." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "scMjg6L6qEgv" - }, - "source": [ - "### Object-based saving\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y-0ZcCcjwkux" - }, - "source": [ - "A `tf.keras.Model` includes a convenient `save_weights` method allowing you to easily create a checkpoint: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "oJrMX94PwD9s" - }, - "outputs": [], - "source": [ - "model.save_weights('weights')\n", - "status = model.load_weights('weights')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2EfTjWV_wEng" - }, - "source": [ - "Using `tf.train.Checkpoint` you can take full control over this process.\n", - "\n", - "This section is an abbreviated version of the [guide to training checkpoints](./checkpoint.ipynb).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7z5xRfdHzZOQ" - }, - "outputs": [], - "source": [ - "x = tf.Variable(10.)\n", - "checkpoint = tf.train.Checkpoint(x=x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IffrUVG7zyVb" - }, - "outputs": [], - "source": [ - "x.assign(2.) # Assign a new value to the variables and save.\n", - "checkpoint_path = './ckpt/'\n", - "checkpoint.save(checkpoint_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eMT9koCoqEgw" - }, - "outputs": [], - "source": [ - "x.assign(11.) # Change the variable after saving.\n", - "\n", - "# Restore values from the checkpoint\n", - "checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))\n", - "\n", - "print(x) # => 2.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbFnP-yLqEgx" - }, - "source": [ - "To save and load models, `tf.train.Checkpoint` stores the internal state of objects,\n", - "without requiring hidden variables. 
To record the state of a `model`,\n", - "an `optimizer`, and a global step, pass them to a `tf.train.Checkpoint`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hWZHyAXMqEg0" - }, - "outputs": [], - "source": [ - "model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu'),\n", - " tf.keras.layers.GlobalAveragePooling2D(),\n", - " tf.keras.layers.Dense(10)\n", - "])\n", - "optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n", - "checkpoint_dir = 'path/to/model_dir'\n", - "if not os.path.exists(checkpoint_dir):\n", - " os.makedirs(checkpoint_dir)\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt\")\n", - "root = tf.train.Checkpoint(optimizer=optimizer,\n", - " model=model)\n", - "\n", - "root.save(checkpoint_prefix)\n", - "root.restore(tf.train.latest_checkpoint(checkpoint_dir))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R-ITwkBCF6GJ" - }, - "source": [ - "Note: In many training loops, variables are created after `tf.train.Checkpoint.restore` is called. These variables will be restored as soon as they are created, and assertions are available to ensure that a checkpoint has been fully loaded. See the [guide to training checkpoints](./checkpoint.ipynb) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3yoD0VJ7qEg3" - }, - "source": [ - "### Object-oriented metrics\n", - "\n", - "`tf.keras.metrics` are stored as objects. Update a metric by passing the new data to\n", - "the callable, and retrieve the result using the `tf.keras.metrics.result` method,\n", - "for example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9ccu0iAaqEg5" - }, - "outputs": [], - "source": [ - "m = tf.keras.metrics.Mean(\"loss\")\n", - "m(0)\n", - "m(5)\n", - "m.result() # => 2.5\n", - "m([8, 9])\n", - "m.result() # => 5.5" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aB8qWtT955pI" - }, - "source": [ - "### Summaries and TensorBoard\n", - "\n", - "[TensorBoard](https://tensorflow.org/tensorboard) is a visualization tool for\n", - "understanding, debugging and optimizing the model training process. It uses\n", - "summary events that are written while executing the program.\n", - "\n", - "You can use `tf.summary` to record summaries of variable in eager execution.\n", - "For example, to record summaries of `loss` once every 100 training steps:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "z6VInqhA6RH4" - }, - "outputs": [], - "source": [ - "logdir = \"./tb/\"\n", - "writer = tf.summary.create_file_writer(logdir)\n", - "\n", - "steps = 1000\n", - "with writer.as_default(): # or call writer.set_as_default() before the loop.\n", - " for i in range(steps):\n", - " step = i + 1\n", - " # Calculate loss with your real train function.\n", - " loss = 1 - 0.001 * step\n", - " if step % 100 == 0:\n", - " tf.summary.scalar('loss', loss, step=step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "08QQD2j36TaI" - }, - "outputs": [], - "source": [ - "!ls tb/" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xEL4yJe5qEhD" - }, - "source": [ - "## Advanced automatic differentiation topics\n", - "\n", - "### Dynamic models\n", - "\n", - "`tf.GradientTape` can also be used in dynamic models. 
This example for a\n", - "[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search)\n", - "algorithm looks like normal NumPy code, except there are gradients and is\n", - "differentiable, despite the complex control flow:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L518n5dkqEhE" - }, - "outputs": [], - "source": [ - "def line_search_step(fn, init_x, rate=1.0):\n", - " with tf.GradientTape() as tape:\n", - " # Variables are automatically tracked.\n", - " # But to calculate a gradient from a tensor, you must `watch` it.\n", - " tape.watch(init_x)\n", - " value = fn(init_x)\n", - " grad = tape.gradient(value, init_x)\n", - " grad_norm = tf.reduce_sum(grad * grad)\n", - " init_value = value\n", - " while value > init_value - rate * grad_norm:\n", - " x = init_x - rate * grad\n", - " value = fn(x)\n", - " rate /= 2.0\n", - " return x, value" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gieGOf_DqEhK" - }, - "source": [ - "### Custom gradients\n", - "\n", - "Custom gradients are an easy way to override gradients. Within the forward function, define the gradient with respect to the\n", - "inputs, outputs, or intermediate results. For example, here's an easy way to clip\n", - "the norm of the gradients in the backward pass:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-OwwsWUAqEhK" - }, - "outputs": [], - "source": [ - "@tf.custom_gradient\n", - "def clip_gradient_by_norm(x, norm):\n", - " y = tf.identity(x)\n", - " def grad_fn(dresult):\n", - " return [tf.clip_by_norm(dresult, norm), None]\n", - " return y, grad_fn" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JPLDHkF_qEhN" - }, - "source": [ - "Custom gradients are commonly used to provide a numerically stable gradient for a\n", - "sequence of operations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "24WiLROnqEhO" - }, - "outputs": [], - "source": [ - "def log1pexp(x):\n", - " return tf.math.log(1 + tf.exp(x))\n", - "\n", - "def grad_log1pexp(x):\n", - " with tf.GradientTape() as tape:\n", - " tape.watch(x)\n", - " value = log1pexp(x)\n", - " return tape.gradient(value, x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n8fq69r9-B-c" - }, - "outputs": [], - "source": [ - "# The gradient computation works fine at x = 0.\n", - "grad_log1pexp(tf.constant(0.)).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_VFSU0mG-FSp" - }, - "outputs": [], - "source": [ - "# However, x = 100 fails because of numerical instability.\n", - "grad_log1pexp(tf.constant(100.)).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-VcTR34rqEhQ" - }, - "source": [ - "Here, the `log1pexp` function can be analytically simplified with a custom\n", - "gradient. 
The implementation below reuses the value for `tf.exp(x)` that is\n", - "computed during the forward pass—making it more efficient by eliminating\n", - "redundant calculations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Q7nvfx_-qEhS" - }, - "outputs": [], - "source": [ - "@tf.custom_gradient\n", - "def log1pexp(x):\n", - " e = tf.exp(x)\n", - " def grad(dy):\n", - " return dy * (1 - 1 / (1 + e))\n", - " return tf.math.log(1 + e), grad\n", - "\n", - "def grad_log1pexp(x):\n", - " with tf.GradientTape() as tape:\n", - " tape.watch(x)\n", - " value = log1pexp(x)\n", - " return tape.gradient(value, x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5gHPKMfl-Kge" - }, - "outputs": [], - "source": [ - "# As before, the gradient computation works fine at x = 0.\n", - "grad_log1pexp(tf.constant(0.)).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "u38MOfz3-MDE" - }, - "outputs": [], - "source": [ - "# And the gradient computation also works at x = 100.\n", - "grad_log1pexp(tf.constant(100.)).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rnZXjfQzqEhV" - }, - "source": [ - "## Performance\n", - "\n", - "Computation is automatically offloaded to GPUs during eager execution. If you\n", - "want control over where a computation runs you can enclose it in a\n", - "`tf.device('/gpu:0')` block (or the CPU equivalent):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ac9Y64H-qEhX" - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "def measure(x, steps):\n", - " # TensorFlow initializes a GPU the first time it's used, exclude from timing.\n", - " tf.matmul(x, x)\n", - " start = time.time()\n", - " for i in range(steps):\n", - " x = tf.matmul(x, x)\n", - " # tf.matmul can return before completing the matrix multiplication\n", - " # (e.g., can return after enqueing the operation on a CUDA stream).\n", - " # The x.numpy() call below will ensure that all enqueued operations\n", - " # have completed (and will also copy the result to host memory,\n", - " # so we're including a little more than just the matmul operation\n", - " # time).\n", - " _ = x.numpy()\n", - " end = time.time()\n", - " return end - start\n", - "\n", - "shape = (1000, 1000)\n", - "steps = 200\n", - "print(\"Time to multiply a {} matrix by itself {} times:\".format(shape, steps))\n", - "\n", - "# Run on CPU:\n", - "with tf.device(\"/cpu:0\"):\n", - " print(\"CPU: {} secs\".format(measure(tf.random.normal(shape), steps)))\n", - "\n", - "# Run on GPU, if available:\n", - "if tf.config.list_physical_devices(\"GPU\"):\n", - " with tf.device(\"/gpu:0\"):\n", - " print(\"GPU: {} secs\".format(measure(tf.random.normal(shape), steps)))\n", - "else:\n", - " print(\"GPU: not found\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RLw3IS7UqEhe" - }, - "source": [ - "A `tf.Tensor` object can be copied to a different device to execute its\n", - "operations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ny6LX2BVqEhf" - }, - "outputs": [], - "source": [ - "if tf.config.list_physical_devices(\"GPU\"):\n", - " x = tf.random.normal([10, 10])\n", - "\n", - " x_gpu0 = x.gpu()\n", - " x_cpu = x.cpu()\n", - "\n", - " _ = tf.matmul(x_cpu, x_cpu) # Runs on CPU\n", - " _ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oA_qaII3-p6c" 
- }, - "source": [ - "### Benchmarks\n", - "\n", - "For compute-heavy models, such as\n", - "[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/benchmarks/resnet50)\n", - "training on a GPU, eager execution performance is comparable to `tf.function` execution.\n", - "But this gap grows larger for models with less computation and there is work to\n", - "be done for optimizing hot code paths for models with lots of small operations.\n", - "\n", - "## Work with functions\n", - "\n", - "While eager execution makes development and debugging more interactive,\n", - "TensorFlow 1.x style graph execution has advantages for distributed training, performance\n", - "optimizations, and production deployment. To bridge this gap, TensorFlow 2.0 introduces `function`s via the `tf.function` API. For more information, see the [tf.function](./function.ipynb) guide." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "eager.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/guide/effective_tf2.ipynb b/site/en/guide/effective_tf2.ipynb index 59f8fab1e3d..f4204c0971f 100644 --- a/site/en/guide/effective_tf2.ipynb +++ b/site/en/guide/effective_tf2.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "This guide provides a list of best practices for writing code using TensorFlow 2 (TF2). Refer to the [migrate section of the guide](https://tensorflow.org/guide/migrate) for more info on migrating your TF1.x code to TF2." + "This guide provides a list of best practices for writing code using TensorFlow 2 (TF2), it is written for users who have recently switched over from TensorFlow 1 (TF1). Refer to the [migrate section of the guide](https://tensorflow.org/guide/migrate) for more info on migrating your TF1 code to TF2." ] }, { @@ -751,8 +751,7 @@ "\n", "* When using `tf.keras.Model.compile`, set `run_eagerly = True` to disable the `Model` logic from being wrapped in a `tf.function`.\n", "\n", - "* Use `tf.data.experimental.enable_debug_mode` to enable the debug mode for `tf.data`. Read the [API docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/enable_debug_mode) for more details.\n", - "\n" + "* Use `tf.data.experimental.enable_debug_mode` to enable the debug mode for `tf.data`. Read the [API docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/enable_debug_mode) for more details.\n" ] }, { @@ -786,7 +785,6 @@ "colab": { "collapsed_sections": [], "name": "effective_tf2.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/estimator.ipynb b/site/en/guide/estimator.ipynb index e0ae0a3792f..05e8fb4012a 100644 --- a/site/en/guide/estimator.ipynb +++ b/site/en/guide/estimator.ipynb @@ -68,7 +68,7 @@ "id": "rILQuAiiRlI7" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. 
Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://www.tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -869,7 +869,6 @@ "A_lvUsSLZzVg" ], "name": "estimator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/extension_type.ipynb b/site/en/guide/extension_type.ipynb new file mode 100644 index 00000000000..7e8edeea7c9 --- /dev/null +++ b/site/en/guide/extension_type.ipynb @@ -0,0 +1,2130 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WrcIOXsUQh8U" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tXAbWHtqs1Y2" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HTgMAvQq-PU_" + }, + "source": [ + "# Extension types\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jHcw9MtgBo7e" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0MsE_F0WBpmc" + }, + "outputs": [], + "source": [ + "!pip install -q tf_nightly\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "from typing import Tuple, List, Mapping, Union, Optional\n", + "import tempfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1BAk3bji_0wl" + }, + "source": [ + "## Extension types\n", + "\n", + "User-defined types can make projects more readable, modular, maintainable. However, most TensorFlow APIs have very limited support for user-defined Python types. This includes both high-level APIs (such as [Keras](https://www.tensorflow.org/guide/keras/overview), [tf.function](https://www.tensorflow.org/guide/function), [`tf.SavedModel`](https://www.tensorflow.org/guide/saved_model)) and lower-level APIs (such as `tf.while_loop` and `tf.concat`). TensorFlow **extension types** can be used to create user-defined object-oriented types that work seamlessly with TensorFlow's APIs. To create an extension type, simply define a Python class with `tf.experimental.ExtensionType` as its base, and use [type annotations](https://www.python.org/dev/peps/pep-0484/) to specify the type for each field." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7o5KY7L5_nxy" + }, + "outputs": [], + "source": [ + "class TensorGraph(tf.experimental.ExtensionType):\n", + " \"\"\"A collection of labeled nodes connected by weighted edges.\"\"\"\n", + " edge_weights: tf.Tensor # shape=[num_nodes, num_nodes]\n", + " node_labels: Mapping[str, tf.Tensor] # shape=[num_nodes]; dtype=any\n", + "\n", + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor # shape=values.shape; false for missing/invalid values.\n", + "\n", + "class CSRSparseMatrix(tf.experimental.ExtensionType):\n", + " \"\"\"Compressed sparse row matrix (https://en.wikipedia.org/wiki/Sparse_matrix).\"\"\"\n", + " values: tf.Tensor # shape=[num_nonzero]; dtype=any\n", + " col_index: tf.Tensor # shape=[num_nonzero]; dtype=int64\n", + " row_index: tf.Tensor # shape=[num_rows+1]; dtype=int64" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FiaNXPa7pNK-" + }, + "source": [ + "The `tf.experimental.ExtensionType` base class works similarly to [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) and [`@dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass) from the standard Python library. In particular, it automatically adds a constructor and special methods (such as `__repr__` and `__eq__`) based on the field type annotations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JsE7X6_uMyLo" + }, + "source": [ + "Typically, extension types tend to fall into one of two categories:\n", + "\n", + "* ***Data structures***, which group together a collection of related values, and can provide useful operations based on those values. 
Data structures may be fairly general (such as the `TensorGraph` example above); or they may be highly customized to a specific model.\n", + "\n", + "* ***Tensor-like types***, which specialize or extend the concept of \"Tensor.\" Types in this category have a `rank`, a `shape`, and usually a `dtype`; and it makes sense to use them with Tensor operations (such as `tf.stack`, `tf.add`, or `tf.matmul`). `MaskedTensor` and `CSRSparseMatrix` are examples of tensor-like types." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uxngcajlMqIY" + }, + "source": [ + "## Supported APIs\n", + "\n", + "Extension types are supported by the following TensorFlow APIs:\n", + "\n", + "* **Keras**: Extension types can be used as inputs and outputs for Keras `Models` and `Layers`.\n", + "* **`tf.data.Dataset`**: Extension types can be included in `Datasets`, and returned by dataset `Iterators`.\n", + "* **TensorFlow Hub**: Extension types can be used as inputs and outputs for `tf.hub` modules.\n", + "* **SavedModel**: Extension types can be used as inputs and outputs for `SavedModel` functions.\n", + "* **`tf.function`**: Extension types can be used as arguments and return values for functions wrapped with the `@tf.function` decorator.\n", + "* **While loops**: Extension types can be used as loop variables in `tf.while_loop`, and can be used as arguments and return values for the while-loop's body.\n", + "* **Conditionals**: Extension types can be conditionally selected using `tf.cond` and `tf.case`.\n", + "* **`tf.py_function`**: Extension types can be used as arguments and return values for the `func` argument to `tf.py_function`.\n", + "* **Tensor ops**: Extension types can be extended to support most TensorFlow ops that accept Tensor inputs (such as `tf.matmul`, `tf.gather`, and `tf.reduce_sum`). Go to the \"*Dispatch*\" section below for more information.\n", + "* **Distribution strategy**: Extension types can be used as per-replica values.\n", + "\n", + "For more details, see the section on \"TensorFlow APIs that support ExtensionTypes\" below.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VIpZwuPVpwOX" + }, + "source": [ + "## Requirements\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nNk_TQeJGVwV" + }, + "source": [ + "### Field types\n", + "\n", + "All fields—instance variables—must be declared, and a type annotation must be provided for each field. 
The following type annotations are supported:\n", + "\n", + "Type | Example\n", + "---- | -------\n", + "Python integers | `i: int`\n", + "Python floats | `f: float`\n", + "Python strings | `s: str`\n", + "Python booleans | `b: bool`\n", + "Python `None` | `n: None`\n", + "[Tensor shapes](https://www.tensorflow.org/api_docs/python/tf/TensorShape) | `shape: tf.TensorShape`\n", + "[Tensor `dtype`s](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) | `dtype: tf.DType`\n", + "[Tensors](https://www.tensorflow.org/api_docs/python/tf/Tensor) | `t: tf.Tensor`\n", + "[Extension types](https://www.tensorflow.org/api_docs/python/tf/experimental/ExtensionType) | `mt: MyMaskedTensor`\n", + "[Ragged tensors](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor) | `rt: tf.RaggedTensor`\n", + "[Sparse tensors](https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor) | `st: tf.SparseTensor`\n", + "[Indexed slices](https://www.tensorflow.org/api_docs/python/tf/IndexedSlices) | `s: tf.IndexedSlices`\n", + "[Optional tensors](https://www.tensorflow.org/api_docs/python/tf/experimental/Optional) | `o: tf.experimental.Optional`\n", + "[Type unions](https://docs.python.org/3/library/typing.html#typing.Union) | `int_or_float: typing.Union[int, float]`\n", + "[Tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `params: typing.Tuple[int, float, tf.Tensor, int]`\n", + "[Var-length tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `lengths: typing.Tuple[int, ...]`\n", + "[Mappings](https://docs.python.org/3/library/typing.html#typing.Mapping) | `tags: typing.Mapping[str, tf.Tensor]`\n", + "[Optional values](https://docs.python.org/3/library/typing.html#typing.Optional) | `weight: typing.Optional[tf.Tensor]`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iFetYyZsIvf6" + }, + "source": [ + "### Mutability\n", + "\n", + "Extension types are required to be immutable. This ensures that they can be properly tracked by TensorFlow's graph-tracing mechanisms.\n", + "If you find yourself wanting to mutate an extension type value, consider instead defining methods that transform values. For example, rather than defining a `set_mask` method to mutate a `MaskedTensor`, you could define a `replace_mask` method that returns a new `MaskedTensor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DThZLYH2IwFh" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def replace_mask(self, new_mask):\n", + " self.values.shape.assert_is_compatible_with(new_mask.shape)\n", + " return MaskedTensor(self.values, new_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x3JyivI_qAtt" + }, + "source": [ + "## Functionality added by `ExtensionType`\n", + "\n", + "The `ExtensionType` base class provides the following functionality:\n", + "\n", + "* A constructor (`__init__`).\n", + "* A printable representation method (`__repr__`).\n", + "* Equality and inequality operators (`__eq__`).\n", + "* A validation method (`__validate__`).\n", + "* Enforced immutability.\n", + "* A nested `TypeSpec`.\n", + "* Tensor API dispatch support.\n", + "\n", + "Go to the \"Customizing `ExtensionType`s\" section below for more information on customizing this functionality." 
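As a consolidated illustration, the sketch below does not appear in the original guide: the `PaddedBatch` type is hypothetical, and it simply combines a few of the field annotations from the table above with the automatically added constructor, `__repr__`, `__eq__`, and enforced immutability, before being passed through `tf.function`, one of the supported APIs listed earlier.

```python
import typing
import tensorflow as tf

class PaddedBatch(tf.experimental.ExtensionType):
  """A hypothetical batch of rows padded to a common length."""
  values: tf.Tensor                        # Tensor field (converted via tf.convert_to_tensor)
  lengths: tf.Tensor                       # Tensor field
  pad_value: typing.Union[int, float] = 0  # type union with a default value

# Auto-generated constructor and printable representation.
batch = PaddedBatch(values=[[1, 2, 0], [3, 0, 0]], lengths=[2, 1])
print(batch)

# Auto-generated equality operator (may return a scalar boolean Tensor,
# since the fields contain Tensors).
print(batch == PaddedBatch(values=[[1, 2, 0], [3, 0, 0]], lengths=[2, 1]))

# Fields are immutable once constructed.
try:
  batch.pad_value = -1
except AttributeError as e:
  print(f"Got expected AttributeError: {e}")

# Extension types can be used with supported APIs such as `tf.function`.
@tf.function
def total_length(b: PaddedBatch):
  return tf.reduce_sum(b.lengths)

print(total_length(batch))
```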
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pfSYs6P26gKq" + }, + "source": [ + "### Constructor\n", + "The constructor added by `ExtensionType` takes each field as a named argument (in the order they were listed in the class definition). This constructor will type-check each parameter, and convert them where necessary. In particular, `Tensor` fields are converted using `tf.convert_to_tensor`; `Tuple` fields are converted to `tuple`s; and `Mapping` fields are converted to immutable dicts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DiXwyZ5M5KFW" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + "# Constructor takes one parameter for each field.\n", + "mt = MaskedTensor(values=[[1, 2, 3], [4, 5, 6]],\n", + " mask=[[True, True, False], [True, False, True]])\n", + "\n", + "# Fields are type-checked and converted to the declared types.\n", + "# For example, `mt.values` is converted to a Tensor.\n", + "print(mt.values)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ezNDe1cYF0Qb" + }, + "source": [ + "The constructor raises an `TypeError` if a field value can not be converted to its declared type:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6HnrMaabF5VS" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], None)\n", + "except TypeError as e:\n", + " print(f\"Got expected TypeError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FwQUI3X02s20" + }, + "source": [ + "The default value for a field can be specified by setting its value at the class level:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GbzDT9fz20JA" + }, + "outputs": [], + "source": [ + "class Pencil(tf.experimental.ExtensionType):\n", + " color: str = \"black\"\n", + " has_erasor: bool = True\n", + " length: tf.Tensor = 1.0\n", + "\n", + "Pencil()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nOW7lS9P4Foc" + }, + "outputs": [], + "source": [ + "Pencil(length=0.5, color=\"blue\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S5Eivtg07Aau" + }, + "source": [ + "### Printable representation\n", + "\n", + "`ExtensionType` adds a default printable representation method (`__repr__`) that includes the class name and the value for each field:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5SyiKTe55krG" + }, + "outputs": [], + "source": [ + "print(MaskedTensor(values=[1, 2, 3], mask=[True, True, False]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4l_gnQh6nXR" + }, + "source": [ + "### Equality operators\n", + "\n", + "`ExtensionType` adds default equality operators (`__eq__` and `__ne__`) that consider two values equal if they have the same type and all their fields are equal. Tensor fields are considered equal if they have the same shape and are elementwise equal for all elements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bHdLg13V52Xm" + }, + "outputs": [], + "source": [ + "a = MaskedTensor([1, 2], [True, False])\n", + "b = MaskedTensor([[3, 4], [5, 6]], [[False, True], [True, True]])\n", + "print(f\"a == a: {a==a}\")\n", + "print(f\"a == b: {a==b}\")\n", + "print(f\"a == a.values: {a==a.values}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O3HqsO3jZlQq" + }, + "source": [ + "**Note:** if any field contains a `Tensor`, then `__eq__` may return a scalar boolean `Tensor` (rather than a Python boolean value)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCpBfkKqCuip" + }, + "source": [ + "### Validation method\n", + "\n", + "`ExtensionType` adds a `__validate__` method, which can be overridden to perform validation checks on fields. It is run after the constructor is called, and after fields have been type-checked and converted to their declared types, so it can assume that all fields have their declared types.\n", + "\n", + "The following example updates `MaskedTensor` to validate the `shape`s and `dtype`s of its fields:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dgZOJRINDn00" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + " def __validate__(self):\n", + " self.values.shape.assert_is_compatible_with(self.mask.shape)\n", + " assert self.mask.dtype.is_bool, 'mask.dtype must be bool'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ajSgkGUUn9WL" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], [0, 1, 0]) # Wrong `dtype` for mask.\n", + "except AssertionError as e:\n", + " print(f\"Got expected AssertionError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fhb96luJn9K7" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], [True, False]) # shapes don't match.\n", + "except ValueError as e:\n", + " print(f\"Got expected ValueError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pjIPAF1OCAdO" + }, + "source": [ + "### Enforced immutability\n", + "\n", + "`ExtensionType` overrides the `__setattr__` and `__delattr__` methods to prevent mutation, ensuring that extension type values are immutable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NgmJ1C7ilN5C" + }, + "outputs": [], + "source": [ + "mt = MaskedTensor([1, 2, 3], [True, False, True])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cMYmJr3RoFKp" + }, + "outputs": [], + "source": [ + "try:\n", + " mt.mask = [True, True, True]\n", + "except AttributeError as e:\n", + " print(f\"Got expected AttributeError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZWwA-zWdzqlU" + }, + "outputs": [], + "source": [ + "try:\n", + " mt.mask[0] = False\n", + "except TypeError as e:\n", + " print(f\"Got expected TypeError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PN_txJVKoFoF" + }, + "outputs": [], + "source": [ + "try:\n", + " del mt.mask\n", + "except AttributeError as e:\n", + " print(f\"Got expected AttributeError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FBVFtCYn69Ou" + }, + "source": [ + "### Nested TypeSpec\n", + "\n", + "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`.\n", + "\n", + "This class captures all the information from a value *except* for the values of any nested tensors. In particular, the `TypeSpec` for a value is created by replacing any nested Tensor, ExtensionType, or CompositeTensor with its `TypeSpec`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GRjANkGYKGnV" + }, + "outputs": [], + "source": [ + "class Player(tf.experimental.ExtensionType):\n", + " name: tf.Tensor\n", + " attributes: Mapping[str, tf.Tensor]\n", + "\n", + "anne = Player(\"Anne\", {\"height\": 8.3, \"speed\": 28.1})\n", + "anne_spec = tf.type_spec_from_value(anne)\n", + "print(anne_spec.name) # Records `dtype` and `shape`, but not the string value.\n", + "print(anne_spec.attributes) # Records keys and TensorSpecs for values." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I2fkgckxO564" + }, + "source": [ + "`TypeSpec` values can be constructed explicitly, or they can be built from an `ExtensionType` value using `tf.type_spec_from_value`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1ehAa7d9OGai" + }, + "outputs": [], + "source": [ + "spec1 = Player.Spec(name=tf.TensorSpec([], tf.float32), attributes={})\n", + "spec2 = tf.type_spec_from_value(anne)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owcFG3cAMCwA" + }, + "source": [ + "`TypeSpec`s are used by TensorFlow to divide values into a **static component** and a **dynamic component**:\n", + "\n", + "* The **static component** (which is fixed at graph-construction time) is encoded with a `tf.TypeSpec`.\n", + "* The **dynamic component** (which can vary each time the graph is run) is encoded as a list of `tf.Tensor`s.\n", + "\n", + "For example, `tf.function` retraces its wrapped function whenever an argument has a previously unseen `TypeSpec`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pg-m5YLRM1Nd" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def anonymize_player(player):\n", + " print(\"<>\")\n", + " return Player(\"\", player.attributes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0CCGm7cpeIq-" + }, + "outputs": [], + "source": [ + "# Function gets traced (first time the function has been called):\n", + "anonymize_player(Player(\"Anne\", {\"height\": 8.3, \"speed\": 28.1}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WB7bt7s83mFE" + }, + "outputs": [], + "source": [ + "# Function does NOT get traced (same TypeSpec: just tensor values changed)\n", + "anonymize_player(Player(\"Bart\", {\"height\": 8.1, \"speed\": 25.3}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dNm7vLpR3nMH" + }, + "outputs": [], + "source": [ + "# Function gets traced (new TypeSpec: keys for attributes changed):\n", + "anonymize_player(Player(\"Chuck\", {\"height\": 11.0, \"jump\": 5.3}))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5rN1HPq25xC" + }, + "source": [ + "For more information, see the [tf.function Guide](https://www.tensorflow.org/guide/function#rules_of_tracing)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gX613uRk0qLz" + }, + "source": [ + "## Customizing `ExtensionType`s\n", + "\n", + "In addition to simply declaring fields and their types, extension types may:\n", + "\n", + "* Override the default printable representation (`__repr__`).\n", + "* Define methods.\n", + "* Define `classmethod`s and `staticmethod`s.\n", + "* Define properties.\n", + "* Override the default constructor (`__init__`).\n", + "* Override the default equality operator (`__eq__`).\n", + "* Define operators (such as `__add__` and `__lt__`).\n", + "* Declare default values for fields.\n", + "* Define subclasses.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MK-ePVDj-ROE" + }, + "source": [ + "### Overriding the default printable representation\n", + "\n", + "You can override this default string conversion operator for extension types. The following example updates the `MaskedTensor` class to generate a more readable string representation when values are printed in Eager mode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gdPhjYEr8IGO" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor # shape=values.shape; false for invalid values.\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + "def masked_tensor_str(values, mask):\n", + " if isinstance(values, tf.Tensor):\n", + " if hasattr(values, 'numpy') and hasattr(mask, 'numpy'):\n", + " return f''\n", + " else:\n", + " return f'MaskedTensor(values={values}, mask={mask})'\n", + " if len(values.shape) == 1:\n", + " items = [repr(v) if m else '_' for (v, m) in zip(values, mask)]\n", + " else:\n", + " items = [masked_tensor_str(v, m) for (v, m) in zip(values, mask)]\n", + " return '[%s]' % ', '.join(items)\n", + "\n", + "mt = MaskedTensor(values=[[1, 2, 3], [4, 5, 6]],\n", + " mask=[[True, True, False], [True, False, True]])\n", + "print(mt)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_MLQU2_v8VjG" + }, + "source": [ + "### Defining methods\n", + "\n", + "Extension types may define methods, just like any normal Python class. For example, the `MaskedTensor` type could define a `with_default` method that returns a copy of `self` with masked values replaced by a given `default` value. Methods may optionally be annotated with the `@tf.function` decorator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7RR-tqee8ZdP" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def with_default(self, default):\n", + " return tf.where(self.mask, self.values, default)\n", + "\n", + "MaskedTensor([1, 2, 3], [True, False, True]).with_default(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qwd_gGKp9RP0" + }, + "source": [ + "### Defining `classmethod`s and `staticmethod`s\n", + "\n", + "Extension types may define methods using the `@classmethod` and `@staticmethod` decorators. For example, the `MaskedTensor` type could define a factory method that masks any element with a given value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BacCEJYU9sBR" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " @staticmethod\n", + " def from_tensor_and_value_to_mask(values, value_to_mask):\n", + " return MaskedTensor(values, values != value_to_mask)\n", + "\n", + "x = tf.constant([[1, 0, 2], [3, 0, 0]])\n", + "MaskedTensor.from_tensor_and_value_to_mask(x, 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xIPf9PZX9AwL" + }, + "source": [ + "### Defining properties\n", + "Extension types may define properties using the `@property` decorator, just like any normal Python class. 
For example, the `MaskedTensor` type could define a `dtype` property that's a shorthand for the `dtype` of the values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "16E68wZ-9KXp" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " @property\n", + " def dtype(self):\n", + " return self.values.dtype\n", + "\n", + "MaskedTensor([1, 2, 3], [True, False, True]).dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mm5gxoG57nf3" + }, + "source": [ + "### Overriding the default constructor\n", + "\n", + "You can override the default constructor for extension types. Custom constructors must set a value for every declared field; and after the custom constructor returns, all fields will be type-checked, and values will be converted as described above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-8K3KeB08G1S" + }, + "outputs": [], + "source": [ + "class Toy(tf.experimental.ExtensionType):\n", + " name: str\n", + " price: tf.Tensor\n", + " def __init__(self, name, price, discount=0):\n", + " self.name = name\n", + " self.price = price * (1 - discount)\n", + "\n", + "print(Toy(\"ball\", 5.0, discount=0.2)) # On sale -- 20% off!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qyQxMlwLFQt7" + }, + "source": [ + "Alternatively, you might consider leaving the default constructor as-is, but adding one or more factory methods. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jiApK4hzFY89" + }, + "outputs": [], + "source": [ + "class Toy(tf.experimental.ExtensionType):\n", + " name: str\n", + " price: tf.Tensor\n", + "\n", + " @staticmethod\n", + " def new_toy_with_discount(name, price, discount):\n", + " return Toy(name, price * (1 - discount))\n", + "\n", + "print(Toy.new_toy_with_discount(\"ball\", 5.0, discount=0.2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pdVcRBhG-Uee" + }, + "source": [ + "### Overriding the default equality operator (`__eq__`)\n", + "\n", + "You can override the default `__eq__` operator for extension types. The following example updates `MaskedTensor` to ignore masked elements when comparing for equality." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dA7DyjfB-Yz0" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " def __eq__(self, other):\n", + " result = tf.math.equal(self.values, other.values)\n", + " result = result | ~(self.mask & other.mask)\n", + " return tf.reduce_all(result)\n", + "\n", + "x = MaskedTensor([1, 2, 3, 4], [True, True, False, True])\n", + "y = MaskedTensor([5, 2, 0, 4], [False, True, False, True])\n", + "print(x == y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n1mZ1Lkyi14B" + }, + "source": [ + "**Note:** You generally don't need to override `__ne__`, since its default implementation simply calls `__eq__` and negates the result." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_Jib1SQD1-z" + }, + "source": [ + "### Using forward references\n", + "\n", + "If the type for a field has not been defined yet, you may use a string containing the name of the type instead. 
In the following example, the string `\"Node\"` is used to annotate the `children` field because the `Node` type hasn't been (fully) defined yet.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Z029QKED0Ao" + }, + "outputs": [], + "source": [ + "class Node(tf.experimental.ExtensionType):\n", + " value: tf.Tensor\n", + " children: Tuple[\"Node\", ...] = ()\n", + "\n", + "Node(3, [Node(5), Node(2)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "boaNg1zHgoVn" + }, + "source": [ + "### Defining subclasses\n", + "\n", + "Extension types may be subclassed using the standard Python syntax. Extension type subclasses may add new fields, methods, and properties; and may override the constructor, the printable representation, and the equality operator. The following example defines a basic `TensorGraph` class that uses three `Tensor` fields to encode a set of edges between nodes. It then defines a subclass that adds a `Tensor` field to record a \"feature value\" for each node. The subclass also defines a method to propagate the feature values along the edges." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "58r6qRiK-uZh" + }, + "outputs": [], + "source": [ + "class TensorGraph(tf.experimental.ExtensionType):\n", + " num_nodes: tf.Tensor\n", + " edge_src: tf.Tensor # edge_src[e] = index of src node for edge e.\n", + " edge_dst: tf.Tensor # edge_dst[e] = index of dst node for edge e.\n", + "\n", + "class TensorGraphWithNodeFeature(TensorGraph):\n", + " node_features: tf.Tensor # node_features[n] = feature value for node n.\n", + "\n", + " def propagate_features(self, weight=1.0) -> 'TensorGraphWithNodeFeature':\n", + " updates = tf.gather(self.node_features, self.edge_src) * weight\n", + " new_node_features = tf.tensor_scatter_nd_add(\n", + " self.node_features, tf.expand_dims(self.edge_dst, 1), updates)\n", + " return TensorGraphWithNodeFeature(\n", + " self.num_nodes, self.edge_src, self.edge_dst, new_node_features)\n", + "\n", + "g = TensorGraphWithNodeFeature( # Edges: 0->1, 4->3, 2->2, 2->1\n", + " num_nodes=5, edge_src=[0, 4, 2, 2], edge_dst=[1, 3, 2, 1],\n", + " node_features=[10.0, 0.0, 2.0, 5.0, -1.0, 0.0])\n", + "\n", + "print(\"Original features:\", g.node_features)\n", + "print(\"After propagating:\", g.propagate_features().node_features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_oElT5HzqSG" + }, + "source": [ + "### Defining private fields\n", + "\n", + "An extension type's fields may be marked private by prefixing them with an underscore (following standard Python conventions). This does not impact the way that TensorFlow treats the fields in any way; but simply serves as a signal to any users of the extension type that those fields are private.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oMdH7ORqh8Pl" + }, + "source": [ + "### Customizing the `ExtensionType`'s `TypeSpec`\n", + "\n", + "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`. For more information, see the section \"Nested TypeSpec\" above.\n", + "\n", + "To customize the `TypeSpec`, simply define your own nested class named `Spec`, and `ExtensionType` will use that as the basis for the automatically constructed `TypeSpec`. 
You can customize the `Spec` class by:\n", + "\n", + "* Overriding the default printable representation.\n", + "* Overriding the default constructor.\n", + "* Defining methods, `classmethod`s, `staticmethod`s, and properties.\n", + "\n", + "The following example customizes the `MaskedTensor.Spec` class to make it easier to use:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gm4RaqbkLlNG" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " def with_values(self, new_values):\n", + " return MaskedTensor(new_values, self.mask)\n", + "\n", + " class Spec:\n", + " def __init__(self, shape, dtype=tf.float32):\n", + " self.values = tf.TensorSpec(shape, dtype)\n", + " self.mask = tf.TensorSpec(shape, tf.bool)\n", + "\n", + " def __repr__(self):\n", + " return f\"MaskedTensor.Spec(shape={self.shape}, dtype={self.dtype})\"\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s3zzUXPSNF72" + }, + "source": [ + "**Note**: The custom `Spec` class may not use any instance variables that were not declared in the original `ExtensionType`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rip4GCuYPL7o" + }, + "source": [ + "## Tensor API dispatch\n", + "\n", + "Extension types can be \"tensor-like\", in the sense that they specialize or extend the interface defined by the `tf.Tensor` type. Examples of tensor-like extension types include `RaggedTensor`, `SparseTensor`, and `MaskedTensor`. ***Dispatch decorators*** can be used to override the default behavior of TensorFlow operations when applied to tensor-like extension types. TensorFlow currently defines three dispatch decorators:\n", + "\n", + "* `@tf.experimental.dispatch_for_api(tf_api)`\n", + "* `@tf.experimental.dispatch_for_unary_elementwise_apis(x_type)`\n", + "* `@tf.experimental.dispatch_for_binary_elementwise_apis(x_type, y_type)`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5BTQHcY4gHwZ" + }, + "source": [ + "### Dispatch for a single API\n", + "\n", + "The `tf.experimental.dispatch_for_api` decorator overrides the default behavior of a specified TensorFlow operation when it is called with the specified signature. 
For example, you can use this decorator to specify how `tf.stack` should process `MaskedTensor` values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B4QgO_fUW2o2" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_api(tf.stack)\n", + "def masked_stack(values: List[MaskedTensor], axis = 0):\n", + " return MaskedTensor(tf.stack([v.values for v in values], axis),\n", + " tf.stack([v.mask for v in values], axis))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FxKcKWNUaLvm" + }, + "source": [ + "This overrides the default implementation for `tf.stack` whenever it is called with a list of `MaskedTensor` values (since the `values` argument is annotated with `typing.List[MaskedTensor]`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RqpFjaAvaA19" + }, + "outputs": [], + "source": [ + "x = MaskedTensor([1, 2, 3], [True, True, False])\n", + "y = MaskedTensor([4, 5, 6], [False, True, True])\n", + "tf.stack([x, y])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "loGi8taCa265" + }, + "source": [ + "To allow `tf.stack` to handle lists of mixed `MaskedTensor` and `Tensor` values, you can refine the type annotation for the `values` parameter and update the body of the function appropriately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_xySkm0ganAI" + }, + "outputs": [], + "source": [ + "tf.experimental.unregister_dispatch_for(masked_stack)\n", + "\n", + "def convert_to_masked_tensor(x):\n", + " if isinstance(x, MaskedTensor):\n", + " return x\n", + " else:\n", + " return MaskedTensor(x, tf.ones_like(x, tf.bool))\n", + "\n", + "@tf.experimental.dispatch_for_api(tf.stack)\n", + "def masked_stack_v2(values: List[Union[MaskedTensor, tf.Tensor]], axis = 0):\n", + " values = [convert_to_masked_tensor(v) for v in values]\n", + " return MaskedTensor(tf.stack([v.values for v in values], axis),\n", + " tf.stack([v.mask for v in values], axis))\n", + "x = MaskedTensor([1, 2, 3], [True, True, False])\n", + "y = tf.constant([4, 5, 6])\n", + "tf.stack([x, y, x])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ITioFCyjQm8V" + }, + "source": [ + "For a list of APIs that can be overridden, see the API documentation for `tf.experimental.dispatch_for_api`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f91SaHSqc-jO" + }, + "source": [ + "### Dispatch for all unary elementwise APIs\n", + "\n", + "The `tf.experimental.dispatch_for_unary_elementwise_apis` decorator overrides the default behavior of ***all*** unary elementwise ops (such as `tf.math.cos`) whenever the value for the first argument (typically named `x`) matches the type annotation `x_type`. 
The decorated function should take two arguments:\n", + "\n", + "* `api_func`: A function that takes a single parameter and performs the elementwise operation (for example, `tf.abs`).\n", + "* `x`: The first argument to the elementwise operation.\n", + "\n", + "The following example updates all unary elementwise operations to handle the `MaskedTensor` type:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cv5fV4xxZI9q" + }, + "outputs": [], + "source": [ + " @tf.experimental.dispatch_for_unary_elementwise_apis(MaskedTensor)\n", + " def masked_tensor_unary_elementwise_api_handler(api_func, x):\n", + " return MaskedTensor(api_func(x.values), x.mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qiK4n6vaeFwo" + }, + "source": [ + "This function will now be used whenever a unary elementwise operation is called on a `MaskedTensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SkH0xi5gd_41" + }, + "outputs": [], + "source": [ + " x = MaskedTensor([1, -2, -3], [True, False, True])\n", + " print(tf.abs(x))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2Ej5fxLBfaXW" + }, + "outputs": [], + "source": [ + "print(tf.ones_like(x, dtype=tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z9OgLyfEejqc" + }, + "source": [ + "### Dispatch for all binary elementwise APIs\n", + "\n", + "Similarly, `tf.experimental.dispatch_for_binary_elementwise_apis` can be used to update all binary elementwise operations to handle the `MaskedTensor` type:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8Du-GPofpCW" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_binary_elementwise_apis(MaskedTensor, MaskedTensor)\n", + "def masked_tensor_binary_elementwise_api_handler(api_func, x, y):\n", + " return MaskedTensor(api_func(x.values, y.values), x.mask & y.mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gghVHDfSfyi2" + }, + "outputs": [], + "source": [ + "x = MaskedTensor([1, -2, -3], [True, False, True])\n", + "y = MaskedTensor([[4], [5]], [[True], [False]])\n", + "tf.math.add(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "txTGg9pzG0Ux" + }, + "source": [ + "For a list of the elementwise APIs that are overridden, go to the API documentation for `tf.experimental.dispatch_for_unary_elementwise_apis` and `tf.experimental.dispatch_for_binary_elementwise_apis`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UseRtohYKiE5" + }, + "source": [ + "## Batchable `ExtensionType`s\n", + "\n", + "An `ExtensionType` is *batchable* if a single instance can be used to represent a batch of values. Typically, this is accomplished by adding batch dimensions to all nested `Tensor`s. The following TensorFlow APIs require that any extension type inputs be batchable:\n", + "\n", + "* `tf.data.Dataset` (`batch`, `unbatch`, `from_tensor_slices`)\n", + "* `tf.keras` (`fit`, `evaluate`, `predict`)\n", + "* `tf.map_fn`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hWPauKGj_yRz" + }, + "source": [ + "By default, `BatchableExtensionType` creates batched values by batching any nested `Tensor`s, `CompositeTensor`s, and `ExtensionType`s. If this is not appropriate for your class, then you will need to use `tf.experimental.ExtensionTypeBatchEncoder` to override this default behavior. 
For example, it would not be appropriate to create a batch of `tf.SparseTensor` values by simply stacking individual sparse tensors' `values`, `indices`, and `dense_shape` fields -- in most cases, you can't stack these tensors, since they have incompatible shapes; and even if you could, the result would not be a valid `SparseTensor`.\n", + "\n", + "\n", + "**Note**: `BatchableExtensionType`s do *not* automatically define dispatchers for `tf.stack`, `tf.concat`, `tf.slice`, etc. If your class needs to be supported by these APIs, then use the dispatch decorators described above." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xkOJ8ke8GH7s" + }, + "source": [ + "### `BatchableExtensionType` example: `Network`\n", + "As an example, consider a simple `Network` class used for load balancing, which tracks how much work is left to do at each node, and how much bandwidth is available to move work between nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tOeEXwCcfrPd" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.ExtensionType): # This version is not batchable.\n", + " work: tf.Tensor # work[n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[n1, n2] = bandwidth from n1->n2\n", + "\n", + "net1 = Network([5., 3, 8], [[0., 2, 0], [2, 0, 3], [0, 3, 0]])\n", + "net2 = Network([3., 4, 2], [[0., 2, 2], [2, 0, 2], [2, 2, 0]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PaOzUev6g3wT" + }, + "source": [ + "To make this type batchable, change the base type to `BatchableExtensionType`, and adjust the shape of each field to include optional batch dimensions. The following example also adds a `shape` field to keep track of the batch shape. This `shape` field is not required by `tf.data.Dataset` or `tf.map_fn`, but it *is* required by `tf.keras`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T03WWBSMg2XC" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.BatchableExtensionType):\n", + " shape: tf.TensorShape # batch shape. 
A single network has shape=[].\n", + " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", + "\n", + " def __init__(self, work, bandwidth):\n", + " self.work = tf.convert_to_tensor(work)\n", + " self.bandwidth = tf.convert_to_tensor(bandwidth)\n", + " work_batch_shape = self.work.shape[:-1]\n", + " bandwidth_batch_shape = self.bandwidth.shape[:-2]\n", + " self.shape = work_batch_shape.merge_with(bandwidth_batch_shape)\n", + "\n", + " def __repr__(self):\n", + " return network_repr(self)\n", + "\n", + "def network_repr(network):\n", + " work = network.work\n", + " bandwidth = network.bandwidth\n", + " if hasattr(work, 'numpy'):\n", + " work = ' '.join(str(work.numpy()).split())\n", + " if hasattr(bandwidth, 'numpy'):\n", + " bandwidth = ' '.join(str(bandwidth.numpy()).split())\n", + " return (f\"<Network shape={network.shape} work={work} bandwidth={bandwidth}>\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NUUJe9HuIPel" + }, + "outputs": [], + "source": [ + "net1 = Network([5., 3, 8], [[0., 2, 0], [2, 0, 3], [0, 3, 0]])\n", + "net2 = Network([3., 4, 2], [[0., 2, 2], [2, 0, 2], [2, 2, 0]])\n", + "batch_of_networks = Network(\n", + " work=tf.stack([net1.work, net2.work]),\n", + " bandwidth=tf.stack([net1.bandwidth, net2.bandwidth]))\n", + "print(f\"net1={net1}\")\n", + "print(f\"net2={net2}\")\n", + "print(f\"batch={batch_of_networks}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r0qWur5JGc3d" + }, + "source": [ + "You can then use `tf.data.Dataset` to iterate through a batch of networks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BN_kixAUFZtv" + }, + "outputs": [], + "source": [ + "dataset = tf.data.Dataset.from_tensor_slices(batch_of_networks)\n", + "for i, network in enumerate(dataset):\n", + " print(f\"Batch element {i}: {network}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aXENhTzIIjbM" + }, + "source": [ + "And you can also use `map_fn` to apply a function to each batch element:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j1XEsSWj9a3D" + }, + "outputs": [], + "source": [ + "def balance_work_greedy(network):\n", + " delta = (tf.expand_dims(network.work, -1) - tf.expand_dims(network.work, -2))\n", + " delta /= 4\n", + " delta = tf.maximum(tf.minimum(delta, network.bandwidth), -network.bandwidth)\n", + " new_work = network.work + tf.reduce_sum(delta, -1)\n", + " return Network(new_work, network.bandwidth)\n", + "\n", + "tf.map_fn(balance_work_greedy, batch_of_networks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f_HLsTT02Xul" + }, + "source": [ + "## TensorFlow APIs that support `ExtensionType`s" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NNiQad2U2alT" + }, + "source": [ + "### @tf.function\n", + "\n", + "[`tf.function`](https://www.tensorflow.org/guide/function) is a decorator that precomputes TensorFlow graphs for Python functions, which can substantially improve the performance of your TensorFlow code. Extension type values can be used transparently with `@tf.function`-decorated functions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jQ_rAvrA6qEb" + }, + "outputs": [], + "source": [ + "class Pastry(tf.experimental.ExtensionType):\n", + " sweetness: tf.Tensor # 2d embedding that encodes sweetness\n", + " chewiness: tf.Tensor # 2d embedding that encodes chewiness\n", + "\n", + "@tf.function\n", + "def combine_pastry_features(x: Pastry):\n", + " return (x.sweetness + x.chewiness) / 2\n", + "\n", + "cookie = Pastry(sweetness=[1.2, 0.4], chewiness=[0.8, 0.2])\n", + "combine_pastry_features(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1P-0Udg71Vx" + }, + "source": [ + "If you wish to explicitly specify the `input_signature` for `tf.function`, then you can do so using the extension type's `TypeSpec`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0df90E4x78d7" + }, + "outputs": [], + "source": [ + "pastry_spec = Pastry.Spec(tf.TensorSpec([2]), tf.TensorSpec(2))\n", + "\n", + "@tf.function(input_signature=[pastry_spec])\n", + "def increase_sweetness(x: Pastry, delta=1.0):\n", + " return Pastry(x.sweetness + delta, x.chewiness)\n", + "\n", + "increase_sweetness(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CdTfc5nD9JpD" + }, + "source": [ + "#### Concrete functions\n", + "Concrete functions encapsulate individual traced graphs that are built by `tf.function`. Extension types can be used transparently with concrete functions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FyHBBQWk9xz2" + }, + "outputs": [], + "source": [ + "cf = combine_pastry_features.get_concrete_function(pastry_spec)\n", + "cf(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LYas8gtG5IMA" + }, + "source": [ + "### Control flow operations\n", + "\n", + "Extension types are supported by TensorFlow's control-flow operations:\n", + "\n", + "* `tf.cond`\n", + "* `tf.case`\n", + "* `tf.while_loop`\n", + "* `tf.identity`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6G2XE9ZtJu8z" + }, + "outputs": [], + "source": [ + "# Example: using tf.cond to select between two MaskedTensors. Note that the\n", + "# two MaskedTensors don't need to have the same shape.\n", + "a = MaskedTensor([1., 2, 3], [True, False, True])\n", + "b = MaskedTensor([22., 33, 108, 55], [True, True, True, False])\n", + "condition = tf.constant(True)\n", + "print(tf.cond(condition, lambda: a, lambda: b))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2NwLOw1kKSek" + }, + "outputs": [], + "source": [ + "# Example: using tf.while_loop with MaskedTensor.\n", + "cond = lambda i, _: i < 10\n", + "def body(i, mt):\n", + " return i + 1, mt.with_values(mt.values + 3 / 7)\n", + "print(tf.while_loop(cond, body, [0, b])[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zkN7IuWVMRzn" + }, + "source": [ + "### Autograph control flow\n", + "\n", + "Extension types are also supported by control flow statements in `tf.function` (using autograph). In the following example, the `if` statement and `for` statements are automatically converted to `tf.cond` and `tf.while_loop` operations, which support extension types." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4RFySEl8gZ8w" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def fn(x, b):\n", + " if b:\n", + " x = MaskedTensor(x, tf.less(x, 0))\n", + " else:\n", + " x = MaskedTensor(x, tf.greater(x, 0))\n", + " for i in tf.range(5 if b else 7):\n", + " x = x.with_values(x.values + 1 / 2)\n", + " return x\n", + "\n", + "print(fn(tf.constant([1., -2, 3]), tf.constant(True)))\n", + "print(fn(tf.constant([1., -2, 3]), tf.constant(False)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-FjZt2ohfja4" + }, + "source": [ + "### Keras\n", + "\n", + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Extension types may be passed as inputs to a Keras model, passed between Keras layers, and returned by Keras models. Keras currently puts two requirements on extension types:\n", + "\n", + "* They must be batchable (go to \"Batchable `ExtensionType`s\" above).\n", + "* They must have a field or property named `shape`. `shape[0]` is assumed to be the batch dimension.\n", + "\n", + "The following two subsections give examples showing how extension types can be used with Keras.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QH1TXQYiGv8u" + }, + "source": [ + "#### Keras example: `Network`\n", + "\n", + "For the first example, consider the `Network` class defined in the \"Batchable `ExtensionType`s\" section above, which can be used for load balancing work between nodes. Its definition is repeated here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zHj1RIS2PK50" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.BatchableExtensionType):\n", + " shape: tf.TensorShape # batch shape. A single network has shape=[].\n", + " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", + "\n", + " def __init__(self, work, bandwidth):\n", + " self.work = tf.convert_to_tensor(work)\n", + " self.bandwidth = tf.convert_to_tensor(bandwidth)\n", + " work_batch_shape = self.work.shape[:-1]\n", + " bandwidth_batch_shape = self.bandwidth.shape[:-2]\n", + " self.shape = work_batch_shape.merge_with(bandwidth_batch_shape)\n", + "\n", + " def __repr__(self):\n", + " return network_repr(self)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w9LPTEVJD0FD" + }, + "outputs": [], + "source": [ + "single_network = Network( # A single network with 4 nodes.\n", + " work=[8.0, 5, 12, 2],\n", + " bandwidth=[[0.0, 1, 2, 2], [1, 0, 0, 2], [2, 0, 0, 1], [2, 2, 1, 0]])\n", + "\n", + "batch_of_networks = Network( # Batch of 2 networks, each w/ 2 nodes.\n", + " work=[[8.0, 5], [3, 2]],\n", + " bandwidth=[[[0.0, 1], [1, 0]], [[0, 2], [2, 0]]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IUfWi3SDD0dj" + }, + "source": [ + "You can define a new Keras layer that processes `Network`s." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2WSYt58r4SF1" + }, + "outputs": [], + "source": [ + "class BalanceNetworkLayer(tf.keras.layers.Layer):\n", + " \"\"\"Layer that balances work between nodes in a network.\n", + "\n", + " Shifts work from more busy nodes to less busy nodes, constrained by bandwidth.\n", + " \"\"\"\n", + " def call(self, inputs):\n", + " # This function is defined above in the \"Batchable `ExtensionType`s\" section.\n", + " return balance_work_greedy(inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VWwFJNb1E03q" + }, + "source": [ + "You can then use these layers to create a simple model. To feed an `ExtensionType` into a model, you can use a `tf.keras.layer.Input` layer with `type_spec` set to the extension type's `TypeSpec`. If the Keras model will be used to process batches, then the `type_spec` must include the batch dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "plTyqISRExA4" + }, + "outputs": [], + "source": [ + "input_spec = Network.Spec(shape=None,\n", + " work=tf.TensorSpec(None, tf.float32),\n", + " bandwidth=tf.TensorSpec(None, tf.float32))\n", + "model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(type_spec=input_spec),\n", + " BalanceNetworkLayer(),\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hyeAbt1WFIiO" + }, + "source": [ + "Finally, you can apply the model to a single network and to a batch of networks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hH1EtA5lFHdN" + }, + "outputs": [], + "source": [ + "model(single_network)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V7eM67M7FYYM" + }, + "outputs": [], + "source": [ + "model(batch_of_networks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tOxtt9Z1HDCv" + }, + "source": [ + "#### Keras example: MaskedTensor\n", + "\n", + "In this example, `MaskedTensor` is extended to support `Keras`. `shape` is defined as a property that is calculated from the `values` field. Keras requires that you add this property to both the extension type and its `TypeSpec`. `MaskedTensor` also defines a `__name__` variable, which will be required for `SavedModel` serialization (below)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1JBZ_t48Ht7e" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.BatchableExtensionType):\n", + " # __name__ is required for serialization in SavedModel; see below for details.\n", + " __name__ = 'extension_type_colab.MaskedTensor'\n", + "\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def with_default(self, default):\n", + " return tf.where(self.mask, self.values, default)\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " class Spec:\n", + " def __init__(self, shape, dtype=tf.float32):\n", + " self.values = tf.TensorSpec(shape, dtype)\n", + " self.mask = tf.TensorSpec(shape, tf.bool)\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def with_shape(self):\n", + " return MaskedTensor.Spec(tf.TensorSpec(shape, self.values.dtype),\n", + " tf.TensorSpec(shape, self.mask.dtype))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oer8BVc8H7_V" + }, + "source": [ + "Next, the dispatch decorators are used to override the default behavior of several TensorFlow APIs. Since these APIs are used by standard Keras layers (such as the `Dense` layer), overriding these will allow us to use those layers with `MaskedTensor`. For the purposes of this example, `matmul` for masked tensors is defined to treat the masked values as zeros (that is, to not include them in the product)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xy0dhQ_b-ca_" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_unary_elementwise_apis(MaskedTensor)\n", + "def unary_elementwise_op_handler(op, x):\n", + " return MaskedTensor(op(x.values), x.mask)\n", + "\n", + "@tf.experimental.dispatch_for_binary_elementwise_apis(\n", + " Union[MaskedTensor, tf.Tensor],\n", + " Union[MaskedTensor, tf.Tensor])\n", + "def binary_elementwise_op_handler(op, x, y):\n", + " x = convert_to_masked_tensor(x)\n", + " y = convert_to_masked_tensor(y)\n", + " return MaskedTensor(op(x.values, y.values), x.mask & y.mask)\n", + "\n", + "@tf.experimental.dispatch_for_api(tf.matmul)\n", + "def masked_matmul(a: MaskedTensor, b,\n", + " transpose_a=False, transpose_b=False,\n", + " adjoint_a=False, adjoint_b=False,\n", + " a_is_sparse=False, b_is_sparse=False,\n", + " output_type=None,\n", + " grad_a=False, grad_b=False,\n", + " name=None,\n", + " ):\n", + " if isinstance(a, MaskedTensor):\n", + " a = a.with_default(0)\n", + " if isinstance(b, MaskedTensor):\n", + " b = b.with_default(0)\n", + " return tf.matmul(a, b, transpose_a, transpose_b, adjoint_a,\n", + " adjoint_b, a_is_sparse, b_is_sparse,\n", + " output_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osJ_L-fKJusI" + }, + "source": [ + "You can then construct a Keras model that accepts `MaskedTensor` inputs, using standard Keras layers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IS6JCVbk1rd0" + }, + "outputs": [], + "source": [ + "input_spec = MaskedTensor.Spec([None, 2], tf.float32)\n", + "\n", + "masked_tensor_model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(type_spec=input_spec),\n", + " tf.keras.layers.Dense(16, activation=\"relu\"),\n", + " 
tf.keras.layers.Dense(1)])\n", + "masked_tensor_model.compile(loss='binary_crossentropy', optimizer='rmsprop')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SB1WUSzn1RPj" + }, + "outputs": [], + "source": [ + "a = MaskedTensor([[1., 2], [3, 4], [5, 6]],\n", + " [[True, False], [False, True], [True, True]])\n", + "masked_tensor_model.fit(a, tf.constant([[1], [0], [1]]), epochs=3)\n", + "print(masked_tensor_model(a))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "msmd9XcL2bqb" + }, + "source": [ + "### SavedModel\n", + "\n", + "A [SavedModel](https://www.tensorflow.org/guide/saved_model) is a serialized TensorFlow program, including both weights and computation. It can be built from a Keras model or from a custom model. In either case, extension types can be used transparently with the functions and methods defined by a SavedModel.\n", + "\n", + "SavedModel can save models, layers, and functions that process extension types, as long as the extension types have a `__name__` field. This name is used to register the extension type, so it can be located when the model is loaded." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PEtbFrz6-Vku" + }, + "source": [ + "#### Example: saving a Keras model\n", + "\n", + "Keras models that use extension types may be saved using `SavedModel`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ecxQMnybSzV6" + }, + "outputs": [], + "source": [ + "masked_tensor_model_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(masked_tensor_model, masked_tensor_model_path)\n", + "imported_model = tf.saved_model.load(masked_tensor_model_path)\n", + "imported_model(a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ne2nu3r6-XMr" + }, + "source": [ + "#### Example: saving a custom model\n", + "\n", + "SavedModel can also be used to save custom `tf.Module` subclasses with functions that process extension types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2V6hV3yOT2vz" + }, + "outputs": [], + "source": [ + "class CustomModule(tf.Module):\n", + " def __init__(self, variable_value):\n", + " super().__init__()\n", + " self.v = tf.Variable(variable_value)\n", + "\n", + " @tf.function\n", + " def grow(self, x: MaskedTensor):\n", + " \"\"\"Increase values in `x` by multiplying them by `self.v`.\"\"\"\n", + " return MaskedTensor(x.values * self.v, x.mask)\n", + "\n", + "module = CustomModule(100.0)\n", + "\n", + "module.grow.get_concrete_function(MaskedTensor.Spec(shape=None,\n", + " dtype=tf.float32))\n", + "custom_module_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(module, custom_module_path)\n", + "imported_model = tf.saved_model.load(custom_module_path)\n", + "imported_model.grow(MaskedTensor([1., 2, 3], [False, True, False]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o6beljh576ee" + }, + "source": [ + "#### Loading a SavedModel when the `ExtensionType` is unavailable\n", + "\n", + "If you load a `SavedModel` that uses an `ExtensionType`, but that `ExtensionType` is not available (that is, it has not been imported), then you will get a warning and TensorFlow will fall back to using an \"anonymous extension type\" object. This object will have the same fields as the original type, but will lack any further customization you have added for the type, such as custom methods or properties." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ec9PcUkJ9bFK" + }, + "source": [ + "#### Using `ExtensionType`s with TensorFlow Serving\n", + "\n", + "Currently, [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) (and other consumers of the SavedModel \"signatures\" dictionary) require that all inputs and outputs be raw tensors. If you wish to use TensorFlow Serving with a model that uses extension types, then you can add wrapper methods that compose or decompose extension type values from tensors. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4VnzAwVo9tTc" + }, + "outputs": [], + "source": [ + "class CustomModuleWrapper(tf.Module):\n", + " def __init__(self, variable_value):\n", + " super().__init__()\n", + " self.v = tf.Variable(variable_value)\n", + "\n", + " @tf.function\n", + " def var_weighted_mean(self, x: MaskedTensor):\n", + " \"\"\"Mean value of unmasked values in x, weighted by self.v.\"\"\"\n", + " x = MaskedTensor(x.values * self.v, x.mask)\n", + " return (tf.reduce_sum(x.with_default(0)) /\n", + " tf.reduce_sum(tf.cast(x.mask, x.dtype)))\n", + "\n", + " @tf.function()\n", + " def var_weighted_mean_wrapper(self, x_values, x_mask):\n", + " \"\"\"Raw tensor wrapper for var_weighted_mean.\"\"\"\n", + " return self.var_weighted_mean(MaskedTensor(x_values, x_mask))\n", + "\n", + "module = CustomModuleWrapper([3., 2., 8., 5.])\n", + "\n", + "module.var_weighted_mean_wrapper.get_concrete_function(\n", + " tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.bool))\n", + "custom_module_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(module, custom_module_path)\n", + "imported_model = tf.saved_model.load(custom_module_path)\n", + "x = MaskedTensor([1., 2., 3., 4.], [False, True, False, True])\n", + "imported_model.var_weighted_mean_wrapper(x.values, x.mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4dwBadWQ5G9_" + }, + "source": [ + "### `Dataset`s\n", + "\n", + "[`tf.data`](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcIR19FuwRJV" + }, + "source": [ + "#### Building `Dataset`s with extension types\n", + "\n", + "Datasets can be built from extension type values using `Dataset.from_tensors`, `Dataset.from_tensor_slices`, or `Dataset.from_generator`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Oe7fRCkzwdub" + }, + "outputs": [], + "source": [ + "ds = tf.data.Dataset.from_tensors(Pastry(5, 5))\n", + "iter(ds).next()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fk9CD2fZx6yT" + }, + "outputs": [], + "source": [ + "mt = MaskedTensor(tf.reshape(range(20), [5, 4]), tf.ones([5, 4]))\n", + "ds = tf.data.Dataset.from_tensor_slices(mt)\n", + "for value in ds:\n", + " print(value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DGw8y87awsOJ" + }, + "outputs": [], + "source": [ + "def value_gen():\n", + " for i in range(2, 7):\n", + " yield MaskedTensor(range(10), [j%i != 0 for j in range(10)])\n", + "\n", + "ds = tf.data.Dataset.from_generator(\n", + " value_gen, output_signature=MaskedTensor.Spec(shape=[10], dtype=tf.int32))\n", + "for value in ds:\n", + " print(value)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wfEm4NInyqtj" + }, + "source": [ + "#### Batching and unbatching `Dataset`s with extension types\n", + "\n", + "Datasets with extension types can be batched and unbatched using `Dataset.batch` and `Dataset.unbatch`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "snoOUE1ay1rO" + }, + "outputs": [], + "source": [ + "batched_ds = ds.batch(2)\n", + "for value in batched_ds:\n", + " print(value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f8PTky6EzBVY" + }, + "outputs": [], + "source": [ + "unbatched_ds = batched_ds.unbatch()\n", + "for value in unbatched_ds:\n", + " print(value)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "extension_type.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/function.ipynb b/site/en/guide/function.ipynb index 7a97c8337b0..f4677f21eb8 100644 --- a/site/en/guide/function.ipynb +++ b/site/en/guide/function.ipynb @@ -61,7 +61,7 @@ "id": "J122XQYG7W6w" }, "source": [ - "In TensorFlow 2, [eager execution](eager.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", + "In TensorFlow 2, [eager execution](basics.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", "\n", "You can use `tf.function` to make graphs out of your programs. It is a transformation tool that creates Python-independent dataflow graphs out of your Python code. This will help you create performant and portable models, and it is required to use `SavedModel`.\n", "\n", @@ -146,7 +146,7 @@ "source": [ "### Usage\n", "\n", - "A `Function` you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on."
+ "A `tf.function` that you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on." ] }, { @@ -157,7 +157,7 @@ }, "outputs": [], "source": [ - "@tf.function # The decorator converts `add` into a `Function`.\n", + "@tf.function # The decorator converts `add` into a `PolymorphicFunction`.\n", "def add(a, b):\n", " return a + b\n", "\n", @@ -184,7 +184,7 @@ "id": "ocWZvqrmHnmX" }, "source": [ - "You can use `Function`s inside other `Function`s." + "You can use `tf.function`s inside other `tf.function`s." ] }, { @@ -208,7 +208,7 @@ "id": "piBhz7gYsHqU" }, "source": [ - "`Function`s can be faster than eager code, especially for graphs with many small ops. But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" + "`tf.function`s can be faster than eager code, especially for graphs with many small ops. But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" ] }, { @@ -242,7 +242,7 @@ "source": [ "### Tracing\n", "\n", - "This section exposes how `Function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" + "This section exposes how `tf.function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" ] }, { @@ -253,17 +253,17 @@ "source": [ "#### What is \"tracing\"?\n", "\n", - "A `Function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", + "A `tf.function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", "\n", - "`Function` bridges this gap by separating your code in two stages:\n", + "`tf.function` bridges this gap by separating your code in two stages:\n", "\n", - " 1) In the first stage, referred to as \"**tracing**\", `Function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", + " 1) In the first stage, referred to as \"**tracing**\", `tf.function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", "\n", " 2) In the second stage, a `tf.Graph` which contains everything that was deferred in the first stage is run. 
This stage is much faster than the tracing stage.\n", "\n", - "Depending on its inputs, `Function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", + "Depending on its inputs, `tf.function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", "\n", - "When `Function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `Function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." + "When `tf.function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `tf.function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." ] }, { @@ -272,7 +272,7 @@ "id": "K7scSzLx662f" }, "source": [ - "When we pass arguments of different types into a `Function`, both stages are run:\n" + "When you pass arguments of different types into a `tf.function`, both stages are run:\n" ] }, { @@ -302,7 +302,7 @@ "id": "QPfouGUQrcNb" }, "source": [ - "Note that if you repeatedly call a `Function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." + "Note that if you repeatedly call a `tf.function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." ] }, { @@ -346,10 +346,11 @@ "So far, you've seen that `tf.function` creates a cached, dynamic dispatch layer over TensorFlow's graph tracing logic. To be more specific about the terminology:\n", "\n", "- A `tf.Graph` is the raw, language-agnostic, portable representation of a TensorFlow computation.\n", - "- A `ConcreteFunction` wraps a `tf.Graph`.\n", - "- A `Function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", - "- `tf.function` wraps a Python function, returning a `Function` object.\n", - "- **Tracing** creates a `tf.Graph` and wraps it in a `ConcreteFunction`, also known as a **trace.**\n" + "- Tracing is the process through which new `tf.Graph`s are generated from Python code.\n", + "- An instance of `tf.Graph` is specialized to the specific input types it was traced with. Differing types require retracing.\n", + "- Each traced `tf.Graph` has a corresponding `ConcreteFunction`.\n", + "- A `tf.function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", + "- `tf.function` wraps the Python function that will be traced, returning a `tf.types.experimental.PolymorphicFunction` object.\n" ] }, { @@ -360,20 +361,22 @@ "source": [ "#### Rules of tracing\n", "\n", - "A `Function` determines whether to reuse a traced `ConcreteFunction` by computing a **cache key** from an input's args and kwargs. 
A **cache key** is a key that identifies a `ConcreteFunction` based on the input args and kwargs of the `Function` call, according to the following rules (which may change):\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h62XoXho6EWN" - }, - "source": [ - "- The key generated for a `tf.Tensor` is its shape and dtype.\n", - "- The key generated for a `tf.Variable` is a unique variable id.\n", - "- The key generated for a Python primitive (like `int`, `float`, `str`) is its value. \n", - "- The key generated for nested `dict`s, `list`s, `tuple`s, `namedtuple`s, and [`attr`](https://www.attrs.org/en/stable/)s is the flattened tuple of leaf-keys (see `nest.flatten`). (As a result of this flattening, calling a concrete function with a different nesting structure than the one used during tracing will result in a TypeError).\n", - "- For all other Python types the key is unique to the object. This way a function or method is traced independently for each instance it is called with.\n" + "When called, a `tf.function` first evaluates the type of each input argument using the `tf.types.experimental.TraceType` of each argument. This is used to construct a `tf.types.experimental.FunctionType` describing the signature of the desired `ConcreteFunction`. We compare this `FunctionType` to the `FunctionType`s of existing `ConcreteFunction`s. If a matching `ConcreteFunction` is found, the call is dispatched to it. If no match is found, a new `ConcreteFunction` is traced for the desired `FunctionType`.\n", + "\n", + "If multiple matches are found, the most specific signature is chosen. Matching is done by [subtyping](https://en.wikipedia.org/wiki/Subtyping), much like normal function calls in C++ or Java, for instance. For example, `TensorShape([1, 2])` is a subtype of `TensorShape([None, None])` and so a call to the tf.function with `TensorShape([1, 2])` can be dispatched to the `ConcreteFunction` produced with `TensorShape([None, None])` but if a `ConcreteFunction` with `TensorShape([1, None])` also exists then it will be prioritized since it is more specific.\n", + "\n", + "The `TraceType` is determined from input arguments as follows:\n", + "* For `Tensor`, the type is parameterized by the `Tensor`'s `dtype` and `shape`; ranked shapes are a subtype of unranked shapes; fixed dimensions are a subtype of unknown dimensions\n", + "* For `Variable`, the type is similar to `Tensor`, but also includes a unique resource ID of the variable, necessary to correctly wire control dependencies\n", + "* For Python primitive values, the type corresponds to the **value** itself. For example, the `TraceType` of the value `3` is `LiteralTraceType<3>`, not `int`.\n", + "* For Python ordered containers such as `list` and `tuple`, etc., the type is parameterized by the types of their elements; for example, the type of `[1, 2]` is `ListTraceType<LiteralTraceType<1>, LiteralTraceType<2>>` and the type for `[2, 1]` is `ListTraceType<LiteralTraceType<2>, LiteralTraceType<1>>` which is different.\n", + "* For Python mappings such as `dict`, the type is also a mapping from the same keys but to the types of values instead of the actual values. For example, the type of `{1: 2, 3: 4}`, is `MappingTraceType<<KeyValue<1, LiteralTraceType<2>>>, <KeyValue<3, LiteralTraceType<4>>>>`. 
However, unlike ordered containers, `{1: 2, 3: 4}` and `{3: 4, 1: 2}` have equivalent types.\n", + "* For Python objects which implement the `__tf_tracing_type__` method, the type is whatever that method returns.\n", + "* For any other Python objects, the type is a generic `TraceType`, and the matching procedure is:\n", + " * First it checks if the object is the same object used in the previous trace (using Python `id()` or `is`). Note that this will still match if the object has changed, so if you use Python objects as `tf.function` arguments it's best to use *immutable* ones.\n", + " * Next it checks if the object is equal to the object used in the previous trace (using Python `==`).\n", + " \n", + " Note that this procedure only keeps a [weakref](https://docs.python.org/3/library/weakref.html) to the object and hence only works as long as the object is in scope/not deleted.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "GNNN4lgRzpIs" }, "source": [ - "Note: Cache keys are based on the `Function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." + "Note: `TraceType` is based on the `tf.function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." ] }, { "cell_type": "markdown", "metadata": { "id": "PEDwbumO32Wh" }, "source": [ - "#### Controlling retracing\n", + "### Controlling retracing\n", "\n", - "Retracing, which is when your `Function` creates more than one trace, helps ensures that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `Function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", + "Retracing, which is when your `tf.function` creates more than one trace, helps ensure that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `tf.function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", "\n", "To control the tracing behavior, you can use the following techniques:" ] }, { "cell_type": "markdown", "metadata": { "id": "EUtycWJa34TT" }, "source": [ - "- Specify `input_signature` in `tf.function` to limit tracing." + "#### Pass a fixed `input_signature` to `tf.function`\n", + "\n", + "This forces `tf.function` to constrain itself to only one `tf.types.experimental.FunctionType` composed of the types enumerated by the `input_signature`. Calls that cannot be dispatched to this `FunctionType` will throw an error."
] }, { @@ -422,11 +427,11 @@ "\n", "print(next_collatz(tf.constant([1, 2])))\n", "# You specified a 1-D tensor in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([[1, 2], [3, 4]]))\n", "\n", "# You specified an int32 dtype in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([1.0, 2.0]))\n" ] }, @@ -436,9 +441,9 @@ "id": "ocxX-HVk7P2o" }, "source": [ - "- Specify a \\[None\\] dimension in `tf.TensorSpec` to allow for flexibility in trace reuse.\n", + "#### Use unknown dimensions for flexibility\n", "\n", - " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `Function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch (See the [Transformer](../tutorials/text/transformer.ipynb) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for example)." + " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `tf.function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch. You can check out the [Transformer](https://www.tensorflow.org/text/tutorials/transformer) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for examples." ] }, { @@ -459,13 +464,48 @@ "print(g(tf.constant([1, 2, 3, 4, 5])))\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "37cc12f93cbd" + }, + "source": [ + "#### Use `reduce_retracing` for automatic flexibility\n", + "\n", + "When `reduce_retracing` is enabled, `tf.function` automatically identifies supertypes of the input types it is observing and chooses to trace more generalized graphs automatically. It is less efficient than setting the `input_signature` directly but useful when many types need to be supported." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0403fae03a1f" + }, + "outputs": [], + "source": [ + "@tf.function(reduce_retracing=True)\n", + "def g(x):\n", + " print('Tracing with', x)\n", + " return x\n", + "\n", + "# Traces once.\n", + "print(g(tf.constant([1, 2, 3])))\n", + "\n", + "# Traces again, but more generalized this time.\n", + "print(g(tf.constant([1, 2, 3, 4, 5])))\n", + "\n", + "# No more tracing!\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7])))\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9])))" + ] + }, { "cell_type": "markdown", "metadata": { "id": "AY5oiQN0XIyA" }, "source": [ - "- Cast Python arguments to Tensors to reduce retracing.\n", + "#### Pass tensors instead of python literals\n", "\n", " Often, Python arguments are used to control hyperparameters and graph constructions - for example, `num_layers=10` or `training=True` or `nonlinearity='relu'`. So, if the Python argument changes, it makes sense that you'd have to retrace the graph.\n", "\n", @@ -506,7 +546,7 @@ "id": "4pJqkDR_Q2wz" }, "source": [ - "If you need to force retracing, create a new `Function`. Separate `Function` objects are guaranteed not to share traces." + "If you need to force retracing, create a new `tf.function`. Separate `tf.function` objects are guaranteed not to share traces." 
] }, { @@ -525,6 +565,92 @@ "tf.function(f)()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "-tZoWrA6INvc" + }, + "source": [ + "#### Use the tracing protocol\n", + "\n", + "Where possible, you should prefer converting the Python type into a `tf.experimental.ExtensionType` instead. Moreover, the `TraceType` of an `ExtensionType` is the `tf.TypeSpec` associated with it. Therefore, if needed, you can simply override the default `tf.TypeSpec` to take control of an `ExtensionType`'s `Tracing Protocol`. Refer to the _Customizing the ExtensionType's TypeSpec_ section in the [Extension types](extension_type.ipynb) guide for details.\n", + "\n", + "Otherwise, for direct control over when `tf.function` should retrace with regard to a particular Python type, you can implement the `Tracing Protocol` for it yourself." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gZkIh7UaIKc6" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def get_mixed_flavor(fruit_a, fruit_b):\n", + " return fruit_a.flavor + fruit_b.flavor\n", + "\n", + "class Fruit:\n", + " flavor = tf.constant([0, 0])\n", + "\n", + "class Apple(Fruit):\n", + " flavor = tf.constant([1, 2])\n", + "\n", + "class Mango(Fruit):\n", + " flavor = tf.constant([3, 4])\n", + "\n", + "# As described in the above rules, a generic TraceType for `Apple` and `Mango`\n", + "# is generated (and a corresponding ConcreteFunction is traced) but it fails to\n", + "# match the second function call since the first pair of Apple() and Mango()\n", + "# have gone out of scope by then and been deleted.\n", + "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function\n", + "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function again\n", + "\n", + "# However, each subclass of the `Fruit` class has a fixed flavor, and you\n", + "# can reuse an existing traced concrete function if it was the same\n", + "# subclass. 
Avoiding such unnecessary tracing of concrete functions\n", + "# can have significant performance benefits.\n", + "\n", + "class FruitTraceType(tf.types.experimental.TraceType):\n", + " def __init__(self, fruit):\n", + " self.fruit_type = type(fruit)\n", + " self.fruit_value = fruit\n", + "\n", + " def is_subtype_of(self, other):\n", + " # True if self subtypes `other` and `other`'s type matches FruitTraceType.\n", + " return (type(other) is FruitTraceType and\n", + " self.fruit_type is other.fruit_type)\n", + "\n", + " def most_specific_common_supertype(self, others):\n", + " # `self` is the specific common supertype if all input types match it.\n", + " return self if all(self == other for other in others) else None\n", + "\n", + " def placeholder_value(self, placeholder_context=None):\n", + " # Use the fruit itself instead of the type for correct tracing.\n", + " return self.fruit_value\n", + "\n", + " def __eq__(self, other):\n", + " return type(other) is FruitTraceType and self.fruit_type == other.fruit_type\n", + "\n", + " def __hash__(self):\n", + " return hash(self.fruit_type)\n", + "\n", + "class FruitWithTraceType:\n", + "\n", + " def __tf_tracing_type__(self, context):\n", + " return FruitTraceType(self)\n", + "\n", + "class AppleWithTraceType(FruitWithTraceType):\n", + " flavor = tf.constant([1, 2])\n", + "\n", + "class MangoWithTraceType(FruitWithTraceType):\n", + " flavor = tf.constant([3, 4])\n", + "\n", + "# Now if you try calling it again:\n", + "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Traces a new concrete function\n", + "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Re-uses the traced concrete function" + ] + }, { "cell_type": "markdown", "metadata": { @@ -601,8 +727,7 @@ }, "outputs": [], "source": [ - "print(double_strings.structured_input_signature)\n", - "print(double_strings.structured_outputs)" + "print(double_strings.function_type)" ] }, { @@ -673,7 +798,7 @@ "source": [ "### Obtaining graphs\n", "\n", - "Each concrete function is a callable wrapper around a `tf.Graph`. Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." + "Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." ] }, { @@ -689,6 +814,36 @@ " print(f'{node.input} -> {node.name}')\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2d49c486ccd4" + }, + "source": [ + "In reality, `tf.Graph`s are not directly callable. We actually use an `tf.types.experimental.AtomicFunction` to perform the computations described by the `tf.Graph`. You can access the `AtomicFunction` describing the traced `tf.Graph` and call it directly instead of the `ConcreteFunction`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4c3879aa0be0" + }, + "outputs": [], + "source": [ + "atomic_fn = double_strings.inference_fn\n", + "atomic_fn(tf.constant(\"a\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c3bd1036c18c" + }, + "source": [ + "This has the advantage of having lower Python overhead for high-performance scenarios. But it should only be used for forward inference (no gradient support), and captured tensor values (if any) would need to be explicitly supplied." 
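One way to see the lower Python overhead mentioned above is to time the two call paths, for example with `timeit`. This is a rough sketch that assumes `double_strings` is the concrete function obtained earlier in this guide; the exact numbers will vary by machine.

```python
import timeit
import tensorflow as tf

s = tf.constant("a")

# Calling through the ConcreteFunction (full dispatch and checking logic).
concrete_time = timeit.timeit(lambda: double_strings(s), number=1000)

# Calling the underlying AtomicFunction directly (forward inference only).
atomic_time = timeit.timeit(lambda: double_strings.inference_fn(s), number=1000)

print("ConcreteFunction:", concrete_time, "seconds")
print("AtomicFunction:  ", atomic_time, "seconds")
```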
+ ] + }, { "cell_type": "markdown", "metadata": { @@ -745,7 +900,7 @@ "id": "KxwJ8znPI0Cg" }, "source": [ - "If you're curious you can inspect the code autograph generates." + "If you're curious you can inspect the code AutoGraph generates." ] }, { @@ -884,9 +1039,9 @@ "id": "JeD2U-yrbfVb" }, "source": [ - "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensors`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", + "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensor_slices`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", "\n", - "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](../../guide/data) guide." + "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](data.ipynb) guide." ] }, { @@ -927,7 +1082,7 @@ " state = rnn_step(input_data[i], state)\n", " states = states.write(i, state)\n", " return tf.transpose(states.stack(), [1, 0, 2])\n", - " \n", + "\n", "dynamic_rnn(rnn_step,\n", " tf.random.uniform([batch_size, seq_len, feature_size]),\n", " tf.zeros([batch_size, feature_size]))" @@ -941,7 +1096,7 @@ "source": [ "## Limitations\n", "\n", - "TensorFlow `Function` has a few limitations by design that you should be aware of when converting a Python function to a `Function`." + "`tf.function` has a few limitations by design that you should be aware of when converting a Python function to a `tf.function`." ] }, { @@ -952,7 +1107,7 @@ "source": [ "### Executing Python side effects\n", "\n", - "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `Function`, sometimes executing twice or not all. They only happen the first time you call a `Function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", + "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `tf.function`, sometimes executing twice or not all. They only happen the first time you call a `tf.function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", "\n", "The general rule of thumb is to avoid relying on Python side effects in your logic and only use them to debug your traces. Otherwise, TensorFlow APIs like `tf.data`, `tf.print`, `tf.summary`, `tf.Variable.assign`, and `tf.TensorArray` are the best way to ensure your code will be executed by the TensorFlow runtime with each call." 
] @@ -981,7 +1136,66 @@ "id": "e1I0dPiqTV8H" }, "source": [ - "If you would like to execute Python code during each invocation of a `Function`, `tf.py_function` is an exit hatch. The drawback of `tf.py_function` is that it's not portable or particularly performant, cannot be saved with SavedModel, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + "If you would like to execute Python code during each invocation of a `tf.function`, `tf.py_function` is an exit hatch. The drawbacks of `tf.py_function` are that it's not portable or particularly performant, cannot be saved with `SavedModel`, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbI7XA_e6yA2" + }, + "outputs": [], + "source": [ + "@tf.py_function(Tout=tf.float32)\n", + "def py_plus(x, y):\n", + " print('Executing eagerly.')\n", + " return x + y\n", + "\n", + "@tf.function\n", + "def tf_wrapper(x, y):\n", + " print('Tracing.')\n", + " return py_plus(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h5ttN_sI7TdQ" + }, + "source": [ + "The `tf.function` will trace the first time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mAK4XINl7Ldy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Atxvrd_o7dSy" + }, + "source": [ + "But the `tf.py_function` inside executes eagerly every time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vv7qTiTU7bjy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" ] }, { @@ -1017,13 +1231,86 @@ "assert len(external_list) == 1" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "5eZTFRv_k_nR" + }, + "source": [ + "Sometimes unexpected behaviors are very hard to notice. In the example below, the `counter` is intended to safeguard the increment of a variable. However, because it is a Python integer and not a TensorFlow object, its value is captured during the first trace. When the `tf.function` is used, the `assign_add` will be recorded unconditionally in the underlying graph. Therefore, `v` will increase by 1 every time the `tf.function` is called. This issue is common among users who try to migrate their graph-mode TensorFlow code to TensorFlow 2 using `tf.function` decorators, when Python side effects (the `counter` in the example) are used to determine what ops to run (`assign_add` in the example). Usually, users realize this only after seeing suspicious numerical results, or significantly lower performance than expected (e.g. if the guarded operation is very costly)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5r6p7-9jk_3L" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = tf.Variable(0)\n", + " self.counter = 0\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.counter == 0:\n", + " # A python side-effect\n", + " self.counter += 1\n", + " self.v.assign_add(1)\n", + "\n", + " return self.v\n", + "\n", + "m = Model()\n", + "for n in range(3):\n", + " print(m().numpy()) # prints 1, 2, 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tXCTcHoVcxhX" + }, + "source": [ + "A workaround to achieve the expected behavior is using [`tf.init_scope`](https://www.tensorflow.org/api_docs/python/tf/init_scope) to lift the operations outside of the function graph. This ensures that the variable increment is only done once during tracing time. It should be noted `init_scope` has other side effects including cleared control flow and gradient tape. Sometimes the usage of `init_scope` can become too complex to manage realistically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "An4MrIbrcvi8" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = tf.Variable(0)\n", + " self.counter = 0\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.counter == 0:\n", + " # Lifts ops out of function-building graphs\n", + " with tf.init_scope():\n", + " self.counter += 1\n", + " self.v.assign_add(1)\n", + "\n", + " return self.v\n", + "\n", + "m = Model()\n", + "for n in range(3):\n", + " print(m().numpy()) # prints 1, 1, 1" + ] + }, { "cell_type": "markdown", "metadata": { "id": "pbFG5CX4LwQA" }, "source": [ - "You should avoid mutating containers like lists, dicts, other objects that live outside the `Function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", + "In summary, as a rule of thumb, you should avoid mutating python objects such as integers or containers like lists that live outside the `tf.function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", "\n", "You can, in some cases, capture and manipulate state if it is a [`tf.Variable`](https://www.tensorflow.org/guide/variable). This is how the weights of Keras models are updated with repeated calls to the same `ConcreteFunction`." ] @@ -1094,43 +1381,6 @@ "good_consume_next(iterator)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "FHQ0UeU-vWo8" - }, - "source": [ - "### Deleting tf.Variables between `Function` calls\n", - "\n", - "Another error you may encounter is a garbage-collected variable. `ConcreteFunction`s only retain [WeakRefs](https://docs.python.org/3/library/weakref.html) to the variables they close over, so you must retain a reference to any variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uMiRPfETjpt-" - }, - "outputs": [], - "source": [ - "external_var = tf.Variable(3)\n", - "@tf.function\n", - "def f(x):\n", - " return x * external_var\n", - "\n", - "traced_f = f.get_concrete_function(4)\n", - "print(\"Calling concrete function...\")\n", - "print(traced_f(4))\n", - "\n", - "# The original variable object gets garbage collected, since there are no more\n", - "# references to it.\n", - "external_var = tf.Variable(4)\n", - "print()\n", - "print(\"Calling concrete function after garbage collecting its closed Variable...\")\n", - "with assert_raises(tf.errors.FailedPreconditionError):\n", - " traced_f(4)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1166,9 +1416,10 @@ "correct_a = leaky_function(tf.constant(1))\n", "\n", "print(correct_a.numpy()) # Good - value obtained from function's returns\n", - "with assert_raises(AttributeError):\n", + "try:\n", " x.numpy() # Bad - tensor leaked from inside the function, cannot be used here\n", - "print(x)" + "except AttributeError as expected:\n", + " print(expected)" ] }, { @@ -1197,9 +1448,10 @@ "correct_a = leaky_function(tf.constant(1))\n", "\n", "print(correct_a.numpy()) # Good - value obtained from function's returns\n", - "with assert_raises(AttributeError):\n", + "try:\n", " x.numpy() # Bad - tensor leaked from inside the function, cannot be used here\n", - "print(x)\n", + "except AttributeError as expected:\n", + " print(expected)\n", "\n", "@tf.function\n", "def captures_leaked_tensor(b):\n", @@ -1244,6 +1496,64 @@ " external_object.field = a # Bad - leaks tensor" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "g-XVQcD-wf5K" + }, + "source": [ + "### Recursive tf.functions are not supported\n", + "\n", + "Recursive `tf.function`s are not supported and could cause infinite loops. For example," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QSN-T1m5EFcR" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def recursive_fn(n):\n", + " if n > 0:\n", + " return recursive_fn(n - 1)\n", + " else:\n", + " return 1\n", + "\n", + "with assert_raises(Exception):\n", + " recursive_fn(tf.constant(5)) # Bad - maximum recursion error." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LyRyooKGUxNV" + }, + "source": [ + "Even if a recursive `tf.function` seems to work, the Python function will be traced multiple times and could have performance implications. For example," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FlmTqfMUwmT" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def recursive_fn(n):\n", + " if n > 0:\n", + " print('tracing')\n", + " return recursive_fn(n - 1)\n", + " else:\n", + " return 1\n", + "\n", + "recursive_fn(5) # Warning - multiple tracings" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1252,7 +1562,7 @@ "source": [ "## Known Issues\n", "\n", - "If your `Function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." + "If your `tf.function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." ] }, { @@ -1263,9 +1573,9 @@ "source": [ "### Depending on Python global and free variables\n", "\n", - "`Function` creates a new `ConcreteFunction` when called with a new value of a Python argument. 
However, it does not do that for the Python closure, globals, or nonlocals of that `Function`. If their value changes in between calls to the `Function`, the `Function` will still use the values they had when it was traced. This is different from how regular Python functions work.\n", + "`tf.function` creates a new `ConcreteFunction` when called with a new value of a Python argument. However, it does not do that for the Python closure, globals, or nonlocals of that `tf.function`. If their value changes in between calls to the `tf.function`, the `tf.function` will still use the values they had when it was traced. This is different from how regular Python functions work.\n", "\n", - "For that reason, we recommend a functional programming style that uses arguments instead of closing over outer names." + "For that reason, you should follow a functional programming style that uses arguments instead of closing over outer names." ] }, { @@ -1306,10 +1616,39 @@ { "cell_type": "markdown", "metadata": { - "id": "Tu0SnPwaL7pI" + "id": "ZoPg5w1Pjqnb" + }, + "source": [ + "Another way to update a global value is to make it a `tf.Variable` and use the `Variable.assign` method instead.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oeJMdXd3M0cc" }, + "outputs": [], "source": [ - "You can close over outer names, as long as you don't update their values.\n" + "@tf.function\n", + "def variable_add():\n", + " return 1 + foo\n", + "\n", + "foo = tf.Variable(1)\n", + "print(\"Variable:\", variable_add())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L3q7sUJWZOSd" + }, + "outputs": [], + "source": [ + "print(\"Updating the value of `foo` to 100!\")\n", + "foo.assign(100)\n", + "print(\"Variable:\", variable_add())" ] }, { @@ -1318,7 +1657,7 @@ "id": "hvwe9gTIWfx6" }, "source": [ - "#### Depending on Python objects" + "### Depending on Python objects" ] }, { @@ -1327,7 +1666,11 @@ "id": "BJkZS-SwPvOQ" }, "source": [ - "The recommendation to pass Python objects as arguments into `tf.function` has a number of known issues, that are expected to be fixed in the future. In general, you can rely on consistent tracing if you use a Python primitive or `tf.nest`-compatible structure as an argument or pass in a *different* instance of an object into a `Function`. However, `Function` will *not* create a new trace when you pass **the same object and only change its attributes**." + "Passing custom Python objects as arguments to `tf.function` is supported but has certain limitations.\n", + "\n", + "For maximum feature coverage, consider transforming the objects into [Extension types](extension_type.ipynb) before passing them to `tf.function`. You can also use Python primitives and `tf.nest`-compatible structures.\n", + "\n", + "However, as covered in the [rules of tracing](#rules_of_tracing), when a custom `TraceType` is not provided by the custom Python class, `tf.function` is forced to use instance-based equality which means it will **not create a new trace** when you pass the **same object with modified attributes**." 
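The limitation can be sketched as follows (the `Config` class and `apply_scale` function are illustrative, not the `SimpleModel` example used later in the guide): because the default `TraceType` for a plain Python object is based on the instance itself, mutating an attribute does not invalidate the existing trace.

```python
import tensorflow as tf

class Config:
  def __init__(self, scale):
    self.scale = scale

@tf.function
def apply_scale(config, x):
  print("Tracing with scale =", config.scale)
  return x * config.scale

cfg = Config(2.0)
x = tf.constant(3.0)

print(apply_scale(cfg, x))           # Traces; returns 6.0.

cfg.scale = 10.0                     # Mutating the same instance...
print(apply_scale(cfg, x))           # ...still returns 6.0: the old trace is reused.

print(apply_scale(Config(10.0), x))  # A new instance forces a retrace: returns 30.0.
```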
] }, { @@ -1372,11 +1715,11 @@ "id": "Ytcgg2qFWaBF" }, "source": [ - "Using the same `Function` to evaluate the updated instance of the model will be buggy since the updated model has the [same cache key](#rules_of_tracing) as the original model.\n", + "Using the same `tf.function` to evaluate the modified instance of the model will be buggy since it still has the [same instance-based TraceType](#rules_of_tracing) as the original model.\n", "\n", - "For that reason, we recommend that you write your `Function` to avoid depending on mutable object attributes or create new objects.\n", + "For that reason, you're recommended to write your `tf.function` to avoid depending on mutable object attributes or implement the [Tracing Protocol](#use_the_tracing_protocol) for the objects to inform `tf.function` about such attributes.\n", "\n", - "If that is not possible, one workaround is to make new `Function`s each time you modify your object to force retracing:" + "If that is not possible, one workaround is to make new `tf.function`s each time you modify your object to force retracing:" ] }, { @@ -1392,8 +1735,8 @@ "\n", "new_model = SimpleModel()\n", "evaluate_no_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", - "# Don't pass in `new_model`, `Function` already captured its state during tracing.\n", - "print(evaluate_no_bias(x)) " + "# Don't pass in `new_model`. `tf.function` already captured its state during tracing.\n", + "print(evaluate_no_bias(x))" ] }, { @@ -1406,7 +1749,7 @@ "source": [ "print(\"Adding bias!\")\n", "new_model.bias += 5.0\n", - "# Create new Function and ConcreteFunction since you modified new_model.\n", + "# Create new `tf.function` and `ConcreteFunction` since you modified `new_model`.\n", "evaluate_with_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", "print(evaluate_with_bias(x)) # Don't pass in `new_model`." ] @@ -1463,7 +1806,7 @@ "source": [ "### Creating tf.Variables\n", "\n", - "`Function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", + "`tf.function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", "\n", "Example:" ] @@ -1524,7 +1867,7 @@ }, "source": [ "#### Using with multiple Keras optimizers\n", - "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variables` when they apply gradients for the first time." + "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variable`s when they apply gradients for the first time." 
] }, { @@ -1537,7 +1880,7 @@ "source": [ "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", - " \n", + "\n", "@tf.function\n", "def train_step(w, x, y, optimizer):\n", " with tf.GradientTape() as tape:\n", @@ -1561,7 +1904,46 @@ "id": "7Q8BRPCThTjB" }, "source": [ - "If you need to change the optimizer during training, a workaround is to create a new `Function` for each optimizer, calling the [`ConcreteFunction`](#obtaining_concrete_functions) directly." + "If you need to change a stateful object between calls, it's simplest to define a `tf.Module` subclass, and create instances to hold those objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3P59ocmIslHz" + }, + "outputs": [], + "source": [ + "class TrainStep(tf.Module):\n", + " def __init__(self, optimizer):\n", + " self.optimizer = optimizer\n", + "\n", + " @tf.function\n", + " def __call__(self, w, x, y):\n", + " with tf.GradientTape() as tape:\n", + " L = tf.reduce_sum(tf.square(w*x - y))\n", + " gradients = tape.gradient(L, [w])\n", + " self.optimizer.apply_gradients(zip(gradients, [w]))\n", + "\n", + "\n", + "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", + "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", + "\n", + "train_o1 = TrainStep(opt1)\n", + "train_o2 = TrainStep(opt2)\n", + "\n", + "train_o1(w, x, y)\n", + "train_o2(w, x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dUHUi881smHF" + }, + "source": [ + "You could also do this manually by creating multiple instances of the `@tf.function` wrapper, one for each optimizer:" ] }, { @@ -1586,14 +1968,14 @@ "x = tf.constant([-1.])\n", "y = tf.constant([2.])\n", "\n", - "# Make a new Function and ConcreteFunction for each optimizer.\n", - "train_step_1 = tf.function(train_step).get_concrete_function(w, x, y, opt1)\n", - "train_step_2 = tf.function(train_step).get_concrete_function(w, x, y, opt2)\n", + "# Make a new tf.function and ConcreteFunction for each optimizer.\n", + "train_step_1 = tf.function(train_step)\n", + "train_step_2 = tf.function(train_step)\n", "for i in range(10):\n", " if i % 2 == 0:\n", - " train_step_1(w, x, y) # `opt1` is not used as a parameter. \n", + " train_step_1(w, x, y, opt1)\n", " else:\n", - " train_step_2(w, x, y) # `opt2` is not used as a parameter." + " train_step_2(w, x, y, opt2)" ] }, { @@ -1604,9 +1986,9 @@ "source": [ "#### Using with multiple Keras models\n", "\n", - "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `Function`.\n", + "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `tf.function`.\n", "\n", - "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variables`s when they are first called. You may be attempting to initialize those variables inside a `Function`, which has already been called. 
To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" + "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variable`s when they are first called. You may be attempting to initialize those variables inside a `tf.function`, which has already been called. To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" ] }, { @@ -1617,14 +1999,14 @@ "source": [ "## Further reading\n", "\n", - "To learn about how to export and load a `Function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." + "To learn about how to export and load a `tf.function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "function.ipynb", + "private_outputs": true, "provenance": [], "toc_visible": true }, diff --git a/site/en/guide/gpu_performance_analysis.md b/site/en/guide/gpu_performance_analysis.md index ecbb03ad3ad..2832686d8f1 100644 --- a/site/en/guide/gpu_performance_analysis.md +++ b/site/en/guide/gpu_performance_analysis.md @@ -169,8 +169,8 @@ the trace viewer, you should look at the model code between steps and check if disabling callbacks/metrics improves performance. Some details of these ops are also on the trace viewer (both device and host side).The recommendation in this scenario is to amortize the overhead of these ops by executing them after a -fixed number of steps instead of every step. When using the `compile` method in -the `tf.keras` API, setting the `experimental_steps_per_execution` flag does +fixed number of steps instead of every step. When using the `Model.compile` method in +the `tf.keras` API, setting the `steps_per_execution` flag does this automatically. For custom training loops, use `tf.while_loop`. #### 2. Achieve higher device utilization diff --git a/site/en/guide/graph_optimization.ipynb b/site/en/guide/graph_optimization.ipynb index 50eedda621c..063d8817489 100644 --- a/site/en/guide/graph_optimization.ipynb +++ b/site/en/guide/graph_optimization.ipynb @@ -90,7 +90,7 @@ "* *Constant folding optimizer -* Statically infers the value of tensors when possible by folding constant nodes in the graph and materializes the result using constants.\n", "* *Arithmetic optimizer -* Simplifies arithmetic operations by eliminating common subexpressions and simplifying arithmetic statements. 
\n", "* *Layout optimizer -* Optimizes tensor layouts to execute data format dependent operations such as convolutions more efficiently.\n", - "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occuring subgraphs with optimized fused monolithic kernels.\n", + "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occurring subgraphs with optimized fused monolithic kernels.\n", "* *Memory optimizer -* Analyzes the graph to inspect the peak memory usage for each operation and inserts CPU-GPU memory copy operations for swapping GPU memory to CPU to reduce the peak memory usage.\n", "* *Dependency optimizer -* Removes or rearranges control dependencies to shorten the critical path for a model step or enables other\n", "optimizations. Also removes nodes that are effectively no-ops such as Identity.\n", @@ -101,7 +101,7 @@ "* *Loop optimizer -* Optimizes the graph control flow by hoisting loop-invariant subgraphs out of loops and by removing redundant stack operations in loops. Also optimizes loops with statically known trip counts and removes statically known dead branches in conditionals.\n", "* *Scoped allocator optimizer -* Introduces scoped allocators to reduce data movement and to consolidate some operations.\n", "* *Pin to host optimizer -* Swaps small operations onto the CPU. This optimizer is turned OFF by default. \n", - "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies only to GPUs.\n", + "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies to GPUs and the latest Intel Xeon CPUs.\n", "* *Debug stripper -* Strips nodes related to debugging operations such as `tf.debugging.Assert`, `tf.debugging.check_numerics`, and `tf.print` from the graph. This optimizer is turned OFF by default." ] }, @@ -166,7 +166,7 @@ "source": [ "## Compare execution performance with and without Grappler\n", "\n", - "TensorFlow 2 and beyond executes [eagerly](../eager.md) by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. \n" + "TensorFlow 2 and beyond executes eagerly by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. 
\n" ] }, { diff --git a/site/en/guide/images/new_type_promotion/type_promotion_lattice.png b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png new file mode 100644 index 00000000000..501698965a2 Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png differ diff --git a/site/en/guide/images/new_type_promotion/type_promotion_table.png b/site/en/guide/images/new_type_promotion/type_promotion_table.png new file mode 100644 index 00000000000..62bb465212a Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_table.png differ diff --git a/site/en/guide/images/tensor/shape2.png b/site/en/guide/images/tensor/shape2.png index 3609ff2c263..a316359c8fc 100644 Binary files a/site/en/guide/images/tensor/shape2.png and b/site/en/guide/images/tensor/shape2.png differ diff --git a/site/en/guide/intro_to_graphs.ipynb b/site/en/guide/intro_to_graphs.ipynb index 38cfcb967d8..4fe442632ba 100644 --- a/site/en/guide/intro_to_graphs.ipynb +++ b/site/en/guide/intro_to_graphs.ipynb @@ -70,13 +70,13 @@ "source": [ "## Overview\n", "\n", - "This guide goes beneath the surface of TensorFlow and Keras to demonstrate how TensorFlow works. If you instead want to immediately get started with Keras, check out the [collection of Keras guides](keras/).\n", + "This guide goes beneath the surface of TensorFlow and Keras to demonstrate how TensorFlow works. If you instead want to immediately get started with Keras, check out the [collection of Keras guides](https://www.tensorflow.org/guide/keras/).\n", "\n", "In this guide, you'll learn how TensorFlow allows you to make simple changes to your code to get graphs, how graphs are stored and represented, and how you can use them to accelerate your models.\n", "\n", "Note: For those of you who are only familiar with TensorFlow 1.x, this guide demonstrates a very different view of graphs.\n", "\n", - "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [`tf.function` guide](function).\n" + "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [Better performance with `tf.function`](./function.ipynb) guide.\n" ] }, { @@ -87,13 +87,13 @@ "source": [ "### What are graphs?\n", "\n", - "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and returning results back to Python.\n", + "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and return results back to Python.\n", "\n", "While eager execution has several unique advantages, graph execution enables portability outside Python and tends to offer better performance. **Graph execution** means that tensor computations are executed as a *TensorFlow graph*, sometimes referred to as a `tf.Graph` or simply a \"graph.\"\n", "\n", "**Graphs are data structures that contain a set of `tf.Operation` objects, which represent units of computation; and `tf.Tensor` objects, which represent the units of data that flow between operations.** They are defined in a `tf.Graph` context. 
Since these graphs are data structures, they can be saved, run, and restored all without the original Python code.\n", "\n", - "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard.\n" + "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard:" ] }, { @@ -113,7 +113,7 @@ "source": [ "### The benefits of graphs\n", "\n", - "With a graph, you have a great deal of flexibility. You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](saved_model) when it exports them from Python.\n", + "With a graph, you have a great deal of flexibility. You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](./saved_model.ipynb) when it exports them from Python.\n", "\n", "Graphs are also easily optimized, allowing the compiler to do transformations like:\n", "\n", @@ -144,6 +144,15 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0d1689fa928f" + }, + "source": [ + "Import some necessary libraries:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -165,7 +174,7 @@ "source": [ "## Taking advantage of graphs\n", "\n", - "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `Function`. **A `Function` is a Python callable that builds TensorFlow graphs from the Python function. You use a `Function` in the same way as its Python equivalent.**\n" + "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `tf.types.experimental.PolymorphicFunction`. **A `PolymorphicFunction` is a Python callable that builds TensorFlow graphs from the Python function. You use a `tf.function` in the same way as its Python equivalent.**\n" ] }, { @@ -182,7 +191,8 @@ " x = x + b\n", " return x\n", "\n", - "# `a_function_that_uses_a_graph` is a TensorFlow `Function`.\n", + "# The Python type of `a_function_that_uses_a_graph` will now be a\n", + "# `PolymorphicFunction`.\n", "a_function_that_uses_a_graph = tf.function(a_regular_function)\n", "\n", "# Make some tensors.\n", @@ -191,7 +201,7 @@ "b1 = tf.constant(4.0)\n", "\n", "orig_value = a_regular_function(x1, y1, b1).numpy()\n", - "# Call a `Function` like a Python function.\n", + "# Call a `tf.function` like a Python function.\n", "tf_function_value = a_function_that_uses_a_graph(x1, y1, b1).numpy()\n", "assert(orig_value == tf_function_value)" ] @@ -202,7 +212,7 @@ "id": "PNvuAYpdrTOf" }, "source": [ - "On the outside, a `Function` looks like a regular function you write using TensorFlow operations. [Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/def_function.py), however, it is *very different*. A `Function` **encapsulates [several `tf.Graph`s behind one API](#polymorphism_one_function_many_graphs).** That is how `Function` is able to give you the [benefits of graph execution](#the_benefits_of_graphs), like speed and deployability." + "On the outside, a `tf.function` looks like a regular function you write using TensorFlow operations. 
[Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/polymorphic_function/polymorphic_function.py), however, it is *very different*. The underlying `PolymorphicFunction` **encapsulates several `tf.Graph`s behind one API** (learn more in the _Polymorphism_ section). That is how a `tf.function` is able to give you the benefits of graph execution, like speed and deployability (refer to _The benefits of graphs_ above)." ] }, { @@ -227,7 +237,8 @@ " x = x + b\n", " return x\n", "\n", - "# Use the decorator to make `outer_function` a `Function`.\n", + "# Using the `tf.function` decorator makes `outer_function` into a\n", + "# `PolymorphicFunction`.\n", "@tf.function\n", "def outer_function(x):\n", " y = tf.constant([[2.0], [3.0]])\n", @@ -274,7 +285,8 @@ " else:\n", " return 0\n", "\n", - "# `tf_simple_relu` is a TensorFlow `Function` that wraps `simple_relu`.\n", + "# Using `tf.function` makes `tf_simple_relu` a `PolymorphicFunction` that wraps\n", + "# `simple_relu`.\n", "tf_simple_relu = tf.function(simple_relu)\n", "\n", "print(\"First branch, with graph:\", tf_simple_relu(tf.constant(1)).numpy())\n", @@ -320,7 +332,7 @@ "id": "GZ4Ieg6tBE6l" }, "source": [ - "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [tf.function guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)" + "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [`tf.function` guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)." ] }, { @@ -329,13 +341,13 @@ "id": "sIpc_jfjEZEg" }, "source": [ - "### Polymorphism: one `Function`, many graphs\n", + "### Polymorphism: one `tf.function`, many graphs\n", "\n", - "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id])).\n", + "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id)).\n", "\n", - "Each time you invoke a `Function` with new `dtypes` and shapes in its arguments, `Function` creates a new `tf.Graph` for the new arguments. The `dtypes` and shapes of a `tf.Graph`'s inputs are known as an **input signature** or just a **signature**.\n", + "Each time you invoke a `tf.function` with a set of arguments that can't be handled by any of its existing graphs (such as arguments with new `dtypes` or incompatible shapes), it creates a new `tf.Graph` specialized to those new arguments. The type specification of a `tf.Graph`'s inputs is represented by `tf.types.experimental.FunctionType`, also referred to as the **signature**. 
For more information regarding when a new `tf.Graph` is generated, how that can be controlled, and how `FunctionType` can be useful, go to the _Rules of tracing_ section of the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "The `Function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. **A `ConcreteFunction` is a wrapper around a `tf.Graph`.**\n" + "The `tf.function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. **A `ConcreteFunction` can be thought of as a wrapper around a `tf.Graph`.**\n" ] }, { @@ -350,7 +362,7 @@ "def my_relu(x):\n", " return tf.maximum(0., x)\n", "\n", - "# `my_relu` creates new graphs as it observes more signatures.\n", + "# `my_relu` creates new graphs as it observes different input types.\n", "print(my_relu(tf.constant(5.5)))\n", "print(my_relu([1, -1]))\n", "print(my_relu(tf.constant([3., -3.])))" @@ -362,7 +374,7 @@ "id": "1qRtw7R4KL9X" }, "source": [ - "If the `Function` has already been called with that signature, `Function` does not create a new `tf.Graph`." + "If the `tf.function` has already been called with the same input types, it does not create a new `tf.Graph`." ] }, { @@ -374,8 +386,8 @@ "outputs": [], "source": [ "# These two calls do *not* create new graphs.\n", - "print(my_relu(tf.constant(-2.5))) # Signature matches `tf.constant(5.5)`.\n", - "print(my_relu(tf.constant([-1., 1.]))) # Signature matches `tf.constant([3., -3.])`." + "print(my_relu(tf.constant(-2.5))) # Input type matches `tf.constant(5.5)`.\n", + "print(my_relu(tf.constant([-1., 1.]))) # Input type matches `tf.constant([3., -3.])`." ] }, { @@ -384,7 +396,7 @@ "id": "UohRmexhIpvQ" }, "source": [ - "Because it's backed by multiple graphs, a `Function` is **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, as well as to optimize each `tf.Graph` for better performance." + "Because it's backed by multiple graphs, a `tf.function` is (as the name \"PolymorphicFunction\" suggests) **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, and to optimize each `tf.Graph` for better performance." ] }, { @@ -419,7 +431,7 @@ "source": [ "### Graph execution vs. eager execution\n", "\n", - "The code in a `Function` can be executed both eagerly and as a graph. By default, `Function` executes its code as a graph:\n" + "The code in a `tf.function` can be executed both eagerly and as a graph. By default, `tf.function` executes its code as a graph:\n" ] }, { @@ -467,7 +479,7 @@ "id": "cyZNCRcQorGO" }, "source": [ - "To verify that your `Function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `Function`'s ability to create and run graphs**, instead executing the code normally." + "To verify that your `tf.function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `tf.function`'s ability to create and run graphs**, instead of executing the code normally." ] }, { @@ -510,7 +522,7 @@ "id": "DKT3YBsqy0x4" }, "source": [ - "However, `Function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. 
Let's check out what happens when you insert a `print` statement to your function and call it repeatedly.\n" + "However, `tf.function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. Let's check out what happens when you insert a `print` statement to your function and call it repeatedly." ] }, { @@ -558,7 +570,7 @@ "source": [ "Is the output surprising? **`get_MSE` only printed once even though it was called *three* times.**\n", "\n", - "To explain, the `print` statement is executed when `Function` runs the original code in order to create the graph in a process known as [\"tracing\"](function.ipynb#tracing). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", + "To explain, the `print` statement is executed when `tf.function` runs the original code in order to create the graph in a process known as \"tracing\" (refer to the _Tracing_ section of the [`tf.function` guide](./function.ipynb). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", "\n", "As a sanity check, let's turn off graph execution to compare:" ] @@ -606,7 +618,7 @@ "id": "PUR7qC_bquCn" }, "source": [ - "`print` is a *Python side effect*, and there are [other differences](function#limitations) that you should be aware of when converting a function into a `Function`." + "`print` is a *Python side effect*, and there are other differences that you should be aware of when converting a function into a `tf.function`. Learn more in the _Limitations_ section of the [Better performance with `tf.function`](./function.ipynb) guide." ] }, { @@ -628,7 +640,7 @@ "\n", "\n", "\n", - "Graph execution only executes the operations necessary to produce the observable effects, which includes:\n", + "Graph execution only executes the operations necessary to produce the observable effects, which include:\n", "\n", "- The return value of the function\n", "- Documented well-known side-effects such as:\n", @@ -676,7 +688,7 @@ " tf.gather(x, [1]) # unused\n", " return x\n", "\n", - "# Only needed operations are run during graph exection. The error is not raised.\n", + "# Only needed operations are run during graph execution. The error is not raised.\n", "print(unused_return_graph(tf.constant([0.0])))" ] }, @@ -686,16 +698,16 @@ "id": "def6MupG9R0O" }, "source": [ - "###`tf.function` best practices\n", + "### `tf.function` best practices\n", "\n", - "It may take some time to get used to the behavior of `Function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", + "It may take some time to get used to the behavior of `tf.function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", "\n", "*Designing for `tf.function`* may be your best bet for writing graph-compatible TensorFlow programs. 
Here are some tips:\n", "- Toggle between eager and graph execution early and often with `tf.config.run_functions_eagerly` to pinpoint if/ when the two modes diverge.\n", "- Create `tf.Variable`s\n", - "outside the Python function and modify them on the inside. The same goes for objects that use `tf.Variable`, like `keras.layers`, `keras.Model`s and `tf.optimizers`.\n", - "- Avoid writing functions that [depend on outer Python variables](function#depending_on_python_global_and_free_variables), excluding `tf.Variable`s and Keras objects.\n", - "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but [be careful](function#depending_on_python_objects)!\n", + "outside the Python function and modify them on the inside. The same goes for objects that use `tf.Variable`, like `tf.keras.layers`, `tf.keras.Model`s and `tf.keras.optimizers`.\n", + "- Avoid writing functions that depend on outer Python variables, excluding `tf.Variable`s and Keras objects. Learn more in _Depending on Python global and free variables_ of the [`tf.function` guide](./function.ipynb).\n", + "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but be careful! Learn more in _Depending on Python objects_ of the [`tf.function` guide](./function.ipynb).\n", "- Include as much computation as possible under a `tf.function` to maximize the performance gain. For example, decorate a whole training step or the entire training loop.\n" ] }, @@ -742,7 +754,7 @@ }, "outputs": [], "source": [ - "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000))" + "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000), \"seconds\")" ] }, { @@ -754,7 +766,7 @@ "outputs": [], "source": [ "power_as_graph = tf.function(power)\n", - "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000))" + "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000), \"seconds\")" ] }, { @@ -763,9 +775,9 @@ "id": "Q1Pfo5YwwILi" }, "source": [ - "`tf.function` is commonly used to speed up training loops, and you can learn more about it in [Writing a training loop from scratch](keras/writing_a_training_loop_from_scratch#speeding-up_your_training_step_with_tffunction) with Keras.\n", + "`tf.function` is commonly used to speed up training loops, and you can learn more about it in the _Speeding-up your training step with `tf.function`_ section of the [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) with Keras guide.\n", "\n", - "Note: You can also try [`tf.function(jit_compile=True)`](https://www.tensorflow.org/xla#explicit_compilation_with_tffunctionjit_compiletrue) for a more significant performance boost, especially if your code is heavy on TF control flow and uses many small tensors." + "Note: You can also try `tf.function(jit_compile=True)` for a more significant performance boost, especially if your code is heavy on TensorFlow control flow and uses many small tensors. Learn more in the _Explicit compilation with `tf.function(jit_compile=True)`_ section of the [XLA overview](https://www.tensorflow.org/xla)." ] }, { @@ -778,7 +790,7 @@ "\n", "Graphs can speed up your code, but the process of creating them has some overhead. For some functions, the creation of the graph takes more time than the execution of the graph. 
**This investment is usually quickly paid back with the performance boost of subsequent executions, but it's important to be aware that the first few steps of any large model training can be slower due to tracing.**\n", "\n", - "No matter how large your model, you want to avoid tracing frequently. The `tf.function` guide discusses [how to set input specifications and use tensor arguments](function#controlling_retracing) to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." + "No matter how large your model, you want to avoid tracing frequently. In the _Controlling retracing_ section, the [`tf.function` guide](./function.ipynb) discusses how to set input specifications and use tensor arguments to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." ] }, { @@ -787,9 +799,9 @@ "id": "F4InDaTjwmBA" }, "source": [ - "## When is a `Function` tracing?\n", + "## When is a `tf.function` tracing?\n", "\n", - "To figure out when your `Function` is tracing, add a `print` statement to its code. As a rule of thumb, `Function` will execute the `print` statement every time it traces." + "To figure out when your `tf.function` is tracing, add a `print` statement to its code. As a rule of thumb, `tf.function` will execute the `print` statement every time it traces." ] }, { @@ -843,13 +855,12 @@ "source": [ "## Next steps\n", "\n", - "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](function) guide." + "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](./function.ipynb) guide." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "intro_to_graphs.ipynb", "toc_visible": true }, diff --git a/site/en/guide/intro_to_modules.ipynb b/site/en/guide/intro_to_modules.ipynb index 93b677b6e7e..79bbe89ca56 100644 --- a/site/en/guide/intro_to_modules.ipynb +++ b/site/en/guide/intro_to_modules.ipynb @@ -91,6 +91,7 @@ "outputs": [], "source": [ "import tensorflow as tf\n", + "import keras\n", "from datetime import datetime\n", "\n", "%load_ext tensorboard" @@ -102,10 +103,12 @@ "id": "yt5HEbsYAbw1" }, "source": [ - "## Defining models and layers in TensorFlow\n", + "## TensorFlow Modules\n", "\n", "Most models are made of layers. Layers are functions with a known mathematical structure that can be reused and have trainable variables. In TensorFlow, most high-level implementations of layers and models, such as Keras or [Sonnet](https://github.com/deepmind/sonnet), are built on the same foundational class: `tf.Module`.\n", "\n", + "### Building Modules\n", + "\n", "Here's an example of a very simple `tf.Module` that operates on a scalar tensor:\n" ] }, @@ -337,7 +340,7 @@ "id": "JOLVVBT8J_dl" }, "source": [ - "## Saving weights\n", + "### Saving weights\n", "\n", "You can save a `tf.Module` as both a [checkpoint](./checkpoint.ipynb) and a [SavedModel](./saved_model.ipynb).\n", "\n", @@ -403,7 +406,7 @@ "id": "4eGaNiQWcK4j" }, "source": [ - "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). In this case, though, there is only have one shard.\n", + "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). 
In this case, though, there is only one shard.\n", "\n", "When you load models back in, you overwrite the values in your Python object." ] @@ -439,7 +442,7 @@ "id": "pSZebVuWxDXu" }, "source": [ - "## Saving functions\n", + "### Saving functions\n", "\n", "TensorFlow can run models without the original Python objects, as demonstrated by [TensorFlow Serving](https://tensorflow.org/tfx) and [TensorFlow Lite](https://tensorflow.org/lite), even when you download a trained model from [TensorFlow Hub](https://tensorflow.org/hub).\n", "\n", @@ -696,7 +699,26 @@ "\n", "Note that up until this point, there is no mention of Keras. You can build your own high-level API on top of `tf.Module`, and people have. \n", "\n", - "In this section, you will examine how Keras uses `tf.Module`. A complete user guide to Keras models can be found in the [Keras guide](keras/sequential_model.ipynb).\n" + "In this section, you will examine how Keras uses `tf.Module`. A complete user guide to Keras models can be found in the [Keras guide](https://www.tensorflow.org/guide/keras/sequential_model).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ds08u3touwe4t" + }, + "source": [ + "Keras layers and models have a lot more extra features including:\n", + "\n", + "* Optional losses\n", + "* Support for [metrics](https://keras.io/api/layers/base_layer/#add_metric-method)\n", + "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", + "* Saving and restoring python objects instead of just black-box functions\n", + "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", + "\n", + "These features allow for far more complex models through subclassing, such as a custom GAN or a Variational AutoEncoder (VAE) model. Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models.\n", + "\n", + "Keras models also come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines." ] }, { @@ -874,22 +896,6 @@ " print(\"Failed:\", e)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "YnporXiudF1I" - }, - "source": [ - "Keras layers have a lot more extra features including:\n", - "\n", - "* Optional losses\n", - "* Support for metrics\n", - "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", - "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", - "\n", - "Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models." - ] - }, { "cell_type": "markdown", "metadata": { @@ -900,7 +906,7 @@ "\n", "You can define your model as nested Keras layers.\n", "\n", - "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used, nested, and saved in the same way as Keras layers. Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", + "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used and nested in the same way as Keras layers. 
Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", "\n", "You can define the `SequentialModule` from above with nearly identical code, again converting `__call__` to `call()` and changing the parent:" ] @@ -913,6 +919,7 @@ }, "outputs": [], "source": [ + "@keras.saving.register_keras_serializable()\n", "class MySequentialModel(tf.keras.Model):\n", " def __init__(self, name=None, **kwargs):\n", " super().__init__(**kwargs)\n", @@ -938,7 +945,7 @@ "source": [ "All the same features are available, including tracking variables and submodules.\n", "\n", - "Note: To emphasize the note above, a raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." + "Note: A raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." ] }, { @@ -1022,11 +1029,9 @@ "id": "qI9aXLnaHEFF" }, "source": [ - "## Saving Keras models\n", - "\n", - "Keras models can be checkpointed, and that will look the same as `tf.Module`.\n", + "### Saving Keras models\n", "\n", - "Keras models can also be saved with `tf.saved_model.save()`, as they are modules. However, Keras models have convenience methods and other functionality:" + "Keras models have their own specialized zip archive saving format, marked by the `.keras` extension. When calling `tf.keras.Model.save`, add a `.keras` extension to the filename. For example:" ] }, { @@ -1037,7 +1042,7 @@ }, "outputs": [], "source": [ - "my_sequential_model.save(\"exname_of_file\")" + "my_sequential_model.save(\"exname_of_file.keras\")" ] }, { @@ -1057,7 +1062,7 @@ }, "outputs": [], "source": [ - "reconstructed_model = tf.keras.models.load_model(\"exname_of_file\")" + "reconstructed_model = tf.keras.models.load_model(\"exname_of_file.keras\")" ] }, { @@ -1066,7 +1071,7 @@ "id": "EA7P_MNvpviZ" }, "source": [ - "Keras `SavedModels` also save metric, loss, and optimizer states.\n", + "Keras zip archives — `.keras` files — also save metric, loss, and optimizer states.\n", "\n", "This reconstructed model can be used and will produce the same result when called on the same data:" ] @@ -1082,13 +1087,24 @@ "reconstructed_model(tf.constant([[2.0, 2.0, 2.0]]))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "seLIUG2354s" + }, + "source": [ + "### Checkpointing Keras models\n", + "\n", + "Keras models can also be checkpointed, and that will look the same as `tf.Module`." + ] + }, { "cell_type": "markdown", "metadata": { "id": "xKyjlkceqjwD" }, "source": [ - "There is more to know about saving and serialization of Keras models, including providing configuration methods for custom layers for feature support. Check out the [guide to saving and serialization](keras/save_and_serialize)." + "There is more to know about saving and serialization of Keras models, including providing configuration methods for custom layers for feature support. Check out the [guide to saving and serialization](https://www.tensorflow.org/guide/keras/save_and_serialize)." ] }, { diff --git a/site/en/guide/jax2tf.ipynb b/site/en/guide/jax2tf.ipynb new file mode 100644 index 00000000000..613c622658d --- /dev/null +++ b/site/en/guide/jax2tf.ipynb @@ -0,0 +1,851 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ckM5wJMsNTYL" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "NKvERjPVNWxu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bqePLdDjNhNk" + }, + "source": [ + "# Import a JAX model using JAX2TF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gw3w46yhNiK_" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyrsY3uTOmPY" + }, + "source": [ + "This notebook provides a complete, runnable example of creating a model using [JAX](https://jax.readthedocs.io/en/latest/) and bringing it into TensorFlow to continue training. This is made possible by [JAX2TF](https://github.com/google/jax/tree/main/jax/experimental/jax2tf), a lightweight API that provides a pathway from the JAX ecosystem to the TensorFlow ecosystem. \n", + "\n", + "JAX is a high-performance array computing library. To create the model, this notebook uses [Flax](https://flax.readthedocs.io/en/latest/), a neural network library for JAX. To train it, it uses [Optax](https://optax.readthedocs.io), an optimization library for JAX.\n", + "\n", + "If you're a researcher using JAX, JAX2TF gives you a path to production using TensorFlow's proven tools.\n", + "\n", + "There are many ways this can be useful, here are just a few:\n", + "\n", + "* Inference: Taking a model written for JAX and deploying it either on a server using TF Serving, on-device using TFLite, or on the web using TensorFlow.js. \n", + "\n", + "* Fine-tuning: Taking a model that was trained using JAX, you can bring its components to TF using JAX2TF, and continue training it in TensorFlow with your existing training data and setup.\n", + "\n", + "* Fusion: Combining parts of models that were trained using JAX with those trained using TensorFlow, for maximum flexibility.\n", + "\n", + "The key to enabling this kind of interoperation between JAX and TensorFlow is `jax2tf.convert`, which takes in model components created on top of JAX (your loss function, prediction function, etc) and creates equivalent representations of them as TensorFlow functions, which can then be exported as a TensorFlow SavedModel." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G6rtu96yOepm" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9yqxfHzr0LPF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import jax\n", + "import jax.numpy as jnp\n", + "import flax\n", + "import optax\n", + "import os\n", + "from matplotlib import pyplot as plt\n", + "from jax.experimental import jax2tf\n", + "from threading import Lock # Only used in the visualization utility.\n", + "from functools import partial" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SDnTaZO0r872" + }, + "outputs": [], + "source": [ + "# Needed for TensorFlow and JAX to coexist in GPU memory.\n", + "os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = \"false\"\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " try:\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + " except RuntimeError as e:\n", + " # Memory growth must be set before GPUs have been initialized.\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BXOjCNJxDLil" + }, + "outputs": [], + "source": [ + "#@title Visualization utilities\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (20,8)\n", + "\n", + "# The utility for displaying training and validation curves.\n", + "def display_train_curves(loss, avg_loss, eval_loss, eval_accuracy, epochs, steps_per_epochs, ignore_first_n=10):\n", + "\n", + " ignore_first_n_epochs = int(ignore_first_n/steps_per_epochs)\n", + "\n", + " # The losses.\n", + " ax = plt.subplot(121)\n", + " if loss is not None:\n", + " x = np.arange(len(loss)) / steps_per_epochs #* epochs\n", + " ax.plot(x, loss)\n", + " ax.plot(range(1, epochs+1), avg_loss, \"-o\", linewidth=3)\n", + " ax.plot(range(1, epochs+1), eval_loss, \"-o\", linewidth=3)\n", + " ax.set_title('Loss')\n", + " ax.set_ylabel('loss')\n", + " ax.set_xlabel('epoch')\n", + " if loss is not None:\n", + " ax.set_ylim(0, np.max(loss[ignore_first_n:]))\n", + " ax.legend(['train', 'avg train', 'eval'])\n", + " else:\n", + " ymin = np.min(avg_loss[ignore_first_n_epochs:])\n", + " ymax = np.max(avg_loss[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.legend(['avg train', 'eval'])\n", + "\n", + " # The accuracy.\n", + " ax = plt.subplot(122)\n", + " ax.set_title('Eval Accuracy')\n", + " ax.set_ylabel('accuracy')\n", + " ax.set_xlabel('epoch')\n", + " ymin = np.min(eval_accuracy[ignore_first_n_epochs:])\n", + " ymax = np.max(eval_accuracy[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.plot(range(1, epochs+1), eval_accuracy, \"-o\", linewidth=3)\n", + "\n", + "class Progress:\n", + " \"\"\"Text mode progress bar.\n", + " Usage:\n", + " p = Progress(30)\n", + " p.step()\n", + " p.step()\n", + " p.step(reset=True) # to restart form 0%\n", + " The progress bar displays a new header at each restart.\"\"\"\n", + " def __init__(self, maxi, size=100, msg=\"\"):\n", + " \"\"\"\n", + " :param maxi: the number of steps required to reach 100%\n", + " :param size: the number of characters taken on the screen by the progress bar\n", + " :param msg: the message displayed in the header of the progress bar\n", + " \"\"\"\n", + " self.maxi = maxi\n", + " self.p = self.__start_progress(maxi)() # `()`: to get the iterator from the 
generator.\n", + " self.header_printed = False\n", + " self.msg = msg\n", + " self.size = size\n", + " self.lock = Lock()\n", + "\n", + " def step(self, reset=False):\n", + " with self.lock:\n", + " if reset:\n", + " self.__init__(self.maxi, self.size, self.msg)\n", + " if not self.header_printed:\n", + " self.__print_header()\n", + " next(self.p)\n", + "\n", + " def __print_header(self):\n", + " print()\n", + " format_string = \"0%{: ^\" + str(self.size - 6) + \"}100%\"\n", + " print(format_string.format(self.msg))\n", + " self.header_printed = True\n", + "\n", + " def __start_progress(self, maxi):\n", + " def print_progress():\n", + " # Bresenham's algorithm. Yields the number of dots printed.\n", + " # This will always print 100 dots in max invocations.\n", + " dx = maxi\n", + " dy = self.size\n", + " d = dy - dx\n", + " for x in range(maxi):\n", + " k = 0\n", + " while d >= 0:\n", + " print('=', end=\"\", flush=True)\n", + " k += 1\n", + " d -= dx\n", + " d += dy\n", + " yield k\n", + " # Keep yielding the last result if there are too many steps.\n", + " while True:\n", + " yield k\n", + "\n", + " return print_progress" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xgS_8nDDIu8" + }, + "source": [ + "## Download and prepare the MNIST dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nbN7rmuF0VFB" + }, + "outputs": [], + "source": [ + "(x_train, train_labels), (x_test, test_labels) = tf.keras.datasets.mnist.load_data()\n", + "\n", + "train_data = tf.data.Dataset.from_tensor_slices((x_train, train_labels))\n", + "train_data = train_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "\n", + "BATCH_SIZE = 256\n", + "train_data = train_data.batch(BATCH_SIZE, drop_remainder=True)\n", + "train_data = train_data.cache()\n", + "train_data = train_data.shuffle(5000, reshuffle_each_iteration=True)\n", + "\n", + "test_data = tf.data.Dataset.from_tensor_slices((x_test, test_labels))\n", + "test_data = test_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "test_data = test_data.batch(10000)\n", + "test_data = test_data.cache()\n", + "\n", + "(one_batch, one_batch_labels) = next(iter(train_data)) # just one batch\n", + "(all_test_data, all_test_labels) = next(iter(test_data)) # all in one batch since batch size is 10000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LuZTo7SM3W_n" + }, + "source": [ + "## Configure training\n", + "This notebook will create and train a simple model for demonstration purposes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3vbKB4yZ3aTL" + }, + "outputs": [], + "source": [ + "# Training hyperparameters.\n", + "JAX_EPOCHS = 3\n", + "TF_EPOCHS = 7\n", + "STEPS_PER_EPOCH = len(train_labels)//BATCH_SIZE\n", + "LEARNING_RATE = 0.01\n", + "LEARNING_RATE_EXP_DECAY = 0.6\n", + "\n", + "# The learning rate schedule for JAX (with Optax).\n", + "jlr_decay = optax.exponential_decay(LEARNING_RATE, transition_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)\n", + "\n", + "# THe learning rate schedule for TensorFlow.\n", + "tflr_decay = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=LEARNING_RATE, decay_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Od3sMwQxtC34" + }, + "source": [ + "## Create the model using Flax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-ybqQF2zd2QX" + }, + "outputs": [], + "source": [ + "class ConvModel(flax.linen.Module):\n", + "\n", + " @flax.linen.compact\n", + " def __call__(self, x, train):\n", + " x = flax.linen.Conv(features=12, kernel_size=(3,3), padding=\"SAME\", use_bias=False)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = x.reshape((x.shape[0], -1)) # flatten\n", + " x = flax.linen.Dense(features=200, use_bias=True)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = flax.linen.Dropout(rate=0.3, deterministic=not train)(x)\n", + " x = flax.linen.relu(x)\n", + " x = flax.linen.Dense(features=10)(x)\n", + " #x = flax.linen.log_softmax(x)\n", + " return x\n", + "\n", + " # JAX differentiation requires a function `f(params, other_state, data, labels)` -> `loss` (as a single number).\n", + " # `jax.grad` will differentiate it against the fist argument.\n", + " # The user must split trainable and non-trainable variables into `params` and `other_state`.\n", + " # Must pass a different RNG key each time for the dropout mask to be different.\n", + " def loss(self, params, other_state, rng, data, labels, train):\n", + " logits, batch_stats = self.apply({'params': params, **other_state},\n", + " data,\n", + " mutable=['batch_stats'],\n", + " rngs={'dropout': rng},\n", + " train=train)\n", + " # The loss averaged across the batch dimension.\n", + " loss = optax.softmax_cross_entropy(logits, labels).mean()\n", + " return loss, batch_stats\n", + "\n", + " def predict(self, state, data):\n", + " logits = self.apply(state, data, train=False) # predict and accuracy disable dropout and use accumulated batch norm stats (train=False)\n", + " probabilities = flax.linen.log_softmax(logits)\n", + " return probabilities\n", + "\n", + " def accuracy(self, state, data, labels):\n", + " probabilities = self.predict(state, data)\n", + " predictions = jnp.argmax(probabilities, axis=-1)\n", + " dense_labels = jnp.argmax(labels, axis=-1)\n", + " accuracy = jnp.equal(predictions, dense_labels).mean()\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Cr0FRNFtHN4" + }, + "source": [ + "## Write the training step function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tmDwApcpgZzw" + }, + "outputs": [], + "source": [ + "# The training step.\n", + "@partial(jax.jit, static_argnums=[0]) # this forces jax.jit to recompile for every new model\n", + 
"def train_step(model, state, optimizer_state, rng, data, labels):\n", + "\n", + " other_state, params = state.pop('params') # differentiate only against 'params' which represents trainable variables\n", + " (loss, batch_stats), grads = jax.value_and_grad(model.loss, has_aux=True)(params, other_state, rng, data, labels, train=True)\n", + "\n", + " updates, optimizer_state = optimizer.update(grads, optimizer_state)\n", + " params = optax.apply_updates(params, updates)\n", + " new_state = state.copy(add_or_replace={**batch_stats, 'params': params})\n", + "\n", + " rng, _ = jax.random.split(rng)\n", + "\n", + " return new_state, optimizer_state, rng, loss" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zr16g6NzV4O9" + }, + "source": [ + "## Write the training loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbl5w-KUV7Qw" + }, + "outputs": [], + "source": [ + "def train(model, state, optimizer_state, train_data, epochs, losses, avg_losses, eval_losses, eval_accuracies):\n", + " p = Progress(STEPS_PER_EPOCH)\n", + " rng = jax.random.PRNGKey(0)\n", + " for epoch in range(epochs):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer_state[1].count\n", + "\n", + " # Run an epoch of training.\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " state, optimizer_state, rng, loss = train_step(model, state, optimizer_state, rng, data.numpy(), labels.numpy())\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " # Run one epoch of evals (10,000 test images in a single batch).\n", + " other_state, params = state.pop('params')\n", + " # Gotcha: must discard modified batch_stats here\n", + " eval_loss, _ = model.loss(params, other_state, rng, all_test_data.numpy(), all_test_labels.numpy(), train=False)\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = model.accuracy(state, all_test_data.numpy(), all_test_labels.numpy())\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", jlr_decay(optimizer_step))\n", + "\n", + " return state, optimizer_state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DGB3W5g0Wt1H" + }, + "source": [ + "## Create the model and the optimizer (with Optax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mW5mkmCWtN8W" + }, + "outputs": [], + "source": [ + "# The model.\n", + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # Gotcha: it does not seem to be possible to pass just a callable as LR, must be an Optax Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses=[]\n", + "avg_losses=[]\n", + "eval_losses=[]\n", + "eval_accuracies=[]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FJdsKghBNF" + }, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nmcofTTBZSIb" + }, + "outputs": [], + "source": [ + "new_state, new_optimizer_state = train(model, state, optimizer_state, train_data, 
JAX_EPOCHS+TF_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n_20vgvDXB5r" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0lT3cdENCBzL" + }, + "source": [ + "## Partially train the model\n", + "\n", + "You will continue training the model in TensorFlow shortly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KT-xqj5N7C6L" + }, + "outputs": [], + "source": [ + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # LR must be an Optax LR Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses, avg_losses, eval_losses, eval_accuracies = [], [], [], []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oa362HMDbzDE" + }, + "outputs": [], + "source": [ + "state, optimizer_state = train(model, state, optimizer_state, train_data, JAX_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IyZtUPPCt0y" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNtlSaOCCumB" + }, + "source": [ + "## Save just enough for inference\n", + "\n", + "If your goal is to deploy your JAX model (so you can run inference using `model.predict()`), simply exporting it to [SavedModel](https://www.tensorflow.org/guide/saved_model) is sufficient. This section demonstrates how to accomplish that." 
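Before the notebook's own export cell, here is a stripped-down sketch of the same idea under simplifying assumptions: a made-up one-layer prediction function stands in for the Flax model, and the parameter shapes and save path are illustrative only. The notebook's full version, using the real model state, follows.

```python
# A minimal sketch of exporting a JAX function for TensorFlow inference.
# `jax_predict`, its parameter shapes, and the save path are hypothetical.
import jax.numpy as jnp
import tensorflow as tf
from jax.experimental import jax2tf

def jax_predict(params, x):
  # Stand-in prediction function: a single dense layer written in pure JAX.
  return jnp.dot(x, params["w"]) + params["b"]

params = {"w": jnp.ones((3, 2)), "b": jnp.zeros(2)}

# Create the TensorFlow equivalent of the JAX function. `polymorphic_shapes`
# keeps the batch dimension `b` dynamic, as in the notebook's own export cell.
tf_predict = jax2tf.convert(jax_predict, polymorphic_shapes=["...", "(b, 3)"])

# Wrap the parameters in `tf.Variable`s and the function in a `tf.Module`.
module = tf.Module()
module.params = tf.nest.map_structure(tf.Variable, params)
module.predict = tf.function(
    lambda x: tf_predict(module.params, x),
    input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)],
    autograph=False)

print(module.predict(tf.ones((1, 3))))  # TensorFlow now runs the JAX computation.
tf.saved_model.save(module, "/tmp/minimal_jax_module")  # Hypothetical export path.
```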
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O653B3-5H8FL" + }, + "outputs": [], + "source": [ + "# Test data with a different batch size to test polymorphic shapes.\n", + "x, y = next(iter(train_data.unbatch().batch(13)))\n", + "\n", + "m = tf.Module()\n", + "# Wrap the JAX state in `tf.Variable` (needed when calling the converted JAX function.\n", + "state_vars = tf.nest.map_structure(tf.Variable, state)\n", + "# Keep the wrapped state as flat list (needed in TensorFlow fine-tuning).\n", + "m.vars = tf.nest.flatten(state_vars)\n", + "# Convert the desired JAX function (`model.predict`).\n", + "predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "# Wrap the converted function in `tf.function` with the correct `tf.TensorSpec` (necessary for dynamic shapes to work).\n", + "@tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + "def predict(data):\n", + " return predict_fn(state_vars, data)\n", + "m.predict = predict\n", + "tf.saved_model.save(m, \"./\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8HFx67zStgvo" + }, + "outputs": [], + "source": [ + "# Test the converted function.\n", + "print(\"Converted function predictions:\", np.argmax(m.predict(x).numpy(), axis=-1))\n", + "# Reload the model.\n", + "reloaded_model = tf.saved_model.load(\"./\")\n", + "# Test the reloaded converted function (the result should be the same).\n", + "print(\"Reloaded function predictions:\", np.argmax(reloaded_model.predict(x).numpy(), axis=-1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eEk8wv4HJu94" + }, + "source": [ + "## Save everything\n", + "If your goal is a comprehensive export (useful if you're planning on brining the model into TensorFlow for fine-tuning, fusion, etc), this section demonstrates how to save the model so you can access methods including:\n", + "\n", + " - model.predict\n", + " - model.accuracy\n", + " - model.loss (including train=True/False bool, RNG for dropout and BatchNorm state updates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9mty52pmvDDp" + }, + "outputs": [], + "source": [ + "from collections import abc\n", + "\n", + "def _fix_frozen(d):\n", + " \"\"\"Changes any mappings (e.g. 
frozendict) back to dict.\"\"\"\n", + " if isinstance(d, list):\n", + " return [_fix_frozen(v) for v in d]\n", + " elif isinstance(d, tuple):\n", + " return tuple(_fix_frozen(v) for v in d)\n", + " elif not isinstance(d, abc.Mapping):\n", + " return d\n", + " d = dict(d)\n", + " for k, v in d.items():\n", + " d[k] = _fix_frozen(v)\n", + " return d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3HEsKNXbCwXw" + }, + "outputs": [], + "source": [ + "class TFModel(tf.Module):\n", + " def __init__(self, state, model):\n", + " super().__init__()\n", + "\n", + " # Special care needed for the train=True/False parameter in the loss\n", + " @jax.jit\n", + " def loss_with_train_bool(state, rng, data, labels, train):\n", + " other_state, params = state.pop('params')\n", + " loss, batch_stats = jax.lax.cond(train,\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=True),\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=False),\n", + " state, data, labels)\n", + " # must use JAX to split the RNG, therefore, must do it in a @jax.jit function\n", + " new_rng, _ = jax.random.split(rng)\n", + " return loss, batch_stats, new_rng\n", + "\n", + " self.state_vars = tf.nest.map_structure(tf.Variable, state)\n", + " self.vars = tf.nest.flatten(self.state_vars)\n", + " self.jax_rng = tf.Variable(jax.random.PRNGKey(0))\n", + "\n", + " self.loss_fn = jax2tf.convert(loss_with_train_bool, polymorphic_shapes=[\"...\", \"...\", \"(b, 28, 28, 1)\", \"(b, 10)\", \"...\"])\n", + " self.accuracy_fn = jax2tf.convert(model.accuracy, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\", \"(b, 10)\"])\n", + " self.predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "\n", + " # Must specify TensorSpec manually for variable batch size to work\n", + " @tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + " def predict(self, data):\n", + " # Make sure the TfModel.predict function implicitly use self.state_vars and not the JAX state directly\n", + " # otherwise, all model weights would be embedded in the TF graph as constants.\n", + " return self.predict_fn(self.state_vars, data)\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def train_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, True)\n", + " # update batch norm stats\n", + " flat_vars = tf.nest.flatten(self.state_vars['batch_stats'])\n", + " flat_values = tf.nest.flatten(batch_stats['batch_stats'])\n", + " for var, val in zip(flat_vars, flat_values):\n", + " var.assign(val)\n", + " # update RNG\n", + " self.jax_rng.assign(new_rng)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def eval_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, False)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def accuracy(self, data, labels):\n", + " 
return self.accuracy_fn(self.state_vars, data, labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znJrAVpcxO9u" + }, + "outputs": [], + "source": [ + "# Instantiate the model.\n", + "tf_model = TFModel(state, model)\n", + "\n", + "# Save the model.\n", + "tf.saved_model.save(tf_model, \"./\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y02DHEwTjNzV" + }, + "source": [ + "## Reload the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i75yS3v2jPpM" + }, + "outputs": [], + "source": [ + "reloaded_model = tf.saved_model.load(\"./\")\n", + "\n", + "# Test if it works and that the batch size is indeed variable.\n", + "x,y = next(iter(train_data.unbatch().batch(13)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "x,y = next(iter(train_data.unbatch().batch(20)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "\n", + "print(reloaded_model.accuracy(one_batch, one_batch_labels))\n", + "print(reloaded_model.accuracy(all_test_data, all_test_labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DiwEAwQmlx1x" + }, + "source": [ + "## Continue training the converted JAX model in TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MubFcO_jl2vE" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.Adam(learning_rate=tflr_decay)\n", + "\n", + "# Set the iteration step for the learning rate to resume from where it left off in JAX.\n", + "optimizer.iterations.assign(len(eval_losses)*STEPS_PER_EPOCH)\n", + "\n", + "p = Progress(STEPS_PER_EPOCH)\n", + "\n", + "for epoch in range(JAX_EPOCHS, JAX_EPOCHS+TF_EPOCHS):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer.iterations\n", + "\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " with tf.GradientTape() as tape:\n", + " #loss = reloaded_model.loss(data, labels, True)\n", + " loss = reloaded_model.train_loss(data, labels)\n", + " grads = tape.gradient(loss, reloaded_model.vars)\n", + " optimizer.apply_gradients(zip(grads, reloaded_model.vars))\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " eval_loss = reloaded_model.eval_loss(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = reloaded_model.accuracy(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", tflr_decay(optimizer.iterations).numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50V1FSmI6UTk" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=2*STEPS_PER_EPOCH)\n", + "\n", + "# The loss takes a hit when the training restarts, but does not go back to random levels.\n", + "# This is likely caused by the optimizer momentum being reinitialized." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L7lSziW0K0ny" + }, + "source": [ + "## Next steps\n", + "You can learn more about [JAX](https://jax.readthedocs.io/en/latest/index.html) and [Flax](https://flax.readthedocs.io/en/latest) on their documentation websites which contain detailed guides and examples. If you're new to JAX, be sure to explore the [JAX 101 tutorials](https://jax.readthedocs.io/en/latest/jax-101/index.html), and check out the [Flax quickstart](https://flax.readthedocs.io/en/latest/getting_started.html). To learn more about converting JAX models to TensorFlow format, check out the [jax2tf](https://github.com/google/jax/tree/main/jax/experimental/jax2tf) utility on GitHub. If you're interested in converting JAX models to run in the browser with TensorFlow.js, visit [JAX on the Web with TensorFlow.js](https://blog.tensorflow.org/2022/08/jax-on-web-with-tensorflowjs.html). If you'd like to prepare JAX models to run in TensorFLow Lite, visit the [JAX Model Conversion For TFLite](https://www.tensorflow.org/lite/examples/jax_conversion/overview) guide." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "jax2tf.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/keras.md b/site/en/guide/keras.md new file mode 100644 index 00000000000..3dee7be3aa1 --- /dev/null +++ b/site/en/guide/keras.md @@ -0,0 +1,135 @@ +# Keras: The high-level API for TensorFlow + +Keras is the high-level API of the TensorFlow platform. It provides an +approachable, highly-productive interface for solving machine learning (ML) +problems, with a focus on modern deep learning. Keras covers every step of the +machine learning workflow, from data processing to hyperparameter tuning to +deployment. It was developed with a focus on enabling fast experimentation. + +With Keras, you have full access to the scalability and cross-platform +capabilities of TensorFlow. You can run Keras on a TPU Pod or large clusters of +GPUs, and you can export Keras models to run in the browser or on mobile +devices. You can also serve Keras models via a web API. + +Keras is designed to reduce cognitive load by achieving the following goals: + +* Offer simple, consistent interfaces. +* Minimize the number of actions required for common use cases. +* Provide clear, actionable error messages. +* Follow the principle of progressive disclosure of complexity: It's easy to get + started, and you can complete advanced workflows by learning as you go. +* Help you write concise, readable code. + +## Who should use Keras + +The short answer is that every TensorFlow user should use the Keras APIs by +default. Whether you're an engineer, a researcher, or an ML practitioner, you +should start with Keras. + +There are a few use cases (for example, building tools on top of TensorFlow or +developing your own high-performance platform) that require the low-level +[TensorFlow Core APIs](https://www.tensorflow.org/guide/core). But if your use +case doesn't fall into one +of the +[Core API applications](https://www.tensorflow.org/guide/core#core_api_applications), +you should prefer Keras. + +## Keras API components + +The core data structures of Keras are [layers](https://keras.io/api/layers/) and +[models](https://keras.io/api/models/). A layer is a simple input/output +transformation, and a model is a directed acyclic graph (DAG) of layers. 
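As a minimal sketch of these two building blocks (the layer sizes and the random input batch below are arbitrary choices for illustration, not part of any recommended architecture):

```python
import tensorflow as tf

# A layer is an input/output transformation that can hold trainable weights.
dense = tf.keras.layers.Dense(units=4, activation="relu")

# A model groups layers into a graph; the simplest case is a linear stack.
model = tf.keras.Sequential([
    dense,
    tf.keras.layers.Dense(1),
])

batch = tf.random.normal((2, 8))  # An arbitrary toy batch: 2 samples, 8 features.
print(dense(batch).shape)   # The layer alone: (2, 4).
print(model(batch).shape)   # The full stack of layers: (2, 1).
```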
+ +### Layers + +The `tf.keras.layers.Layer` class is the fundamental abstraction in Keras. A +`Layer` encapsulates a state (weights) and some computation (defined in the +`tf.keras.layers.Layer.call` method). + +Weights created by layers can be trainable or non-trainable. Layers are +recursively composable: If you assign a layer instance as an attribute of +another layer, the outer layer will start tracking the weights created by the +inner layer. + +You can also use layers to handle data preprocessing tasks like normalization +and text vectorization. Preprocessing layers can be included directly into a +model, either during or after training, which makes the model portable. + +### Models + +A model is an object that groups layers together and that can be trained on +data. + +The simplest type of model is the +[`Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model), +which is a linear stack of layers. For more complex architectures, you can +either use the +[Keras functional API](https://www.tensorflow.org/guide/keras/functional_api), +which lets you build arbitrary graphs of layers, or +[use subclassing to write models from scratch](https://www.tensorflow.org/guide/keras/making_new_layers_and_models_via_subclassing). + +The `tf.keras.Model` class features built-in training and evaluation methods: + +* `tf.keras.Model.fit`: Trains the model for a fixed number of epochs. +* `tf.keras.Model.predict`: Generates output predictions for the input samples. +* `tf.keras.Model.evaluate`: Returns the loss and metrics values for the model; + configured via the `tf.keras.Model.compile` method. + +These methods give you access to the following built-in training features: + +* [Callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks). + You can leverage built-in callbacks for early stopping, model checkpointing, + and [TensorBoard](https://www.tensorflow.org/tensorboard) monitoring. You can + also + [implement custom callbacks](https://www.tensorflow.org/guide/keras/writing_your_own_callbacks). +* [Distributed training](https://www.tensorflow.org/guide/keras/distributed_training). + You can easily scale up your training to multiple GPUs, TPUs, or devices. +* Step fusing. With the `steps_per_execution` argument in + `tf.keras.Model.compile`, you can process multiple batches in a single + `tf.function` call, which greatly improves device utilization on TPUs. + +For a detailed overview of how to use `fit`, see the +[training and evaluation guide](https://www.tensorflow.org/guide/keras/training_with_built_in_methods). +To learn how to customize the built-in training and evaluation loops, see +[Customizing what happens in `fit()`](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). + +### Other APIs and tools + +Keras provides many other APIs and tools for deep learning, including: + +* [Optimizers](https://keras.io/api/optimizers/) +* [Metrics](https://keras.io/api/metrics/) +* [Losses](https://keras.io/api/losses/) +* [Data loading utilities](https://keras.io/api/data_loading/) + +For a full list of available APIs, see the +[Keras API reference](https://keras.io/api/). To learn more about other Keras +projects and initiatives, see +[The Keras ecosystem](https://keras.io/getting_started/ecosystem/). 
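Tying the above together, the following sketch shows the built-in workflow this section describes: configure with `compile`, train with `fit` (here with a built-in callback attached), then `evaluate` and `predict`. The architecture, hyperparameters, and random data are placeholders chosen only for illustration.

```python
import numpy as np
import tensorflow as tf

# Placeholder data: 256 random samples with 8 features and a scalar target.
x = np.random.rand(256, 8).astype("float32")
y = np.random.rand(256, 1).astype("float32")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(1),
])

# compile() configures the optimizer, loss, and metrics used by the built-in loops.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# fit() trains for a fixed number of epochs; callbacks hook into the training loop.
model.fit(x, y, epochs=2, batch_size=32,
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss", patience=1)])

# evaluate() returns the loss and metric values; predict() generates outputs.
loss, mae = model.evaluate(x, y, verbose=0)
predictions = model.predict(x[:5])
```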
+ +## Next steps + +To get started using Keras with TensorFlow, check out the following topics: + +* [The Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) +* [The Functional API](https://www.tensorflow.org/guide/keras/functional) +* [Training & evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/training_with_built_in_methods) +* [Making new layers and models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models) +* [Serialization and saving](https://www.tensorflow.org/guide/keras/save_and_serialize) +* [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) +* [Customizing what happens in fit()](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) +* [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) +* [Working with RNNs](https://www.tensorflow.org/guide/keras/rnn) +* [Understanding masking & padding](https://www.tensorflow.org/guide/keras/masking_and_padding) +* [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback) +* [Transfer learning & fine-tuning](https://www.tensorflow.org/guide/keras/transfer_learning) +* [Multi-GPU and distributed training](https://www.tensorflow.org/guide/keras/distributed_training) + +To learn more about Keras, see the following topics at +[keras.io](http://keras.io): + +* [About Keras](https://keras.io/about/) +* [Introduction to Keras for Engineers](https://keras.io/getting_started/intro_to_keras_for_engineers/) +* [Introduction to Keras for Researchers](https://keras.io/getting_started/intro_to_keras_for_researchers/) +* [Keras API reference](https://keras.io/api/) +* [The Keras ecosystem](https://keras.io/getting_started/ecosystem/) \ No newline at end of file diff --git a/site/en/guide/migrate/_toc.yaml b/site/en/guide/migrate/_toc.yaml index efad54a108f..ceb7e5f57ae 100644 --- a/site/en/guide/migrate/_toc.yaml +++ b/site/en/guide/migrate/_toc.yaml @@ -65,7 +65,7 @@ toc: - heading: Validate model quality and performance - title: Validate correctness and numerical equivalence path: /guide/migrate/validate_correctness -# - title: Quality testing and debugging -# path: /guide/migrate/testing_debugging +- title: Debug TF2 Migrated Training Pipeline + path: /guide/migrate/migration_debugging # - title: Testing speed and throughput # path: /guide/migrate/throughput_testing diff --git a/site/en/guide/migrate/canned_estimators.ipynb b/site/en/guide/migrate/canned_estimators.ipynb index d19b2a6cae0..68859511a84 100644 --- a/site/en/guide/migrate/canned_estimators.ipynb +++ b/site/en/guide/migrate/canned_estimators.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migration Examples: Canned Estimators\n", + "# Migration examples: Canned Estimators\n", "\n", "\n", " - @@ -43,12 +44,12 @@ unzip: cannot find zipfile directory in one of ./bazel-bin/tensorflow/tools/pip_ No such file or directory - + + +CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. 
@@ -60,7 +61,7 @@ unzip: cannot find zipfile directory in one of ./bazel-bin/tensorflow/tools/pip_ @@ -111,7 +112,7 @@ ImportError: cannot import name 'descriptor' @@ -226,7 +227,7 @@ ImportError: cannot import name 'descriptor' diff --git a/site/en/install/gpu.md b/site/en/install/gpu.md deleted file mode 100644 index 2879873189d..00000000000 --- a/site/en/install/gpu.md +++ /dev/null @@ -1,188 +0,0 @@ -# GPU support - -Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. - -TensorFlow GPU support requires an assortment of drivers and libraries. To -simplify installation and avoid library conflicts, we recommend using a -[TensorFlow Docker image with GPU support](./docker.md) (Linux only). This setup -only requires the [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external}. - -These install instructions are for the latest release of TensorFlow. See the -[tested build configurations](./source.md#gpu) for CUDA® and cuDNN versions to -use with older TensorFlow releases. - -## Pip package - -See the [pip install guide](./pip) for available packages, systems requirements, -and instructions. The TensorFlow `pip` package includes GPU support for -CUDA®-enabled cards: - -
-pip install tensorflow
-
- -This guide covers GPU support and installation steps for the latest *stable* -TensorFlow release. - -### Older versions of TensorFlow - -For releases 1.15 and older, CPU and GPU packages are separate: - -
-pip install tensorflow==1.15      # CPU
-pip install tensorflow-gpu==1.15  # GPU
-
- -## Hardware requirements - -The following GPU-enabled devices are supported: - -* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and - higher than 8.0. See the list of - CUDA®-enabled - GPU cards. -* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation - from PTX, or to use different versions of the NVIDIA® libraries, see the - [Linux build from source](./source.md) guide. -* Packages do not contain PTX code except for the latest supported CUDA® - architecture; therefore, TensorFlow fails to load on older GPUs when - `CUDA_FORCE_PTX_JIT=1` is set. (See - Application - Compatibility for details.) - -Note: The error message "Status: device kernel image is invalid" indicates that -the TensorFlow package does not contain PTX for your architecture. You can -enable compute capabilities by [building TensorFlow from source](./source.md). - -## Software requirements - -The following NVIDIA® software must be installed on your system: - -* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external} —CUDA® - 11.2 requires 450.80.02 or higher. -* [CUDA® Toolkit](https://developer.nvidia.com/cuda-toolkit-archive){:.external} - —TensorFlow supports CUDA® 11.2 (TensorFlow >= 2.5.0) -* [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} ships with the CUDA® - Toolkit. -* [cuDNN SDK 8.1.0](https://developer.nvidia.com/cudnn){:.external} - [cuDNN versions](https://developer.nvidia.com/rdp/cudnn-archive){:.external}). -* *(Optional)* - [TensorRT 6.0](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_6){:.external} - to improve latency and throughput for inference on some models. - -## Linux setup - -The `apt` instructions below are the easiest way to install the required NVIDIA -software on Ubuntu. However, if [building TensorFlow from source](./source.md), -manually install the software requirements listed above, and consider using a -`-devel` [TensorFlow Docker image](./docker.md) as a base. - -Install [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} which ships with -the CUDA® Toolkit. Append its installation directory to the `$LD_LIBRARY_PATH` -environmental variable: - -
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
-
- -### Install CUDA with apt - -This section shows how to install CUDA® 11 (TensorFlow >= 2.4.0) on Ubuntu -16.04 and 18.04. These instructions may work for other Debian-based distros. - -Caution: [Secure Boot](https://wiki.ubuntu.com/UEFI/SecureBoot){:.external} -complicates installation of the NVIDIA driver and is beyond the scope of these instructions. - - -#### Ubuntu 18.04 (CUDA 11.0) - -
-# Add NVIDIA package repositories
-wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
-sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
-sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
-sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
-sudo apt-get update
-
-wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
-
-sudo apt install ./nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
-sudo apt-get update
-
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt-get update
-
-# Install development and runtime libraries (~4GB)
-sudo apt-get install --no-install-recommends \
-    cuda-11-0 \
-    libcudnn8=8.0.4.30-1+cuda11.0  \
-    libcudnn8-dev=8.0.4.30-1+cuda11.0
-
-# Reboot. Check that GPUs are visible using the command: nvidia-smi
-
-# Install TensorRT. Requires that libcudnn8 is installed above.
-sudo apt-get install -y --no-install-recommends libnvinfer7=7.1.3-1+cuda11.0 \
-    libnvinfer-dev=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin7=7.1.3-1+cuda11.0
-
-
- -#### Ubuntu 16.04 (CUDA 11.0) - -
-# Add NVIDIA package repositories
-# Add HTTPS support for apt-key
-sudo apt-get install gnupg-curl
-wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
-sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
-sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
-sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /"
-sudo apt-get update
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
-sudo apt install ./nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
-sudo apt-get update
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt-get update
-
-# Install development and runtime libraries (~4GB)
-sudo apt-get install --no-install-recommends \
-    cuda-11-0 \
-    libcudnn8=8.0.4.30-1+cuda11.0  \
-    libcudnn8-dev=8.0.4.30-1+cuda11.0
-
-
-# Reboot. Check that GPUs are visible using the command: nvidia-smi
-
-# Install TensorRT. Requires that libcudnn7 is installed above.
-sudo apt-get install -y --no-install-recommends \
-    libnvinfer7=7.1.3-1+cuda11.0 \
-    libnvinfer-dev=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin7=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin-dev=7.1.3-1+cuda11.0
-
-
- - -## Windows setup - -See the [hardware requirements](#hardware_requirements) and -[software requirements](#software_requirements) listed above. Read the -[CUDA® install guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/){:.external}. - -Make sure the installed NVIDIA software packages match the versions listed above. In -particular, TensorFlow will not load without the `cuDNN64_8.dll` file. To use a -different version, see the [Windows build from source](./source_windows.md) guide. - -Add the CUDA®, CUPTI, and cuDNN installation directories to the `%PATH%` -environmental variable. For example, if the CUDA® Toolkit is installed to -`C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0` and cuDNN to -`C:\tools\cuda`, update your `%PATH%` to match: - -
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin;%PATH%
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\extras\CUPTI\lib64;%PATH%
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include;%PATH%
-SET PATH=C:\tools\cuda\bin;%PATH%
-
diff --git a/site/en/install/gpu_plugins.md b/site/en/install/gpu_plugins.md index 358db01b312..39e3cf09b29 100644 --- a/site/en/install/gpu_plugins.md +++ b/site/en/install/gpu_plugins.md @@ -1,12 +1,12 @@ # GPU device plugins -Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, click -[here](./gpu.md). +Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, go to +the [Install TensorFlow with pip](./pip.md) guide. TensorFlow's -pluggable -device architecture adds new device support as separate plug-in packages -that are installed alongside the official TensorFlow package. +[pluggable device](https://github.com/tensorflow/community/blob/master/rfcs/20200624-pluggable-device-for-tensorflow.md) +architecture adds new device support as separate plug-in packages that are +installed alongside the official TensorFlow package. The mechanism requires no device-specific changes in the TensorFlow code. It relies on C APIs to communicate with the TensorFlow binary in a stable manner. @@ -57,6 +57,24 @@ run() # PluggableDevices also work with tf.function and graph mode. Metal `PluggableDevice` for macOS GPUs: -* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/){:.external}. +* Works with TF 2.5 or later. +* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/). * For questions and feedback, please visit the - [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal){:.external}. + [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal). + +DirectML `PluggableDevice` for Windows and WSL (preview): + +* Works with `tensorflow-cpu` package, version 2.10 or later. +* [PyPI wheel](https://pypi.org/project/tensorflow-directml-plugin/). +* [GitHub repo](https://github.com/microsoft/tensorflow-directml-plugin). +* For questions, feedback or to raise issues, please visit the + [Issues page of `tensorflow-directml-plugin` on GitHub](https://github.com/microsoft/tensorflow-directml-plugin/issues). + +Intel® Extension for TensorFlow `PluggableDevice` for Linux and WSL: + +* Works with TF 2.10 or later. +* [Getting started guide](https://intel.github.io/intel-extension-for-tensorflow/latest/get_started.html) +* [PyPI wheel](https://pypi.org/project/intel-extension-for-tensorflow/). +* [GitHub repo](https://github.com/intel/intel-extension-for-tensorflow). +* For questions, feedback, or to raise issues, please visit the + [Issues page of `intel-extension-for-tensorflow` on GitHub](https://github.com/intel/intel-extension-for-tensorflow/issues). 
diff --git a/site/en/install/lang_c.ipynb b/site/en/install/lang_c.ipynb new file mode 100644 index 00000000000..788a5e6c891 --- /dev/null +++ b/site/en/install/lang_c.ipynb @@ -0,0 +1,383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s7Bo2MipUnXX" + }, + "source": [ + "# Install TensorFlow for C" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Birwb-khUOIq" + }, + "source": [ + "
\n", @@ -67,15 +67,15 @@ "source": [ "Canned (or Premade) Estimators have traditionally been used in TensorFlow 1 as quick and easy ways to train models for a variety of typical use cases. TensorFlow 2 provides straightforward approximate substitutes for a number of them by way of Keras models. For those canned estimators that do not have built-in TensorFlow 2 substitutes, you can still build your own replacement fairly easily.\n", "\n", - "This guide walks through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TF2 with Keras.\n", + "This guide will walk you through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TensorFlow 2 with Keras.\n", "\n", "Namely, this guide includes examples for migrating:\n", "* From `tf.estimator`'s `LinearEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to Keras `tf.compat.v1.keras.models.LinearModel` in TensorFlow 2\n", "* From `tf.estimator`'s `DNNEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to a custom Keras DNN ModelKeras in TensorFlow 2\n", "* From `tf.estimator`'s `DNNLinearCombinedEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", - "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", + "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tfdf.keras.GradientBoostedTreesModel` in TensorFlow 2\n", "\n", - "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating feature columns](migrating_feature_columns.ipynb)." + "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating from feature columns to the Keras preprocessing layers API](migrating_feature_columns.ipynb)." ] }, { @@ -108,11 +108,11 @@ }, "outputs": [], "source": [ - "import keras\n", "import pandas as pd\n", "import tensorflow as tf\n", "import tensorflow.compat.v1 as tf1\n", - "import tensorflow_decision_forests as tfdf\n" + "import tensorflow_decision_forests as tfdf\n", + "from tensorflow import keras\n" ] }, { @@ -183,7 +183,7 @@ "id": "bYSgoezeMrpI" }, "source": [ - "and create a method to instantiate a simplistic sample optimizer to use with our various TensorFlow 1 Estimator and TensorFlow 2 Keras models." + "and create a method to instantiate a simplistic sample optimizer to use with various TensorFlow 1 Estimator and TensorFlow 2 Keras models." 
] }, { @@ -196,7 +196,7 @@ "source": [ "def create_sample_optimizer(tf_version):\n", " if tf_version == 'tf1':\n", - " optimizer = lambda: tf.keras.optimizers.Ftrl(\n", + " optimizer = lambda: tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf1.train.exponential_decay(\n", " learning_rate=0.1,\n", @@ -204,7 +204,7 @@ " decay_steps=10000,\n", " decay_rate=0.9))\n", " elif tf_version == 'tf2':\n", - " optimizer = tf.keras.optimizers.Ftrl(\n", + " optimizer = tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(\n", " initial_learning_rate=0.1, decay_steps=10000, decay_rate=0.9))\n", @@ -226,7 +226,7 @@ "id": "_O7fyhCnpvED" }, "source": [ - "### TF1: Using LinearEstimator" + "### TensorFlow 1: Using LinearEstimator" ] }, { @@ -270,7 +270,7 @@ "id": "KEmzBjfnsxwT" }, "source": [ - "### TF2: Using Keras LinearModel" + "### TensorFlow 2: Using Keras LinearModel" ] }, { @@ -311,7 +311,7 @@ "id": "YKl6XZ7Bp1t5" }, "source": [ - "### TF1: Using DNNEstimator" + "### TensorFlow 1: Using DNNEstimator" ] }, { @@ -320,7 +320,7 @@ "id": "J7wJUmgypln8" }, "source": [ - "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline DNN model for regression and classification problems." + "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline deep neural network (DNN) model for regression and classification problems." ] }, { @@ -357,7 +357,7 @@ "id": "6xJz6px6pln-" }, "source": [ - "### TF2: Using Keras to Create a Custom DNN Model" + "### TensorFlow 2: Using Keras to create a custom DNN model" ] }, { @@ -368,7 +368,7 @@ "source": [ "In TensorFlow 2, you can create a custom DNN model to substitute for one generated by `tf.estimator.DNNEstimator`, with similar levels of user-specified customization (for instance, as in the previous example, the ability to customize a chosen model optimizer).\n", "\n", - "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras RNN Model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU` - see [here](https://www.tensorflow.org/guide/keras/rnn#built-in_rnn_layers_a_simple_example) for more details." + "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras recurrent neural network (RNN) model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU`. To learn more, check out the _Built-in RNN layers: a simple example_ section of [RNN with Keras guide](https://www.tensorflow.org/guide/keras/rnn)." ] }, { @@ -413,7 +413,7 @@ "id": "GfRaObf5g4TU" }, "source": [ - "### TF1: Using DNNLinearCombinedEstimator" + "### TensorFlow 1: Using DNNLinearCombinedEstimator" ] }, { @@ -464,7 +464,7 @@ "id": "BeMikL5ug4TX" }, "source": [ - "### TF2: Using Keras WideDeepModel" + "### TensorFlow 2: Using Keras WideDeepModel" ] }, { @@ -477,7 +477,7 @@ "\n", "This `WideDeepModel` is constructed on the basis of a constituent `LinearModel` and a custom DNN Model, both of which are discussed in the preceding two examples. 
A custom linear model can also be used in place of the built-in Keras `LinearModel` if desired.\n", "\n", - "If you would like to build your own model instead of a canned estimator, check out [how to build a `keras.Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model). For more information on custom training and optimizers you can also checkout [this guide](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough)." + "If you would like to build your own model instead of using a canned estimator, check out the [Keras Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) guide. For more information on custom training and optimizers, check out the [Custom training: walkthrough](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) guide." ] }, { @@ -532,7 +532,7 @@ "id": "_3mCQVDSeOKD" }, "source": [ - "### TF1: Using BoostedTreesEstimator" + "### TensorFlow 1: Using BoostedTreesEstimator" ] }, { @@ -541,35 +541,35 @@ "id": "oEWYHNt4eOKD" }, "source": [ - "In TensorFlow 1, you can use `tf.estimator.BoostedTreesEstimator` to create a baseline to create a baseline Gradient Boosting model using an ensemble of decision trees for regression and classification problems." + "In TensorFlow 1, you could use `tf.estimator.BoostedTreesEstimator` to create a baseline Gradient Boosting model using an ensemble of decision trees for regression and classification problems. This functionality is no longer included in TensorFlow 2." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "uBIURLLMeOKE" + "id": "wliVIER1jLnA" }, - "outputs": [], "source": [ + "```\n", "bt_estimator = tf1.estimator.BoostedTreesEstimator(\n", " head=tf.estimator.BinaryClassHead(),\n", " n_batches_per_layer=1,\n", " max_depth=10,\n", " n_trees=1000,\n", - " feature_columns=feature_columns)" + " feature_columns=feature_columns)\n", + "```" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "MUo5omt3eOKE" + "id": "-K87uBrZjR0u" }, - "outputs": [], "source": [ + "```\n", "bt_estimator.train(input_fn=_input_fn, steps=1000)\n", - "bt_estimator.evaluate(input_fn=_eval_input_fn, steps=100)" + "bt_estimator.evaluate(input_fn=_eval_input_fn, steps=100)\n", + "```" ] }, { @@ -578,7 +578,7 @@ "id": "eNuLP6BeeOKF" }, "source": [ - "### TF2: Using TensorFlow Decision Forests" + "### TensorFlow 2: Using TensorFlow Decision Forests" ] }, { @@ -587,40 +587,100 @@ "id": "m3EVq388eOKF" }, "source": [ - "In TensorFlow 2, the closest pre-packaged substitute for a model generated by `tf.estimator.BoostedTreesEstimator` is one created using `tfdf.keras.GradientBoostedTreesModel`, which creates a sequentially-trained sequence of shallow decision trees, each designed to \"learn\" from errors made by its predecessors in the sequence.\n", + "In TensorFlow 2, `tf.estimator.BoostedTreesEstimator` is replaced by [tfdf.keras.GradientBoostedTreesModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) from the [TensorFlow Decision Forests](https://www.tensorflow.org/decision_forests) package.\n", "\n", - "`GradientBoostedTreesModel` provides more options for customization, allowing for the specification of everything from basic depth constraints to early stopping conditions. 
See [here](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) for more `GradientBoostedTreesModel` attribute details." + "TensorFlow Decision Forests provides various advantages over the `tf.estimator.BoostedTreesEstimator`, notably regarding quality, speed, ease of use and flexibility. To learn about TensorFlow Decision Forests, start with the [beginner colab](https://www.tensorflow.org/decision_forests/tutorials/beginner_colab).\n", + "\n", + "The following example shows how to train a Gradient Boosted Trees model using TensorFlow 2:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UB90fXJdVWC5" + }, + "source": [ + "Install TensorFlow Decision Forests." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "JLS_2vKKeOKF" + "id": "9097mTCIVVE9" }, "outputs": [], "source": [ - "gbt_model = tfdf.keras.GradientBoostedTreesModel(\n", - " task=tfdf.keras.Task.CLASSIFICATION)\n", - "gbt_model.compile(metrics=['mse', 'accuracy'])" + "!pip install tensorflow_decision_forests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B1qTdAS-VpXk" + }, + "source": [ + "Create a TensorFlow dataset. Note that Decision Forests natively support many types of features and do not need pre-processing." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "sZZSM7_VeOKF" + "id": "jkjFHmDTVswY" }, "outputs": [], "source": [ - "train_df, eval_df = x_train.copy(), x_eval.copy()\n", - "train_df['survived'], eval_df['survived'] = y_train, y_eval\n", - "\n", - "train_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label='survived')\n", - "eval_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(eval_df, label='survived')\n", + "train_dataframe = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", + "eval_dataframe = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", "\n", - "gbt_model.fit(train_dataset)\n", - "gbt_model.evaluate(eval_dataset, return_dict=True)" + "# Convert the Pandas Dataframes into TensorFlow datasets.\n", + "train_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(train_dataframe, label=\"survived\")\n", + "eval_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(eval_dataframe, label=\"survived\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7fPa-LfDWDzB" + }, + "source": [ + "Train the model on the `train_dataset` dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JO0yCH9hWPvJ" + }, + "outputs": [], + "source": [ + "# Use the default hyper-parameters of the model.\n", + "gbt_model = tfdf.keras.GradientBoostedTreesModel()\n", + "gbt_model.fit(train_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Y5xm29AWGxt" + }, + "source": [ + "Evaluate the quality of the model on the `eval_dataset` dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JLS_2vKKeOKF" + }, + "outputs": [], + "source": [ + "gbt_model.compile(metrics=['accuracy'])\n", + "gbt_evaluation = gbt_model.evaluate(eval_dataset, return_dict=True)\n", + "print(gbt_evaluation)" ] }, { @@ -629,36 +689,52 @@ "id": "Z22UJ5SUqToQ" }, "source": [ - "In TensorFlow 2, there is also another available TFDF substitute for a model generated by `tf.estimator.BoostedTreesEstimator` - `tfdf.keras.RandomForestModel`. 
`RandomForestModel` creates a robust, overfitting-resistant learner consisting of a voting population of deep decision trees, each trained on random subsets of the input training dataset.\n", - "\n", - "`RandomForestModel` and `GradientBoostedTreesModel` provide similarly extensive levels of customization. Choosing between them is problem-specific and dependent on your task or application.\n", + "Gradient Boosted Trees is just one of the many decision forest algorithms available in TensorFlow Decision Forests. For example, Random Forests (available as [tfdf.keras.RandomForestModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel)) are very resistant to overfitting, while CART (available as [tfdf.keras.CartModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/CartModel)) is great for model interpretation.\n", "\n", - "Check the API docs for more information on the [`RandomForestModel`](https://https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel#attributes) and [`GradientBoostedTreesModel`](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) attribute." + "In the next example, train and evaluate a Random Forest model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "027bGnCork_W" + "id": "W3slOhn4Zi9X" }, "outputs": [], "source": [ - "rf_model = tfdf.keras.RandomForestModel(\n", - " task=tfdf.keras.Task.CLASSIFICATION)\n", - "rf_model.compile(metrics=['mse', 'accuracy'])" + "# Train a Random Forest model\n", + "rf_model = tfdf.keras.RandomForestModel()\n", + "rf_model.fit(train_dataset)\n", + "\n", + "# Evaluate the Random Forest model\n", + "rf_model.compile(metrics=['accuracy'])\n", + "rf_evaluation = rf_model.evaluate(eval_dataset, return_dict=True)\n", + "print(rf_evaluation)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z0QYolhoZb_k" + }, + "source": [ + "In the final example, train and plot a CART model." ] + }, + { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Tc7KtjMlryn_" + "id": "027bGnCork_W" }, "outputs": [], "source": [ - "rf_model.fit(train_dataset)\n", - "rf_model.evaluate(eval_dataset, return_dict=True)" + "# Train a CART model\n", + "cart_model = tfdf.keras.CartModel()\n", + "cart_model.fit(train_dataset)\n", + "\n", + "# Plot the CART model\n", + "tfdf.model_plotter.plot_model_in_colab(cart_model, max_depth=2)" ] } ], diff --git a/site/en/guide/migrate/checkpoint_saver.ipynb b/site/en/guide/migrate/checkpoint_saver.ipynb index fca239a8497..7e7f35f4c4e 100644 --- a/site/en/guide/migrate/checkpoint_saver.ipynb +++ b/site/en/guide/migrate/checkpoint_saver.ipynb @@ -77,9 +77,9 @@ "- Save continually at a certain frequency (using the `save_freq` argument).\n", "- Save the weights/parameters only instead of the whole model by setting `save_weights_only` to `True`.\n", "\n", - "For more details, refer to the `tf.keras.callbacks.ModelCheckpoint` API docs and the *Save checkpoints during training* section in the [Save and load models](../../tutorials/keras/save_and_load.ipynb) tutorial. Learn more about the Checkpoint format in the *TF Checkpoint format* section in the [Save and load Keras models](../../guide/keras/save_and_serialize.ipynb) guide. In addition, to add fault tolerance, you can use `tf.keras.callbacks.experimental.BackupAndRestore` or `tf.train.Checkpoint` for manual checkpointing. 
Learn more in the [Fault tolerance migration guide](fault_tolerance.ipynb).\n", + "For more details, refer to the `tf.keras.callbacks.ModelCheckpoint` API docs and the *Save checkpoints during training* section in the [Save and load models](../../tutorials/keras/save_and_load.ipynb) tutorial. Learn more about the Checkpoint format in the *TF Checkpoint format* section in the [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize) guide. In addition, to add fault tolerance, you can use `tf.keras.callbacks.BackupAndRestore` or `tf.train.Checkpoint` for manual checkpointing. Learn more in the [Fault tolerance migration guide](fault_tolerance.ipynb).\n", "\n", - "Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. Learn more in the _Next steps_ section at the end of the guide." + "Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. Learn more in the _Next steps_ section at the end of the guide." ] }, { @@ -201,7 +201,7 @@ "source": [ "## TensorFlow 2: Save checkpoints with a Keras callback for Model.fit\n", "\n", - "In TensorFlow 2, when you use the built-in Keras `Model.fit` (or `Model.evaluate`) for training/evaluation, you can configure `tf.keras.callbacks.ModelCheckpoint` and then pass it to the `callbacks` parameter of `Model.fit` (or `Model.evaluate`). (Learn more in the API docs and the *Using callbacks* section in the [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) guide.)\n", + "In TensorFlow 2, when you use the built-in Keras `Model.fit` (or `Model.evaluate`) for training/evaluation, you can configure `tf.keras.callbacks.ModelCheckpoint` and then pass it to the `callbacks` parameter of `Model.fit` (or `Model.evaluate`). 
(Learn more in the API docs and the *Using callbacks* section in the [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) guide.)\n", "\n", "In the example below, you will use a `tf.keras.callbacks.ModelCheckpoint` callback to store checkpoints in a temporary directory:" ] @@ -263,17 +263,17 @@ "\n", "- API docs: `tf.keras.callbacks.ModelCheckpoint`\n", "- Tutorial: [Save and load models](../../tutorials/keras/save_and_load.ipynb) (the *Save checkpoints during training* section)\n", - "- Guide: [Save and load Keras models](../../guide/keras/save_and_serialize.ipynb) (the *TF Checkpoint format* section)\n", + "- Guide: [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize) (the *TF Checkpoint format* section)\n", "\n", "Learn more about callbacks in:\n", "\n", "- API docs: `tf.keras.callbacks.Callback`\n", - "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", - "- The [Fault tolerance migration guide](fault_tolerance.ipynb): `tf.keras.callbacks.experimental.BackupAndRestore` for `Model.fit`, or `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs for a custom training loop\n", + "- The [Fault tolerance migration guide](fault_tolerance.ipynb): `tf.keras.callbacks.BackupAndRestore` for `Model.fit`, or `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs for a custom training loop\n", "- The [Early stopping migration guide](early_stopping.ipynb): `tf.keras.callbacks.EarlyStopping` is a built-in early stopping callback\n", "- The [TensorBoard migration guide](tensorboard.ipynb): TensorBoard enables tracking and displaying metrics\n", "- The [LoggingTensorHook and StopAtStepHook to Keras callbacks migration guide](logging_stop_hook.ipynb)\n", diff --git a/site/en/guide/migrate/early_stopping.ipynb b/site/en/guide/migrate/early_stopping.ipynb index fec8144348a..1c1712e975b 100644 --- a/site/en/guide/migrate/early_stopping.ipynb +++ b/site/en/guide/migrate/early_stopping.ipynb @@ -457,7 +457,7 @@ "epochs = 100\n", "patience = 5\n", "wait = 0\n", - "best = 0\n", + "best = float('inf')\n", "\n", "for epoch in range(epochs):\n", " print(\"\\nStart of epoch %d\" % (epoch,))\n", @@ -486,7 +486,7 @@ " # The early stopping strategy: stop the training if `val_loss` does not\n", " # decrease over a certain number of epochs.\n", " wait += 1\n", - " if val_loss > best:\n", + " if val_loss < best:\n", " best = val_loss\n", " wait = 0\n", " if wait >= patience:\n", diff --git a/site/en/guide/migrate/evaluator.ipynb b/site/en/guide/migrate/evaluator.ipynb index 305fdef8e37..c8f848e4406 100644 --- a/site/en/guide/migrate/evaluator.ipynb +++ b/site/en/guide/migrate/evaluator.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -67,7 +68,7 @@ "source": [ "Evaluation is a critical part of measuring and benchmarking models.\n", "\n", "This guide demonstrates how to migrate evaluator tasks from 
TensorFlow 1 to TensorFlow 2. In Tensorflow 1 this functionality is implemented by `tf.estimator.train_and_evaluate`, when the API is running distributedly. In Tensorflow 2, you can use the built-in `tf.keras.experimental.SidecarEvaluator`, or a custom evaluation loop on the evaluator task.\n", + "This guide demonstrates how to migrate evaluator tasks from TensorFlow 1 to TensorFlow 2. In Tensorflow 1 this functionality is implemented by `tf.estimator.train_and_evaluate`, when the API is running distributedly. In Tensorflow 2, you can use the built-in `tf.keras.utils.SidecarEvaluator`, or a custom evaluation loop on the evaluator task.\n", "\n", "There are simple serial evaluation options in both TensorFlow 1 (`tf.estimator.Estimator.evaluate`) and TensorFlow 2 (`Model.fit(..., validation_data=(...))` or `Model.evaluate`). The evaluator task is preferable when you would like your workers not switching between training and evaluation, and built-in evaluation in `Model.fit` is preferable when you would like your evaluation to be distributed.\n" ] @@ -121,7 +122,7 @@ "\n", "In TensorFlow 1, you can configure a `tf.estimator` to evaluate the estimator using `tf.estimator.train_and_evaluate`.\n", "\n", - "In this example, start by defining the `tf.estimator.Estimator` and speciyfing training and evaluation specifications:" + "In this example, start by defining the `tf.estimator.Estimator` and specifying training and evaluation specifications:" ] }, { @@ -193,7 +194,7 @@ "source": [ "## TensorFlow 2: Evaluating a Keras model\n", "\n", - "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.experimental.SidecarEvaluator`. You can also visualize the evaluation metrics in Tensorboard which is not shown in this guide.\n", + "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.utils.SidecarEvaluator`. You can also visualize the evaluation metrics in TensorBoard which is not shown in this guide.\n", "\n", "To help demonstrate this, let's first start by defining and training the model:\n" ] @@ -240,7 +241,7 @@ "id": "AhU3VTYZoDh-" }, "source": [ - "Then, evaluate the model using `tf.keras.experimental.SidecarEvaluator`. In real training, it's recommended to use a separate job to conduct the evaluation to free up worker resources for training." + "Then, evaluate the model using `tf.keras.utils.SidecarEvaluator`. In real training, it's recommended to use a separate job to conduct the evaluation to free up worker resources for training." ] }, { @@ -254,7 +255,7 @@ "data = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", "data = data.batch(64)\n", "\n", - "tf.keras.experimental.SidecarEvaluator(\n", + "tf.keras.utils.SidecarEvaluator(\n", " model=model,\n", " data=data,\n", " checkpoint_dir=log_dir,\n", @@ -270,7 +271,7 @@ "source": [ "## Next steps\n", "\n", - "- To learn more about sidecar evaluation consider reading the `tf.keras.experimental.SidecarEvaluator` API docs.\n", + "- To learn more about sidecar evaluation consider reading the `tf.keras.utils.SidecarEvaluator` API docs.\n", "- To consider alternating training and evaluation in Keras consider reading about [other built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate)." 
] } @@ -279,7 +280,6 @@ "colab": { "collapsed_sections": [], "name": "evaluator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/fault_tolerance.ipynb b/site/en/guide/migrate/fault_tolerance.ipynb index b9c2ed52ae2..fdbd0b972c3 100644 --- a/site/en/guide/migrate/fault_tolerance.ipynb +++ b/site/en/guide/migrate/fault_tolerance.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -69,7 +70,7 @@ "\n", "This guide first demonstrates how to add fault tolerance to training with `tf.estimator.Estimator` in TensorFlow 1 by specifying metric saving with `tf.estimator.RunConfig`. Then, you will learn how to implement fault tolerance for training in Tensorflow 2 in two ways:\n", "\n", - "- If you use the Keras `Model.fit` API, you can pass the `tf.keras.callbacks.experimental.BackupAndRestore` callback to it.\n", + "- If you use the Keras `Model.fit` API, you can pass the `tf.keras.callbacks.BackupAndRestore` callback to it.\n", "- If you use a custom training loop (with `tf.GradientTape`), you can arbitrarily save checkpoints using the `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs.\n", "\n", "Both of these methods will back up and restore the training states in [checkpoint](../../guide/checkpoint.ipynb) files.\n" @@ -84,6 +85,26 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "TOVQubuDzdmA" + }, + "source": [ + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGW0XhXkxY_q" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, { "cell_type": "code", "execution_count": null, @@ -119,7 +140,7 @@ "id": "TtlucRG_Uro_" }, "source": [ - "## TensorFlow 1: Save checkpoints with tf.estimator.RunConfig\n", + "## TensorFlow 1: Save checkpoints with `tf.estimator.RunConfig`\n", "\n", "In TensorFlow 1, you can configure a `tf.estimator` to save checkpoints every step by configuring `tf.estimator.RunConfig`.\n", "\n", @@ -250,11 +271,11 @@ "id": "T5LtVtmvYx7J" }, "source": [ - "## TensorFlow 2: Back up and restore with a callback and Model.fit\n", + "## TensorFlow 2: Back up and restore with a callback and `Model.fit`\n", "\n", - "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can provide the `tf.keras.callbacks.experimental.BackupAndRestore` callback to add the fault tolerance functionality.\n", + "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can provide the `tf.keras.callbacks.BackupAndRestore` callback to add the fault tolerance functionality.\n", "\n", - "To help demonstrate this, let's first start by defining a callback class that artificially throws an error during the fifth checkpoint:\n" + "To help demonstrate this, first start by defining a Keras `Callback` class that artificially throws an error during the fourth epoch checkpoint:\n" ] }, { @@ -265,10 +286,13 @@ }, "outputs": [], "source": [ - "class InterruptingCallback(tf.keras.callbacks.Callback):\n", + "class InterruptAtEpoch(tf.keras.callbacks.Callback):\n", " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_epoch=3):\n", + " self.interrupting_epoch = interrupting_epoch\n", + "\n", " def on_epoch_end(self, epoch, 
log=None):\n", - " if epoch == 4:\n", + " if epoch == self.interrupting_epoch:\n", " raise RuntimeError('Interruption')" ] }, @@ -278,7 +302,7 @@ "id": "AhU3VTYZoDh-" }, "source": [ - "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.experimental.BackupAndRestore` callback that will save the checkpoints in a temporary directory:" + "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.BackupAndRestore` callback that will save the checkpoints in a temporary directory at epoch boundaries:" ] }, { @@ -296,20 +320,14 @@ " tf.keras.layers.Dropout(0.2),\n", " tf.keras.layers.Dense(10)\n", " ])\n", - "\n", "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", "model = create_model()\n", "model.compile(optimizer='adam',\n", " loss=loss,\n", - " metrics=['accuracy'],\n", - " steps_per_execution=10)\n", - "\n", + " metrics=['accuracy'])\n", "log_dir = tempfile.mkdtemp()\n", - "\n", - "backup_restore_callback = tf.keras.callbacks.experimental.BackupAndRestore(\n", - " backup_dir = log_dir\n", - ")" + "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", + " backup_dir = log_dir)" ] }, { @@ -318,7 +336,7 @@ "id": "LRRWmZqsvMrq" }, "source": [ - "Now, start training the model with `Model.fit`. During training, checkpoints will be saved thanks to the `backup_restore_callback` defined above, while the `InterruptingCallback` will raise an artificial exception to simulate a failure." + "Start training the model with `Model.fit`. During training, checkpoints will be saved thanks to `tf.keras.callbacks.BackupAndRestore` instantiated above, while the `InterruptAtEpoch` class will raise an artificial exception to simulate a failure after the fourth epoch." 
] }, { @@ -333,8 +351,9 @@ " model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", - " callbacks=[backup_restore_callback, InterruptingCallback()])\n", + " callbacks=[backup_restore_callback, InterruptAtEpoch()])\n", "except Exception as e:\n", " print(f'{type(e).__name__}:{e}')" ] @@ -364,6 +383,108 @@ "model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nP2dnpMPxtYj" + }, + "source": [ + "Define another `Callback` class that artificially throws an error during the 140th step:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YardkAaBxr-c" + }, + "outputs": [], + "source": [ + "class InterruptAtStep(tf.keras.callbacks.Callback):\n", + " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_step=140):\n", + " self.total_step_count = 0\n", + " self.interrupting_step = interrupting_step\n", + "\n", + " def on_batch_begin(self, batch, logs=None):\n", + " self.total_step_count += 1\n", + "\n", + " def on_batch_end(self, batch, logs=None):\n", + " if self.total_step_count == self.interrupting_step:\n", + " print(\"\\nInterrupting at step count\", self.total_step_count)\n", + " raise RuntimeError('Interruption')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Af3VpehxyTpb" + }, + "source": [ + "Note: This section uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "To make sure the checkpoints are saved every 30 steps, set the `save_freq` in the `BackupAndRestore` callback to `30`. The `InterruptAtStep` will raise an artificial exception to simulate a failure at epoch 1 and step 40 (total step count 140). The checkpoint would be last saved at epoch 1 and step 20." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dHHCENDPyUHS" + }, + "outputs": [], + "source": [ + "log_dir_2 = tempfile.mkdtemp()\n", + "\n", + "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", + " backup_dir = log_dir_2, save_freq=30\n", + ")\n", + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'])\n", + "try:\n", + " model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback, InterruptAtStep()])\n", + "except Exception as e:\n", + " print(f'{type(e).__name__}:{e}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2-ggMFEHynMR" + }, + "source": [ + "Next, instantiate the Keras model, call `Model.compile`, and continue training the model with `Model.fit` from a previously saved checkpoint. Notice that the training starts from epoch 2 and step 21." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vT7Kx30NEqly" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'],\n", + " steps_per_execution=10)\n", + "model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", " callbacks=[backup_restore_callback])" ] @@ -452,7 +573,7 @@ "\n", "To learn more about fault tolerance and checkpointing in TensorFlow 2, consider the following documentation:\n", "\n", - "- The `tf.keras.callbacks.experimental.BackupAndRestore` callback API docs.\n", + "- The `tf.keras.callbacks.BackupAndRestore` callback API docs.\n", "- The `tf.train.Checkpoint` and `tf.train.CheckpointManager` API docs.\n", "- The [Training checkpoints](../../guide/checkpoint.ipynb) guide, including the _Writing checkpoints_ section.\n", "\n", @@ -467,7 +588,6 @@ "colab": { "collapsed_sections": [], "name": "fault_tolerance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/images/tensorboard_TF1.png b/site/en/guide/migrate/images/tensorboard_TF1.png index 18b7bbb12cf..294fbbcc5b5 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF1.png and b/site/en/guide/migrate/images/tensorboard_TF1.png differ diff --git a/site/en/guide/migrate/images/tensorboard_TF2.png b/site/en/guide/migrate/images/tensorboard_TF2.png index 55abb91fe6f..bbad8768210 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF2.png and b/site/en/guide/migrate/images/tensorboard_TF2.png differ diff --git a/site/en/guide/migrate/logging_stop_hook.ipynb b/site/en/guide/migrate/logging_stop_hook.ipynb index 3dbcd35e293..a1d60243c37 100644 --- a/site/en/guide/migrate/logging_stop_hook.ipynb +++ b/site/en/guide/migrate/logging_stop_hook.ipynb @@ -69,7 +69,7 @@ "source": [ "In TensorFlow 1, you use `tf.estimator.LoggingTensorHook` to monitor and log tensors, while `tf.estimator.StopAtStepHook` helps stop training at a specified step when training with `tf.estimator.Estimator`. This notebook demonstrates how to migrate from these APIs to their equivalents in TensorFlow 2 using custom Keras callbacks (`tf.keras.callbacks.Callback`) with `Model.fit`.\n", "\n", - "Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section) guides. For migrating from `SessionRunHook` in TensorFlow 1 to Keras callbacks in TensorFlow 2, check out the [Migrate training with assisted logic](sessionrunhook_callback.ipynb) guide." + "Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. 
You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback) and [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section) guides. For migrating from `SessionRunHook` in TensorFlow 1 to Keras callbacks in TensorFlow 2, check out the [Migrate training with assisted logic](sessionrunhook_callback.ipynb) guide." ] }, { @@ -277,7 +277,7 @@ "\n", "- API docs: `tf.keras.callbacks.Callback`\n", "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", diff --git a/site/en/guide/migrate/metrics_optimizers.ipynb b/site/en/guide/migrate/metrics_optimizers.ipynb index a720b3c8e9a..61afb35aea6 100644 --- a/site/en/guide/migrate/metrics_optimizers.ipynb +++ b/site/en/guide/migrate/metrics_optimizers.ipynb @@ -144,7 +144,7 @@ "\n", "def _model_fn(features, labels, mode):\n", " logits = tf1.layers.Dense(2)(features)\n", - " predictions = tf.argmax(input=logits, axis=1)\n", + " predictions = tf.math.argmax(input=logits, axis=1)\n", " loss = tf1.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)\n", " optimizer = tf1.train.AdagradOptimizer(0.05)\n", " train_op = optimizer.minimize(loss, global_step=tf1.train.get_global_step())\n", @@ -223,7 +223,7 @@ "\n", "inputs = tf.keras.Input((2,))\n", "logits = tf.keras.layers.Dense(2)(inputs)\n", - "predictions = tf.argmax(input=logits, axis=1)\n", + "predictions = tf.math.argmax(input=logits, axis=1)\n", "model = tf.keras.models.Model(inputs, predictions)\n", "optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", "\n", @@ -283,7 +283,7 @@ "id": "E3F3ElcyadW-" }, "source": [ - "For more details about `tf.keras.metrics.Metric`, please take a look for the API documentation at `tf.keras.metrics.Metric`, as well as the [migration guide](https://www.tensorflow.org/guide/migrate#new-style_metrics_and_losses)." + "For more details, refer to the API documentation for `tf.keras.metrics.Metric`, as well as the [migration guide](https://www.tensorflow.org/guide/effective_tf2#new-style_metrics_and_losses)." ] }, { @@ -370,8 +370,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "metrics.ipynb", - "provenance": [], + "name": "metrics_optimizers.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/migrating_checkpoints.ipynb b/site/en/guide/migrate/migrating_checkpoints.ipynb index 38569cc7615..a63789037ff 100644 --- a/site/en/guide/migrate/migrating_checkpoints.ipynb +++ b/site/en/guide/migrate/migrating_checkpoints.ipynb @@ -834,7 +834,7 @@ "\n", "**TF2 checkpoints work with Keras's `build()` step**\n", "\n", - "`tf.train.Checkpoint.restore` has a mechanism called *delayed restoration* which\n", + "`tf.train.Checkpoint.restore` has a mechanism called *deferred restoration* which\n", "allows `tf.Module` and Keras objects to store variable values if the variable has not yet been created. 
This allows *initialized* models to load weights and *build* after.\n", "\n", "```\n", diff --git a/site/en/guide/migrate/migrating_estimator.ipynb b/site/en/guide/migrate/migrating_estimator.ipynb index ffd8c9ce153..4d3259babb8 100644 --- a/site/en/guide/migrate/migrating_estimator.ipynb +++ b/site/en/guide/migrate/migrating_estimator.ipynb @@ -70,7 +70,7 @@ "This guide demonstrates how to migrate from TensorFlow 1's `tf.estimator.Estimator` APIs to TensorFlow 2's `tf.keras` APIs. First, you will set up and run a basic model for training and evaluation with `tf.estimator.Estimator`. Then, you will perform the equivalent steps in TensorFlow 2 with the `tf.keras` APIs. You will also learn how to customize the training step by subclassing `tf.keras.Model` and using `tf.GradientTape`.\n", "\n", "- In TensorFlow 1, the high-level `tf.estimator.Estimator` APIs let you train and evaluate a model, as well as perform inference and save your model (for serving).\n", - "- In TensorFlow 2, use the Keras APIs to perform the aforementioned tasks, such as [model building](../../guide/keras/custom_layers_and_models.ipynb), gradient application, [training](../../guide/keras/customizing_what_happens_in_fit.ipynb), evaluation, and prediction.\n", + "- In TensorFlow 2, use the Keras APIs to perform the aforementioned tasks, such as [model building](https://www.tensorflow.org/guide/keras/custom_layers_and_models), gradient application, [training](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit), evaluation, and prediction.\n", "\n", "(For migrating model/checkpoint saving workflows to TensorFlow 2, check out the [SavedModel](saved_model.ipynb) and [Checkpoint](checkpoint_saved.ipynb) migration guides.)" ] @@ -197,10 +197,10 @@ "source": [ "## TensorFlow 2: Train and evaluate with the built-in Keras methods\n", "\n", - "This example demonstrates how to perform training and evaluation with Keras `Model.fit` and `Model.evaluate` in TensorFlow 2. (You can learn more in the [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) guide.)\n", + "This example demonstrates how to perform training and evaluation with Keras `Model.fit` and `Model.evaluate` in TensorFlow 2. (You can learn more in the [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) guide.)\n", "\n", "- Start by preparing the dataset pipeline with the `tf.data.Dataset` APIs.\n", - "- Define a simple Keras [Sequential](../../guide/keras/sequential_model.ipynb) model with one linear (`tf.keras.layers.Dense`) layer.\n", + "- Define a simple Keras [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) model with one linear (`tf.keras.layers.Dense`) layer.\n", "- Instantiate an Adagrad optimizer (`tf.keras.optimizers.Adagrad`).\n", "- Configure the model for training by passing the `optimizer` variable and the mean-squared error (`\"mse\"`) loss to `Model.compile`." ] @@ -278,9 +278,9 @@ "id": "gHx_RUL8xcJ3" }, "source": [ - "In TensorFlow 2, you can also write your own custom training step function with `tf.GradientTape` to perform forward and backward passes, while still taking advantage of the built-in training support, such as `tf.keras.callbacks.Callback` and `tf.distribute.Strategy`. 
(Learn more in [Customizing what happens in Model.fit](../../guide/keras/customizing_what_happens_in_fit.ipynb) and [Writing custom training loops from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb).)\n", + "In TensorFlow 2, you can also write your own custom training step function with `tf.GradientTape` to perform forward and backward passes, while still taking advantage of the built-in training support, such as `tf.keras.callbacks.Callback` and `tf.distribute.Strategy`. (Learn more in [Customizing what happens in Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) and [Writing custom training loops from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).)\n", "\n", - "In this example, start by creating a custom `tf.keras.Model` by subclassing `tf.keras.Sequential` that overrides `Model.train_step`. (Learn more about [subclassing tf.keras.Model](../../keras/custom_layers_and_models.ipynb)). Inside that class, define a custom `train_step` function that for each batch of data performs a forward pass and backward pass during one training step.\n" + "In this example, start by creating a custom `tf.keras.Model` by subclassing `tf.keras.Sequential` that overrides `Model.train_step`. (Learn more about [subclassing tf.keras.Model](https://www.tensorflow.org/guide/keras/custom_layers_and_models)). Inside that class, define a custom `train_step` function that for each batch of data performs a forward pass and backward pass during one training step.\n" ] }, { @@ -394,10 +394,10 @@ "\n", "Additional Keras resources you may find useful:\n", "\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb)\n", - "- Guide: [Customize what happens in Model.fit](../../guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", - "- Guide: [Making new Keras layers and models via subclassing](../../guide/keras/custom_layers_and_models.ipynb)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate)\n", + "- Guide: [Customize what happens in Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", + "- Guide: [Making new Keras layers and models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models)\n", "\n", "The following guides can assist with migrating distribution strategy workflows from `tf.estimator` APIs:\n", "\n", diff --git a/site/en/guide/migrate/migrating_feature_columns.ipynb b/site/en/guide/migrate/migrating_feature_columns.ipynb index 0e08fd431ea..b2dbc5fe7c0 100644 --- a/site/en/guide/migrate/migrating_feature_columns.ipynb +++ b/site/en/guide/migrate/migrating_feature_columns.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migrating feature_columns to TF2's Keras Preprocessing Layers\n", + "# Migrate `tf.feature_column`s to Keras preprocessing layers\n", "\n", "\n", " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", @@ -67,11 +67,11 @@ "id": "-5jGPDA2PDPI" }, "source": [ - "Training a model will usually come with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TF1, this feature preprocessing is usually done with the `tf.feature_column` API. In TF2, this preprocessing can be done directly with Keras layers, called _preprocessing layers_.\n", + "Training a model usually comes with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TensorFlow 1, you usually perform feature preprocessing with the `tf.feature_column` API. In TensorFlow 2, you can do this directly with Keras preprocessing layers.\n", "\n", - "In this migration guide, you will perform some common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", + "This migration guide demonstrates common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", "\n", - "First, start with a couple of necessary imports," + "First, start with a couple of necessary imports:" ] }, { @@ -93,7 +93,7 @@ "id": "NVPYTQAWtDwH" }, "source": [ - "and add a utility for calling a feature column for demonstration:" + "Now, add a utility function for calling a feature column for demonstration:" ] }, { @@ -463,6 +463,124 @@ "embedding(string_lookup_layer(['small', 'medium', 'large']))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "UwqvADV6HRdC" + }, + "source": [ + "## Summing weighted categorical data\n", + "\n", + "In some cases, you need to deal with categorical data where each occurance of a category comes with an associated weight. In feature columns, this is handled with `tf.feature_column.weighted_categorical_column`. When paired with an `indicator_column`, this has the effect of summing weights per category." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "02HqjPLMRxWn" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", + " 'ids', num_buckets=20)\n", + "weighted_categorical_col = tf1.feature_column.weighted_categorical_column(\n", + " categorical_col, 'weights')\n", + "indicator_col = tf1.feature_column.indicator_column(weighted_categorical_col)\n", + "call_feature_columns(indicator_col, {'ids': ids, 'weights': weights})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "98jaq7Q3S9aG" + }, + "source": [ + "In Keras, this can be done by passing a `count_weights` input to `tf.keras.layers.CategoryEncoding` with `output_mode='count'`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JsoYUUgRS7hu" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "# Using sparse output is more efficient when `num_tokens` is large.\n", + "count_layer = tf.keras.layers.CategoryEncoding(\n", + " num_tokens=20, output_mode='count', sparse=True)\n", + "tf.sparse.to_dense(count_layer(ids, count_weights=weights))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gBJxb6y2GasI" + }, + "source": [ + "## Embedding weighted categorical data\n", + "\n", + "You might alternately want to embed weighted categorical inputs. In feature columns, the `embedding_column` contains a `combiner` argument. If any sample\n", + "contains multiple entries for a category, they will be combined according to the argument setting (by default `'mean'`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AjOt1wgmT5mM" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", + " 'ids', num_buckets=20)\n", + "weighted_categorical_col = tf1.feature_column.weighted_categorical_column(\n", + " categorical_col, 'weights')\n", + "embedding_col = tf1.feature_column.embedding_column(\n", + " weighted_categorical_col, 4, combiner='mean')\n", + "call_feature_columns(embedding_col, {'ids': ids, 'weights': weights})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fd6eluARXndC" + }, + "source": [ + "In Keras, there is no `combiner` option to `tf.keras.layers.Embedding`, but you can achieve the same effect with `tf.keras.layers.Dense`. The `embedding_column` above is simply linearly combining embedding vectors according to category weight. Though not obvious at first, it is exactly equivalent to representing your categorical inputs as a sparse weight vector of size `(num_tokens)`, and multiplying them by a `Dense` kernel of shape `(embedding_size, num_tokens)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-vZvPyiYilE" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "# For `combiner='mean'`, normalize your weights to sum to 1. Removing this line\n", + "# would be equivalent to an `embedding_column` with `combiner='sum'`.\n", + "weights = weights / tf.reduce_sum(weights, axis=-1, keepdims=True)\n", + "\n", + "count_layer = tf.keras.layers.CategoryEncoding(\n", + " num_tokens=20, output_mode='count', sparse=True)\n", + "embedding_layer = tf.keras.layers.Dense(4, use_bias=False)\n", + "embedding_layer(count_layer(ids, count_weights=weights))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -497,7 +615,7 @@ "id": "e_4Xx2c37lqD" }, "source": [ - "Define some common constants for both TF1 and TF2 workflows:" + "Define some common constants for both TensorFlow 1 and TensorFlow 2 workflows:" ] }, { @@ -536,17 +654,17 @@ "source": [ "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", " 'type', num_buckets=one_hot_dims)\n", - "# Convert index to one-hot; e.g. 
[2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "indicator_col = tf1.feature_column.indicator_column(categorical_col)\n", "\n", - "# Convert strings to indices; e.g. ['small'] -> [1].\n", + "# Convert strings to indices; e.g., ['small'] -> [1].\n", "vocab_col = tf1.feature_column.categorical_column_with_vocabulary_list(\n", " 'size', vocabulary_list=vocab, num_oov_buckets=1)\n", "# Embed the indices.\n", "embedding_col = tf1.feature_column.embedding_column(vocab_col, embedding_dims)\n", "\n", "normalizer_fn = lambda x: (x - weight_mean) / math.sqrt(weight_variance)\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "numeric_col = tf1.feature_column.numeric_column(\n", " 'weight', normalizer_fn=normalizer_fn)\n", "\n", @@ -609,12 +727,12 @@ " 'size': tf.keras.Input(shape=(), dtype='string'),\n", " 'weight': tf.keras.Input(shape=(), dtype='float32'),\n", "}\n", - "# Convert index to one-hot; e.g. [2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "type_output = tf.keras.layers.CategoryEncoding(\n", " one_hot_dims, output_mode='one_hot')(inputs['type'])\n", - "# Convert size strings to indices; e.g. ['small'] -> [1].\n", + "# Convert size strings to indices; e.g., ['small'] -> [1].\n", "size_output = tf.keras.layers.StringLookup(vocabulary=vocab)(inputs['size'])\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "weight_output = tf.keras.layers.Normalization(\n", " axis=None, mean=weight_mean, variance=weight_variance)(inputs['weight'])\n", "outputs = {\n", @@ -727,8 +845,8 @@ "outputs": [], "source": [ "inputs = preprocessing_model.input\n", - "outpus = training_model(preprocessing_model(inputs))\n", - "inference_model = tf.keras.Model(inputs, outpus)\n", + "outputs = training_model(preprocessing_model(inputs))\n", + "inference_model = tf.keras.Model(inputs, outputs)\n", "\n", "predict_dataset = tf.data.Dataset.from_tensor_slices(predict_features).batch(1)\n", "inference_model.predict(predict_dataset)" @@ -740,7 +858,7 @@ "id": "O01VQIxCWBxU" }, "source": [ - "This composed model can be saved as a [SavedModel](https://www.tensorflow.org/guide/saved_model) for later use." + "This composed model can be saved as a `.keras` file for later use." ] }, { @@ -751,8 +869,8 @@ }, "outputs": [], "source": [ - "inference_model.save('model')\n", - "restored_model = tf.keras.models.load_model('model')\n", + "inference_model.save('model.keras')\n", + "restored_model = tf.keras.models.load_model('model.keras')\n", "restored_model.predict(predict_dataset)" ] }, @@ -762,7 +880,7 @@ "id": "IXMBwzggwUjI" }, "source": [ - "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. This has performence benefits, as you can both [prefetch](https://www.tensorflow.org/guide/data_performance#prefetching) preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model. As this guide shows, seperating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligable, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." 
+ "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. This has performance benefits, as you can both prefetch preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model (learn more in the _Prefetching_ section of the [Better performance with the `tf.data` API](../data_performance.ipynb) guide). As this guide shows, separating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligible, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." ] }, { @@ -774,76 +892,78 @@ "## Feature column equivalence table\n", "\n", "For reference, here is an approximate correspondence between feature columns and\n", - "preprocessing layers:\n", + "Keras preprocessing layers:
\n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", "
Feature ColumnKeras LayerFeature columnKeras layer
`feature_column.bucketized_column``layers.Discretization``tf.feature_column.bucketized_column``tf.keras.layers.Discretization`
`feature_column.categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.categorical_column_with_vocabulary_file``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.categorical_column_with_vocabulary_list``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.crossed_column`Not implemented.`tf.feature_column.crossed_column``tf.keras.layers.experimental.preprocessing.HashedCrossing`
`feature_column.embedding_column``layers.Embedding``tf.feature_column.embedding_column``tf.keras.layers.Embedding`
`feature_column.indicator_column``tf.feature_column.indicator_column``output_mode='one_hot'` or `output_mode='multi_hot'`*
`feature_column.numeric_column``layers.Normalization``tf.feature_column.numeric_column``tf.keras.layers.Normalization`
`feature_column.sequence_categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.sequence_categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.sequence_categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.sequence_categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.sequence_categorical_column_with_vocabulary_file``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_categorical_column_with_vocabulary_list``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_numeric_column``layers.Normalization``tf.feature_column.sequence_numeric_column``tf.keras.layers.Normalization`
`feature_column.weighted_categorical_column``layers.CategoryEncoding``tf.feature_column.weighted_categorical_column``tf.keras.layers.CategoryEncoding`
\n", "\n", - "\\* `output_mode` can be passed to `layers.CategoryEncoding`, `layers.StringLookup`, `layers.IntegerLookup`, and `layers.TextVectorization`.\n", + "\\* The `output_mode` can be passed to `tf.keras.layers.CategoryEncoding`, `tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, and `tf.keras.layers.TextVectorization`.\n", + "\n", + "† `tf.keras.layers.TextVectorization` can handle freeform text input directly (for example, entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TensorFlow 1, but may offer a convenient replacement for ad-hoc text preprocessing.\n", "\n", - "† `layers.TextVectorization` can handle freeform text input directly (e.g. entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TF1, but may offer a convinient replacement for ad-hoc text preprocessing." + "Note: Linear estimators, such as `tf.estimator.LinearClassifier`, can handle direct categorical input (integer indices) without an `embedding_column` or `indicator_column`. However, integer indices cannot be passed directly to `tf.keras.layers.Dense` or `tf.keras.experimental.LinearModel`. These inputs should be first encoded with `tf.layers.CategoryEncoding` with `output_mode='count'` (and `sparse=True` if the category sizes are large) before calling into `Dense` or `LinearModel`." ] }, { @@ -852,10 +972,10 @@ "id": "AQCJ6lM3YDq_" }, "source": [ - "## Next Steps\n", + "## Next steps\n", "\n", - " - For more information on keras preprocessing layers, see [the guide to preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", - " - For a more in-depth example of applying preprocessing layers to structured data, see [the structured data tutorial](https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers)." + " - For more information on Keras preprocessing layers, go to the [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) guide.\n", + " - For a more in-depth example of applying preprocessing layers to structured data, refer to the [Classify structured data using Keras preprocessing layers](../../tutorials/structured_data/preprocessing_layers.ipynb) tutorial." ] } ], @@ -863,7 +983,6 @@ "colab": { "collapsed_sections": [], "name": "migrating_feature_columns.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/migration_debugging.ipynb b/site/en/guide/migrate/migration_debugging.ipynb new file mode 100644 index 00000000000..25cb7f9065f --- /dev/null +++ b/site/en/guide/migrate/migration_debugging.ipynb @@ -0,0 +1,799 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FEL3NlTTDlSX" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "FlUw7tSKbtg4" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77z2OchJTk0l" + }, + "source": [ + "# Debug a TensorFlow 2 migrated training pipeline\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zTwPu-w6M5sz" + }, + "source": [ + "This notebook demonstrates how to debug a training pipeline when migrating to TensorFlow 2 (TF2). It consists of following components:\n", + "1. Suggested steps and code samples for debugging training pipeline\n", + "2. Tools for debugging\n", + "3. Other related resources\n", + "\n", + "One assumption is you have the TensorFlow 1 (TF1.x) code and trained models for comparison, and you want to build a TF2 model that achieves similar validation accuracy.\n", + "\n", + "This notebook does **NOT** cover debugging performance issues for training/inference speed or memory usage." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fKm9R4CtOAP3" + }, + "source": [ + "## Debugging workflow\n", + "\n", + "Below is a general workflow for debugging your TF2 training pipelines. Note that you do not need to follow these steps in order. You can also use a binary search approach where you test the model in an intermediate step and narrow down the debugging scope. \n", + "\n", + "1. Fix compile and runtime errors\n", + "\n", + "2. Single forward pass validation (in a separate\n", + " [guide](./validate_correctness.ipynb))\n", + "\n", + " a. On single CPU device\n", + "\n", + " * Verify variables are created only once\n", + " * Check variable counts, names, and shapes match\n", + " * Reset all variables, check numerical equivalence with all randomness\n", + " disabled\n", + " * Align random number generation, check numerical equivalence in inference\n", + " * (Optional) Check checkpoints are loaded properly and TF1.x/TF2 models\n", + " generate identical output\n", + "\n", + " b. On single GPU/TPU device\n", + "\n", + " c. With multi-device strategies\n", + "\n", + "3. Model training numerical equivalence validation for a few steps (code\n", + " samples available below)\n", + "\n", + " a. Single training step validation using small and fixed data on single CPU\n", + " device. Specifically, check numerical equivalence for the following\n", + " components\n", + "\n", + " * losses computation\n", + " * metrics\n", + " * learning rate\n", + " * gradient computation and update\n", + "\n", + " b. Check statistics after training 3 or more steps to verify optimizer behaviors like the momentum, still with fixed data on single CPU device\n", + "\n", + " c. On single GPU/TPU device\n", + "\n", + " d. With multi-device strategies (check the intro for [MultiProcessRunner](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/multi_process_runner.py#L108) at the bottom)\n", + "\n", + "4. End-to-end convergence testing on real dataset\n", + "\n", + " a. Check training behaviors with TensorBoard\n", + "\n", + " * use simple optimizers e.g., SGD and simple distribution strategies e.g.\n", + " `tf.distribute.OneDeviceStrategy` first\n", + " * training metrics\n", + " * evaluation metrics\n", + " * figure out what the reasonable tolerance for inherent randomness is\n", + "\n", + " b. Check equivalence with advanced optimizer/learning rate\n", + " scheduler/distribution strategies\n", + "\n", + " c. Check equivalence when using mixed precision\n", + "\n", + "5. 
Additional product benchmarks" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XKakQBI9-FLb" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i1ghHyXl-Oqd" + }, + "outputs": [], + "source": [ + "# The `DeterministicRandomTestTool` is only available from Tensorflow 2.8:\n", + "!pip install -q \"tensorflow==2.9.*\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "usyRSlIRl3r2" + }, + "source": [ + "### Single forward pass validation \n", + "\n", + "Single forward pass validation, including checkpoint loading, is covered in a different [colab](./validate_correctness.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HVBQbsZeVL_V" + }, + "outputs": [], + "source": [ + "import sys\n", + "import unittest\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow.compat.v1 as v1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4M104dt7m5cC" + }, + "source": [ + "### Model training numerical equivalence validation for a few steps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v2Nz2Ni1EkMz" + }, + "source": [ + "Set up model configuration and prepare a fake dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hUxXadzKU9rT" + }, + "outputs": [], + "source": [ + "params = {\n", + " 'input_size': 3,\n", + " 'num_classes': 3,\n", + " 'layer_1_size': 2,\n", + " 'layer_2_size': 2,\n", + " 'num_train_steps': 100,\n", + " 'init_lr': 1e-3,\n", + " 'end_lr': 0.0,\n", + " 'decay_steps': 1000,\n", + " 'lr_power': 1.0,\n", + "}\n", + "\n", + "# make a small fixed dataset\n", + "fake_x = np.ones((2, params['input_size']), dtype=np.float32)\n", + "fake_y = np.zeros((2, params['num_classes']), dtype=np.int32)\n", + "fake_y[0][0] = 1\n", + "fake_y[1][1] = 1\n", + "\n", + "step_num = 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lV_n3Ukmz4Un" + }, + "source": [ + "Define the TF1.x model." 
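Before the TF1.x model definition below, a brief aside: step 2a of the debugging workflow above calls for checking that variable counts, names, and shapes match between the two implementations. The following is a minimal sketch of that check, not part of the original notebook; `tf1_variables` and `tf2_variables` are hypothetical stand-ins for the variable lists of whichever two models you are comparing.

```python
import tensorflow as tf

def describe_variables(variables):
  # Sort by name so the listing is stable across runs.
  return sorted((v.name, tuple(v.shape.as_list())) for v in variables)

def check_variables_match(tf1_variables, tf2_variables):
  # Counts must agree.
  assert len(tf1_variables) == len(tf2_variables), 'variable counts differ'
  # Names usually differ between TF1.x and TF2, so compare shapes as a sorted
  # multiset and print the name/shape pairs for manual inspection.
  shapes_1 = sorted(tuple(v.shape.as_list()) for v in tf1_variables)
  shapes_2 = sorted(tuple(v.shape.as_list()) for v in tf2_variables)
  assert shapes_1 == shapes_2, 'variable shapes differ'
  for pair in zip(describe_variables(tf1_variables),
                  describe_variables(tf2_variables)):
    print(pair)
```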
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ATa5fzL8mAwl" + }, + "outputs": [], + "source": [ + "# Assume there is an existing TF1.x model using estimator API\n", + "# Wrap the model_fn to log necessary tensors for result comparison\n", + "class SimpleModelWrapper():\n", + " def __init__(self):\n", + " self.logged_ops = {}\n", + " self.logs = {\n", + " 'step': [],\n", + " 'lr': [],\n", + " 'loss': [],\n", + " 'grads_and_vars': [],\n", + " 'layer_out': []}\n", + " \n", + " def model_fn(self, features, labels, mode, params):\n", + " out_1 = tf.compat.v1.layers.dense(features, units=params['layer_1_size'])\n", + " out_2 = tf.compat.v1.layers.dense(out_1, units=params['layer_2_size'])\n", + " logits = tf.compat.v1.layers.dense(out_2, units=params['num_classes'])\n", + " loss = tf.compat.v1.losses.softmax_cross_entropy(labels, logits)\n", + "\n", + " # skip EstimatorSpec details for prediction and evaluation \n", + " if mode == tf.estimator.ModeKeys.PREDICT:\n", + " pass\n", + " if mode == tf.estimator.ModeKeys.EVAL:\n", + " pass\n", + " assert mode == tf.estimator.ModeKeys.TRAIN\n", + "\n", + " global_step = tf.compat.v1.train.get_or_create_global_step()\n", + " lr = tf.compat.v1.train.polynomial_decay(\n", + " learning_rate=params['init_lr'],\n", + " global_step=global_step,\n", + " decay_steps=params['decay_steps'],\n", + " end_learning_rate=params['end_lr'],\n", + " power=params['lr_power'])\n", + " \n", + " optimizer = tf.compat.v1.train.GradientDescentOptimizer(lr)\n", + " grads_and_vars = optimizer.compute_gradients(\n", + " loss=loss,\n", + " var_list=graph.get_collection(\n", + " tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))\n", + " train_op = optimizer.apply_gradients(\n", + " grads_and_vars,\n", + " global_step=global_step)\n", + " \n", + " # log tensors\n", + " self.logged_ops['step'] = global_step\n", + " self.logged_ops['lr'] = lr\n", + " self.logged_ops['loss'] = loss\n", + " self.logged_ops['grads_and_vars'] = grads_and_vars\n", + " self.logged_ops['layer_out'] = {\n", + " 'layer_1': out_1,\n", + " 'layer_2': out_2,\n", + " 'logits': logits}\n", + "\n", + " return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)\n", + "\n", + " def update_logs(self, logs):\n", + " for key in logs.keys():\n", + " self.logs[key].append(logs[key])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kki9yILSKS7f" + }, + "source": [ + "The following [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution.\n", + "\n", + "The tool provides two testing modes: \n", + "1. `constant`, which uses the same seed for every single operation no matter how many times it has been called, and\n", + "2. `num_random_ops`, which uses the number of previously-observed stateful random operations as the operation seed.\n", + "\n", + "This applies both to the stateful random operations used for creating and initializing variables, and to the stateful random operations used in computation (such as for dropout layers)."
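Before the tool is applied to the models in this notebook, here is a small, self-contained sketch (not part of the original notebook) of the guarantee it provides in `num_random_ops` mode: two independent runs of the same random-drawing code under fresh scopes produce op-for-op identical values, which is what makes the TF1.x-versus-TF2 comparisons below meaningful.

```python
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as v1

def draw_random_values():
  # Stand-in for "build and initialize a model": two stateful random ops.
  return tf.random.normal((2, 2)), tf.random.uniform((2, 2))

random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')
with random_tool.scope():
  first_run = draw_random_values()

random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')
with random_tool.scope():
  second_run = draw_random_values()

# Each op's seed depends only on how many random ops ran before it within the
# scope, so the two runs line up op for op.
for a, b in zip(first_run, second_run):
  np.testing.assert_allclose(a.numpy(), b.numpy())
```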
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X6Y3RWMoKOl8" + }, + "outputs": [], + "source": [ + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mk5-ZzxcErX5" + }, + "source": [ + "Run the TF1.x model in graph mode. Collect statistics for first 3 training steps for numerical equivalence comparison." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r5zhJHvsWA24" + }, + "outputs": [], + "source": [ + "with random_tool.scope():\n", + " graph = tf.Graph()\n", + " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", + " model_tf1 = SimpleModelWrapper()\n", + " # build the model\n", + " inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, params['input_size']))\n", + " labels = tf.compat.v1.placeholder(tf.float32, shape=(None, params['num_classes']))\n", + " spec = model_tf1.model_fn(inputs, labels, tf.estimator.ModeKeys.TRAIN, params)\n", + " train_op = spec.train_op\n", + "\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " for step in range(step_num):\n", + " # log everything and update the model for one step\n", + " logs, _ = sess.run(\n", + " [model_tf1.logged_ops, train_op],\n", + " feed_dict={inputs: fake_x, labels: fake_y})\n", + " model_tf1.update_logs(logs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eZxjI8Nxz9Ea" + }, + "source": [ + "Define the TF2 model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AA67rh2TkS1M" + }, + "outputs": [], + "source": [ + "class SimpleModel(tf.keras.Model):\n", + " def __init__(self, params, *args, **kwargs):\n", + " super(SimpleModel, self).__init__(*args, **kwargs)\n", + " # define the model\n", + " self.dense_1 = tf.keras.layers.Dense(params['layer_1_size'])\n", + " self.dense_2 = tf.keras.layers.Dense(params['layer_2_size'])\n", + " self.out = tf.keras.layers.Dense(params['num_classes'])\n", + " learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(\n", + " initial_learning_rate=params['init_lr'],\n", + " decay_steps=params['decay_steps'],\n", + " end_learning_rate=params['end_lr'],\n", + " power=params['lr_power']) \n", + " self.optimizer = tf.keras.optimizers.legacy.SGD(learning_rate_fn)\n", + " self.compiled_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n", + " self.logs = {\n", + " 'lr': [],\n", + " 'loss': [],\n", + " 'grads': [],\n", + " 'weights': [],\n", + " 'layer_out': []}\n", + "\n", + " def call(self, inputs):\n", + " out_1 = self.dense_1(inputs)\n", + " out_2 = self.dense_2(out_1)\n", + " logits = self.out(out_2)\n", + " # log output features for every layer for comparison\n", + " layer_wise_out = {\n", + " 'layer_1': out_1,\n", + " 'layer_2': out_2,\n", + " 'logits': logits}\n", + " self.logs['layer_out'].append(layer_wise_out)\n", + " return logits\n", + "\n", + " def train_step(self, data):\n", + " x, y = data\n", + " with tf.GradientTape() as tape:\n", + " logits = self(x)\n", + " loss = self.compiled_loss(y, logits)\n", + " grads = tape.gradient(loss, self.trainable_weights)\n", + " # log training statistics\n", + " step = self.optimizer.iterations.numpy()\n", + " self.logs['lr'].append(self.optimizer.learning_rate(step).numpy())\n", + " self.logs['loss'].append(loss.numpy())\n", + " self.logs['grads'].append(grads)\n", + " self.logs['weights'].append(self.trainable_weights)\n", + " # update model\n", + " 
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))\n", + " return" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I5smAcaEE8nX" + }, + "source": [ + "Run the TF2 model in eager mode. Collect statistics for first 3 training steps for numerical equivalence comparison." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q0AbXF_eE8cS" + }, + "outputs": [], + "source": [ + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + "with random_tool.scope():\n", + " model_tf2 = SimpleModel(params)\n", + " for step in range(step_num):\n", + " model_tf2.train_step([fake_x, fake_y])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cjJDjLcAz_gU" + }, + "source": [ + "Compare numerical equivalence for first few training steps.\n", + "\n", + "You can also check the [Validating correctness & numerical equivalence notebook](./validate_correctness.ipynb) for additional advice for numerical equivalence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6CbCUbsCiabC" + }, + "outputs": [], + "source": [ + "np.testing.assert_allclose(model_tf1.logs['lr'], model_tf2.logs['lr'])\n", + "np.testing.assert_allclose(model_tf1.logs['loss'], model_tf2.logs['loss'])\n", + "for step in range(step_num):\n", + " for name in model_tf1.logs['layer_out'][step]:\n", + " np.testing.assert_allclose(\n", + " model_tf1.logs['layer_out'][step][name],\n", + " model_tf2.logs['layer_out'][step][name])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dhVuuciimLIY" + }, + "source": [ + "#### Unit tests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sXZYFC6Hhqeb" + }, + "source": [ + "There are a few types of unit testing that can help debug your migration code.\n", + "1. Single forward pass validation\n", + "2. Model training numerical equivalence validation for a few steps\n", + "3. Benchmark inference performance\n", + "4. The trained model makes correct predictions on fixed and simple data points\n", + "\n", + "You can use `@parameterized.parameters` to test models with different configurations. [Details with code sample](https://github.com/abseil/abseil-py/blob/master/absl/testing/parameterized.py).\n", + "\n", + "Note that it's possible to run session APIs and eager execution in the same test case. The code snippets below show how." 
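In addition to the combined session-and-eager test case shown next, the `@parameterized.parameters` idea mentioned above can be sketched roughly as follows. This is illustrative only: the tolerance values and loss lists are placeholders rather than results from this notebook, and mixing in `tf.test.TestCase` supplies `assertAllClose`.

```python
from absl.testing import parameterized
import tensorflow as tf

class ToleranceSweepTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      (1e-5, 0.0),  # tight absolute tolerance only
      (0.0, 0.05),  # 5% relative tolerance only
  )
  def test_losses_close(self, atol, rtol):
    # Hypothetical recorded losses; in practice these would come from the
    # `model_tf1.logs` and `model_tf2.logs` dictionaries built above.
    tf1_losses = [0.101, 0.099, 0.098]
    tf2_losses = [0.101, 0.099, 0.098]
    self.assertAllClose(tf1_losses, tf2_losses, atol=atol, rtol=rtol)

if __name__ == '__main__':
  tf.test.main()
```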
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CdHqkgPPM2Bj" + }, + "outputs": [], + "source": [ + "import unittest\n", + "\n", + "class TestNumericalEquivalence(unittest.TestCase):\n", + "\n", + " # copied from code samples above\n", + " def setup(self):\n", + " # record statistics for 100 training steps\n", + " step_num = 100\n", + "\n", + " # setup TF 1 model\n", + " random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + " with random_tool.scope():\n", + " # run TF1.x code in graph mode with context management\n", + " graph = tf.Graph()\n", + " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", + " self.model_tf1 = SimpleModelWrapper()\n", + " # build the model\n", + " inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, params['input_size']))\n", + " labels = tf.compat.v1.placeholder(tf.float32, shape=(None, params['num_classes']))\n", + " spec = self.model_tf1.model_fn(inputs, labels, tf.estimator.ModeKeys.TRAIN, params)\n", + " train_op = spec.train_op\n", + "\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " for step in range(step_num):\n", + " # log everything and update the model for one step\n", + " logs, _ = sess.run(\n", + " [self.model_tf1.logged_ops, train_op],\n", + " feed_dict={inputs: fake_x, labels: fake_y})\n", + " self.model_tf1.update_logs(logs)\n", + "\n", + " # setup TF2 model\n", + " random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + " with random_tool.scope():\n", + " self.model_tf2 = SimpleModel(params)\n", + " for step in range(step_num):\n", + " self.model_tf2.train_step([fake_x, fake_y])\n", + " \n", + " def test_learning_rate(self):\n", + " np.testing.assert_allclose(\n", + " self.model_tf1.logs['lr'],\n", + " self.model_tf2.logs['lr'])\n", + "\n", + " def test_training_loss(self):\n", + " # adopt different tolerance strategies before and after 10 steps\n", + " first_n_step = 10\n", + "\n", + " # absolute difference is limited below 1e-5\n", + " # set `equal_nan` to be False to detect potential NaN loss issues\n", + " abosolute_tolerance = 1e-5\n", + " np.testing.assert_allclose(\n", + " actual=self.model_tf1.logs['loss'][:first_n_step],\n", + " desired=self.model_tf2.logs['loss'][:first_n_step],\n", + " atol=abosolute_tolerance,\n", + " equal_nan=False)\n", + " \n", + " # relative difference is limited below 5%\n", + " relative_tolerance = 0.05\n", + " np.testing.assert_allclose(self.model_tf1.logs['loss'][first_n_step:],\n", + " self.model_tf2.logs['loss'][first_n_step:],\n", + " rtol=relative_tolerance,\n", + " equal_nan=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gshSQdKIddpZ" + }, + "source": [ + "## Debugging tools" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CkMfCaJRclKv" + }, + "source": [ + "### tf.print\n", + "\n", + "tf.print vs print/logging.info\n", + "\n", + "- With configurable arguments, `tf.print` can recursively display the first and last few elements of each dimension for printed tensors. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/print) for details.\n", + "- For eager execution, both `print` and `tf.print` print the value of the tensor. But `print` may involve device-to-host copy, which can potentially slow down your code. \n", + "- For graph mode including usage inside `tf.function`, you need to use `tf.print` to print the actual tensor value. 
`tf.print` is compiled into an op in the graph, whereas `print` and `logging.info` only log at tracing time, which is often not what you want. \n", + "- `tf.print` also supports printing composite tensors like `tf.RaggedTensor` and `tf.sparse.SparseTensor`.\n", + "- You can also use a callback to monitor metrics and variables. Please check how to use custom callbacks with [logs dict](https://www.tensorflow.org/guide/keras/custom_callback#usage_of_logs_dict) and [self.model attribute](https://www.tensorflow.org/guide/keras/custom_callback#usage_of_selfmodel_attribute)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S-5h3cX8Dc50" + }, + "source": [ + "tf.print vs print inside tf.function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gRED9FMyDKih" + }, + "outputs": [], + "source": [ + "# `print` prints info of tensor object\n", + "# `tf.print` prints the tensor value\n", + "@tf.function\n", + "def dummy_func(num):\n", + " num += 1\n", + " print(num)\n", + " tf.print(num)\n", + " return num\n", + "\n", + "_ = dummy_func(tf.constant([1.0]))\n", + "\n", + "# Output:\n", + "# Tensor(\"add:0\", shape=(1,), dtype=float32)\n", + "# [2]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3QroLA_zDK2w" + }, + "source": [ + "tf.distribute.Strategy\n", + "\n", + "- If the `tf.function` containing `tf.print` is executed on the workers, for example when using `TPUStrategy` or `ParameterServerStrategy`, you need to check worker/parameter server logs to find the printed values.\n", + "- For `print` or `logging.info`, logs will be printed on the coordinator when using `ParameterServerStrategy`, and logs will be printed on the STDOUT on worker0 when using TPUs.\n", + "\n", + "tf.keras.Model\n", + "- When using Sequential and Functional API models, if you want to print values, e.g., model inputs or intermediate features after some layers, you have following options.\n", + " 1. [Write a custom layer](https://www.tensorflow.org/guide/keras/custom_layers_and_models) that `tf.print` the inputs. \n", + " 2. Include the intermediate outputs you want to inspect in the model outputs.\n", + "- `tf.keras.layers.Lambda` layers have (de)serialization limitations. To avoid checkpoint loading issues, write a custom subclassed layer instead. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Lambda) for more details. 
\n", + "- You can't `tf.print` intermediate outputs in a `tf.keras.callbacks.LambdaCallback` if you don't have access to the actual values, but instead only to the symbolic Keras tensor objects.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aKazGTr1ZUMG" + }, + "source": [ + "Option 1: write a custom layer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8w4aY7wO0B4W" + }, + "outputs": [], + "source": [ + "class PrintLayer(tf.keras.layers.Layer):\n", + " def call(self, inputs):\n", + " tf.print(inputs)\n", + " return inputs\n", + "\n", + "def get_model():\n", + " inputs = tf.keras.layers.Input(shape=(1,))\n", + " out_1 = tf.keras.layers.Dense(4)(inputs)\n", + " out_2 = tf.keras.layers.Dense(1)(out_1)\n", + " # use custom layer to tf.print intermediate features\n", + " out_3 = PrintLayer()(out_2)\n", + " model = tf.keras.Model(inputs=inputs, outputs=out_3)\n", + " return model\n", + "\n", + "model = get_model()\n", + "model.compile(optimizer=\"adam\", loss=\"mse\")\n", + "model.fit([1, 2, 3], [0.0, 0.0, 1.0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KNESOatq7iM9" + }, + "source": [ + "Option 2: include the intermediate outputs you want to inspect in the model outputs.\n", + "\n", + "Note that in such case, you may need some [customizations](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) to use `Model.fit`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MiifvdLk7g9J" + }, + "outputs": [], + "source": [ + "def get_model():\n", + " inputs = tf.keras.layers.Input(shape=(1,))\n", + " out_1 = tf.keras.layers.Dense(4)(inputs)\n", + " out_2 = tf.keras.layers.Dense(1)(out_1)\n", + " # include intermediate values in model outputs\n", + " model = tf.keras.Model(\n", + " inputs=inputs,\n", + " outputs={\n", + " 'inputs': inputs,\n", + " 'out_1': out_1,\n", + " 'out_2': out_2})\n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MvIKDZpHSLmQ" + }, + "source": [ + "### pdb\n", + "You can use [pdb](https://docs.python.org/3/library/pdb.html) both in terminal and Colab to inspect intermediate values for debugging.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qu0n4O2umyT7" + }, + "source": [ + "### Visualize graph with TensorBoard\n", + "\n", + "You can [examine the TensorFlow graph with TensorBoard](https://www.tensorflow.org/tensorboard/graphs). TensorBoard is also [supported on colab](https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks). TensorBoard is a great tool to visualize summaries. You can use it to compare learning rate, model weights, gradient scale, training/validation metrics, or even model intermediate outputs between TF1.x model and migrated TF2 model through the training process and seeing if the values look as expected." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vBnxB6_xzlnT" + }, + "source": [ + "### TensorFlow Profiler\n", + "\n", + "[TensorFlow Profiler](https://www.tensorflow.org/guide/profiler) can help you visualize the execution timeline on GPUs/TPUs. You can check out this [Colab Demo](https://www.tensorflow.org/tensorboard/tensorboard_profiling_keras) for its basic usage." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9wNmCSHBpiGM" + }, + "source": [ + "### MultiProcessRunner\n", + "[MultiProcessRunner](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/multi_process_runner.py#L108) is a useful tool when debugging with MultiWorkerMirroredStrategy and ParameterServerStrategy. You can take a look at [this concrete example](https://github.com/keras-team/keras/blob/master/keras/integration_test/mwms_multi_process_runner_test.py) for its usage.\n", + "\n", + "Specifically for these two strategies, it is recommended that you 1) have unit tests covering their flow, and 2) try to reproduce failures with `MultiProcessRunner` in a unit test, so that you avoid launching a real distributed job every time you attempt a fix." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "migration_debugging.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/migrate/model_mapping.ipynb b/site/en/guide/migrate/model_mapping.ipynb index 9970405c45e..2d4582839c0 100644 --- a/site/en/guide/migrate/model_mapping.ipynb +++ b/site/en/guide/migrate/model_mapping.ipynb @@ -123,8 +123,9 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the model mapping shim is available only in \n", - "# TensorFlow 2.7\n", + "# Install tf-nightly as the DeterministicRandomTestTool is available only in\n", + "# TensorFlow 2.8\n", + "\n", "!pip install -q tf-nightly" ] }, @@ -141,7 +142,6 @@ "import sys\n", "import numpy as np\n", "\n", - "from unittest import mock\n", "from contextlib import contextmanager" ] }, @@ -617,65 +617,71 @@ "source": [ "## Nesting `tf.Variable`s, `tf.Module`s, `tf.keras.layers` & `tf.keras.models` in decorated calls\n", "\n", - "Decorating your layer call in `tf.compat.v1.keras.utils.track_tf1_style_variables` will only add automatic implicit tracking of variables created (and reused) via `tf.compat.v1.get_variable`. It will not capture weights directly created by `tf.Variable` calls, such as those used by typical Keras layers and most `tf.Module`s. You still need to explicitly track these in the same way you would for any other Keras layer or `tf.Module`.\n", - "\n", - "If you need to embed `tf.Variable` calls, Keras layers/models, or `tf.Module`s in your decorators (either because you are following the incremental migration to Native TF2 described later in this guide, or because your TF1.x code partially consisted of Keras modules):\n", - "* Explicitly make sure that the variable/module/layer is only created once\n", - "* Explicitly attach them as instance attributes just as you would when defining a [typical module/layer](https://www.tensorflow.org/guide/intro_to_modules#defining_models_and_layers_in_tensorflow)\n", - "* Explicitly reuse the already-created object in follow-on calls\n", + "Decorating your layer call in `tf.compat.v1.keras.utils.track_tf1_style_variables` will only add automatic implicit tracking of variables created (and reused) via `tf.compat.v1.get_variable`. It will not capture weights directly created by `tf.Variable` calls, such as those used by typical Keras layers and most `tf.Module`s. 
This section describes how to handle these nested cases.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Azxza3bVOZlv" + }, + "source": [ + "### (Pre-existing usages) `tf.keras.layers` and `tf.keras.models`\n", "\n", - "This ensures that weights are not created new and are correctly resued. Additionally, this also ensures that existing weights and regularization losses get tracked.\n", + "For pre-existing usages of nested Keras layers and models, use `tf.compat.v1.keras.utils.get_or_create_layer`. This is only recommended for easing migration of existing TF1.x nested Keras usages; new code should use explicit attribute setting as described below for tf.Variables and tf.Modules.\n", "\n", - "Here is an example of how this could look:" + "To use `tf.compat.v1.keras.utils.get_or_create_layer`, wrap the code that constructs your nested model into a method, and pass it in to the method. Example:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "mrRPPoJ5ap5U" + "id": "LN15TcRgHKsq" }, "outputs": [], "source": [ - "class WrappedDenseLayer(tf.keras.layers.Layer):\n", + "class NestedModel(tf.keras.Model):\n", "\n", - " def __init__(self, units, **kwargs):\n", - " super().__init__(**kwargs)\n", + " def __init__(self, units, *args, **kwargs):\n", + " super().__init__(*args, **kwargs)\n", " self.units = units\n", - " self._dense_model = None\n", + "\n", + " def build_model(self):\n", + " inp = tf.keras.Input(shape=(5, 5))\n", + " dense_layer = tf.keras.layers.Dense(\n", + " 10, name=\"dense\", kernel_regularizer=\"l2\",\n", + " kernel_initializer=tf.compat.v1.ones_initializer())\n", + " model = tf.keras.Model(inputs=inp, outputs=dense_layer(inp))\n", + " return model\n", "\n", " @tf.compat.v1.keras.utils.track_tf1_style_variables\n", " def call(self, inputs):\n", - " # Create the nested tf.variable/module/layer/model\n", - " # only if it has not been created already\n", - " if not self._dense_model:\n", - " inp = tf.keras.Input(shape=inputs.shape)\n", - " dense_layer = tf.keras.layers.Dense(\n", - " self.units, name=\"dense\",\n", - " kernel_regularizer=\"l2\")\n", - " self._dense_model = tf.keras.Model(\n", - " inputs=inp, outputs=dense_layer(inp))\n", - " return self._dense_model(inputs)\n", - "\n", - "layer = WrappedDenseLayer(10)\n", + " # Get or create a nested model without assigning it as an explicit property\n", + " model = tf.compat.v1.keras.utils.get_or_create_layer(\n", + " \"dense_model\", self.build_model)\n", + " return model(inputs)\n", "\n", - "layer(tf.ones(shape=(5, 5)))" + "layer = NestedModel(10)\n", + "layer(tf.ones(shape=(5,5)))" ] }, { "cell_type": "markdown", "metadata": { - "id": "Lo9h6wc6bmEF" + "id": "DgsKlltPHI8z" }, "source": [ - "The weights are correctly tracked:" + "This method ensures that these nested layers are correctly reused and tracked by tensorflow. Note that the `@track_tf1_style_variables` decorator is still required on the appropriate method. 
The model builder method passed into `get_or_create_layer` (in this case, `self.build_model`), should take no arguments.\n", + "\n", + "Weights are tracked:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Qt6USaTVbauM" + "id": "3zO5A78MJsqO" }, "outputs": [], "source": [ @@ -690,55 +696,46 @@ { "cell_type": "markdown", "metadata": { - "id": "oyH4lIcPb45r" + "id": "o3Xsi-JbKTuj" }, "source": [ - "As is the regularization loss (if present):" + "And regularization loss as well:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "N7cmuhRGbfFt" + "id": "mdK5RGm5KW5C" }, "outputs": [], "source": [ - "regularization_loss = tf.add_n(layer.losses)\n", - "regularization_loss" + "tf.add_n(layer.losses)" ] }, { "cell_type": "markdown", "metadata": { - "id": "FsTgnydkdezQ" + "id": "J_VRycQYJrXu" }, "source": [ - "### Guidance on variable names\n", - "\n", - "Explicit `tf.Variable` calls and Keras layers use a different layer name / variable name autogeneration mechanism than you may be used to from the combination of `get_variable` and `variable_scopes`. Although the shim will make your variable names match for variables created by `get_variable` even when going from TF1.x graphs to TF2 eager execution & `tf.function`, it cannot guarantee the same for the variable names generated for `tf.Variable` calls and Keras layers that you embed within your method decorators. It is even possible for multiple variables to share the same name in TF2 eager execution and `tf.function`.\n", + "### Incremental migration: `tf.Variables` and `tf.Modules`\n", "\n", - "You should take special care with this when following the sections on validating correctness and mapping TF1.x checkpoints later on in this guide." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mSFaHTCvhUso" - }, - "source": [ - "### Nesting layers/modules that use `@track_tf1_style_variables`\n", + "If you need to embed `tf.Variable` calls or `tf.Module`s in your decorated methods (for example, if you are following the incremental migration to non-legacy TF2 APIs described later in this guide), you still need to explicitly track these, with the following requirements:\n", + "* Explicitly make sure that the variable/module/layer is only created once\n", + "* Explicitly attach them as instance attributes just as you would when defining a [typical module or layer](https://www.tensorflow.org/guide/intro_to_modules#defining_models_and_layers_in_tensorflow)\n", + "* Explicitly reuse the already-created object in follow-on calls\n", "\n", - "If you are nesting one layer that uses the `@track_tf1_style_variables` decorator inside of another, you should treat it the same way you would treat any Keras layer or `tf.Module` that did not use `get_variable` to create its variables.\n", + "This ensures that weights are not created new each call and are correctly reused. 
Additionally, this also ensures that existing weights and regularization losses get tracked.\n", "\n", - "For example," + "Here is an example of how this could look:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "SI5V-1JLhTfW" + "id": "mrRPPoJ5ap5U" }, "outputs": [], "source": [ @@ -749,9 +746,9 @@ " self.units = units\n", "\n", " @tf.compat.v1.keras.utils.track_tf1_style_variables\n", - " def call(self, inputs):\n", + " def __call__(self, inputs):\n", " out = inputs\n", - " with tf.compat.v1.variable_scope(\"dense\"):\n", + " with tf.compat.v1.variable_scope(\"inner_dense\"):\n", " # The weights are created with a `regularizer`,\n", " # so the layer should track their regularization losses\n", " kernel = tf.compat.v1.get_variable(\n", @@ -785,29 +782,81 @@ "\n", "layer = WrappedDenseLayer(10)\n", "\n", - "layer(tf.ones(shape=(5, 5)))\n", + "layer(tf.ones(shape=(5, 5)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lo9h6wc6bmEF" + }, + "source": [ + "Note that explicit tracking of the nested module is needed even though it is decorated with the `track_tf1_style_variables` decorator. This is because each module/layer with decorated methods has its own variable store associated with it. \n", + "\n", + "The weights are correctly tracked:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qt6USaTVbauM" + }, + "outputs": [], + "source": [ + "assert len(layer.weights) == 6\n", + "weights = {x.name: x for x in layer.variables}\n", + "\n", + "assert set(weights.keys()) == {\"outer/inner_dense/bias:0\",\n", + " \"outer/inner_dense/kernel:0\",\n", + " \"outer/dense/bias:0\",\n", + " \"outer/dense/kernel:0\",\n", + " \"outer/dense_1/bias:0\",\n", + " \"outer/dense_1/kernel:0\"}\n", "\n", - "# Recursively track weights and regularization losses\n", - "layer.trainable_weights\n", + "layer.trainable_weights" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dHn-bJoNJw7l" + }, + "source": [ + "As well as regularization loss:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pq5GFtXjJyut" + }, + "outputs": [], + "source": [ "layer.losses" ] }, { "cell_type": "markdown", "metadata": { - "id": "DkEkLnGbipSS" + "id": "p7VKJj3JOCEk" }, "source": [ - "Notice that `variable_scope`s set in the outer layer may affect the naming of variables set in the nested layer, *but* `get_variable` will not share variables by name across the outer shim-based layer and the nested shim-based layer even if they have the same name, because the nested and outer layer utilize different internal variable stores." + "Note that if the `NestedLayer` were a non-Keras `tf.Module` instead, variables would still be tracked but regularization losses would not be automatically tracked, so you would have to explicitly track them separately." ] }, { "cell_type": "markdown", "metadata": { - "id": "PfbiY08UizLz" + "id": "FsTgnydkdezQ" }, "source": [ - "As mentioned previously, if you are using a shim-decorated `tf.Module` there is no `losses` property to recursively and automatically track the regularization loss of your nested layer, and you will have to track it separately." + "### Guidance on variable names\n", + "\n", + "Explicit `tf.Variable` calls and Keras layers use a different layer name / variable name autogeneration mechanism than you may be used to from the combination of `get_variable` and `variable_scopes`. 
Although the shim will make your variable names match for variables created by `get_variable` even when going from TF1.x graphs to TF2 eager execution & `tf.function`, it cannot guarantee the same for the variable names generated for `tf.Variable` calls and Keras layers that you embed within your method decorators. It is even possible for multiple variables to share the same name in TF2 eager execution and `tf.function`.\n", + "\n", + "You should take special care with this when following the sections on validating correctness and mapping TF1.x checkpoints later on in this guide." ] }, { @@ -986,69 +1035,7 @@ "id": "kzJF0H0sbce8" }, "source": [ - "Use the deterministic number generation test tool to verify that this incremental change leaves the model with the same behavior as before." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VRTg0bQlcPeP" - }, - "outputs": [], - "source": [ - "# import tensorflow.python.framework.random_seed as random_seed\n", - "seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__]\n", - "\n", - "class DeterministicTestTool(object):\n", - " def __init__(self, seed: int = 42, mode='constant'):\n", - " \"\"\"Set mode to 'constant' or 'num_random_ops'. Defaults to 'constant'.\"\"\"\n", - " if mode not in {'constant', 'num_random_ops'}:\n", - " raise ValueError(\"Mode arg must be 'constant' or 'num_random_ops'. \" +\n", - " \"Got: {}\".format(mode))\n", - "\n", - " self._mode = mode\n", - " self._seed = seed\n", - " self.operation_seed = 0\n", - " self._observed_seeds = set()\n", - "\n", - " def scope(self):\n", - " tf.random.set_seed(self._seed)\n", - "\n", - " def _get_seed(_):\n", - " \"\"\"Wraps TF get_seed to make deterministic random generation easier.\n", - "\n", - " This makes a variable's initialization (and calls that involve random\n", - " number generation) depend only on how many random number generations\n", - " were used in the scope so far, rather than on how many unrelated\n", - " operations the graph contains.\n", - "\n", - " Returns:\n", - " Random seed tuple.\n", - " \"\"\"\n", - " op_seed = self.operation_seed\n", - " if self._mode == \"constant\":\n", - " tf.random.set_seed(op_seed)\n", - " else:\n", - " if op_seed in self._observed_seeds:\n", - " raise ValueError(\n", - " 'This `DeterministicTestTool` object is trying to re-use the ' +\n", - " 'already-used operation seed {}. '.format(op_seed) +\n", - " 'It cannot guarantee random numbers will match between eager ' +\n", - " 'and sessions when an operation seed is reused. ' +\n", - " 'You most likely set ' +\n", - " '`operation_seed` explicitly but used a value that caused the ' +\n", - " 'naturally-incrementing operation seed sequences to overlap ' +\n", - " 'with an already-used seed.')\n", - "\n", - " self._observed_seeds.add(op_seed)\n", - " self.operation_seed += 1\n", - "\n", - " return (self._seed, op_seed)\n", - "\n", - " # mock.patch internal symbols to modify the behavior of TF APIs relying on them\n", - "\n", - " return mock.patch.object(seed_implementation, 'get_seed', wraps=_get_seed)" + "Use the [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class to verify that this incremental change leaves the model with the same behavior as before." 
] }, { @@ -1059,8 +1046,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = CompatModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1080,8 +1068,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = PartiallyMigratedModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1153,8 +1142,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = NearlyFullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1226,8 +1216,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = FullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1262,7 +1253,7 @@ "\n", "The above migration process to native TF2 APIs changed both the variable names (as Keras APIs produce very different weight names), and the object-oriented paths that point to different weights in the model. The impact of these changes is that they will have broken both any existing TF1-style name-based checkpoints or TF2-style object-oriented checkpoints.\n", "\n", - "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./reusing_checkpoints.ipynb).\n", + "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb).\n", "\n", "Some tips to making this feasible are as follows:\n", "- Variables still all have a `name` argument you can set.\n", @@ -1527,7 +1518,6 @@ "colab": { "collapsed_sections": [], "name": "model_mapping.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb index 48cb8823262..8a95cb903d6 100644 --- a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb +++ b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb @@ -105,10 +105,8 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the notebook uses a dataset instance for `Model.fit`\n", - "# with `ParameterServerStrategy`, which depends on symbols in TF 2.7.\n", - "!pip uninstall -q -y tensorflow keras\n", - "!pip install -q tf-nightly\n", + "# The notebook uses a dataset instance for `Model.fit` with\n", + "# `ParameterServerStrategy`, which depends on symbols in TF 2.7.\n", "# Install a utility needed for this demonstration\n", "!pip install portpicker\n", "\n", @@ -371,7 +369,7 @@ "\n", "with 
strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", " model.compile(optimizer, \"mse\")\n", "\n", "model.fit(dataset, epochs=5, steps_per_epoch=10)" @@ -466,7 +464,6 @@ "colab": { "collapsed_sections": [], "name": "multi_worker_cpu_gpu_training.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/saved_model.ipynb b/site/en/guide/migrate/saved_model.ipynb index f1e888ca272..e7e8ce8daa1 100644 --- a/site/en/guide/migrate/saved_model.ipynb +++ b/site/en/guide/migrate/saved_model.ipynb @@ -168,7 +168,7 @@ }, "outputs": [], "source": [ - "!saved_model_cli run --dir simple-save --tag_set serve \\\n", + "!saved_model_cli run --dir saved-model-builder --tag_set serve \\\n", " --signature_def serving_default --input_exprs input=10" ] }, @@ -303,7 +303,7 @@ "source": [ "### Save and export a SavedModel defined with tf.Module\n", "\n", - "To export your model in TensorFlow 2, you must define a `tf.Module` or a `tf.keras.Model` to hold all of your model's variables and functions. Then, you can call `tf.saved_model.save` to create a SavedModel. Refer to [Saving a custom model](../../guide/saved_model#saving_a_custom_model) in the [Using the SavedModel format](../../guide/saved_model) guide to learn more." + "To export your model in TensorFlow 2, you must define a `tf.Module` or a `tf.keras.Model` to hold all of your model's variables and functions. Then, you can call `tf.saved_model.save` to create a SavedModel. Refer to the _Saving a custom model_ section in the [Using the SavedModel format](../saved_model.ipynb) guide to learn more." ] }, { @@ -352,7 +352,11 @@ "source": [ "### Save and export a SavedModel defined with Keras\n", "\n", - "The Keras APIs for saving and exporting—`Mode.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." + "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", + "The Keras APIs for saving and exporting—`Model.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." ] }, { @@ -395,7 +399,7 @@ "source": [ "## Loading a SavedModel\n", "\n", - "A SavedModel saved with any of the above APIs can be loaded using either TensorFlow 1 or TensorFlow APIs.\n", + "A SavedModel saved with any of the above APIs can be loaded using either TensorFlow 1 or TensorFlow 2 APIs.\n", "\n", "A TensorFlow 1 SavedModel can generally be used for inference when loaded into TensorFlow 2, but training (generating gradients) is only possible if the SavedModel contains *resource variables*. 
You can check the dtype of the variables—if the variable dtype contains \"_ref\", then it is a reference variable.\n", "\n", @@ -506,9 +510,12 @@ "source": [ "### TensorFlow 2: Load a model saved with Keras\n", "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", "The Keras loading API—`tf.keras.models.load_model`—allows you to reload a saved model back into a Keras Model object. Note that this only allows you to load SavedModels saved with Keras (`Model.save` or `tf.keras.models.save_model`).\n", "\n", - "Models saved with `tf.saved_model.save` should be loaded with `tf.saved_model.load`. You can load a Keras model saved with `Model.save` using `tf.saved_model.load` but you will only get the TensorFlow graph. Refer to the `tf.keras.models.load_model` API docs and [Save and load Keras models](../../guide/keras/save_and_serialize#savedmodel_format) guide for details." + "Models saved with `tf.saved_model.save` should be loaded with `tf.saved_model.load`. You can load a Keras model saved with `Model.save` using `tf.saved_model.load` but you will only get the TensorFlow graph. Refer to the `tf.keras.models.load_model` API docs and [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize#savedmodel_format) guide for details." ] }, { @@ -655,7 +662,7 @@ "\n", "Signatures are the endpoints of a SavedModel—they tell the user how to run the model and what inputs are needed.\n", "\n", - "In TensorFlow 1, signatures are created by listing the input and output tensors. In TensorFlow 2, signatures are generated by passing in *concrete functions*. (Read more about TensorFlow functions in the [Introduction to graphs and tf.function](../guide/intro_to_graphs) guide.) In short, [a concrete function is generated](../guide/intro_to_graphs#polymorphism_one_function_many_graphs) from a `tf.function`:\n", + "In TensorFlow 1, signatures are created by listing the input and output tensors. In TensorFlow 2, signatures are generated by passing in *concrete functions*. (Read more about TensorFlow functions in the [Introduction to graphs and tf.function](../intro_to_graphs.ipynb) guide, particularly the _Polymorphism: one Function, many graphs_ section.) In short, a concrete function is generated from a `tf.function`:\n", "\n", "```python\n", "# Option 1: Specify an input signature.\n", @@ -748,7 +755,6 @@ "colab": { "collapsed_sections": [], "name": "saved_model.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/sessionrunhook_callback.ipynb b/site/en/guide/migrate/sessionrunhook_callback.ipynb index 60a7df4ed63..7e20a1bab05 100644 --- a/site/en/guide/migrate/sessionrunhook_callback.ipynb +++ b/site/en/guide/migrate/sessionrunhook_callback.ipynb @@ -69,7 +69,7 @@ "source": [ "In TensorFlow 1, to customize the behavior of training, you use `tf.estimator.SessionRunHook` with `tf.estimator.Estimator`. This guide demonstrates how to migrate from `SessionRunHook` to TensorFlow 2's custom callbacks with the `tf.keras.callbacks.Callback` API, which works with Keras `Model.fit` for training (as well as `Model.evaluate` and `Model.predict`). 
You will learn how to do this by implementing a `SessionRunHook` and a `Callback` task that measures examples per second during training.\n", "\n", - "Examples of callbacks are checkpoint saving (`tf.keras.callbacks.ModelCheckpoint`) and [TensorBoard](`tf.keras.callbacks.TensorBoard`) summary writing. Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section) guides." + "Examples of callbacks are checkpoint saving (`tf.keras.callbacks.ModelCheckpoint`) and [TensorBoard](`tf.keras.callbacks.TensorBoard`) summary writing. Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section) guides." ] }, { @@ -246,7 +246,7 @@ "\n", "- API docs: `tf.keras.callbacks.Callback`\n", "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", diff --git a/site/en/guide/migrate/tensorboard.ipynb b/site/en/guide/migrate/tensorboard.ipynb index c8ec222b621..ea0cd72b47e 100644 --- a/site/en/guide/migrate/tensorboard.ipynb +++ b/site/en/guide/migrate/tensorboard.ipynb @@ -218,10 +218,10 @@ "\n", "def create_model():\n", " return tf.keras.models.Sequential([\n", - " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", - " tf.keras.layers.Dense(512, activation='relu'),\n", - " tf.keras.layers.Dropout(0.2),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", + " tf.keras.layers.Flatten(input_shape=(28, 28), name='layers_flatten'),\n", + " tf.keras.layers.Dense(512, activation='relu', name='layers_dense'),\n", + " tf.keras.layers.Dropout(0.2, name='layers_dropout'),\n", + " tf.keras.layers.Dense(10, activation='softmax', name='layers_dense_2')\n", " ])\n", "\n", "model = create_model()\n", @@ -279,7 +279,6 @@ "colab": { "collapsed_sections": [], "name": "tensorboard.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tf1_vs_tf2.ipynb b/site/en/guide/migrate/tf1_vs_tf2.ipynb index 3fc69a4c7f9..60791f72680 100644 --- a/site/en/guide/migrate/tf1_vs_tf2.ipynb +++ b/site/en/guide/migrate/tf1_vs_tf2.ipynb @@ -131,7 +131,7 @@ "\n", "### Other API changes\n", "\n", - "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. 
If removing it causes a performance degrade [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", + "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. If removing it causes a performance degradation, [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", "\n", "* Replace all usage of `tf.v1.ConfigProto` with equivalent functions from `tf.config`." ] @@ -159,7 +159,7 @@ "source": [ "## No more globals\n", "\n", - "TF1.x relied heavily on implicit global namespaces and collections. When you called `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", + "TF1.x relied heavily on implicit global namespaces and collections. When you call `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", "attempt to help you find your variables again, and for frameworks to find\n", "user-created variables. Some of these include: variable scopes, global collections, helper methods like `tf.get_global_step` and `tf.global_variables_initializer`, optimizers implicitly\n", "computing gradients over all trainable variables, and so on. TF2 eliminates all of these mechanisms ([Variables 2.0 RFC](https://github.com/tensorflow/community/pull/11)) in favor of the default mechanism - you keep track of your variables. If you lose track of a `tf.Variable`, it gets garbage collected.\n", @@ -241,15 +241,223 @@ "#### Pattern 1: Python object manipulation and variable creation intended to be done only once get run multiple times\n", "\n", "\n", - "In TF1.x programs that rely on graphs and sessions, the expectation is usually that all Python logic in your program will only run once. However, with eager execution and `tf.function` it is fair to expect that your Python logic will be run at least once, but possibly more times (either multiple times eagerly, or multiple times across different `tf.function` traces). Any Python logic within a `tf.function` will be traced at least twice due to how `tf.function` works. Refer to the `tf.function` [guide](https://www.tensorflow.org/guide/function) for more details.\n", + "In TF1.x programs that rely on graphs and sessions, the expectation is usually that all Python logic in your program will only run once. However, with eager execution and `tf.function` it is fair to expect that your Python logic will be run at least once, but possibly more times (either multiple times eagerly, or multiple times across different `tf.function` traces). Sometimes, `tf.function` will even trace twice on the same input, causing unexpected behaviors (see Example 1 and 2). 
Refer to the `tf.function` [guide](https://www.tensorflow.org/guide/function) for more details.\n", "\n", "Note: This pattern usually causes your code to silently misbehave when executing eagerly without `tf.function`s, but generally raises an `InaccessibleTensorError` or a `ValueError` when attempting to wrap the problematic code inside of a `tf.function`. To discover and debug this issue, it is recommended you wrap your code with `tf.function` early on, and use [pdb](https://docs.python.org/3/library/pdb.html) or interactive debugging to identify the source of the `InaccessibleTensorError`.\n", "\n", "**Example 1: Variable creation**\n", "\n", - "TF1.x code often creates variables without checking that they have already been made (because it runs the Python logic only once at all times). Naively mapping this code to eager execution may cause it to accidentally create new variables in each training step.\n", + "Consider the example below, where the function creates a variable when called:\n", "\n", - "**Example 2: Manipulating a global Python list**\n", + "```python\n", + "def f():\n", + " v = tf.Variable(1.0)\n", + " return v\n", + "\n", + "with tf.Graph().as_default():\n", + " with tf.compat.v1.Session() as sess:\n", + " res = f()\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " sess.run(res)\n", + "```\n", + "\n", + "However, naively wrapping the above function that contains variable creation with `tf.function` is not allowed. `tf.function` only supports [singleton variable creations on the first call](https://www.tensorflow.org/guide/function#creating_tfvariables). To enforce this, when tf.function detects variable creation in the first call, it will attempt to trace again and raise an error if there is variable creation in the second trace.\n", + "\n", + "```python\n", + "@tf.function\n", + "def f():\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " v = tf.Variable(1.0)\n", + " return v\n", + "\n", + "try:\n", + " f()\n", + "except ValueError as e:\n", + " print(e)\n", + "```\n", + "\n", + "A workaround is caching and reusing the variable after it is created in the first call.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " return self.v\n", + "\n", + "m = Model()\n", + "m()\n", + "```\n", + "\n", + "**Example 2: Out-of-scope Tensors due to `tf.function` retracing**\n", + "\n", + "As demonstrated in Example 1, `tf.function` will retrace when it detects Variable creation in the first call. This can cause extra confusion, because the two tracings will create two graphs. When the second graph from retracing attempts to access a Tensor from the graph generated during the first tracing, Tensorflow will raise an error complaining that the Tensor is out of scope. To demonstrate the scenario, the code below creates a dataset on the first `tf.function` call. 
This would run as expected.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.dataset = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print once: only traced once\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " it = iter(self.dataset)\n", + " return next(it)\n", + "\n", + "m = Model()\n", + "m()\n", + "```\n", + "\n", + "However, if we also attempt to create a variable on the first `tf.function` call, the code will raise an error complaining that the dataset is out of scope. This is because the dataset is in the first graph, while the second graph is also attempting to access it.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "try:\n", + " m()\n", + "except TypeError as e:\n", + " print(e) # is out of scope and cannot be used here.\n", + "```\n", + "\n", + "The most straightforward solution is ensuring that the variable creation and dataset creation are both outside of the `tf.function` call. For example:\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " def initialize(self):\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "m.initialize()\n", + "m()\n", + "```\n", + "\n", + "However, sometimes it's unavoidable to create variables in `tf.function` (such as slot variables in some [TF keras optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer#slots)). Still, we can simply move the dataset creation outside of the `tf.function` call. This works because `tf.function` receives the dataset as an implicit input, so both graphs can access it properly.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " def initialize(self):\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "m.initialize()\n", + "m()\n", + "```\n", + "\n", + "**Example 3: Unexpected TensorFlow object re-creations due to dict usage**\n", + "\n", + "`tf.function` has very poor support for Python side effects such as appending to a list, or checking/adding to a dictionary. More details are in [\"Better performance with tf.function\"](https://www.tensorflow.org/guide/function#executing_python_side_effects). In the example below, the code uses dictionaries to cache datasets and iterators. 
For the same key, each call to the model will return the same iterator of the dataset.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " self.datasets[key] = tf.compat.v1.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = self.datasets[key].make_initializable_iterator()\n", + " return self.iterators[key]\n", + "\n", + "with tf.Graph().as_default():\n", + " with tf.compat.v1.Session() as sess:\n", + " m = Model()\n", + " it = m('a')\n", + " sess.run(it.initializer)\n", + " for _ in range(3):\n", + " print(sess.run(it.get_next())) # prints 1, 2, 3\n", + "```\n", + "\n", + "However, the pattern above will not work as expected in `tf.function`. During tracing, `tf.function` will ignore the Python side effect of adding to the dictionaries. Instead, it only remembers the creation of a new dataset and iterator. As a result, each call to the model will always return a new iterator. This issue is hard to notice unless the difference in numerical results or performance is significant. Hence, we recommend that you review the code carefully before naively wrapping it in `tf.function`.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " @tf.function\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " self.datasets[key] = tf.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = iter(self.datasets[key])\n", + " return self.iterators[key]\n", + "\n", + "m = Model()\n", + "for _ in range(3):\n", + " print(next(m('a'))) # prints 1, 1, 1\n", + "```\n", + "\n", + "We can use [`tf.init_scope`](https://www.tensorflow.org/api_docs/python/tf/init_scope) to lift the dataset and iterator creation outside of the graph, to achieve the expected behavior:\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " @tf.function\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " # Lifts ops out of function-building graphs\n", + " with tf.init_scope():\n", + " self.datasets[key] = tf.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = iter(self.datasets[key])\n", + " return self.iterators[key]\n", + "\n", + "m = Model()\n", + "for _ in range(3):\n", + " print(next(m('a'))) # prints 1, 2, 3\n", + "```\n", + "\n", + "The general rule of thumb is to avoid relying on Python side effects in your logic and only use them to debug your traces.\n", + "\n", + "**Example 4: Manipulating a global Python list**\n", "\n", "The following TF1.x code uses a global list of losses that it uses to only maintain the list of losses generated by the current training step. Note that the Python logic that appends losses to the list will only be called once regardless of how many training steps the session is run for.\n", "\n", @@ -473,11 +681,11 @@ "source": [ "### `ResourceVariables` instead of `ReferenceVariables`\n", "\n", - "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason about semantics about whether or not you will observe the result of a previous write when using your variables. 
This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", + "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason semantics about whether or not you will observe the result of a previous write when using your variables. This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", "\n", "However, it is ***possible though unlikely*** that these stronger consistency guarantees may increase the memory usage of your specific program. Please file an [issue](https://github.com/tensorflow/tensorflow/issues) if you find this to be the case. Additionally, if you have unit tests relying on exact string comparisons against the operator names in a graph corresponding to variable reads, be aware that enabling resource variables may slightly change the name of these operators.\n", "\n", - "To isolate the impact of this behavior change on your code, if eager execution is disabled you can use `tf.compat.v1.disable_resource_variables()` and `tf.compat.v1.enable_resource_variables()` to globally disable or enable this behavior change. `ResourceVariables` will always be used if eager execution is enabled. You can also \n" + "To isolate the impact of this behavior change on your code, if eager execution is disabled you can use `tf.compat.v1.disable_resource_variables()` and `tf.compat.v1.enable_resource_variables()` to globally disable or enable this behavior change. `ResourceVariables` will always be used if eager execution is enabled.\n" ] }, { @@ -819,28 +1027,12 @@ }, "source": [ "### Hashing tensors and variables\n", - "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fxi7EgKdBpd5" - }, - "outputs": [], - "source": [ - "tf.compat.v1.disable_tensor_equality()\n", + "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys.\n", + "```python\n", "x = tf.Variable(0.0)\n", - "set([x, tf.constant(2.0)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LFgjR-PSajVQ" - }, - "source": [ + "set([x, tf.constant(2.0)])\n", + "```\n", + "\n", "However, in TF2 with tensor equality enabled, tensors and variables are made unhashable due to the `==` and `!=` operator semantics changing to value equality checks." ] }, @@ -925,11 +1117,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [ - "Tce3stUlHN0L" - ], + "collapsed_sections": [], "name": "tf1_vs_tf2.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tflite.ipynb b/site/en/guide/migrate/tflite.ipynb index fd3695e5434..0426655ee1a 100644 --- a/site/en/guide/migrate/tflite.ipynb +++ b/site/en/guide/migrate/tflite.ipynb @@ -67,14 +67,14 @@ "id": "meUTrR4I6m1C" }, "source": [ - "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). 
The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts exisiting TF models into an optimized TFLite model format that can be efficiently run on-device.\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts existing TF models into an optimized TFLite model format that can be efficiently run on-device.\n", "\n", "In this doc, you'll learn what changes you need to make to your TF to TFLite conversion code, followed by a few examples that do the same.\n", "\n", "\n", "## Changes to your TF to TFLite conversion code\n", "\n", - "* If you're using a legacy TF1 model format (Keras file, frozen GraphDef, checkpoints, tf.Session, etc), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", + "* If you're using a legacy TF1 model format (such as Keras file, frozen GraphDef, checkpoints, tf.Session), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", "\n", "* Update the converter API flags (refer to Table 2).\n", "* Remove legacy APIs such as `tf.lite.constants`. (eg: Replace `tf.lite.constants.INT8` with `tf.int8`)\n", @@ -125,7 +125,7 @@ "source": [ "## Examples\n", "\n", - "You'll now walkthrough some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert it to TF2 TFLite models.\n", + "You'll now walk through some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert them to TF2 TFLite models.\n", "\n", "### Setup\n", "\n", @@ -400,7 +400,7 @@ "with tf.Graph().as_default() as g:\n", " tf.graph_util.import_graph_def(gdef, name=\"\")\n", "\n", - "# Lookup the input and output tensors.\n", + "# Look up the input and output tensors.\n", "input_tensor = g.get_tensor_by_name('input:0') \n", "output_tensor = g.get_tensor_by_name('MobilenetV1/Predictions/Softmax:0')\n", "\n", @@ -436,7 +436,6 @@ "colab": { "collapsed_sections": [], "name": "tflite.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tpu_embedding.ipynb b/site/en/guide/migrate/tpu_embedding.ipynb index e912e868df2..44105ea984b 100644 --- a/site/en/guide/migrate/tpu_embedding.ipynb +++ b/site/en/guide/migrate/tpu_embedding.ipynb @@ -476,7 +476,10 @@ "source": [ "strategy = tf.distribute.TPUStrategy(cluster_resolver)\n", "with strategy.scope():\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " if hasattr(tf.keras.optimizers, \"legacy\"):\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", + " else:\n", + " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", " dense_input = tf.keras.Input(shape=(2,), dtype=tf.float32, batch_size=global_batch_size)\n", " sparse_input = tf.keras.Input(shape=(), dtype=tf.int32, batch_size=global_batch_size)\n", " embedded_input = tfrs.layers.embedding.TPUEmbedding(\n", @@ -550,7 +553,7 @@ "source": [ "Learn more about setting up TPU-specific embeddings in the API docs:\n", "\n", - "- `tfrs.layers.embedding.TPUEmbedding`: particularly about feature and table configuration, setting the optimizer, creating a model (using the Keras [functional](../../guide/keras/functional.ipynb) 
API or via [subclassing](../..guide/keras/custom_layers_and_models.ipynb) `tf.keras.Model`), training/evaluation, and model serving with `tf.saved_model`\n", + "- `tfrs.layers.embedding.TPUEmbedding`: particularly about feature and table configuration, setting the optimizer, creating a model (using the Keras [functional](https://www.tensorflow.org/guide/keras/functional) API or via [subclassing](../..guide/keras/custom_layers_and_models.ipynb) `tf.keras.Model`), training/evaluation, and model serving with `tf.saved_model`\n", "- `tf.tpu.experimental.embedding.TableConfig`\n", "- `tf.tpu.experimental.embedding.FeatureConfig`\n", "\n", @@ -563,7 +566,7 @@ "To learn more about customizing your training, refer to:\n", "\n", "- Guide: [Customize what happens in Model.fit](../..guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", "\n", "TPUs—Google's specialized ASICs for machine learning—are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] diff --git a/site/en/guide/migrate/tpu_estimator.ipynb b/site/en/guide/migrate/tpu_estimator.ipynb index 6aa05aed84a..9cc35dc8bae 100644 --- a/site/en/guide/migrate/tpu_estimator.ipynb +++ b/site/en/guide/migrate/tpu_estimator.ipynb @@ -369,7 +369,7 @@ "\n", "To learn more about customizing your training, refer to:\n", "- Guide: [Customize what happens in Model.fit](../..guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", "\n", "TPUs—Google's specialized ASICs for machine learning—are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] diff --git a/site/en/guide/migrate/upgrade.ipynb b/site/en/guide/migrate/upgrade.ipynb index c29b8edec88..7223a8c8c81 100644 --- a/site/en/guide/migrate/upgrade.ipynb +++ b/site/en/guide/migrate/upgrade.ipynb @@ -95,7 +95,7 @@ "source": [ "## Compatibility modules\n", "\n", - "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. So, we recommend that you manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", + "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. 
This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. Therefore, it's recommended to manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", "\n", "Because of TensorFlow 2.x module deprecations (for example, `tf.flags` and `tf.contrib`), some changes can not be worked around by switching to `compat.v1`. Upgrading this code may require using an additional library (for example, [`absl.flags`](https://github.com/abseil/abseil-py)) or switching to a package in [tensorflow/addons](http://www.github.com/tensorflow/addons).\n" ] @@ -108,7 +108,7 @@ "source": [ "## Recommended upgrade process\n", "\n", - "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recomended that you use the script as part of the following process: \n", + "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recommended that you use the script as part of the following process: \n", "\n", "1. **Unit Test**: Ensure that the code you’re upgrading has a unit test suite with reasonable coverage. This is Python code, so the language won’t protect you from many classes of mistakes. Also ensure that any dependency you have has already been upgraded to be compatible with TensorFlow 2.x.\n", "\n", @@ -574,13 +574,13 @@ "source": [ "## Caveats\n", "\n", - "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", + "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.math.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", "\n", "- The script assumes that `tensorflow` is imported using `import tensorflow as tf`, or `import tensorflow.compat.v1 as tf`.\n", "\n", "- This script does not reorder arguments. Instead, the script adds keyword arguments to functions that have their arguments reordered.\n", "\n", - "- Check out [tf2up.ml](http://tf2up.ml) for a convenient tool to upgrade Jupyter\n", + "- Check out [tf2up.ml](https://github.com/lc0/tf2up) for a convenient tool to upgrade Jupyter\n", " notebooks and Python files in a GitHub repository.\n", "\n", "To report upgrade script bugs or make feature requests, please file an issue on [GitHub](https://github.com/tensorflow/tensorflow/issues)." 
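The caveats above all concern the `tf_upgrade_v2` command-line script that ships with the `tensorflow` pip package. As a rough illustration of how it is typically invoked from a notebook or shell (the file and directory names below are hypothetical placeholders), a single file or a whole source tree can be converted like this:

```shell
# Upgrade a single file (hypothetical paths).
!tf_upgrade_v2 --infile my_model.py --outfile my_model_v2.py

# Upgrade a whole directory tree and write a report of every change it made.
!tf_upgrade_v2 \
  --intree my_project/ \
  --outtree my_project_v2/ \
  --reportfile upgrade_report.txt
```

Reviewing the generated report file before committing the output is a good way to follow the "manually proofread replacements" advice above.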
@@ -600,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/site/en/guide/migrate/validate_correctness.ipynb b/site/en/guide/migrate/validate_correctness.ipynb index 803965a2252..a0555cdd55c 100644 --- a/site/en/guide/migrate/validate_correctness.ipynb +++ b/site/en/guide/migrate/validate_correctness.ipynb @@ -105,8 +105,8 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the model mapping shim is available only in\n", - "# TensorFlow 2.7\n", + "# Install tf-nightly as the DeterministicRandomTestTool is available only in\n", + "# Tensorflow 2.8\n", "!pip install -q tf-nightly" ] }, @@ -136,7 +136,6 @@ "import tf_slim as slim\n", "import sys\n", "\n", - "from unittest import mock\n", "\n", "from contextlib import contextmanager" ] @@ -686,7 +685,7 @@ "id": "BQbb8Hyk5YVi" }, "source": [ - "The following `DeterministicTestTool` object provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution,\n", + "The following [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution.\n", "\n", "The tool provides two testing modes: \n", "1. `constant` which uses the same seed for every single operation no matter how many times it has been called and,\n", @@ -695,67 +694,6 @@ "This applies both to the stateful random operations used for creating and initializing variables, and to the stateful random operations used in computation (such as for dropout layers)." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VRTg0bQlcPeP" - }, - "outputs": [], - "source": [ - "seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__]\n", - "\n", - "class DeterministicTestTool(object):\n", - " def __init__(self, seed: int = 42, mode='constant'):\n", - " \"\"\"Set mode to 'constant' or 'num_random_ops'. Defaults to 'constant'.\"\"\"\n", - " if mode not in {'constant', 'num_random_ops'}:\n", - " raise ValueError(\"Mode arg must be 'constant' or 'num_random_ops'. \" +\n", - " \"Got: {}\".format(mode))\n", - "\n", - " self._mode = mode\n", - " self._seed = seed\n", - " self.operation_seed = 0\n", - " self._observed_seeds = set()\n", - "\n", - " def scope(self):\n", - " tf.random.set_seed(self._seed)\n", - "\n", - " def _get_seed(_):\n", - " \"\"\"Wraps TF get_seed to make deterministic random generation easier.\n", - "\n", - " This makes a variable's initialization (and calls that involve random\n", - " number generation) depend only on how many random number generations\n", - " were used in the scope so far, rather than on how many unrelated\n", - " operations the graph contains.\n", - "\n", - " Returns:\n", - " Random seed tuple.\n", - " \"\"\"\n", - " op_seed = self.operation_seed\n", - " if self._mode == \"constant\":\n", - " tf.random.set_seed(op_seed)\n", - " else:\n", - " if op_seed in self._observed_seeds:\n", - " raise ValueError(\n", - " 'This `DeterministicTestTool` object is trying to re-use the ' +\n", - " 'already-used operation seed {}. '.format(op_seed) +\n", - " 'It cannot guarantee random numbers will match between eager ' +\n", - " 'and sessions when an operation seed is reused. 
' +\n", - " 'You most likely set ' +\n", - " '`operation_seed` explicitly but used a value that caused the ' +\n", - " 'naturally-incrementing operation seed sequences to overlap ' +\n", - " 'with an already-used seed.')\n", - "\n", - " self._observed_seeds.add(op_seed)\n", - " self.operation_seed += 1\n", - "\n", - " return (self._seed, op_seed)\n", - "\n", - " # mock.patch internal symbols to modify the behavior of TF APIs relying on them\n", - "\n", - " return mock.patch.object(seed_implementation, 'get_seed', wraps=_get_seed)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -773,7 +711,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -796,7 +734,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", @@ -860,7 +798,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -883,7 +821,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", @@ -940,14 +878,14 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", " b = tf.random.uniform(shape=(3,3))\n", " b = b * 3\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " b_prime = tf.random.uniform(shape=(3,3))\n", " b_prime = b_prime * 3\n", @@ -964,7 +902,7 @@ "id": "nHhOLHyQIkAe" }, "source": [ - "To allow for debugging variations due to tracing order, `DeterministicTestTool` in `num_random_ops` mode allows you to see how many random operations have been traced with the `operation_seed` property." + "To allow for debugging variations due to tracing order, `DeterministicRandomTestTool` in `num_random_ops` mode allows you to see how many random operations have been traced with the `operation_seed` property." 
] }, { @@ -975,7 +913,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " print(random_tool.operation_seed)\n", " a = tf.random.uniform(shape=(3,1))\n", @@ -1003,7 +941,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " print(random_tool.operation_seed)\n", " a = tf.random.uniform(shape=(3,1))\n", @@ -1012,7 +950,7 @@ " b = tf.random.uniform(shape=(3,3))\n", " b = b * 3\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " random_tool.operation_seed = 1\n", " b_prime = tf.random.uniform(shape=(3,3))\n", @@ -1031,7 +969,7 @@ "id": "bP5Kx1OcNbvM" }, "source": [ - "However, `DeterministicTestTool` disallows reusing already-used operation seeds, so make sure the auto-incremented sequences cannot overlap. This is because eager execution generates different numbers for follow-on usages of the same operation seed while TF1 graphs and sessions do not, so raising an error helps keep session and eager stateful random number generation in line." + "However, `DeterministicRandomTestTool` disallows reusing already-used operation seeds, so make sure the auto-incremented sequences cannot overlap. This is because eager execution generates different numbers for follow-on usages of the same operation seed while TF1 graphs and sessions do not, so raising an error helps keep session and eager stateful random number generation in line." ] }, { @@ -1042,7 +980,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " random_tool.operation_seed = 1\n", " b_prime = tf.random.uniform(shape=(3,3))\n", @@ -1067,7 +1005,7 @@ "source": [ "### Verifying Inference\n", "\n", - "You can now use the `DeterministicTestTool` to make sure the `InceptionResnetV2` model matches in inference, even when using the random weight initialization. For a stronger test condition due to matching program order, use the `num_random_ops` mode." + "You can now use the `DeterministicRandomTestTool` to make sure the `InceptionResnetV2` model matches in inference, even when using the random weight initialization. For a stronger test condition due to matching program order, use the `num_random_ops` mode." 
] }, { @@ -1078,7 +1016,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1110,7 +1048,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " model = InceptionResnetV2(num_classes)\n", "\n", @@ -1132,7 +1070,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool:\n", + "# when using the DeterministicRandomTestTool:\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1145,7 +1083,7 @@ "source": [ "### Verifying Training\n", "\n", - "Because `DeterministicTestTool` works for *all* stateful random operations (including both weight initialization and computation such as dropout layers), you can use it to verify the models match in training mode as well. You can again use the `num_random_ops` mode because the program order of the stateful random ops matches." + "Because `DeterministicRandomTestTool` works for *all* stateful random operations (including both weight initialization and computation such as dropout layers), you can use it to verify the models match in training mode as well. You can again use the `num_random_ops` mode because the program order of the stateful random ops matches." ] }, { @@ -1156,7 +1094,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1188,7 +1126,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " model = InceptionResnetV2(num_classes)\n", "\n", @@ -1210,7 +1148,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool\n", + "# when using the DeterministicRandomTestTool\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1230,7 +1168,7 @@ "id": "xpOAei5vRAPa" }, "source": [ - "Note: When using the `DeterministicTestTool` in `num_random_ops` mode, it is suggested you directly use and call the `tf.keras.layers.Layer` method decorator when testing for numerical equivalence. Embedding it within a Keras functional model or other Keras models can produce differences in stateful random operation tracing order that can be tricky to reason about or match exactly when comparing TF1.x graphs/sessions and eager execution. \n", + "Note: When using the `DeterministicRandomTestTool` in `num_random_ops` mode, it is suggested you directly use and call the `tf.keras.layers.Layer` method decorator when testing for numerical equivalence. 
Embedding it within a Keras functional model or other Keras models can produce differences in stateful random operation tracing order that can be tricky to reason about or match exactly when comparing TF1.x graphs/sessions and eager execution. \n", "\n", "For example, calling the `InceptionResnetV2` layer directly with `training=True` interleaves variable initialization with the dropout order according to the network creation order.\n", "\n", @@ -1247,7 +1185,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1279,7 +1217,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " keras_input = tf.keras.Input(shape=(height, width, 3))\n", " layer = InceptionResnetV2(num_classes)\n", @@ -1303,7 +1241,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool\n", + "# when using the DeterministicRandomTestTool\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1316,7 +1254,7 @@ "source": [ "## Step 3b or 4b (optional): Testing with pre-existing checkpoints\n", "\n", - "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. The [Reusing TF1.x checkpoints guide](./reuse_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" + "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. The [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" ] }, { @@ -1341,7 +1279,6 @@ "colab": { "collapsed_sections": [], "name": "validate_correctness.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/mixed_precision.ipynb b/site/en/guide/mixed_precision.ipynb index 984f1e8abc6..a19d6f254f3 100644 --- a/site/en/guide/mixed_precision.ipynb +++ b/site/en/guide/mixed_precision.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs and 60% on TPUs." 
+ "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs, 60% on TPUs and more than 2 times on latest Intel CPUs." ] }, { @@ -81,7 +81,7 @@ "source": [ "Today, most models use the float32 dtype, which takes 32 bits of memory. However, there are two lower-precision dtypes, float16 and bfloat16, each which take 16 bits of memory instead. Modern accelerators can run operations faster in the 16-bit dtypes, as they have specialized hardware to run 16-bit computations and 16-bit dtypes can be read from memory faster.\n", "\n", - "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", + "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs and supporting Intel CPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", "\n", "Note: In this guide, the term \"numeric stability\" refers to how a model's quality is affected by the use of a lower-precision dtype instead of a higher precision dtype. An operation is \"numerically unstable\" in float16 or bfloat16 if running it in one of those dtypes causes the model to have worse evaluation accuracy or other metrics compared to running the operation in float32." ] @@ -118,9 +118,11 @@ "source": [ "## Supported hardware\n", "\n", - "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs and Cloud TPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs support a mix of bfloat16 and float32.\n", + "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs, Cloud TPUs and recent Intel CPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs and Intel CPUs support a mix of bfloat16 and float32.\n", "\n", - "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. 
You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100." + "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100.\n", + "\n", + "Among Intel CPUs, those starting with the 4th Gen Intel Xeon Processors (code name Sapphire Rapids) will see the greatest performance benefit from mixed precision, as they can accelerate bfloat16 computations using AMX instructions (requires TensorFlow 2.12 or later)." ] }, { "cell_type": "markdown", "metadata": { "id": "-q2hisD60F0_" }, "source": [ - "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup.\n", + "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup. If running on a CPU runtime, there may be a slowdown as the runtime likely has a CPU without AMX.\n", "\n", "You can check your GPU type with the following. The command only exists if the\n", "NVIDIA drivers are installed, so the following will raise an error otherwise." @@ -154,7 +156,7 @@ "source": [ "All Cloud TPUs support bfloat16.\n", "\n", - "Even on CPUs and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. On CPUs, mixed precision will run significantly slower, however." + "Even on older Intel CPUs, other x86 CPUs without AMX, and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. However, mixed_bfloat16 on CPUs without AMX instructions and mixed_float16 on all x86 CPUs will run significantly slower." ] }, { @@ -235,7 +237,7 @@ "id": "MOFEcna28o4T" }, "source": [ - "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs, the `mixed_bfloat16` policy should be used instead." + "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs and CPUs, the `mixed_bfloat16` policy should be used instead." ] }, { @@ -411,7 +413,7 @@ "id": "0Sm8FJHegVRN" }, "source": [ - "This example cast the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference in negligible, but in general you should run input processing math in float32 if it runs on the CPU. 
The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", + "This example casts the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference is negligible, but in general you should run input processing math in float32 if it runs on the CPU. The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", "\n", "The initial weights of the model are retrieved. This will allow training from scratch again by loading the weights." ] @@ -465,7 +467,7 @@ " \n", "If you are running this guide in Colab, you can compare the performance of mixed precision with float32. To do so, change the policy from `mixed_float16` to `float32` in the \"Setting the dtype policy\" section, then rerun all the cells up to this point. On GPUs with compute capability 7.X, you should see the time per step significantly increase, indicating mixed precision sped up the model. Make sure to change the policy back to `mixed_float16` and rerun the cells before continuing with the guide.\n", "\n", - "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically see significantly performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", + "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically experience significant performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", "\n", "If running mixed precision on a TPU, you will not see as much of a performance gain compared to running mixed precision on GPUs, especially pre-Ampere GPUs. This is because TPUs do certain ops in bfloat16 under the hood even with the default dtype policy of float32. This is similar to how Ampere GPUs use TensorFloat-32 by default. Compared to Ampere GPUs, TPUs typically see less performance gains with mixed precision on real-world models.\n", "\n", @@ -480,7 +482,9 @@ "source": [ "## Loss scaling\n", "\n", - "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop." 
+ "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop.\n", + "\n", + "Note: When using `mixed_bfloat16` policy, there is no need to do loss scaling." ] }, { @@ -612,7 +616,7 @@ "id": "FVy5gnBqTE9z" }, "source": [ - "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documention if you want to customize the loss scaling behavior." + "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documentation if you want to customize the loss scaling behavior." ] }, { @@ -806,20 +810,21 @@ "source": [ "## Summary\n", "\n", - "- You should use mixed precision if you use TPUs or NVIDIA GPUs with at least compute capability 7.0, as it will improve performance by up to 3x.\n", + "- You should use mixed precision if you use TPUs, NVIDIA GPUs with at least compute capability 7.0, or Intel CPUs with support for AMX instructions, as it will improve performance by up to 3x.\n", "- You can use mixed precision with the following lines:\n", "\n", " ```python\n", - " # On TPUs, use 'mixed_bfloat16' instead\n", + " # On TPUs and CPUs, use 'mixed_bfloat16' instead\n", " mixed_precision.set_global_policy('mixed_float16')\n", " ```\n", "\n", "* If your model ends in softmax, make sure it is float32. And regardless of what your model ends in, make sure the output is float32.\n", "* If you use a custom training loop with `mixed_float16`, in addition to the above lines, you need to wrap your optimizer with a `tf.keras.mixed_precision.LossScaleOptimizer`. Then call `optimizer.get_scaled_loss` to scale the loss, and `optimizer.get_unscaled_gradients` to unscale the gradients.\n", + "* If you use a custom training loop with `mixed_bfloat16`, setting the global_policy mentioned above is sufficient.\n", "* Double the training batch size if it does not reduce evaluation accuracy\n", "* On GPUs, ensure most tensor dimensions are a multiple of $8$ to maximize performance\n", "\n", - "For more examples of mixed precision using the `tf.keras.mixed_precision` API, check the [official models repository](https://github.com/tensorflow/models/tree/master/official). Most official models, such as [ResNet](https://github.com/tensorflow/models/tree/master/official/vision/image_classification) and [Transformer](https://github.com/tensorflow/models/blob/master/official/nlp/transformer), will run using mixed precision by passing `--dtype=fp16`.\n" + "For an example of mixed precision using the `tf.keras.mixed_precision` API, check [functions and classes related to training performance](https://github.com/tensorflow/models/blob/master/official/modeling/performance.py). 
Check out the official models, such as [Transformer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_encoder_block.py), for details.\n" ] } ], diff --git a/site/en/guide/profiler.md b/site/en/guide/profiler.md index 1cd19c109fe..dee8a5a84af 100644 --- a/site/en/guide/profiler.md +++ b/site/en/guide/profiler.md @@ -55,7 +55,7 @@ found. When you run profiling with CUDA® Toolkit in a Docker environment or on Linux, you may encounter issues related to insufficient CUPTI privileges (`CUPTI_ERROR_INSUFFICIENT_PRIVILEGES`). Go to the -[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters){:.external} +[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters) to learn more about how you can resolve these issues on Linux. To resolve CUPTI privilege issues in a Docker environment, run @@ -694,7 +694,7 @@ first few batches to avoid inaccuracies due to initialization overhead. An example for profiling multiple workers: ```python - # E.g. your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you + # E.g., your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you # would like to profile for a duration of 2 seconds. tf.profiler.experimental.client.trace( 'grpc://10.0.0.2:8466,grpc://10.0.0.3:8466,grpc://10.0.0.4:8466', @@ -845,7 +845,7 @@ more efficient by casting to different data types after applying spatial transformations, such as flipping, cropping, rotating, etc. Note: Some ops like `tf.image.resize` transparently change the `dtype` to -`fp32`. Make sure you normalize your data to lie between `0` and `1` if its not +`fp32`. Make sure you normalize your data to lie between `0` and `1` if it's not done automatically. Skipping this step could lead to `NaN` errors if you have enabled [AMP](https://developer.nvidia.com/automatic-mixed-precision). diff --git a/site/en/guide/ragged_tensor.ipynb b/site/en/guide/ragged_tensor.ipynb index 4bc0d679499..ba0be2928ce 100644 --- a/site/en/guide/ragged_tensor.ipynb +++ b/site/en/guide/ragged_tensor.ipynb @@ -81,6 +81,7 @@ }, "outputs": [], "source": [ + "!pip install --pre -U tensorflow\n", "import math\n", "import tensorflow as tf" ] @@ -109,7 +110,7 @@ "source": [ "### What you can do with a ragged tensor\n", "\n", - "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" + "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.strings.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" ] }, { @@ -673,14 +674,14 @@ "source": [ "### Keras\n", "\n", - "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Ragged tensors may be passed as inputs to a Keras model by setting `ragged=True` on `tf.keras.Input` or `tf.keras.layers.InputLayer`. Ragged tensors may also be passed between Keras layers, and returned by Keras models. 
The following example shows a toy LSTM model that is trained using ragged tensors." + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. It doesn't have ragged support. But it does support masked tensors. So the easiest way to use a ragged tensor in a Keras model is to convert the ragged tensor to a dense tensor, using `.to_tensor()` and then using Keras's builtin masking:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "pHls7hQVJlk5" + "id": "ucYf2sSzTvQo" }, "outputs": [], "source": [ @@ -690,26 +691,77 @@ " 'She turned me into a newt.',\n", " 'A newt?',\n", " 'Well, I got better.'])\n", - "is_question = tf.constant([True, False, True, False])\n", - "\n", + "is_question = tf.constant([True, False, True, False])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MGYKmizJTw8B" + }, + "outputs": [], + "source": [ "# Preprocess the input strings.\n", "hash_buckets = 1000\n", "words = tf.strings.split(sentences, ' ')\n", "hashed_words = tf.strings.to_hash_bucket_fast(words, hash_buckets)\n", - "\n", + "hashed_words.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FTujwOlUT8J" + }, + "outputs": [], + "source": [ + "hashed_words.to_tensor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vzWudaESUBOZ" + }, + "outputs": [], + "source": [ + "tf.keras.Input?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pHls7hQVJlk5" + }, + "outputs": [], + "source": [ "# Build the Keras model.\n", "keras_model = tf.keras.Sequential([\n", - " tf.keras.layers.Input(shape=[None], dtype=tf.int64, ragged=True),\n", - " tf.keras.layers.Embedding(hash_buckets, 16),\n", - " tf.keras.layers.LSTM(32, use_bias=False),\n", + " tf.keras.layers.Embedding(hash_buckets, 16, mask_zero=True),\n", + " tf.keras.layers.LSTM(32, return_sequences=True, use_bias=False),\n", + " tf.keras.layers.GlobalAveragePooling1D(),\n", " tf.keras.layers.Dense(32),\n", " tf.keras.layers.Activation(tf.nn.relu),\n", " tf.keras.layers.Dense(1)\n", "])\n", "\n", "keras_model.compile(loss='binary_crossentropy', optimizer='rmsprop')\n", - "keras_model.fit(hashed_words, is_question, epochs=5)\n", - "print(keras_model.predict(hashed_words))" + "keras_model.fit(hashed_words.to_tensor(), is_question, epochs=5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1IAjjmdTU9OU" + }, + "outputs": [], + "source": [ + "print(keras_model.predict(hashed_words.to_tensor()))" ] }, { @@ -798,7 +850,7 @@ "source": [ "### Datasets\n", "\n", - "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components. " + "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." 
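As a small, hedged illustration of that data structure (the sentences below are made up, not taken from the guide), a ragged tensor can be sliced directly into a `tf.data.Dataset`:

```python
# Minimal sketch: build a tf.data.Dataset from a ragged tensor.
import tensorflow as tf

words = tf.ragged.constant([["Hi"], ["Who", "are", "you?"], ["Well,", "bye."]])
dataset = tf.data.Dataset.from_tensor_slices(words)
for row in dataset:
  print(row)  # One variable-length row per dataset element.
```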
] }, { @@ -1077,9 +1129,11 @@ "import tempfile\n", "\n", "keras_module_path = tempfile.mkdtemp()\n", - "tf.saved_model.save(keras_model, keras_module_path)\n", - "imported_model = tf.saved_model.load(keras_module_path)\n", - "imported_model(hashed_words)" + "keras_model.save(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model = tf.keras.models.load_model(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model(hashed_words.to_tensor())" ] }, { @@ -1439,7 +1493,7 @@ "\n", "1. Use `tf.RaggedTensor.to_list` to convert the ragged tensor to a nested Python list.\n", "2. Use `tf.RaggedTensor.numpy` to convert the ragged tensor to a NumPy array whose values are nested NumPy arrays.\n", - "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", + "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", "4. Use Python indexing to select values from the ragged tensor.\n" ] }, @@ -1459,13 +1513,267 @@ "print(\"Indexed value:\", rt[1].numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "J87jMZa0M_YW" + }, + "source": [ + "## Ragged Shapes\n", + "\n", + "The shape of a tensor specifies the size of each axis. For example, the shape of `[[1, 2], [3, 4], [5, 6]]` is `[3, 2]`, since there are 3 rows and 2 columns. TensorFlow has two separate but related ways to describe shapes:\n", + "\n", + "* ***static shape***: Information about axis sizes that is known statically (e.g., while tracing a `tf.function`). May be partially specified.\n", + "\n", + "* ***dynamic shape***: Runtime information about the axis sizes." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IOETE_OLPLZo" + }, + "source": [ + "### Static shape\n", + "\n", + "A Tensor's static shape contains information about its axis sizes that is known at graph-construction time. For both `tf.Tensor` and `tf.RaggedTensor`, it is available using the `.shape` property, and is encoded using `tf.TensorShape`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btGDjT4uNgQy" + }, + "outputs": [], + "source": [ + "x = tf.constant([[1, 2], [3, 4], [5, 6]])\n", + "x.shape # shape of a tf.tensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "__OgvmrGPEjq" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3], [], [4]])\n", + "rt.shape # shape of a tf.RaggedTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EWnQd3qPWaw" + }, + "source": [ + "The static shape of a ragged dimension is always `None` (i.e., unspecified). However, the inverse is not true -- if a `TensorShape` dimension is `None`, then that could indicate that the dimension is ragged, *or* it could indicate that the dimension is uniform but that its size is not statically known." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "75E9YXYMNfne" + }, + "source": [ + "### Dynamic shape\n", + "\n", + "A tensor's dynamic shape contains information about its axis sizes that is known when the graph is run. It is constructed using the `tf.shape` operation. 
For `tf.Tensor`, `tf.shape` returns the shape as a 1D integer `Tensor`, where `tf.shape(x)[i]` is the size of axis `i`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kWJ7Cn1EQTD_" + }, + "outputs": [], + "source": [ + "x = tf.constant([['a', 'b'], ['c', 'd'], ['e', 'f']])\n", + "tf.shape(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BeZEfxwmRcSv" + }, + "source": [ + "However, a 1D `Tensor` is not expressive enough to describe the shape of a `tf.RaggedTensor`. Instead, the dynamic shape for ragged tensors is encoded using a dedicated type, `tf.experimental.DynamicRaggedShape`. In the following example, the `DynamicRaggedShape` returned by `tf.shape(rt)` indicates that the ragged tensor has 4 rows, with lengths 1, 3, 0, and 2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nZc2wqgQQUFU" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3, 4], [], [5, 6]])\n", + "rt_shape = tf.shape(rt)\n", + "print(rt_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EphU60YvTf98" + }, + "source": [ + "#### Dynamic shape: operations\n", + "\n", + "`DynamicRaggedShape`s can be used with most TensorFlow ops that expect shapes, including `tf.reshape`, `tf.zeros`, `tf.ones`, `tf.fill`, `tf.broadcast_dynamic_shape`, and `tf.broadcast_to`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pclAODLXT6Gr" + }, + "outputs": [], + "source": [ + "print(f\"tf.reshape(x, rt_shape) = {tf.reshape(x, rt_shape)}\")\n", + "print(f\"tf.zeros(rt_shape) = {tf.zeros(rt_shape)}\")\n", + "print(f\"tf.ones(rt_shape) = {tf.ones(rt_shape)}\")\n", + "print(f\"tf.fill(rt_shape, 'x') = {tf.fill(rt_shape, 'x')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rNP_3_btRAHj" + }, + "source": [ + "#### Dynamic shape: indexing and slicing\n", + "\n", + "`DynamicRaggedShape` can also be indexed to get the sizes of uniform dimensions. For example, we can find the number of rows in a ragged tensor using `tf.shape(rt)[0]` (just as we would for a non-ragged tensor):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzQvPhsxS6HN" + }, + "outputs": [], + "source": [ + "rt_shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvr2iT6zS_e8" + }, + "source": [ + "However, it is an error to use indexing to try to retrieve the size of a ragged dimension, since it doesn't have a single size. (Since `RaggedTensor` keeps track of which axes are ragged, this error is only thrown during eager execution or when tracing a `tf.function`; it will never be thrown when executing a concrete function.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HgGMk0LeTGik" + }, + "outputs": [], + "source": [ + "try:\n", + " rt_shape[1]\n", + "except ValueError as e:\n", + " print(\"Got expected ValueError:\", e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5QUsdawGU0SM" + }, + "source": [ + "`DynamicRaggedShape`s can also be sliced, as long as the slice either begins with axis `0`, or contains only dense dimensions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APT72EaBU70t" + }, + "outputs": [], + "source": [ + "rt_shape[:1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a-Wl9IrQXcdY" + }, + "source": [ + "#### Dynamic shape: encoding\n", + "\n", + "`DynamicRaggedShape` is encoded using two fields:\n", + "\n", + "* `inner_shape`: An integer vector giving the shape of a dense `tf.Tensor`.\n", + "* `row_partitions`: A list of `tf.experimental.RowPartition` objects, describing how the outermost dimension of that inner shape should be partitioned to add ragged axes.\n", + "\n", + "For more information about row partitions, see the \"RaggedTensor encoding\" section below, and the API docs for `tf.experimental.RowPartition`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jfeY9tTcV_zL" + }, + "source": [ + "#### Dynamic shape: construction\n", + "\n", + "`DynamicRaggedShape` is most often constructed by applying `tf.shape` to a `RaggedTensor`, but it can also be constructed directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSRgD667WwIZ" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape(\n", + " row_partitions=[tf.experimental.RowPartition.from_row_lengths([5, 3, 2])],\n", + " inner_shape=[10, 8])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EjzVjs9MXIIA" + }, + "source": [ + "If the lengths of all rows are known statically, `DynamicRaggedShape.from_lengths` can also be used to construct a dynamic ragged shape. (This is mostly useful for testing and demonstration code, since it's rare for the lengths of ragged dimensions to be known statically).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gMxCzADUYIjY" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape.from_lengths([4, (2, 1, 0, 8), 12])" + ] + }, { "cell_type": "markdown", "metadata": { "id": "EdljbNPq-PWS" }, "source": [ - "## Broadcasting\n", + "### Broadcasting\n", "\n", "Broadcasting is the process of making tensors with different shapes have compatible shapes for elementwise operations. For more background on broadcasting, refer to:\n", "\n", @@ -1491,7 +1799,7 @@ "id": "-S2hOUWx-PWU" }, "source": [ - "### Broadcasting examples" + "#### Broadcasting examples" ] }, { @@ -1870,7 +2178,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "ragged_tensor.ipynb", "toc_visible": true }, diff --git a/site/en/guide/random_numbers.ipynb b/site/en/guide/random_numbers.ipynb index 37c83ae76a0..f8b824ad906 100644 --- a/site/en/guide/random_numbers.ipynb +++ b/site/en/guide/random_numbers.ipynb @@ -166,7 +166,7 @@ "source": [ "See the *Algorithms* section below for more information about it.\n", "\n", - "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g. time and OS." + "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g., time and OS." ] }, { @@ -268,7 +268,7 @@ "source": [ "Note: In theory, you can use constructors such as `from_seed` instead of `split` here to obtain a new generator, but by doing so you lose the guarantee that the new generator is independent of the global generator. 
You will also run the risk that you may accidentally create two generators with the same seed or with seeds that lead to overlapping random-number streams.\n", "\n", - "You can do splitting recursively, calling `split` on splitted generators. There are no limits (barring integer overflow) on the depth of recursions." + "You can do splitting recursively, calling `split` on split generators. There are no limits (barring integer overflow) on the depth of recursions." ] }, { @@ -325,7 +325,7 @@ "source": [ "#### Creating generators inside `tf.function` \n", "\n", - "Creation of generators inside a `tf.function` can only happend during the first run of the function. " + "Creation of generators inside a `tf.function` can only happened during the first run of the function. " ] }, { diff --git a/site/en/guide/saved_model.ipynb b/site/en/guide/saved_model.ipynb index 355a8240977..2601e504669 100644 --- a/site/en/guide/saved_model.ipynb +++ b/site/en/guide/saved_model.ipynb @@ -74,9 +74,10 @@ "- Low-level `tf.saved_model` API. This document describes how to use this API in detail.\n", " - Save: `tf.saved_model.save(model, path_to_dir)`\n", " - Load: `model = tf.saved_model.load(path_to_dir)`\n", - "- High-level `tf.keras.Model` API. Refer to [the keras save and serialize guide](keras/save_and_serialize.ipynb).\n", + "- High-level `tf.keras.Model` API. Refer to [the keras save and serialize guide](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "- If you just want to save/load weights during training, refer to [the checkpoints guide](./checkpoint.ipynb).\n", - "\n" + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n" ] }, { @@ -85,8 +86,24 @@ "id": "9SuIC7FiI9g8" }, "source": [ - "## Creating a SavedModel from Keras\n", - "\n", + "## Creating a SavedModel from Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AtSmftAvhJvE" + }, + "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eLSOptpYhJvE" + }, + "source": [ "For a quick introduction, this section exports a pre-trained Keras model and serves image classification requests with it. The rest of the guide will fill in details and discuss other ways to create SavedModels." ] }, @@ -132,10 +149,10 @@ "file = tf.keras.utils.get_file(\n", " \"grace_hopper.jpg\",\n", " \"https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg\")\n", - "img = tf.keras.preprocessing.image.load_img(file, target_size=[224, 224])\n", + "img = tf.keras.utils.load_img(file, target_size=[224, 224])\n", "plt.imshow(img)\n", "plt.axis('off')\n", - "x = tf.keras.preprocessing.image.img_to_array(img)\n", + "x = tf.keras.utils.img_to_array(img)\n", "x = tf.keras.applications.mobilenet.preprocess_input(\n", " x[tf.newaxis,...])" ] @@ -353,7 +370,9 @@ "source": [ "The `assets` directory contains files used by the TensorFlow graph, for example text files used to initialize vocabulary tables. 
It is unused in this example.\n", "\n", - "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory." + "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory.\n", + "\n", + "The `fingerprint.pb` file contains the [fingerprint](https://en.wikipedia.org/wiki/Fingerprint_(computing)) of the SavedModel, which is composed of several 64-bit hashes that uniquely identify the contents of the SavedModel. The fingerprinting API is currently experimental, but `tf.saved_model.experimental.read_fingerprint` can be used to read the SavedModel fingerprint into a `tf.saved_model.experimental.Fingerprint` object." ] }, { @@ -407,7 +426,7 @@ "\n", "Briefly, `tf.function` works by tracing the Python code to generate a ConcreteFunction (a callable wrapper around `tf.Graph`). When saving a `tf.function`, you're really saving the `tf.function`'s cache of ConcreteFunctions.\n", "\n", - "To learn more about the relationship between `tf.function` and ConcreteFunctions, see the [tf.function guide](../../guide/function)." + "To learn more about the relationship between `tf.function` and ConcreteFunctions, refer to the [tf.function guide](function.ipynb)." ] }, { @@ -492,7 +511,7 @@ }, "outputs": [], "source": [ - "optimizer = tf.optimizers.SGD(0.05)\n", + "optimizer = tf.keras.optimizers.SGD(0.05)\n", "\n", "def train_step():\n", " with tf.GradientTape() as tape:\n", @@ -620,7 +639,7 @@ "outputs": [], "source": [ "imported_with_signatures = tf.saved_model.load(module_with_signature_path)\n", - "list(imported_with_signatures.signatures.keys())\n" + "list(imported_with_signatures.signatures.keys()) # [\"serving_default\"]" ] }, { @@ -655,8 +674,12 @@ }, "outputs": [], "source": [ - "imported_with_multiple_signatures = tf.saved_model.load(module_multiple_signatures_path)\n", - "list(imported_with_multiple_signatures.signatures.keys())" + "imported_with_multiple_signatures = tf.saved_model.load(\n", + " module_multiple_signatures_path\n", + ")\n", + "list(\n", + " imported_with_multiple_signatures.signatures.keys()\n", + ") # [\"serving_default\", \"array_input\"]" ] }, { @@ -681,7 +704,7 @@ " super(CustomModuleWithOutputName, self).__init__()\n", " self.v = tf.Variable(1.)\n", "\n", - " @tf.function(input_signature=[tf.TensorSpec([], tf.float32)])\n", + " @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])\n", " def __call__(self, x):\n", " return {'custom_output_name': x * self.v}\n", "\n", @@ -701,7 +724,41 @@ "outputs": [], "source": [ "imported_with_output_name = tf.saved_model.load(module_output_path)\n", - "imported_with_output_name.signatures['serving_default'].structured_outputs" + "imported_with_output_name.signatures[\n", + " 'serving_default'\n", + "].structured_outputs # {'custom_output_name': TensorSpec(shape=, dtype=tf.float32, name='custom_output_name')}" ] }, { + "cell_type": "markdown", + "metadata": { + "id": "Q4bCK55x1IBW" + }, + "source": [ + "## Proto-splitting\n", + "\n", + "Note: This feature will be part of the TensorFlow 2.15 release. It is currently available in the nightly build which you can install with `pip install tf-nightly`.\n", + "\n", + "Due to limits of the protobuf implementation, proto sizes cannot exceed 2GB.
This can lead to the following errors when attempting to save very large models:\n", + "\n", + "```\n", + "ValueError: Message tensorflow.SavedModel exceeds maximum protobuf size of 2GB: ...\n", + "```\n", + "\n", + "```\n", + "google.protobuf.message.DecodeError: Error parsing message as the message exceeded the protobuf limit with type 'tensorflow.GraphDef'\n", + "```\n", + "\n", + "If you wish to save models that exceed the 2GB limit, then you'll need to save using the new proto-splitting option:\n", + "\n", + "```python\n", + "tf.saved_model.save(\n", + " ...,\n", + " options=tf.saved_model.SaveOptions(experimental_image_format=True)\n", + ")\n", + "```\n", + "\n", + "More information can be found in the [Proto Splitter / Merger Library guide](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/proto_splitter/g3doc/in-depth-guide.md)." ] }, { @@ -759,7 +816,7 @@ "additional command to build `saved_model_cli`:\n", "\n", "```\n", - "$ bazel build tensorflow/python/tools:saved_model_cli\n", + "$ bazel build //tensorflow/python/tools:saved_model_cli\n", "```\n", "\n", "### Overview of commands\n", @@ -975,7 +1032,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "saved_model.ipynb", "provenance": [], "toc_visible": true diff --git a/site/en/guide/sparse_tensor.ipynb b/site/en/guide/sparse_tensor.ipynb index 2395c6e6365..3d4daca7fad 100644 --- a/site/en/guide/sparse_tensor.ipynb +++ b/site/en/guide/sparse_tensor.ipynb @@ -79,7 +79,7 @@ "source": [ "## Sparse tensors in TensorFlow\n", "\n", - "TensorFlow represents sparse tensors through the `tf.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", + "TensorFlow represents sparse tensors through the `tf.sparse.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", "\n", "The COO encoding for sparse tensors is comprised of:\n", "\n", @@ -87,9 +87,9 @@ " * `indices`: A 2D tensor with shape `[N, rank]`, containing the indices of the nonzero values.\n", " * `dense_shape`: A 1D tensor with shape `[rank]`, specifying the shape of the tensor.\n", "\n", - "A ***nonzero*** value in the context of a `tf.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", + "A ***nonzero*** value in the context of a `tf.sparse.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", "\n", - "Note: `tf.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. " + "Note: `tf.sparse.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. 
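To illustrate the note above, here is a minimal sketch (with made-up indices) of `tf.sparse.reorder` producing the canonical row-major ordering:

```python
# Minimal sketch: indices are deliberately out of row-major order.
import tensorflow as tf

st = tf.sparse.SparseTensor(indices=[[2, 4], [0, 3]],
                            values=[20, 10],
                            dense_shape=[3, 10])
st_ordered = tf.sparse.reorder(st)
print(st_ordered.indices.numpy())  # [[0 3] [2 4]], values reordered to match.
```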
" ] }, { @@ -98,7 +98,7 @@ "id": "6Aq7ruwlyz79" }, "source": [ - "## Creating a `tf.SparseTensor`\n", + "## Creating a `tf.sparse.SparseTensor`\n", "\n", "Construct sparse tensors by directly specifying their `values`, `indices`, and `dense_shape`." ] @@ -122,7 +122,7 @@ }, "outputs": [], "source": [ - "st1 = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "st1 = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[10, 20],\n", " dense_shape=[3, 10])" ] @@ -252,11 +252,11 @@ }, "outputs": [], "source": [ - "st_a = tf.SparseTensor(indices=[[0, 2], [3, 4]],\n", + "st_a = tf.sparse.SparseTensor(indices=[[0, 2], [3, 4]],\n", " values=[31, 2], \n", " dense_shape=[4, 10])\n", "\n", - "st_b = tf.SparseTensor(indices=[[0, 2], [7, 0]],\n", + "st_b = tf.sparse.SparseTensor(indices=[[0, 2], [3, 0]],\n", " values=[56, 38],\n", " dense_shape=[4, 10])\n", "\n", @@ -282,7 +282,7 @@ }, "outputs": [], "source": [ - "st_c = tf.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", + "st_c = tf.sparse.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", " values=[13, 15, 17],\n", " dense_shape=(2,2))\n", "\n", @@ -309,14 +309,14 @@ }, "outputs": [], "source": [ - "sparse_pattern_A = tf.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", + "sparse_pattern_A = tf.sparse.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", " values = [1,1,1,1,1,1],\n", " dense_shape = [8,5])\n", - "sparse_pattern_B = tf.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", + "sparse_pattern_B = tf.sparse.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", " [4,5], [5,0], [5,4], [5,5], [6,1], [6,3], [7,2]],\n", " values = [1,1,1,1,1,1,1,1,1,1,1,1,1,1],\n", " dense_shape = [8,6])\n", - "sparse_pattern_C = tf.SparseTensor(indices = [[3,0], [4,0]],\n", + "sparse_pattern_C = tf.sparse.SparseTensor(indices = [[3,0], [4,0]],\n", " values = [1,1],\n", " dense_shape = [8,6])\n", "\n", @@ -381,7 +381,7 @@ }, "outputs": [], "source": [ - "st2_plus_5 = tf.SparseTensor(\n", + "st2_plus_5 = tf.sparse.SparseTensor(\n", " st2.indices,\n", " st2.values + 5,\n", " st2.dense_shape)\n", @@ -394,7 +394,7 @@ "id": "GFhO2ZZ53ga1" }, "source": [ - "## Using `tf.SparseTensor` with other TensorFlow APIs\n", + "## Using `tf.sparse.SparseTensor` with other TensorFlow APIs\n", "\n", "Sparse tensors work transparently with these TensorFlow APIs:\n", "\n", @@ -449,7 +449,7 @@ "y = tf.keras.layers.Dense(4)(x)\n", "model = tf.keras.Model(x, y)\n", "\n", - "sparse_data = tf.SparseTensor(\n", + "sparse_data = tf.sparse.SparseTensor(\n", " indices = [(0,0),(0,1),(0,2),\n", " (4,3),(5,0),(5,1)],\n", " values = [1,1,1,1,1,1],\n", @@ -569,9 +569,9 @@ "\n", "`tf.train.Example` is a standard protobuf encoding for TensorFlow data. When using sparse tensors with `tf.train.Example`, you can:\n", "\n", - "* Read variable-length data into a `tf.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", + "* Read variable-length data into a `tf.sparse.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", "\n", - "* Read arbitrary sparse data into a `tf.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." + "* Read arbitrary sparse data into a `tf.sparse.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." 
] }, { @@ -597,7 +597,7 @@ "def f(x,y):\n", " return tf.sparse.sparse_dense_matmul(x,y)\n", "\n", - "a = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "a = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[15, 25],\n", " dense_shape=[3, 10])\n", "\n", @@ -616,11 +616,11 @@ "source": [ "## Distinguishing missing values from zero values\n", "\n", - "Most ops on `tf.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.SparseTensor` is supposed to act just like a dense tensor.\n", + "Most ops on `tf.sparse.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.sparse.SparseTensor` is supposed to act just like a dense tensor.\n", "\n", "However, there are a few cases where it can be useful to distinguish zero values from missing values. In particular, this allows for one way to encode missing/unknown data in your training data. For example, consider a use case where you have a tensor of scores (that can have any floating point value from -Inf to +Inf), with some missing scores. You can encode this tensor using a sparse tensor where the explicit zeros are known zero scores but the implicit zero values actually represent missing data and not zero. \n", "\n", - "Note: This is generally not the intended usage of `tf.SparseTensor`s; and you might want to also consier other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." + "Note: This is generally not the intended usage of `tf.sparse.SparseTensor`s; and you might want to also consider other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." ] }, { @@ -680,8 +680,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "sparse_tensor_guide.ipynb", - "provenance": [], + "name": "sparse_tensor.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tensor.ipynb b/site/en/guide/tensor.ipynb index 45dbd37fb20..2eb261aad75 100644 --- a/site/en/guide/tensor.ipynb +++ b/site/en/guide/tensor.ipynb @@ -80,7 +80,7 @@ "id": "VQ3s2J8Vgowq" }, "source": [ - "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes.DType`.\n", + "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes`.\n", "\n", "If you're familiar with [NumPy](https://numpy.org/devdocs/user/quickstart.html), tensors are (kind of) like `np.arrays`.\n", "\n", @@ -95,7 +95,7 @@ "source": [ "## Basics\n", "\n", - "Let's create some basic tensors." + "First, create some basic tensors." ] }, { @@ -326,7 +326,7 @@ "a = tf.constant([[1, 2],\n", " [3, 4]])\n", "b = tf.constant([[1, 1],\n", - " [1, 1]]) # Could have also said `tf.ones([2,2])`\n", + " [1, 1]]) # Could have also said `tf.ones([2,2], dtype=tf.int32)`\n", "\n", "print(tf.add(a, b), \"\\n\")\n", "print(tf.multiply(a, b), \"\\n\")\n", @@ -352,7 +352,7 @@ "id": "S3_vIAl2JPVc" }, "source": [ - "Tensors are used in all kinds of operations (ops)." + "Tensors are used in all kinds of operations (or \"Ops\")." 
] }, { @@ -368,11 +368,53 @@ "# Find the largest value\n", "print(tf.reduce_max(c))\n", "# Find the index of the largest value\n", - "print(tf.argmax(c))\n", + "print(tf.math.argmax(c))\n", "# Compute the softmax\n", "print(tf.nn.softmax(c))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MNM-q7-MZLz" + }, + "source": [ + "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wch0N8xNEt-" + }, + "outputs": [], + "source": [ + "tf.convert_to_tensor([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ngqIeWYeNJVI" + }, + "outputs": [], + "source": [ + "tf.reduce_max([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThVMxqbVNOq3" + }, + "outputs": [], + "source": [ + "tf.reduce_max(np.array([1,2,3]))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -393,7 +435,7 @@ "* **Shape**: The length (number of elements) of each of the axes of a tensor.\n", "* **Rank**: Number of tensor axes. A scalar has rank 0, a vector has rank 1, a matrix is rank 2.\n", "* **Axis** or **Dimension**: A particular dimension of a tensor.\n", - "* **Size**: The total number of items in the tensor, the product shape vector.\n" + "* **Size**: The total number of items in the tensor, the product of the shape vector's elements.\n" ] }, { @@ -461,6 +503,37 @@ "print(\"Total number of elements (3*2*4*5): \", tf.size(rank_4_tensor).numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ZGZp_JOOPOv" + }, + "source": [ + "But note that the `Tensor.ndim` and `Tensor.shape` attributes don't return `Tensor` objects. If you need a `Tensor` use the `tf.rank` or `tf.shape` function. This difference is subtle, but it can be important when building graphs (later)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ptq0-y6APCpD" + }, + "outputs": [], + "source": [ + "tf.rank(rank_4_tensor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HslrDOEBPICN" + }, + "outputs": [], + "source": [ + "tf.shape(rank_4_tensor)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -919,7 +992,7 @@ "Except for [tf.RaggedTensor](#ragged_tensors), such shapes will only occur in the context of TensorFlow's symbolic, graph-building APIs:\n", "\n", "* [tf.function](function.ipynb) \n", - "* The [keras functional API](keras/functional.ipynb).\n" + "* The [keras functional API](https://www.tensorflow.org/guide/keras/functional).\n" ] }, { @@ -962,7 +1035,7 @@ "source": [ "## Broadcasting\n", "\n", - "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", + "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.broadcasting.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", "\n", "The simplest and most common case is when you attempt to multiply or add a tensor to a scalar. 
In that case, the scalar is broadcast to be the same shape as the other argument. " ] @@ -1372,7 +1445,7 @@ "id": "st9OxrUxWSKY" }, "source": [ - "And `tf.string.to_number`:" + "And `tf.strings.to_number`:" ] }, { @@ -1491,7 +1564,6 @@ "Tce3stUlHN0L" ], "name": "tensor.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tensor_slicing.ipynb b/site/en/guide/tensor_slicing.ipynb index 9f58a206de6..c5cb2d71356 100644 --- a/site/en/guide/tensor_slicing.ipynb +++ b/site/en/guide/tensor_slicing.ipynb @@ -635,7 +635,6 @@ "colab": { "collapsed_sections": [], "name": "tensor_slicing.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy.ipynb b/site/en/guide/tf_numpy.ipynb index 9b469241a3b..3083acb147d 100644 --- a/site/en/guide/tf_numpy.ipynb +++ b/site/en/guide/tf_numpy.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/1.16), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." + "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/stable/index.html), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." ] }, { @@ -142,7 +142,7 @@ "\n", "An instance of `tf.experimental.numpy.ndarray`, called **ND Array**, represents a multidimensional dense array of a given `dtype` placed on a certain device. It is an alias to `tf.Tensor`. Check out the ND array class for useful methods like `ndarray.T`, `ndarray.reshape`, `ndarray.ravel` and others.\n", "\n", - "First create an ND array object, and then invoke different methods. " + "First create an ND array object, and then invoke different methods." ] }, { @@ -170,11 +170,28 @@ { "cell_type": "markdown", "metadata": { - "id": "Mub8-dvJMUr4" + "id": "-BOY8CGRKEhE" }, "source": [ "### Type promotion\n", "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use `NumPy` type promotion rules (described below).\n", + "- After TensorFlow 2.15, there are two new options (refer to [TF NumPy Type Promotion](tf_numpy_type_promotion.ipynb) for details):\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")` uses Jax type promotion rules.\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")` uses Jax type promotion rules, but disallows certain unsafe promotions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SXskSHrX5J45" + }, + "source": [ + "#### NumPy Type Promotion\n", + "\n", "TensorFlow NumPy APIs have well-defined semantics for converting literals to ND array, as well as for performing type promotion on ND array inputs. Please see [`np.result_type`](https://numpy.org/doc/1.16/reference/generated/numpy.result_type.html) for more details." 
] }, @@ -200,7 +217,7 @@ " (tnp.int32, tnp.int64, tnp.float32, tnp.float64)]\n", "for i, v1 in enumerate(values):\n", " for v2 in values[i + 1:]:\n", - " print(\"%s + %s => %s\" % \n", + " print(\"%s + %s => %s\" %\n", " (v1.dtype.name, v2.dtype.name, (v1 + v2).dtype.name))" ] }, @@ -932,8 +949,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "tf_numpy.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy_type_promotion.ipynb b/site/en/guide/tf_numpy_type_promotion.ipynb new file mode 100644 index 00000000000..f984310822a --- /dev/null +++ b/site/en/guide/tf_numpy_type_promotion.ipynb @@ -0,0 +1,1138 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ZjN_IJ8mhJ-4" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sY3Ffd83hK3b" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "03Pw58e6mTHI" + }, + "source": [ + "# TF-NumPy Type Promotion" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9nPKvxK-_pM" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uma-W5v__DYh" + }, + "source": [ + "## Overview\n", + "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use [NumPy type promotion rules](https://www.tensorflow.org/guide/tf_numpy#type_promotion).\n", + "- **This doc** describes two new options that will be available in TensorFlow 2.15 (or currently in `tf-nightly`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vMvEKDFOsau7" + }, + "outputs": [], + "source": [ + "!pip install -q tf_nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a6hOFBfPsd3y" + }, + "source": [ + " **Note**: `experimental_enable_numpy_behavior` changes the behavior of all of TensorFlow." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ob1HNwUmYR5b" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AJR558zjAZQu" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow.experimental.numpy as tnp\n", + "\n", + "print(\"Using TensorFlow version %s\" % tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M6tacoy0DU6e" + }, + "source": [ + "### Enabling the new type promotion\n", + "\n", + "In order to use the [JAX-like type promotion](https://jax.readthedocs.io/en/latest/type_promotion.html) in TF-Numpy, specify either `'all'` or `'safe'` as the dtype conversion mode when enabling NumPy behavior for TensorFlow.\n", + "\n", + "This new system (with `dtype_conversion_mode=\"all\"`) is associative, commutative, and makes it easy to control what width of float you end up with (it doesn't automatically convert to wider floats). It does introduce some risks of overflows and precision loss, but `dtype_conversion_mode=\"safe\"` forces you to handle those cases explicitly. The two modes are explained more in detail in the [next section](#two_modes)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TfCyofpFDQxm" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sEMXK8-ZWMun" + }, + "source": [ + "\n", + "\n", + "## Two Modes : ALL mode vs SAFE mode\n", + "\n", + "In the new type promotion system, we introduce two modes: `ALL` mode and `SAFE` mode. `SAFE` mode is used to mitigate the concerns of \"risky\" promotions that can result in precision loss or bit-widening." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-ULvTWj_KnHU" + }, + "source": [ + "### Dtypes\n", + "\n", + "We will be using the following abbreviations for brevity.\n", + "\n", + "* `b` means `tf.bool`\n", + "* `u8` means `tf.uint8`\n", + "* `i16` means `tf.int16`\n", + "* `i32` means `tf.int32`\n", + "* `bf16` means `tf.bfloat16`\n", + "* `f32` means `tf.float32`\n", + "* `f64` means `tf.float64`\n", + "* `i32*` means Python `int` or weakly-typed `i32`\n", + "* `f32*` means Python `float` or weakly-typed `f32`\n", + "* `c128*` means Python `complex` or weakly-typed `c128`\n", + "\n", + "The asterisk (*) denotes that the corresponding type is “weak” - such a dtype is temporarily inferred by the system, and could defer to other dtypes. 
This concept is explained more in detail [here](#weak_tensor)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hXZxLCkuzzq3" + }, + "source": [ + "### Example of precision losing operations\n", + "\n", + "In the following example, `i32` + `f32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to the risk of precision loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-yeIvstWStL" + }, + "outputs": [], + "source": [ + "# i32 + f32 returns a f32 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "a + b # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JNNmZow2WY3G" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f0x4Qhff0AKS" + }, + "source": [ + "### Example of bit-widening operations\n", + "\n", + "In the following example, `i8` + `u32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to bit-widening, which means using more bits than the number of bits in the inputs. Note that the new type promotion semantics only allows necessary bit-widening." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Etbv-WoWzUXf" + }, + "outputs": [], + "source": [ + "# i8 + u32 returns an i64 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "a + b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKRdvtvw0Lvt" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yh2BwqUzH3C3" + }, + "source": [ + "## A System Based on a Lattice" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HHUnfTPiYVN5" + }, + "source": [ + "### Type Promotion Lattice\n", + "\n", + "The new type promotion behavior is determined via the following type promotion lattice:\n", + "\n", + "![Type Promotion Lattice](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_lattice.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QykluwRyDDle" + }, + "source": [ + "More specifically, promotion between any two types is determined by finding the first common child of the two nodes (including the nodes themselves).\n", + "\n", + "For example, in the diagram above, the first common child of `i8` and `i32` is `i32` because the two nodes intersect for the first time at `i32` when following the direction of the arrows.\n", + "\n", + "Similarly as another example, the result promotion type between `u64` and `f16` would be `f16`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nthziRHaDAUY" + }, + "source": [ + "\n", + "\n", + "### Type Promotion Table\n", + "\n", + "Following the lattice generates the binary promotion table below:\n", + "\n", + "**Note**: `SAFE` mode disallows the highlighted cells. `ALL` mode allows all cases.\n", + "\n", + "![Type Promotion Table](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_table.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TPDt5QTkucSC" + }, + "source": [ + "## Advantages of The New Type Promotion\n", + "\n", + "We adopt a JAX-like lattice-based system for our new type promotion, which offers the following advantages:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NUS_b13nue1p" + }, + "source": [ + "\n", + "\n", + "#### Advantages of Lattice-Based System\n", + "\n", + "First, using a lattice-based system ensures three very important properties:\n", + "\n", + "* Existence: There is a unique result promotion type for any combinations of types.\n", + "* Commutativity: `a + b = b + a`\n", + "* Associativity: `a + (b + c) = (a + b) = c`\n", + "\n", + "These three properties are critical for constructing a type promotion semantics that is consistent and predictable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sz88hRR6uhls" + }, + "source": [ + "#### Advantages of JAX-like Lattice System\n", + "\n", + "Another crucial advantage of the JAX-like lattice system is that outside unsigned ints, it avoids all wider-than-necessary promotions. This means you cannot get 64-bit results without 64-bit inputs. This is especially beneficial for working on accelerators as it avoids unnecessary 64-bit values, which was frequent in the old type promotion." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rlylb7ieOVbJ" + }, + "source": [ + "However, this comes with a trade-off: mixed float/integer promotion is very prone to precision loss. For instance, in the example below, `i64` + `f16` results in promoting `i64` to `f16`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "abqIkV02OXEF" + }, + "outputs": [], + "source": [ + "# The first input is promoted to f16 in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "tf.constant(1, tf.int64) + tf.constant(3.2, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mYnh1gZdObfI" + }, + "source": [ + "To migitage this concern, we introduced a `SAFE` mode that will disallow these \"risky\" promotions.\n", + "\n", + "**Note**: To learn more about the design considerations in constructing the lattice system, please refer to the [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gAc7LFV0S2dP" + }, + "source": [ + "\n", + "\n", + "## WeakTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "olQ2gsFlS9BH" + }, + "source": [ + "### Overview\n", + "\n", + "*Weak tensors* are Tensors that are \"weakly typed\", similar to a [concept in JAX](https://jax.readthedocs.io/en/latest/type_promotion.html#weakly-typed-values-in-jax).\n", + "\n", + "`WeakTensor`'s dtype is temporarily inferred by the system, and could defer to other dtypes. This concept is introduced in the new type promotion to prevent unwanted type promotion within binary operations between TF values and values with no explicitly user-specified type, such as Python scalar literals." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MYmoFIqZTFtw" + }, + "source": [ + "For instance, in the example below, `tf.constant(1.2)` is considered \"weak\" because it doesn't have a specific dtype. Therefore, `tf.constant(1.2)` defers to the type of `tf.constant(3.1, tf.float16)`, resulting in a `f16` output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eSBv_mzyTE97" + }, + "outputs": [], + "source": [ + "tf.constant(1.2) + tf.constant(3.1, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxuqBIFuTm5Z" + }, + "source": [ + "### WeakTensor Construction\n", + "\n", + "WeakTensors are created if you create a tensor without specifying a dtype the result is a WeakTensor. You can check whether a Tensor is \"weak\" or not by checking the weak attribute at the end of the Tensor's string representation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7UmunnJ8True3" + }, + "source": [ + "**First Case**: When `tf.constant` is called with an input with no user-specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fLEtMluNTsI5" + }, + "outputs": [], + "source": [ + "tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZQX6MBWHTt__" + }, + "outputs": [], + "source": [ + "tf.constant([5.0, 10.0, 3]) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ftsKSC5BTweP" + }, + "outputs": [], + "source": [ + "# A normal Tensor is created when dtype arg is specified.\n", + "tf.constant(5, tf.int32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RqhoRy5iTyag" + }, + "source": [ + "**Second Case**: When an input with no user-specified dtype is passed into a [WeakTensor-supporting API](#weak_tensor_apis)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DuwpgoQJTzE-" + }, + "outputs": [], + "source": [ + "tf.math.abs([100.0, 4.0]) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UTcoR1xvR39k" + }, + "source": [ + "##Effects of turning on the new type promotion\n", + "\n", + "Below is a non-exhaustive list of changes that result from turning on the new type promotion.\n", + "\n", + "* More consistent and predictable promotion results.\n", + "* Reduced risk of bit-widening.\n", + "* `tf.Tensor` mathematical dunder methods use new type promotion.\n", + "* `tf.constant` can return `WeakTensor`.\n", + "* `tf.constant` allows implicit conversions when a Tensor input with a dtype different from the `dtype` arg is passed in.\n", + "* `tf.Variable` in-place ops (`assign`, `assign-add`, `assign-sub`) allow implicit conversions.\n", + "* `tnp.array(1)` and `tnp.array(1.0)` returns 32-bit WeakTensor.\n", + "* `WeakTensor`s will be created and used for [WeakTensor-supporting unary and binary API](#weak_tensor_apis)'s.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KyvonwYcsFX2" + }, + "source": [ + "### More consistent and predictable promotion results\n", + "\n", + "Using a [lattice-based system](#lattice_system_design) allows the new type promotion to produce consistent and predictable type promotion results." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q0Z1njfb7lRa" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "Changing the order of operations produces inconsistent results using old type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M1Ca9v4m7z8e" + }, + "outputs": [], + "source": [ + "# Setup\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WwhTzJ-a4rTc" + }, + "outputs": [], + "source": [ + "# (a + b) + c throws an InvalidArgumentError.\n", + "try:\n", + " tf.add(tf.add(a, b), c)\n", + "except tf.errors.InvalidArgumentError as e:\n", + " print(f'{type(e)}: {e}') # InvalidArgumentError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3qDgVYn7ezT" + }, + "outputs": [], + "source": [ + "# (b + a) + c returns an i32 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YMH1skEs7oI5" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "New type promotion produces consistent results regardless of the order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BOHyJJ8z8uCN" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZUKU70jf7E1l" + }, + "outputs": [], + "source": [ + "# (a + b) + c returns a f16 result.\n", + "tf.add(tf.add(a, b), c) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOEycjFx7qDn" + }, + "outputs": [], + "source": [ + "# (b + a) + c also returns a f16 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FpGMkm6aJsn6" + }, + "source": [ + "### Reduced risk of bit-widening" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JxV2AL-U9Grg" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "The old type promotion often resulted in 64-bit results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7L1pxyvn9MlP" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zMJVFdWf4XHp" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBhUH_wD9Is7" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "The new type promotion returns results with the minimal number of bits necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aJsj2ZyI9T9Y" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jj0N_Plp4X9l" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yKUx7xe-KZ5O" + }, + "source": [ + "### tf.Tensor mathematical dunder methods\n", + "\n", + "All `tf.Tensor` mathematical dunder methods will follow the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2c3icBUX4wNl" + }, + "outputs": [], + "source": [ + "-tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ydJHQjid45s7" + }, + "outputs": [], + "source": [ + "tf.constant(5, tf.int16) - tf.constant(1, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLbIjIvbKqcU" + }, + "source": [ + "### tf.Variable in-place ops\n", + "\n", + "Implicit conversions will be allowed in `tf.Variable` in-place ops.\n", + "\n", + "**Note**: Any promotion that results in a dtype that is different from the variable's original dtype is not allowed. This is because `tf.Variable` cannot change its dtype."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QsXhyK1h-i5S" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.Variable(10, tf.int32)\n", + "a.assign_add(tf.constant(5, tf.int16)) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PiA4H-otLDit" + }, + "source": [ + "### tf.constant implicit conversions\n", + "\n", + "In the old type promotion, `tf.constant` required an input Tensor to have the same dtype as the dtype argument. However, in the new type promotion, we implicitly convert the Tensor to the specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ArrQ9Dj0_OR8" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, tf.int16)\n", + "tf.constant(a, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WAcK_-XnLWaP" + }, + "source": [ + "### TF-NumPy Array\n", + "\n", + "`tnp.array` defaults to `i32*` and `f32*` for Python inputs using the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1pZnYNh_ahm" + }, + "outputs": [], + "source": [ + "tnp.array(1) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QoQl2PYP_fMT" + }, + "outputs": [], + "source": [ + "tnp.array(1.0) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK5DpQ3Pz3k5" + }, + "source": [ + "## Input Type Inference\n", + "\n", + "This is how different inputs' types are inferred in the new type promotion.\n", + "\n", + "\n", + "* `tf.Tensor`: Since `tf.Tensor` has a dtype property, we don't do further inference.\n", + "* NumPy types: This includes types like `np.array(1)`, `np.int16(1)`, and `np.float64(1.0)`. Since NumPy inputs also have a dtype property, we take the dtype property as the result inference type. Note that NumPy defaults to `i64` and `f64`.\n", + "* Python scalars/Nested types: This includes types like `1`, `[1, 2, 3]`, and `(1.0, 2.0)`.\n", + " * Python `int` is inferred as `i32*`.\n", + " * Python `float` is inferred as `f32*`.\n", + " * Python `complex` is inferred as `c128*`.\n", + "* If the input doesn't fall into any of the above categories but has a dtype property, we take the dtype property as the result inference type." + ] + },
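+ { + "cell_type": "markdown", + "metadata": { + "id": "input-type-inference-example-md" + }, + "source": [ + "The following cell is a small illustrative sketch, not part of the original guide: assuming the `ALL` mode enabled above is still active, Python scalars should be inferred as 32-bit weak types, while NumPy inputs should keep their own dtypes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "input-type-inference-example-code" + }, + "outputs": [], + "source": [ + "# Illustrative sketch (assumption): check inferred dtypes under the new type promotion.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "print(tf.add(1, 2).dtype) # Python ints are inferred as i32*.\n", + "print(tf.add(1.0, 2.0).dtype) # Python floats are inferred as f32*.\n", + "print(tf.add(np.int64(1), 1).dtype) # A NumPy input keeps its i64 dtype." + ] + },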
+ { + "cell_type": "markdown", + "metadata": { + "id": "g_SPfalfSPgg" + }, + "source": [ + "# Further Reading\n", + "\n", + "The new type promotion closely resembles JAX-NumPy's type promotion. If you want to know more details about the new type promotion and the design choices, check out the resources below.\n", + "\n", + "* [JAX Type Promotion Semantics](https://jax.readthedocs.io/en/latest/type_promotion.html)\n", + "* [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)\n", + "* [Old TF-NumPy Promotion Semantics](https://www.tensorflow.org/guide/tf_numpy#type_promotion)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qg5xBbImT31S" + }, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjB0CVhVXBfW" + }, + "source": [ + "\n", + "\n", + "## WeakTensor-supporting APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_GVbqlN9aBS2" + }, + "source": [ + "Below is a list of APIs that support `WeakTensor`.\n", + "\n", + "For a unary op, this means that if an input with no user-specified type is passed in, it will return a `WeakTensor`.\n", + "\n", + "For a binary op, it will follow the promotion table [here](#promotion_table). It may or may not return a `WeakTensor` depending on the promotion result of the two inputs.\n", + "\n", + "**Note**: All mathematical operations (`+`, `-`, `*`, ...) are supported." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gi-G68Z8WN2P" + }, + "source": [ + "* `tf.bitwise.invert`\n", + "* `tf.clip_by_value`\n", + "* `tf.debugging.check_numerics`\n", + "* `tf.expand_dims`\n", + "* `tf.identity`\n", + "* `tf.image.adjust_brightness`\n", + "* `tf.image.adjust_gamma`\n", + "* `tf.image.extract_patches`\n", + "* `tf.image.random_brightness`\n", + "* `tf.image.stateless_random_brightness`\n", + "* `tf.linalg.diag`\n", + "* `tf.linalg.diag_part`\n", + "* `tf.linalg.matmul`\n", + "* `tf.linalg.matrix_transpose`\n", + "* `tf.linalg.tensor_diag_part`\n", + "* `tf.linalg.trace`\n", + "* `tf.math.abs`\n", + "* `tf.math.acos`\n", + "* `tf.math.acosh`\n", + "* `tf.math.add`\n", + "* `tf.math.angle`\n", + "* `tf.math.asin`\n", + "* `tf.math.asinh`\n", + "* `tf.math.atan`\n", + "* `tf.math.atanh`\n", + "* `tf.math.ceil`\n", + "* `tf.math.conj`\n", + "* `tf.math.cos`\n", + "* `tf.math.cosh`\n", + "* `tf.math.digamma`\n", + "* `tf.math.divide_no_nan`\n", + "* `tf.math.divide`\n", + "* `tf.math.erf`\n", + "* `tf.math.erfc`\n", + "* `tf.math.erfcinv`\n", + "* `tf.math.erfinv`\n", + "* `tf.math.exp`\n", + "* `tf.math.expm1`\n", + "* `tf.math.floor`\n", + "* `tf.math.floordiv`\n", + "* `tf.math.floormod`\n", + "* `tf.math.imag`\n", + "* `tf.math.lgamma`\n", + "* `tf.math.log1p`\n", + "* `tf.math.log_sigmoid`\n", + "* `tf.math.log`\n", + "* `tf.math.multiply_no_nan`\n", + "* `tf.math.multiply`\n", + "* `tf.math.ndtri`\n", + "* `tf.math.negative`\n", + "* `tf.math.pow`\n", + "* `tf.math.real`\n", + "* `tf.math.reciprocal_no_nan`\n", + "* `tf.math.reciprocal`\n", + "* `tf.math.reduce_euclidean_norm`\n", + "* `tf.math.reduce_logsumexp`\n", + "* `tf.math.reduce_max`\n", + "* `tf.math.reduce_mean`\n", + "* `tf.math.reduce_min`\n", + "* `tf.math.reduce_prod`\n", + "* `tf.math.reduce_std`\n", + "* `tf.math.reduce_sum`\n", + "* `tf.math.reduce_variance`\n", + "* `tf.math.rint`\n", + "* `tf.math.round`\n", + "* `tf.math.rsqrt`\n", + "* `tf.math.scalar_mul`\n", + "* `tf.math.sigmoid`\n", + "* `tf.math.sign`\n", + "* `tf.math.sin`\n", + "* `tf.math.sinh`\n", + "* `tf.math.softplus`\n", + "* `tf.math.special.bessel_i0`\n", + "* `tf.math.special.bessel_i0e`\n", + "* `tf.math.special.bessel_i1`\n", +
"* `tf.math.special.bessel_i1e`\n", + "* `tf.math.special.bessel_j0`\n", + "* `tf.math.special.bessel_j1`\n", + "* `tf.math.special.bessel_k0`\n", + "* `tf.math.special.bessel_k0e`\n", + "* `tf.math.special.bessel_k1`\n", + "* `tf.math.special.bessel_k1e`\n", + "* `tf.math.special.bessel_y0`\n", + "* `tf.math.special.bessel_y1`\n", + "* `tf.math.special.dawsn`\n", + "* `tf.math.special.expint`\n", + "* `tf.math.special.fresnel_cos`\n", + "* `tf.math.special.fresnel_sin`\n", + "* `tf.math.special.spence`\n", + "* `tf.math.sqrt`\n", + "* `tf.math.square`\n", + "* `tf.math.subtract`\n", + "* `tf.math.tan`\n", + "* `tf.math.tanh`\n", + "* `tf.nn.depth_to_space`\n", + "* `tf.nn.elu`\n", + "* `tf.nn.gelu`\n", + "* `tf.nn.leaky_relu`\n", + "* `tf.nn.log_softmax`\n", + "* `tf.nn.relu6`\n", + "* `tf.nn.relu`\n", + "* `tf.nn.selu`\n", + "* `tf.nn.softsign`\n", + "* `tf.nn.space_to_depth`\n", + "* `tf.nn.swish`\n", + "* `tf.ones_like`\n", + "* `tf.realdiv`\n", + "* `tf.reshape`\n", + "* `tf.squeeze`\n", + "* `tf.stop_gradient`\n", + "* `tf.transpose`\n", + "* `tf.truncatediv`\n", + "* `tf.truncatemod`\n", + "* `tf.zeros_like`\n", + "* `tf.experimental.numpy.abs`\n", + "* `tf.experimental.numpy.absolute`\n", + "* `tf.experimental.numpy.amax`\n", + "* `tf.experimental.numpy.amin`\n", + "* `tf.experimental.numpy.angle`\n", + "* `tf.experimental.numpy.arange`\n", + "* `tf.experimental.numpy.arccos`\n", + "* `tf.experimental.numpy.arccosh`\n", + "* `tf.experimental.numpy.arcsin`\n", + "* `tf.experimental.numpy.arcsinh`\n", + "* `tf.experimental.numpy.arctan`\n", + "* `tf.experimental.numpy.arctanh`\n", + "* `tf.experimental.numpy.around`\n", + "* `tf.experimental.numpy.array`\n", + "* `tf.experimental.numpy.asanyarray`\n", + "* `tf.experimental.numpy.asarray`\n", + "* `tf.experimental.numpy.ascontiguousarray`\n", + "* `tf.experimental.numpy.average`\n", + "* `tf.experimental.numpy.bitwise_not`\n", + "* `tf.experimental.numpy.cbrt`\n", + "* `tf.experimental.numpy.ceil`\n", + "* `tf.experimental.numpy.conj`\n", + "* `tf.experimental.numpy.conjugate`\n", + "* `tf.experimental.numpy.copy`\n", + "* `tf.experimental.numpy.cos`\n", + "* `tf.experimental.numpy.cosh`\n", + "* `tf.experimental.numpy.cumprod`\n", + "* `tf.experimental.numpy.cumsum`\n", + "* `tf.experimental.numpy.deg2rad`\n", + "* `tf.experimental.numpy.diag`\n", + "* `tf.experimental.numpy.diagflat`\n", + "* `tf.experimental.numpy.diagonal`\n", + "* `tf.experimental.numpy.diff`\n", + "* `tf.experimental.numpy.empty_like`\n", + "* `tf.experimental.numpy.exp2`\n", + "* `tf.experimental.numpy.exp`\n", + "* `tf.experimental.numpy.expand_dims`\n", + "* `tf.experimental.numpy.expm1`\n", + "* `tf.experimental.numpy.fabs`\n", + "* `tf.experimental.numpy.fix`\n", + "* `tf.experimental.numpy.flatten`\n", + "* `tf.experimental.numpy.flip`\n", + "* `tf.experimental.numpy.fliplr`\n", + "* `tf.experimental.numpy.flipud`\n", + "* `tf.experimental.numpy.floor`\n", + "* `tf.experimental.numpy.full_like`\n", + "* `tf.experimental.numpy.imag`\n", + "* `tf.experimental.numpy.log10`\n", + "* `tf.experimental.numpy.log1p`\n", + "* `tf.experimental.numpy.log2`\n", + "* `tf.experimental.numpy.log`\n", + "* `tf.experimental.numpy.max`\n", + "* `tf.experimental.numpy.mean`\n", + "* `tf.experimental.numpy.min`\n", + "* `tf.experimental.numpy.moveaxis`\n", + "* `tf.experimental.numpy.nanmean`\n", + "* `tf.experimental.numpy.negative`\n", + "* `tf.experimental.numpy.ones_like`\n", + "* `tf.experimental.numpy.positive`\n", + "* `tf.experimental.numpy.prod`\n", + "* 
`tf.experimental.numpy.rad2deg`\n", + "* `tf.experimental.numpy.ravel`\n", + "* `tf.experimental.numpy.real`\n", + "* `tf.experimental.numpy.reciprocal`\n", + "* `tf.experimental.numpy.repeat`\n", + "* `tf.experimental.numpy.reshape`\n", + "* `tf.experimental.numpy.rot90`\n", + "* `tf.experimental.numpy.round`\n", + "* `tf.experimental.numpy.signbit`\n", + "* `tf.experimental.numpy.sin`\n", + "* `tf.experimental.numpy.sinc`\n", + "* `tf.experimental.numpy.sinh`\n", + "* `tf.experimental.numpy.sort`\n", + "* `tf.experimental.numpy.sqrt`\n", + "* `tf.experimental.numpy.square`\n", + "* `tf.experimental.numpy.squeeze`\n", + "* `tf.experimental.numpy.std`\n", + "* `tf.experimental.numpy.sum`\n", + "* `tf.experimental.numpy.swapaxes`\n", + "* `tf.experimental.numpy.tan`\n", + "* `tf.experimental.numpy.tanh`\n", + "* `tf.experimental.numpy.trace`\n", + "* `tf.experimental.numpy.transpose`\n", + "* `tf.experimental.numpy.triu`\n", + "* `tf.experimental.numpy.vander`\n", + "* `tf.experimental.numpy.var`\n", + "* `tf.experimental.numpy.zeros_like`" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_numpy_type_promotion.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/tpu.ipynb b/site/en/guide/tpu.ipynb index f64450ba04c..49eee544bec 100644 --- a/site/en/guide/tpu.ipynb +++ b/site/en/guide/tpu.ipynb @@ -6,7 +6,7 @@ "id": "Tce3stUlHN0L" }, "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n" + "##### Copyright 2024 The TensorFlow Authors.\n" ] }, { @@ -61,7 +61,9 @@ "id": "Ys81cOhXOWUP" }, "source": [ - "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU**." + "This guide demonstrates how to perform basic training on [Tensor Processing Units (TPUs)](https://cloud.google.com/tpu/) and TPU Pods, a collection of TPU devices connected by dedicated high-speed network interfaces, with `tf.keras` and custom training loops.\n", + "\n", + "TPUs are Google's custom-developed application-specific integrated circuits (ASICs) used to accelerate machine learning workloads. They are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] }, { @@ -73,6 +75,17 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ebf7f8489bb7" + }, + "source": [ + "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU v2**.\n", + "\n", + "Import some necessary libraries, including TensorFlow Datasets:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -95,7 +108,7 @@ "source": [ "## TPU initialization\n", "\n", - "TPUs are typically Cloud TPU workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." 
+ "TPUs are typically [Cloud TPU](https://cloud.google.com/tpu/docs/) workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." ] }, { @@ -115,7 +128,7 @@ }, "outputs": [], "source": [ - "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n", + "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')\n", "tf.config.experimental_connect_to_cluster(resolver)\n", "# This is the TPU initialization code that has to be at the beginning.\n", "tf.tpu.experimental.initialize_tpu_system(resolver)\n", @@ -159,7 +172,7 @@ "source": [ "## Distribution strategies\n", "\n", - "Usually you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (or other accelerators), TensorFlow offers several distribution strategies. You can replace your distribution strategy and the model will run on any given (TPU) device. Check the [distribution strategy guide](./distributed_training.ipynb) for more information." + "Usually, you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (as well as multiple GPUs or multiple machines), TensorFlow offers the `tf.distribute.Strategy` API. You can replace your distribution strategy and the model will run on any given (TPU) device. Learn more in the [Distributed training with TensorFlow](./distributed_training.ipynb) guide." ] }, { @@ -168,6 +181,8 @@ "id": "DcDPMZs-9uLJ" }, "source": [ + "Using the `tf.distribute.TPUStrategy` option implements synchronous distributed training. TPUs provide their own implementation of efficient all-reduce and other collective operations across multiple TPU cores, which are used in `TPUStrategy`.\n", + "\n", "To demonstrate this, create a `tf.distribute.TPUStrategy` object:" ] }, @@ -188,7 +203,7 @@ "id": "JlaAmswWPsU6" }, "source": [ - "To replicate a computation so it can run in all TPU cores, you can pass it into the `strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." + "To replicate a computation so it can run in all TPU cores, you can pass it into the `Strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." ] }, { @@ -216,7 +231,7 @@ "source": [ "## Classification on TPUs\n", "\n", - "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU.\n" + "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU." ] }, { @@ -227,7 +242,7 @@ "source": [ "### Define a Keras model\n", "\n", - "Start with a definition of a `Sequential` Keras model for image classification on the MNIST dataset using Keras. 
It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside `strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the strategy scope." + "Start with a definition of a [`Sequential` Keras model](https://www.tensorflow.org/guide/keras/sequential_model) for image classification on the MNIST dataset. It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside the `Strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the `Strategy` scope." ] }, { @@ -239,13 +254,32 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential(\n", - " [tf.keras.layers.Conv2D(256, 3, activation='relu', input_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(256, 3, activation='relu'),\n", + " [tf.keras.layers.Conv2D(256, 3, input_shape=(28, 28, 1),\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Conv2D(256, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(256, activation='relu'),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)])" + " tf.keras.layers.Dense(256,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10,\n", + " kernel_regularizer=regularizer)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h-2qaXgfyONQ" + }, + "source": [ + "This model puts L2 regularization terms on the weights of each layer, so that the custom training loop below can show how you pick them up from `Model.losses`." ] }, { @@ -256,9 +290,9 @@ "source": [ "### Load the dataset\n", "\n", - "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU, as it is impossible to use the Cloud TPUs unless you can feed them data quickly enough. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", + "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", "\n", - "For all but the simplest experiments (using `tf.data.Dataset.from_tensor_slices` or other in-graph data), you need to store all data files read by the Dataset in Google Cloud Storage (GCS) buckets.\n", + "If you are using [TPU Nodes](https://cloud.google.com/tpu/docs/managing-tpus-tpu-vm), you need to store all data files read by the TensorFlow `Dataset` in [Google Cloud Storage (GCS) buckets](https://cloud.google.com/tpu/docs/storage-buckets). If you are using [TPU VMs](https://cloud.google.com/tpu/docs/users-guide-tpu-vm), you can store data wherever you like. For more information on TPU Nodes and TPU VMs, refer to the [TPU System Architecture](https://cloud.google.com/tpu/docs/system-architecture-tpu-vm) documentation.\n", "\n", "For most use cases, it is recommended to convert your data into the `TFRecord` format and use a `tf.data.TFRecordDataset` to read it. Check the [TFRecord and tf.Example tutorial](../tutorials/load_data/tfrecord.ipynb) for details on how to do this. 
It is not a hard requirement and you can use other dataset readers, such as `tf.data.FixedLengthRecordDataset` or `tf.data.TextLineDataset`.\n", "\n", @@ -266,7 +300,7 @@ "\n", "Regardless of the data format used, it is strongly recommended that you use large files on the order of 100MB. This is especially important in this networked setting, as the overhead of opening a file is significantly higher.\n", "\n", - "As shown in the code below, you should use the `tensorflow_datasets` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data. " + "As shown in the code below, you should use the Tensorflow Datasets `tfds.load` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data." ] }, { @@ -311,7 +345,7 @@ "source": [ "### Train the model using Keras high-level APIs\n", "\n", - "You can train your model with Keras `fit` and `compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using mutliple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras) tutorial." + "You can train your model with Keras `Model.fit` and `Model.compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using multiple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](../tutorials/distribute/keras.ipynb) tutorial." ] }, { @@ -338,7 +372,7 @@ "model.fit(train_dataset,\n", " epochs=5,\n", " steps_per_epoch=steps_per_epoch,\n", - " validation_data=test_dataset, \n", + " validation_data=test_dataset,\n", " validation_steps=validation_steps)" ] }, @@ -348,7 +382,7 @@ "id": "8hSGBIYtUugJ" }, "source": [ - "To reduce Python overhead and maximize the performance of your TPU, pass in the argument—`steps_per_execution`—to `Model.compile`. In this example, it increases throughput by about 50%:" + "To reduce Python overhead and maximize the performance of your TPU, pass in the `steps_per_execution` argument to Keras `Model.compile`. In this example, it increases throughput by about 50%:" ] }, { @@ -382,7 +416,7 @@ "source": [ "### Train the model using a custom training loop\n", "\n", - "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `strategy.experimental_distribute_datasets_from_function` API to distribute the dataset given a dataset function. Note that in the example below the batch size passed into the dataset is the per-replica batch size instead of the global batch size. To learn more, check out the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n" + "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `Strategy.distribute_datasets_from_function` API to distribute the `tf.data.Dataset` given a dataset function. Note that in the example below the batch size passed into the `Dataset` is the per-replica batch size instead of the global batch size. 
To learn more, check out the [Custom training with `tf.distribute.Strategy`](../tutorials/distribute/custom_training.ipynb) tutorial.\n" ] }, { @@ -391,7 +425,7 @@ "id": "DxdgXPAL6iFE" }, "source": [ - "First, create the model, datasets and tf.functions:" + "First, create the model, datasets and `tf.function`s:" ] }, { @@ -402,8 +436,8 @@ }, "outputs": [], "source": [ - "# Create the model, optimizer and metrics inside the strategy scope, so that the\n", - "# variables can be mirrored on each device.\n", + "# Create the model, optimizer and metrics inside the `tf.distribute.Strategy`\n", + "# scope, so that the variables can be mirrored on each device.\n", "with strategy.scope():\n", " model = create_model()\n", " optimizer = tf.keras.optimizers.Adam()\n", @@ -411,11 +445,11 @@ " training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", " 'training_accuracy', dtype=tf.float32)\n", "\n", - "# Calculate per replica batch size, and distribute the datasets on each TPU\n", - "# worker.\n", + "# Calculate per replica batch size, and distribute the `tf.data.Dataset`s\n", + "# on each TPU worker.\n", "per_replica_batch_size = batch_size // strategy.num_replicas_in_sync\n", "\n", - "train_dataset = strategy.experimental_distribute_datasets_from_function(\n", + "train_dataset = strategy.distribute_datasets_from_function(\n", " lambda _: get_dataset(per_replica_batch_size, is_training=True))\n", "\n", "@tf.function\n", @@ -427,9 +461,13 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + "\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -463,7 +501,7 @@ "\n", " for step in range(steps_per_epoch):\n", " train_step(train_iterator)\n", - " print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + " print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))\n", @@ -479,9 +517,9 @@ "source": [ "### Improving performance with multiple steps inside `tf.function`\n", "\n", - "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker.\n", + "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `Strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker. You can learn more about `tf.function`s in the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside `tf.function`. 
Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" + "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside a `tf.function`. Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" ] }, { @@ -501,9 +539,12 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -512,11 +553,11 @@ " for _ in tf.range(steps):\n", " strategy.run(step_fn, args=(next(iterator),))\n", "\n", - "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get \n", + "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get\n", "# retraced if the value changes.\n", "train_multiple_steps(train_iterator, tf.convert_to_tensor(steps_per_epoch))\n", "\n", - "print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + "print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))" @@ -530,19 +571,27 @@ "source": [ "## Next steps\n", "\n", - "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): How to set up and run a Google Cloud TPU.\n", + "To learn more about Cloud TPUs and how to use them:\n", + "\n", + "- [Google Cloud TPU](https://cloud.google.com/tpu): The Google Cloud TPU homepage.\n", + "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): Google Cloud TPU documentation, which includes:\n", + " - [Introduction to Cloud TPU](https://cloud.google.com/tpu/docs/intro-to-tpu): An overview of working with Cloud TPUs.\n", + " - [Cloud TPU quickstarts](https://cloud.google.com/tpu/docs/quick-starts): Quickstart introductions to working with Cloud TPU VMs using TensorFlow and other main machine learning frameworks.\n", "- [Google Cloud TPU Colab notebooks](https://cloud.google.com/tpu/docs/colabs): End-to-end training examples.\n", "- [Google Cloud TPU performance guide](https://cloud.google.com/tpu/docs/performance-guide): Enhance Cloud TPU performance further by adjusting Cloud TPU configuration parameters for your application\n", - "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices." + "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices.\n", + "- TPU embeddings: TensorFlow includes specialized support for training embeddings on TPUs via `tf.tpu.experimental.embedding`. 
In addition, [TensorFlow Recommenders](https://www.tensorflow.org/recommenders) has `tfrs.layers.embedding.TPUEmbedding`. Embeddings provide efficient and dense representations, capturing complex similarities and relationships between features. TensorFlow's TPU-specific embedding support allows you to train embeddings that are larger than the memory of a single TPU device, and to use sparse and ragged inputs on TPUs.\n", + "- [TPU Research Cloud (TRC)](https://sites.research.google/trc/about/): TRC enables researchers to apply for access to a cluster of more than 1,000 Cloud TPU devices.\n" ] } ], "metadata": { "accelerator": "TPU", "colab": { - "collapsed_sections": [], "name": "tpu.ipynb", - "toc_visible": true + "toc_visible": true, + "machine_shape": "hm", + "gpuType": "V28" }, "kernelspec": { "display_name": "Python 3", @@ -551,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/site/en/guide/variable.ipynb b/site/en/guide/variable.ipynb index 1538218e914..868ee9119e2 100644 --- a/site/en/guide/variable.ipynb +++ b/site/en/guide/variable.ipynb @@ -166,7 +166,7 @@ "source": [ "print(\"A variable:\", my_variable)\n", "print(\"\\nViewed as a tensor:\", tf.convert_to_tensor(my_variable))\n", - "print(\"\\nIndex of highest value:\", tf.argmax(my_variable))\n", + "print(\"\\nIndex of highest value:\", tf.math.argmax(my_variable))\n", "\n", "# This creates a new tensor; it does not reshape the variable.\n", "print(\"\\nCopying and reshaping: \", tf.reshape(my_variable, [1,4]))" @@ -298,7 +298,7 @@ "\n", "However, you can override this. In this snippet, place a float tensor and a variable on the CPU, even if a GPU is available. By turning on device placement logging (see [Setup](#scrollTo=xZoJJ4vdvTrD)), you can see where the variable is placed. \n", "\n", - "Note: Although manual placement works, using [distribution strategies](distributed_training) can be a more convenient and scalable way to optimize your computation.\n", + "Note: Although manual placement works, using [distribution strategies](distributed_training.ipynb) can be a more convenient and scalable way to optimize your computation.\n", "\n", "If you run this notebook on different backends with and without a GPU you will see different logging. *Note that logging device placement must be turned on at the start of the session.*" ] @@ -359,7 +359,7 @@ "source": [ "Note: Because `tf.config.set_soft_device_placement` is turned on by default, even if you run this code on a device without a GPU, it will still run. The multiplication step will happen on the CPU.\n", "\n", - "For more on distributed training, see [our guide](distributed_training)." + "For more on distributed training, refer to the [guide](distributed_training.ipynb)." ] }, { diff --git a/site/en/guide/versions.md b/site/en/guide/versions.md index b20eea717bc..5b1206cc5f4 100644 --- a/site/en/guide/versions.md +++ b/site/en/guide/versions.md @@ -6,10 +6,11 @@ to modify TensorFlow while preserving compatibility. ## Semantic versioning 2.0 -TensorFlow follows Semantic Versioning 2.0 ([semver](http://semver.org)) for its -public API. Each release version of TensorFlow has the form `MAJOR.MINOR.PATCH`. -For example, TensorFlow version 1.2.3 has `MAJOR` version 1, `MINOR` version 2, -and `PATCH` version 3. Changes to each number have the following meaning: +TensorFlow mostly follows Semantic Versioning 2.0 ([semver](http://semver.org)) +for its public API. Each release version of TensorFlow has the form +`MAJOR.MINOR.PATCH`. 
For example, TensorFlow version 1.2.3 has `MAJOR` version +1, `MINOR` version 2, and `PATCH` version 3. Changes to each number have the +following meaning: * **MAJOR**: Potentially backwards incompatible changes. Code and data that worked with a previous major release will not necessarily work with the new @@ -22,6 +23,10 @@ and `PATCH` version 3. Changes to each number have the following meaning: data that worked with a previous minor release *and* which depends only on the non-experimental public API will continue to work unchanged. For details on what is and is not the public API, see [What is covered](#what_is_covered). + Note that TensorFlow sometimes makes breaking changes in new minor releases, + where the impact is expected to be minor. For examples of these kinds of + changes, see the "Breaking Changes" sections for past minor releases at + https://github.com/tensorflow/tensorflow/releases. * **PATCH**: Backwards compatible bug fixes. @@ -34,44 +39,153 @@ release 0.12.1. However, release 1.1.1 was backwards *compatible* with release Only the public APIs of TensorFlow are backwards compatible across minor and patch versions. The public APIs consist of -* All the documented [Python](../api_docs/python) functions and classes in the - `tensorflow` module and its submodules, except for +* All the documented [Python](https://www.tensorflow.org/api_docs/python) + functions and classes in the `tensorflow` module and its submodules, except + for + + * Private symbols: any function, class, etc., whose name start with `_` + * Experimental and `tf.contrib` symbols, see [below](#not_covered) for + details. + + Note that the code in the `examples/` and `tools/` directories is not + reachable through the `tensorflow` Python module and is thus not covered by + the compatibility guarantee. + + If a symbol is available through the `tensorflow` Python module or its + submodules, but is not documented, then it is **not** considered part of the + public API. + +* The compatibility API (in Python, the `tf.compat` module). At major + versions, we may release utilities and additional endpoints to help users + with the transition to a new major version. These API symbols are deprecated + and not supported (i.e., we will not add any features, and we will not fix + bugs other than to fix vulnerabilities), but they do fall under our + compatibility guarantees. 
+ +* The TensorFlow C API: + + * [tensorflow/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h) + +* The following protocol buffer files: + + * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) + * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) + * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) + * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) + * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) + * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) + * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) + * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) + * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) + * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) + + + +## Separate version number for TensorFlow Lite + +Currently TensorFlow Lite is distributed as a part of TensorFlow. However, we +reserve the right to in future release changes to the TensorFlow Lite APIs on a +different schedule than for the other TensorFlow APIs, or even to move +TensorFlow Lite into a separate source distribution and/or a separate source +repository than TensorFlow. + +Because of this, we use a different version number for TensorFlow Lite +(`TFLITE_VERSION_STRING` in `tensorflow/lite/version.h`, and `TfLiteVersion()` +in `tensorflow/lite/c/c_api.h`) than for TensorFlow (`TF_VERSION_STRING` in +`tensorflow/core/public/release_version.h`, and `TF_Version()` in +`tensorflow/c/c_api.h`). Currently, these two version numbers happen to have the +same value. But in future, they may diverge; for example, we may increment the +major version number for TensorFlow Lite without incrementing the major version +number for TensorFlow, or vice versa. + +The API surface that is covered by the TensorFlow Lite version number is +comprised of the following public APIs: + +* The TensorFlow Lite C API: + + * [tensorflow/lite/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api.h) + * [tensorflow/lite/c/c_api_types.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_types.h). 
+ +* The TensorFlow Lite Android (Java/Kotlin) API: + + * In `org.tensorflow.lite`: + * [org.tensorflow.lite.TensorFlowLite](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/TensorFlowLite) + * [org.tensorflow.lite.InterpreterApi](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/InterpreterApi) + * [org.tensorflow.lite.Delegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Delegate) + * [org.tensorflow.lite.DelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DelegateFactory) + * [org.tensorflow.lite.Tensor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Tensor) + * [org.tensorflow.lite.DataType](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DataType) + * [org.tensorflow.lite.RuntimeFlavor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/RuntimeFlavor) + * In `org.tensorflow.lite.gpu`: + * [org.tensorflow.lite.gpu.GpuDelegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegate) + * [org.tensorflow.lite.gpu.GpuDelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegateFactory) + +* The TensorFlow Lite Objective-C APIs: + + * [tensorflow/lite/objc/apis/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/objc/apis/) + * TFLCoreMLDelegate.h + * TFLDelegate.h + * TFLInterpreter.h + * TFLInterpreterOptions.h + * TFLMetalDelegate.h + * TFLQuantizationParameters.h + * TFLSignatureRunner.h + * TFLTensorFlowLite.h + * TFLTensor.h + +* The TensorFlow Lite Swift APIs: + + * [tensorflow/lite/swift/Sources/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/swift/Sources/). + * CoreMLDelegate.swift + * Delegate.swift + * InterpreterError.swift + * Interpreter.swift + * MetalDelegate.swift + * Model.swift + * QuantizationParameters.swift + * SignatureRunnerError.swift + * SignatureRunner.swift + * TensorFlowLite.swift + * Tensor.swift + +Experimental symbols are not covered; see [below](#not_covered) for details. + +## Separate version number for TensorFlow Lite Extension APIs + +TensorFlow Lite provides C APIs for extending the TensorFlow Lite interpreter +with "custom ops", which provide user-defined operations in a graph, or +"delegates", which allow delegating the computation for a graph (or for a subset +of a graph) to a custom backend. These APIs, which we collectively call the +"TensorFlow Lite Extension APIs", require more intimate dependencies on some of +the details of the TensorFlow Lite implementation. + +We reserve the right to in future release changes to these APIs, potentially +including non-backwards-compatible changes, on a different schedule than for the +other TensorFlow Lite APIs. So we use a different version number for the +TensorFlow Lite Extension APIs than the version numbers for TensorFlow Lite or +TensorFlow (which were described in the previous section). We are introducing +some new APIs in TensorFlow Lite version 2.15 to get the TensorFlow Lite +Extension APIs version (`TFLITE_EXTENSION_APIS_VERSION_STRING` in +`tensorflow/lite/version.h`, and TfLiteExtensionApisVersion() in +`tensorflow/lite/c/c_api.h`). The version number for the TensorFlow Lite +Extension APIs is currently the same as the version number for TensorFlow and +TensorFlow Lite. 
But in future, they may diverge; for example, we may increment +the major version number for the TensorFlow Lite Extension APIs without +incrementing the major version number for TensorFlow Lite, or vice versa. + +The API surface that is covered by the TensorFlow Lite Extension APIs version +number is comprised of the following public APIs: + +* [tensorflow/lite/c/c_api_opaque.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_opaque.h) +* [tensorflow/lite/c/common.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/common.h) +* [tensorflow/lite/c/builtin_op_data.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/builtin_op_data.h) +* [tensorflow/lite/builtin_ops.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/builtin_ops.h) + +Again, experimental symbols are not covered; see [below](#not_covered) for +details. + + - * Private symbols: any function, class, etc., whose name start with `_` - * Experimental and `tf.contrib` symbols, see [below](#not_covered) for - details. - - Note that the code in the `examples/` and `tools/` directories is not - reachable through the `tensorflow` Python module and is thus not covered by - the compatibility guarantee. - - If a symbol is available through the `tensorflow` Python module or its - submodules, but is not documented, then it is **not** considered part of the - public API. - -* The compatibility API (in Python, the `tf.compat` module). At major versions, - we may release utilities and additional endpoints to help users with the - transition to a new major version. These API symbols are deprecated and not - supported (i.e., we will not add any features, and we will not fix bugs - other than to fix vulnerabilities), but they do fall under our compatibility - guarantees. - -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h). - -* The following protocol buffer files: - - * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) - * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) - * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) - * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) - * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) - * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) - * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) - * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) - * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) - * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) - - ## What is *not* covered Some parts of TensorFlow can change in backward incompatible ways at any point. 
@@ -82,21 +196,27 @@ These include: particular, the following are not covered by any compatibility guarantees: - any symbol in the `tf.contrib` module or its submodules; - - any symbol (module, function, argument, property, class, or constant) - whose name contains `experimental` or `Experimental`; or - - any symbol whose fully qualified name includes a module or class which - is itself experimental. This includes fields and submessages of any - protocol buffer called `experimental`. + - any symbol (module, function, argument, property, class, constant, type, + package, etc.) whose name contains `experimental` or `Experimental`; or + - any symbol whose fully qualified name includes a module or class or + package which is itself experimental. This includes fields and + submessages of any protocol buffer called `experimental`. * **Other languages**: TensorFlow APIs in languages other than Python and C, such as: - - [C++](../install/lang_c.md) (exposed through header files in - [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). - - [Java](../install/lang_java.md), - - [Go](../install/lang_go.md) + - [C++](../install/lang_c.ipynb) (exposed through header files in + [`tensorflow/cc/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). + - [Java](../install/lang_java_legacy.md), + - [Go](https://github.com/tensorflow/build/blob/master/golang_install_guide/README.md) - [JavaScript](https://www.tensorflow.org/js) + and TensorFlow **Lite** APIs in languages other than Java/Kotlin, C, + Objective-C, and Swift, in particular + + - **C++** (exposed through header files in + [`tensorflow/lite/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/)) + * **Details of composite ops:** Many public functions in Python expand to several primitive ops in the graph, and these details will be part of any graphs saved to disk as `GraphDef`s. These details may change for minor @@ -222,7 +342,8 @@ This section is relevant only when making incompatible changes to the `GraphDef` format, such as when adding ops, removing ops, or changing the functionality of existing ops. The previous section should suffice for most users. - + + ### Backward and partial forward compatibility @@ -253,7 +374,9 @@ guidelines for evolving `GraphDef` versions. There are different data versions for graphs and checkpoints. The two data formats evolve at different rates from each other and also at different rates from TensorFlow. Both versioning systems are defined in -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h). +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h) +and +[`core/public/release_version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/release_version.h). Whenever a new version is added, a note is added to the header detailing what changed and the date. @@ -353,7 +476,7 @@ existing producer scripts will not suddenly use the new functionality. 1. Add a new similar op named `SomethingV2` or similar and go through the process of adding it and switching existing Python wrappers to use it. 
To ensure forward compatibility use the checks suggested in - [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py) + [compat.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/compat/compat.py) when changing the Python wrappers. 2. Remove the old op (Can only take place with a major version change due to backward compatibility). diff --git a/site/en/hub/README.md b/site/en/hub/README.md deleted file mode 100644 index 3ca77792508..00000000000 --- a/site/en/hub/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Hub - -These docs are available here: https://github.com/tensorflow/hub/tree/master/docs diff --git a/site/en/hub/_book.yaml b/site/en/hub/_book.yaml new file mode 100644 index 00000000000..4a969d413bc --- /dev/null +++ b/site/en/hub/_book.yaml @@ -0,0 +1,85 @@ +upper_tabs: +# Tabs left of dropdown menu +- include: /_upper_tabs_left.yaml +- include: /api_docs/_upper_tabs_api.yaml +# Dropdown menu +- name: Resources + path: /resources + is_default: true + menu: + - include: /resources/_menu_toc.yaml + lower_tabs: + # Subsite tabs + other: + # [Guide] + - name: "Guide" + contents: + # TF Hub Platform overview. + - heading: Getting Started + - title: Overview + path: /hub/overview + - title: Installation + path: /hub/installation + - title: Community and support + path: /hub/community + # Python library usage information + - heading: Using the library + - title: Overview + path: /hub/lib_overview + - title: SavedModels for TensorFlow 2 + path: /hub/tf2_saved_model + - title: Caching model downloads + path: /hub/caching + - title: Migration to TF2 + path: /hub/migration_tf2 + - title: Model compatibility for TF1/TF2 + path: /hub/model_compatibility + - title: "Deprecated: TF1 Hub format" + path: /hub/tf1_hub_module + status: deprecated + # SavedModel APIs + - heading: Common SavedModel APIs + - title: Overview + path: /hub/common_saved_model_apis/index.md + - title: Reusable SavedModels (for all tasks) + path: /hub/reusable_saved_models + - title: Image tasks + path: /hub/common_saved_model_apis/images + - title: Text tasks + path: /hub/common_saved_model_apis/text + # Publishing models + - heading: Publishing models + - title: Publishing process + path: /hub/publish + - title: Data portability and deletion + path: /hub/portability_and_deletion + # Advanced developer info + - heading: Advanced developer info + - title: Model formats + path: /hub/model_formats + - title: Model hosting protocol + path: /hub/hosting + - title: Build from source + path: /hub/build_from_source + - title: Common issues + path: /hub/common_issues + - title: Contribute to TensorFlow Hub + path: /hub/contribute + # [Tutorials] + - name: Tutorials + path: /hub/tutorials + contents: + - include: /hub/tutorials/_toc.yaml + # [API] + - name: API + skip_translation: true + contents: + - include: /hub/api_docs/python/hub/_toc.yaml + # [Models] + - name: "Models ↗" + contents: + - title: Models + path: https://tfhub.dev + status: external + +- include: /_upper_tabs_right.yaml diff --git a/site/en/hub/_index.yaml b/site/en/hub/_index.yaml new file mode 100644 index 00000000000..00e67b15265 --- /dev/null +++ b/site/en/hub/_index.yaml @@ -0,0 +1,145 @@ +# This file is rendered on tensorflow.org/hub. +# ../README.md is rendered on github.com/tensorflow/hub. +# Both link to ./overview.md and ./*.md for detailed docs. 
+book_path: /hub/_book.yaml +project_path: /hub/_project.yaml +description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. +landing_page: + custom_css_path: /site-assets/css/style.css + rows: + - heading: TensorFlow Hub is a repository of trained machine learning models. + items: + - classname: + tfo-landing-row-item-code-block + devsite-landing-row-50 + description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. + list: + - heading: See the guide + description: Learn about how to use TensorFlow Hub and how it works. + path: /hub/overview + icon: + path: /hub/images/guide_basics.png + - heading: See tutorials + description: Tutorials show you end-to-end examples using TensorFlow Hub. + path: /hub/tutorials + icon: + path: /site-assets/images/marketing/learn/lite-pick.svg + - heading: See models + description: Find trained TF, TFLite, and TF.js models for your use case. + path: https://tfhub.dev + icon: + path: /site-assets/images/marketing/learn/js-run.svg + code_block: | +
+          !pip install --upgrade tensorflow_hub
+
+          import tensorflow_hub as hub
+
+          model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2")
+          embeddings = model(["The rain in Spain.", "falls",
+                              "mainly", "In the plain!"])
+
+          print(embeddings.shape)  # (4, 128)
+        
+ - options: + - cards + - centered-header + heading: > +

Models + description: > + Find trained models from the TensorFlow community on TFHub.dev + items: + - heading: BERT + description: Check out BERT for NLP tasks including text classification and question answering. + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + image_path: /hub/images/bert.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + - heading: Object detection + description: Use the Faster R-CNN Inception ResNet V2 640x640 model for detecting objects in images. + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + image_path: /hub/images/object_detection.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + - heading: Style transfer + description: Transfer the style of one image to another using the image style transfer model. + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + image_path: /hub/images/style_transfer.png + buttons: + - label: See the model + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + - heading: On-device food classifier + description: Use this TFLite model to classify photos of food on a mobile device. + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + image_path: /hub/images/food.png + buttons: + - label: See the model + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + - options: + - cards + - centered-header + heading: > +

News & announcements + description: > + Check out our blog for more announcements and view the latest #TFHub updates on Twitter + items: + - heading: TensorFlow Hub for Real World Impact at Google I/O + youtube_id: BE5nkhFe3AE + description: > + Learn how you can use TensorFlow Hub to build ML solutions with real world impact. + buttons: + - label: Watch the video + path: https://www.youtube.com/watch?v=BE5nkhFe3AE + - heading: "On-device ML solutions" + description: > + To explore ML solutions for your mobile and web apps including TensorFlow Hub, visit the Google on-device machine learning page. + path: https://g.co/on-device-ml + image_path: /hub/images/odml.png + buttons: + - label: Visit the site + path: https://g.co/on-device-ml + - heading: "Making BERT Easier with Preprocessing Models From TensorFlow Hub" + description: > + TensorFlow Hub makes BERT simple to use with new preprocessing models. + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + image_path: /hub/images/bert_preprocess_wide.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + - heading: "From singing to musical scores: Estimating pitch with SPICE and Tensorflow Hub" + description: > + Learn how to use the SPICE model to automatically transcribe sheet music from live audio. + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + image_path: /hub/images/spice_blog.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + - options: + - cards + - centered-header + heading: > +

Community + description: Join the TensorFlow Hub community + items: + - heading: TensorFlow Hub on GitHub + icon: + path: /hub/images/github_icon.svg + path: https://github.com/tensorflow/hub + - heading: Contribute models + icon: + name: publish + path: /hub/publish + - options: + - cta + items: + - heading: Get started with TensorFlow Hub + buttons: + - label: Find trained models + path: https://tfhub.dev + classname: button diff --git a/site/en/hub/_redirects.yaml b/site/en/hub/_redirects.yaml new file mode 100644 index 00000000000..bee1cbec873 --- /dev/null +++ b/site/en/hub/_redirects.yaml @@ -0,0 +1,7 @@ +redirects: +- from: /hub/becoming_a_publisher + to: /hub/publish +- from: /hub/writing_model_documentation + to: /hub/writing_documentation#model +- from: /hub/creating_a_collection + to: /hub/writing_documentation#collection diff --git a/site/en/hub/build_from_source.md b/site/en/hub/build_from_source.md new file mode 100644 index 00000000000..42e19eb6208 --- /dev/null +++ b/site/en/hub/build_from_source.md @@ -0,0 +1,195 @@ + + + +# Creating the TensorFlow Hub pip package using Linux + +Note: This document is for developers interested in modifying TensorFlow Hub +itself. To _use_ TensorFlow Hub, see the [Install instructions](installation.md) + +If you make changes to TensorFlow Hub pip package, you will likely want to +rebuild the pip package from source to try out your changes. + +This requires: + +* Python +* TensorFlow +* Git +* [Bazel](https://docs.bazel.build/versions/master/install.html) + +Alternatively, if you install the protobuf compiler you can +[try out your changes without using bazel](#develop). + +## Setup a virtualenv {:#setup} + +### Activate virtualenv + +Install virtualenv if it's not installed already: + +```shell +~$ sudo apt-get install python-virtualenv +``` + +Create a virtual environment for the package creation: + +```shell +~$ virtualenv --system-site-packages tensorflow_hub_env +``` + +And activate it: + +```shell +~$ source ~/tensorflow_hub_env/bin/activate # bash, sh, ksh, or zsh +~$ source ~/tensorflow_hub_env/bin/activate.csh # csh or tcsh +``` + +### Clone the TensorFlow Hub repository. + +```shell +(tensorflow_hub_env)~/$ git clone https://github.com/tensorflow/hub +(tensorflow_hub_env)~/$ cd hub +``` + +## Test your changes + +### Run TensorFlow Hub's tests + +```shell +(tensorflow_hub_env)~/hub/$ bazel test tensorflow_hub:all +``` + +## Build and install the package + +### Build TensorFlow Hub pip packaging script + +To build a pip package for TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ bazel build tensorflow_hub/pip_package:build_pip_package +``` + +### Create the TensorFlow Hub pip package + +```shell +(tensorflow_hub_env)~/hub/$ bazel-bin/tensorflow_hub/pip_package/build_pip_package \ +/tmp/tensorflow_hub_pkg +``` + +### Install and test the pip package (optional) + +Run the following commands to install the pip package. + +```shell +(tensorflow_hub_env)~/hub/$ pip install /tmp/tensorflow_hub_pkg/*.whl +``` + +Test import TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## "Developer" install (experimental) + + + +Warning: This approach to running TensorFlow is experimental, and not officially +supported by the TensorFlow Hub team. + +Building the package with bazel is the only officially supported method. 
However +if you are unfamiliar with bazel simpler to work with open source tools. For +that you can do a "developer install" of the package. + +This installation method allows you to install the working directory into your +python environment, so that ongoing changes are reflected when you import the +package. + +### Setup the repository + +First setup the virtualenv and repository, as described [above](#setup). + +### Install `protoc` + +Because TensorFlow Hub uses protobufs you will need the protobuf compiler to +create the necessary python `_pb2.py` files from the `.proto` files. + +#### On a Mac: + +``` +(tensorflow_hub_env)~/hub/$ brew install protobuf +``` + +#### On Linux + +``` +(tensorflow_hub_env)~/hub/$ sudo apt install protobuf-compiler +``` + +### Compile the `.proto` files + +Initially there are no `_pb2.py` files in the directory: + +``` +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +Run `protoc` to create them: + +``` +(tensorflow_hub_env)~/hub/$ protoc -I=tensorflow_hub --python_out=tensorflow_hub tensorflow_hub/*.proto +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +
+tensorflow_hub/image_module_info_pb2.py
+tensorflow_hub/module_attachment_pb2.py
+tensorflow_hub/module_def_pb2.py
+
+ +Note: Don't forget to recompile the `_pb2.py` files if you make changes to the +`.proto` definitions. + +### Import directly from the repository + +With the `_pb2.py` files in place, you can use try out your modifications +directly from the TensorFlow Hub directory: + +``` +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +### Install in "developer" mode + +Or to use this from outside the repository root, you can use the `setup.py +develop` installation: + +``` +(tensorflow_hub_env)~/hub/$ python tensorflow_hub/pip_package/setup.py develop +``` + +Now you can use your local changes in a regular python virtualenv, without the +need to rebuild and install the pip package for each new change: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## De-activate the virtualenv + +```shell +(tensorflow_hub_env)~/hub/$ deactivate +``` diff --git a/site/en/hub/caching.md b/site/en/hub/caching.md new file mode 100644 index 00000000000..678b2c22af0 --- /dev/null +++ b/site/en/hub/caching.md @@ -0,0 +1,86 @@ + +# Caching model downloads from TF Hub + +## Overview + +The `tensorflow_hub` library currently supports two modes for downloading +models. By default, a model is downloaded as a compressed archive and cached on +disk. Secondly, models can directly be read from remote storage into TensorFlow. +Either way, the calls to `tensorflow_hub` functions in the actual Python code +can and should continue to use the canonical tfhub.dev URLs of models, which are +portable across systems and navigable for documentation. In the rare case that +user code needs the actual filesystem location (after downloading and +decompressing, or after resolving a model handle into a filesystem path), +it can be obtained by the function `hub.resolve(handle)`. + +### Caching of compressed downloads + +The `tensorflow_hub` library by default caches models on the filesystem when +they have been downloaded from tfhub.dev (or other [hosting sites](hosting.md)) +and decompressed. This mode is recommended for most environments, except if disk +space is scarce but network bandwidth and latency are superb. + +The download location defaults to a local temporary directory but can be +customized by setting the environment variable `TFHUB_CACHE_DIR` (recommended) +or by passing the command-line flag `--tfhub_cache_dir`. The default cache +location `/tmp/tfhub_modules` (or whatever `os.path.join(tempfile.gettempdir(), +"tfhub_modules")` is evaluated to) should work in most cases. + +Users who prefer persistent caching across system reboots can instead set +`TFHUB_CACHE_DIR` to a location in their home directory. For example, a user of +the bash shell on a Linux system can add a line like the following to +`~/.bashrc`: + +```bash +export TFHUB_CACHE_DIR=$HOME/.cache/tfhub_modules +``` + +...restart the shell, and then this location will be used. When using a +persistent location, be aware that there is no automatic cleanup. + +### Reading from remote storage + +Users can instruct the `tensorflow_hub` library to directly read models from +remote storage (GCS) instead of downloading the models locally with: + +```shell +os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED" +``` + +or by setting the command-line flag `--tfhub_model_load_format` to +`UNCOMPRESSED`. 
This way, no caching directory is needed, which is especially +helpful in environments that provide little disk space but a fast internet +connection. + +### Running on TPU in Colab notebooks + +On [colab.research.google.com](https://colab.research.google.com), downloading +compressed models will conflict with the TPU runtime since the computation +workload is delegated to another machine that does not have access to the cache +location by default. There are two workarounds for this situation: + +#### 1) Use a GCS bucket that the TPU worker can access + +The easiest solution is to instruct the `tensorflow_hub` library to read the +models from TF Hub's GCS bucket as explained above. Users with their own GCS +bucket can instead specify a directory in their bucket as the cache location +with code like: + +```python +import os +os.environ["TFHUB_CACHE_DIR"] = "gs://my-bucket/tfhub-modules-cache" +``` + +...before calling the `tensorflow_hub` library. + +#### 2) Redirect all reads through the Colab host + +Another workaround is to redirect all reads (even of large variables) through +the Colab host: + +```python +load_options = +tf.saved_model.LoadOptions(experimental_io_device='/job:localhost') +reloaded_model = hub.load("https://tfhub.dev/...", options=load_options) +``` +**Note:** See more information regarding valid handles [here](tf2_saved_model.md#model_handles). diff --git a/site/en/hub/common_issues.md b/site/en/hub/common_issues.md new file mode 100644 index 00000000000..03ba4a62a8e --- /dev/null +++ b/site/en/hub/common_issues.md @@ -0,0 +1,148 @@ + +# Common issues + +If your issue is not listed here, please search the +[github issues](https://github.com/tensorflow/hub/issues) before filling a new +one. + +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## TypeError: 'AutoTrackable' object is not callable + +```python +# BAD: Raises error +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed(['my text', 'batch']) +``` + +This error frequently arises when loading models in TF1 Hub format with the +`hub.load()` API in TF2. Adding the correct signature should fix this problem. +See the [TF-Hub migration guide for TF2](migration_tf2.md) for more details on +moving to TF2 and the use of models in TF1 Hub format in TF2. + +```python + +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed.signatures['default'](['my text', 'batch']) +``` + +## Cannot download a module + +In the process of using a module from an URL there are many errors that can show +up due to the network stack. Often this is a problem specific to the machine +running the code and not an issue with the library. Here is a list of the common +ones: + +* **"EOF occurred in violation of protocol"** - This issue is likely to be + generated if the installed python version does not support the TLS + requirements of the server hosting the module. Notably, python 2.7.5 is + known to fail resolving modules from tfhub.dev domain. **FIX**: Please + update to a newer python version. + +* **"cannot verify tfhub.dev's certificate"** - This issue is likely to be + generated if something on the network is trying to act as the dev gTLD. + Before .dev was used as a gTLD, developers and frameworks would sometimes + use .dev names to help testing code. **FIX:** Identify and reconfigure the + software that intercepts name resolution in the ".dev" domain. 
+ +* Failures to write to the cache directory `/tmp/tfhub_modules` (or similar): + see [Caching](caching.md) for what that is and how to change its location. + +If the above errors and fixes do not work, one can try to manually download a +module by simulating the protocol of attaching `?tf-hub-format=compressed` to +the URL to download a tar compressed file that has to be manually decompressed +into a local file. The path to the local file can then be used instead of the +URL. Here is a quick example: + +```bash +# Create a folder for the TF hub module. +$ mkdir /tmp/moduleA +# Download the module, and uncompress it to the destination folder. You might want to do this manually. +$ curl -L "https://tfhub.dev/google/universal-sentence-encoder/2?tf-hub-format=compressed" | tar -zxvC /tmp/moduleA +# Test to make sure it works. +$ python +> import tensorflow_hub as hub +> hub.Module("/tmp/moduleA") +``` + +## Running inference on a pre-initialized module + +If you are writing a Python program that applies a module many times on input +data, you can apply the following recipes. (Note: For serving requests in +production services, consider +[TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) or other +scalable, Python-free solutions.) + +Assuming your use-case model is **initialization** and subsequent **requests** +(for example Django, Flask, custom HTTP server, etc.), you can set-up the +serving as follows: + +### TF2 SavedModels + +* In the initialization part: + * Load the TF2.0 model. + +```python +import tensorflow_hub as hub + +embedding_fn = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4") +``` + +* In the request part: + * Use the embedding function to run inference. + +```python +embedding_fn(["Hello world"]) +``` + +This call of a tf.function is optimized for performance, see +[tf.function guide](https://www.tensorflow.org/guide/function). + +### TF1 Hub modules + +* In the initialization part: + * Build the graph with a **placeholder** - entry point into the graph. + * Initialize the session. + +```python +import tensorflow as tf +import tensorflow_hub as hub + +# Create graph and finalize (finalizing optional but recommended). +g = tf.Graph() +with g.as_default(): + # We will be feeding 1D tensors of text into the graph. + text_input = tf.placeholder(dtype=tf.string, shape=[None]) + embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2") + embedded_text = embed(text_input) + init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()]) +g.finalize() + +# Create session and initialize. +session = tf.Session(graph=g) +session.run(init_op) +``` + +* In the request part: + * Use the session to feed data into the graph through the placeholder. + +```python +result = session.run(embedded_text, feed_dict={text_input: ["Hello world"]}) +``` + +## Cannot change a model's dtype (e.g., float32 to bfloat16) + +TensorFlow's SavedModels (shared on TF Hub or otherwise) contain operations that +work on fixed data types (often, float32 for the weights and intermediate +activations of neural networks). These cannot be changed after the fact when +loading the SavedModel (but model publishers can choose to publish different +models with different data types). + +## Update a model version + +The documentation metadata of model versions can be updated. However, the +version's assets (model files) are immutable. If you want to change the model +assets, you can publish a newer version of the model. 
It's a good practice to +extend the documentation with a change log that describes what changed between +versions. diff --git a/site/en/hub/common_saved_model_apis/images.md b/site/en/hub/common_saved_model_apis/images.md new file mode 100644 index 00000000000..5413f0adc07 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/images.md @@ -0,0 +1,155 @@ + +# Common SavedModel APIs for Image Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +image-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces the +[Common Signatures for Images](../common_signatures/images.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for use by a simple feed-forward classifier in the consumer model. (In +terms of classic CNNs, this is the bottleneck value after the spatial extent has +been pooled or flattened away, but before classification is done; for that, see +[image classification](#classification) below.) + +A Reusable SavedModel for image feature extraction has a `__call__` method on +the root object that maps a batch of images to a batch of feature vectors. It +can be used like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +features = obj(images) # A batch with shape [batch_size, num_features]. +``` + +In Keras, the equivalent is + +```python +features = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output is a single tensor of dtype `float32` and shape `[batch_size, +num_features]`. The `batch_size` is the same as in the input. `num_features` is +a module-specific constant independent of input size. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + +### Notes + +Applying dropout to the output features (or not) should be left to the model +consumer. The SavedModel itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). 
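For illustration, a minimal consumer-side sketch might look as follows, with the consumer adding its own dropout and classification head on top of the feature vectors (the `"path/to/model"` handle, image size, dropout rate, and number of classes are placeholders, not prescribed by this API):

```python
import tensorflow as tf
import tensorflow_hub as hub

# Feature-vector SavedModel wrapped as a Keras layer; trainable=True enables fine-tuning.
feature_extractor = hub.KerasLayer("path/to/model", trainable=True)

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),  # assumed input size; see model docs
    feature_extractor,                                       # -> [batch_size, num_features]
    tf.keras.layers.Dropout(0.2),                            # dropout applied by the consumer
    tf.keras.layers.Dense(10),                               # task-specific logits
])
```
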
+ +### Examples + +Reusable SavedModels for image feature vectors are used in + +* the Colab tutorial + [Retraining an Image Classifier](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb), + + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows model consumers to to draw conclusions from the particular +classification learned by the publisher module. (For image classification with +a new set of classes, it is common to reuse an +[Image Feature Vector](#feature-vector) model with a new classifier instead.) + +A Reusable SavedModel for image classification has a `__call__` method on the +root object that maps a batch of images to a batch of logits. It can be used +like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +logits = obj(images) # A batch with shape [batch_size, num_classes]. +``` + +In Keras, the equivalent is + +```python +logits = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output `logits` is a single tensor of dtype `float32` and shape +`[batch_size, num_classes]`. The `batch_size` is the same as in the input. +`num_classes` is the number of classes in the classification, which is a +model-specific constant. + +The value `logits[i, c]` is a score predicting the membership of example `i` in +the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, and +refer to a definition of the class indices. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + + + +## Image input + +This is common to all types of image models. + +A model that takes a batch of images as input accepts them as a dense 4-D tensor +of dtype `float32` and shape `[batch_size, height, width, 3]` whose elements are +RGB color values of pixels normalized to the range [0, 1]. This is what you get +from `tf.image.decode_*()` followed by `tf.image.convert_image_dtype(..., +tf.float32)`. + +The model accepts any `batch_size`. The model documentation specifies the +permissible range for `height` and `width`. The last dimension is fixed to 3 RGB +channels. 
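For example, a sketch of that input convention using standard TensorFlow ops (the file name and the 224×224 size are just placeholders; consult the model documentation for the permissible sizes):

```python
import tensorflow as tf

def load_image(path, height=224, width=224):
  """Decodes one image file into the float32 [0, 1] format described above."""
  data = tf.io.read_file(path)
  image = tf.image.decode_jpeg(data, channels=3)            # uint8 RGB
  image = tf.image.convert_image_dtype(image, tf.float32)   # rescales to [0, 1]
  return tf.image.resize(image, [height, width])

images = tf.stack([load_image("example.jpg")])  # shape [1, height, width, 3]
```
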
+ +It is recommended that models use the `channels_last` (or `NHWC`) layout of +Tensors throughout, and leave it to TensorFlow's graph optimizer to rewrite to +`channels_first` (or `NCHW`) if needed. diff --git a/site/en/hub/common_saved_model_apis/index.md b/site/en/hub/common_saved_model_apis/index.md new file mode 100644 index 00000000000..356505f9952 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/index.md @@ -0,0 +1,46 @@ + +# Common SavedModel APIs for TF Hub + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common APIs for models in the +[TF2 SavedModel format](../tf2_saved_model.md). (It replaces the +[Common Signatures](../common_signatures/index.md) for the now-deprecated +[TF1 Hub format](../tf1_hub_module.md).) + +## Reusable SavedModel: the common foundation + +The [Reusable SavedModel API](../reusable_saved_models.md) defines general +conventions how to load a SavedModel back into a Python program and reuse it as +part of a bigger TensorFlow model. + +Basic usage: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +outputs = obj(inputs, training=False) # Invokes the tf.function obj.__call__. +``` + +Key point: This uses the object-based interface to restored SavedModels that was +added in TensorFlow 2, not the SavedModel signatures for serving. + +For Keras users, the `hub.KerasLayer` class relies on this API to wrap the +Reusable SavedModel as a Keras Layer (shielding Keras users from its details), +with inputs and outputs according to the task-specific APIs listed below. + +## Task-specific APIs + +These refine the [Reusable SavedModel API](../reusable_saved_models.md) with +conventions for particular ML tasks and types of data. + +* [Image tasks](images.md) +* [Text tasks](text.md) diff --git a/site/en/hub/common_saved_model_apis/text.md b/site/en/hub/common_saved_model_apis/text.md new file mode 100644 index 00000000000..209319f27a9 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/text.md @@ -0,0 +1,361 @@ + +# Common SavedModel APIs for Text Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +text-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces and +extends the [Common Signatures for Text](../common_signatures/text.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + +## Overview + +There are several APIs to compute **text embeddings** (also known as dense +representations of text, or text feature vectors). + +* The API for *text embeddings from text inputs* is implemented by a + SavedModel that maps a batch of strings to a batch of embedding vectors. + This is very easy to use, and many models on TF Hub have implemented it. + However, this does not allow fine-tuning the model on TPU. 
+ +* The API for *text embeddings with preprocessed inputs* solves the same task, + but is implemented by two separate SavedModels: + + * a *preprocessor* that can run inside a tf.data input pipeline and + converts strings and other variable-length data into numeric Tensors, + * an *encoder* that accepts the results of the preprocessor and performs + the trainable part of the embedding computation. + + This split allows inputs to be preprocessed asynchronously before being fed + into the training loop. In particular, it allows building encoders that can + be run and fine-tuned on [TPU](https://www.tensorflow.org/guide/tpu). + +* The API for *text embeddings with Transformer encoders* extends the API for + text embeddings from preprocessed inputs to the particular case of BERT and + other Transformer encoders. + + * The *preprocessor* is extended to build encoder inputs from more than + one segment of input text. + * The *Transformer encoder* exposes the context-aware embeddings of + individual tokens. + +In each case, the text inputs are UTF-8 encoded strings, typically of plain +text, unless the model documentation provides otherwise. + +Regardless of API, different models have been pre-trained on text from different +languages and domains, and with different tasks in mind. Therefore, not every +text embedding model is suitable for every problem. + + + + +## Text Embedding from Text Inputs + +A SavedModel for **text embeddings from text inputs** accepts a batch of inputs +in a string Tensor of shape `[batch_size]` and maps them to a float32 Tensor of +shape `[batch_size, dim]` with dense representations (feature vectors) of the +inputs. + +### Usage synopsis + +```python +obj = hub.load("path/to/model") +text_input = ["A long sentence.", + "single-word", + "http://example.com"] +embeddings = obj(text_input) +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the model in training mode (e.g., for dropout) may require a keyword +argument `obj(..., training=True)`, and that `obj` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +In Keras, all this is taken care of by + +```python +embeddings = hub.KerasLayer("path/to/model", trainable=...)(text_input) +``` + +### Distributed training + +If the text embedding is used as part of a model that gets trained with a +distribution strategy, the call to `hub.load("path/to/model")` or +`hub.KerasLayer("path/to/model", ...)`, resp., must happen inside the +DistributionStrategy scope in order to create the model's variables in the +distributed way. For example + +```python + with strategy.scope(): + ... + model = hub.load("path/to/model") + ... +``` + +### Examples + +* Colab tutorial + [Text Classification with Movie Reviews](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb). + + + +## Text Embeddings with Preprocessed Inputs + +A **text embedding with preprocessed inputs** is implemented by two separate +SavedModels: + +* a **preprocessor** that maps a string Tensor of shape `[batch_size]` to a + dict of numeric Tensors, +* an **encoder** that accepts a dict of Tensors as returned by the + preprocessor, performs the trainable part of the embedding computation, and + returns a dict of outputs. The output under key `"default"` is a float32 + Tensor of shape `[batch_size, dim]`. 
+ +This allows to run the preprocessor in an input pipeline but fine-tune the +embeddings computed by the encoder as part of a larger model. In particular, it +allows to build encoders that can be run and fine-tuned on +[TPU](https://www.tensorflow.org/guide/tpu). + +It is an implementation detail which Tensors are contained in the preprocessor's +output, and which (if any) additional Tensors besides `"default"` are contained +in the encoder's output. + +The documentation of the encoder must specify which preprocessor to use with it. +Typically, there is exactly one correct choice. + +### Usage synopsis + +```python +text_input = tf.constant(["A long sentence.", + "single-word", + "http://example.com"]) +preprocessor = hub.load("path/to/preprocessor") # Must match `encoder`. +encoder_inputs = preprocessor(text_input) + +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the encoder in training mode (e.g., for dropout) may require a keyword +argument `encoder(..., training=True)`, and that `encoder` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +The `preprocessor` model may have `.variables` but is not meant to be trained +further. Preprocessing is not mode-dependent: if `preprocessor()` has a +`training=...` argument at all, it has no effect. + +In Keras, all this is taken care of by + +```python +encoder_inputs = hub.KerasLayer("path/to/preprocessor")(text_input) +encoder_outputs = hub.KerasLayer("path/to/encoder", trainable=True)(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +### Distributed training + +If the encoder is used as part of a model that gets trained with a distribution +strategy, the call to `hub.load("path/to/encoder")` or +`hub.KerasLayer("path/to/encoder", ...)`, resp., must happen inside + +```python + with strategy.scope(): + ... +``` + +in order to re-create the encoder variables in the distributed way. + +Likewise, if the preprocessor is part of the trained model (as in the simple +example above), it also needs to be loaded under the distribution strategy +scope. If, however, the preprocessor is used in an input pipeline (e.g., in a +callable passed to `tf.data.Dataset.map()`), its loading must happen *outside* +the distribution strategy scope, in order to place its variables (if any) on the +host CPU. + +### Examples + +* Colab tutorial + [Classify text with BERT](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/text/classify_text_with_bert.ipynb). + + + +## Text embeddings with Transformer Encoders + +Transformer encoders for text operate on a batch of input sequences, each +sequence comprising *n* ≥ 1 segments of tokenized text, within some +model-specific bound on *n*. For BERT and many of its extensions, that bound is +2, so they accept single segments and segment pairs. + +The API for **text embeddings with Transformer encoders** extends the API for +text embeddings with preprocessed inputs to this setting. + +### Preprocessor + +A preprocessor SavedModel for text embeddings with Transformer encoders +implements the API of a preprocessor SavedModel for text embeddings with +preprocessed inputs (see above), which provides a way to map single-segment text +inputs directly to encoder inputs. 
+ +In addition, the preprocessor SavedModel provides callable subobjects `tokenize` +for tokenization (separately per segment) and `bert_pack_inputs` for packing *n* +tokenized segments into one input sequence for the encoder. Each subobject +follows the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +As a concrete example for two segments of text, let us look at a sentence +entailment task that asks whether a premise (first segment) does or does not +imply a hypothesis (second segment). + +```python +preprocessor = hub.load("path/to/preprocessor") + +# Tokenize batches of both text inputs. +text_premises = tf.constant(["The quick brown fox jumped over the lazy dog.", + "Good day."]) +tokenized_premises = preprocessor.tokenize(text_premises) +text_hypotheses = tf.constant(["The dog was lazy.", # Implied. + "Axe handle!"]) # Not implied. +tokenized_hypotheses = preprocessor.tokenize(text_hypotheses) + +# Pack input sequences for the Transformer encoder. +seq_length = 128 +encoder_inputs = preprocessor.bert_pack_inputs( + [tokenized_premises, tokenized_hypotheses], + seq_length=seq_length) # Optional argument. +``` + +In Keras, this computation can be expressed as + +```python +tokenize = hub.KerasLayer(preprocessor.tokenize) +tokenized_hypotheses = tokenize(text_hypotheses) +tokenized_premises = tokenize(text_premises) + +bert_pack_inputs = hub.KerasLayer( + preprocessor.bert_pack_inputs, + arguments=dict(seq_length=seq_length)) # Optional argument. +encoder_inputs = bert_pack_inputs([tokenized_premises, tokenized_hypotheses]) +``` + +#### Details of `tokenize` + +A call to `preprocessor.tokenize()` accepts a string Tensor of shape +`[batch_size]` and returns a +[RaggedTensor](https://www.tensorflow.org/guide/ragged_tensor) of shape +`[batch_size, ...]` whose values are int32 token ids representing the input +strings. There can be *r* ≥ 1 ragged dimensions after `batch_size` but no other +uniform dimension. + +* If *r*=1, the shape is `[batch_size, (tokens)]`, and each input is simply + tokenized into a flat sequence of tokens. +* If *r*>1, there are *r*-1 additional levels of grouping. For example, + [tensorflow_text.BertTokenizer](https://github.com/tensorflow/text/blob/v2.3.0/tensorflow_text/python/ops/bert_tokenizer.py#L138) + uses *r*=2 to group tokens by words and yields shape `[batch_size, (words), + (tokens_per_word)]`. It is up to the model at hand how many of these extra + level(s) exist, if any, and what groupings they represent. + +The user can (but need not) modify tokenized inputs, e.g., to accommodate the +seq_length limit that will be enforced in packing encoder inputs. Extra +dimensions in the tokenizer output can help here (e.g., to respect word +boundaries) but become meaningless in the next step. + +In terms of the [Reusable SavedModel API](../reusable_saved_models.md), the +`preprocessor.tokenize` object may have `.variables` but is not meant to be +trained further. Tokenization is not mode-dependent: if +`preprocessor.tokenize()` has a `training=...` argument at all, it has no +effect. + +#### Details of `bert_pack_inputs` + +A call to `preprocessor.bert_pack_inputs()` accepts a Python list of tokenized +inputs (batched separately for each input segment) and returns a dict of Tensors +representing a batch of fixed-length input sequences for the Transformer encoder +model. 
+ +Each tokenized input is an int32 RaggedTensor of shape `[batch_size, ...]`, +where the number *r* of ragged dimensions after batch_size is either 1 or the +same as in the output of `preprocessor.tokenize().` (The latter is for +convenience only; the extra dimensions are flattened out before packing.) + +Packing adds special tokens around the input segments as expected by the +encoder. The `bert_pack_inputs()` call implements exactly the packing scheme +used by the original BERT models and many of their extensions: the packed +sequence starts with one start-of-sequence token, followed by the tokenized +segments, each terminated by one end-of-segment token. Remaining positions up to +seq_length, if any, are filled up with padding tokens. + +If a packed sequence would exceed seq_length, `bert_pack_inputs()` truncates its +segments to prefixes of approximately equal sizes so that the packed sequence +fits exactly within seq_length. + +Packing is not mode-dependent: if `preprocessor.bert_pack_inputs()` has a +`training=...` argument at all, it has no effect. Also, +`preprocessor.bert_pack_inputs` is not expected to have variables, or support +fine-tuning. + +### Encoder + +The encoder is called on the dict of `encoder_inputs` in the same way as in the +API for text embeddings with preprocessed inputs (see above), including the +provisions from the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +```python +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +``` + +or equivalently in Keras: + +```python +encoder = hub.KerasLayer("path/to/encoder", trainable=True) +encoder_outputs = encoder(encoder_inputs) +``` + +#### Details + +The `encoder_outputs` are a dict of Tensors with the following keys. + + +* `"sequence_output"`: a float32 Tensor of shape `[batch_size, seq_length, + dim]` with the context-aware embedding of each token of every packed input + sequence. +* `"pooled_output"`: a float32 Tensor of shape `[batch_size, dim]` with the + embedding of each input sequence as a whole, derived from sequence_output in + some trainable manner. +* `"default"`, as required by the API for text embeddings with preprocessed + inputs: a float32 Tensor of shape `[batch_size, dim]` with the embedding of + each input sequence. (This might be just an alias of pooled_output.) + +The contents of the `encoder_inputs` are not strictly required by this API +definition. However, for encoders that use BERT-style inputs, it is recommended +to use the following names (from the +[NLP Modeling Toolkit of TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/nlp)) +to minimize friction in interchanging encoders and reusing preprocessor models: + +* `"input_word_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the token ids of the packed input sequence (that is, including a + start-of-sequence token, end-of-segment tokens, and padding). +* `"input_mask"`: an int32 Tensor of shape `[batch_size, seq_length]` with + value 1 at the position of all input tokens present before padding and value + 0 for the padding tokens. +* `"input_type_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the index of the input segment that gave rise to the input token at the + respective position. The first input segment (index 0) includes the + start-of-sequence token and its end-of-segment token. The second and later + segments (if present) include their respective end-of-segment token. 
Padding + tokens get index 0 again. + +### Distributed training + +For loading the preprocessor and encoder objects inside or outside a +distribution strategy scope, the same rules apply as in the API for text +embeddings with preprocessed inputs (see above). + +### Examples + +* Colab tutorial + [Solve GLUE tasks using BERT on TPU](https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/bert_glue.ipynb). diff --git a/site/en/hub/common_signatures/images.md b/site/en/hub/common_signatures/images.md new file mode 100644 index 00000000000..5e41c3e2960 --- /dev/null +++ b/site/en/hub/common_signatures/images.md @@ -0,0 +1,155 @@ + +# Common Signatures for Images + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for image-related tasks. (For the +[TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/images.md).) + +Some modules can be used for more than one task (e.g., image classification +modules tend to do some feature extraction on the way). Therefore, each module +provides (1) named signatures for all the tasks anticipated by the publisher, +and (2) a default signature `output = m(images)` for its designated primary +task. + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for classification by the consumer model. (Unlike the intermediate +activations of CNNs, it does not offer a spatial breakdown. Unlike [image +classification](#classification), it discards the classification learned +by the publisher model.) + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of feature vectors. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + features = module(images) # A batch with shape [batch_size, num_features]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_feature_vector", + as_dict=True) + features = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_features]`. The `batch_size` is the same as in the +input, but not known at graph construction time. `num_features` is a known, +module-specific constant independent of input size. + +These feature vectors are meant to be usable for classification with a simple +feed-forward classifier (like the pooled features from the topmost convolutional +layer in a typical CNN for image classification). + +Applying dropout to the output features (or not) should be left to the module +consumer. The module itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. 
+It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows consumers to draw conclusions from the particular classification +learned by the publisher module, and not just its underlying features (cf. +[Image Feature Vector](#feature-vector)). + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of logits. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + logits = module(images) # A batch with shape [batch_size, num_classes]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_classification", + as_dict=True) + logits = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_classes]`. The `batch_size` is the same as in the input, +but not known at graph construction time. `num_classes` is the number of classes +in the classification, which is a known constant independent of input size. + +Evaluating `outputs["default"][i, c]` yields a score predicting the membership +of example `i` in the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, +and refer to a definition of the class indices. + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. +It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image input + +This is common to all types of image modules and image signatures. + +A signature that takes a batch of images as input accepts them as a dense 4-D +tensor of dtype `float32` and shape `[batch_size, height, width, 3]` whose +elements are RGB color values of pixels normalized to the range [0, 1]. This is +what you get from `tf.image.decode_*()` followed by +`tf.image.convert_image_dtype(..., tf.float32)`. + +A module with exactly one (or one principal) input of images uses the name +`"images"` for this input. + +The module accepts any `batch_size`, and correspondingly sets the first +dimension of TensorInfo.tensor_shape to "unknown". The last dimension is fixed +to the number `3` of RGB channels. The `height` and `width` dimensions are +fixed to the expected size of input images. (Future work may remove that +restriction for fully convolutional modules.) 
+ +Consumers of the module should not inspect the shape directly, but obtain +the size information by calling hub.get_expected_image_size() +on the module or module spec, and are expected to resize input images +accordingly (typically before/during batching). + +For simplicity, TF-Hub modules use the `channels_last` +(or `NHWC`) layout of Tensors, and leave it to TensorFlow's graph optimizer +to rewrite to `channels_first` (or `NCHW`) if needed. It has been doing that +by default since TensorFlow version 1.7. diff --git a/site/en/hub/common_signatures/index.md b/site/en/hub/common_signatures/index.md new file mode 100644 index 00000000000..05eacc8b37f --- /dev/null +++ b/site/en/hub/common_signatures/index.md @@ -0,0 +1,25 @@ + +# Common Signatures for TF Hub Modules + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common signatures for modules in the +[TF1 Hub format](../tf1_hub_module.md). + +Note that the TF1 Hub format has been **deprecated** in favor of the +[TF2 SavedModel format](../tf2_saved_model.md) and its +[Common SavedModel APIs](../common_saved_model_apis/index.md). + +## Signatures + +* [Image Signatures](images.md) +* [Text Signatures](text.md) diff --git a/site/en/hub/common_signatures/text.md b/site/en/hub/common_signatures/text.md new file mode 100644 index 00000000000..3ea8f27c91d --- /dev/null +++ b/site/en/hub/common_signatures/text.md @@ -0,0 +1,46 @@ + +# Common Signatures for Text + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for tasks that accept text inputs. +(For the [TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/text.md).) + +## Text feature vector + +A **text feature vector** module creates a dense vector representation +from text features. +It accepts a batch of strings of shape `[batch_size]` and maps them to +a `float32` tensor of shape `[batch_size, N]`. This is often called +**text embedding** in dimension `N`. + +### Basic usage + +```python + embed = hub.Module("path/to/module") + representations = embed([ + "A long sentence.", + "single-word", + "http://example.com"]) +``` + +### Feature column usage + +```python + feature_columns = [ + hub.text_embedding_column("comment", "path/to/module", trainable=False), + ] + input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True) + estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns) + estimator.train(input_fn, max_steps=100) +``` + +## Notes + +Modules have been pre-trained on different domains and/or tasks, +and therefore not every text feature vector module would be suitable for +your problem. E.g.: some modules could have been trained on a single language. + +This interface does not allow fine-tuning of the text representation on TPUs, +because it requires the module to instantiate both string processing and the +trainable variables at the same time. 
diff --git a/site/en/hub/community.md b/site/en/hub/community.md new file mode 100644 index 00000000000..a7a4c2bf0ec --- /dev/null +++ b/site/en/hub/community.md @@ -0,0 +1,6 @@ + +# Community and support + +* The source code is available on [GitHub](https://github.com/tensorflow/hub). + We use [GitHub issues](https://github.com/tensorflow/hub/issues) for + tracking feature requests and bugs. \ No newline at end of file diff --git a/site/en/hub/contribute.md b/site/en/hub/contribute.md new file mode 100644 index 00000000000..e537f79f766 --- /dev/null +++ b/site/en/hub/contribute.md @@ -0,0 +1,16 @@ + +# Contribute + +To learn more about how to publish a model or model collection on +[tfhub.dev](https://tfhub.dev/), see the [becoming_a_publisher](publish.md) +guide. + +You can find more information of how to contribute to the +[TensorFlow Hub library](https://github.com/tensorflow/hub) in our +[GitHub contributing guide](https://github.com/tensorflow/hub/blob/master/CONTRIBUTING.md). + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. diff --git a/site/en/hub/hosting.md b/site/en/hub/hosting.md new file mode 100644 index 00000000000..ce2ce76b0a6 --- /dev/null +++ b/site/en/hub/hosting.md @@ -0,0 +1,175 @@ + +# Model hosting protocol + +This document describes the URL conventions used when hosting all model types on +[tfhub.dev](https://tfhub.dev) - TFJS, TF Lite and TensorFlow models. It also +describes the HTTP(S)-based protocol implemented by the `tensorflow_hub` library +in order to load TensorFlow models from [tfhub.dev](https://tfhub.dev) and +compatible services into TensorFlow programs. + +Its key feature is to use the same URL in code to load a model and in a browser +to view the model documentation. + +## General URL conventions + +[tfhub.dev](https://tfhub.dev) supports the following URL formats: + +* TF Hub publishers follow `https://tfhub.dev/` +* TF Hub collections follow + `https://tfhub.dev//collection/` +* TF Hub models have versioned url + `https://tfhub.dev///` and unversioned url + `https://tfhub.dev//` that resolves to the latest + version of the model. + +TF Hub models can be downloaded as compressed assets by appending URL parameters +to the [tfhub.dev](https://tfhub.dev) model URL. However, the URL parameters +required to achieve that depend on the model type: + +* TensorFlow models (both SavedModel and TF1 Hub formats): append + `?tf-hub-format=compressed` to the TensorFlow model url. +* TFJS models: append `?tfjs-format=compressed` to the TFJS model url to + download the compressed or `/model.json?tfjs-format=file` to read if from + remote storage. +* TF lite models: append `?lite-format=tflite` to the TF Lite model url. + +For example: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Type | Model URL | Download type | URL param | Download URL |
| --- | --- | --- | --- | --- |
| TensorFlow (SavedModel, TF1 Hub format) | https://tfhub.dev/google/spice/2 | .tar.gz | ?tf-hub-format=compressed | https://tfhub.dev/google/spice/2?tf-hub-format=compressed |
| TF Lite | https://tfhub.dev/google/lite-model/spice/1 | .tflite | ?lite-format=tflite | https://tfhub.dev/google/lite-model/spice/1?lite-format=tflite |
| TF.js | https://tfhub.dev/google/tfjs-model/spice/2/default/1 | .tar.gz | ?tfjs-format=compressed | https://tfhub.dev/google/tfjs-model/spice/2/default/1?tfjs-format=compressed |
+ +Additionally, some models also are hosted in a format that can be read directly +from remote storage without being downloaded. This is especially useful if there +is no local storage available, such as running a TF.js model in the browser or +loading a SavedModel on [Colab](https://colab.research.google.com/). Be +conscious that reading models that are hosted remotely without being downloaded +locally may increase latency. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

| Type | Model URL | Response type | URL param | Request URL |
| --- | --- | --- | --- | --- |
| TensorFlow (SavedModel, TF1 Hub format) | https://tfhub.dev/google/spice/2 | String (Path to GCS folder where the uncompressed model is stored) | ?tf-hub-format=uncompressed | https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed |
| TF.js | https://tfhub.dev/google/tfjs-model/spice/2/default/1 | .json | ?tfjs-format=file | https://tfhub.dev/google/tfjs-model/spice/2/default/1/model.json?tfjs-format=file |
+ +## tensorflow_hub library protocol + +This section describes how we host models on [tfhub.dev](https://tfhub.dev) for +use with the tensorflow_hub library. If you want to host your own model +repository to work with the tensorflow_hub library, your HTTP(s) distribution +service should provide an implementation of this protocol. + +Note that this section does not address hosting TF Lite and TFJS models since +they are not downloaded via the `tensorflow_hub` library. For more information +on hosting these model types, please check [above](#general-url-conventions). + +### Compressed Hosting + +Models are stored on [tfhub.dev](https://tfhub.dev) as compressed tar.gz files. +By default, the tensorflow_hub library automatically downloads the compressed +model. They can also be manually downloaded by appending the +`?tf-hub-format=compressed` to the model url, for example: + +```shell +wget https://tfhub.dev/tensorflow/albert_en_xxlarge/1?tf-hub-format=compressed +``` + +The root of the archive is the root of the model directory and should contain a +SavedModel, as in this example: + +```shell +# Create a compressed model from a SavedModel directory. +$ tar -cz -f model.tar.gz --owner=0 --group=0 -C /tmp/export-model/ . + +# Inspect files inside a compressed model +$ tar -tf model.tar.gz +./ +./variables/ +./variables/variables.data-00000-of-00001 +./variables/variables.index +./assets/ +./saved_model.pb +``` + +Tarballs for use with the legacy +[TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) will also +contain a `./tfhub_module.pb` file. + +When one of `tensorflow_hub` library model loading APIs is invoked +([hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), +[hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load), etc) the +library downloads the model, uncompresses the model and caches it locally. The +`tensorflow_hub` library expects that model URLs are versioned and that the +model content of a given version is immutable, so that it can be cached +indefinitely. Learn more about [caching models](caching.md). + +![](https://raw.githubusercontent.com/tensorflow/hub/master/docs/images/library_download_cache.png) + +### Uncompressed Hosting + +When the environment variable `TFHUB_MODEL_LOAD_FORMAT` or the command-line flag +`--tfhub_model_load_format` is set to `UNCOMPRESSED`, the model is read directly +from remote storage (GCS) instead of being downloaded and uncompressed locally. +When this behavior is enabled the library appends `?tf-hub-format=uncompressed` +to the model URL. That request returns the path to the folder on GCS that +contains the uncompressed model files. As an example, \ +`https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed` \ +returns \ +`gs://kaggle-tfhub-models-uncompressed/tfhub-modules/google/spice/2/uncompressed` +in the body of the 303 response. The library then reads the model from that GCS +destination. 
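
For example, a hedged sketch of enabling uncompressed loading from Python by setting the environment variable before the model handle is resolved (the handle is the SPICE example used above; setting the variable inside the process is an assumption, it can equally be exported in the shell):

```python
import os
# Assumption: the variable must be set before tensorflow_hub resolves the handle.
os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"

import tensorflow_hub as hub

# Reads the model directly from GCS instead of downloading and uncompressing it.
m = hub.load("https://tfhub.dev/google/spice/2")
```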
diff --git a/site/en/hub/images/action_recognition.gif b/site/en/hub/images/action_recognition.gif new file mode 100644 index 00000000000..a58c22ac8c3 Binary files /dev/null and b/site/en/hub/images/action_recognition.gif differ diff --git a/site/en/hub/images/bert.png b/site/en/hub/images/bert.png new file mode 100644 index 00000000000..e36f69c9c7b Binary files /dev/null and b/site/en/hub/images/bert.png differ diff --git a/site/en/hub/images/bert_preprocess.png b/site/en/hub/images/bert_preprocess.png new file mode 100644 index 00000000000..18b3b435d1b Binary files /dev/null and b/site/en/hub/images/bert_preprocess.png differ diff --git a/site/en/hub/images/bert_preprocess_wide.png b/site/en/hub/images/bert_preprocess_wide.png new file mode 100644 index 00000000000..b414196724e Binary files /dev/null and b/site/en/hub/images/bert_preprocess_wide.png differ diff --git a/site/en/hub/images/bit_blog.jpg b/site/en/hub/images/bit_blog.jpg new file mode 100644 index 00000000000..260415bf0b1 Binary files /dev/null and b/site/en/hub/images/bit_blog.jpg differ diff --git a/site/en/hub/images/boundless.png b/site/en/hub/images/boundless.png new file mode 100644 index 00000000000..ccc52d17f84 Binary files /dev/null and b/site/en/hub/images/boundless.png differ diff --git a/site/en/hub/images/colab_logo.svg b/site/en/hub/images/colab_logo.svg new file mode 100644 index 00000000000..d03f1106221 --- /dev/null +++ b/site/en/hub/images/colab_logo.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/food.png b/site/en/hub/images/food.png new file mode 100644 index 00000000000..41865be3984 Binary files /dev/null and b/site/en/hub/images/food.png differ diff --git a/site/en/hub/images/gan_faces.gif b/site/en/hub/images/gan_faces.gif new file mode 100644 index 00000000000..a34b8d517f4 Binary files /dev/null and b/site/en/hub/images/gan_faces.gif differ diff --git a/site/en/hub/images/github_icon.svg b/site/en/hub/images/github_icon.svg new file mode 100644 index 00000000000..0a607bb98b3 --- /dev/null +++ b/site/en/hub/images/github_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/guide_basics.png b/site/en/hub/images/guide_basics.png new file mode 100644 index 00000000000..e6aee34f516 Binary files /dev/null and b/site/en/hub/images/guide_basics.png differ diff --git a/site/en/hub/images/image_classification.png b/site/en/hub/images/image_classification.png new file mode 100644 index 00000000000..a3840e3482c Binary files /dev/null and b/site/en/hub/images/image_classification.png differ diff --git a/site/en/hub/images/interpolation.png b/site/en/hub/images/interpolation.png new file mode 100644 index 00000000000..d2f062da7c1 Binary files /dev/null and b/site/en/hub/images/interpolation.png differ diff --git a/site/en/hub/images/library_download_cache.png b/site/en/hub/images/library_download_cache.png new file mode 100644 index 00000000000..1b581a4a819 Binary files /dev/null and b/site/en/hub/images/library_download_cache.png differ diff --git a/site/en/hub/images/object_detection.png b/site/en/hub/images/object_detection.png new file mode 100644 index 00000000000..57b327099ae Binary files /dev/null and b/site/en/hub/images/object_detection.png differ diff --git a/site/en/hub/images/odml.png b/site/en/hub/images/odml.png new file mode 100644 index 00000000000..29bf3bcc61b Binary files /dev/null and b/site/en/hub/images/odml.png differ diff --git a/site/en/hub/images/similarity.png b/site/en/hub/images/similarity.png new file mode 100644 index 00000000000..3155e8706e1 Binary files /dev/null and 
b/site/en/hub/images/similarity.png differ diff --git a/site/en/hub/images/spice_blog.png b/site/en/hub/images/spice_blog.png new file mode 100644 index 00000000000..cf19769e6d8 Binary files /dev/null and b/site/en/hub/images/spice_blog.png differ diff --git a/site/en/hub/images/spice_color.png b/site/en/hub/images/spice_color.png new file mode 100644 index 00000000000..35b68d7c444 Binary files /dev/null and b/site/en/hub/images/spice_color.png differ diff --git a/site/en/hub/images/stackoverflow_icon.svg b/site/en/hub/images/stackoverflow_icon.svg new file mode 100644 index 00000000000..491a75e464d --- /dev/null +++ b/site/en/hub/images/stackoverflow_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/style_transfer.png b/site/en/hub/images/style_transfer.png new file mode 100644 index 00000000000..d0427408830 Binary files /dev/null and b/site/en/hub/images/style_transfer.png differ diff --git a/site/en/hub/images/super_resolution.png b/site/en/hub/images/super_resolution.png new file mode 100644 index 00000000000..7d3f3741077 Binary files /dev/null and b/site/en/hub/images/super_resolution.png differ diff --git a/site/en/hub/images/text_video.gif b/site/en/hub/images/text_video.gif new file mode 100644 index 00000000000..5fe639b1eea Binary files /dev/null and b/site/en/hub/images/text_video.gif differ diff --git a/site/en/hub/images/yamnet.png b/site/en/hub/images/yamnet.png new file mode 100644 index 00000000000..416956ad6fb Binary files /dev/null and b/site/en/hub/images/yamnet.png differ diff --git a/site/en/hub/installation.md b/site/en/hub/installation.md new file mode 100644 index 00000000000..2381fbea614 --- /dev/null +++ b/site/en/hub/installation.md @@ -0,0 +1,57 @@ + +# Installation + +## Installing tensorflow_hub + +The `tensorflow_hub` library can be installed alongside TensorFlow 1 and +TensorFlow 2. We recommend that new users start with TensorFlow 2 right away, +and current users upgrade to it. + +### Use with TensorFlow 2 + +Use [pip](https://pip.pypa.io/) to +[install TensorFlow 2](https://www.tensorflow.org/install) as usual. (See there +for extra instructions about GPU support.) Then install a current version of +[`tensorflow-hub`](https://pypi.org/project/tensorflow-hub/) next to it (must be +0.5.0 or newer). + +```bash +$ pip install "tensorflow>=2.0.0" +$ pip install --upgrade tensorflow-hub +``` + +The TF1-style API of TensorFlow Hub works with the v1 compatibility mode of +TensorFlow 2. + +### Legacy use with TensorFlow 1 + +TensorFlow 1.15 is the only version of TensorFlow 1.x still supported by the +`tensorflow_hub` library (as of release 0.11.0). TensorFlow 1.15 defaults to +TF1-compatible behavior but contains many TF2 features under the hood to allow +some use of TensorFlow Hub's TF2-style APIs. + +```bash +$ pip install "tensorflow>=1.15,<2.0" +$ pip install --upgrade tensorflow-hub +``` + +### Use of pre-release versions + +The pip packages `tf-nightly` and `tf-hub-nightly` are built automatically from +the source code on github, with no release testing. This lets developers try out +the latest code without [building from source](build_from_source.md). 
+ +```bash +$ pip install tf-nightly +$ pip install --upgrade tf-hub-nightly +``` + +## Next Steps + +- [Library overview](lib_overview.md) +- Tutorials: + - [Text classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) + - [Image classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + - Additional examples + [on GitHub](https://github.com/tensorflow/hub/blob/master/examples/README.md) +- Find models on [tfhub.dev](https://tfhub.dev). \ No newline at end of file diff --git a/site/en/hub/lib_overview.md b/site/en/hub/lib_overview.md new file mode 100644 index 00000000000..c480ad2fbdf --- /dev/null +++ b/site/en/hub/lib_overview.md @@ -0,0 +1,50 @@ + +# TensorFlow Hub Library Overview + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse trained models in your TensorFlow program with a minimum +amount of code. The main way to load a trained model is using the +`hub.KerasLayer` API. + +```python +import tensorflow_hub as hub + +embed = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = embed(["A long sentence.", "single-word", "http://example.com"]) +print(embeddings.shape, embeddings.dtype) +``` +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## Setting the cache location for downloads. + +By default, `tensorflow_hub` uses a system-wide, temporary directory to cache +downloaded and uncompressed models. See [Caching](caching.md) for options to use +other, possibly more persistent locations. + +## API stability + +Although we hope to prevent breaking changes, this project is still under active +development and is not yet guaranteed to have a stable API or model format. + +## Fairness + +As in all of machine learning, [fairness](http://ml-fairness.com) is an +[important](https://research.googleblog.com/2016/10/equality-of-opportunity-in-machine.html) +consideration. Many pre-trained models are trained on large datasets. When +reusing any model, it’s important to be mindful of what data the model was +trained on (and whether there are any existing biases there), and how these +might impact your use of it. + +## Security + +Since they contain arbitrary TensorFlow graphs, models can be thought of as +programs. +[Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) +describes the security implications of referencing a model from an untrusted +source. + +## Next Steps + +- [Use the library](tf2_saved_model.md) +- [Reusable SavedModels](reusable_saved_models.md) diff --git a/site/en/hub/migration_tf2.md b/site/en/hub/migration_tf2.md new file mode 100644 index 00000000000..c2cc4b50759 --- /dev/null +++ b/site/en/hub/migration_tf2.md @@ -0,0 +1,114 @@ + +# Migrating from TF1 to TF2 with TensorFlow Hub + +This page explains how to keep using TensorFlow Hub while migrating your +TensorFlow code from TensorFlow 1 to TensorFlow 2. It complements TensorFlow's +general [migration guide](https://www.tensorflow.org/guide/migrate). + +For TF2, TF Hub has switched away from the legacy `hub.Module` API for building +a `tf.compat.v1.Graph` like `tf.contrib.v1.layers` do. 
Instead, there is now a +`hub.KerasLayer` for use alongside other Keras layers for building a +`tf.keras.Model` (typically in TF2's new +[eager execution environment](https://www.tensorflow.org/api_docs/python/tf/executing_eagerly)) +and its underlying `hub.load()` method for low-level TensorFlow code. + +The `hub.Module` API remains available in the `tensorflow_hub` library for use +in TF1 and in the TF1 compatibility mode of TF2. It can only load models in the +[TF1 Hub format](tf1_hub_module.md). + +The new API of `hub.load()` and `hub.KerasLayer` works for TensorFlow 1.15 (in +eager and graph mode) and in TensorFlow 2. This new API can load the new +[TF2 SavedModel](tf2_saved_model.md) assets, and, with the restrictions laid out +in the [model compatibility guide](model_compatibility.md), the legacy models in +TF1 Hub format. + +In general, it is recommended to use new API wherever possible. + +## Summary of the new API + +`hub.load()` is the new low-level function to load a SavedModel from TensorFlow +Hub (or compatible services). It wraps TF2's `tf.saved_model.load()`; +TensorFlow's [SavedModel Guide](https://www.tensorflow.org/guide/saved_model) +describes what you can do with the result. + +```python +m = hub.load(handle) +outputs = m(inputs) +``` + +The `hub.KerasLayer` class calls `hub.load()` and adapts the result for use in +Keras alongside other Keras layers. (It may even be a convenient wrapper for +loaded SavedModels used in other ways.) + +```python +model = tf.keras.Sequential([ + hub.KerasLayer(handle), + ...]) +``` + +Many tutorials show these APIs in action. Here are some examples: + +* [Text classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) +* [Image classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + +### Using the new API in Estimator training + +If you use a TF2 SavedModel in an Estimator for training with parameter servers +(or otherwise in a TF1 Session with variables placed on remote devices), you +need to set `experimental.share_cluster_devices_in_session` in the tf.Session's +ConfigProto, or else you will get an error like "Assigned device +'/job:ps/replica:0/task:0/device:CPU:0' does not match any device." + +The necessary option can be set like + +```python +session_config = tf.compat.v1.ConfigProto() +session_config.experimental.share_cluster_devices_in_session = True +run_config = tf.estimator.RunConfig(..., session_config=session_config) +estimator = tf.estimator.Estimator(..., config=run_config) +``` + +Starting with TF2.2, this option is no longer experimental, and the +`.experimental` piece can be dropped. + +## Loading legacy models in TF1 Hub format + +It can happen that a new TF2 SavedModel is not yet available for your use-case +and you need to load an legacy model in TF1 Hub format. Starting in +`tensorflow_hub` release 0.7, you can use legacy model in TF1 Hub format +together with `hub.KerasLayer` as shown below: + +```python +m = hub.KerasLayer(handle) +tensor_out = m(tensor_in) +``` + +Additionally `KerasLayer` exposes the ability to specify `tags`, `signature`, +`output_key` and `signature_outputs_as_dict` for more specific usages of legacy +models in TF1 Hub format and legacy SavedModels. + +For more information on TF1 Hub format compatibility see the +[model compatibility guide](model_compatibility.md). 
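
A hedged sketch of the extra `hub.KerasLayer` arguments mentioned above (the handle, tensors, and key names are placeholders, not a specific model):

```python
# Sketch only: selecting a signature and a single output when wrapping a legacy
# TF1 Hub format model; `handle` is a placeholder. For dict-valued outputs,
# signature_outputs_as_dict=True could be used instead of output_key.
m = hub.KerasLayer(handle,
                   signature="default",    # which signature to apply
                   output_key="default")   # which output of that signature to return
tensor_out = m(tensor_in)
```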
+ +## Using lower level APIs + +Legacy TF1 Hub format models can be loaded via `tf.saved_model.load`. Instead of + +```python +# DEPRECATED: TensorFlow 1 +m = hub.Module(handle, tags={"foo", "bar"}) +tensors_out_dict = m(dict(x1=..., x2=...), signature="sig", as_dict=True) +``` + +it is recommended to use: + +```python +# TensorFlow 2 +m = hub.load(path, tags={"foo", "bar"}) +tensors_out_dict = m.signatures["sig"](x1=..., x2=...) +``` + +In these examples `m.signatures` is a dict of TensorFlow +[concrete functions](https://www.tensorflow.org/tutorials/customization/performance#tracing) +keyed by signature names. Calling such a function computes all its outputs, even +if unused. (This is different from the lazy evaluation of TF1's graph mode.) diff --git a/site/en/hub/model_compatibility.md b/site/en/hub/model_compatibility.md new file mode 100644 index 00000000000..e37ed717c3b --- /dev/null +++ b/site/en/hub/model_compatibility.md @@ -0,0 +1,144 @@ + +# Model compatibility for TF1/TF2 + +## TF Hub model formats + +TF Hub offers reusable model pieces that can be loaded back, built upon, and +possibly be retrained in a TensorFlow program. These come in two different +formats: + +* The custom [TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) . + Its main intended use is in TF1 (or TF1 compatibility mode in TF2) via its + [hub.Module API](https://www.tensorflow.org/hub/api_docs/python/hub/Module). + Full compatibility details [below](#compatibility_of_hubmodule). +* The native [TF2 SavedModel](https://www.tensorflow.org/hub/tf2_saved_model) + format. Its main intended use is in TF2 via the + [hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load) and + [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) + APIs. Full compatibility details [below](#compatibility_of_tf2_savedmodel). + +The model format can be found on the model page on +[tfhub.dev](https://tfhub.dev). Model **loading/inference**, **fine-tuning** or +**creation** might not be supported in TF1/2 based on the model formats. + +## Compatibility of the TF1 Hub format {:#compatibility_of_hubmodule} + + + + + + + + + + + + + + + + + + + + + + + + + +

| Operation | TF1 / TF1 compat mode in TF2 [1] | TF2 |
| --- | --- | --- |
| Loading / Inference | Fully supported (complete TF1 Hub format loading guide)<br>`m = hub.Module(handle)`<br>`outputs = m(inputs)` | It's recommended to use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m.signatures["sig"](inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle, signature="sig")`<br>`outputs = m(inputs)` |
| Fine-tuning | Fully supported (complete TF1 Hub format fine-tuning guide)<br>`m = hub.Module(handle, trainable=True, tags=["train"]*is_training)`<br>`outputs = m(inputs)`<br>Note: modules that don't need a separate train graph don't have a train tag. | Not supported |
| Creation | Fully supported (see complete TF1 Hub format creation guide)<br>Note: The TF1 Hub format is geared towards TF1 and is only partially supported in TF2. Consider creating a TF2 SavedModel. | Not supported |
+ +## Compatibility of TF2 SavedModel {:#compatibility_of_tf2_savedmodel} + +Not supported before TF1.15. + + + + + + + + + + + + + + + + + + + + + + + + +

| Operation | TF1.15 / TF1 compat mode in TF2 [1] | TF2 |
| --- | --- | --- |
| Loading / Inference | Use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m(inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle)`<br>`outputs = m(inputs)` | Fully supported (complete TF2 SavedModel loading guide). Use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m(inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle)`<br>`outputs = m(inputs)` |
| Fine-tuning | Supported for a `hub.KerasLayer` used in `tf.keras.Model` when trained with `Model.fit()` or trained in an Estimator whose `model_fn` wraps the Model per the custom `model_fn` guide.<br>Note: `hub.KerasLayer` does not fill in graph collections like the old `tf.compat.v1.layers` or `hub.Module` APIs did. | Fully supported (complete TF2 SavedModel fine-tuning guide). Use either `hub.load`:<br>`m = hub.load(handle)`<br>`outputs = m(inputs, training=is_training)`<br>or `hub.KerasLayer`:<br>`m = hub.KerasLayer(handle, trainable=True)`<br>`outputs = m(inputs)` |
| Creation | The TF2 API `tf.saved_model.save()` can be called from within compat mode. | Fully supported (see complete TF2 SavedModel creation guide) |

[1] "TF1 compat mode in TF2" refers to the combined effect of importing TF2 with `import tensorflow.compat.v1 as tf` and running `tf.disable_v2_behavior()` as described in the TensorFlow Migration guide.
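
For reference, a minimal sketch of the setup that footnote [1] describes:

```python
# "TF1 compat mode in TF2", as described in footnote [1] above.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
```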

diff --git a/site/en/hub/model_formats.md b/site/en/hub/model_formats.md new file mode 100644 index 00000000000..73ae7c247a1 --- /dev/null +++ b/site/en/hub/model_formats.md @@ -0,0 +1,79 @@ + +# Model formats + +[tfhub.dev](https://tfhub.dev) hosts the following model +formats: TF2 SavedModel, TF1 Hub format, TF.js and TFLite. This page provides an +overview of each model format. + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. + +## TensorFlow formats + +[tfhub.dev](https://tfhub.dev) hosts TensorFlow models in the TF2 SavedModel +format and TF1 Hub format. We recommend using models in the standardized TF2 +SavedModel format instead of the deprecated TF1 Hub format when possible. + +### SavedModel + +TF2 SavedModel is the recommended format for sharing TensorFlow models. You can +learn more about the SavedModel format in the +[TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) guide. + +You can browse SavedModels on tfhub.dev by using the TF2 version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2). + +You can use SavedModels from tfhub.dev without depending on the `tensorflow_hub` +library, since this format is a part of core TensorFlow. + +Learn more about SavedModels on TF Hub: + +* [Using TF2 SavedModels](tf2_saved_model.md) +* [Exporting a TF2 SavedModel](exporting_tf2_saved_model.md) +* [TF1/TF2 compatibility of TF2 SavedModels](model_compatibility.md) + +### TF1 Hub format + +The TF1 Hub format is a custom serialization format used in by TF Hub library. +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +You can browse models in the TF1 Hub format on tfhub.dev by using the TF1 +version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf1). + +Learn more about models in TF1 Hub format on TF Hub: + +* [Using TF1 Hub format models](tf1_hub_module.md) +* [Exporting a model in the TF1 Hub format](exporting_hub_format.md) +* [TF1/TF2 compatibility of TF1 Hub format](model_compatibility.md) + +## TFLite format + +The TFLite format is used for on-device inference. You can learn more at the +[TFLite documentation](https://www.tensorflow.org/lite). + +You can browse TF Lite models on tfhub.dev by using the TF Lite model format +filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following [this link](https://tfhub.dev/lite). + +## TFJS format + +The TF.js format is used for in-browser ML. You can learn more at the +[TF.js documentation](https://www.tensorflow.org/js). 
+ +You can browse TF.js models on tfhub.dev by using the TF.js model format filter +on the [tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) +or by following [this link](https://tfhub.dev/js). diff --git a/site/en/hub/overview.md b/site/en/hub/overview.md new file mode 100644 index 00000000000..b6d814eba73 --- /dev/null +++ b/site/en/hub/overview.md @@ -0,0 +1,31 @@ + +# TensorFlow Hub + +TensorFlow Hub is an open repository and library for reusable machine learning. +The [tfhub.dev](https://tfhub.dev) repository provides many pre-trained models: +text embeddings, image classification models, TF.js/TFLite models and much more. +The repository is open to +[community contributors](https://tfhub.dev/s?subtype=publisher). + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse them in your TensorFlow program with a minimum amount of +code. + +```python +import tensorflow_hub as hub + +model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = model(["The rain in Spain.", "falls", + "mainly", "In the plain!"]) + +print(embeddings.shape) #(4,128) +``` + +## Next Steps + +- [Find models on tfhub.dev](https://tfhub.dev) +- [Publish models on tfhub.dev](publish.md) +- TensorFlow Hub library + - [Install TensorFlow Hub](installation.md) + - [Library overview](lib_overview.md) +- [Follow tutorials](tutorials) diff --git a/site/en/hub/portability_and_deletion.md b/site/en/hub/portability_and_deletion.md new file mode 100644 index 00000000000..67fa401d161 --- /dev/null +++ b/site/en/hub/portability_and_deletion.md @@ -0,0 +1,18 @@ + +## I want to see what I’ve uploaded to TensorFlow Hub. Can I get a copy of my data? + +Yes. If you’d like the Kaggle Team to **send you a copy** of all of the +data you have uploaded, please send us an email at [support@kaggle.com](mailto:support@kaggle.com) +and we’ll respond as soon as possible. + +## How do I delete what I’ve uploaded to TensorFlow Hub? + +Similarly, if you’d like us to **delete or remove content**, please send us an +email at [support@kaggle.com](mailto:support@kaggle.com) and we’ll delete +all copies that we have and stop serving it on tfhub.dev. Please note: + +* Because TensorFlow Hub is an open-source platform, copies of your assets may +still be retained by members of the public. +* Deletion is permanent and cannot be undone. +* Deletion can cause downstream breakages if users are not caching your model +locally and/or are not properly warned prior to deletion. diff --git a/site/en/hub/publish.md b/site/en/hub/publish.md new file mode 100644 index 00000000000..7fc5e7c1751 --- /dev/null +++ b/site/en/hub/publish.md @@ -0,0 +1,19 @@ + +# Publishing Process + +Thank you for considering to publish your models! 
+ +**Please join the Early Access Model Publishing (EAP) on +[Kaggle Models](https://www.kaggle.com/models):** + +- Email [kaggle-models@google.com](mailto:kaggle-models@google.com) and + provide the following to get access to EAP: + - (1) Your Kaggle username + - (2) Your desired organization slug + - (3) A URL to a square-shaped profile image (which is needed for the + organization creation) +- Follow the + [documentation instructions](https://www.kaggle.com/model-publishing-instructions) + to create and publish your model +- Feel free to raise any questions and get support from + [Kaggle Discord channel](https://discord.gg/rKEyxj9WF) diff --git a/site/en/hub/reusable_saved_models.md b/site/en/hub/reusable_saved_models.md new file mode 100644 index 00000000000..b2114135d77 --- /dev/null +++ b/site/en/hub/reusable_saved_models.md @@ -0,0 +1,208 @@ + +# Reusable SavedModels + +## Introduction + +TensorFlow Hub hosts SavedModels for TensorFlow 2, among other assets. +They can be loaded back into a Python program with `obj = hub.load(url)` +[[learn more](tf2_saved_model)]. The returned `obj` is the result +of `tf.saved_model.load()` (see TensorFlow's +[SavedModel guide](https://www.tensorflow.org/guide/saved_model)). +This object can have arbitrary attributes that are tf.functions, +tf.Variables (initialized from their pre-trained values), other resources +and, recursively, more such objects. + +This page describes an interface to be implemented by the loaded `obj` +in order to be *reused* in a TensorFlow Python program. +SavedModels conforming to this interface are called *Reusable SavedModels*. + +Reusing means building a larger model around `obj`, including the ability +to fine-tune it. Fine-tuning means further training of the weights in the loaded +`obj` as part of the surrounding model. The loss function and the +optimizer are determined by the surrounding model; `obj` only defines +the mapping of input to output activations (the "forward pass"), possibly +including techniques such as dropout or batch normalization. + +**The TensorFlow Hub team recommends implementing the Reusable SavedModel +interface** in all SavedModels that are meant to be reused in the above sense. +Many utilities from the `tensorflow_hub` library, notably `hub.KerasLayer`, +require SavedModels to implement it. + +### Relation to SignatureDefs + +This interface in terms of tf.functions and other TF2 features +is separate from the SavedModel's signatures, which have been +available since TF1 and continue to be used in TF2 for inference +(such as deploying SavedModels to TF Serving or TF Lite). +Signatures for inference are not expressive enough to support fine-tuning, +and [`tf.function`](https://www.tensorflow.org/api_docs/python/tf/function) +provides a more natural and expressive +[Python API](https://www.tensorflow.org/tutorials/customization/performance) +for the reused model. + +### Relation to model-building libraries + +A Reusable SavedModel uses only TensorFlow 2 primitives, independent of any +particular model-building library like Keras or Sonnet. This facilitates reuse +across model-building libraries, free from dependencies on the original +model-building code. + +Some amount of adaptation will be needed load Reusable SavedModels into or save +them from any given model-building library. 
For Keras, +[hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) +provides the loading, and Keras's built-in saving in the SavedModel format has +been redesigned for TF2 with the goal of providing a superset of this interface +(see the +[RFC](https://github.com/tensorflow/community/blob/master/rfcs/20190509-keras-saved-model.md) +from May 2019). + +### Relation to task-specific "Common SavedModel APIs" + +The interface definition on this page allows for any number and type of inputs +and outputs. The +[Common SavedModel APIs for TF Hub](common_saved_model_apis/index.md) refine +this general interface with usage conventions for specific tasks to make models +easily interchangeable. + +## Interface definition + +### Attributes + +A Reusable SavedModel is a TensorFlow 2 SavedModel such that +`obj = tf.saved_model.load(...)` returns an object that has the following +attributes + + * `__call__`. Required. A tf.function implementing the model's computation + (the "forward pass") subject to the specification below. + + * `variables`: A list of tf.Variable objects, listing all the variables + used by any possible invocation of `__call__`, including both + trainable and non-trainable ones. + + This list can be omitted if empty. + + Note: Conveniently, this name coincides with the attribute synthesized by + `tf.saved_model.load(...)` when loading a TF1 SavedModel to represent + its `GLOBAL_VARIABLES` collection. + + * `trainable_variables`: A list of tf.Variable objects such that + `v.trainable` is true for all elements. + These variables must be a subset of `variables`. + These are the variables to be trained when fine-tuning the object. + The SavedModel creator may choose to omit some variables here that were + originally trainable to indicate that these should not be modified during + fine-tuning. + + This list can be omitted if empty, in particular, if the SavedModel does not + support fine-tuning. + + * `regularization_losses`: A list of tf.functions, each taking zero inputs + and returning a single scalar float tensor. For fine-tuning, the + SavedModel user is advised to include these as additional regularization + terms into the loss (in the simplest case without further scaling). + Typically, these are used to represent weight regularizers. + (For lack of inputs, these tf.functions cannot express + activity regularizers.) + + This list can be omitted if empty, in particular, if the SavedModel does not + support fine-tuning or does not wish to prescribe weight regularization. + +### The `__call__` function + +A Restored SavedModel `obj` has an `obj.__call__` attribute that is +a restored tf.function and allows `obj` to be called as follows. + +Synopsis (pseudo-code): + +```python +outputs = obj(inputs, trainable=..., **kwargs) +``` + +#### Arguments + +The arguments are as follows. + + * There is one positional, required argument with a batch of input activations + of the SavedModel. Its type is one of + + * a single Tensor for a single input, + * a list of Tensors for an ordered sequence of unnamed inputs, + * a dict of Tensors keyed by a particular set of input names. + + (Future revisions of this interface may allow more general nests.) + The SavedModel creator chooses one of those and the tensor shapes + and dtypes. Where useful, some dimensions of the shape should be + undefined (notably batch size). + + * There may be an optional keyword argument `training` that accepts a Python + boolean, `True` or `False`. The default is `False`. 
+ If the model supports fine-tuning, and if its computation differs between + the two (e.g., as in dropout and batch normalization), that distinction + is implemented with this argument. Otherwise, this argument may be absent. + + It is not required that `__call__` accept a Tensor-valued `training` + argument. It falls on the caller to use `tf.cond()` if necessary + to dispatch between them. + + * The SavedModel creator may choose to accept more optional `kwargs` + of particular names. + + * For Tensor-valued arguments, the SavedModel creator defines their + permissible dtypes and shapes. `tf.function` accepts a Python default + value on an argument that is traced with a tf.TensorSpec input. + Such arguments can be used to allow customization of numeric + hyperparameters involved in `__call__` (e.g., dropout rate). + + * For Python-valued arguments, the SavedModel creator defines their + permissible values. Such arguments can be used as flags to make + discrete choices in the traced function (but mind the combinatorial + explosion of traces). + +The restored `__call__` function must provide traces for all permissible +combinations of arguments. Flipping `training` between `True` and `False` +must not change the permissibility of arguments. + +#### Result + +The `outputs` from calling `obj` can be + + * a single Tensor for a single output, + * a list of Tensors for an ordered sequence of unnamed outputs, + * a dict of Tensors keyed by a particular set of output names. + +(Future revisions of this interface may allow more general nests.) +The return type may vary depending on the Python-valued kwargs. +This allows for flags producing extra outputs. +The SavedModel creator defines the output dtypes and shapes and their +dependency on inputs. + + +### Named callables + +A Reusable SavedModel can provide multiple model pieces in the way +described above by putting them into named subobjects, for example, +`obj.foo`, `obj.bar` and so on. +Each subobject provides a `__call__` method and supporting attributes +about the variables etc. specific to that model piece. +For the example above, there would be `obj.foo.__call__`, +`obj.foo.variables` and so on. + +Note that this interface does *not* cover the approach of adding +a bare tf.function directly as `tf.foo`. + +Users of Reusable SavedModels are only expected to handle one level of nesting +(`obj.bar` but not `obj.bar.baz`). (Future revisions of this interface may allow +deeper nesting, and may waive the requirement that the top-level object be +callable itself.) + +## Closing remarks + +### Relation to in-process APIs + +This document describes an interface of a Python class which consists +of primitives like tf.function and tf.Variable that survive a +round-trip through serialization via `tf.saved_model.save()` +and `tf.saved_model.load()`. However, the interface was already present +on the original object that was passed to `tf.saved_model.save()`. +Adaptation to that interface enables the exchange of model pieces +across model-building APIs within a single TensorFlow program. diff --git a/site/en/hub/tf1_hub_module.md b/site/en/hub/tf1_hub_module.md new file mode 100644 index 00000000000..7601878dc1b --- /dev/null +++ b/site/en/hub/tf1_hub_module.md @@ -0,0 +1,198 @@ + +# TF1 Hub format + +At its launch in 2018, TensorFlow Hub offered a single type of asset: TF1 Hub +format for import into TensorFlow 1 programs. 
+ +This page explains how to use TF1 Hub format in TF1 (or the TF1 compatibility +mode of TF2) with the `hub.Module` class and associated APIs. (The typical use +is to build a `tf.Graph`, possibly inside a TF1 `Estimator`, by combining one or +more models in TF1 Hub format with `tf.compat.layers` or `tf.layers`). + +Users of TensorFlow 2 (outside TF1 compatibility mode) must use +[the new API with `hub.load()` or `hub.KerasLayer`](tf2_saved_model.md). The new +API loads the new TF2 SavedModel asset type, but also has limited +[support for loading TF1 Hub format into TF2](migration_tf2.md). + +## Using a model in TF1 Hub format + +### Instantiating a model in TF1 Hub format + +A model in TF1 Hub format is imported into a TensorFlow program by creating a +`hub.Module` object from a string with its URL or filesystem path, such as: + +```python +m = hub.Module("path/to/a/module_dir") +``` +**Note:** See more information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +This adds the module's variables to the current TensorFlow graph. +Running their initializers will read their pre-trained values from disk. +Likewise, tables and other state is added to the graph. + +### Caching Modules + +When creating a module from a URL, the module content is downloaded and cached +in the local system temporary directory. The location where modules are cached +can be overridden using `TFHUB_CACHE_DIR` environment variable. For details, see +[Caching](caching.md). + +### Applying a Module + +Once instantiated, a module `m` can be called zero or more times like a Python +function from tensor inputs to tensor outputs: + +```python +y = m(x) +``` + +Each such call adds operations to the current TensorFlow graph to compute +`y` from `x`. If this involves variables with trained weights, these are +shared between all applications. + +Modules can define multiple named *signatures* in order to allow being applied +in more than one way (similar to how Python objects have *methods*). +A module's documentation should describe the available +signatures. The call above applies the signature named `"default"`. Any +signature can be selected by passing its name to the optional `signature=` +argument. + +If a signature has multiple inputs, they must be passed as a dict, with the keys +defined by the signature. Likewise, if a signature has multiple outputs, these +can be retrieved as a dict by passing `as_dict=True`, under the keys defined by +the signature (the key `"default"` is for the single output returned if +`as_dict=False`). So the most general form of applying a Module looks like: + +```python +outputs = m(dict(apples=x1, oranges=x2), signature="fruit_to_pet", as_dict=True) +y1 = outputs["cats"] +y2 = outputs["dogs"] +``` + +A caller must supply all inputs defined by a signature, but there is no +requirement to use all of a module's outputs. +TensorFlow will run only those parts of the module that end up +as dependencies of a target in `tf.Session.run()`. Indeed, module publishers may +choose to provide various outputs for advanced uses (like activations of +intermediate layers) along with the main outputs. Module consumers should +handle additional outputs gracefully. + +### Trying out alternative modules + +Whenever there are multiple modules for the same task, TensorFlow Hub +encourages to equip them with compatible signatures (interfaces) +such that trying different ones is as easy as varying the module handle +as a string-valued hyperparameter. 
+ +To this end, we maintain a collection of recommended +[Common Signatures](common_signatures/index.md) for popular tasks. + + +## Creating a New Module + +### Compatibility note + +The TF1 Hub format is geared towards TensorFlow 1. It is only partially +supported by TF Hub in TensorFlow 2. Please do consider publishing in the new +[TF2 SavedModel](tf2_saved_model.md) format instead. + +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +### General approach + +To define a new module, a publisher calls `hub.create_module_spec()` with a +function `module_fn`. This function constructs a graph representing the module's +internal structure, using `tf.placeholder()` for inputs to be supplied by +the caller. Then it defines signatures by calling +`hub.add_signature(name, inputs, outputs)` one or more times. + +For example: + +```python +def module_fn(): + inputs = tf.placeholder(dtype=tf.float32, shape=[None, 50]) + layer1 = tf.layers.dense(inputs, 200) + layer2 = tf.layers.dense(layer1, 100) + outputs = dict(default=layer2, hidden_activations=layer1) + # Add default signature. + hub.add_signature(inputs=inputs, outputs=outputs) + +... +spec = hub.create_module_spec(module_fn) +``` + +The result of `hub.create_module_spec()` can be used, instead of a path, +to instantiate a module object within a particular TensorFlow graph. In +such case, there is no checkpoint, and the module instance will use the +variable initializers instead. + +Any module instance can be serialized to disk via its `export(path, session)` +method. Exporting a module serializes its definition together with the current +state of its variables in `session` into the passed path. This can be used +when exporting a module for the first time, as well as when exporting a fine +tuned module. + +For compatibility with TensorFlow Estimators, `hub.LatestModuleExporter` exports +modules from the latest checkpoint, just like `tf.estimator.LatestExporter` +exports the entire model from the latest checkpoint. + +Module publishers should implement a [common +signature](common_signatures/index.md) when possible, so that consumers can +easily exchange modules and find the best one for their problem. + +### Real example + +Take a look at our [text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) +for a real-world example of how to create a module from a common text embedding +format. + + +## Fine-Tuning + +Training the variables of an imported module together with those of the model +around it is called *fine-tuning*. Fine-tuning can result in better quality, but +adds new complications. We advise consumers to look into fine-tuning only after +exploring simpler quality tweaks, and only if the module publisher recommends +it. + +### For Consumers + +To enable fine-tuning, instantiate the module with +`hub.Module(..., trainable=True)` to make its variables trainable and +import TensorFlow's `REGULARIZATION_LOSSES`. If the module has multiple +graph variants, make sure to pick the one appropriate for training. +Usually, that's the one with tags `{"train"}`. 
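
A hedged sketch of what this can look like in a TF1 training graph (the handle, tensors, and optimizer settings are placeholders, not recommendations):

```python
# Sketch only: fine-tune a TF1 Hub format text-embedding module inside a TF1 graph.
m = hub.Module(handle, trainable=True, tags={"train"})
embeddings = m(sentences)                        # sentences: a string Tensor
logits = tf.layers.dense(embeddings, 2)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
loss += tf.losses.get_regularization_loss()      # picks up REGULARIZATION_LOSSES
train_op = tf.train.AdamOptimizer(1e-5).minimize(loss)
```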
+ +Choose a training regime that does not ruin the pre-trained weights, +for example, a lower learning rate than for training from scratch. + +### For Publishers + +To make fine-tuning easier for consumers, please be mindful of the following: + +* Fine-tuning needs regularization. Your module is exported with the + `REGULARIZATION_LOSSES` collection, which is what puts your choice of + `tf.layers.dense(..., kernel_regularizer=...)` etc. into what the consumer + gets from `tf.losses.get_regularization_losses()`. Prefer this way of + defining L1/L2 regularization losses. + +* In the publisher model, avoid defining L1/L2 regularization via the `l1_` + and `l2_regularization_strength` parameters of `tf.train.FtrlOptimizer`, + `tf.train.ProximalGradientDescentOptimizer`, and other proximal optimizers. + These are not exported alongside the module, and setting regularization + strengths globally may not be appropriate for the consumer. Except for L1 + regularization in wide (i.e. sparse linear) or wide & deep models, it should + be possible to use individual regularization losses instead. + +* If you use dropout, batch normalization, or similar training techniques, set + their hyperparameters to values that make sense across many expected uses. + The dropout rate may have to be adjusted to the target problem's propensity + to overfitting. In batch normalization, the momentum (a.k.a. decay + coefficient) should be small enough to enable fine-tuning with small + datasets and/or large batches. For advanced consumers, consider adding a + signature that exposes control over critical hyperparameters. diff --git a/site/en/hub/tf2_saved_model.md b/site/en/hub/tf2_saved_model.md new file mode 100644 index 00000000000..e41337b2548 --- /dev/null +++ b/site/en/hub/tf2_saved_model.md @@ -0,0 +1,289 @@ + +# SavedModels from TF Hub in TensorFlow 2 + +The +[SavedModel format of TensorFlow 2](https://www.tensorflow.org/guide/saved_model) +is the recommended way to share pre-trained models and model pieces on +TensorFlow Hub. It replaces the older [TF1 Hub format](tf1_hub_module.md) and +comes with a new set of APIs. + +This page explains how to reuse TF2 SavedModels in a TensorFlow 2 program with +the low-level `hub.load()` API and its `hub.KerasLayer` wrapper. (Typically, +`hub.KerasLayer` is combined with other `tf.keras.layers` to build a Keras model +or the `model_fn` of a TF2 Estimator.) These APIs can also load the legacy +models in TF1 Hub format, within limits, see the +[compatibility guide](model_compatibility.md). + +Users of TensorFlow 1 can update to TF 1.15 and then use the same APIs. +Older versions of TF1 do not work. + +## Using SavedModels from TF Hub + +### Using a SavedModel in Keras + +[Keras](https://www.tensorflow.org/guide/keras/) is TensorFlow's high-level API +for building deep learning models by composing Keras Layer objects. +The `tensorflow_hub` library provides the class `hub.KerasLayer` that gets +initialized with the URL (or filesystem path) of a SavedModel and then +provides the computation from the SavedModel, including its pre-trained +weights. 
+ +Here is an example of using a pre-trained text embedding: + +```python +import tensorflow as tf +import tensorflow_hub as hub + +hub_url = "https://tfhub.dev/google/nnlm-en-dim128/2" +embed = hub.KerasLayer(hub_url) +embeddings = embed(["A long sentence.", "single-word", "http://example.com"]) +print(embeddings.shape, embeddings.dtype) +``` + +From this, a text classifier can be built in the usual Keras way: + +```python +model = tf.keras.Sequential([ + embed, + tf.keras.layers.Dense(16, activation="relu"), + tf.keras.layers.Dense(1, activation="sigmoid"), +]) +``` + +The [Text classification +colab](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) +is a complete example how to train and evaluate such a classifier. + +The model weights in a `hub.KerasLayer` are set to non-trainable by default. +See the section on fine-tuning below for how to change that. Weights are +shared between all applications of the same layer object, as usual in Keras. + + +### Using a SavedModel in an Estimator + +Users of TensorFlow's +[Estimator](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_estimator) +API for distributed training can use SavedModels from TF Hub by +writing their `model_fn` in terms of `hub.KerasLayer` among other +`tf.keras.layers`. + + +### Behind the scenes: SavedModel downloading and caching + +Using a SavedModel from TensorFlow Hub (or other HTTPS servers that implement +its [hosting](hosting.md) protocol) downloads and decompresses it to the local +filesystem if not already present. The environment variable `TFHUB_CACHE_DIR` +can be set to override the default temporary location for caching the downloaded +and uncompressed SavedModels. For details, see [Caching](caching.md). + +### Using a SavedModel in low-level TensorFlow +#### Model Handles + +SavedModels can be loaded from a specified `handle`, where the `handle` is a +filesystem path, valid TFhub.dev model URL (e.g. "https://tfhub.dev/..."). +Kaggle Models URLs mirror TFhub.dev handles in accordance with our Terms and the +license associated with the model assets, e.g., "https://www.kaggle.com/...". +Handles from Kaggle Models are equivalent to their corresponding TFhub.dev +handle. + +The function `hub.load(handle)` downloads and decompresses a SavedModel +(unless `handle` is already a filesystem path) and then returns the result +of loading it with TensorFlow's built-in function `tf.saved_model.load()`. +Therefore, `hub.load()` can handle any valid SavedModel (unlike its +predecessor `hub.Module` for TF1). + +#### Advanced topic: what to expect from the SavedModel after loading + +Depending on the contents of the SavedModel, the result of +`obj = hub.load(...)` can be invoked in various ways (as explained in +much greater detail in TensorFlow's [SavedModel +Guide](https://www.tensorflow.org/guide/saved_model): + + * The serving signatures of the SavedModel (if any) are represented as a + dictionary of concrete functions and can be called like + `tensors_out = obj.signatures["serving_default"](**tensors_in)`, + with dictionaries of tensors keyed by the respective input and output + names and subject to the signature's shape and dtype constraints. 
+ + * The + [`@tf.function`](https://www.tensorflow.org/api_docs/python/tf/function)-decorated + methods of the saved object (if any) are restored as tf.function objects + that can be called by all combinations of Tensor and non-Tensor arguments + for which the tf.function had been + [traced](https://www.tensorflow.org/tutorials/customization/performance#tracing) + prior to saving. In particular, if there is an `obj.__call__` method + with suitable traces, `obj` itself can be called like a Python function. + A simple example could look like + `output_tensor = obj(input_tensor, training=False)`. + +This leaves enormous liberty in the interfaces that SavedModels can +implement. The [Reusable SavedModels interface](reusable_saved_models.md) +for `obj` establishes conventions such that client code, including adapters +like `hub.KerasLayer`, know how to use the SavedModel. + +Some SavedModels may not follow that convention, especially whole models +not meant to be reused in larger models, and just provide serving signatures. + +The trainable variables in a SavedModel are reloaded as trainable, +and `tf.GradientTape` will watch them by default. See the section on +fine-tuning below for some caveats, and consider avoiding this for starters. +Even if you want to fine-tune, you may want to see if `obj.trainable_variables` +advises to re-train only a subset of the originally trainable variables. + + +## Creating SavedModels for TF Hub + +### Overview + +SavedModel is TensorFlow's standard serialization format for trained models +or model pieces. +It stores the model's trained weights together with the exact TensorFlow +operations to perform its computation. It can be used independently from +the code that created it. In particular, it can be reused across different +high-level model-building APIs like Keras, because TensorFlow operations +are their common basic language. + +### Saving from Keras + +Starting with TensorFlow 2, `tf.keras.Model.save()` and +`tf.keras.models.save_model()` default to the SavedModel format (not HDF5). +The resulting SavedModels that can be used with `hub.load()`, +`hub.KerasLayer` and similar adapters for other high-level APIs +as they become available. + +To share a complete Keras Model, just save it with `include_optimizer=False`. + +To share a piece of a Keras Model, make the piece a Model in itself and then +save that. You can either lay out the code like that from the start.... + +```python +piece_to_share = tf.keras.Model(...) +full_model = tf.keras.Sequential([piece_to_share, ...]) +full_model.fit(...) +piece_to_share.save(...) +``` + +...or cut out the piece to share after the fact (if it aligns with the +layering of your full model): + +```python +full_model = tf.keras.Model(...) 
+sharing_input = full_model.get_layer(...).get_output_at(0) +sharing_output = full_model.get_layer(...).get_output_at(0) +piece_to_share = tf.keras.Model(sharing_input, sharing_output) +piece_to_share.save(..., include_optimizer=False) +``` + +[TensorFlow Models](https://github.com/tensorflow/models) on GitHub uses the +former approach for BERT (see +[nlp/tools/export_tfhub_lib.py](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub_lib.py), +note the split between `core_model` for export and the `pretrainer` for +restoring the checkpoint) and the latter approach for ResNet (see +[legacy/image_classification/tfhub_export.py](https://github.com/tensorflow/models/blob/master/official/legacy/image_classification/resnet/tfhub_export.py)). + +### Saving from low-level TensorFlow + +This requires good familiarity with TensorFlow's [SavedModel +Guide](https://www.tensorflow.org/guide/saved_model). + +If you want to provide more than just a serving signature, you should +implement the [Reusable SavedModel interface](reusable_saved_models.md). +Conceptually, this looks like + +```python +class MyMulModel(tf.train.Checkpoint): + def __init__(self, v_init): + super().__init__() + self.v = tf.Variable(v_init) + self.variables = [self.v] + self.trainable_variables = [self.v] + self.regularization_losses = [ + tf.function(input_signature=[])(lambda: 0.001 * self.v**2), + ] + + @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) + def __call__(self, inputs): + return tf.multiply(inputs, self.v) + +tf.saved_model.save(MyMulModel(2.0), "/tmp/my_mul") + +layer = hub.KerasLayer("/tmp/my_mul") +print(layer([10., 20.])) # [20., 40.] +layer.trainable = True +print(layer.trainable_weights) # [2.] +print(layer.losses) # 0.004 +``` + + +## Fine-Tuning + +Training the already-trained variables of an imported SavedModel together with +those of the model around it is called *fine-tuning* the SavedModel. +This can result in better quality, but often makes the training more +demanding (may take more time, depend more on the optimizer and its +hyperparameters, increase the risk of overfitting and require dataset +augmentation, esp. for CNNs). We advise SavedModel consumers to look into +fine-tuning only after having established a good training regime, +and only if the SavedModel publisher recommends it. + +Fine-tuning changes the "continuous" model parameters that are trained. +It does not change hard-coded transformations, such as tokenizing text +input and mapping tokens to their corresponding entries in an embedding matrix. + +### For SavedModel consumers + +Creating a `hub.KerasLayer` like + +```python +layer = hub.KerasLayer(..., trainable=True) +``` + +enables fine-tuning of the SavedModel loaded by the layer. It adds the +trainable weights and weight regularizers declared in the SavedModel +to the Keras model, and runs the SavedModel's computation in training +mode (think of dropout etc.). + +The [image classification +colab](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) +contains an end-to-end example with optional fine-tuning. + +#### Re-exporting the fine-tuning result + +Advanced users may want to save the results of fine-tuning back into +a SavedModel that can be used instead of the originally loaded one. 
+This can be done with code like:
+
+```python
+loaded_obj = hub.load("https://tfhub.dev/...")
+hub_layer = hub.KerasLayer(loaded_obj, trainable=True, ...)
+
+model = tf.keras.Sequential([..., hub_layer, ...])
+model.compile(...)
+model.fit(...)
+
+export_module_dir = os.path.join(os.getcwd(), "finetuned_model_export")
+tf.saved_model.save(loaded_obj, export_module_dir)
+```
+
+### For SavedModel creators
+
+When creating a SavedModel for sharing on TensorFlow Hub,
+think ahead about whether and how its consumers should fine-tune it,
+and provide guidance in the documentation.
+
+Saving from a Keras Model should make all the mechanics of fine-tuning work
+(saving weight regularization losses, declaring trainable variables, tracing
+`__call__` for both `training=True` and `training=False`, etc.).
+
+Choose a model interface that plays well with gradient flow,
+e.g., output logits instead of softmax probabilities or top-k predictions.
+
+If the model uses dropout, batch normalization, or similar training techniques
+that involve hyperparameters, set them to values that make sense across many
+expected target problems and batch sizes. (As of this writing, saving from
+Keras does not make it easy to let consumers adjust them.)
+
+Weight regularizers on individual layers are saved (with their regularization
+strength coefficients), but weight regularization from within the optimizer
+(like `tf.keras.optimizers.Ftrl(l1_regularization_strength=...)`)
+is lost. Advise consumers of your SavedModel accordingly.
diff --git a/site/en/hub/tutorials/_index.yaml b/site/en/hub/tutorials/_index.yaml
new file mode 100644
index 00000000000..deb98108393
--- /dev/null
+++ b/site/en/hub/tutorials/_index.yaml
@@ -0,0 +1,174 @@
+book_path: /hub/_book.yaml
+project_path: /hub/_project.yaml
+title: Tutorials
+landing_page:
+  custom_css_path: /site-assets/css/style.css
+  nav: left
+  meta_tags:
+  - name: description
+    content: >
+      TensorFlow Hub tutorials to help you get started with using and adapting pre-trained
+      machine learning models to your needs.
+  rows:
+    # Getting started
+    - classname: devsite-landing-row-100
+      items:
+      - description: >
+

TensorFlow Hub is a comprehensive repository of pre-trained + models ready for fine-tuning and deployable anywhere. Download the latest trained models + with a minimal amount of code with the tensorflow_hub library.

+

The following tutorials should help you get
+          started with using and applying models from TF Hub for your needs. Interactive tutorials let you
+          modify them and execute them with your changes. Click the Run in Google Colab
+          button at the top of an interactive tutorial to tinker with it.

+ + # For beginners + - classname: devsite-landing-row-100 + items: + - description: > + +

If you are unfamiliar with machine learning and TensorFlow, you can start by getting
+          an overview of how to classify images and text, detect objects in images, or stylize your own pictures like famous artwork:

+
+    - classname: devsite-landing-row-100
+      items:
+      - classname: tfo-landing-page-card
+        description: >
+
+          Build a Keras model on top of a pre-trained image classifier to distinguish flowers.
+        path: /hub/tutorials/tf2_image_retraining
+        image_path: /hub/images/image_classification.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Use BERT to build a Keras model to solve a text classification sentiment analysis task.
+        path: /tutorials/text/classify_text_with_bert
+        image_path: /hub/images/bert_preprocess.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Let a neural network redraw an image in the style of Picasso, van Gogh, or your own style image.
+        path: /hub/tutorials/tf2_arbitrary_image_stylization
+        image_path: /hub/images/style_transfer.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Detect objects in images using models like FasterRCNN or SSD.
+        path: /hub/tutorials/tf2_object_detection
+        image_path: /hub/images/object_detection.png

Check out more advanced tutorials for how to use NLP, images, audio, and video models from TensorFlow Hub.

+ + # NLP tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Solve common NLP tasks with models from TensorFlow Hub. View all available NLP tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Classify and semantically compare sentences with the Universal Sentence Encoder. + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + image_path: /hub/images/similarity.png + - classname: tfo-landing-page-card + description: > + + Use BERT to solve GLUE benchmark tasks running on TPU. + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + image_path: /hub/images/bert.png + - classname: tfo-landing-page-card + description: > + + Answer cross-lingual questions from the SQuAD dataset using the multilingual universal sentence encoder Q&A model. + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa + image_path: /hub/images/colab_logo.svg + + # Image tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore how to use GANs, super resolution models and more. View all available image tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Generate artificial faces and interpolate between them using GANs. + path: /hub/tutorials/tf_hub_generative_image_module + image_path: /hub/images/gan_faces.gif + - classname: tfo-landing-page-card + description: > + + Enhance the resolution of downsampled images. + path: /hub/tutorials/image_enhancing + image_path: /hub/images/super_resolution.png + - classname: tfo-landing-page-card + description: > + + Fill the masked part of given images. + path: /hub/tutorials/boundless + image_path: /hub/images/boundless.png + + # Audio tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore tutorials using trained models for audio data including pitch recognition and sound classification.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Record yourself singing and detect the pitch of your voice using the SPICE model. + path: /hub/tutorials/spice + image_path: /hub/images/spice_color.png + - classname: tfo-landing-page-card + description: > + + Use the YAMNet model to classify sounds as 521 audio event classes from the AudioSet-YouTube corpus. + path: /hub/tutorials/yamnet + image_path: /hub/images/yamnet.png + + # Video tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Try out trained ML models for video data for action recognition, video interpolation, and more.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Detect one of 400 actions in a video using the Inflated 3D ConvNet model. + path: /hub/tutorials/action_recognition_with_tf_hub + image_path: /hub/images/action_recognition.gif + - classname: tfo-landing-page-card + description: > + + Interpolate between video frames using Inbetweening with 3D Convolutions. + path: /hub/tutorials/tweening_conv3d + image_path: /hub/images/interpolation.png + - classname: tfo-landing-page-card + description: > + + Find videos that are the most related to a text query. + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + image_path: /hub/images/text_video.gif diff --git a/site/en/hub/tutorials/_toc.yaml b/site/en/hub/tutorials/_toc.yaml new file mode 100644 index 00000000000..04d95a267d7 --- /dev/null +++ b/site/en/hub/tutorials/_toc.yaml @@ -0,0 +1,118 @@ +toc: +- heading: "Getting started" + style: divider +- title: Overview + path: /hub/tutorials/_index.yaml + +- heading: "NLP Tutorials" + style: divider +- title: Text classification + path: /hub/tutorials/tf2_text_classification +- title: Classify text with BERT + path: /tutorials/text/classify_text_with_bert + status: external +- title: BERT on TPU + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + status: external +- title: Real-time semantic search + path: /hub/tutorials/tf2_semantic_approximate_nearest_neighbors +- title: Multilingual question answering + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa +- title: "Additional NLP tutorials" + style: accordion + section: + - title: BERT Experts + path: /hub/tutorials/bert_experts + - title: Semantic similarity + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + - title: Text classification on Kaggle + path: /hub/tutorials/text_classification_with_tf_hub_on_kaggle + - title: Bangla article classifier + path: /hub/tutorials/bangla_article_classifier + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings_keras + - title: Multilingual universal sentence encoder + path: /hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder + - title: Text cookbook + path: /hub/tutorials/text_cookbook + - title: SentEval for Universal Sentence Encoder CMLM model. 
+ path: /hub/tutorials/senteval_for_universal_sentence_encoder_cmlm + +- heading: "Image Tutorials" + style: divider +- title: Image classification + path: /hub/tutorials/image_classification +- title: Transfer Learning for Image classification + path: /hub/tutorials/tf2_image_retraining +- title: Style transfer + path: /hub/tutorials/tf2_arbitrary_image_stylization +- title: Large-scale image retrieval with DELF + path: /hub/tutorials/tf_hub_delf_module +- title: Object detection + path: /hub/tutorials/tf2_object_detection +- title: GANs for image generation + path: /hub/tutorials/tf_hub_generative_image_module +- title: Human Pose Estimation + path: /hub/tutorials/movenet +- title: "Additional image tutorials" + style: accordion + section: + - title: "CropNet: Cassava Disease Detection" + path: /hub/tutorials/cropnet_cassava + - title: "CropNet: Fine tuning models for on-device inference" + path: /hub/tutorials/cropnet_on_device + - title: Boundless GAN + path: /hub/tutorials/boundless + - title: Super resolution + path: /hub/tutorials/image_enhancing + - title: HRNet model inference for semantic segmentation + path: /hub/tutorials/hrnet_semantic_segmentation + status: new + +- heading: "Audio Tutorials" + style: divider +- title: Pitch recognition + path: /hub/tutorials/spice +- title: Sound classification + path: /hub/tutorials/yamnet +- title: Automatic speech recognition with Wav2Vec2 + path: /hub/tutorials/wav2vec2_saved_model_finetuning + +- heading: "Video Tutorials" + style: divider +- title: Frame interpolation with FILM + path: /hub/tutorials/tf_hub_film_example + status: new +- title: Action recognition + path: /hub/tutorials/action_recognition_with_tf_hub +- title: Streaming action recognition + path: /hub/tutorials/movinet +- title: Video interpolation + path: /hub/tutorials/tweening_conv3d +- title: Text-to-video retrieval + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + +- title: "Tutorials (TF1)" + style: accordion + status: deprecated + section: + - heading: "Image Tutorials" + - title: Image classification + path: /hub/tutorials/image_feature_vector + - title: Object detection + path: /hub/tutorials/object_detection + - title: BigGAN image generation + path: /hub/tutorials/biggan_generation_with_tf_hub + - title: BigBiGAN image generation + path: /hub/tutorials/bigbigan_with_tf_hub + - title: S3 GAN image generation + path: /hub/tutorials/s3gan_generation_with_tf_hub + - heading: "NLP Tutorials" + - title: Semantic similarity lite + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite + - title: Nearest neighbor index for real-time semantic search + path: /hub/tutorials/semantic_approximate_nearest_neighbors + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings + - title: Wiki40B Language Models + path: /hub/tutorials/wiki40b_lm diff --git a/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb new file mode 100644 index 00000000000..3f586991ba9 --- /dev/null +++ b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "x8Q7Un821X1A" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1W4rIAFt1Ui3" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The 
TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cDq0CIKc1vO_" + }, + "source": [ + "# Action Recognition with an Inflated 3D CNN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6W3FhoP3TxC" + }, + "source": [ + "This Colab demonstrates recognizing actions in video data using the\n", + "[tfhub.dev/deepmind/i3d-kinetics-400/1](https://tfhub.dev/deepmind/i3d-kinetics-400/1) module. More models to detect actions in videos can be found [here](https://tfhub.dev/s?module-type=video-classification).\n", + "\n", + "The underlying model is described in the paper \"[Quo Vadis, Action Recognition? A New\n", + "Model and the Kinetics Dataset](https://arxiv.org/abs/1705.07750)\" by Joao\n", + "Carreira and Andrew Zisserman. The paper was posted on arXiv in May 2017, and\n", + "was published as a CVPR 2017 conference paper.\n", + "The source code is publicly available on\n", + "[github](https://github.com/deepmind/kinetics-i3d).\n", + "\n", + "\"Quo Vadis\" introduced a new architecture for video classification, the Inflated\n", + "3D Convnet or I3D. This architecture achieved state-of-the-art results on the UCF101\n", + "and HMDB51 datasets from fine-tuning these models. I3D models pre-trained on Kinetics\n", + "also placed first in the CVPR 2017 [Charades challenge](http://vuchallenge.org/charades.html).\n", + "\n", + "The original module was trained on the [kinetics-400 dateset](https://www.deepmind.com/open-source/kinetics)\n", + "and knows about 400 different actions.\n", + "Labels for these actions can be found in the\n", + "[label map file](https://github.com/deepmind/kinetics-i3d/blob/master/data/label_map.txt).\n", + "\n", + "In this Colab we will use it recognize activities in videos from a UCF101 dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R_0xc2jyNGRp" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mOHMWsFnITdi" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "USf0UvkYIlKo" + }, + "outputs": [], + "source": [ + "#@title Import the necessary modules\n", + "# TensorFlow and TF-Hub modules.\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "# Some modules to help with reading the UCF101 dataset.\n", + "import random\n", + "import re\n", + "import os\n", + "import tempfile\n", + "import ssl\n", + "import cv2\n", + "import numpy as np\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython import display\n", + "\n", + "from urllib import request # requires python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "IuMMS3TGdws7" + }, + "outputs": [], + "source": [ + "#@title Helper functions for the UCF101 dataset\n", + "\n", + "# Utilities to fetch videos from UCF101 dataset\n", + "UCF_ROOT = \"https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/\"\n", + "_VIDEO_LIST = None\n", + "_CACHE_DIR = tempfile.mkdtemp()\n", + "# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the\n", + "# default Colab environment anymore.\n", + "unverified_context = ssl._create_unverified_context()\n", + "\n", + "def list_ucf_videos():\n", + " \"\"\"Lists videos available 
in UCF101 dataset.\"\"\"\n", + " global _VIDEO_LIST\n", + " if not _VIDEO_LIST:\n", + " index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode(\"utf-8\")\n", + " videos = re.findall(\"(v_[\\w_]+\\.avi)\", index)\n", + " _VIDEO_LIST = sorted(set(videos))\n", + " return list(_VIDEO_LIST)\n", + "\n", + "def fetch_ucf_video(video):\n", + " \"\"\"Fetches a video and cache into local filesystem.\"\"\"\n", + " cache_path = os.path.join(_CACHE_DIR, video)\n", + " if not os.path.exists(cache_path):\n", + " urlpath = request.urljoin(UCF_ROOT, video)\n", + " print(\"Fetching %s => %s\" % (urlpath, cache_path))\n", + " data = request.urlopen(urlpath, context=unverified_context).read()\n", + " open(cache_path, \"wb\").write(data)\n", + " return cache_path\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "def load_video(path, max_frames=0, resize=(224, 224)):\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + " \n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " return np.array(frames) / 255.0\n", + "\n", + "def to_gif(images):\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images, duration=40)\n", + " return embed.embed_file('./animation.gif')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pIKTs-KneUfz" + }, + "outputs": [], + "source": [ + "#@title Get the kinetics-400 labels\n", + "# Get the kinetics-400 action labels from the GitHub repository.\n", + "KINETICS_URL = \"https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt\"\n", + "with request.urlopen(KINETICS_URL) as obj:\n", + " labels = [line.decode(\"utf-8\").strip() for line in obj.readlines()]\n", + "print(\"Found %d labels.\" % len(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GBvmjVICIp3W" + }, + "source": [ + "# Using the UCF101 dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V-QcxdhLIfi2" + }, + "outputs": [], + "source": [ + "# Get the list of videos in the dataset.\n", + "ucf_videos = list_ucf_videos()\n", + " \n", + "categories = {}\n", + "for video in ucf_videos:\n", + " category = video[2:-12]\n", + " if category not in categories:\n", + " categories[category] = []\n", + " categories[category].append(video)\n", + "print(\"Found %d videos in %d categories.\" % (len(ucf_videos), len(categories)))\n", + "\n", + "for category, sequences in categories.items():\n", + " summary = \", \".join(sequences[:2])\n", + " print(\"%-20s %4d videos (%s, ...)\" % (category, len(sequences), summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0ZvVDruN2nU" + }, + "outputs": [], + "source": [ + "# Get a sample cricket video.\n", + "video_path = fetch_ucf_video(\"v_CricketShot_g04_c02.avi\")\n", + "sample_video = load_video(video_path)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "id": "hASLA90YFPTO" + }, + "outputs": [], + "source": [ + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "POf5XgffvXlD" + }, + "outputs": [], + "source": [ + "i3d = hub.load(\"https://tfhub.dev/deepmind/i3d-kinetics-400/1\").signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mDXgaOD1zhMP" + }, + "source": [ + "Run the id3 model and print the top-5 action predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3mTbqA5JGYUx" + }, + "outputs": [], + "source": [ + "def predict(sample_video):\n", + " # Add a batch axis to the sample video.\n", + " model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]\n", + "\n", + " logits = i3d(model_input)['default'][0]\n", + " probabilities = tf.nn.softmax(logits)\n", + "\n", + " print(\"Top 5 actions:\")\n", + " for i in np.argsort(probabilities)[::-1][:5]:\n", + " print(f\" {labels[i]:22}: {probabilities[i] * 100:5.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ykaXQcGRvK4E" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PHsq0lHXCsD4" + }, + "source": [ + "Now try a new video, from: https://commons.wikimedia.org/wiki/Category:Videos_of_sports\n", + "\n", + "How about [this video](https://commons.wikimedia.org/wiki/File:End_of_a_jam.ogv) by Patrick Gillett: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p-mZ9fFPCoNq" + }, + "outputs": [], + "source": [ + "!curl -O https://upload.wikimedia.org/wikipedia/commons/8/86/End_of_a_jam.ogv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lpLmE8rjEbAF" + }, + "outputs": [], + "source": [ + "video_path = \"End_of_a_jam.ogv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CHZJ9qTLErhV" + }, + "outputs": [], + "source": [ + "sample_video = load_video(video_path)[:100]\n", + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ZNLkEZ9Er-c" + }, + "outputs": [], + "source": [ + "to_gif(sample_video)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yskHIRbxEtjS" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "x8Q7Un821X1A" + ], + "name": "action_recognition_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bangla_article_classifier.ipynb b/site/en/hub/tutorials/bangla_article_classifier.ipynb new file mode 100644 index 00000000000..988a68c4023 --- /dev/null +++ b/site/en/hub/tutorials/bangla_article_classifier.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IDdZSPcLtKx4" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-g5By3P4tavy" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vpaLrN0mteAS" + }, + "source": [ + "# Bangla Article Classification With TF-Hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GhN2WtIrBQ4y" + }, + "source": [ + "Caution: In addition to installing Python packages with pip, this notebook uses\n", + "`sudo apt install` to install system packages: `unzip`.\n", + "\n", + "This Colab is a demonstration of using [Tensorflow Hub](https://www.tensorflow.org/hub/) for text classification in non-English/local languages. Here we choose [Bangla](https://en.wikipedia.org/wiki/Bengali_language) as the local language and use pretrained word embeddings to solve a multiclass classification task where we classify Bangla news articles in 5 categories. The pretrained embeddings for Bangla comes from [fastText](https://fasttext.cc/docs/en/crawl-vectors.html) which is a library by Facebook with released pretrained word vectors for 157 languages. \n", + "\n", + "We'll use TF-Hub's pretrained embedding exporter for converting the word embeddings to a text embedding module first and then use the module to train a classifier with [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), Tensorflow's high level user friendly API to build deep learning models. Even if we are using fastText embeddings here, it's possible to export any other embeddings pretrained from other tasks and quickly get results with Tensorflow hub. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9Vt-StAAZguA" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# https://github.com/pypa/setuptools/issues/1694#issuecomment-466010982\n", + "pip install gdown --no-use-pep517" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WcBA19FlDPZO" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt-get install -y unzip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zSeyZMq-BYsu" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import gdown\n", + "import numpy as np\n", + "from sklearn.metrics import classification_report\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9FB7gLU4F54l" + }, + "source": [ + "# Dataset\n", + "\n", + "We will use [BARD](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset) (Bangla Article Dataset) which has around 376,226 articles collected from different Bangla news portals and labelled with 5 categories: economy, state, international, sports, and entertainment. 
We download the file from Google Drive this ([bit.ly/BARD_DATASET](https://bit.ly/BARD_DATASET)) link is referring to from [this](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier) GitHub repository.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zdQrL_rwa-1K" + }, + "outputs": [], + "source": [ + "gdown.download(\n", + " url='https://drive.google.com/uc?id=1Ag0jd21oRwJhVFIBohmX_ogeojVtapLy',\n", + " output='bard.zip',\n", + " quiet=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P2YW4GGa9Y5o" + }, + "outputs": [], + "source": [ + "%%bash\n", + "unzip -qo bard.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "js75OARBF_B8" + }, + "source": [ + "# Export pretrained word vectors to TF-Hub module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-uAicYA6vLsf" + }, + "source": [ + "TF-Hub provides some useful scripts for converting word embeddings to TF-hub text embedding modules [here](https://github.com/tensorflow/hub/tree/master/examples/text_embeddings_v2). To make the module for Bangla or any other languages, we simply have to download the word embedding `.txt` or `.vec` file to the same directory as `export_v2.py` and run the script.\n", + "\n", + "\n", + "The exporter reads the embedding vectors and exports it to a Tensorflow [SavedModel](https://www.tensorflow.org/beta/guide/saved_model). A SavedModel contains a complete TensorFlow program including weights and graph. TF-Hub can load the SavedModel as a [module](https://www.tensorflow.org/hub/api_docs/python/hub/Module), which we will use to build the model for text classification. Since we are using `tf.keras` to build the model, we will use [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), which provides a wrapper for a TF-Hub module to use as a Keras Layer.\n", + "\n", + "First we will get our word embeddings from fastText and embedding exporter from TF-Hub [repo](https://github.com/tensorflow/hub).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DY5Ze6pO1G5" + }, + "outputs": [], + "source": [ + "%%bash\n", + "curl -O https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.vec.gz\n", + "curl -O https://raw.githubusercontent.com/tensorflow/hub/master/examples/text_embeddings_v2/export_v2.py\n", + "gunzip -qf cc.bn.300.vec.gz --k" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PAzdNZaHmdl1" + }, + "source": [ + "Then, we will run the exporter script on our embedding file. Since fastText embeddings have a header line and are pretty large (around 3.3 GB for Bangla after converting to a module) we ignore the first line and export only the first 100, 000 tokens to the text embedding module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Tkv5acr_Q9UU" + }, + "outputs": [], + "source": [ + "%%bash\n", + "python export_v2.py --embedding_file=cc.bn.300.vec --export_path=text_module --num_lines_to_ignore=1 --num_lines_to_use=100000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9WEpmedF_3_" + }, + "outputs": [], + "source": [ + "module_path = \"text_module\"\n", + "embedding_layer = hub.KerasLayer(module_path, trainable=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fQHbmS_D4YIo" + }, + "source": [ + "The text embedding module takes a batch of sentences in a 1D tensor of strings as input and outputs the embedding vectors of shape (batch_size, embedding_dim) corresponding to the sentences. It preprocesses the input by splitting on spaces. Word embeddings are combined to sentence embeddings with the `sqrtn` combiner(See [here](https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup_sparse)). For demonstration we pass a list of Bangla words as input and get the corresponding embedding vectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1MBnaBUihWn" + }, + "outputs": [], + "source": [ + "embedding_layer(['বাস', 'বসবাস', 'ট্রেন', 'যাত্রী', 'ট্রাক']) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4KY8LiFOHmcd" + }, + "source": [ + "# Convert to Tensorflow Dataset \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNguCDNe6bvz" + }, + "source": [ + "Since the dataset is really large instead of loading the entire dataset in memory we will use a generator to yield samples in run-time in batches using [Tensorflow Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) functions. The dataset is also very imbalanced, so, before using the generator, we will shuffle the dataset. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bYv6LqlEChO1" + }, + "outputs": [], + "source": [ + "dir_names = ['economy', 'sports', 'entertainment', 'state', 'international']\n", + "\n", + "file_paths = []\n", + "labels = []\n", + "for i, dir in enumerate(dir_names):\n", + " file_names = [\"/\".join([dir, name]) for name in os.listdir(dir)]\n", + " file_paths += file_names\n", + " labels += [i] * len(os.listdir(dir))\n", + " \n", + "np.random.seed(42)\n", + "permutation = np.random.permutation(len(file_paths))\n", + "\n", + "file_paths = np.array(file_paths)[permutation]\n", + "labels = np.array(labels)[permutation]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8b-UtAP5TL-W" + }, + "source": [ + "We can check the distribution of labels in the training and validation examples after shuffling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mimhWVSzzAmS" + }, + "outputs": [], + "source": [ + "train_frac = 0.8\n", + "train_size = int(len(file_paths) * train_frac)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4BNXFrkotAYu" + }, + "outputs": [], + "source": [ + "# plot training vs validation distribution\n", + "plt.subplot(1, 2, 1)\n", + "plt.hist(labels[0:train_size])\n", + "plt.title(\"Train labels\")\n", + "plt.subplot(1, 2, 2)\n", + "plt.hist(labels[train_size:])\n", + "plt.title(\"Validation labels\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RVbHb2I3TUNA" + }, + "source": [ + "To create a [Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) using a generator, we first write a generator function which reads each of the articles from `file_paths` and the labels from the label array, and yields one training example at each step. We pass this generator function to the [`tf.data.Dataset.from_generator`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator) method and specify the output types. Each training example is a tuple containing an article of `tf.string` data type and one-hot encoded label. We split the dataset with a train-validation split of 80-20 using [`tf.data.Dataset.skip`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#skip) and [`tf.data.Dataset.take`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#take) methods." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eZRGTzEhUi7Q" + }, + "outputs": [], + "source": [ + "def load_file(path, label):\n", + " return tf.io.read_file(path), label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2g4nRflB7fbF" + }, + "outputs": [], + "source": [ + "def make_datasets(train_size):\n", + " batch_size = 256\n", + "\n", + " train_files = file_paths[:train_size]\n", + " train_labels = labels[:train_size]\n", + " train_ds = tf.data.Dataset.from_tensor_slices((train_files, train_labels))\n", + " train_ds = train_ds.map(load_file).shuffle(5000)\n", + " train_ds = train_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + " test_files = file_paths[train_size:]\n", + " test_labels = labels[train_size:]\n", + " test_ds = tf.data.Dataset.from_tensor_slices((test_files, test_labels))\n", + " test_ds = test_ds.map(load_file)\n", + " test_ds = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + "\n", + " return train_ds, test_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8PuuN6el8tv9" + }, + "outputs": [], + "source": [ + "train_data, validation_data = make_datasets(train_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MrdZI6FqPJNP" + }, + "source": [ + "# Model Training and Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgr7YScGVS58" + }, + "source": [ + "Since we have already added a wrapper around our module to use it as any other layer in Keras, we can create a small [Sequential](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential) model which is a linear stack of layers. We can add our text embedding module with `model.add` just like any other layer. We compile the model by specifying the loss and optimizer and train it for 10 epochs. 
The `tf.keras` API can handle Tensorflow Datasets as input, so we can pass a Dataset instance to the fit method for model training. Since we are using the generator function, `tf.data` will handle generating the samples, batching them and feeding them to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WhCqbDK2uUV5" + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHUw807XPPM9" + }, + "outputs": [], + "source": [ + "def create_model():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(shape=[], dtype=tf.string),\n", + " embedding_layer,\n", + " tf.keras.layers.Dense(64, activation=\"relu\"),\n", + " tf.keras.layers.Dense(16, activation=\"relu\"),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + " model.compile(loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=\"adam\", metrics=['accuracy'])\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5J4EXJUmPVNG" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "# Create earlystopping callback\n", + "early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZZ7XJLg2u2No" + }, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OoBkN2tAaXWD" + }, + "outputs": [], + "source": [ + "history = model.fit(train_data, \n", + " validation_data=validation_data, \n", + " epochs=5, \n", + " callbacks=[early_stopping_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XoDk8otmMoT7" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5ZRKGOsXEh4" + }, + "source": [ + "We can visualize the accuracy and loss curves for training and validation data using the `tf.keras.callbacks.History` object returned by the `tf.keras.Model.fit` method, which contains the loss and accuracy value for each epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V6tOnByIOeGn" + }, + "outputs": [], + "source": [ + "# Plot training & validation accuracy values\n", + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])\n", + "plt.title('Model accuracy')\n", + "plt.ylabel('Accuracy')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()\n", + "\n", + "# Plot training & validation loss values\n", + "plt.plot(history.history['loss'])\n", + "plt.plot(history.history['val_loss'])\n", + "plt.title('Model loss')\n", + "plt.ylabel('Loss')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D54IXLqcG8Cq" + }, + "source": [ + "## Prediction\n", + "\n", + "We can get the predictions for the validation data and check the confusion matrix to see the model's performance for each of the 5 classes. Because `tf.keras.Model.predict` method returns an n-d array for probabilities for each class, they can be converted to class labels using `np.argmax`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dptEywzZJk4l" + }, + "outputs": [], + "source": [ + "y_pred = model.predict(validation_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7Dzeml6Pk0ub" + }, + "outputs": [], + "source": [ + "y_pred = np.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T4M3Lzg8jHcB" + }, + "outputs": [], + "source": [ + "samples = file_paths[0:3]\n", + "for i, sample in enumerate(samples):\n", + " f = open(sample)\n", + " text = f.read()\n", + " print(text[0:100])\n", + " print(\"True Class: \", sample.split(\"/\")[0])\n", + " print(\"Predicted Class: \", dir_names[y_pred[i]])\n", + " f.close()\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PlDTIpMBu6h-" + }, + "source": [ + "## Compare Performance\n", + "\n", + "Now we can take the correct labels for the validation data from `labels` and compare them with our predictions to get a [classification_report](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mqrERUCS1Xn7" + }, + "outputs": [], + "source": [ + "y_true = np.array(labels[train_size:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NX5w-NuTKuVP" + }, + "outputs": [], + "source": [ + "print(classification_report(y_true, y_pred, target_names=dir_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5e9m3bV6oXK" + }, + "source": [ + "We can also compare our model's performance with the published results obtained in the original [paper](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset), which had a 0.96 precision .The original authors described many preprocessing steps performed on the dataset, such as dropping punctuations and digits, removing top 25 most frequest stop words. As we can see in the `classification_report`, we also manage to obtain a 0.96 precision and accuracy after training for only 5 epochs without any preprocessing! \n", + "\n", + "In this example, when we created the Keras layer from our embedding module, we set the parameter`trainable=False`, which means the embedding weights will not be updated during training. Try setting it to `True` to reach around 97% accuracy using this dataset after only 2 epochs. " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IDdZSPcLtKx4" + ], + "name": "bangla_article_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bert_experts.ipynb b/site/en/hub/tutorials/bert_experts.ipynb new file mode 100644 index 00000000000..5440909f7cb --- /dev/null +++ b/site/en/hub/tutorials/bert_experts.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "-1vOMEXIhMQt" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pRfq9ZU5hQhg" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mTL0TERThT6z" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FkthMlVk8bHp" + }, + "source": [ + "# BERT Experts from TF-Hub\n", + "\n", + "This colab demonstrates how to:\n", + "* Load BERT models from [TensorFlow Hub](https://tfhub.dev) that have been trained on different tasks including MNLI, SQuAD, and PubMed\n", + "* Use a matching preprocessing model to tokenize raw text and convert it to ids\n", + "* Generate the pooled and sequence output from the token input ids using the loaded model\n", + "* Look at the semantic similarity of the pooled outputs of different sentences\n", + "\n", + "#### Note: This colab should be run with a GPU runtime" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jspO02jDPfPG" + }, + "source": [ + "## Set up and imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r-ed8zj-dbwm" + }, + "outputs": [], + "source": [ + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "czDmtrGKYw_5" + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "from sklearn.metrics import pairwise\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as text # Imports TF ops for preprocessing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GSuDcPSaY5aB" + }, + "outputs": [], + "source": [ + "#@title Configure the model { run: \"auto\" }\n", + "BERT_MODEL = \"https://tfhub.dev/google/experts/bert/wiki_books/2\" # @param {type: \"string\"} [\"https://tfhub.dev/google/experts/bert/wiki_books/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/mnli/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/qnli/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/qqp/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/squad2/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/sst2/2\", \"https://tfhub.dev/google/experts/bert/pubmed/2\", \"https://tfhub.dev/google/experts/bert/pubmed/squad2/2\"]\n", + "# Preprocessing must match the model, but all the above use the same.\n", + "PREPROCESS_MODEL = \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pvaZiGVgwtqw" + }, + "source": [ + "## Sentences\n", + "\n", + "Let's take some sentences from Wikipedia to run through the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tytu-rSpeDNG" + }, + "outputs": [], + "source": [ + "sentences = [\n", + " \"Here We Go Then, You And I is a 1999 album by Norwegian pop artist Morten Abel. 
It was Abel's second CD as a solo artist.\",\n", + " \"The album went straight to number one on the Norwegian album chart, and sold to double platinum.\",\n", + " \"Among the singles released from the album were the songs \\\"Be My Lover\\\" and \\\"Hard To Stay Awake\\\".\",\n", + " \"Riccardo Zegna is an Italian jazz musician.\",\n", + " \"Rajko Maksimović is a composer, writer, and music pedagogue.\",\n", + " \"One of the most significant Serbian composers of our time, Maksimović has been and remains active in creating works for different ensembles.\",\n", + " \"Ceylon spinach is a common name for several plants and may refer to: Basella alba Talinum fruticosum\",\n", + " \"A solar eclipse occurs when the Moon passes between Earth and the Sun, thereby totally or partly obscuring the image of the Sun for a viewer on Earth.\",\n", + " \"A partial solar eclipse occurs in the polar regions of the Earth when the center of the Moon's shadow misses the Earth.\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zI39475kxCKh" + }, + "source": [ + "## Run the model\n", + "\n", + "We'll load the BERT model from TF-Hub, tokenize our sentences using the matching preprocessing model from TF-Hub, then feed in the tokenized sentences to the model. To keep this colab fast and simple, we recommend running on GPU.\n", + "\n", + "Go to **Runtime** → **Change runtime type** to make sure that **GPU** is selected" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x4t6r22ErQg0" + }, + "outputs": [], + "source": [ + "preprocess = hub.load(PREPROCESS_MODEL)\n", + "bert = hub.load(BERT_MODEL)\n", + "inputs = preprocess(sentences)\n", + "outputs = bert(inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gItjCg4315Cv" + }, + "outputs": [], + "source": [ + "print(\"Sentences:\")\n", + "print(sentences)\n", + "\n", + "print(\"\\nBERT inputs:\")\n", + "print(inputs)\n", + "\n", + "print(\"\\nPooled embeddings:\")\n", + "print(outputs[\"pooled_output\"])\n", + "\n", + "print(\"\\nPer token embeddings:\")\n", + "print(outputs[\"sequence_output\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ptiW2mgw6x-l" + }, + "source": [ + "## Semantic similarity\n", + "\n", + "Now let's take a look at the `pooled_output` embeddings of our sentences and compare how similar they are across sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GXrSO2Vc1Qtr" + }, + "outputs": [], + "source": [ + "#@title Helper functions\n", + "\n", + "def plot_similarity(features, labels):\n", + " \"\"\"Plot a similarity matrix of the embeddings.\"\"\"\n", + " cos_sim = pairwise.cosine_similarity(features)\n", + " sns.set(font_scale=1.2)\n", + " cbar_kws=dict(use_gridspec=False, location=\"left\")\n", + " g = sns.heatmap(\n", + " cos_sim, xticklabels=labels, yticklabels=labels,\n", + " vmin=0, vmax=1, cmap=\"Blues\", cbar_kws=cbar_kws)\n", + " g.tick_params(labelright=True, labelleft=False)\n", + " g.set_yticklabels(labels, rotation=0)\n", + " g.set_title(\"Semantic Textual Similarity\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "td6jcT0pJMZ5" + }, + "outputs": [], + "source": [ + "plot_similarity(outputs[\"pooled_output\"], sentences)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tJ4QCyzhSL7B" + }, + "source": [ + "## Learn more\n", + "\n", + "* Find more BERT models on [TensorFlow Hub](https://tfhub.dev)\n", + "* This notebook demonstrates simple inference with BERT, you can find a more advanced tutorial about fine-tuning BERT at [tensorflow.org/official_models/fine_tuning_bert](https://www.tensorflow.org/official_models/fine_tuning_bert)\n", + "* We used just one GPU chip to run the model, you can learn more about how to load models using tf.distribute at [tensorflow.org/tutorials/distribute/save_and_load](https://www.tensorflow.org/tutorials/distribute/save_and_load)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "bert_experts.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb new file mode 100644 index 00000000000..919abc7e354 --- /dev/null +++ b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb @@ -0,0 +1,713 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "# Generating Images with BigBiGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AVvOoEhswyZg" + }, + "source": [ + "This notebook is a demo for the *BigBiGAN* models available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=bigbigan).\n", + "\n", + "BigBiGAN extends standard (Big)GANs by adding an *encoder* module which can be used for unsupervised representation learning. Roughly speaking, the encoder inverts the generator by predicting latents `z` given real data `x`. See the [BigBiGAN paper on arXiv](https://arxiv.org/abs/1907.02544) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigBiGAN generator for a different encoder architecture.\n", + "2. Click **Runtime > Run all** to run each cell in order. Afterwards, the outputs, including visualizations of BigBiGAN samples and reconstructions, should automatically appear below.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Jeff Donahue and Karen Simonyan. [Large Scale Adversarial Representation Learning](https://arxiv.org/abs/1907.02544). *arxiv:1907.02544*, 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DtGFwUKOA9jt" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigBiGAN model with the smaller ResNet-50-based encoder from **`https://tfhub.dev/deepmind/bigbigan-resnet50/1`**.\n", + "To load the larger RevNet-50-x4 based model used to achieve the best representation learning results, comment out the active **`module_path`** setting and uncomment the other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xoY9pl0FBoUS" + }, + "outputs": [], + "source": [ + "module_path = 'https://tfhub.dev/deepmind/bigbigan-resnet50/1' # ResNet-50\n", + "# module_path = 'https://tfhub.dev/deepmind/bigbigan-revnet50x4/1' # RevNet-50 x4" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lr01cszC_vcC" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPdT-hYj1XXQ" + }, + "outputs": [], + "source": [ + "import io\n", + "import IPython.display\n", + "import PIL.Image\n", + "from pprint import pformat\n", + "\n", + "import numpy as np\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ouePZy6-CFJl" + }, + "source": [ + "## Define some functions to display images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MBQPtmrY2N91" + }, + "outputs": [], + "source": [ + "def imgrid(imarray, cols=4, pad=1, padval=255, row_major=True):\n", + " \"\"\"Lays out a [N, H, W, C] image array as a single image grid.\"\"\"\n", + " pad = int(pad)\n", + " if pad < 0:\n", + " raise ValueError('pad must be non-negative')\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=padval)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return grid\n", + "\n", + "def interleave(*args):\n", + " \"\"\"Interleaves input arrays of the same shape along the batch axis.\"\"\"\n", + " if not args:\n", + " raise ValueError('At least one argument is required.')\n", + " a0 = args[0]\n", + " if any(a.shape != a0.shape for a in args):\n", + " raise ValueError('All inputs must have the same shape.')\n", + " if not a0.shape:\n", + " raise ValueError('Inputs must have at least one axis.')\n", + " out = np.transpose(args, [1, 0] + list(range(2, len(a0.shape) + 1)))\n", + " out = out.reshape(-1, *a0.shape[1:])\n", + " return out\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " \"\"\"Displays an image in the given format.\"\"\"\n", + " a = a.astype(np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "def image_to_uint8(x):\n", + " \"\"\"Converts [-1, 1] float array to [0, 255] uint8.\"\"\"\n", + " x = np.asarray(x)\n", + " x = (256. / 2.) 
* (x + 1.)\n", + " x = np.clip(x, 0, 255)\n", + " x = x.astype(np.uint8)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8ASXPMb6CaXR" + }, + "source": [ + "## Load a BigBiGAN TF Hub module and display its available functionality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IuG7G1ToCtaf" + }, + "outputs": [], + "source": [ + "# module = hub.Module(module_path, trainable=True, tags={'train'}) # training\n", + "module = hub.Module(module_path) # inference\n", + "\n", + "for signature in module.get_signature_names():\n", + " print('Signature:', signature)\n", + " print('Inputs:', pformat(module.get_input_info_dict(signature)))\n", + " print('Outputs:', pformat(module.get_output_info_dict(signature)))\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sAY-AmcNCj9_" + }, + "source": [ + "## Define a wrapper class for convenient access to various functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aTKHkxfx1dAL" + }, + "outputs": [], + "source": [ + "class BigBiGAN(object):\n", + "\n", + " def __init__(self, module):\n", + " \"\"\"Initialize a BigBiGAN from the given TF Hub module.\"\"\"\n", + " self._module = module\n", + "\n", + " def generate(self, z, upsample=False):\n", + " \"\"\"Run a batch of latents z through the generator to generate images.\n", + "\n", + " Args:\n", + " z: A batch of 120D Gaussian latents, shape [N, 120].\n", + "\n", + " Returns: a batch of generated RGB images, shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " \"\"\"\n", + " outputs = self._module(z, signature='generate', as_dict=True)\n", + " return outputs['upsampled' if upsample else 'default']\n", + "\n", + " def make_generator_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of generator inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('generate')['z']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def gen_pairs_for_disc(self, z):\n", + " \"\"\"Compute generator input pairs (G(z), z) for discriminator, given z.\n", + "\n", + " Args:\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns: a tuple (G(z), z) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x = self.generate(z)\n", + " return x, z\n", + "\n", + " def encode(self, x, return_all_features=False):\n", + " \"\"\"Run a batch of images x through the encoder.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " return_all_features: If True, return all features computed by the encoder.\n", + " Otherwise (default) just return a sample z_hat.\n", + "\n", + " Returns: the sample z_hat of shape [N, 120] (or a dict of all features if\n", + " return_all_features).\n", + " \"\"\"\n", + " outputs = self._module(x, signature='encode', as_dict=True)\n", + " return outputs if return_all_features else outputs['z_sample']\n", + "\n", + " def make_encoder_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of encoder inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('encode')['x']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def enc_pairs_for_disc(self, x):\n", + " \"\"\"Compute encoder input pairs (x, E(x)) for discriminator, given x.\n", + "\n", + " Args:\n", + " x: A batch of data 
(256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + "\n", + " Returns: a tuple (downsample(x), E(x)) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x_down = tf.nn.avg_pool(x, ksize=2, strides=2, padding='SAME')\n", + " z = self.encode(x)\n", + " return x_down, z\n", + "\n", + " def discriminate(self, x, z):\n", + " \"\"\"Compute the discriminator scores for pairs of data (x, z).\n", + "\n", + " (x, z) must be batches with the same leading batch dimension, and joint\n", + " scores are computed on corresponding pairs x[i] and z[i].\n", + "\n", + " Args:\n", + " x: A batch of data (128x128 RGB images), shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns:\n", + " A dict of scores:\n", + " score_xz: the joint scores for the (x, z) pairs.\n", + " score_x: the unary scores for x only.\n", + " score_z: the unary scores for z only.\n", + " \"\"\"\n", + " inputs = dict(x=x, z=z)\n", + " return self._module(inputs, signature='discriminate', as_dict=True)\n", + "\n", + " def reconstruct_x(self, x, use_sample=True, upsample=False):\n", + " \"\"\"Compute BigBiGAN reconstructions of images x via G(E(x)).\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " use_sample: takes a sample z_hat ~ E(x). Otherwise, deterministically\n", + " use the mean. (Though a sample z_hat may be far from the mean z,\n", + " typically the resulting recons G(z_hat) and G(z) are very\n", + " similar.\n", + " upsample: if set, upsample the reconstruction to the input resolution\n", + " (256x256). Otherwise return the raw lower resolution generator output\n", + " (128x128).\n", + "\n", + " Returns: a batch of recons G(E(x)), shape [N, 256, 256, 3] if\n", + " `upsample`, otherwise [N, 128, 128, 3].\n", + " \"\"\"\n", + " if use_sample:\n", + " z = self.encode(x)\n", + " else:\n", + " z = self.encode(x, return_all_features=True)['z_mean']\n", + " recons = self.generate(z, upsample=upsample)\n", + " return recons\n", + "\n", + " def losses(self, x, z):\n", + " \"\"\"Compute per-module BigBiGAN losses given data & latent sample batches.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [M, 120].\n", + "\n", + " For the original BigBiGAN losses, pass batches of size N=M=2048, with z's\n", + " sampled from a 120D standard Gaussian (e.g., np.random.randn(2048, 120)),\n", + " and x's sampled from the ImageNet (ILSVRC2012) training set with the\n", + " \"ResNet-style\" preprocessing from:\n", + "\n", + " https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_preprocessing.py\n", + "\n", + " Returns:\n", + " A dict of per-module losses:\n", + " disc: loss for the discriminator.\n", + " enc: loss for the encoder.\n", + " gen: loss for the generator.\n", + " \"\"\"\n", + " # Compute discriminator scores on (x, E(x)) pairs.\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " scores_enc_x_dict = self.discriminate(*self.enc_pairs_for_disc(x))\n", + " scores_enc_x = tf.concat([scores_enc_x_dict['score_xz'],\n", + " scores_enc_x_dict['score_x'],\n", + " scores_enc_x_dict['score_z']], axis=0)\n", + "\n", + " # Compute discriminator scores on (G(z), z) pairs.\n", + " scores_gen_z_dict = 
self.discriminate(*self.gen_pairs_for_disc(z))\n", + " scores_gen_z = tf.concat([scores_gen_z_dict['score_xz'],\n", + " scores_gen_z_dict['score_x'],\n", + " scores_gen_z_dict['score_z']], axis=0)\n", + "\n", + " disc_loss_enc_x = tf.reduce_mean(tf.nn.relu(1. - scores_enc_x))\n", + " disc_loss_gen_z = tf.reduce_mean(tf.nn.relu(1. + scores_gen_z))\n", + " disc_loss = disc_loss_enc_x + disc_loss_gen_z\n", + "\n", + " enc_loss = tf.reduce_mean(scores_enc_x)\n", + " gen_loss = tf.reduce_mean(-scores_gen_z)\n", + "\n", + " return dict(disc=disc_loss, enc=enc_loss, gen=gen_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5L5SFfH4C9gu" + }, + "source": [ + "## Create tensors to be used later for computing samples, reconstructions, discriminator scores, and losses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goxtzcb-19NA" + }, + "outputs": [], + "source": [ + "bigbigan = BigBiGAN(module)\n", + "\n", + "# Make input placeholders for x (`enc_ph`) and z (`gen_ph`).\n", + "enc_ph = bigbigan.make_encoder_ph()\n", + "gen_ph = bigbigan.make_generator_ph()\n", + "\n", + "# Compute samples G(z) from encoder input z (`gen_ph`).\n", + "gen_samples = bigbigan.generate(gen_ph)\n", + "\n", + "# Compute reconstructions G(E(x)) of encoder input x (`enc_ph`).\n", + "recon_x = bigbigan.reconstruct_x(enc_ph, upsample=True)\n", + "\n", + "# Compute encoder features used for representation learning evaluations given\n", + "# encoder input x (`enc_ph`).\n", + "enc_features = bigbigan.encode(enc_ph, return_all_features=True)\n", + "\n", + "# Compute discriminator scores for encoder pairs (x, E(x)) given x (`enc_ph`)\n", + "# and generator pairs (G(z), z) given z (`gen_ph`).\n", + "disc_scores_enc = bigbigan.discriminate(*bigbigan.enc_pairs_for_disc(enc_ph))\n", + "disc_scores_gen = bigbigan.discriminate(*bigbigan.gen_pairs_for_disc(gen_ph))\n", + "\n", + "# Compute losses.\n", + "losses = bigbigan.losses(enc_ph, gen_ph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ly7LWnSUDQ_P" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CPnzCHDWFJwx" + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(init)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gcEVS26D-ues" + }, + "source": [ + "# Generator samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LYSA8Zvb-w7S" + }, + "source": [ + "First, we'll visualize samples from the pretrained BigBiGAN generator by sampling generator inputs `z` from a standard Gaussian (via `np.random.randn`) and displaying the images it produces. So far we're not going beyond the capabilites of a standard GAN -- we're just using the generator (and ignoring the encoder) for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9zfpvw8fGNMr" + }, + "outputs": [], + "source": [ + "feed_dict = {gen_ph: np.random.randn(32, 120)}\n", + "_out_samples = sess.run(gen_samples, feed_dict=feed_dict)\n", + "print('samples shape:', _out_samples.shape)\n", + "imshow(imgrid(image_to_uint8(_out_samples), cols=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9v58CTfl8jTc" + }, + "source": [ + "# Load `test_images` from the TF-Flowers dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o0kmzQ4EqKJt" + }, + "source": [ + "BigBiGAN is trained on ImageNet, but as it's too large to work with in this demo, we use the smaller TF-Flowers [1] dataset as our inputs for visualizing reconstructions and computing encoder features.\n", + "\n", + "In this cell we load TF-Flowers (downloading the dataset if needed) and store a fixed batch of 256x256 RGB image samples in a NumPy array `test_images`.\n", + "\n", + "[1] https://www.tensorflow.org/datasets/catalog/tf_flowers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OBgpkMdkUjL-" + }, + "outputs": [], + "source": [ + "def get_flowers_data():\n", + " \"\"\"Returns a [32, 256, 256, 3] np.array of preprocessed TF-Flowers samples.\"\"\"\n", + " import tensorflow_datasets as tfds\n", + " ds, info = tfds.load('tf_flowers', split='train', with_info=True)\n", + "\n", + " # Just get the images themselves as we don't need labels for this demo.\n", + " ds = ds.map(lambda x: x['image'])\n", + "\n", + " # Filter out small images (with minor edge length <256).\n", + " ds = ds.filter(lambda x: tf.reduce_min(tf.shape(x)[:2]) >= 256)\n", + "\n", + " # Take the center square crop of the image and resize to 256x256.\n", + " def crop_and_resize(image):\n", + " imsize = tf.shape(image)[:2]\n", + " minor_edge = tf.reduce_min(imsize)\n", + " start = (imsize - minor_edge) // 2\n", + " stop = start + minor_edge\n", + " cropped_image = image[start[0] : stop[0], start[1] : stop[1]]\n", + " resized_image = tf.image.resize_bicubic([cropped_image], [256, 256])[0]\n", + " return resized_image\n", + " ds = ds.map(crop_and_resize)\n", + "\n", + " # Convert images from [0, 255] uint8 to [-1, 1] float32.\n", + " ds = ds.map(lambda image: tf.cast(image, tf.float32) / (255. / 2.) - 1)\n", + "\n", + " # Take the first 32 samples.\n", + " ds = ds.take(32)\n", + "\n", + " return np.array(list(tfds.as_numpy(ds)))\n", + "\n", + "test_images = get_flowers_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QAFJQU597n2A" + }, + "source": [ + "# Reconstructions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmCQ9N9b7ptM" + }, + "source": [ + "Now we visualize BigBiGAN reconstructions by passing real images through the encoder and back through the generator, computing `G(E(x))` given images `x`.\n", + "Below, input images `x` are shown in the left column, and corresponding reconstructions are shown on the right.\n", + "\n", + "Note that reconstructions are not pixel-perfect matches to the input images; rather, they tend to capture the higher level semantic content of the input while \"forgetting\" most of the low-level detail. 
This suggests the BigBiGAN encoder may learn to capture the types of high level semantic information about images that we'd like to see in a representation learning approach.\n", + "\n", + "Also note that the raw reconstructions of the 256x256 input images are at the lower resolution produced by our generator -- 128x128. We upsample them for visualization purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R2F3eq8aFRle" + }, + "outputs": [], + "source": [ + "test_images_batch = test_images[:16]\n", + "_out_recons = sess.run(recon_x, feed_dict={enc_ph: test_images_batch})\n", + "print('reconstructions shape:', _out_recons.shape)\n", + "\n", + "inputs_and_recons = interleave(test_images_batch, _out_recons)\n", + "print('inputs_and_recons shape:', inputs_and_recons.shape)\n", + "imshow(imgrid(image_to_uint8(inputs_and_recons), cols=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zPpW3qdbEpXL" + }, + "source": [ + "# Encoder features" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2gAW76YxEsZa" + }, + "source": [ + "We now demonstrate how to compute features from the encoder used for standard representation learning evaluations.\n", + "\n", + "These features could be used in a linear or nearest neighbors-based classifier. We include the standard feature taken after the global average pooling (key `avepool_feat`) as well as the larger \"BN+CReLU\" feature (key `bn_crelu_feat`) used to achieve the best results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hpZYe5S_FQEw" + }, + "outputs": [], + "source": [ + "_out_features = sess.run(enc_features, feed_dict={enc_ph: test_images_batch})\n", + "print('AvePool features shape:', _out_features['avepool_feat'].shape)\n", + "print('BN+CReLU features shape:', _out_features['bn_crelu_feat'].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TGzahsms2w9a" + }, + "source": [ + "# Discriminator scores and losses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B2_5BIBN21Hr" + }, + "source": [ + "Finally, we'll compute the discriminator scores and losses on batches of encoder and generator pairs. These losses could be passed into an optimizer to train BigBiGAN.\n", + "\n", + "We use our batch of images above as the encoder inputs `x`, computing the encoder score as `D(x, E(x))`. For the generator inputs we sample `z` from a 120D standard Gaussian via `np.random.randn`, computing the generator score as `D(G(z), z)`.\n", + "\n", + "The discriminator predicts a joint score `score_xz` for the `(x, z)` pairs as well as unary scores `score_x` and `score_z` for `x` and `z` alone, respectively. It's trained to give high (positive) scores to encoder pairs and low (negative) scores to generator pairs. This mostly holds below, though the unary `score_z` is negative in both cases, indicating that the encoder outputs `E(x)` resemble actual samples from a Gaussian." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JJ8Go0dr22-" + }, + "outputs": [], + "source": [ + "feed_dict = {enc_ph: test_images, gen_ph: np.random.randn(32, 120)}\n", + "_out_scores_enc, _out_scores_gen, _out_losses = sess.run(\n", + " [disc_scores_enc, disc_scores_gen, losses], feed_dict=feed_dict)\n", + "print('Encoder scores:', {k: v.mean() for k, v in _out_scores_enc.items()})\n", + "print('Generator scores:', {k: v.mean() for k, v in _out_scores_gen.items()})\n", + "print('Losses:', _out_losses)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9v58CTfl8jTc" + ], + "name": "bigbigan_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..e388f91fbcc --- /dev/null +++ b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cd1dhL4Ykbm7" + }, + "source": [ + "# Generating Images with BigGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "This notebook is a demo for the *BigGAN* image generators available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=biggan).\n", + "\n", + "See the [BigGAN paper on arXiv](https://arxiv.org/abs/1809.11096) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigGAN generator for a different image resolution.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + " * If not, press the **Play** button by the cell to re-render outputs manually.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Andrew Brock, Jeff Donahue, and Karen Simonyan. [Large Scale GAN Training for High Fidelity Natural Image Synthesis](https://arxiv.org/abs/1809.11096). *arxiv:1809.11096*, 2018." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XS1_N6hKj8cz" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigGAN-deep generator for 256x256 images from **`https://tfhub.dev/deepmind/biggan-deep-256/1`**.\n", + "To generate 128x128 or 512x512 images or to use the original BigGAN generators, comment out the active **`module_path`** setting and uncomment one of the others." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJCIhQPClKJ1" + }, + "outputs": [], + "source": [ + "# BigGAN-deep models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-128/1' # 128x128 BigGAN-deep\n", + "module_path = 'https://tfhub.dev/deepmind/biggan-deep-256/1' # 256x256 BigGAN-deep\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-512/1' # 512x512 BigGAN-deep\n", + "\n", + "# BigGAN (original) models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-128/2' # 128x128 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-256/2' # 256x256 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-512/2' # 512x512 BigGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJrTM6hAi0CJ" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lOZnst2jeWDL" + }, + "outputs": [], + "source": [ + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import os\n", + "import io\n", + "import IPython.display\n", + "import numpy as np\n", + "import PIL.Image\n", + "from scipy.stats import truncnorm\n", + "import tensorflow_hub as hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stWb21nlcyCm" + }, + "source": [ + "## Load a BigGAN generator module from TF Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVgwgJiCH3PV" + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "print('Loading BigGAN module from:', module_path)\n", + "module = hub.Module(module_path)\n", + "inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in module.get_input_info_dict().items()}\n", + "output = module(inputs)\n", + "\n", + "print()\n", + 
"print('Inputs:\\n', '\\n'.join(\n", + " ' {}: {}'.format(*kv) for kv in inputs.items()))\n", + "print()\n", + "print('Output:', output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ry62-8SWfuds" + }, + "source": [ + "## Define some functions for sampling and displaying BigGAN images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "46M8prJPDEsV" + }, + "outputs": [], + "source": [ + "input_z = inputs['z']\n", + "input_y = inputs['y']\n", + "input_trunc = inputs['truncation']\n", + "\n", + "dim_z = input_z.shape.as_list()[1]\n", + "vocab_size = input_y.shape.as_list()[1]\n", + "\n", + "def truncated_z_sample(batch_size, truncation=1., seed=None):\n", + " state = None if seed is None else np.random.RandomState(seed)\n", + " values = truncnorm.rvs(-2, 2, size=(batch_size, dim_z), random_state=state)\n", + " return truncation * values\n", + "\n", + "def one_hot(index, vocab_size=vocab_size):\n", + " index = np.asarray(index)\n", + " if len(index.shape) == 0:\n", + " index = np.asarray([index])\n", + " assert len(index.shape) == 1\n", + " num = index.shape[0]\n", + " output = np.zeros((num, vocab_size), dtype=np.float32)\n", + " output[np.arange(num), index] = 1\n", + " return output\n", + "\n", + "def one_hot_if_needed(label, vocab_size=vocab_size):\n", + " label = np.asarray(label)\n", + " if len(label.shape) <= 1:\n", + " label = one_hot(label, vocab_size)\n", + " assert len(label.shape) == 2\n", + " return label\n", + "\n", + "def sample(sess, noise, label, truncation=1., batch_size=8,\n", + " vocab_size=vocab_size):\n", + " noise = np.asarray(noise)\n", + " label = np.asarray(label)\n", + " num = noise.shape[0]\n", + " if len(label.shape) == 0:\n", + " label = np.asarray([label] * num)\n", + " if label.shape[0] != num:\n", + " raise ValueError('Got # noise samples ({}) != # label samples ({})'\n", + " .format(noise.shape[0], label.shape[0]))\n", + " label = one_hot_if_needed(label, vocab_size)\n", + " ims = []\n", + " for batch_start in range(0, num, batch_size):\n", + " s = slice(batch_start, min(num, batch_start + batch_size))\n", + " feed_dict = {input_z: noise[s], input_y: label[s], input_trunc: truncation}\n", + " ims.append(sess.run(output, feed_dict=feed_dict))\n", + " ims = np.concatenate(ims, axis=0)\n", + " assert ims.shape[0] == num\n", + " ims = np.clip(((ims + 1) / 2.0) * 256, 0, 255)\n", + " ims = np.uint8(ims)\n", + " return ims\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " return np.array([(1-a)*A + a*B for a in alphas])\n", + "\n", + "def imgrid(imarray, cols=5, pad=1):\n", + " if imarray.dtype != np.uint8:\n", + " raise ValueError('imgrid input imarray must be uint8')\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=255)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return grid\n", + "\n", + "def imshow(a, format='png', 
jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print(('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format))\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uCeCg3Sdf8Nv" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYJor5bOaVn1" + }, + "outputs": [], + "source": [ + "initializer = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(initializer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SeZ7u3rWd9jz" + }, + "source": [ + "# Explore BigGAN samples of a particular category\n", + "\n", + "Try varying the **`truncation`** value.\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HuCO9tv3IKT2" + }, + "outputs": [], + "source": [ + "#@title Category-conditional sampling { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 10 #@param {type:\"slider\", min:1, max:20, step:1}\n", + "truncation = 0.4 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category = \"933) cheeseburger\"\n", + "\n", + "z = truncated_z_sample(num_samples, truncation, noise_seed)\n", + "y = int(category.split(')')[0])\n", + "\n", + "ims = sample(sess, z, y, truncation=truncation)\n", + "imshow(imgrid(ims, cols=min(num_samples, 5)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hHNXtvuQgKwa" + }, + "source": [ + "# Interpolate between BigGAN samples\n", + "\n", + "Try setting different **`category`**s with the same **`noise_seed`**s, or the same **`category`**s with different **`noise_seed`**s. 
Or go wild and set both any way you like!\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dSAyfDfnVugs" + }, + "outputs": [], + "source": [ + "#@title Interpolation { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 2 #@param {type:\"slider\", min:1, max:5, step:1}\n", + "num_interps = 5 #@param {type:\"slider\", min:2, max:10, step:1}\n", + "truncation = 0.2 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed_A = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_A = \"207) golden retriever\"\n", + "noise_seed_B = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_B = \"8) hen\"\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, *interps.shape[2:]))\n", + "\n", + "z_A, z_B = [truncated_z_sample(num_samples, truncation, noise_seed)\n", + " for noise_seed in [noise_seed_A, noise_seed_B]]\n", + "y_A, y_B = [one_hot([int(category.split(')')[0])] * num_samples)\n", + " for category in [category_A, category_B]]\n", + "\n", + "z_interp = interpolate_and_shape(z_A, z_B, num_interps)\n", + "y_interp = interpolate_and_shape(y_A, y_B, num_interps)\n", + "\n", + "ims = sample(sess, z_interp, y_interp, truncation=truncation)\n", + "imshow(imgrid(ims, cols=num_interps))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "pLOYL1PJAAtK" + ], + "name": "biggan_generation_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bird_vocalization_classifier.ipynb b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb new file mode 100644 index 00000000000..563be9b425a --- /dev/null +++ b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QD3FvutQsaqc" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-5fm9kVRsfuG" + }, + "outputs": [], + "source": [ + "#@title Copyright 2023 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QNDQZiSGtXMu" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1JAO_rv_QEBr" + }, + "source": [ + "# Using Google Bird Vocalization model\n", + "\n", + "The Google Bird Vocalization is a global bird embedding and classification model.\n", + "\n", + "This model expects as input a 5-second audio segment sampled at 32kHz\n", + "\n", + "The model outputs both the logits and the embeddigs for each input window of audio.\n", + "\n", + "On this notebook you'll learn how to feed the audio properly to the model and how to use the logits for inference.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bytIYq0MjEKT" + }, + "outputs": [], + "source": [ + "!pip install -q \"tensorflow_io==0.28.*\"\n", + "!pip install -q librosa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aXXTdq-eq6lk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_io as tfio\n", + "\n", + "import numpy as np\n", + "import librosa\n", + "\n", + "import csv\n", + "import io\n", + "\n", + "from IPython.display import Audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B6mFpgMWQjgk" + }, + "source": [ + "Loading the Model from TFHub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CQ1P3IkpQiya" + }, + "outputs": [], + "source": [ + "model_handle = \"https://tfhub.dev/google/bird-vocalization-classifier/1\"\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3OOw23B3fZT6" + }, + "source": [ + "Lets load the labels that the model was trained on.\n", + "\n", + "The labels file is in the assets forlder under label.csv. Each line is an ebird id." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5i-R4k9ZhwN" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " with open(labels_path) as csv_file:\n", + " csv_reader = csv.reader(csv_file, delimiter=',')\n", + " class_names = [mid for mid, desc in csv_reader]\n", + " return class_names[1:]\n", + "\n", + "labels_path = hub.resolve(model_handle) + \"/assets/label.csv\"\n", + "classes = class_names_from_csv(labels_path)\n", + "print(classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b2JYPafeRRi_" + }, + "source": [ + "The ```frame_audio``` function is based on the [Chirp lib](https://github.com/google-research/chirp/blob/10c5faa325a3c3468fa6f18a736fc1aeb9bf8129/chirp/inference/interface.py#L128) version but using tf.signal instead of librosa.\n", + "\n", + "The `ensure_sample_rate` is a function to make sure that any audio used with the model has the expected sample rate of 32kHz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t65gi_DTrRaa" + }, + "outputs": [], + "source": [ + "def frame_audio(\n", + " audio_array: np.ndarray,\n", + " window_size_s: float = 5.0,\n", + " hop_size_s: float = 5.0,\n", + " sample_rate = 32000,\n", + " ) -> np.ndarray:\n", + " \"\"\"Helper function for framing audio for inference.\"\"\"\n", + " if window_size_s is None or window_size_s < 0:\n", + " return audio_array[np.newaxis, :]\n", + " frame_length = int(window_size_s * sample_rate)\n", + " hop_length = int(hop_size_s * sample_rate)\n", + " framed_audio = tf.signal.frame(audio_array, frame_length, hop_length, pad_end=True)\n", + " return framed_audio\n", + "\n", + "def ensure_sample_rate(waveform, original_sample_rate,\n", + " desired_sample_rate=32000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " waveform = tfio.audio.resample(waveform, original_sample_rate, desired_sample_rate)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G7uAuI4f6ehb" + }, + "source": [ + "Lets load a file from Wikipedia.\n", + "\n", + "To be more precise, the audio of a [Common Blackbird](https://es.wikipedia.org/wiki/Turdus_merula)\n", + "\n", + "|
 Common Blackbird |\n", + "|:--:|\n", + "| *By Andreas Trepte - Own work, CC BY-SA 2.5, Link*
|\n", + "\n", + "\n", + "The audio was contributed by Oona Räisänen (Mysid) under the public domain license." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "whkmGeJ9lmyd" + }, + "outputs": [], + "source": [ + "!curl -O \"https://upload.wikimedia.org/wikipedia/commons/7/7c/Turdus_merula_2.ogg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff6nOV2EurAO" + }, + "outputs": [], + "source": [ + "turdus_merula = \"Turdus_merula_2.ogg\"\n", + "\n", + "audio, sample_rate = librosa.load(turdus_merula)\n", + "\n", + "sample_rate, wav_data_turdus = ensure_sample_rate(audio, sample_rate)\n", + "Audio(wav_data_turdus, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sjpKLk9K7TTV" + }, + "source": [ + "The audio has 24 seconds and the model expects chunks of 5 seconds.\n", + "\n", + "The `frame_audio` function can fix that and split the audio in proper frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzgK0xWw9g8X" + }, + "outputs": [], + "source": [ + "fixed_tm = frame_audio(wav_data_turdus)\n", + "fixed_tm.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rU5-UqaCAVZ7" + }, + "source": [ + "Let's apply the model only on the first frame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0zveWSOU9QBC" + }, + "outputs": [], + "source": [ + "logits, embeddings = model.infer_tf(fixed_tm[:1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osmRNWciEEuG" + }, + "source": [ + "The label.csv file contains ebirds ids.\n", + "The ebird id for Turdus Merula is eurbla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E-UehjA6Acn_" + }, + "outputs": [], + "source": [ + "probabilities = tf.nn.softmax(logits)\n", + "argmax = np.argmax(probabilities)\n", + "print(f\"The audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[0][argmax]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bGK84egXBg2f" + }, + "source": [ + "Lets apply the model on all the frames now:\n", + "\n", + "*note*: this code is also based on the [Chirp library](https://github.com/google-research/chirp/blob/d6ff5e7cee3865940f31697bf4b70176c1072572/chirp/inference/models.py#L174)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UT_Im9i50EGy" + }, + "outputs": [], + "source": [ + "all_logits, all_embeddings = model.infer_tf(fixed_tm[:1])\n", + "for window in fixed_tm[1:]:\n", + " logits, embeddings = model.infer_tf(window[np.newaxis, :])\n", + " all_logits = np.concatenate([all_logits, logits], axis=0)\n", + "\n", + "all_logits.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kKuJWq4SxyR1" + }, + "outputs": [], + "source": [ + "frame = 0\n", + "for frame_logits in all_logits:\n", + " probabilities = tf.nn.softmax(frame_logits)\n", + " argmax = np.argmax(probabilities)\n", + " print(f\"For frame {frame}, the audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[argmax]}\")\n", + " frame += 1" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "bird_vocalization_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + 
"nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/boundless.ipynb b/site/en/hub/tutorials/boundless.ipynb new file mode 100644 index 00000000000..f53fc5bb004 --- /dev/null +++ b/site/en/hub/tutorials/boundless.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9veUEV0CfmHX" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "BlCInyRifxHS" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_LRMeRxCfzC4" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QOjczJJ4gWHS" + }, + "source": [ + "# Boundless Colab\n", + "\n", + "Welcome to the Boundless model Colab! This notebook will take you through the steps of running the model on images and visualize the results.\n", + "\n", + "## Overview\n", + "\n", + "Boundless is a model for image extrapolation. This model takes an image, internally masks a portion of it ([1/2](https://tfhub.dev/google/boundless/half/1), [1/4](https://tfhub.dev/google/boundless/quarter/1), [3/4](https://tfhub.dev/google/boundless/three_quarter/1)) and completes the masked part. For more details refer to [Boundless: Generative Adversarial Networks for Image Extension](https://arxiv.org/pdf/1908.07007.pdf) or the model documentation on TensorFlow Hub." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hDKbpAEZf8Lt" + }, + "source": [ + "## Imports and setup\n", + "\n", + "Start with the base imports:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xJMFtTqPr7lf" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from io import BytesIO\n", + "from PIL import Image as PilImage\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pigUDIXtciQO" + }, + "source": [ + "## Create a function for reading an image\n", + "\n", + "Create a utility function to help load an image and format it for the model (257x257x3). This method will also crop the image to a square to avoid distortion and you can use it with local images or from the internet." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KTEVPgXH6rtV" + }, + "outputs": [], + "source": [ + " def read_image(filename):\n", + " fd = None\n", + " if(filename.startswith('http')):\n", + " fd = urlopen(filename)\n", + " else:\n", + " fd = tf.io.gfile.GFile(filename, 'rb')\n", + "\n", + " pil_image = PilImage.open(fd)\n", + " width, height = pil_image.size\n", + " # crop to make the image square\n", + " pil_image = pil_image.crop((0, 0, height, height))\n", + " pil_image = pil_image.resize((257,257),PilImage.LANCZOS)\n", + " image_unscaled = np.array(pil_image)\n", + " image_np = np.expand_dims(\n", + " image_unscaled.astype(np.float32) / 255., axis=0)\n", + " return image_np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lonrLxuKcsL0" + }, + "source": [ + "## Create a visualization function\n", + "\n", + "Create a visualization function to show the original image side-by-side with the masked version and the \"filled\" version, both generated by the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j7AkoMFG7r-O" + }, + "outputs": [], + "source": [ + "def visualize_output_comparison(img_original, img_masked, img_filled):\n", + " plt.figure(figsize=(24,12))\n", + " plt.subplot(131)\n", + " plt.imshow((np.squeeze(img_original)))\n", + " plt.title(\"Original\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(132)\n", + " plt.imshow((np.squeeze(img_masked)))\n", + " plt.title(\"Masked\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(133)\n", + " plt.imshow((np.squeeze(img_filled)))\n", + " plt.title(\"Generated\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8rwaCWmxdJGH" + }, + "source": [ + "## Load an image\n", + "\n", + "Now you can load a sample image. Feel free to use your own image by uploading it to the Colab notebook. Remember that the model may have some limitations regarding human images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "92w-Jfbm60XA" + }, + "outputs": [], + "source": [ + "wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/Nusfjord_road%2C_2010_09.jpg/800px-Nusfjord_road%2C_2010_09.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/Beech_forest_M%C3%A1tra_in_winter.jpg/640px-Beech_forest_M%C3%A1tra_in_winter.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/b/b2/Marmolada_Sunset.jpg/640px-Marmolada_Sunset.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/9/9d/Aegina_sunset.jpg/640px-Aegina_sunset.jpg\"\n", + "\n", + "input_img = read_image(wikimedia)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4lIkmZL_dtyX" + }, + "source": [ + "## Select a model from TensorFlow Hub\n", + "\n", + "On TensorFlow Hub there are three versions of the Boundless model: Half, Quarter and Three Quarters.\n", + "In the following cell you can choose any of the models and apply them on your image. If you want to pick another model, select it below and then run the following cells." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B3myNctEQ5GE" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\" }\n", + "model_name = 'Boundless Quarter' # @param ['Boundless Half', 'Boundless Quarter', 'Boundless Three Quarters']\n", + "model_handle_map = {\n", + " 'Boundless Half' : 'https://tfhub.dev/google/boundless/half/1',\n", + " 'Boundless Quarter' : 'https://tfhub.dev/google/boundless/quarter/1', \n", + " 'Boundless Three Quarters' : 'https://tfhub.dev/google/boundless/three_quarter/1'\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aSJFeNNSeOn8" + }, + "source": [ + "After choosing your model, you can load it from TensorFlow Hub.\n", + "\n", + "**Note**: You can point to a model handle to read the model's documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IDKMNyYSWsj" + }, + "outputs": [], + "source": [ + "print(\"Loading model {} ({})\".format(model_name, model_handle))\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4G7CPOaeuQb" + }, + "source": [ + "## Perform inference\n", + "\n", + "The boundless model has two outputs:\n", + "\n", + "* The input image with a mask applied\n", + "* The masked image with the extrapolation to complete it\n", + "\n", + "You can compare these two images with a visualization as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W7uCAuKxSd-M" + }, + "outputs": [], + "source": [ + "result = model.signatures['default'](tf.constant(input_img))\n", + "generated_image = result['default']\n", + "masked_image = result['masked_image']\n", + "\n", + "visualize_output_comparison(input_img, masked_image, generated_image)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "boundless.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings.ipynb b/site/en/hub/tutorials/cord_19_embeddings.ipynb new file mode 100644 index 00000000000..01f43e5f9a9 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9VusdTAH0isl" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/1)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L69VQv2Z0isl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity('ERROR')\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "try:\n", + " from google.colab import data_table\n", + " def display_df(df):\n", + " return data_table.DataTable(df, include_index=False)\n", + "except ModuleNotFoundError:\n", + " # If google-colab is not available, just display the raw DataFrame\n", + " def display_df(df):\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "\n", + "with tf.Graph().as_default():\n", + " # Load the module\n", + " query_input = tf.placeholder(tf.string)\n", + " module = hub.Module('https://tfhub.dev/tensorflow/cord-19/swivel-128d/1')\n", + " embeddings = module(query_input)\n", + "\n", + " with tf.train.MonitoredTrainingSession() as sess:\n", + "\n", + " # Generate embeddings for some terms\n", + " queries = [\n", + " # Related viruses\n", + " \"coronavirus\", \"SARS\", \"MERS\",\n", + " # Regions\n", + " \"Italy\", \"Spain\", \"Europe\",\n", + " # Symptoms\n", + " \"cough\", \"fever\", \"throat\"\n", + " ]\n", + "\n", + " features = sess.run(embeddings, feed_dict={query_input: queries})\n", + " plot_correlation(queries, features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. 
\"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-FB19HLfVp2V" + }, + "outputs": [], + "source": [ + "#@title Set up the dataset from TFDS\n", + "\n", + "class Dataset:\n", + " \"\"\"Build a dataset from a TFDS dataset.\"\"\"\n", + " def __init__(self, tfds_name, feature_name, label_name):\n", + " self.dataset_builder = tfds.builder(tfds_name)\n", + " self.dataset_builder.download_and_prepare()\n", + " self.feature_name = feature_name\n", + " self.label_name = label_name\n", + " \n", + " def get_data(self, for_eval):\n", + " splits = THE_DATASET.dataset_builder.info.splits\n", + " if tfds.Split.TEST in splits:\n", + " split = tfds.Split.TEST if for_eval else tfds.Split.TRAIN\n", + " else:\n", + " SPLIT_PERCENT = 80\n", + " split = \"train[{}%:]\".format(SPLIT_PERCENT) if for_eval else \"train[:{}%]\".format(SPLIT_PERCENT)\n", + " return self.dataset_builder.as_dataset(split=split)\n", + "\n", + " def num_classes(self):\n", + " return self.dataset_builder.info.features[self.label_name].num_classes\n", + "\n", + " def class_names(self):\n", + " return self.dataset_builder.info.features[self.label_name].names\n", + "\n", + " def preprocess_fn(self, data):\n", + " return data[self.feature_name], data[self.label_name]\n", + "\n", + " def example_fn(self, data):\n", + " feature, label = self.preprocess_fn(data)\n", + " return {'feature': feature, 'label': label}, label\n", + "\n", + "\n", + "def get_example_data(dataset, num_examples, **data_kw):\n", + " \"\"\"Show example data\"\"\"\n", + " with tf.Session() as sess:\n", + " batched_ds = dataset.get_data(**data_kw).take(num_examples).map(dataset.preprocess_fn).batch(num_examples)\n", + " it = tf.data.make_one_shot_iterator(batched_ds).get_next()\n", + " data = sess.run(it)\n", + " return data\n", + "\n", + "\n", + "TFDS_NAME = 'scicite' #@param {type: \"string\"}\n", + "TEXT_FEATURE_NAME = 'string' #@param {type: \"string\"}\n", + "LABEL_NAME = 'label' #@param {type: \"string\"}\n", + "THE_DATASET = Dataset(TFDS_NAME, TEXT_FEATURE_NAME, LABEL_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 20 #@param {type:\"integer\"}\n", + "data = get_example_data(THE_DATASET, NUM_EXAMPLES, for_eval=False)\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [THE_DATASET.class_names()[x] for x in data[1]]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citaton intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using an Estimator. Let's set up the input_fns to read the dataset into the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "OldapWmKSGsW" + }, + "outputs": [], + "source": [ + "def preprocessed_input_fn(for_eval):\n", + " data = THE_DATASET.get_data(for_eval=for_eval)\n", + " data = data.map(THE_DATASET.example_fn, num_parallel_calls=1)\n", + " return data\n", + "\n", + "\n", + "def input_fn_train(params):\n", + " data = preprocessed_input_fn(for_eval=False)\n", + " data = data.repeat(None)\n", + " data = data.shuffle(1024)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_eval(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.repeat(1)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_predict(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KcrmWUkVKg2u" + }, + "source": [ + "Let's build a model which use the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff0uKqJCA9zh" + }, + "outputs": [], + "source": [ + "def model_fn(features, labels, mode, params):\n", + " # Embed the text\n", + " embed = hub.Module(params['module_name'], trainable=params['trainable_module'])\n", + " embeddings = embed(features['feature'])\n", + "\n", + " # Add a linear layer on top\n", + " logits = tf.layers.dense(\n", + " embeddings, units=THE_DATASET.num_classes(), activation=None)\n", + " predictions = tf.argmax(input=logits, axis=1)\n", + "\n", + " if mode == tf.estimator.ModeKeys.PREDICT:\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " predictions={\n", + " 'logits': logits,\n", + " 'predictions': predictions,\n", + " 'features': features['feature'],\n", + " 'labels': features['label']\n", + " })\n", + " \n", + " # Set up a multi-class classification head\n", + " loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " labels=labels, logits=logits)\n", + " loss = tf.reduce_mean(loss)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate=params['learning_rate'])\n", + " train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())\n", + " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", + "\n", + " elif mode == tf.estimator.ModeKeys.EVAL:\n", + " accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)\n", + " precision = tf.metrics.precision(labels=labels, predictions=predictions)\n", + " recall = tf.metrics.recall(labels=labels, predictions=predictions)\n", + "\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " loss=loss,\n", + " eval_metric_ops={\n", + " 'accuracy': accuracy,\n", + " 'precision': precision,\n", + " 'recall': recall,\n", + " })\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparmeters { run: 
\"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/1' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "STEPS = 8000#@param {type: \"integer\"}\n", + "EVAL_EVERY = 200 #@param {type: \"integer\"}\n", + "BATCH_SIZE = 10 #@param {type: \"integer\"}\n", + "LEARNING_RATE = 0.01 #@param {type: \"number\"}\n", + "\n", + "params = {\n", + " 'batch_size': BATCH_SIZE,\n", + " 'learning_rate': LEARNING_RATE,\n", + " 'module_name': EMBEDDING,\n", + " 'trainable_module': TRAINABLE_MODULE\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "estimator = tf.estimator.Estimator(functools.partial(model_fn, params=params))\n", + "metrics = []\n", + "\n", + "for step in range(0, STEPS, EVAL_EVERY):\n", + " estimator.train(input_fn=functools.partial(input_fn_train, params=params), steps=EVAL_EVERY)\n", + " step_metrics = estimator.evaluate(input_fn=functools.partial(input_fn_eval, params=params))\n", + " print('Global step {}: loss {:.3f}, accuracy {:.3f}'.format(step, step_metrics['loss'], step_metrics['accuracy']))\n", + " metrics.append(step_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RUNGAeyf1ygC" + }, + "outputs": [], + "source": [ + "global_steps = [x['global_step'] for x in metrics]\n", + "fig, axes = plt.subplots(ncols=2, figsize=(20,8))\n", + "\n", + "for axes_index, metric_names in enumerate([['accuracy', 'precision', 'recall'],\n", + " ['loss']]):\n", + " for metric_name in metric_names:\n", + " axes[axes_index].plot(global_steps, [x[metric_name] for x in metrics], label=metric_name)\n", + " axes[axes_index].legend()\n", + " axes[axes_index].set_xlabel(\"Global Step\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1biWylvB6ayg" + }, + "source": [ + "We can see that the loss quickly decreases while especially the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zK_NJXtoyG2o" + }, + "outputs": [], + "source": [ + "predictions = estimator.predict(functools.partial(input_fn_predict, params))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nlxFER_Oriam" + }, + "outputs": [], + "source": [ + "first_10_predictions = list(itertools.islice(predictions, 10))\n", + "\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [pred['features'].decode('utf8') for pred in first_10_predictions],\n", + " LABEL_NAME: [THE_DATASET.class_names()[pred['labels']] for pred in first_10_predictions],\n", + " 'prediction': [THE_DATASET.class_names()[pred['predictions']] for pred in first_10_predictions]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the times, indicating that it can embed scientific sentences pretty well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/1\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/1/tensorboard/full_projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "5wFF5JFyD2Ki" + ], + "name": "cord_19_embeddings.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb new file mode 100644 index 00000000000..388de741e34 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yI6Mh3-P0_Pk" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/3)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gVWOrccw0_Pl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "from tqdm import trange" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "# Generate embeddings for some terms\n", + "queries = [\n", + " # Related viruses\n", + " 'coronavirus', 'SARS', 'MERS',\n", + " # Regions\n", + " 'Italy', 'Spain', 'Europe',\n", + " # Symptoms\n", + " 'cough', 'fever', 'throat'\n", + "]\n", + "\n", + "module = hub.load('https://tfhub.dev/tensorflow/cord-19/swivel-128d/3')\n", + "embeddings = module(queries)\n", + "\n", + "plot_correlation(queries, embeddings)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. \"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. 
Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ghc-CzT8DDaZ" + }, + "outputs": [], + "source": [ + "builder = tfds.builder(name='scicite')\n", + "builder.download_and_prepare()\n", + "train_data, validation_data, test_data = builder.as_dataset(\n", + " split=('train', 'validation', 'test'),\n", + " as_supervised=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 10#@param {type:\"integer\"}\n", + "\n", + "TEXT_FEATURE_NAME = builder.info.supervised_keys[0]\n", + "LABEL_NAME = builder.info.supervised_keys[1]\n", + "\n", + "def label2str(numeric_label):\n", + " m = builder.info.features[LABEL_NAME].names\n", + " return m[numeric_label]\n", + "\n", + "data = next(iter(train_data.batch(NUM_EXAMPLES)))\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.numpy().decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [label2str(x) for x in data[1]]\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citaton intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using Keras. Let's build a model which use the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparameters { run: \"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/3' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "\n", + "hub_layer = hub.KerasLayer(EMBEDDING, input_shape=[], \n", + " dtype=tf.string, trainable=TRAINABLE_MODULE)\n", + "\n", + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(3))\n", + "model.summary()\n", + "model.compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "EPOCHS = 35#@param {type: \"integer\"}\n", + "BATCH_SIZE = 32#@param {type: \"integer\"}\n", + "\n", + "history = model.fit(train_data.shuffle(10000).batch(BATCH_SIZE),\n", + " epochs=EPOCHS,\n", + " validation_data=validation_data.batch(BATCH_SIZE),\n", + " verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sKE7kEyLJQZ" + }, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "def display_training_curves(training, validation, title, subplot):\n", + " if subplot%10==1: # set up the subplots on the first call\n", + " plt.subplots(figsize=(10,10), facecolor='#F0F0F0')\n", + " plt.tight_layout()\n", + " ax = plt.subplot(subplot)\n", + " ax.set_facecolor('#F8F8F8')\n", + " 
ax.plot(training)\n", + " ax.plot(validation)\n", + " ax.set_title('model '+ title)\n", + " ax.set_ylabel(title)\n", + " ax.set_xlabel('epoch')\n", + " ax.legend(['train', 'valid.'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nnQfxevhLKld" + }, + "outputs": [], + "source": [ + "display_training_curves(history.history['accuracy'], history.history['val_accuracy'], 'accuracy', 211)\n", + "display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 212)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BjvtOw72Lpyw" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "And let's see how the model performs. Two values will be returned. Loss (a number which represents our error, lower values are better), and accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y0ExC8D0LX8m" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_data.batch(512), verbose=2)\n", + "\n", + "for name, value in zip(model.metrics_names, results):\n", + " print('%s: %.3f' % (name, value))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dWp5OWeTL2EW" + }, + "source": [ + "We can see that the loss quickly decreases while especially the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzHzAOaaOVC0" + }, + "outputs": [], + "source": [ + "prediction_dataset = next(iter(test_data.batch(20)))\n", + "\n", + "prediction_texts = [ex.numpy().decode('utf8') for ex in prediction_dataset[0]]\n", + "prediction_labels = [label2str(x) for x in prediction_dataset[1]]\n", + "\n", + "predictions = [\n", + " label2str(x) for x in np.argmax(model.predict(prediction_texts), axis=-1)]\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: prediction_texts,\n", + " LABEL_NAME: prediction_labels,\n", + " 'prediction': predictions\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the times, indicating that it can embed scientific sentences pretty well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/3\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/3/tensorboard/projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "cord_19_embeddings_keras.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_cassava.ipynb b/site/en/hub/tutorials/cropnet_cassava.ipynb new file mode 100644 index 00000000000..926b5395e41 --- /dev/null +++ b/site/en/hub/tutorials/cropnet_cassava.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtNtfcHHoHNP" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZwnHZ70oUIM" + }, + "source": [ + "# CropNet: Cassava Disease Detection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6sg9wHP9oR3q" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "grEgSWu2iTxm" + }, + "source": [ + "This notebook shows how to use the CropNet [cassava disease classifier](https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2) model from **TensorFlow Hub**. The model classifies images of cassava leaves into one of 6 classes: *bacterial blight, brown streak disease, green mite, mosaic disease, healthy, or unknown*.\n", + "\n", + "This colab demonstrates how to:\n", + " * Load the https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2 model from **TensorFlow Hub**\n", + " * Load the [cassava](https://www.tensorflow.org/datasets/catalog/cassava) dataset from **TensorFlow Datasets (TFDS)**\n", + " * Classify images of cassava leaves into 4 distinct cassava disease categories or as healthy or unknown.\n", + " * Evaluate the *accuracy* of the classifier and look at how *robust* the model is when applied to out of domain images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bKn4Fiq2OD7u" + }, + "source": [ + "## Imports and setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgjpSoYqJIz" + }, + "outputs": [], + "source": [ + "!pip install matplotlib==3.2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "FIP4rkjp45MG" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mIqmq_qmWw78" + }, + "outputs": [], + "source": [ + "#@title Helper function for displaying examples\n", + "def plot(examples, predictions=None):\n", + " # Get the images, labels, and optionally predictions\n", + " images = examples['image']\n", + " labels = examples['label']\n", + " batch_size = len(images)\n", + " if predictions is None:\n", + " predictions = batch_size * [None]\n", + "\n", + " # Configure the layout of the grid\n", + " x = np.ceil(np.sqrt(batch_size))\n", + " y = np.ceil(batch_size / x)\n", + " fig = plt.figure(figsize=(x * 6, y * 7))\n", + "\n", + " for i, (image, label, prediction) in enumerate(zip(images, labels, predictions)):\n", + " # Render the image\n", + " ax = fig.add_subplot(x, y, i+1)\n", + " ax.imshow(image, aspect='auto')\n", + " ax.grid(False)\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + " # Display the label and optionally prediction\n", + " x_label = 'Label: ' + name_map[class_names[label]]\n", + " if prediction is not None:\n", + " x_label = 'Prediction: ' + name_map[class_names[prediction]] + '\\n' + x_label\n", + " ax.xaxis.label.set_color('green' if label == prediction else 'red')\n", + " ax.set_xlabel(x_label)\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kwrg9yIlaUSb" + }, + "source": [ + "## Dataset\n", + "\n", + "Let's load the *cassava* dataset from TFDS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0rTcnxoSkp31" + }, + "outputs": [], + "source": [ + "dataset, info = tfds.load('cassava', with_info=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GpC71TFDhJFO" + }, + "source": [ + "Let's take a look at the dataset info to learn more about it, like the description and citation and information about how many examples are available" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btJBMovmbYtR" + }, + "outputs": [], + "source": [ + "info" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QT3XWAtR6BRy" + }, + "source": [ + "The *cassava* dataset has images of cassava leaves with 4 distinct diseases as well as healthy cassava leaves. The model can predict all of these classes as well as a sixth class for \"unknown\" when the model is not confident in its prediction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9NT9q8yyXZfX" + }, + "outputs": [], + "source": [ + "# Extend the cassava dataset classes with 'unknown'\n", + "class_names = info.features['label'].names + ['unknown']\n", + "\n", + "# Map the class names to human readable names\n", + "name_map = dict(\n", + " cmd='Mosaic Disease',\n", + " cbb='Bacterial Blight',\n", + " cgm='Green Mite',\n", + " cbsd='Brown Streak Disease',\n", + " healthy='Healthy',\n", + " unknown='Unknown')\n", + "\n", + "print(len(class_names), 'classes:')\n", + "print(class_names)\n", + "print([name_map[name] for name in class_names])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I6y_MGxgiW09" + }, + "source": [ + "Before we can feed the data to the model, we need to do a bit of preprocessing. The model expects 224 x 224 images with RGB channel values in [0, 1]. Let's normalize and resize the images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UxtxvqRjh7Nm" + }, + "outputs": [], + "source": [ + "def preprocess_fn(data):\n", + " image = data['image']\n", + "\n", + " # Normalize [0, 255] to [0, 1]\n", + " image = tf.cast(image, tf.float32)\n", + " image = image / 255.\n", + "\n", + " # Resize the images to 224 x 224\n", + " image = tf.image.resize(image, (224, 224))\n", + "\n", + " data['image'] = image\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qz27YrZahdvn" + }, + "source": [ + "Let's take a look at a few examples from the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j6LkAxv3f-aJ" + }, + "outputs": [], + "source": [ + "batch = dataset['validation'].map(preprocess_fn).batch(25).as_numpy_iterator()\n", + "examples = next(batch)\n", + "plot(examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eHlEAhL3hq2R" + }, + "source": [ + "## Model\n", + "\n", + "Let's load the classifier from TF Hub, run it on a few examples, and take a look at its predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b6eIWkTjIQhS" + }, + "outputs": [], + "source": [ + "classifier = hub.KerasLayer('https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2')\n", + "probabilities = classifier(examples['image'])\n", + "predictions = tf.argmax(probabilities, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MTQA1YAltfRZ" + }, + "outputs": [], + "source": [ + "plot(examples, predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MuFE8A5aZv9z" + }, + "source": [ + "## Evaluation & robustness\n", + "\n", + "Let's measure the *accuracy* of our classifier on a split of the dataset. We can also look at the *robustness* of the model by evaluating its performance on a non-cassava dataset. For images from other plant datasets, like iNaturalist or beans, the model should almost always return *unknown*."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "0ERcNxs0kHd3" + }, + "outputs": [], + "source": [ + "#@title Parameters {run: \"auto\"}\n", + "\n", + "DATASET = 'cassava' #@param {type:\"string\"} ['cassava', 'beans', 'i_naturalist2017']\n", + "DATASET_SPLIT = 'test' #@param {type:\"string\"} ['train', 'test', 'validation']\n", + "BATCH_SIZE = 32 #@param {type:\"integer\"}\n", + "MAX_EXAMPLES = 1000 #@param {type:\"integer\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mt0-IVmZplbb" + }, + "outputs": [], + "source": [ + "def label_to_unknown_fn(data):\n", + " data['label'] = 5 # Override label to unknown.\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cQYvY3IvY2Nx" + }, + "outputs": [], + "source": [ + "# Preprocess the examples and map the image label to unknown for non-cassava datasets.\n", + "ds = tfds.load(DATASET, split=DATASET_SPLIT).map(preprocess_fn).take(MAX_EXAMPLES)\n", + "dataset_description = DATASET\n", + "if DATASET != 'cassava':\n", + " ds = ds.map(label_to_unknown_fn)\n", + " dataset_description += ' (labels mapped to unknown)'\n", + "ds = ds.batch(BATCH_SIZE)\n", + "\n", + "# Calculate the accuracy of the model\n", + "metric = tf.keras.metrics.Accuracy()\n", + "for examples in ds:\n", + " probabilities = classifier(examples['image'])\n", + " predictions = tf.math.argmax(probabilities, axis=-1)\n", + " labels = examples['label']\n", + " metric.update_state(labels, predictions)\n", + "\n", + "print('Accuracy on %s: %.2f' % (dataset_description, metric.result().numpy()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rvS18sBExpdL" + }, + "source": [ + "## Learn more\n", + "\n", + "* Learn more about the model on TensorFlow Hub: https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2\n", + "* Learn how to build a custom image classifier running on a mobile phone with [ML Kit](https://developers.google.com/ml-kit/custom-models#tfhub) with the [TensorFlow Lite version of this model](https://tfhub.dev/google/lite-model/cropnet/classifier/cassava_disease_V1/1)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "cropnet_cassava.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_on_device.ipynb b/site/en/hub/tutorials/cropnet_on_device.ipynb new file mode 100644 index 00000000000..0e1cb1e0b0d --- /dev/null +++ b/site/en/hub/tutorials/cropnet_on_device.ipynb @@ -0,0 +1,724 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3XX46cTrh6iD" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors. \n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sKrlWr6Kh-mF" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMVmlJ0fAMkH" + }, + "source": [ + "# Fine tuning models for plant disease detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OEHq-hV5sWYO" + }, + "source": [ + "This notebook shows you how to **fine-tune CropNet models from TensorFlow Hub** on a dataset from TFDS or your own crop disease detection dataset.\n", + "\n", + "You will:\n", + "- Load the TFDS cassava dataset or your own data\n", + "- Enrich the data with unknown (negative) examples to get a more robust model\n", + "- Apply image augmentations to the data\n", + "- Load and fine tune a [CropNet model](https://tfhub.dev/s?module-type=image-feature-vector&q=cropnet) from TF Hub\n", + "- Export a TFLite model, ready to be deployed on your app with [Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier), [MLKit](https://developers.google.com/ml-kit/vision/image-labeling/custom-models/android) or [TFLite](https://www.tensorflow.org/lite/guide/inference) directly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dQvS4p807mZf" + }, + "source": [ + "## Imports and Dependencies\n", + "\n", + "Before starting, you'll need to install some of the dependencies that will be needed like [Model Maker](https://www.tensorflow.org/lite/guide/model_maker#installation) and the latest version of TensorFlow Datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BDTEMtexXE3" + }, + "outputs": [], + "source": [ + "!sudo apt install -q libportaudio2\n", + "## image_classifier library requires numpy <= 1.23.5\n", + "!pip install \"numpy<=1.23.5\"\n", + "!pip install --use-deprecated=legacy-resolver tflite-model-maker-nightly\n", + "!pip install -U tensorflow-datasets\n", + "## scann library requires tensorflow < 2.9.0\n", + "!pip install \"tensorflow<2.9.0\"\n", + "!pip install \"tensorflow-datasets~=4.8.0\" # protobuf>=3.12.2\n", + "!pip install tensorflow-metadata~=1.10.0 # protobuf>=3.13\n", + "## tensorflowjs requires packaging < 20.10\n", + "!pip install \"packaging<20.10\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nekG9Iwgxbx0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import os\n", + "import seaborn as sns\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_examples.lite.model_maker.core.export_format import ExportFormat\n", + "from tensorflow_examples.lite.model_maker.core.task import image_preprocessing\n", + "\n", + "from tflite_model_maker import image_classifier\n", + "from tflite_model_maker import ImageClassifierDataLoader\n", + "from tflite_model_maker.image_classifier import ModelSpec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fV0k2Q4x4N_4" + }, + "source": [ + "## Load a TFDS dataset to fine-tune on\n", + "\n", + "Lets use the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava) from TFDS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TTaD5W_1xjUz" + }, + "outputs": [], + "source": [ + "tfds_name = 'cassava'\n", + "(ds_train, ds_validation, ds_test), ds_info = tfds.load(\n", + " name=tfds_name,\n", + " split=['train', 'validation', 'test'],\n", + " with_info=True,\n", + " as_supervised=True)\n", + "TFLITE_NAME_PREFIX = tfds_name" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xDuDGUAxyHtA" + }, + "source": [ + "## Or alternatively load your own data to fine-tune on\n", + "\n", + "Instead of using a TFDS dataset, you can also train on your own data. This code snippet shows how to load your own custom dataset. See [this](https://www.tensorflow.org/datasets/api_docs/python/tfds/folder_dataset/ImageFolder) link for the supported structure of the data. An example is provided here using the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k003tLvflHpC" + }, + "outputs": [], + "source": [ + "# data_root_dir = tf.keras.utils.get_file(\n", + "# 'cassavaleafdata.zip',\n", + "# 'https://storage.googleapis.com/emcassavadata/cassavaleafdata.zip',\n", + "# extract=True)\n", + "# data_root_dir = os.path.splitext(data_root_dir)[0] # Remove the .zip extension\n", + "\n", + "# builder = tfds.ImageFolder(data_root_dir)\n", + "\n", + "# ds_info = builder.info\n", + "# ds_train = builder.as_dataset(split='train', as_supervised=True)\n", + "# ds_validation = builder.as_dataset(split='validation', as_supervised=True)\n", + "# ds_test = builder.as_dataset(split='test', as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hs3XCVLo4Fa1" + }, + "source": [ + "## Visualize samples from train split\n", + "\n", + "Let's take a look at some examples from the dataset including the class id and the class name for the image samples and their labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "89GkD60Eyfe0" + }, + "outputs": [], + "source": [ + "_ = tfds.show_examples(ds_train, ds_info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KW-n0lV4AZ-" + }, + "source": [ + "## Add images to be used as Unknown examples from TFDS datasets\n", + "\n", + "Add additional unknown (negative) examples to the training dataset and assign a new unknown class label number to them. The goal is to have a model that, when used in practice (e.g. in the field), has the option of predicting \"Unknown\" when it sees something unexpected.\n", + "\n", + "Below you can see a list of datasets that will be used to sample the additional unknown imagery. It includes 3 completely different datasets to increase diversity. 
One of them is a beans leaf disease dataset, so that the model has exposure to diseased plants other than cassava.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SYDMjRhDkDnd" + }, + "outputs": [], + "source": [ + "UNKNOWN_TFDS_DATASETS = [{\n", + " 'tfds_name': 'imagenet_v2/matched-frequency',\n", + " 'train_split': 'test[:80%]',\n", + " 'test_split': 'test[80%:]',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'oxford_flowers102',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'beans',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XUM_d0evktGi" + }, + "source": [ + "The UNKNOWN datasets are also loaded from TFDS." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DdWgBTe8uKR" + }, + "outputs": [], + "source": [ + "# Load unknown datasets.\n", + "weights = [\n", + " spec['num_examples_ratio_to_normal'] for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "num_unknown_train_examples = sum(\n", + " int(w * ds_train.cardinality().numpy()) for w in weights)\n", + "ds_unknown_train = tf.data.Dataset.sample_from_datasets([\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['train_split'],\n", + " as_supervised=True).repeat(-1) for spec in UNKNOWN_TFDS_DATASETS\n", + "], weights).take(num_unknown_train_examples)\n", + "ds_unknown_train = ds_unknown_train.apply(\n", + " tf.data.experimental.assert_cardinality(num_unknown_train_examples))\n", + "ds_unknown_tests = [\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['test_split'], as_supervised=True)\n", + " for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "ds_unknown_test = ds_unknown_tests[0]\n", + "for ds in ds_unknown_tests[1:]:\n", + " ds_unknown_test = ds_unknown_test.concatenate(ds)\n", + "\n", + "# All examples from the unknown datasets will get a new class label number.\n", + "num_normal_classes = len(ds_info.features['label'].names)\n", + "unknown_label_value = tf.convert_to_tensor(num_normal_classes, tf.int64)\n", + "ds_unknown_train = ds_unknown_train.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "ds_unknown_test = ds_unknown_test.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "\n", + "# Merge the normal train dataset with the unknown train dataset.\n", + "weights = [\n", + " ds_train.cardinality().numpy(),\n", + " ds_unknown_train.cardinality().numpy()\n", + "]\n", + "ds_train_with_unknown = tf.data.Dataset.sample_from_datasets(\n", + " [ds_train, ds_unknown_train], [float(w) for w in weights])\n", + "ds_train_with_unknown = ds_train_with_unknown.apply(\n", + " tf.data.experimental.assert_cardinality(sum(weights)))\n", + "\n", + "print((f\"Added {ds_unknown_train.cardinality().numpy()} negative examples.\"\n", + " f\"Training dataset has now {ds_train_with_unknown.cardinality().numpy()}\"\n", + " ' examples in total.'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "am6eKbzt7raH" + }, + "source": [ + "## Apply augmentations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sxIUP0Flk35V" + }, + "source": [ + "For all the images, to make them more diverse, you'll apply some augmentation, like changes in:\n", + "- Brightness\n", + "- Contrast\n", + "- Saturation\n", + "- Hue\n", + "- Crop\n", + "\n", + "These 
types of augmentations help make the model more robust to variations in image inputs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_BiOkXjqRju" + }, + "outputs": [], + "source": [ + "def random_crop_and_random_augmentations_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " image = tf.image.random_brightness(image, 0.2)\n", + " image = tf.image.random_contrast(image, 0.5, 2.0)\n", + " image = tf.image.random_saturation(image, 0.75, 1.25)\n", + " image = tf.image.random_hue(image, 0.1)\n", + " return image\n", + "\n", + "\n", + "def random_crop_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " return image\n", + "\n", + "\n", + "def resize_and_center_crop_fn(image):\n", + " image = tf.image.resize(image, (256, 256))\n", + " image = image[16:240, 16:240]\n", + " return image\n", + "\n", + "\n", + "no_augment_fn = lambda image: image\n", + "\n", + "train_augment_fn = lambda image, label: (\n", + " random_crop_and_random_augmentations_fn(image), label)\n", + "eval_augment_fn = lambda image, label: (resize_and_center_crop_fn(image), label)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RUfqE1c3l6my" + }, + "source": [ + "To apply the augmentations, use the `map` method of the Dataset class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uq-NCtaH_h8j" + }, + "outputs": [], + "source": [ + "ds_train_with_unknown = ds_train_with_unknown.map(train_augment_fn)\n", + "ds_validation = ds_validation.map(eval_augment_fn)\n", + "ds_test = ds_test.map(eval_augment_fn)\n", + "ds_unknown_test = ds_unknown_test.map(eval_augment_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DvnwolLiCqYX" + }, + "source": [ + "## Wrap the data into a Model Maker friendly format\n", + "\n", + "To use these datasets with Model Maker, they need to be wrapped in an ImageClassifierDataLoader class."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OXPWEDFDRlVu" + }, + "outputs": [], + "source": [ + "label_names = ds_info.features['label'].names + ['UNKNOWN']\n", + "\n", + "train_data = ImageClassifierDataLoader(ds_train_with_unknown,\n", + " ds_train_with_unknown.cardinality(),\n", + " label_names)\n", + "validation_data = ImageClassifierDataLoader(ds_validation,\n", + " ds_validation.cardinality(),\n", + " label_names)\n", + "test_data = ImageClassifierDataLoader(ds_test, ds_test.cardinality(),\n", + " label_names)\n", + "unknown_test_data = ImageClassifierDataLoader(ds_unknown_test,\n", + " ds_unknown_test.cardinality(),\n", + " label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j2iDwq2Njpb_" + }, + "source": [ + "## Run training\n", + "\n", + "[TensorFlow Hub](https://tfhub.dev) has multiple models available for transfer learning.\n", + "\n", + "Here you can choose one, and you can keep experimenting with other ones to try to get better results.\n", + "\n", + "If you want even more models to try, you can add them from this [collection](https://tfhub.dev/google/collections/image/1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5UhNpR0Ex_5-" + }, + "outputs": [], + "source": [ + "#@title Choose a base model\n", + "\n", + "model_name = 'mobilenet_v3_large_100_224' #@param ['cropnet_cassava', 'cropnet_concat', 'cropnet_imagenet', 'mobilenet_v3_large_100_224']\n", + "\n", + "map_model_name = {\n", + " 'cropnet_cassava':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1',\n", + " 'cropnet_concat':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/concat/1',\n", + " 'cropnet_imagenet':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/imagenet/1',\n", + " 'mobilenet_v3_large_100_224':\n", + " 'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5',\n", + "}\n", + "\n", + "model_handle = map_model_name[model_name]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y1ecXlQgR5Uk" + }, + "source": [ + "To fine-tune the model, you will use Model Maker. It makes the overall solution easier because, after training the model, it also converts it to TFLite.\n", + "\n", + "Model Maker makes this conversion as good as possible and includes all the information necessary to easily deploy the model on-device later.\n", + "\n", + "The model spec is how you tell Model Maker which base model you'd like to use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L8P-VTqJ8GaF" + }, + "outputs": [], + "source": [ + "image_model_spec = ModelSpec(uri=model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AnWN3kk6jCHf" + }, + "source": [ + "One important detail here is setting `train_whole_model`, which fine-tunes the base model during training. This makes the process slower, but the final model has higher accuracy. Setting `shuffle` makes sure the model sees the data in a randomly shuffled order, which is a best practice for model learning."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KRbSDbnA6Xap" + }, + "outputs": [], + "source": [ + "model = image_classifier.create(\n", + " train_data,\n", + " model_spec=image_model_spec,\n", + " batch_size=128,\n", + " learning_rate=0.03,\n", + " epochs=5,\n", + " shuffle=True,\n", + " train_whole_model=True,\n", + " validation_data=validation_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "buFDW0izBqIQ" + }, + "source": [ + "## Evaluate model on test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYIZ1rlV7lxm" + }, + "outputs": [], + "source": [ + "model.evaluate(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YJaReZ_OVU71" + }, + "source": [ + "To have an even better understanding of the fine tuned model, it's good to analyse the confusion matrix. This will show how often one class is predicted as another." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o9_vs1nNKOLF" + }, + "outputs": [], + "source": [ + "def predict_class_label_number(dataset):\n", + " \"\"\"Runs inference and returns predictions as class label numbers.\"\"\"\n", + " rev_label_names = {l: i for i, l in enumerate(label_names)}\n", + " return [\n", + " rev_label_names[o[0][0]]\n", + " for o in model.predict_top_k(dataset, batch_size=128)\n", + " ]\n", + "\n", + "def show_confusion_matrix(cm, labels):\n", + " plt.figure(figsize=(10, 8))\n", + " sns.heatmap(cm, xticklabels=labels, yticklabels=labels, \n", + " annot=True, fmt='g')\n", + " plt.xlabel('Prediction')\n", + " plt.ylabel('Label')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BWZCKerCNF_" + }, + "outputs": [], + "source": [ + "confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_test.map(lambda x, y: y)),\n", + " predict_class_label_number(test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ksu9BFULBvmj" + }, + "source": [ + "## Evaluate model on unknown test data\n", + "\n", + "In this evaluation we expect the model to have accuracy of almost 1. All images the model is tested on are not related to the normal dataset and hence we expect the model to predict the \"Unknown\" class label." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5wvZwliZcJP" + }, + "outputs": [], + "source": [ + "model.evaluate(unknown_test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jm47Odo5Vaiq" + }, + "source": [ + "Print the confusion matrix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E_gEX3oWH1YT" + }, + "outputs": [], + "source": [ + "unknown_confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_unknown_test.map(lambda x, y: y)),\n", + " predict_class_label_number(unknown_test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(unknown_confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o2agDx2fCHyd" + }, + "source": [ + "## Export the model as TFLite and SavedModel\n", + "\n", + "Now we can export the trained models in TFLite and SavedModel formats for deploying on-device and using for inference in TensorFlow." 
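, + "\n", + "As a quick sanity check, here is a minimal sketch of how the exported TFLite file could be loaded back with the TF Lite interpreter. This assumes the export cell below has already been run, so that `tflite_filename` points at the exported file, and it simply feeds a dummy input whose shape and dtype are read from the model itself:\n", + "\n", + "```python\n", + "import numpy as np\n", + "\n", + "# Load the exported model and allocate its tensors.\n", + "interpreter = tf.lite.Interpreter(model_path=tflite_filename)\n", + "interpreter.allocate_tensors()\n", + "input_details = interpreter.get_input_details()[0]\n", + "output_details = interpreter.get_output_details()[0]\n", + "\n", + "# Feed a dummy input that matches the model's expected shape and dtype.\n", + "dummy_input = np.zeros(input_details['shape'], dtype=input_details['dtype'])\n", + "interpreter.set_tensor(input_details['index'], dummy_input)\n", + "interpreter.invoke()\n", + "\n", + "# One score per class, in the same order as label_names.\n", + "scores = interpreter.get_tensor(output_details['index'])[0]\n", + "print('Predicted class:', label_names[int(np.argmax(scores))])\n", + "```"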
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bAFvBmMr7owW" + }, + "outputs": [], + "source": [ + "tflite_filename = f'{TFLITE_NAME_PREFIX}_model_{model_name}.tflite'\n", + "model.export(export_dir='.', tflite_filename=tflite_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Pz0-6To2C4yM" + }, + "outputs": [], + "source": [ + "# Export saved model version.\n", + "model.export(export_dir='.', export_format=ExportFormat.SAVED_MODEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4V4GdQqxjEU7" + }, + "source": [ + "## Next steps\n", + "\n", + "The model that you've just trained can be used on mobile devices and even deployed in the field!\n", + "\n", + "**To download the model, click the folder icon for the Files menu on the left side of the colab, and choose the download option.**\n", + "\n", + "The same technique used here could be applied to other plant diseases tasks that might be more suitable for your use case or any other type of image classification task. If you want to follow up and deploy on an Android app, you can continue on this [Android quickstart guide](https://www.tensorflow.org/lite/android/quickstart)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "3XX46cTrh6iD", + "xDuDGUAxyHtA" + ], + "name": "cropnet_on_device.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb new file mode 100644 index 00000000000..920d197811e --- /dev/null +++ b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb @@ -0,0 +1,4463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "**Copyright 2019 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Cross-Lingual Similarity and Semantic Search Engine with Multilingual Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Multilingual Universal Sentence Encoder module and use it for sentence similarity across multiple languages. This module is an extension of the [original Universal Encoder module](https://tfhub.dev/google/universal-sentence-encoder/2).\n", + "\n", + "The notebook is divided as follows:\n", + "\n", + "* The first section shows a visualization of sentences between pair of languages. This is a more academic exercise. \n", + "* In the second section, we show how to build a semantic search engine from a sample of a Wikipedia corpus in multiple languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvNRbHGarYeR" + }, + "source": [ + "## Citation\n", + "\n", + "*Research papers that make use of the models explored in this colab should cite:*\n", + "\n", + "### [Multilingual universal sentence encoder for semantic retrieval](https://arxiv.org/abs/1907.04307)\n", + "Yinfei Yang, Daniel Cer, Amin Ahmad, Mandy Guo, Jax Law, Noah Constant, Gustavo Hernandez Abrego, Steve Yuan, Chris Tar, Yun-Hsuan Sung, Brian Strope, and Ray Kurzweil. 2019.\n", + " arXiv preprint arXiv:1907.04307" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Multilingual Universal Sentence Encoder Module and also prepares a set of English sentences and their translations. In the following sections, the multilingual module will be used to compute similarity *across languages*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install \"tensorflow-text==2.11.*\"\n", + "!pip install bokeh\n", + "!pip install simpleneighbors[annoy]\n", + "!pip install tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "MSeY-MUQo2Ha" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import bokeh\n", + "import bokeh.models\n", + "import bokeh.plotting\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "import sklearn.metrics.pairwise\n", + "\n", + "from simpleneighbors import SimpleNeighbors\n", + "from tqdm import tqdm\n", + "from tqdm import trange\n", + "\n", + "def visualize_similarity(embeddings_1, embeddings_2, labels_1, labels_2,\n", + " plot_title,\n", + " plot_width=1200, plot_height=600,\n", + " xaxis_font_size='12pt', yaxis_font_size='12pt'):\n", + "\n", + " assert len(embeddings_1) == len(labels_1)\n", + " assert len(embeddings_2) == len(labels_2)\n", + "\n", + " # arccos based text similarity (Yang et al. 2019; Cer et al. 
2019)\n", + " sim = 1 - np.arccos(\n", + " sklearn.metrics.pairwise.cosine_similarity(embeddings_1,\n", + " embeddings_2))/np.pi\n", + "\n", + " embeddings_1_col, embeddings_2_col, sim_col = [], [], []\n", + " for i in range(len(embeddings_1)):\n", + " for j in range(len(embeddings_2)):\n", + " embeddings_1_col.append(labels_1[i])\n", + " embeddings_2_col.append(labels_2[j])\n", + " sim_col.append(sim[i][j])\n", + " df = pd.DataFrame(zip(embeddings_1_col, embeddings_2_col, sim_col),\n", + " columns=['embeddings_1', 'embeddings_2', 'sim'])\n", + "\n", + " mapper = bokeh.models.LinearColorMapper(\n", + " palette=[*reversed(bokeh.palettes.YlOrRd[9])], low=df.sim.min(),\n", + " high=df.sim.max())\n", + "\n", + " p = bokeh.plotting.figure(title=plot_title, x_range=labels_1,\n", + " x_axis_location=\"above\",\n", + " y_range=[*reversed(labels_2)],\n", + " plot_width=plot_width, plot_height=plot_height,\n", + " tools=\"save\",toolbar_location='below', tooltips=[\n", + " ('pair', '@embeddings_1 ||| @embeddings_2'),\n", + " ('sim', '@sim')])\n", + " p.rect(x=\"embeddings_1\", y=\"embeddings_2\", width=1, height=1, source=df,\n", + " fill_color={'field': 'sim', 'transform': mapper}, line_color=None)\n", + "\n", + " p.title.text_font_size = '12pt'\n", + " p.axis.axis_line_color = None\n", + " p.axis.major_tick_line_color = None\n", + " p.axis.major_label_standoff = 16\n", + " p.xaxis.major_label_text_font_size = xaxis_font_size\n", + " p.xaxis.major_label_orientation = 0.25 * np.pi\n", + " p.yaxis.major_label_text_font_size = yaxis_font_size\n", + " p.min_border_right = 300\n", + "\n", + " bokeh.io.output_notebook()\n", + " bokeh.io.show(p)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gk2IRjZFGDsK" + }, + "source": [ + "This is additional boilerplate code where we import the pre-trained ML model we will use to encode text throughout this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mkmF3w8WGLcM" + }, + "outputs": [], + "source": [ + "# The 16-language multilingual module is the default but feel free\n", + "# to pick others from the list and compare the results.\n", + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3' #@param ['https://tfhub.dev/google/universal-sentence-encoder-multilingual/3', 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3']\n", + "\n", + "model = hub.load(module_url)\n", + "\n", + "def embed_text(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jhLPq6AROyFk" + }, + "source": [ + "# Visualize Text Similarity Between Languages\n", + "With the sentence embeddings now in hand, we can visualize semantic similarity across different languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8xdAogbxJDTD" + }, + "source": [ + "## Computing Text Embeddings\n", + "\n", + "We first define a set of sentences translated to various languages in parallel. Then, we precompute the embeddings for all of our sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "# Some texts of different lengths in different languages.\n", + "arabic_sentences = ['كلب', 'الجراء لطيفة.', 'أستمتع بالمشي لمسافات طويلة على طول الشاطئ مع كلبي.']\n", + "chinese_sentences = ['狗', '小狗很好。', '我喜欢和我的狗一起沿着海滩散步。']\n", + "english_sentences = ['dog', 'Puppies are nice.', 'I enjoy taking long walks along the beach with my dog.']\n", + "french_sentences = ['chien', 'Les chiots sont gentils.', 'J\\'aime faire de longues promenades sur la plage avec mon chien.']\n", + "german_sentences = ['Hund', 'Welpen sind nett.', 'Ich genieße lange Spaziergänge am Strand entlang mit meinem Hund.']\n", + "italian_sentences = ['cane', 'I cuccioli sono carini.', 'Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.']\n", + "japanese_sentences = ['犬', '子犬はいいです', '私は犬と一緒にビーチを散歩するのが好きです']\n", + "korean_sentences = ['개', '강아지가 좋다.', '나는 나의 개와 해변을 따라 길게 산책하는 것을 즐긴다.']\n", + "russian_sentences = ['собака', 'Милые щенки.', 'Мне нравится подолгу гулять по пляжу со своей собакой.']\n", + "spanish_sentences = ['perro', 'Los cachorros son agradables.', 'Disfruto de dar largos paseos por la playa con mi perro.']\n", + "\n", + "# Multilingual example\n", + "multilingual_example = [\"Willkommen zu einfachen, aber\", \"verrassend krachtige\", \"multilingüe\", \"compréhension du language naturel\", \"модели.\", \"大家是什么意思\" , \"보다 중요한\", \".اللغة التي يتحدثونها\"]\n", + "multilingual_example_in_en = [\"Welcome to simple yet\", \"surprisingly powerful\", \"multilingual\", \"natural language understanding\", \"models.\", \"What people mean\", \"matters more than\", \"the language they speak.\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "weXZqLtTJY9b" + }, + "outputs": [], + "source": [ + "# Compute embeddings.\n", + "ar_result = embed_text(arabic_sentences)\n", + "en_result = embed_text(english_sentences)\n", + "es_result = embed_text(spanish_sentences)\n", + "de_result = embed_text(german_sentences)\n", + "fr_result = embed_text(french_sentences)\n", + "it_result = embed_text(italian_sentences)\n", + "ja_result = embed_text(japanese_sentences)\n", + "ko_result = embed_text(korean_sentences)\n", + "ru_result = embed_text(russian_sentences)\n", + "zh_result = embed_text(chinese_sentences)\n", + "\n", + "multilingual_result = embed_text(multilingual_example)\n", + "multilingual_in_en_result = embed_text(multilingual_example_in_en)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_3zGWuF-GhUm" + }, + "source": [ + "## Visualizing Similarity\n", + "\n", + "With text embeddings in hand, we can take their dot-product to visualize how similar sentences are between languages. A darker color indicates the embeddings are semantically similar." 
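+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "To make the colors concrete, the cell below is a minimal illustrative sketch that reuses the imports and embeddings computed above: it evaluates the same arccos-based score that `visualize_similarity` plots, so you can inspect the raw numbers for one language pair (English-Spanish here).\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Minimal sketch: the angular similarity behind the heatmaps below.\n",
+        "# Scores lie in [0, 1]; values near 1 mean the sentence embeddings are nearly parallel.\n",
+        "# Clipping guards against tiny floating-point overshoot in the cosine values.\n",
+        "cos = sklearn.metrics.pairwise.cosine_similarity(en_result, es_result)\n",
+        "angular_sim = 1 - np.arccos(np.clip(cos, -1.0, 1.0)) / np.pi\n",
+        "print(np.round(angular_sim, 2))"
+      ]
+    },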
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WOEIJA0mh70g"
+      },
+      "source": [
+        "### Multilingual Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "R2hbCMhmiDWR"
+      },
+      "outputs": [],
+      "source": [
+        "visualize_similarity(multilingual_in_en_result, multilingual_result,\n",
+        "                     multilingual_example_in_en, multilingual_example, \"Multilingual Universal Sentence Encoder for Semantic Retrieval (Yang et al., 2019)\")\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "h3TEhllsq3ax"
+      },
+      "source": [
+        "### English-Arabic Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Q9UDpStmq7Ii"
+      },
+      "outputs": [],
+      "source": [
+        "visualize_similarity(en_result, ar_result, english_sentences, arabic_sentences, 'English-Arabic Similarity')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QF9z48HMp4WL"
+      },
+      "source": [
+        "### English-Russian Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QE68UejYp86z"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/javascript": [
+              "\n",
+              "(function(root) {\n",
+              "  function now() {\n",
+              "    return new Date();\n",
+              "  }\n",
+              "\n",
+              "  var force = true;\n",
+              "\n",
+              "  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
+              "    root._bokeh_onload_callbacks = [];\n",
+              "    root._bokeh_is_loading = undefined;\n",
+              "  }\n",
+              "\n",
+              "  var JS_MIME_TYPE = 
'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return 
toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"c09fc8f6-d5fa-4ba4-ae4a-0ce276d613ba\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1180\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1183\",\"type\":\"Grid\"},{\"id\":\"1187\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1184\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1197\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1170\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1190\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1172\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1176\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1174\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1178\",\"type\":\"CategoricalScale\"}},\"id\":\"1169\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1189\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1218\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1216\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{},\"id\":\"1219\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"ticker\":{\"id\":\"1181\",\"type\":\"CategoricalTicker\"}},\"id\":\"1183\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1216\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1181\",\"type\":\"CategoricalTicker\"}},\"id\":\"1180\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1178\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1176\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 
\\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\"]},\"id\":\"1174\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1185\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1214\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1185\",\"type\":\"CategoricalTicker\"}},\"id\":\"1184\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1168\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1195\",\"type\":\"Rect\"},{\"attributes\":{\"high\":0.8845847249031067,\"low\":0.5800867676734924,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1168\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1181\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"data_source\":{\"id\":\"1193\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1195\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1196\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1198\",\"type\":\"CDSView\"}},\"id\":\"1197\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1188\",\"type\":\"SaveTool\"},{\"id\":\"1189\",\"type\":\"HoverTool\"}]},\"id\":\"1190\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1172\",\"type\":\"FactorRange\"},{\"attributes\":{\"text\":\"English-Russian Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1170\",\"type\":\"Title\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1185\",\"type\":\"CategoricalTicker\"}},\"id\":\"1187\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 
\\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAoIRO7D8AAABgiPvkPwAAAMCko+M/AAAAgDBm5D8AAADAi1DoPwAAAKCr8eI/AAAAIBKQ4j8AAABARq3iPwAAAMAlV+g/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1218\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1219\",\"type\":\"UnionRenderers\"}},\"id\":\"1193\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1214\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1196\",\"type\":\"Rect\"},{\"attributes\":{\"source\":{\"id\":\"1193\",\"type\":\"ColumnDataSource\"}},\"id\":\"1198\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1188\",\"type\":\"SaveTool\"}],\"root_ids\":[\"1169\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"c09fc8f6-d5fa-4ba4-ae4a-0ce276d613ba\",\"roots\":{\"1169\":\"02b6f17f-c980-4d2c-b9d7-58e2d001b1bf\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1169" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, ru_result, english_sentences, russian_sentences, 'English-Russian Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BJkL6Az0QXNN" + }, + "source": [ + "### English-Spanish Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CH_BXVGhQ0GL" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " 
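+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The heatmaps above are drawn from a pairwise similarity matrix between the two sets of sentence embeddings. As a minimal sketch (assuming, as the plots suggest, that similarity is the inner product of the embedding matrices such as `en_result` and `ru_result` computed earlier), that matrix could be built directly:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "# Hypothetical illustration: pairwise similarity between the English and Russian\n",
+    "# sentence embeddings computed earlier in this notebook.\n",
+    "sim_matrix = np.inner(np.asarray(en_result), np.asarray(ru_result))\n",
+    "print(sim_matrix.shape)  # (len(english_sentences), len(russian_sentences))"
+   ]
+  },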
*/\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added 
*/\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"bb848e12-e360-4876-aa19-21896caab34d\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1274\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1277\",\"type\":\"Grid\"},{\"id\":\"1281\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1278\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1291\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1264\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1284\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1266\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1270\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1268\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1272\",\"type\":\"CategoricalScale\"}},\"id\":\"1263\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1272\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1270\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Disfruto de dar largos paseos por la playa con mi perro.\",\"Los cachorros son agradables.\",\"perro\"]},\"id\":\"1268\",\"type\":\"FactorRange\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1266\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1262\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1289\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAA4AIT7T8AAAAAcHfkPwAAAKAngeI/AAAAQNoA5D8AAADAvfvoPwAAAGCGFeI/AAAAgMNr4j8AAAAArbPjPwAAAGCJ1eo/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1319\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1320\",\"type\":\"UnionRenderers\"}},\"id\":\"1287\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"high\":0.9085707068443298,\"low\":0.5651275515556335,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1262\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"}},\"id\":\"1292\",\"type\":\"CDSView\"},{\"attributes\":{\"text\":\"English-Spanish Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1264\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1283\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1290\",\"type\":\"Rect\"},{\"attributes\":{\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1277\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1282\",\"type\":\"SaveTool\"},{\"attributes\":{\"data_source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1289\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1290\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1292\",\"type\":\"CDSView\"}},\"id\":\"1291\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1281\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1282\",\"type\":\"SaveTool\"},{\"id\":\"1283\",\"type\":\"HoverTool\"}]},\"id\":\"1284\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1320\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1279\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1278\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1319\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1274\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1275\",\"type\":\"CategoricalTicker\"}],\"root_ids\":[\"1263\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"bb848e12-e360-4876-aa19-21896caab34d\",\"roots\":{\"1263\":\"81e993c9-fc6b-4169-8c6b-a0101097b959\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1263" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + 
"source": [ + "visualize_similarity(en_result, es_result, english_sentences, spanish_sentences, 'English-Spanish Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "imn28LCiQO7d" + }, + "source": [ + "### English-Italian Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X9uD3DirPIGd" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 
1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1375\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1378\",\"type\":\"Grid\"},{\"id\":\"1382\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1379\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1392\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1365\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1385\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1367\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1371\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1369\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1373\",\"type\":\"CategoricalScale\"}},\"id\":\"1364\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1371\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1427\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1373\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1375\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"I cuccioli sono carini.\",\"cane\"]},\"id\":\"1369\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1376\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1378\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1379\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1380\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio 
cane.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAoHcI7T8AAADAU2/jPwAAAIBLIeM/AAAAAO8N5D8AAAAA5GToPwAAAIDhjeI/AAAAQLlt4j8AAAAAEj3iPwAAAGCPHuw/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1427\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1428\",\"type\":\"UnionRenderers\"}},\"id\":\"1388\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1382\",\"type\":\"Grid\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1383\",\"type\":\"SaveTool\"},{\"id\":\"1384\",\"type\":\"HoverTool\"}]},\"id\":\"1385\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1383\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1384\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1391\",\"type\":\"Rect\"},{\"attributes\":{\"text\":\"English-Italian Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1365\",\"type\":\"Title\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1363\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1390\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1367\",\"type\":\"FactorRange\"},{\"attributes\":{\"data_source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1390\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1391\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1393\",\"type\":\"CDSView\"}},\"id\":\"1392\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"}},\"id\":\"1393\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.90728360414505,\"low\":0.5699548721313477,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1363\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1428\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1364\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\",\"roots\":{\"1364\":\"5e20475c-62a7-4a19-87ed-a605dc444c96\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", 
+ " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1364" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, it_result, english_sentences, italian_sentences, 'English-Italian Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m6ySvEGbQaTM" + }, + "source": [ + "### Italian-Spanish Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irfwIeitQ7V6" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = 
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "m6ySvEGbQaTM"
+    },
+    "source": [
+     "### Italian-Spanish Similarity"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "id": "irfwIeitQ7V6"
+    },
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "[BokehJS loader boilerplate and embedded plot JSON trimmed: interactive heatmap titled 'Italian-Spanish Similarity', plotting the three Italian sentences against their Spanish translations.]"
+       ]
+      },
+      "metadata": {
+       "tags": []
+      },
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "visualize_similarity(it_result, es_result, italian_sentences, spanish_sentences, 'Italian-Spanish Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "ueoRO8balwwr"
+    },
+    "source": [
+     "### English-Chinese Similarity"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "id": "xA7anofVlxL7"
+    },
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "[BokehJS loader boilerplate and embedded plot JSON trimmed: interactive heatmap titled 'English-Chinese Similarity', plotting the three English sentences against their Chinese translations.]"
+       ]
+      },
+      "metadata": {
+       "tags": []
+      },
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "visualize_similarity(en_result, zh_result, english_sentences, chinese_sentences, 'English-Chinese Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "8zV1BJc3mL3W"
+    },
+    "source": [
+     "### English-Korean Similarity"
+    ]
+   },
output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"1bbe715c-608d-49a6-8927-e818fa752480\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1720\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1723\",\"type\":\"Grid\"},{\"id\":\"1727\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1724\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1737\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1710\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1730\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1712\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1716\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1714\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1718\",\"type\":\"CategoricalScale\"}},\"id\":\"1709\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1794\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1725\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1724\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1727\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1721\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1728\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1712\",\"type\":\"FactorRange\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1728\",\"type\":\"SaveTool\"},{\"id\":\"1729\",\"type\":\"HoverTool\"}]},\"id\":\"1730\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1718\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1793\",\"type\":\"Selection\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1720\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1729\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1716\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1708\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1735\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 
\\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1714\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1736\",\"type\":\"Rect\"},{\"attributes\":{\"source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"}},\"id\":\"1738\",\"type\":\"CDSView\"},{\"attributes\":{\"data_source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1735\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1736\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1738\",\"type\":\"CDSView\"}},\"id\":\"1737\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"text\":\"English-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1710\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAgICf6z8AAAAAKEjlPwAAAKBNk+A/AAAAwFZl5D8AAAAAUpnnPwAAAID69uA/AAAAQJua4j8AAADgCQ3jPwAAAIC9gOg/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1793\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1794\",\"type\":\"UnionRenderers\"}},\"id\":\"1733\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{},\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1723\",\"type\":\"Grid\"},{\"attributes\":{\"high\":0.8632204532623291,\"low\":0.5179813504219055,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1708\",\"type\":\"LinearColorMapper\"}],\"root_ids\":[\"1709\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"1bbe715c-608d-49a6-8927-e818fa752480\",\"roots\":{\"1709\":\"7b449243-0dbd-46b6-8b02-a89fdf92645e\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + 
" embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1709" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, ko_result, english_sentences, korean_sentences, 'English-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfTj-JaunFTv" + }, + "source": [ + "### Chinese-Korean Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MndSgKGPnJuF" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " 
// store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1849\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1852\",\"type\":\"Grid\"},{\"id\":\"1856\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1853\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1866\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1839\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1859\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1841\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1845\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1843\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1847\",\"type\":\"CategoricalScale\"}},\"id\":\"1838\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1849\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"]},\"id\":\"1841\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1929\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1850\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1852\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1853\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1854\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1856\",\"type\":\"Grid\"},{\"attributes\":{\"text\":\"Chinese-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1839\",\"type\":\"Title\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1857\",\"type\":\"SaveTool\"},{\"id\":\"1858\",\"type\":\"HoverTool\"}]},\"id\":\"1859\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1857\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1858\",\"type\":\"HoverTool\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"\\u72d7\",\"\\u72d7\",\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAwIKP6z8AAACAHL7lPwAAAKDsSuA/AAAAoN0A5j8AAACgWsboPwAAAIANGeE/AAAAQMFJ4z8AAADA8D7jPwAAAABna+c/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1929\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1930\",\"type\":\"UnionRenderers\"}},\"id\":\"1862\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1865\",\"type\":\"Rect\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1837\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1864\",\"type\":\"Rect\"},{\"attributes\":{},\"id\":\"1847\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"data_source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1864\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1865\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1867\",\"type\":\"CDSView\"}},\"id\":\"1866\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"}},\"id\":\"1867\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.8612684011459351,\"low\":0.5091460347175598,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1837\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 
\\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1843\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1845\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1930\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1838\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\",\"roots\":{\"1838\":\"63952aa4-d54a-4445-ad10-ef5bef98f1ef\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1838" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(zh_result, ko_result, chinese_sentences, korean_sentences, 'Chinese-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rRabHHQYQfLr" + }, + "source": [ + "### And more...\n", + "\n", + "The above examples can be extended to any language pair from **English, Arabic, Chinese, Dutch, French, German, Italian, Japanese, Korean, Polish, Portuguese, Russian, Spanish, Thai and Turkish**. Happy coding!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mxAFAJI9xsAU" + }, + "source": [ + "# Creating a Multilingual Semantic-Similarity Search Engine\n", + "\n", + "Whereas in the previous example we visualized a handful of sentences, in this section we will build a semantic-search index of about 200,000 sentences from a Wikipedia Corpus. About half will be in English and the other half in Spanish to demonstrate the multilingual capabilities of the Universal Sentence Encoder.\n", + "\n", + "## Download Data to Index\n", + "First, we will download news sentences in multiples languages from the [News Commentary Corpus](http://opus.nlpl.eu/News-Commentary-v11.php) [1]. Without loss of generality, this approach should also work for indexing the rest of the supported languages.\n", + "\n", + "To speed up the demo, we limit to 1000 sentences per language." 
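As a hedged sketch of extending the comparison to one more pair (English-Japanese), reusing the helpers shown in this notebook: the Japanese sentence list below and the `embed_text` name are illustrative assumptions (use whatever embedding helper and sentences you defined earlier), while `visualize_similarity`, `en_result` and `english_sentences` are the same objects used above.

```python
import numpy as np

# Illustrative only: rough Japanese counterparts of the English examples, and
# `embed_text` stands in for the notebook's multilingual-encoder helper.
japanese_sentences = ['犬', '子犬はいいです。', '私は犬と一緒にビーチを散歩するのが好きです。']
ja_result = embed_text(japanese_sentences)

# The heatmaps above are driven by the pairwise inner products of the
# sentence embeddings.
print(np.round(np.inner(en_result, ja_result), 2))

visualize_similarity(en_result, ja_result, english_sentences, japanese_sentences,
                     'English-Japanese Similarity')
```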
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "587I9ye6yXEU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/ar-en.txt.zip\n", + "24715264/24714354 [==============================] - 2s 0us/step\n", + "1,000 Arabic sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-zh.txt.zip\n", + "18104320/18101984 [==============================] - 2s 0us/step\n", + "1,000 Chinese sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-es.txt.zip\n", + "28106752/28106064 [==============================] - 2s 0us/step\n", + "1,000 English sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-ru.txt.zip\n", + "24854528/24849511 [==============================] - 2s 0us/step\n", + "1,000 Russian sentences\n", + "1,000 Spanish sentences\n" + ] + } + ], + "source": [ + "corpus_metadata = [\n", + " ('ar', 'ar-en.txt.zip', 'News-Commentary.ar-en.ar', 'Arabic'),\n", + " ('zh', 'en-zh.txt.zip', 'News-Commentary.en-zh.zh', 'Chinese'),\n", + " ('en', 'en-es.txt.zip', 'News-Commentary.en-es.en', 'English'),\n", + " ('ru', 'en-ru.txt.zip', 'News-Commentary.en-ru.ru', 'Russian'),\n", + " ('es', 'en-es.txt.zip', 'News-Commentary.en-es.es', 'Spanish'),\n", + "]\n", + "\n", + "language_to_sentences = {}\n", + "language_to_news_path = {}\n", + "for language_code, zip_file, news_file, language_name in corpus_metadata:\n", + " zip_path = tf.keras.utils.get_file(\n", + " fname=zip_file,\n", + " origin='http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/' + zip_file,\n", + " extract=True)\n", + " news_path = os.path.join(os.path.dirname(zip_path), news_file)\n", + " language_to_sentences[language_code] = pd.read_csv(news_path, sep='\\t', header=None)[0][:1000]\n", + " language_to_news_path[language_code] = news_path\n", + "\n", + " print('{:,} {} sentences'.format(len(language_to_sentences[language_code]), language_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m3DIT9uT7Z34" + }, + "source": [ + "## Using a pre-trained model to transform sentences into vectors\n", + "\n", + "We compute embeddings in _batches_ so that they fit in the GPU's RAM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRoRT5qCEIYy" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r 0%| | 0/1000 [00:00\n", + "
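For readers who want to see the batching step spelled out, here is a minimal sketch under stated assumptions: `embed_text` is a stand-in for the notebook's embedding helper, the batch size is an arbitrary value chosen to fit in GPU memory, and the brute-force search at the end is only an illustration (the tutorial itself builds a proper nearest-neighbor index over the embeddings).

```python
import numpy as np

batch_size = 256  # assumption: pick whatever fits in your GPU's RAM

language_to_embeddings = {}
for language_code, sentences in language_to_sentences.items():
  sentences = list(sentences)
  # Embed in batches and stack the results into one (num_sentences, dim) array.
  batches = [
      np.asarray(embed_text(sentences[i:i + batch_size]))
      for i in range(0, len(sentences), batch_size)
  ]
  language_to_embeddings[language_code] = np.vstack(batches)
  print(language_code, language_to_embeddings[language_code].shape)

# Brute-force cross-lingual search: Spanish sentences closest to an English query.
query = np.asarray(embed_text(['global economic growth']))[0]
scores = language_to_embeddings['es'] @ query
for i in np.argsort(-scores)[:3]:
  print(round(float(scores[i]), 3), language_to_sentences['es'][i])
```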
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5POcTVNB_dv" + }, + "source": [ + "# HRNet based model for semantic segmentation\n", + "\n", + "In this notebook, you will:\n", + "\n", + "- Choose and load one of the 17 pre-trained HRNet models on different semantic segmentation datasets\n", + "- Run inference to extract features from the model backbone and predictions from the model head" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_XgTpm9ZxoN9" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVtEyxDFpKE1" + }, + "source": [ + "## Loading models from TensorFlow Hub\n", + "\n", + "Here you can choose the pre-trained HRNet model to load, different models means a different training dataset used. All models have the same architecture, except for the model head, which has a different dimension based on the number of classes contained in the training dataset (dataset_output_classes). For more information about the different datasets we refer to the links above and the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8_ctG55-uTX" + }, + "outputs": [], + "source": [ + "#@title Choose a pre-trained HRNet model to load.\n", + "\n", + "hrnet_model_name = 'ade20k-hrnetv2-w48/1' #@param [\"ade20k-hrnetv2-w48/1\", \"isprs-hrnetv2-w48/1\", \"vkitti2-hrnetv2-w48/1\", \"vgallery-hrnetv2-w48/1\", \"sunrgbd-hrnetv2-w48/1\", \"suim-hrnetv2-w48/1\", \"scannet-hrnetv2-w48/1\", \"pvoc-hrnetv2-w48/1\", \"msegpcontext-hrnetv2-w48/1\", \"mapillary-hrnetv2-w48/1\", \"kitti-hrnetv2-w48/1\", \"isaid-hrnetv2-w48/1\", \"idd-hrnetv2-w48/1\", \"coco-hrnetv2-w48/1\", \"city-hrnetv2-w48/1\", \"camvid-hrnetv2-w48/1\", \"bdd-hrnetv2-w48/1\"]\n", + "\n", + "tfhub_model_name = 'https://tfhub.dev/google/HRNet/' + hrnet_model_name\n", + "\n", + "print('HRNet model selected :', tfhub_model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T-yHJ5X55kWN" + }, + "outputs": [], + "source": [ + "hrnet_model = hub.load(tfhub_model_name)\n", + "\n", + "print('HRNet model loaded :', tfhub_model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pMP_7v9x6kol" + }, + "source": [ + "## Loading an image and running inference\n", + "\n", + "This is a demonstration on how to run inference for extracting features and predictions from an image. The image was taken from the scene150 dataset.\n", + "\n", + "To perform inference on the datasets that were used during training we refer to the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GNzjieS66td_" + }, + "outputs": [], + "source": [ + "img_file = tf.keras.utils.get_file(origin=\"https://tensorflow.org/images/bedroom_hrnet_tutorial.jpg\")\n", + "img = np.array(Image.open(img_file))/255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lp54vD_FZuHw" + }, + "outputs": [], + "source": [ + "plt.imshow(img)\n", + "plt.show()\n", + "\n", + "# Predictions will have shape (batch_size, h, w, dataset_output_classes)\n", + "predictions = hrnet_model.predict([img])\n", + "plt.imshow(predictions[0,:,:,1])\n", + "plt.title('Predictions for class #1')\n", + "plt.show() \n", + "# Features will have shape (batch_size, h/4, w/4, 720)\n", + "features = hrnet_model.get_features([img])\n", + "plt.imshow(features[0,:,:,1])\n", + "plt.title('Feature #1 out of 720')\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "hrnet_semantic_segmentation.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_classification.ipynb b/site/en/hub/tutorials/image_classification.ipynb new file mode 100644 index 00000000000..91aadab727e --- /dev/null +++ b/site/en/hub/tutorials/image_classification.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7t7KGfIwHaXz" + }, + "source": [ + "# Image Classification with TensorFlow Hub\n", + "\n", + "In this colab, you'll try multiple image classification models from TensorFlow Hub and decide which one is best for your use case.\n", + "\n", + "Because TF Hub encourages a [consistent input convention](https://www.tensorflow.org/hub/common_saved_model_apis/images#image_input) for models that operate on images, it's easy to experiment with different architectures to find the one that best fits your needs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N8H5ufxkc2mk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "oKvj6lY6kZx8" + }, + "outputs": [], + "source": [ + "#@title Helper functions for loading image (hidden)\n", + "\n", + "original_image_cache = {}\n", + "\n", + "def preprocess_image(image):\n", + " image = np.array(image)\n", + " # reshape into shape [batch_size, height, width, num_channels]\n", + " img_reshaped = tf.reshape(image, [1, image.shape[0], image.shape[1], image.shape[2]])\n", + " # Use `convert_image_dtype` to convert to floats in the [0,1] range.\n", + " image = tf.image.convert_image_dtype(img_reshaped, tf.float32)\n", + " return image\n", + "\n", + "def load_image_from_url(img_url):\n", + " \"\"\"Returns an image with shape [1, height, width, num_channels].\"\"\"\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image = Image.open(BytesIO(response.content))\n", + " image = preprocess_image(image)\n", + " return image\n", + "\n", + "def load_image(image_url, image_size=256, dynamic_size=False, max_dynamic_size=512):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " if image_url in original_image_cache:\n", + " img = original_image_cache[image_url]\n", + " elif image_url.startswith('https://'):\n", + " img = load_image_from_url(image_url)\n", + " else:\n", + " fd = tf.io.gfile.GFile(image_url, 'rb')\n", + " img = preprocess_image(Image.open(fd))\n", + " original_image_cache[image_url] = img\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img_raw = img\n", + " if tf.reduce_max(img) > 1.0:\n", + " img = img / 255.\n", + " if len(img.shape) == 3:\n", + " img = tf.stack([img, img, img], axis=-1)\n", + " if not dynamic_size:\n", + " img = tf.image.resize_with_pad(img, image_size, image_size)\n", + " elif img.shape[1] > max_dynamic_size or img.shape[2] > max_dynamic_size:\n", + " img = tf.image.resize_with_pad(img, max_dynamic_size, max_dynamic_size)\n", + " return img, img_raw\n", + "\n", + "def show_image(image, title=''):\n", + " image_size = image.shape[1]\n", + " w = (image_size * 6) // 320\n", + " plt.figure(figsize=(w, w))\n", + " plt.imshow(image[0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(title)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws1AMDT_CDPq" + }, + "source": [ + "Select an Image Classification Model. After that, some internal variables are set and the labels file is downloaded and prepared for use.\n", + "\n", + "There are some technical differences between the models, like different input size, model size, accuracy, and inference time. Here you can change the model you are using until you find the one most suitable for your use case.\n", + "\n", + "The handle (url) of the model is printed for your convenience. 
More documentation about each model is available there.\n", + "\n", + "Note: All these models were trained on the ImageNet dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iQ3aamrBfs-c" + }, + "outputs": [], + "source": [ + "#@title Select an Image Classification model\n", + "\n", + "image_size = 224\n", + "dynamic_size = False\n", + "\n", + "model_name = \"efficientnetv2-s\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2\",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/classification/2\",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/classification/2\",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/classification/2\",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/classification/2\",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/classification/2\",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/classification/2\",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/classification/2\",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/classification/2\",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/classification/2\",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/classification/2\",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/classification/2\",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/classification/2\",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/classification/2\",\n", + " \"efficientnetv2-xl-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/classification/2\",\n", + " \"efficientnetv2-b0-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/classification/2\",\n", + " \"efficientnetv2-b1-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/classification/2\",\n", + " \"efficientnetv2-b2-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/classification/2\",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/classification/2\",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2\",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/classification/2\",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/classification/2\",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/classification/2\",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/classification/1\",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/classification/1\",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/classification/1\",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/classification/1\",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/classification/1\",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/classification/1\",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/classification/1\",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/classification/1\",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/ilsvrc2012_classification/1\",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/classification/4\",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/classification/4\",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/classification/4\",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/classification/4\",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/classification/4\",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4\",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/classification/4\",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/classification/4\",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/classification/4\",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/classification/4\",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/classification/4\",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4\",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/4\",\n", + " \"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4\",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/classification/5\",\n", + " \"mobilenet_v3_small_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/classification/5\",\n", + " \"mobilenet_v3_large_100_224\": 
\"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/classification/5\",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/classification/5\",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " \"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"mobilenet_v2_100_224\": 224,\n", + " \"mobilenet_v2_130_224\": 224,\n", + " \"mobilenet_v2_140_224\": 224,\n", + " \"nasnet_large\": 331,\n", + " \"nasnet_mobile\": 224,\n", + " \"pnasnet_large\": 331,\n", + " \"resnet_v1_50\": 224,\n", + " \"resnet_v1_101\": 224,\n", + " \"resnet_v1_152\": 224,\n", + " \"resnet_v2_50\": 224,\n", + " \"resnet_v2_101\": 224,\n", + " \"resnet_v2_152\": 224,\n", + " \"mobilenet_v3_small_100_224\": 224,\n", + " \"mobilenet_v3_small_075_224\": 224,\n", + " \"mobilenet_v3_large_100_224\": 224,\n", + " \"mobilenet_v3_large_075_224\": 224,\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "\n", + "max_dynamic_size = 512\n", + "if model_name in model_image_size_map:\n", + " image_size = model_image_size_map[model_name]\n", + " dynamic_size = False\n", + " print(f\"Images will be converted to {image_size}x{image_size}\")\n", + "else:\n", + " dynamic_size = True\n", + " print(f\"Images will be capped to a max size of {max_dynamic_size}x{max_dynamic_size}\")\n", + "\n", + "labels_file = \"https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt\"\n", + "\n", + "#download labels and creates a maps\n", + "downloaded_file = tf.keras.utils.get_file(\"labels.txt\", origin=labels_file)\n", + "\n", + "classes = []\n", + "\n", + "with open(downloaded_file) as f:\n", + " labels = f.readlines()\n", + " classes = [l.strip() for l in labels]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vxcASidjBAE8" + }, + "source": [ + "You can select one of the images below, or use your own image. Just remember that the input size for the models vary and some of them use a dynamic input size (enabling inference on the unscaled image). Given that, the method `load_image` will already rescale the image to the expected format." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "o2rMsr4CgET2" + }, + "outputs": [], + "source": [ + "#@title Select an Input Image\n", + "\n", + "image_name = \"turtle\" # @param ['tiger', 'bus', 'car', 'cat', 'dog', 'apple', 'banana', 'turtle', 'flamingo', 'piano', 'honeycomb', 'teapot']\n", + "\n", + "images_for_test_map = {\n", + " \"tiger\": \"https://upload.wikimedia.org/wikipedia/commons/b/b0/Bengal_tiger_%28Panthera_tigris_tigris%29_female_3_crop.jpg\",\n", + " #by Charles James Sharp, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"bus\": \"https://upload.wikimedia.org/wikipedia/commons/6/63/LT_471_%28LTZ_1471%29_Arriva_London_New_Routemaster_%2819522859218%29.jpg\",\n", + " #by Martin49 from London, England, CC BY 2.0 , via Wikimedia Commons\n", + " \"car\": \"https://upload.wikimedia.org/wikipedia/commons/4/49/2013-2016_Toyota_Corolla_%28ZRE172R%29_SX_sedan_%282018-09-17%29_01.jpg\",\n", + " #by EurovisionNim, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"cat\": \"https://upload.wikimedia.org/wikipedia/commons/4/4d/Cat_November_2010-1a.jpg\",\n", + " #by Alvesgaspar, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"dog\": \"https://upload.wikimedia.org/wikipedia/commons/archive/a/a9/20090914031557%21Saluki_dog_breed.jpg\",\n", + " #by Craig Pemberton, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"apple\": \"https://upload.wikimedia.org/wikipedia/commons/1/15/Red_Apple.jpg\",\n", + " #by Abhijit Tembhekar from Mumbai, India, CC BY 2.0 , via Wikimedia Commons\n", + " \"banana\": \"https://upload.wikimedia.org/wikipedia/commons/1/1c/Bananas_white_background.jpg\",\n", + " #by fir0002 flagstaffotos [at] gmail.com\t\tCanon 20D + Tamron 28-75mm f/2.8, GFDL 1.2 , via Wikimedia Commons\n", + " \"turtle\": \"https://upload.wikimedia.org/wikipedia/commons/8/80/Turtle_golfina_escobilla_oaxaca_mexico_claudio_giovenzana_2010.jpg\",\n", + " #by Claudio Giovenzana, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"flamingo\": \"https://upload.wikimedia.org/wikipedia/commons/b/b8/James_Flamingos_MC.jpg\",\n", + " #by Christian Mehlführer, User:Chmehl, CC BY 3.0 , via Wikimedia Commons\n", + " \"piano\": \"https://upload.wikimedia.org/wikipedia/commons/d/da/Steinway_%26_Sons_upright_piano%2C_model_K-132%2C_manufactured_at_Steinway%27s_factory_in_Hamburg%2C_Germany.png\",\n", + " #by \"Photo: © Copyright Steinway & Sons\", CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"honeycomb\": \"https://upload.wikimedia.org/wikipedia/commons/f/f7/Honey_comb.jpg\",\n", + " #by Merdal, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"teapot\": \"https://upload.wikimedia.org/wikipedia/commons/4/44/Black_tea_pot_cropped.jpg\",\n", + " #by Mendhak, CC BY-SA 2.0 , via Wikimedia Commons\n", + "}\n", + "\n", + "img_url = images_for_test_map[image_name]\n", + "image, original_image = load_image(img_url, image_size, dynamic_size, max_dynamic_size)\n", + "show_image(image, 'Scaled image')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CMwWx8_8Aw3X" + }, + "source": [ + "Now that the model was chosen, loading it with TensorFlow Hub is simple.\n", + "\n", + "This also calls the model with a random input as a \"warmup\" run. Subsequent calls are often much faster, and you can compare this with the latency below.\n", + "\n", + "*Note:* models that use a dynamic size might need a fresh \"warmup\" run for each image size." 
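If you do feed several different image sizes to a dynamic-size model, one way to act on that note is to warm the model up once per distinct input shape, as in this sketch (it assumes `classifier` has already been loaded with `hub.load(model_handle)`, as in the next cell):

```python
# Sketch: warm up a dynamic-size model once per distinct input shape.
# Assumes `classifier = hub.load(model_handle)` has run, as in the next cell.
warmed_up_shapes = set()

def predict_with_warmup(img):
  shape = tuple(img.shape)
  if shape not in warmed_up_shapes:
    # First call at this shape: throwaway inference so later calls are faster.
    classifier(tf.random.uniform(shape, 0, 1.0))
    warmed_up_shapes.add(shape)
  return classifier(img)
```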
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LRAccT3UhRga" + }, + "outputs": [], + "source": [ + "classifier = hub.load(model_handle)\n", + "\n", + "input_shape = image.shape\n", + "warmup_input = tf.random.uniform(input_shape, 0, 1.0)\n", + "%time warmup_logits = classifier(warmup_input).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e7vkdUqpBkfE" + }, + "source": [ + "Everything is ready for inference. Here you can see the top 5 results from the model for the selected image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I0QNHg3bk-G1" + }, + "outputs": [], + "source": [ + "# Run model on image\n", + "%time probabilities = tf.nn.softmax(classifier(image)).numpy()\n", + "\n", + "top_5 = tf.argsort(probabilities, axis=-1, direction=\"DESCENDING\")[0][:5].numpy()\n", + "np_classes = np.array(classes)\n", + "\n", + "# Some models include an additional 'background' class in the predictions, so\n", + "# we must account for this when reading the class labels.\n", + "includes_background_class = probabilities.shape[1] == 1001\n", + "\n", + "for i, item in enumerate(top_5):\n", + " class_index = item if includes_background_class else item + 1\n", + " line = f'({i+1}) {class_index:4} - {classes[class_index]}: {probabilities[0][top_5][i]}'\n", + " print(line)\n", + "\n", + "show_image(image, '')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4IJrq5eZDWl" + }, + "source": [ + "## Learn More\n", + "\n", + "If you want to learn more and try how to do Transfer Learning with these models you can try this tutorial: [Transfer Learning for Image classification](https://www.tensorflow.org/hub/tutorials/tf2_image_retraining) \n", + "\n", + "If you want to check on more image models you can check them out on [tfhub.dev](https://tfhub.dev/s?module-type=image-augmentation,image-classification,image-classification-logits,image-classifier,image-feature-vector,image-generator,image-object-detection,image-others,image-pose-detection,image-segmentation,image-style-transfer,image-super-resolution,image-rnn-agent)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "image_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_enhancing.ipynb b/site/en/hub/tutorials/image_enhancing.ipynb new file mode 100644 index 00000000000..3710ebd6d66 --- /dev/null +++ b/site/en/hub/tutorials/image_enhancing.ipynb @@ -0,0 +1,455 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GeerbrLA0uju" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "\n", + "Created by @[Adrish Dey](https://github.com/captain-pool) for [Google Summer of Code](https://summerofcode.withgoogle.com/) 2019" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yntM0JbY0uj5" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UJeo2a5C0uj2" + }, + "source": [ + "# Image Super Resolution using ESRGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ps4toA1d_tkc" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkW9jAmt_zjB" + }, + "source": [ + "This colab demonstrates use of TensorFlow Hub Module for Enhanced Super Resolution Generative Adversarial Network (*by Xintao Wang et.al.*) [[Paper](https://arxiv.org/pdf/1809.00219.pdf)] [[Code](https://github.com/captain-pool/GSOC/)]\n", + "\n", + "for image enhancing. *(Preferrably bicubically downsampled images).*\n", + "\n", + "Model trained on DIV2K Dataset (on bicubically downsampled images) on image patches of size 128 x 128." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LBGty4O_0ukJ" + }, + "source": [ + "**Preparing Environment**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnyLTyUt0ukN" + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "from PIL import Image\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "os.environ[\"TFHUB_DOWNLOAD_PROGRESS\"] = \"True\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dremsFdh0ukX" + }, + "outputs": [], + "source": [ + "!wget \"https://user-images.githubusercontent.com/12981474/40157448-eff91f06-5953-11e8-9a37-f6b5693fa03f.png\" -O original.png" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DXot2kru0ukh" + }, + "outputs": [], + "source": [ + "# Declaring Constants\n", + "IMAGE_PATH = \"original.png\"\n", + "SAVED_MODEL_PATH = \"https://tfhub.dev/captain-pool/esrgan-tf2/1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KF_tHde-p3rn" + }, + "source": [ + "**Defining Helper Functions**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IslbQmTj0ukz" + }, + "outputs": [], + "source": [ + "def preprocess_image(image_path):\n", + " \"\"\" Loads image from path and preprocesses to make it model ready\n", + " Args:\n", + " image_path: Path to the image file\n", + " \"\"\"\n", + " hr_image = tf.image.decode_image(tf.io.read_file(image_path))\n", + " # If PNG, remove the alpha channel. The model only supports\n", + " # images with 3 color channels.\n", + " if hr_image.shape[-1] == 4:\n", + " hr_image = hr_image[...,:-1]\n", + " hr_size = (tf.convert_to_tensor(hr_image.shape[:-1]) // 4) * 4\n", + " hr_image = tf.image.crop_to_bounding_box(hr_image, 0, 0, hr_size[0], hr_size[1])\n", + " hr_image = tf.cast(hr_image, tf.float32)\n", + " return tf.expand_dims(hr_image, 0)\n", + "\n", + "def save_image(image, filename):\n", + " \"\"\"\n", + " Saves unscaled Tensor Images.\n", + " Args:\n", + " image: 3D image tensor. [height, width, channels]\n", + " filename: Name of the file to save.\n", + " \"\"\"\n", + " if not isinstance(image, Image.Image):\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " image.save(\"%s.jpg\" % filename)\n", + " print(\"Saved as %s.jpg\" % filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uh1E2rBpnWxV" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "def plot_image(image, title=\"\"):\n", + " \"\"\"\n", + " Plots images from image tensors.\n", + " Args:\n", + " image: 3D image tensor. 
[height, width, channels].\n", + " title: Title to display in the plot.\n", + " \"\"\"\n", + " image = np.asarray(image)\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " plt.imshow(image)\n", + " plt.axis(\"off\")\n", + " plt.title(title)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycrCTvmlqBMD" + }, + "source": [ + "#### Performing Super Resolution of images loaded from path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L7XpMk8Y0uk7" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWgCbUa_0ulG" + }, + "outputs": [], + "source": [ + "# Plotting Original Resolution image\n", + "plot_image(tf.squeeze(hr_image), title=\"Original Image\")\n", + "save_image(tf.squeeze(hr_image), filename=\"Original Image\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ouwEyKLA0ulO" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dz79ncnT0ulX" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(hr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ABjkkJHC2jNL" + }, + "outputs": [], + "source": [ + "# Plotting Super Resolution Image\n", + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "save_image(tf.squeeze(fake_image), filename=\"Super Resolution\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tuKu18UYptkx" + }, + "source": [ + "### Evaluating Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qdz55sxMgiwO" + }, + "outputs": [], + "source": [ + "!wget \"https://lh4.googleusercontent.com/-Anmw5df4gj0/AAAAAAAAAAI/AAAAAAAAAAc/6HxU8XFLnQE/photo.jpg64\" -O test.jpg\n", + "IMAGE_PATH = \"test.jpg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F6tMNtqy0ukq" + }, + "outputs": [], + "source": [ + "# Defining helper functions\n", + "def downscale_image(image):\n", + " \"\"\"\n", + " Scales down images using bicubic downsampling.\n", + " Args:\n", + " image: 3D or 4D tensor of preprocessed image\n", + " \"\"\"\n", + " image_size = []\n", + " if len(image.shape) == 3:\n", + " image_size = [image.shape[1], image.shape[0]]\n", + " else:\n", + " raise ValueError(\"Dimension mismatch. 
Can work only on single image.\")\n", + "\n", + " image = tf.squeeze(\n", + " tf.cast(\n", + " tf.clip_by_value(image, 0, 255), tf.uint8))\n", + "\n", + " lr_image = np.asarray(\n", + " Image.fromarray(image.numpy())\n", + " .resize([image_size[0] // 4, image_size[1] // 4],\n", + " Image.BICUBIC))\n", + "\n", + " lr_image = tf.expand_dims(lr_image, 0)\n", + " lr_image = tf.cast(lr_image, tf.float32)\n", + " return lr_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r2ANR1XDy77I" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r_defaultO6qbTV" + }, + "outputs": [], + "source": [ + "lr_image = downscale_image(tf.squeeze(hr_image))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jRw1x6xY0ulj" + }, + "outputs": [], + "source": [ + "# Plotting Low Resolution Image\n", + "plot_image(tf.squeeze(lr_image), title=\"Low Resolution\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g--yyHg7qXCw" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZX-deZlhqaYz" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(lr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AmSga6MSq1PB" + }, + "outputs": [], + "source": [ + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "# Calculating PSNR wrt Original Image\n", + "psnr = tf.image.psnr(\n", + " tf.clip_by_value(fake_image, 0, 255),\n", + " tf.clip_by_value(hr_image, 0, 255), max_val=255)\n", + "print(\"PSNR Achieved: %f\" % psnr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YTBKCXPq9UZ" + }, + "source": [ + "**Comparing Outputs size by side.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ubdupldDypCy" + }, + "outputs": [], + "source": [ + "plt.rcParams['figure.figsize'] = [15, 10]\n", + "fig, axes = plt.subplots(1, 3)\n", + "fig.tight_layout()\n", + "plt.subplot(131)\n", + "plot_image(tf.squeeze(hr_image), title=\"Original\")\n", + "plt.subplot(132)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(lr_image), \"x4 Bicubic\")\n", + "plt.subplot(133)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(fake_image), \"Super Resolution\")\n", + "plt.savefig(\"ESRGAN_DIV2K.jpg\", bbox_inches=\"tight\")\n", + "print(\"PSNR: %f\" % psnr)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "image_enhancing.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_feature_vector.ipynb b/site/en/hub/tutorials/image_feature_vector.ipynb new file mode 100644 index 00000000000..b5283c45b3d --- /dev/null +++ b/site/en/hub/tutorials/image_feature_vector.ipynb @@ -0,0 +1,533 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"id": "bNnChGfZK2_w" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Z_ZvMk5JPFV" + }, + "source": [ + "# Classify Flowers with Transfer Learning\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh-LWtlqLtgH" + }, + "source": [ + "Have you ever seen a beautiful flower and wondered what kind of flower it is? Well, you're not the first, so let's build a way to identify the type of flower from a photo!\n", + "\n", + "For classifying images, a particular type of *deep neural network*, called a *convolutional neural network* has proved to be particularly powerful. However, modern convolutional neural networks have millions of parameters. Training them from scratch requires a lot of labeled training data and a lot of computing power (hundreds of GPU-hours or more). We only have about three thousand labeled photos and want to spend much less time, so we need to be more clever.\n", + "\n", + "We will use a technique called *transfer learning* where we take a pre-trained network (trained on about a million general images), use it to extract features, and train a new layer on top for our own task of classifying images of flowers.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NTrs9zBKJK1c" + }, + "outputs": [], + "source": [ + "import collections\n", + "import io\n", + "import math\n", + "import os\n", + "import random\n", + "from six.moves import urllib\n", + "\n", + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-T63G7NCSB" + }, + "source": [ + "## The flowers dataset\n", + "\n", + "The flowers dataset consists of images of flowers with 5 possible class labels.\n", + "\n", + "When training a machine learning model, we split our data into training and test datasets. We will train the model on our training data and then evaluate how well the model performs on data it has never seen - the test set.\n", + "\n", + "Let's download our training and test examples (it may take a while) and split them into train and test sets.\n", + "\n", + "Run the following two cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "HYQr1SILIxSK" + }, + "outputs": [], + "source": [ + "FLOWERS_DIR = './flower_photos'\n", + "TRAIN_FRACTION = 0.8\n", + "RANDOM_SEED = 2018\n", + "\n", + "\n", + "def download_images():\n", + " \"\"\"If the images aren't already downloaded, save them to FLOWERS_DIR.\"\"\"\n", + " if not os.path.exists(FLOWERS_DIR):\n", + " DOWNLOAD_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'\n", + " print('Downloading flower images from %s...' 
% DOWNLOAD_URL)\n", + " urllib.request.urlretrieve(DOWNLOAD_URL, 'flower_photos.tgz')\n", + " !tar xfz flower_photos.tgz\n", + " print('Flower photos are located in %s' % FLOWERS_DIR)\n", + "\n", + "\n", + "def make_train_and_test_sets():\n", + " \"\"\"Split the data into train and test sets and get the label classes.\"\"\"\n", + " train_examples, test_examples = [], []\n", + " shuffler = random.Random(RANDOM_SEED)\n", + " is_root = True\n", + " for (dirname, subdirs, filenames) in tf.gfile.Walk(FLOWERS_DIR):\n", + " # The root directory gives us the classes\n", + " if is_root:\n", + " subdirs = sorted(subdirs)\n", + " classes = collections.OrderedDict(enumerate(subdirs))\n", + " label_to_class = dict([(x, i) for i, x in enumerate(subdirs)])\n", + " is_root = False\n", + " # The sub directories give us the image files for training.\n", + " else:\n", + " filenames.sort()\n", + " shuffler.shuffle(filenames)\n", + " full_filenames = [os.path.join(dirname, f) for f in filenames]\n", + " label = dirname.split('/')[-1]\n", + " label_class = label_to_class[label]\n", + " # An example is the image file and it's label class.\n", + " examples = list(zip(full_filenames, [label_class] * len(filenames)))\n", + " num_train = int(len(filenames) * TRAIN_FRACTION)\n", + " train_examples.extend(examples[:num_train])\n", + " test_examples.extend(examples[num_train:])\n", + "\n", + " shuffler.shuffle(train_examples)\n", + " shuffler.shuffle(test_examples)\n", + " return train_examples, test_examples, classes\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_9NklpcANhtB" + }, + "outputs": [], + "source": [ + "# Download the images and split the images into train and test sets.\n", + "download_images()\n", + "TRAIN_EXAMPLES, TEST_EXAMPLES, CLASSES = make_train_and_test_sets()\n", + "NUM_CLASSES = len(CLASSES)\n", + "\n", + "print('\\nThe dataset has %d label classes: %s' % (NUM_CLASSES, CLASSES.values()))\n", + "print('There are %d training images' % len(TRAIN_EXAMPLES))\n", + "print('there are %d test images' % len(TEST_EXAMPLES))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tHF7bHTfnD6S" + }, + "source": [ + "## Explore the data\n", + "\n", + "The flowers dataset consists of examples which are labeled images of flowers. Each example contains a JPEG flower image and the class label: what type of flower it is. Let's display a few images together with their labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "1friUvN6kPYM" + }, + "outputs": [], + "source": [ + "#@title Show some labeled images\n", + "def get_label(example):\n", + " \"\"\"Get the label (number) for given example.\"\"\"\n", + " return example[1]\n", + "\n", + "def get_class(example):\n", + " \"\"\"Get the class (string) of given example.\"\"\"\n", + " return CLASSES[get_label(example)]\n", + "\n", + "def get_encoded_image(example):\n", + " \"\"\"Get the image data (encoded jpg) of given example.\"\"\"\n", + " image_path = example[0]\n", + " return tf.gfile.GFile(image_path, 'rb').read()\n", + "\n", + "def get_image(example):\n", + " \"\"\"Get image as np.array of pixels for given example.\"\"\"\n", + " return plt.imread(io.BytesIO(get_encoded_image(example)), format='jpg')\n", + "\n", + "def display_images(images_and_classes, cols=5):\n", + " \"\"\"Display given images and their labels in a grid.\"\"\"\n", + " rows = int(math.ceil(len(images_and_classes) / cols))\n", + " fig = plt.figure()\n", + " fig.set_size_inches(cols * 3, rows * 3)\n", + " for i, (image, flower_class) in enumerate(images_and_classes):\n", + " plt.subplot(rows, cols, i + 1)\n", + " plt.axis('off')\n", + " plt.imshow(image)\n", + " plt.title(flower_class)\n", + "\n", + "NUM_IMAGES = 15 #@param {type: 'integer'}\n", + "display_images([(get_image(example), get_class(example))\n", + " for example in TRAIN_EXAMPLES[:NUM_IMAGES]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hyjr6PuboTAg" + }, + "source": [ + "## Build the model\n", + "\n", + "We will load a [TF-Hub](https://tensorflow.org/hub) image feature vector module, stack a linear classifier on it, and add training and evaluation ops. The following cell builds a TF graph describing the model and its training, but it doesn't run the training (that will be the next step)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbkSRaK_oW5Y" + }, + "outputs": [], + "source": [ + "LEARNING_RATE = 0.01\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "# Load a pre-trained TF-Hub module for extracting features from images. We've\n", + "# chosen this particular module for speed, but many other choices are available.\n", + "image_module = hub.Module('https://tfhub.dev/google/imagenet/mobilenet_v2_035_128/feature_vector/2')\n", + "\n", + "# Preprocessing images into tensors with size expected by the image module.\n", + "encoded_images = tf.placeholder(tf.string, shape=[None])\n", + "image_size = hub.get_expected_image_size(image_module)\n", + "\n", + "\n", + "def decode_and_resize_image(encoded):\n", + " decoded = tf.image.decode_jpeg(encoded, channels=3)\n", + " decoded = tf.image.convert_image_dtype(decoded, tf.float32)\n", + " return tf.image.resize_images(decoded, image_size)\n", + "\n", + "\n", + "batch_images = tf.map_fn(decode_and_resize_image, encoded_images, dtype=tf.float32)\n", + "\n", + "# The image module can be applied as a function to extract feature vectors for a\n", + "# batch of images.\n", + "features = image_module(batch_images)\n", + "\n", + "\n", + "def create_model(features):\n", + " \"\"\"Build a model for classification from extracted features.\"\"\"\n", + " # Currently, the model is just a single linear layer. You can try to add\n", + " # another layer, but be careful... two linear layers (when activation=None)\n", + " # are equivalent to a single linear layer. 
You can create a nonlinear layer\n", + " # like this:\n", + " # layer = tf.layers.dense(inputs=..., units=..., activation=tf.nn.relu)\n", + " layer = tf.layers.dense(inputs=features, units=NUM_CLASSES, activation=None)\n", + " return layer\n", + "\n", + "\n", + "# For each class (kind of flower), the model outputs some real number as a score\n", + "# how much the input resembles this class. This vector of numbers is often\n", + "# called the \"logits\".\n", + "logits = create_model(features)\n", + "labels = tf.placeholder(tf.float32, [None, NUM_CLASSES])\n", + "\n", + "# Mathematically, a good way to measure how much the predicted probabilities\n", + "# diverge from the truth is the \"cross-entropy\" between the two probability\n", + "# distributions. For numerical stability, this is best done directly from the\n", + "# logits, not the probabilities extracted from them.\n", + "cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)\n", + "cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", + "\n", + "# Let's add an optimizer so we can train the network.\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)\n", + "train_op = optimizer.minimize(loss=cross_entropy_mean)\n", + "\n", + "# The \"softmax\" function transforms the logits vector into a vector of\n", + "# probabilities: non-negative numbers that sum up to one, and the i-th number\n", + "# says how likely the input comes from class i.\n", + "probabilities = tf.nn.softmax(logits)\n", + "\n", + "# We choose the highest one as the predicted class.\n", + "prediction = tf.argmax(probabilities, 1)\n", + "correct_prediction = tf.equal(prediction, tf.argmax(labels, 1))\n", + "\n", + "# The accuracy will allow us to eval on our test set. \n", + "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vvhYQ7-3AG_" + }, + "source": [ + "## Train the network\n", + "\n", + "Now that our model is built, let's train it and see how it performs on our test set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1YnBg7-OS3Dz" + }, + "outputs": [], + "source": [ + "# How long will we train the network (number of batches).\n", + "NUM_TRAIN_STEPS = 100 #@param {type: 'integer'}\n", + "# How many training examples we use in each step.\n", + "TRAIN_BATCH_SIZE = 10 #@param {type: 'integer'}\n", + "# How often to evaluate the model performance.\n", + "EVAL_EVERY = 10 #@param {type: 'integer'}\n", + "\n", + "def get_batch(batch_size=None, test=False):\n", + " \"\"\"Get a random batch of examples.\"\"\"\n", + " examples = TEST_EXAMPLES if test else TRAIN_EXAMPLES\n", + " batch_examples = random.sample(examples, batch_size) if batch_size else examples\n", + " return batch_examples\n", + "\n", + "def get_images_and_labels(batch_examples):\n", + " images = [get_encoded_image(e) for e in batch_examples]\n", + " one_hot_labels = [get_label_one_hot(e) for e in batch_examples]\n", + " return images, one_hot_labels\n", + "\n", + "def get_label_one_hot(example):\n", + " \"\"\"Get the one hot encoding vector for the example.\"\"\"\n", + " one_hot_vector = np.zeros(NUM_CLASSES)\n", + " np.put(one_hot_vector, get_label(example), 1)\n", + " return one_hot_vector\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + " for i in range(NUM_TRAIN_STEPS):\n", + " # Get a random batch of training examples.\n", + " train_batch = get_batch(batch_size=TRAIN_BATCH_SIZE)\n", + " batch_images, batch_labels = get_images_and_labels(train_batch)\n", + " # Run the train_op to train the model.\n", + " train_loss, _, train_accuracy = sess.run(\n", + " [cross_entropy_mean, train_op, accuracy],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " is_final_step = (i == (NUM_TRAIN_STEPS - 1))\n", + " if i % EVAL_EVERY == 0 or is_final_step:\n", + " # Get a batch of test examples.\n", + " test_batch = get_batch(batch_size=None, test=True)\n", + " batch_images, batch_labels = get_images_and_labels(test_batch)\n", + " # Evaluate how well our model performs on the test set.\n", + " test_loss, test_accuracy, test_prediction, correct_predicate = sess.run(\n", + " [cross_entropy_mean, accuracy, prediction, correct_prediction],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " print('Test accuracy at step %s: %.2f%%' % (i, (test_accuracy * 100)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZFUNJxuH2t0V" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(test_labels, predictions):\n", + " \"\"\"Compute confusion matrix and normalize.\"\"\"\n", + " confusion = sk_metrics.confusion_matrix(\n", + " np.argmax(test_labels, axis=1), predictions)\n", + " confusion_normalized = confusion.astype(\"float\") / confusion.sum(axis=1)\n", + " axis_labels = list(CLASSES.values())\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.2f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(batch_labels, test_prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uu3vo8DK8BdL" + }, + "source": [ + "## Incorrect predictions\n", + "\n", + "Let's a take a closer look at the test examples that our model got wrong.\n", + "\n", + "- Are there any mislabeled examples in our test set?\n", + "- Is there any 
bad data in the test set - images that aren't actually pictures of flowers?\n", + "- Are there images where you can understand why the model made a mistake?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hqa0V3WN8C9M" + }, + "outputs": [], + "source": [ + "incorrect = [\n", + " (example, CLASSES[prediction])\n", + " for example, prediction, is_correct in zip(test_batch, test_prediction, correct_predicate)\n", + " if not is_correct\n", + "]\n", + "display_images(\n", + " [(get_image(example), \"prediction: {0}\\nlabel:{1}\".format(incorrect_prediction, get_class(example)))\n", + " for (example, incorrect_prediction) in incorrect[:20]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YN_s04Il8TvK" + }, + "source": [ + "## Exercises: Improve the model!\n", + "\n", + "We've trained a baseline model, now let's try to improve it to achieve better accuracy. (Remember that you'll need to re-run the cells when you make a change.)\n", + "\n", + "### Exercise 1: Try a different image model.\n", + "With TF-Hub, trying a few different image models is simple. Just replace the `\"https://tfhub.dev/google/imagenet/mobilenet_v2_050_128/feature_vector/2\"` handle in the `hub.Module()` call with a handle of different module and rerun all the code. You can see all available image modules at [tfhub.dev](https://tfhub.dev/s?module-type=image-feature-vector). \n", + "\n", + "A good choice might be one of the other [MobileNet V2 modules](https://tfhub.dev/s?module-type=image-feature-vector&network-architecture=mobilenet-v2). Many of the modules -- including the MobileNet modules -- were trained on the [ImageNet dataset](https://www.tensorflow.org/datasets/catalog/imagenet2012) which contains over 1 million images and 1000 classes. Choosing a network architecture provides a tradeoff between speed and classification accuracy: models like MobileNet or NASNet Mobile are fast and small, more traditional architectures like Inception and ResNet were designed for accuracy.\n", + "\n", + "For the larger Inception V3 architecture, you can also explore the benefits of pre-training on a domain closer to your own task: it is also available as a [module trained on the iNaturalist dataset](https://tfhub.dev/google/inaturalist/inception_v3/feature_vector/1) of plants and animals.\n", + "\n", + "### Exercise 2: Add a hidden layer.\n", + "Stack a hidden layer between extracted image features and the linear classifier (in function `create_model()` above). To create a non-linear hidden layer with e.g. 100 nodes, use [tf.layers.dense](https://www.tensorflow.org/api_docs/python/tf/compat/v1/layers/dense) with units set to 100 and activation set to `tf.nn.relu`. Does changing the size of the hidden layer affect the test accuracy? Does adding second hidden layer improve the accuracy?\n", + "\n", + "### Exercise 3: Change hyperparameters.\n", + "Does increasing *number of training steps* improves final accuracy? Can you *change the learning rate* to make your model converge more quickly? Does the training *batch size* affect your model's performance?\n", + "\n", + "### Exercise 4: Try a different optimizer.\n", + "\n", + "Replace the basic GradientDescentOptimizer with a more sophisticate optimizer, e.g. [AdagradOptimizer](https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdagradOptimizer). Does it make a difference to your model training? 
If you want to learn more about the benefits of different optimization algorithms, check out [this post](http://ruder.io/optimizing-gradient-descent/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kdwVXO1eJS5-" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "If you are interested in a more advanced version of this tutorial, check out the [TensorFlow image retraining tutorial](https://www.tensorflow.org/hub/tutorials/image_retraining) which walks you through visualizing the training using TensorBoard, advanced techniques like dataset augmentation by distorting images, and replacing the flowers dataset to learn an image classifier on your own dataset.\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](http://tensorflow.org) and see the TF-Hub API documentation is available at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](http://tfhub.dev) including more image feature vector modules and text embedding modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "image_feature_vector.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movenet.ipynb b/site/en/hub/tutorials/movenet.ipynb new file mode 100644 index 00000000000..f7955a5253b --- /dev/null +++ b/site/en/hub/tutorials/movenet.ipynb @@ -0,0 +1,816 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KqtQzBCpIJ7Y" + }, + "source": [ + "# MoveNet: Ultra fast and accurate pose detection model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MCmFOosnSkCd" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6x99e0aEY_d6" + }, + "source": [ + "**[MoveNet](https://t.co/QpfnVL0YYI?amp=1)** is an ultra fast and accurate model that detects 17 keypoints of a body. The model is offered on [TF Hub](https://tfhub.dev/s?q=movenet) with two variants, known as Lightning and Thunder. Lightning is intended for latency-critical applications, while Thunder is intended for applications that require high accuracy. Both models run faster than real time (30+ FPS) on most modern desktops, laptops, and phones, which proves crucial for live fitness, health, and wellness applications.\n", + "\n", + "\n", + "\"drawing\"/\n", + "\n", + "*Images downloaded from Pexels (https://www.pexels.com/)\n", + "\n", + "This Colab walks you through the details of how to load MoveNet, and run inference on the input image and video below.\n", + "\n", + "Note: check out the [live demo](https://storage.googleapis.com/tfjs-models/demos/pose-detection/index.html?model=movenet) for how the model works!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "10_zkgbZBkIE" + }, + "source": [ + "# Human Pose Estimation with MoveNet" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9u_VGR6_BmbZ" + }, + "source": [ + "## Visualization libraries & Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TtcwSIcgbIVN" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9BLeJv-pCCld" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import numpy as np\n", + "import cv2\n", + "\n", + "# Import matplotlib libraries\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.collections import LineCollection\n", + "import matplotlib.patches as patches\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython.display import HTML, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bEJBMeRb3YUy" + }, + "outputs": [], + "source": [ + "#@title Helper functions for visualization\n", + "\n", + "# Dictionary that maps from joint names to keypoint indices.\n", + "KEYPOINT_DICT = {\n", + " 'nose': 0,\n", + " 'left_eye': 1,\n", + " 'right_eye': 2,\n", + " 'left_ear': 3,\n", + " 'right_ear': 4,\n", + " 'left_shoulder': 5,\n", + " 'right_shoulder': 6,\n", + " 'left_elbow': 7,\n", + " 'right_elbow': 8,\n", + " 'left_wrist': 9,\n", + " 'right_wrist': 10,\n", + " 'left_hip': 11,\n", + " 'right_hip': 12,\n", + " 'left_knee': 13,\n", + " 'right_knee': 14,\n", + " 'left_ankle': 15,\n", + " 'right_ankle': 16\n", + "}\n", + "\n", + "# Maps bones to a matplotlib color name.\n", + "KEYPOINT_EDGE_INDS_TO_COLOR = {\n", + " (0, 1): 'm',\n", + " (0, 2): 'c',\n", + " (1, 3): 'm',\n", + " (2, 4): 'c',\n", + " (0, 5): 'm',\n", + " (0, 6): 'c',\n", + " (5, 7): 'm',\n", + " (7, 9): 'm',\n", + " (6, 8): 'c',\n", + " (8, 10): 'c',\n", + " (5, 6): 'y',\n", + " (5, 11): 'm',\n", + " (6, 12): 'c',\n", + " (11, 12): 'y',\n", + " (11, 13): 'm',\n", + " (13, 15): 'm',\n", + " (12, 14): 'c',\n", + " (14, 16): 'c'\n", + "}\n", + "\n", + "def _keypoints_and_edges_for_display(keypoints_with_scores,\n", + " height,\n", + " 
width,\n", + " keypoint_threshold=0.11):\n", + " \"\"\"Returns high confidence keypoints and edges for visualization.\n", + "\n", + " Args:\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " height: height of the image in pixels.\n", + " width: width of the image in pixels.\n", + " keypoint_threshold: minimum confidence score for a keypoint to be\n", + " visualized.\n", + "\n", + " Returns:\n", + " A (keypoints_xy, edges_xy, edge_colors) containing:\n", + " * the coordinates of all keypoints of all detected entities;\n", + " * the coordinates of all skeleton edges of all detected entities;\n", + " * the colors in which the edges should be plotted.\n", + " \"\"\"\n", + " keypoints_all = []\n", + " keypoint_edges_all = []\n", + " edge_colors = []\n", + " num_instances, _, _, _ = keypoints_with_scores.shape\n", + " for idx in range(num_instances):\n", + " kpts_x = keypoints_with_scores[0, idx, :, 1]\n", + " kpts_y = keypoints_with_scores[0, idx, :, 0]\n", + " kpts_scores = keypoints_with_scores[0, idx, :, 2]\n", + " kpts_absolute_xy = np.stack(\n", + " [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)\n", + " kpts_above_thresh_absolute = kpts_absolute_xy[\n", + " kpts_scores > keypoint_threshold, :]\n", + " keypoints_all.append(kpts_above_thresh_absolute)\n", + "\n", + " for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():\n", + " if (kpts_scores[edge_pair[0]] > keypoint_threshold and\n", + " kpts_scores[edge_pair[1]] > keypoint_threshold):\n", + " x_start = kpts_absolute_xy[edge_pair[0], 0]\n", + " y_start = kpts_absolute_xy[edge_pair[0], 1]\n", + " x_end = kpts_absolute_xy[edge_pair[1], 0]\n", + " y_end = kpts_absolute_xy[edge_pair[1], 1]\n", + " line_seg = np.array([[x_start, y_start], [x_end, y_end]])\n", + " keypoint_edges_all.append(line_seg)\n", + " edge_colors.append(color)\n", + " if keypoints_all:\n", + " keypoints_xy = np.concatenate(keypoints_all, axis=0)\n", + " else:\n", + " keypoints_xy = np.zeros((0, 17, 2))\n", + "\n", + " if keypoint_edges_all:\n", + " edges_xy = np.stack(keypoint_edges_all, axis=0)\n", + " else:\n", + " edges_xy = np.zeros((0, 2, 2))\n", + " return keypoints_xy, edges_xy, edge_colors\n", + "\n", + "\n", + "def draw_prediction_on_image(\n", + " image, keypoints_with_scores, crop_region=None, close_figure=False,\n", + " output_image_height=None):\n", + " \"\"\"Draws the keypoint predictions on image.\n", + "\n", + " Args:\n", + " image: A numpy array with shape [height, width, channel] representing the\n", + " pixel values of the input image.\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " crop_region: A dictionary that defines the coordinates of the bounding box\n", + " of the crop region in normalized coordinates (see the init_crop_region\n", + " function below for more detail). 
If provided, this function will also\n", + " draw the bounding box on the image.\n", + " output_image_height: An integer indicating the height of the output image.\n", + " Note that the image aspect ratio will be the same as the input image.\n", + "\n", + " Returns:\n", + " A numpy array with shape [out_height, out_width, channel] representing the\n", + " image overlaid with keypoint predictions.\n", + " \"\"\"\n", + " height, width, channel = image.shape\n", + " aspect_ratio = float(width) / height\n", + " fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))\n", + " # To remove the huge white borders\n", + " fig.tight_layout(pad=0)\n", + " ax.margins(0)\n", + " ax.set_yticklabels([])\n", + " ax.set_xticklabels([])\n", + " plt.axis('off')\n", + "\n", + " im = ax.imshow(image)\n", + " line_segments = LineCollection([], linewidths=(4), linestyle='solid')\n", + " ax.add_collection(line_segments)\n", + " # Turn off tick labels\n", + " scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)\n", + "\n", + " (keypoint_locs, keypoint_edges,\n", + " edge_colors) = _keypoints_and_edges_for_display(\n", + " keypoints_with_scores, height, width)\n", + "\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_edges.shape[0]:\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_locs.shape[0]:\n", + " scat.set_offsets(keypoint_locs)\n", + "\n", + " if crop_region is not None:\n", + " xmin = max(crop_region['x_min'] * width, 0.0)\n", + " ymin = max(crop_region['y_min'] * height, 0.0)\n", + " rec_width = min(crop_region['x_max'], 0.99) * width - xmin\n", + " rec_height = min(crop_region['y_max'], 0.99) * height - ymin\n", + " rect = patches.Rectangle(\n", + " (xmin,ymin),rec_width,rec_height,\n", + " linewidth=1,edgecolor='b',facecolor='none')\n", + " ax.add_patch(rect)\n", + "\n", + " fig.canvas.draw()\n", + " image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " image_from_plot = image_from_plot.reshape(\n", + " fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close(fig)\n", + " if output_image_height is not None:\n", + " output_image_width = int(output_image_height / height * width)\n", + " image_from_plot = cv2.resize(\n", + " image_from_plot, dsize=(output_image_width, output_image_height),\n", + " interpolation=cv2.INTER_CUBIC)\n", + " return image_from_plot\n", + "\n", + "def to_gif(images, duration):\n", + " \"\"\"Converts image sequence (4D numpy array) to gif.\"\"\"\n", + " imageio.mimsave('./animation.gif', images, duration=duration)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "def progress(value, max=100):\n", + " return HTML(\"\"\"\n", + " \n", + " {value}\n", + " \n", + " \"\"\".format(value=value, max=max))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvrN0iQiOxhR" + }, + "source": [ + "## Load Model from TF hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zeGHgANcT7a1" + }, + "outputs": [], + "source": [ + "model_name = \"movenet_lightning\" #@param [\"movenet_lightning\", \"movenet_thunder\", \"movenet_lightning_f16.tflite\", \"movenet_thunder_f16.tflite\", \"movenet_lightning_int8.tflite\", \"movenet_thunder_int8.tflite\"]\n", + "\n", + "if \"tflite\" in model_name:\n", + " if \"movenet_lightning_f16\" in model_name:\n", + " !wget -q -O model.tflite 
https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_f16\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite\n", + " input_size = 256\n", + " elif \"movenet_lightning_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " # Initialize the TFLite interpreter\n", + " interpreter = tf.lite.Interpreter(model_path=\"model.tflite\")\n", + " interpreter.allocate_tensors()\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " # TF Lite format expects tensor type of uint8.\n", + " input_image = tf.cast(input_image, dtype=tf.uint8)\n", + " input_details = interpreter.get_input_details()\n", + " output_details = interpreter.get_output_details()\n", + " interpreter.set_tensor(input_details[0]['index'], input_image.numpy())\n", + " # Invoke inference.\n", + " interpreter.invoke()\n", + " # Get the model prediction.\n", + " keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])\n", + " return keypoints_with_scores\n", + "\n", + "else:\n", + " if \"movenet_lightning\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/lightning/4\")\n", + " input_size = 192\n", + " elif \"movenet_thunder\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/thunder/4\")\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. 
Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " model = module.signatures['serving_default']\n", + "\n", + " # SavedModel format expects tensor type of int32.\n", + " input_image = tf.cast(input_image, dtype=tf.int32)\n", + " # Run model inference.\n", + " outputs = model(input_image)\n", + " # Output is a [1, 1, 17, 3] tensor.\n", + " keypoints_with_scores = outputs['output_0'].numpy()\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-h1qHYaqD9ap" + }, + "source": [ + "## Single Image Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymTVR2I9x22I" + }, + "source": [ + "This section demonstrates the minimum working example of running the model on a **single image** to predict the 17 human keypoints." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5I3xBq80E3N_" + }, + "source": [ + "### Load Input Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GMO4B-wx5psP" + }, + "outputs": [], + "source": [ + "!curl -o input_image.jpeg https://images.pexels.com/photos/4384679/pexels-photo-4384679.jpeg --silent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lJZYQ8KYFQ6x" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'input_image.jpeg'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_jpeg(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_UWRdQxE6WN" + }, + "source": [ + "### Run Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VHmTwACwFW-v" + }, + "outputs": [], + "source": [ + "# Resize and pad the image to keep the aspect ratio and fit the expected size.\n", + "input_image = tf.expand_dims(image, axis=0)\n", + "input_image = tf.image.resize_with_pad(input_image, input_size, input_size)\n", + "\n", + "# Run model inference.\n", + "keypoints_with_scores = movenet(input_image)\n", + "\n", + "# Visualize the predictions with image.\n", + "display_image = tf.expand_dims(image, axis=0)\n", + "display_image = tf.cast(tf.image.resize_with_pad(\n", + " display_image, 1280, 1280), dtype=tf.int32)\n", + "output_overlay = draw_prediction_on_image(\n", + " np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)\n", + "\n", + "plt.figure(figsize=(5, 5))\n", + "plt.imshow(output_overlay)\n", + "_ = plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rKm-B0eMYeg8" + }, + "source": [ + "## Video (Image Sequence) Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gdPFXabLyiKv" + }, + "source": [ + "This section demonstrates how to apply intelligent cropping based on detections from the previous frame when the input is a sequence of frames. 
This allows the model to devote its attention and resources to the main subject, resulting in much better prediction quality without sacrificing the speed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "SYFdK-JHYhrv" + }, + "outputs": [], + "source": [ + "#@title Cropping Algorithm\n", + "\n", + "# Confidence score to determine whether a keypoint prediction is reliable.\n", + "MIN_CROP_KEYPOINT_SCORE = 0.2\n", + "\n", + "def init_crop_region(image_height, image_width):\n", + " \"\"\"Defines the default crop region.\n", + "\n", + " The function provides the initial crop region (pads the full image from both\n", + " sides to make it a square image) when the algorithm cannot reliably determine\n", + " the crop region from the previous frame.\n", + " \"\"\"\n", + " if image_width > image_height:\n", + " box_height = image_width / image_height\n", + " box_width = 1.0\n", + " y_min = (image_height / 2 - image_width / 2) / image_height\n", + " x_min = 0.0\n", + " else:\n", + " box_height = 1.0\n", + " box_width = image_height / image_width\n", + " y_min = 0.0\n", + " x_min = (image_width / 2 - image_height / 2) / image_width\n", + "\n", + " return {\n", + " 'y_min': y_min,\n", + " 'x_min': x_min,\n", + " 'y_max': y_min + box_height,\n", + " 'x_max': x_min + box_width,\n", + " 'height': box_height,\n", + " 'width': box_width\n", + " }\n", + "\n", + "def torso_visible(keypoints):\n", + " \"\"\"Checks whether there are enough torso keypoints.\n", + "\n", + " This function checks whether the model is confident at predicting one of the\n", + " shoulders/hips which is required to determine a good crop region.\n", + " \"\"\"\n", + " return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE) and\n", + " (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE))\n", + "\n", + "def determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x):\n", + " \"\"\"Calculates the maximum distance from each keypoints to the center location.\n", + "\n", + " The function returns the maximum distances from the two sets of keypoints:\n", + " full 17 keypoints and 4 torso keypoints. The returned information will be\n", + " used to determine the crop size. 
See determineCropRegion for more detail.\n", + " \"\"\"\n", + " torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']\n", + " max_torso_yrange = 0.0\n", + " max_torso_xrange = 0.0\n", + " for joint in torso_joints:\n", + " dist_y = abs(center_y - target_keypoints[joint][0])\n", + " dist_x = abs(center_x - target_keypoints[joint][1])\n", + " if dist_y > max_torso_yrange:\n", + " max_torso_yrange = dist_y\n", + " if dist_x > max_torso_xrange:\n", + " max_torso_xrange = dist_x\n", + "\n", + " max_body_yrange = 0.0\n", + " max_body_xrange = 0.0\n", + " for joint in KEYPOINT_DICT.keys():\n", + " if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:\n", + " continue\n", + " dist_y = abs(center_y - target_keypoints[joint][0]);\n", + " dist_x = abs(center_x - target_keypoints[joint][1]);\n", + " if dist_y > max_body_yrange:\n", + " max_body_yrange = dist_y\n", + "\n", + " if dist_x > max_body_xrange:\n", + " max_body_xrange = dist_x\n", + "\n", + " return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]\n", + "\n", + "def determine_crop_region(\n", + " keypoints, image_height,\n", + " image_width):\n", + " \"\"\"Determines the region to crop the image for the model to run inference on.\n", + "\n", + " The algorithm uses the detected joints from the previous frame to estimate\n", + " the square region that encloses the full body of the target person and\n", + " centers at the midpoint of two hip joints. The crop size is determined by\n", + " the distances between each joints and the center point.\n", + " When the model is not confident with the four torso joint predictions, the\n", + " function returns a default crop which is the full image padded to square.\n", + " \"\"\"\n", + " target_keypoints = {}\n", + " for joint in KEYPOINT_DICT.keys():\n", + " target_keypoints[joint] = [\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width\n", + " ]\n", + "\n", + " if torso_visible(keypoints):\n", + " center_y = (target_keypoints['left_hip'][0] +\n", + " target_keypoints['right_hip'][0]) / 2;\n", + " center_x = (target_keypoints['left_hip'][1] +\n", + " target_keypoints['right_hip'][1]) / 2;\n", + "\n", + " (max_torso_yrange, max_torso_xrange,\n", + " max_body_yrange, max_body_xrange) = determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x)\n", + "\n", + " crop_length_half = np.amax(\n", + " [max_torso_xrange * 1.9, max_torso_yrange * 1.9,\n", + " max_body_yrange * 1.2, max_body_xrange * 1.2])\n", + "\n", + " tmp = np.array(\n", + " [center_x, image_width - center_x, center_y, image_height - center_y])\n", + " crop_length_half = np.amin(\n", + " [crop_length_half, np.amax(tmp)]);\n", + "\n", + " crop_corner = [center_y - crop_length_half, center_x - crop_length_half];\n", + "\n", + " if crop_length_half > max(image_width, image_height) / 2:\n", + " return init_crop_region(image_height, image_width)\n", + " else:\n", + " crop_length = crop_length_half * 2;\n", + " return {\n", + " 'y_min': crop_corner[0] / image_height,\n", + " 'x_min': crop_corner[1] / image_width,\n", + " 'y_max': (crop_corner[0] + crop_length) / image_height,\n", + " 'x_max': (crop_corner[1] + crop_length) / image_width,\n", + " 'height': (crop_corner[0] + crop_length) / image_height -\n", + " crop_corner[0] / image_height,\n", + " 'width': (crop_corner[1] + crop_length) / image_width -\n", + " crop_corner[1] / image_width\n", + " }\n", + " else:\n", + " return 
init_crop_region(image_height, image_width)\n", + "\n", + "def crop_and_resize(image, crop_region, crop_size):\n", + " \"\"\"Crops and resize the image to prepare for the model input.\"\"\"\n", + " boxes=[[crop_region['y_min'], crop_region['x_min'],\n", + " crop_region['y_max'], crop_region['x_max']]]\n", + " output_image = tf.image.crop_and_resize(\n", + " image, box_indices=[0], boxes=boxes, crop_size=crop_size)\n", + " return output_image\n", + "\n", + "def run_inference(movenet, image, crop_region, crop_size):\n", + " \"\"\"Runs model inference on the cropped region.\n", + "\n", + " The function runs the model inference on the cropped region and updates the\n", + " model output to the original image coordinate system.\n", + " \"\"\"\n", + " image_height, image_width, _ = image.shape\n", + " input_image = crop_and_resize(\n", + " tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)\n", + " # Run model inference.\n", + " keypoints_with_scores = movenet(input_image)\n", + " # Update the coordinates.\n", + " for idx in range(17):\n", + " keypoints_with_scores[0, 0, idx, 0] = (\n", + " crop_region['y_min'] * image_height +\n", + " crop_region['height'] * image_height *\n", + " keypoints_with_scores[0, 0, idx, 0]) / image_height\n", + " keypoints_with_scores[0, 0, idx, 1] = (\n", + " crop_region['x_min'] * image_width +\n", + " crop_region['width'] * image_width *\n", + " keypoints_with_scores[0, 0, idx, 1]) / image_width\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2JmA1xAEntQ" + }, + "source": [ + "### Load Input Image Sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CzJxbxDckWl2" + }, + "outputs": [], + "source": [ + "!wget -q -O dance.gif https://github.com/tensorflow/tfjs-models/raw/master/pose-detection/assets/dance_input.gif" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IxbMFZJUkd6W" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'dance.gif'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_gif(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CJKeQ4siEtU9" + }, + "source": [ + "### Run Inference with Cropping Algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9B57XS0NZPIy" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "num_frames, image_height, image_width, _ = image.shape\n", + "crop_region = init_crop_region(image_height, image_width)\n", + "\n", + "output_images = []\n", + "bar = display(progress(0, num_frames-1), display_id=True)\n", + "for frame_idx in range(num_frames):\n", + " keypoints_with_scores = run_inference(\n", + " movenet, image[frame_idx, :, :, :], crop_region,\n", + " crop_size=[input_size, input_size])\n", + " output_images.append(draw_prediction_on_image(\n", + " image[frame_idx, :, :, :].numpy().astype(np.int32),\n", + " keypoints_with_scores, crop_region=None,\n", + " close_figure=True, output_image_height=300))\n", + " crop_region = determine_crop_region(\n", + " keypoints_with_scores, image_height, image_width)\n", + " bar.update(progress(frame_idx, num_frames-1))\n", + "\n", + "# Prepare gif visualization.\n", + "output = np.stack(output_images, axis=0)\n", + "to_gif(output, duration=100)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9u_VGR6_BmbZ", + "5I3xBq80E3N_", + "L2JmA1xAEntQ" + 
], + "name": "movenet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movinet.ipynb b/site/en/hub/tutorials/movinet.ipynb new file mode 100644 index 00000000000..24600256cf9 --- /dev/null +++ b/site/en/hub/tutorials/movinet.ipynb @@ -0,0 +1,1047 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# MoViNet for streaming action recognition " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-vxk2Kbc_KSP" + }, + "source": [ + "This tutorial demonstrates how to use a pretrained video classification model to classify an activity (such as dancing, swimming, biking etc) in the given video. \n", + "\n", + "The model architecture used in this tutorial is called [MoViNet](https://arxiv.org/pdf/2103.11511.pdf) (Mobile Video Networks). MoVieNets are a family of efficient video classification models trained on huge dataset ([Kinetics 600](https://deepmind.com/research/open-source/kinetics)).\n", + "\n", + "In contrast to the [i3d models](https://tfhub.dev/s?q=i3d-kinetics) available on TF Hub, MoViNets also support frame-by-frame inference on streaming video. \n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. \n", + "\n", + "This MoViNet tutorial is part of a series of TensorFlow video tutorials. Here are the other three tutorials:\n", + "\n", + "- [Load video data](https://www.tensorflow.org/tutorials/load_data/video): This tutorial explains how to load and preprocess video data into a TensorFlow dataset pipeline from scratch.\n", + "- [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification). Note that this tutorial uses a (2+1)D CNN that decomposes the spatial and temporal aspects of 3D data; if you are using volumetric data such as an MRI scan, consider using a 3D CNN instead of a (2+1)D CNN.\n", + "- [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet): This tutorial explains how to use a pre-trained video classification model trained on a different dataset with the UCF-101 dataset.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3E96e1UKQ8uR" + }, + "source": [ + "![jumping jacks plot](https://storage.googleapis.com/tf_model_garden/vision/movinet/artifacts/jumpingjacks_plot.gif)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8_oLnvJy7kz5" + }, + "source": [ + "## Setup\n", + "\n", + "For inference on smaller models (A0-A2), CPU is sufficient for this Colab." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GUgUMGmY1yq-" + }, + "outputs": [], + "source": [ + "!sudo apt install -y ffmpeg\n", + "!pip install -q mediapy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s3khsunT7kWa" + }, + "outputs": [], + "source": [ + "!pip uninstall -q -y opencv-python-headless\n", + "!pip install -q \"opencv-python-headless<4.3\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dI_1csl6Q-gH" + }, + "outputs": [], + "source": [ + "# Import libraries\n", + "import pathlib\n", + "\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import mediapy as media\n", + "import numpy as np\n", + "import PIL\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tqdm\n", + "\n", + "mpl.rcParams.update({\n", + " 'font.size': 10,\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pn8K9oWbmREi" + }, + "source": [ + "Get the kinetics 600 label list, and print the first few labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2VJUAcjhkfb3" + }, + "outputs": [], + "source": [ + "labels_path = tf.keras.utils.get_file(\n", + " fname='labels.txt',\n", + " origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'\n", + ")\n", + "labels_path = pathlib.Path(labels_path)\n", + "\n", + "lines = labels_path.read_text().splitlines()\n", + "KINETICS_600_LABELS = np.array([line.strip() for line in lines])\n", + "KINETICS_600_LABELS[:20]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G9BU5XsOmaq3" + }, + "source": [ + "To provide a simple example video for classification, we can load a short gif of jumping jacks being performed.\n", + "\n", + "![jumping jacks](https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif)\n", + "\n", + "Attribution: Footage shared by [Coach Bobby Bluford](https://www.youtube.com/watch?v=-AxHpj-EuPg) on YouTube under the CC-BY license." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8aFKMbr4mfSg" + }, + "source": [ + "Download the gif." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w62jqXhaSb15" + }, + "outputs": [], + "source": [ + "jumpingjack_url = 'https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif'\n", + "jumpingjack_path = tf.keras.utils.get_file(\n", + " fname='jumpingjack.gif',\n", + " origin=jumpingjack_url,\n", + " cache_dir='.', cache_subdir='.',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hdRS_22PebfB" + }, + "source": [ + "Define a function to read a gif file into a `tf.Tensor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mPhmCu6oSi5f" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Read and process a video\n", + "def load_gif(file_path, image_size=(224, 224)):\n", + " \"\"\"Loads a gif file into a TF tensor.\n", + "\n", + " Use images resized to match what's expected by your model.\n", + " The model pages say the \"A2\" models expect 224 x 224 images at 5 fps\n", + "\n", + " Args:\n", + " file_path: path to the location of a gif file.\n", + " image_size: a tuple of target size.\n", + "\n", + " Returns:\n", + " a video of the gif file\n", + " \"\"\"\n", + " # Load a gif file, convert it to a TF tensor\n", + " raw = tf.io.read_file(file_path)\n", + " video = tf.io.decode_gif(raw)\n", + " # Resize the video\n", + " video = tf.image.resize(video, image_size)\n", + " # change dtype to a float32\n", + " # Hub models always want images normalized to [0,1]\n", + " # ref: https://www.tensorflow.org/hub/common_signatures/images#input\n", + " video = tf.cast(video, tf.float32) / 255.\n", + " return video" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xx7cZm8vpDJm" + }, + "source": [ + "The video's shape is `(frames, height, width, colors)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E7k_PmbFSkHv" + }, + "outputs": [], + "source": [ + "jumpingjack=load_gif(jumpingjack_path)\n", + "jumpingjack.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LcKFy3oedBvF" + }, + "source": [ + "## How to use the model\n", + "\n", + "This section contains a walkthrough showing how to use the [models from TensorFlow Hub](https://tfhub.dev/google/collections/movinet/1). If you just want to see the models in action, skip to the next section.\n", + "\n", + "There are two versions of each model: `base` and `streaming`.\n", + "\n", + "* The `base` version takes a video as input, and returns the probabilities averaged over the frames.\n", + "* The `streaming` version takes a video frame and an RNN state as input, and returns the predictions for that frame, and the new RNN state. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQO6Zb8Hm-9q" + }, + "source": [ + "### The base model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfnYU20JnPqp" + }, + "source": [ + "Download the [pretrained model from TensorFlow Hub](https://tfhub.dev/tensorflow/movinet/a2/base/kinetics-600/classification/3). 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FnpPo6HSR7qv" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'base'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jvaFwKhxndmb" + }, + "source": [ + "This version of the model has one `signature`. It takes an `image` argument which is a `tf.float32` with shape `(batch, frames, height, width, colors)`. It returns a dictionary containing one output: A `tf.float32` tensor of logits with shape `(batch, classes)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GzZ4Y03T_gH" + }, + "outputs": [], + "source": [ + "sig = model.signatures['serving_default']\n", + "print(sig.pretty_printed_signature())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M4Xny1ANomi4" + }, + "source": [ + "To run this signature on the video you need to add the outer `batch` dimension to the video first." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBOFEDG1XvZE" + }, + "outputs": [], + "source": [ + "#warmup\n", + "sig(image = jumpingjack[tf.newaxis, :1]);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCeW3KycVbGn" + }, + "outputs": [], + "source": [ + "%%time\n", + "logits = sig(image = jumpingjack[tf.newaxis, ...])\n", + "logits = logits['classifier_head'][0]\n", + "\n", + "print(logits.shape)\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AE8doqkPpxED" + }, + "source": [ + "Define a `get_top_k` function that packages the above output processing for later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OozPNO6LvZ00" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities\n", + "def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Outputs the top k model labels and probabilities on the given video.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k labels and probabilities.\n", + " \"\"\"\n", + " # Sort predictions to find top_k\n", + " top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_predictions, axis=-1)\n", + " # decode lablels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()\n", + " return tuple(zip(top_labels, top_probs))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kTfKMT29pP_Z" + }, + "source": [ + "Convert the `logits` to probabilities, and look up the top 5 classes for the video. The model confirms that the video is probably of `jumping jacks`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z-SrNGsGV5Mt" + }, + "outputs": [], + "source": [ + "probs = tf.nn.softmax(logits, axis=-1)\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltdijoQpqjxZ" + }, + "source": [ + "### The streaming model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dqdUPQXq45b" + }, + "source": [ + "The previous section used a model that runs over a whole video. Often when processing a video you don't want a single prediction at the end, you want to update predictions frame by frame. The `stream` versions of the model allow you to do this.\n", + "\n", + "Load the `stream` version of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mxt0hRXFZkAM" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'stream'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pDswtsGgsYGS" + }, + "source": [ + "Using this model is slightly more complex than the `base` model. You have to keep track of the internal state of the model's RNNs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fM_Vb1VsbDm" + }, + "outputs": [], + "source": [ + "list(model.signatures.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ojr1_iYCtPvp" + }, + "source": [ + "The `init_states` signature takes the video's **shape** `(batch, frames, height, width, colors)` as input, and returns a large dictionary of tensors containing the initial RNN states: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "67loYFGpo_RP" + }, + "outputs": [], + "source": [ + "lines = model.signatures['init_states'].pretty_printed_signature().splitlines()\n", + "lines = lines[:10]\n", + "lines.append(' ...')\n", + "print('.\\n'.join(lines))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v5lG3vejn5df" + }, + "outputs": [], + "source": [ + "initial_state = model.init_states(jumpingjack[tf.newaxis, ...].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J3DwmyHnuhH_" + }, + "outputs": [], + "source": [ + "type(initial_state)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K8SyiEU6tB-e" + }, + "outputs": [], + "source": [ + "list(sorted(initial_state.keys()))[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xeMCzJMBvwRF" + }, + "source": [ + "Once you have the initial state for the RNNs, you can pass the state and a video frame as input (keeping the `(batch, frames, height, width, colors)` shape for the video frame). The model returns a `(logits, state)` pair. \n", + "\n", + "After just seeing the first frame, the model is not convinced that the video is of \"jumping jacks\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "McSLdIgtsI3d" + }, + "outputs": [], + "source": [ + "inputs = initial_state.copy()\n", + "\n", + "# Add the batch axis, take the first frame, but keep the frame-axis.\n", + "inputs['image'] = jumpingjack[tf.newaxis, 0:1, ...] 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WlH7PqLPX664" + }, + "outputs": [], + "source": [ + "# warmup\n", + "model(inputs);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7uzNXtu7X5sr" + }, + "outputs": [], + "source": [ + "logits, new_state = model(inputs)\n", + "logits = logits[0]\n", + "probs = tf.nn.softmax(logits, axis=-1)\n", + "\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')\n", + "\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLU644FQwXSb" + }, + "source": [ + "If you run the model in a loop, passing the updated state with each frame, the model quickly converges to the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fzm7T4ImmIEg" + }, + "outputs": [], + "source": [ + "%%time\n", + "state = initial_state.copy()\n", + "all_logits = []\n", + "\n", + "for n in range(len(jumpingjack)):\n", + " inputs = state\n", + " inputs['image'] = jumpingjack[tf.newaxis, n:n+1, ...]\n", + " result, state = model(inputs)\n", + " # Collect this frame's logits (drop the batch axis).\n", + " all_logits.append(result[0])\n", + "\n", + "probabilities = tf.nn.softmax(all_logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B7UtHoSWcOT2" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(probabilities[-1]):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ffV3NhZcsrv" + }, + "outputs": [], + "source": [ + "id = tf.argmax(probabilities[-1])\n", + "plt.plot(probabilities[:, id])\n", + "plt.xlabel('Frame #')\n", + "plt.ylabel(f\"p('{KINETICS_600_LABELS[id]}')\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d7MZ_AfRW845" + }, + "source": [ + "You may notice that the final probability is much more certain than in the previous section where you ran the `base` model. The `base` model returns an average of the predictions over the frames." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0Wij4tsyW8dR" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(tf.reduce_mean(probabilities, axis=0)):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qLUoC9ejggGo" + }, + "source": [ + "## Animate the predictions over time\n", + "\n", + "The previous section went into some details about how to use these models. This section builds on top of that to produce some nice inference animations. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OnFqOXazoWgy" + }, + "source": [ + "The hidden cell below defines helper functions used in this section."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "dx55NK3ZoZeh" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities predicted using MoViNets streaming model\n", + "def get_top_k_streaming_labels(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Returns the top-k labels over an entire video sequence.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k probabilities, labels, and logit indices\n", + " \"\"\"\n", + " top_categories_last = tf.argsort(probs, -1, 'DESCENDING')[-1, :1]\n", + " # Sort predictions to find top_k\n", + " categories = tf.argsort(probs, -1, 'DESCENDING')[:, :k]\n", + " categories = tf.reshape(categories, [-1])\n", + "\n", + " counts = sorted([\n", + " (i.numpy(), tf.reduce_sum(tf.cast(categories == i, tf.int32)).numpy())\n", + " for i in tf.unique(categories)[0]\n", + " ], key=lambda x: x[1], reverse=True)\n", + "\n", + " top_probs_idx = tf.constant([i for i, _ in counts[:k]])\n", + " top_probs_idx = tf.concat([top_categories_last, top_probs_idx], 0)\n", + " # find unique indices of categories\n", + " top_probs_idx = tf.unique(top_probs_idx)[0][:k+1]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_probs_idx, axis=-1)\n", + " top_probs = tf.transpose(top_probs, perm=(1, 0))\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_probs_idx, axis=0)\n", + " # decode the top_k labels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + "\n", + " return top_probs, top_labels, top_probs_idx\n", + "\n", + "# Plot top_k predictions at a given time step\n", + "def plot_streaming_top_preds_at_step(\n", + " top_probs,\n", + " top_labels,\n", + " step=None,\n", + " image=None,\n", + " legend_loc='lower left',\n", + " duration_seconds=10,\n", + " figure_height=500,\n", + " playhead_scale=0.8,\n", + " grid_alpha=0.3):\n", + " \"\"\"Generates a plot of the top video model predictions at a given time step.\n", + "\n", + " Args:\n", + " top_probs: a tensor of shape (k, num_frames) representing the top-k\n", + " probabilities over all frames.\n", + " top_labels: a list of length k that represents the top-k label strings.\n", + " step: the current time step in the range [0, num_frames].\n", + " image: the image frame to display at the current time step.\n", + " legend_loc: the placement location of the legend.\n", + " duration_seconds: the total duration of the video.\n", + " figure_height: the output figure height.\n", + " playhead_scale: scale value for the playhead.\n", + " grid_alpha: alpha value for the gridlines.\n", + "\n", + " Returns:\n", + " A tuple of the output numpy image, figure, and axes.\n", + " \"\"\"\n", + " # find number of top_k labels and frames in the video\n", + " num_labels, num_frames = top_probs.shape\n", + " if step is None:\n", + " step = num_frames\n", + " # Visualize frames and top_k probabilities of streaming video\n", + " fig = plt.figure(figsize=(6.5, 7), dpi=300)\n", + " gs = mpl.gridspec.GridSpec(8, 1)\n", + " ax2 = plt.subplot(gs[:-3, :])\n", + " ax = plt.subplot(gs[-3:, :])\n", + " # display the frame\n", + " if image is not None:\n", + " 
ax2.imshow(image, interpolation='nearest')\n", + " ax2.axis('off')\n", + " # x-axis (frame number)\n", + " preview_line_x = tf.linspace(0., duration_seconds, num_frames)\n", + " # y-axis (top_k probabilities)\n", + " preview_line_y = top_probs\n", + "\n", + " line_x = preview_line_x[:step+1]\n", + " line_y = preview_line_y[:, :step+1]\n", + "\n", + " for i in range(num_labels):\n", + " ax.plot(preview_line_x, preview_line_y[i], label=None, linewidth='1.5',\n", + " linestyle=':', color='gray')\n", + " ax.plot(line_x, line_y[i], label=top_labels[i], linewidth='2.0')\n", + "\n", + "\n", + " ax.grid(which='major', linestyle=':', linewidth='1.0', alpha=grid_alpha)\n", + " ax.grid(which='minor', linestyle=':', linewidth='0.5', alpha=grid_alpha)\n", + "\n", + " min_height = tf.reduce_min(top_probs) * playhead_scale\n", + " max_height = tf.reduce_max(top_probs)\n", + " ax.vlines(preview_line_x[step], min_height, max_height, colors='red')\n", + " ax.scatter(preview_line_x[step], max_height, color='red')\n", + "\n", + " ax.legend(loc=legend_loc)\n", + "\n", + " plt.xlim(0, duration_seconds)\n", + " plt.ylabel('Probability')\n", + " plt.xlabel('Time (s)')\n", + " plt.yscale('log')\n", + "\n", + " fig.tight_layout()\n", + " fig.canvas.draw()\n", + "\n", + " data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close()\n", + "\n", + " figure_width = int(figure_height * data.shape[1] / data.shape[0])\n", + " image = PIL.Image.fromarray(data).resize([figure_width, figure_height])\n", + " image = np.array(image)\n", + "\n", + " return image\n", + "\n", + "# Plotting top_k predictions from MoViNets streaming model\n", + "def plot_streaming_top_preds(\n", + " probs,\n", + " video,\n", + " top_k=5,\n", + " video_fps=25.,\n", + " figure_height=500,\n", + " use_progbar=True):\n", + " \"\"\"Generates a video plot of the top video model predictions.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " video: the video to display in the plot.\n", + " top_k: the number of top predictions to select.\n", + " video_fps: the input video fps.\n", + " figure_fps: the output video fps.\n", + " figure_height: the height of the output video.\n", + " use_progbar: display a progress bar.\n", + "\n", + " Returns:\n", + " A numpy array representing the output video.\n", + " \"\"\"\n", + " # select number of frames per second\n", + " video_fps = 8.\n", + " # select height of the image\n", + " figure_height = 500\n", + " # number of time steps of the given video\n", + " steps = video.shape[0]\n", + " # estimate duration of the video (in seconds)\n", + " duration = steps / video_fps\n", + " # estimate top_k probabilities and corresponding labels\n", + " top_probs, top_labels, _ = get_top_k_streaming_labels(probs, k=top_k)\n", + "\n", + " images = []\n", + " step_generator = tqdm.trange(steps) if use_progbar else range(steps)\n", + " for i in step_generator:\n", + " image = plot_streaming_top_preds_at_step(\n", + " top_probs=top_probs,\n", + " top_labels=top_labels,\n", + " step=i,\n", + " image=video[i],\n", + " duration_seconds=duration,\n", + " figure_height=figure_height,\n", + " )\n", + " images.append(image)\n", + "\n", + " return np.array(images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eLgFBslcZOQO" + }, + "source": [ + "Start by running the streaming model across the frames of the video, and 
collecting the logits:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXWR13wthnK5" + }, + "outputs": [], + "source": [ + "init_states = model.init_states(jumpingjack[tf.newaxis].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YqSkt7l8ltwt" + }, + "outputs": [], + "source": [ + "# Insert your video clip here\n", + "video = jumpingjack\n", + "images = tf.split(video[tf.newaxis], video.shape[0], axis=1)\n", + "\n", + "all_logits = []\n", + "\n", + "# To run on a video, pass in one frame at a time\n", + "states = init_states\n", + "for image in tqdm.tqdm(images):\n", + " # predictions for each frame\n", + " logits, states = model({**states, 'image': image})\n", + " all_logits.append(logits)\n", + "\n", + "# concatenating all the logits\n", + "logits = tf.concat(all_logits, 0)\n", + "# estimating probabilities\n", + "probs = tf.nn.softmax(logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OOGcCMMJyuPl" + }, + "outputs": [], + "source": [ + "final_probs = probs[-1]\n", + "print('Top_k predictions and their probabilities\\n')\n", + "for label, p in get_top_k(final_probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GaybT0rbZct-" + }, + "source": [ + "Convert the sequence of probabilities into a video:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xdox556CtMRb" + }, + "outputs": [], + "source": [ + "# Generate a plot and output to a video tensor\n", + "plot_video = plot_streaming_top_preds(probs, video, video_fps=8.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSStKE9klCs3" + }, + "outputs": [], + "source": [ + "# For gif format, set codec='gif'\n", + "media.show_video(plot_video, fps=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LCImgZ3OdJw7" + }, + "source": [ + "## Resources\n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh5lLAo-HpVF" + }, + "source": [ + "## Next Steps\n", + "\n", + "To learn more about working with video data in TensorFlow, check out the following tutorials:\n", + "\n", + "* [Load video data](https://www.tensorflow.org/tutorials/load_data/video)\n", + "* [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification)\n", + "* [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet)" + ] + } + ], + "metadata": { + "colab": { + "name": "movinet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/object_detection.ipynb b/site/en/hub/tutorials/object_detection.ipynb new file mode 100644 index 00000000000..e1262f3084c --- /dev/null +++ b/site/en/hub/tutorials/object_detection.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Object Detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF-Hub module trained to perform object detection." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "\n", + "# For running inference on the TF-Hub module.\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "# For downloading the image.\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from six.moves.urllib.request import urlopen\n", + "from six import BytesIO\n", + "\n", + "# For drawing onto the image.\n", + "import numpy as np\n", + "from PIL import Image\n", + "from PIL import ImageColor\n", + "from PIL import ImageDraw\n", + "from PIL import ImageFont\n", + "from PIL import ImageOps\n", + "\n", + "# For measuring the inference time.\n", + "import time\n", + "\n", + "# Print Tensorflow version\n", + "print(tf.__version__)\n", + "\n", + "# Check available GPU devices.\n", + "print(\"The following GPU devices are available: %s\" % tf.test.gpu_device_name())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZGkrXGy62409" + }, + "source": [ + "## Example use" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vlA3CftFpRiW" + }, + "source": [ + "### Helper functions for downloading images and for visualization.\n", + "\n", + "Visualization code adapted from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py) for the simplest required functionality." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D9IwDpOtpIHW" + }, + "outputs": [], + "source": [ + "def display_image(image):\n", + " fig = plt.figure(figsize=(20, 15))\n", + " plt.grid(False)\n", + " plt.imshow(image)\n", + "\n", + "\n", + "def download_and_resize_image(url, new_width=256, new_height=256,\n", + " display=False):\n", + " _, filename = tempfile.mkstemp(suffix=\".jpg\")\n", + " response = urlopen(url)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " pil_image = Image.open(image_data)\n", + " pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)\n", + " pil_image_rgb = pil_image.convert(\"RGB\")\n", + " pil_image_rgb.save(filename, format=\"JPEG\", quality=90)\n", + " print(\"Image downloaded to %s.\" % filename)\n", + " if display:\n", + " display_image(pil_image)\n", + " return filename\n", + "\n", + "\n", + "def draw_bounding_box_on_image(image,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " thickness=4,\n", + " display_str_list=()):\n", + " \"\"\"Adds a bounding box to an image.\"\"\"\n", + " draw = ImageDraw.Draw(image)\n", + " im_width, im_height = image.size\n", + " (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n", + " ymin * im_height, ymax * im_height)\n", + " draw.line([(left, top), (left, bottom), (right, bottom), (right, top),\n", + " (left, top)],\n", + " width=thickness,\n", + " fill=color)\n", + "\n", + " # If the total height of the display strings added to the top of the bounding\n", + " # box exceeds the top of the image, stack the strings below the bounding box\n", + " # instead of above.\n", + " display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]\n", + " # Each display_str has a top and bottom margin of 0.05x.\n", + " total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n", + "\n", + " if top > total_display_str_height:\n", + " text_bottom = top\n", + " else:\n", + " text_bottom = top + total_display_str_height\n", + " # Reverse list and print from bottom to top.\n", + " for display_str in display_str_list[::-1]:\n", + " bbox = font.getbbox(display_str)\n", + " text_width, text_height = bbox[2], bbox[3]\n", + " margin = np.ceil(0.05 * text_height)\n", + " draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n", + " (left + text_width, text_bottom)],\n", + " fill=color)\n", + " draw.text((left + margin, text_bottom - text_height - margin),\n", + " display_str,\n", + " fill=\"black\",\n", + " font=font)\n", + " text_bottom -= text_height - 2 * margin\n", + "\n", + "\n", + "def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):\n", + " \"\"\"Overlay labeled boxes on an image with formatted scores and label names.\"\"\"\n", + " colors = list(ImageColor.colormap.values())\n", + "\n", + " try:\n", + " font = ImageFont.truetype(\"/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf\",\n", + " 25)\n", + " except IOError:\n", + " print(\"Font not found, using default font.\")\n", + " font = ImageFont.load_default()\n", + "\n", + " for i in range(min(boxes.shape[0], max_boxes)):\n", + " if scores[i] >= min_score:\n", + " ymin, xmin, ymax, xmax = tuple(boxes[i])\n", + " display_str = \"{}: {}%\".format(class_names[i].decode(\"ascii\"),\n", + " int(100 * scores[i]))\n", + " color = colors[hash(class_names[i]) % len(colors)]\n", + " image_pil = Image.fromarray(np.uint8(image)).convert(\"RGB\")\n", + " 
draw_bounding_box_on_image(\n", + " image_pil,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " display_str_list=[display_str])\n", + " np.copyto(image, np.array(image_pil))\n", + " return image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D19UCu9Q2-_8" + }, + "source": [ + "## Apply module\n", + "\n", + "Load a public image from Open Images v4, save locally, and display." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "YLWNhjUY1mhg" + }, + "outputs": [], + "source": [ + "# By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + "image_url = \"https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg\" #@param\n", + "downloaded_image_path = download_and_resize_image(image_url, 1280, 856, True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t-VdfLbC1w51" + }, + "source": [ + "Pick an object detection module and apply on the downloaded image. Modules:\n", + "* **FasterRCNN+InceptionResNet V2**: high accuracy,\n", + "* **ssd+mobilenet V2**: small and fast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uazJ5ASc2_QE" + }, + "outputs": [], + "source": [ + "module_handle = \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\" #@param [\"https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1\", \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\"]\n", + "\n", + "detector = hub.load(module_handle).signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znW8Fq1EC0x7" + }, + "outputs": [], + "source": [ + "def load_img(path):\n", + " img = tf.io.read_file(path)\n", + " img = tf.image.decode_jpeg(img, channels=3)\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kwGJV96WWBLH" + }, + "outputs": [], + "source": [ + "def run_detector(detector, path):\n", + " img = load_img(path)\n", + "\n", + " converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]\n", + " start_time = time.time()\n", + " result = detector(converted_img)\n", + " end_time = time.time()\n", + "\n", + " result = {key:value.numpy() for key,value in result.items()}\n", + "\n", + " print(\"Found %d objects.\" % len(result[\"detection_scores\"]))\n", + " print(\"Inference time: \", end_time-start_time)\n", + "\n", + " image_with_boxes = draw_boxes(\n", + " img.numpy(), result[\"detection_boxes\"],\n", + " result[\"detection_class_entities\"], result[\"detection_scores\"])\n", + "\n", + " display_image(image_with_boxes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vchaUW1XDodD" + }, + "outputs": [], + "source": [ + "run_detector(detector, downloaded_image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WUUY3nfRX7VF" + }, + "source": [ + "### More images\n", + "Perform inference on some additional images with time tracking.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rubdr2JXfsa1" + }, + "outputs": [], + "source": [ + "image_urls = [\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg\",\n", + " # By Américo 
Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg\",\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg\",\n", + " ]\n", + "\n", + "def detect_img(image_url):\n", + " start_time = time.time()\n", + " image_path = download_and_resize_image(image_url, 640, 480)\n", + " run_detector(detector, image_path)\n", + " end_time = time.time()\n", + " print(\"Inference time:\",end_time-start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "otPnrxMKIrj5" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H5F7DkD5NtOx" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DZ18R7dWNyoU" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[2])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb new file mode 100644 index 00000000000..0166a7408d5 --- /dev/null +++ b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "VFMCdVJIIraw" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "ZxMYj8OpIrCp" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0fO2R2BBKx3l" + }, + "source": [ + "# Multilingual Universal Sentence Encoder Q&A Retrieval\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zsDm_WgMNlJQ" + }, + "source": [ + "This is a demo for using [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3) for question-answer retrieval of text, illustrating the use of **question_encoder** and **response_encoder** of the model. We use sentences from [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) paragraphs as the demo dataset, each sentence and its context (the text surrounding the sentence) is encoded into high dimension embeddings with the **response_encoder**. These embeddings are stored in an index built using the [simpleneighbors](https://pypi.org/project/simpleneighbors/) library for question-answer retrieval.\n", + "\n", + "On retrieval a random question is selected from the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset and encoded into high dimension embedding with the **question_encoder** and query the simpleneighbors index returning a list of approximate nearest neighbors in semantic space." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U0eOW2LTWiLg" + }, + "source": [ + "### More models\n", + "You can find all currently hosted text embedding models [here](https://tfhub.dev/s?module-type=text-embedding) and all models that have been trained on SQuAD as well [here](https://tfhub.dev/s?dataset=squad)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "x00t_uJCEbeb" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install -q \"tensorflow-text==2.11.*\"\n", + "!pip install -q simpleneighbors[annoy]\n", + "!pip install -q nltk\n", + "!pip install -q tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DmeFAuVsyWxg" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import json\n", + "import nltk\n", + "import os\n", + "import pprint\n", + "import random\n", + "import simpleneighbors\n", + "import urllib\n", + "from IPython.display import HTML, display\n", + "from tqdm.notebook import tqdm\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "\n", + "nltk.download('punkt')\n", + "\n", + "\n", + "def download_squad(url):\n", + " return json.load(urllib.request.urlopen(url))\n", + "\n", + "def extract_sentences_from_squad_json(squad):\n", + " all_sentences = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " sentences = nltk.tokenize.sent_tokenize(paragraph['context'])\n", + " all_sentences.extend(zip(sentences, [paragraph['context']] * len(sentences)))\n", + " return list(set(all_sentences)) # remove duplicates\n", + "\n", + "def extract_questions_from_squad_json(squad):\n", + " questions = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " for qas in paragraph['qas']:\n", + " if qas['answers']:\n", + " questions.append((qas['question'], qas['answers'][0]['text']))\n", + " return list(set(questions))\n", + "\n", + "def output_with_highlight(text, highlight):\n", + " output = \"
<li>\"\n", + " i = text.find(highlight)\n", + " while True:\n", + " if i == -1:\n", + " output += text\n", + " break\n", + " output += text[0:i]\n", + " output += '<b>'+text[i:i+len(highlight)]+'</b>'\n", + " text = text[i+len(highlight):]\n", + " i = text.find(highlight)\n", + " return output + \"</li>\\n\"\n", + "\n", + "def display_nearest_neighbors(query_text, answer_text=None):\n", + " query_embedding = model.signatures['question_encoder'](tf.constant([query_text]))['outputs'][0]\n", + " search_results = index.nearest(query_embedding, n=num_results)\n", + "\n", + " if answer_text:\n", + " result_md = '''\n", + " <p>Random Question from SQuAD:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " <p>Answer:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % (query_text , answer_text)\n", + " else:\n", + " result_md = '''\n", + " <p>Question:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % query_text\n", + "\n", + " result_md += '''\n", + " <p>Retrieved sentences :\n", + " <ol>\n", + " '''\n", + "\n", + " if answer_text:\n", + " for s in search_results:\n", + " result_md += output_with_highlight(s, answer_text)\n", + " else:\n", + " for s in search_results:\n", + " result_md += '<li>' + s + '</li>\\n'\n", + "\n", + " result_md += \"</ol>\"\n",
    \"\n", + " display(HTML(result_md))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1kbkT8i3FL_C" + }, + "source": [ + "Run the following code block to download and extract the SQuAD dataset into:\n", + "\n", + "* **sentences** is a list of (text, context) tuples - each paragraph from the SQuAD dataset are split into sentences using nltk library and the sentence and paragraph text forms the (text, context) tuple.\n", + "* **questions** is a list of (question, answer) tuples.\n", + "\n", + "Note: You can use this demo to index the SQuAD train dataset or the smaller dev dataset (1.1 or 2.0) by selecting the **squad_url** below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "iYqV2GAty_Eh" + }, + "outputs": [], + "source": [ + "#@title Download and extract SQuAD data\n", + "squad_url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json' #@param [\"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\"]\n", + "\n", + "squad_json = download_squad(squad_url)\n", + "sentences = extract_sentences_from_squad_json(squad_json)\n", + "questions = extract_questions_from_squad_json(squad_json)\n", + "print(\"%s sentences, %s questions extracted from SQuAD %s\" % (len(sentences), len(questions), squad_url))\n", + "\n", + "print(\"\\nExample sentence and context:\\n\")\n", + "sentence = random.choice(sentences)\n", + "print(\"sentence:\\n\")\n", + "pprint.pprint(sentence[0])\n", + "print(\"\\ncontext:\\n\")\n", + "pprint.pprint(sentence[1])\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9x3u-2uSGbDf" + }, + "source": [ + "The following code block setup the tensorflow graph **g** and **session** with the [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3)'s **question_encoder** and **response_encoder** signatures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "44I0uCRQRiFO" + }, + "outputs": [], + "source": [ + "#@title Load model from tensorflow hub\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3\" #@param [\"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3\", \"https://tfhub.dev/google/universal-sentence-encoder-qa/3\"]\n", + "model = hub.load(module_url)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCQpDmTZG0O6" + }, + "source": [ + "The following code block compute the embeddings for all the text, context tuples and store them in a [simpleneighbors](https://pypi.org/project/simpleneighbors/) index using the **response_encoder**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FwDUryIfSLp2" + }, + "outputs": [], + "source": [ + "#@title Compute embeddings and build simpleneighbors index\n", + "batch_size = 100\n", + "\n", + "encodings = model.signatures['response_encoder'](\n", + " input=tf.constant([sentences[0][0]]),\n", + " context=tf.constant([sentences[0][1]]))\n", + "index = simpleneighbors.SimpleNeighbors(\n", + " len(encodings['outputs'][0]), metric='angular')\n", + "\n", + "print('Computing embeddings for %s sentences' % len(sentences))\n", + "slices = zip(*(iter(sentences),) * batch_size)\n", + "num_batches = int(len(sentences) / batch_size)\n", + "for s in tqdm(slices, total=num_batches):\n", + " response_batch = list([r for r, c in s])\n", + " context_batch = list([c for r, c in s])\n", + " encodings = model.signatures['response_encoder'](\n", + " input=tf.constant(response_batch),\n", + " context=tf.constant(context_batch)\n", + " )\n", + " for batch_index, batch in enumerate(response_batch):\n", + " index.add_one(batch, encodings['outputs'][batch_index])\n", + "\n", + "index.build()\n", + "print('simpleneighbors index for %s sentences built.' % len(sentences))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZkNcjoPzHJpP" + }, + "source": [ + "On retrieval, the question is encoded using the **question_encoder** and the question embedding is used to query the simpleneighbors index." 
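To make that retrieval step concrete, here is a minimal sketch (not part of the original notebook) that assumes the `model`, `index`, and `num_results` objects defined in the surrounding cells; the question string below is made up:

```python
# Minimal sketch of Q&A retrieval, assuming `model`, `index`, and `num_results`
# from the cells above. The question text is illustrative only.
import tensorflow.compat.v2 as tf

question = "What is the capital of France?"  # hypothetical query
# Encode the question with the question_encoder signature.
query_embedding = model.signatures['question_encoder'](
    tf.constant([question]))['outputs'][0]
# Query the simpleneighbors index for the closest response sentences.
for sentence in index.nearest(query_embedding, n=num_results):
    print(sentence)
```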
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "J0xTw2w3UViK" + }, + "outputs": [], + "source": [ + "#@title Retrieve nearest neighbors for a random question from SQuAD\n", + "num_results = 25 #@param {type:\"slider\", min:5, max:40, step:1}\n", + "\n", + "query = random.choice(questions)\n", + "display_nearest_neighbors(query[0], query[1])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "VFMCdVJIIraw" + ], + "name": "retrieval_with_tf_hub_universal_encoder_qa.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..bd73cffebdf --- /dev/null +++ b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "BhN1AplL0Hpv" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgeG2swVVi6" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AqBuuwrIxlGs" + }, + "source": [ + "# Generating Images with Little Data Using S3GAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5AWAusyySDA" + }, + "source": [ + "This notebook is a demo of Generative Adversarial Networks trained on ImageNet with as little as 2.5% labeled data using self- and semi-supervised learning techniques. Both generator and discriminator models are available on [TF Hub](https://tfhub.dev/s?publisher=google&q=compare_gan).\n", + "\n", + "For more information about the models and the training procedure see our [blogpost](https://ai.googleblog.com/2019/03/reducing-need-for-labeled-data-in.html) and the [paper](https://arxiv.org/abs/1903.02271) [1].\n", + "The code for training these models is available on [GitHub](https://github.com/google/compare_gan).\n", + "\n", + "To get started, connect to a runtime and follow these steps:\n", + "\n", + "1. (Optional) Select a model in the second code cell below.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + "\n", + "Note: if you run into any issues, you can try restarting the runtime and rerunning all cells from scratch by clicking **Runtime > Restart and run all...**.\n", + "\n", + "[1] Mario Lucic\\*, Michael Tschannen\\*, Marvin Ritter\\*, Xiaohua Zhai, Olivier\n", + " Bachem, Sylvain Gelly, [High-Fidelity Image Generation With Fewer Labels](https://arxiv.org/abs/1903.02271), ICML 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_m5jsOM9kXWP" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NhlMa_tHs0_W" + }, + "outputs": [], + "source": [ + "# @title Imports and utility functions\n", + "import os\n", + "\n", + "import IPython\n", + "from IPython.display import display\n", + "import numpy as np\n", + "import PIL.Image\n", + "import pandas as pd\n", + "import six\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "def imgrid(imarray, cols=8, pad=1):\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = int(np.ceil(N / float(cols)))\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant')\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " return grid[:-pad, :-pad]\n", + "\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " if six.PY3:\n", + " str_file = six.BytesIO()\n", + " else:\n", + " str_file = six.StringIO()\n", + " PIL.Image.fromarray(a).save(str_file, format)\n", + " png_data = str_file.getvalue()\n", + " try:\n", + " disp = display(IPython.display.Image(png_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "\n", + "class Generator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " 
self._sess = None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def z_dim(self):\n", + " return self._z.shape[-1].value\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return self._labels is not None\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._generator = hub.Module(self._module_spec, name=\"gen_module\",\n", + " tags={\"gen\", \"bsNone\"})\n", + " input_info = self._generator.get_input_info_dict()\n", + " inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in self._generator.get_input_info_dict().items()}\n", + " self._samples = self._generator(inputs=inputs, as_dict=True)[\"generated\"]\n", + " print(\"Inputs:\", inputs)\n", + " print(\"Outputs:\", self._samples)\n", + " self._z = inputs[\"z\"]\n", + " self._labels = inputs.get(\"labels\", None)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def get_noise(self, num_samples, seed=None):\n", + " if np.isscalar(seed):\n", + " np.random.seed(seed)\n", + " return np.random.normal(size=[num_samples, self.z_dim])\n", + " z = np.empty(shape=(len(seed), self.z_dim), dtype=np.float32)\n", + " for i, s in enumerate(seed):\n", + " np.random.seed(s)\n", + " z[i] = np.random.normal(size=[self.z_dim])\n", + " return z\n", + "\n", + " def get_samples(self, z, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._z: z}\n", + " if self.conditional:\n", + " assert labels is not None\n", + " assert labels.shape[0] == z.shape[0]\n", + " feed_dict[self._labels] = labels\n", + " samples = self._sess.run(self._samples, feed_dict=feed_dict)\n", + " return np.uint8(np.clip(256 * samples, 0, 255))\n", + "\n", + "\n", + "class Discriminator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " self._sess = None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return \"labels\" in self._inputs\n", + "\n", + " @property\n", + " def image_shape(self):\n", + " return self._inputs[\"images\"].shape.as_list()[1:]\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._discriminator = hub.Module(self._module_spec, name=\"disc_module\",\n", + " tags={\"disc\", \"bsNone\"})\n", + " input_info = self._discriminator.get_input_info_dict()\n", + " self._inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in input_info.items()}\n", + " self._outputs = self._discriminator(inputs=self._inputs, as_dict=True)\n", + " print(\"Inputs:\", self._inputs)\n", + " print(\"Outputs:\", self._outputs)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def predict(self, images, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._inputs[\"images\"]: images}\n", + " if \"labels\" in self._inputs:\n", + " assert labels is not None\n", + " assert labels.shape[0] == images.shape[0]\n", + " feed_dict[self._inputs[\"labels\"]] = labels\n", + " return self._sess.run(self._outputs, feed_dict=feed_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"msTFS1UPkugr" + }, + "source": [ + "## Select a model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-hBEi9IFdoI-" + }, + "outputs": [], + "source": [ + "# @title Select a model { run: \"auto\" }\n", + "\n", + "model_name = \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\" # @param [\"S3GAN 256x256 10% labels (FID 8.8, IS 130.7)\", \"S3GAN 128x128 2.5% labels (FID 12.6, IS 48.7)\", \"S3GAN 128x128 5% labels (FID 8.4, IS 74.0)\", \"S3GAN 128x128 10% labels (FID 7.6, IS 90.3)\", \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\"]\n", + "models = {\n", + " \"S3GAN 256x256 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_256x256/1\",\n", + " \"S3GAN 128x128 2.5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_2_5_128x128/1\",\n", + " \"S3GAN 128x128 5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_5_128x128/1\",\n", + " \"S3GAN 128x128 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_128x128/1\",\n", + " \"S3GAN 128x128 20% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_20_128x128/1\",\n", + "}\n", + "\n", + "module_spec = models[model_name.split(\" (\")[0]]\n", + "print(\"Module spec:\", module_spec)\n", + "\n", + "tf.reset_default_graph()\n", + "print(\"Loading model...\")\n", + "sampler = Generator(module_spec)\n", + "print(\"Model loaded.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ePQuAme_kxLj" + }, + "source": [ + "## Sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "kGgTXtFYq_FV" + }, + "outputs": [], + "source": [ + "# @title Sampling { run: \"auto\" }\n", + "\n", + "num_rows = 2 # @param {type: \"slider\", min:1, max:16}\n", + "num_cols = 3 # @param {type: \"slider\", min:1, max:16}\n", + "noise_seed = 23 # @param {type:\"slider\", min:0, max:100, step:1}\n", + "label_str = \"980) volcano\" # @param [\"-1) Random\", \"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\", \"980) volcano\"]\n", + "\n", + "num_samples = num_rows * num_cols\n", + "z = sampler.get_noise(num_samples, seed=noise_seed)\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "if label == -1:\n", + " labels = np.random.randint(0, num_classes, size=(num_samples))\n", + "else:\n", + " labels = np.asarray([label] * num_samples)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_cols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "vCffdVZvTtxL" + }, + "outputs": [], + "source": [ + "# @title Interpolation { run: \"auto\" }\n", + "\n", + "num_samples = 1 # @param 
{type: \"slider\", min: 1, max: 6, step: 1}\n", + "num_interps = 6 # @param {type: \"slider\", min: 2, max: 10, step: 1}\n", + "noise_seed_A = 11 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "noise_seed_B = 0 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "label_str = \"1) goldfish, Carassius auratus\" # @param [\"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\"]\n", + "\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " return np.array([((1-a)*A + a*B)/np.sqrt(a**2 + (1-a)**2) for a in alphas])\n", + "\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, -1))\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "labels = np.asarray([label] * num_samples * num_interps)\n", + "\n", + "\n", + "z_A = sampler.get_noise(num_samples, seed=noise_seed_A)\n", + "z_B = sampler.get_noise(num_samples, seed=noise_seed_B)\n", + "z = interpolate_and_shape(z_A, z_B, num_interps)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_interps))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esW0Up95Ob6U" + }, + "source": [ + "## Discriminator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ButxPSq0OzgL" + }, + "outputs": [], + "source": [ + "disc = Discriminator(module_spec)\n", + "\n", + "batch_size = 4\n", + "num_classes = 1000\n", + "images = np.random.random(size=[batch_size] + disc.image_shape)\n", + "labels = np.random.randint(0, num_classes, size=(batch_size))\n", + "\n", + "disc.predict(images, labels=labels)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "BhN1AplL0Hpv" + ], + "name": "s3gan_generation_with_tf_hub.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..55bcebcc447 --- /dev/null +++ b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + "##### Copyright 2019 The 
TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Hks9F5qq6m2" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) module given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval. \n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed. \n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub module\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) with [TensorFlow Transform](https://www.tensorflow.org/tfx/tutorials/transform/simple) (TF-Transform) to generate the embeddings from the TF-Hub module. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbours index. You can find benchmarking of ANN framework in this [Github repository](https://github.com/erikbern/ann-benchmarks).\n", + "\n", + "This tutorial uses TensorFlow 1.0 and works only with TF1 [Hub modules](https://www.tensorflow.org/hub/tf1_hub_module) from TF-Hub. See the updated [TF2 version of this tutorial](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install -q apache_beam\n", + "!pip install -q 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install -q annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pathlib\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GF0GnLqGdPQ" + }, + "outputs": [], + "source": [ + "# TFT needs to be installed afterwards\n", + "!pip install -q tensorflow_transform==0.24\n", + "import tensorflow_transform as tft\n", + "import tensorflow_transform.beam as tft_beam" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('TF-Transform version: {}'.format(tft.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. 
We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ls0Zh7kYz3PM" + }, + "source": [ + "## Helper function to load a TF-Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vSt_jmyKz3Xp" + }, + "outputs": [], + "source": [ + "def load_module(module_url):\n", + " embed_module = hub.Module(module_url)\n", + " placeholder = tf.placeholder(dtype=tf.string)\n", + " embed = embed_module(placeholder)\n", + " session = tf.Session()\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " print('TF-Hub module is loaded.')\n", + "\n", + " def _embeddings_fn(sentences):\n", + " computed_embeddings = session.run(\n", + " embed, feed_dict={placeholder: sentences})\n", + " return computed_embeddings\n", + "\n", + " return _embeddings_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam and TF-Transform." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "encoder = None\n", + "\n", + "def embed_text(text, module_url, random_projection_matrix):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global encoder\n", + " if not encoder:\n", + " encoder = hub.Module(module_url)\n", + " embedding = encoder(text)\n", + " if random_projection_matrix is not None:\n", + " # Perform random projection for the embedding\n", + " embedding = tf.matmul(\n", + " embedding, tf.cast(random_projection_matrix, embedding.dtype))\n", + " return embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_don5gXy9D59" + }, + "source": [ + "### Make TFT preprocess_fn method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fwYlrzzK9ECE" + }, + "outputs": [], + "source": [ + "def make_preprocess_fn(module_url, random_projection_matrix=None):\n", + " '''Makes a tft preprocess_fn'''\n", + "\n", + " def _preprocess_fn(input_features):\n", + " '''tft preprocess_fn'''\n", + " text = input_features['text']\n", + " # Generate the embedding for the input text\n", + " embedding = embed_text(text, module_url, random_projection_matrix)\n", + " \n", + " output_features = {\n", + " 'text': text, \n", + " 'embedding': embedding\n", + " }\n", + " \n", + " return output_features\n", + " \n", + " return _preprocess_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SQ492LN7A-NZ" + }, + "source": [ + "### Create dataset metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d2D4332VA-2V" + }, + "outputs": [], + "source": [ + "def create_metadata():\n", + " '''Creates metadata for the raw data'''\n", + " from tensorflow_transform.tf_metadata import dataset_metadata\n", + " from tensorflow_transform.tf_metadata import schema_utils\n", + " feature_spec = {'text': tf.FixedLenFeature([], dtype=tf.string)}\n", + " schema = schema_utils.schema_from_feature_spec(feature_spec)\n", + " metadata = dataset_metadata.DatasetMetadata(schema)\n", + " return metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zlSLPzRBm6H" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " raw_metadata = create_metadata()\n", + " converter = tft.coders.CsvCoder(\n", + " column_names=['text'], schema=raw_metadata.schema)\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " with tft_beam.Context(args.temporary_dir):\n", + " # Read the sentences from the input file\n", + " sentences = ( \n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Convert to dictionary' >> beam.Map(converter.decode)\n", + " )\n", + "\n", + " sentences_dataset = (sentences, raw_metadata)\n", + " preprocess_fn = make_preprocess_fn(args.module_url, 
args.random_projection_matrix)\n", + " # Generate the embeddings for the sentence using the TF-Hub module\n", + " embeddings_dataset, _ = (\n", + " sentences_dataset\n", + " | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)\n", + " )\n", + "\n", + " embeddings, transformed_metadata = embeddings_dataset\n", + " # Write the embeddings to TFRecords files\n", + " embeddings | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords',\n", + " coder=tft.coders.ExampleProtoCoder(transformed_metadata.schema))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHbq4t2gCDAG" + }, + "source": [ + "### Generating Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple yet powerful technique used to reduce the dimensionality of a set of points which lie in Euclidean space. For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T1aYPeOUCDIP" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " if projected_dim and original_dim > projected_dim:\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was created with shape {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHxZX2Z3Nk64" + }, + "source": [ + "### Set parameters\nIf you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings."
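To make the effect of `projected_dim` concrete, here is a toy sketch (not from the notebook) of what the Gaussian random projection above does to an embedding; all dimensions and data below are made up:

```python
# Toy sketch of Gaussian random projection; dimensions and data are made up.
import numpy as np

original_dim, projected_dim = 512, 64
rng = np.random.default_rng(0)
# Entries drawn from N(0, 1/projected_dim), matching scikit-learn's Gaussian projection.
projection = rng.normal(scale=1.0 / np.sqrt(projected_dim),
                        size=(original_dim, projected_dim))
embeddings = rng.normal(size=(3, original_dim))   # three fake embeddings
projected = embeddings.dot(projection)            # shape (3, 64)
print(projected.shape)
```

Pairwise distances between the projected vectors stay approximately proportional to the original ones, which is why the shorter vectors are still usable for nearest-neighbor search.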
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "feMVXFL0NlIM" + }, + "outputs": [], + "source": [ + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = pathlib.Path(tempfile.mkdtemp())\n", + "temporary_dir = pathlib.Path(tempfile.mkdtemp())\n", + "\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " original_dim = load_module(module_url)(['']).shape[1]\n", + " random_projection_matrix = None\n", + "\n", + " if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'temporary_dir': temporary_dir,\n", + " 'module_url': module_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "!rm -r {output_dir}\n", + "!rm -r {temporary_dir}\n", + "\n", + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "import itertools\n", + "\n", + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "record_iterator = tf.io.tf_record_iterator(path=embed_file)\n", + "for string_record in itertools.islice(record_iterator, sample):\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value\n", + " embedding = np.array(example.features.feature['embedding'].float_list.value)\n", + " print(\"Embedding dimensions: {}\".format(embedding.shape[0]))\n", + " print(\"{}: {}\".format(text, embedding[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations." 
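Before the `build_index` helper in the next cell, a self-contained toy sketch of the ANNOY calls this tutorial relies on may help; the vector size and data here are made up:

```python
# Toy sketch of the ANNOY API used below; vectors are random and purely illustrative.
import annoy
import numpy as np

dim = 64
toy_index = annoy.AnnoyIndex(dim, metric='angular')
for i in range(1000):
    toy_index.add_item(i, np.random.normal(size=dim))  # add vectors one by one
toy_index.build(n_trees=10)  # more trees give better recall at the cost of a larger index
print(toy_index.get_nns_by_vector(np.random.normal(size=dim), 5))  # 5 approximate neighbors
```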
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.gfile.Glob(embedding_files_pattern)\n", + " print('Found {} embedding file(s).'.format(len(embed_files)))\n", + "\n", + " item_counter = 0\n", + " for f, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(\n", + " f+1, len(embed_files)))\n", + " record_iterator = tf.io.tf_record_iterator(\n", + " path=embed_file)\n", + "\n", + " for string_record in record_iterator:\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value[0].decode(\"utf-8\")\n", + " mapping[item_counter] = text\n", + " embedding = np.array(\n", + " example.features.feature['embedding'].float_list.value)\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub module\n", + "print(\"Loading the TF-Hub module...\")\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " embed_fn = load_module(module_url)\n", + "print(\"TF-Hub module is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0]\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nE_Q60nCk_ZB" + }, + "source": [ + "### Enter a query to find the most similar items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wwtMtyOeDKwt" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at 
[tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](https://tfhub.dev/) including more text embedding modules and image feature vector modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ls0Zh7kYz3PM", + "_don5gXy9D59", + "SQ492LN7A-NZ" + ], + "name": "semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb new file mode 100644 index 00000000000..0c2874bc030 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Universal Sentence Encoder and use it for sentence similarity and sentence classification tasks.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Universal Sentence Encoder on TF Hub and provides examples of applying the encoder to words, sentences, and paragraphs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip3 install seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "63Pd3nJnTl-i" + }, + "source": [ + "More detailed information about installing Tensorflow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "zwty8Z6mAkdV" + }, + "outputs": [], + "source": [ + "#@title Load the Universal Sentence Encoder's TF Hub module\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns\n", + "\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder/4\" #@param [\"https://tfhub.dev/google/universal-sentence-encoder/4\", \"https://tfhub.dev/google/universal-sentence-encoder-large/5\"]\n", + "model = hub.load(module_url)\n", + "print (\"module %s loaded\" % module_url)\n", + "def embed(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "#@title Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. 
Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "message_embeddings = embed(messages)\n", + "\n", + "for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BnvjATdy64eR" + }, + "source": [ + "# Semantic Textual Similarity Task Example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h1FFCTKm7ba4" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "def run_and_plot(messages_):\n", + " message_embeddings_ = embed(messages_)\n", + " plot_similarity(messages_, message_embeddings_, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "339tuJ5Pwqqv" + }, + "source": [ + "## Similarity Visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cPMCaxrZwp7t" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "run_and_plot(messages)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FjdeCqPJeg-" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.eus/stswiki/stswiki.html#STS_benchmark) provides an intrinsic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." 
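As a minimal sketch of the angular-similarity score that the benchmark cells below compute (assuming `u` and `v` are two already-computed, approximately normalized sentence embeddings; the names and toy values here are placeholders, not model output):

```python
import numpy as np

def angular_similarity(u, v):
    # Normalize, take the cosine similarity, clip for numerical safety,
    # then map the angle between the vectors to a score in [0, 1].
    u = u / np.linalg.norm(u)
    v = v / np.linalg.norm(v)
    cosine = np.clip(np.dot(u, v), -1.0, 1.0)
    return 1.0 - np.arccos(cosine) / np.pi

print(angular_similarity(np.array([1.0, 0.0]), np.array([1.0, 0.0])))  # identical -> 1.0
print(angular_similarity(np.array([1.0, 0.0]), np.array([0.0, 1.0])))  # orthogonal -> 0.5
```

Pearson correlation is then computed between these machine scores and the human ratings, exactly as in the evaluation cell further down.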
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q5nuBbI1iFQR" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VOs8ZfOnJeBF" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "import csv\n", + "\n", + "sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz\",\n", + " extract=True)\n", + "sts_dev = pandas.read_table(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"),\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "sts_test = pandas.read_table(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"),\n", + " quoting=csv.QUOTE_NONE,\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "# cleanup some NaN values in sts_dev\n", + "sts_dev = sts_dev[[isinstance(s, str) for s in sts_dev['sent_2']]]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8OKy8WhnKRe_" + }, + "source": [ + "### Evaluate Sentence Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W-q2r7jyZGb7" + }, + "outputs": [], + "source": [ + "sts_data = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "def run_sts_benchmark(batch):\n", + " sts_encode1 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_1'].tolist())), axis=1)\n", + " sts_encode2 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_2'].tolist())), axis=1)\n", + " cosine_similarities = tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1)\n", + " clip_cosine_similarities = tf.clip_by_value(cosine_similarities, -1.0, 1.0)\n", + " scores = 1.0 - tf.acos(clip_cosine_similarities) / math.pi\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " return scores\n", + "\n", + "dev_scores = sts_data['sim'].tolist()\n", + "scores = []\n", + "for batch in np.array_split(sts_data, 10):\n", + " scores.extend(run_sts_benchmark(batch))\n", + "\n", + "pearson_correlation = scipy.stats.pearsonr(scores, dev_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb new file mode 100644 index 00000000000..78d4eebadb0 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IJhWonqQN7u0" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MegtYH2UN8tT" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MlHqSdgSEwPE" + }, + "source": [ + "# Universal Sentence Encoder-Lite demo\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j0HuiScHQ3OK" + }, + "source": [ + "This Colab illustrates how to use the Universal Sentence Encoder-Lite for sentence similarity task. This module is very similar to [Universal Sentence Encoder](https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder/2) with the only difference that you need to run [SentencePiece](https://github.com/google/sentencepiece) processing on your input sentences.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wqCB2pyK-WSU" + }, + "source": [ + "# Getting started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWeEjoO5M0Cx" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5_potQBMzcU" + }, + "outputs": [], + "source": [ + "# Install seaborn for pretty visualizations\n", + "!pip3 install --quiet seaborn\n", + "# Install SentencePiece package\n", + "# SentencePiece package is needed for Universal Sentence Encoder Lite. We'll\n", + "# use it for all the text processing and sentence feature ID lookup.\n", + "!pip3 install --quiet sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dMTa6V4a-cmf" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "import sentencepiece as spm\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WPXYQDBiFJHd" + }, + "source": [ + "## Load the module from TF-Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HEWUT-lmAkxM" + }, + "outputs": [], + "source": [ + "module = hub.Module(\"https://tfhub.dev/google/universal-sentence-encoder-lite/2\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5277Z-9qARYF" + }, + "outputs": [], + "source": [ + "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n", + "encodings = module(\n", + " inputs=dict(\n", + " values=input_placeholder.values,\n", + " indices=input_placeholder.indices,\n", + " dense_shape=input_placeholder.dense_shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yydbhuba_nek" + }, + "source": [ + "## Load SentencePiece model from the TF-Hub Module\n", + "The SentencePiece model is conveniently stored inside the module's assets. It has to be loaded in order to initialize the processor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2CyUjKzE_tcJ" + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " spm_path = sess.run(module(signature=\"spm_path\"))\n", + "\n", + "sp = spm.SentencePieceProcessor()\n", + "with tf.io.gfile.GFile(spm_path, mode=\"rb\") as f:\n", + " sp.LoadFromSerializedProto(f.read())\n", + "print(\"SentencePiece model loaded at {}.\".format(spm_path))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6y5kkN-l-5QV" + }, + "outputs": [], + "source": [ + "def process_to_IDs_in_sparse_format(sp, sentences):\n", + " # An utility method that processes sentences with the sentence piece processor\n", + " # 'sp' and returns the results in tf.SparseTensor-similar format:\n", + " # (values, indices, dense_shape)\n", + " ids = [sp.EncodeAsIds(x) for x in sentences]\n", + " max_len = max(len(x) for x in ids)\n", + " dense_shape=(len(ids), max_len)\n", + " values=[item for sublist in ids for item in sublist]\n", + " indices=[[row,col] for row in range(len(ids)) for col in range(len(ids[row]))]\n", + " return (values, indices, dense_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVpHEWrPAdxR" + }, + "source": [ + "### Test the module with a few examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pSkjuGYoCBfU" + }, + "outputs": [], + "source": [ + "# Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, messages)\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "with tf.Session() as session:\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + "\n", + " for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "46jrIgHyFDz9" + }, + "source": [ + "# Semantic Textual Similarity (STS) task example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." 
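As a minimal sketch of that inner-product similarity (using a toy stand-in for the embedding matrix rather than real encoder output), entry `[i, j]` of `np.inner(features, features)` is the cosine similarity of sentences `i` and `j` when the rows are normalized; this is what the `plot_similarity` helper in the next cell visualizes as a heat map.

```python
import numpy as np

# Toy, already-normalized stand-in for a batch of sentence embeddings.
embeddings = np.array([[0.6, 0.8],
                       [0.8, 0.6],
                       [-0.8, 0.6]])

similarity_matrix = np.inner(embeddings, embeddings)
print(similarity_matrix)  # 3x3 matrix; diagonal entries are 1.0
```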
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIQudHgWBGSk" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "\n", + "def run_and_plot(session, input_placeholder, messages):\n", + " values, indices, dense_shape = process_to_IDs_in_sparse_format(sp,messages)\n", + "\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + " \n", + " plot_similarity(messages, message_embeddings, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wlDqttNcE0Bx" + }, + "source": [ + "## Similarity visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GSCW5QIBKVe" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " run_and_plot(session, input_placeholder, messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkZ4sRBYBnL8" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark) provides an intristic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kNMVfSelBsHW" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8zAWVzBMBptq" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "\n", + "\n", + "def load_sts_dataset(filename):\n", + " # Loads a subset of the STS dataset into a DataFrame. 
In particular both\n", + " # sentences and their human rated similarity score.\n", + " sent_pairs = []\n", + " with tf.gfile.GFile(filename, \"r\") as f:\n", + " for line in f:\n", + " ts = line.strip().split(\"\\t\")\n", + " # (sent_1, sent_2, similarity_score)\n", + " sent_pairs.append((ts[5], ts[6], float(ts[4])))\n", + " return pandas.DataFrame(sent_pairs, columns=[\"sent_1\", \"sent_2\", \"sim\"])\n", + "\n", + "\n", + "def download_and_load_sts_data():\n", + " sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz\",\n", + " extract=True)\n", + "\n", + " sts_dev = load_sts_dataset(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"))\n", + " sts_test = load_sts_dataset(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"))\n", + "\n", + " return sts_dev, sts_test\n", + "\n", + "\n", + "sts_dev, sts_test = download_and_load_sts_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l8lEawD6B4Fr" + }, + "source": [ + "### Build evaluation graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "etiZUkP-B6bR" + }, + "outputs": [], + "source": [ + "sts_input1 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "sts_input2 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "\n", + "# For evaluation we use exactly normalized rather than\n", + "# approximately normalized.\n", + "sts_encode1 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input1.values,\n", + " indices=sts_input1.indices,\n", + " dense_shape=sts_input1.dense_shape)),\n", + " axis=1)\n", + "sts_encode2 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input2.values,\n", + " indices=sts_input2.indices,\n", + " dense_shape=sts_input2.dense_shape)),\n", + " axis=1)\n", + "\n", + "sim_scores = -tf.acos(tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4Q34ssLB-rw" + }, + "source": [ + "### Evaluate sentence embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-vRFEFPJPyeF" + }, + "outputs": [], + "source": [ + "#@title Choose dataset for benchmark\n", + "dataset = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "values1, indices1, dense_shape1 = process_to_IDs_in_sparse_format(sp, dataset['sent_1'].tolist())\n", + "values2, indices2, dense_shape2 = process_to_IDs_in_sparse_format(sp, dataset['sent_2'].tolist())\n", + "similarity_scores = dataset['sim'].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QJ2DI85CBDh" + }, + "outputs": [], + "source": [ + "def run_sts_benchmark(session):\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " scores = session.run(\n", + " sim_scores,\n", + " feed_dict={\n", + " sts_input1.values: values1,\n", + " sts_input1.indices: indices1,\n", + " sts_input1.dense_shape: dense_shape1,\n", + " sts_input2.values: values2,\n", + " sts_input2.indices: indices2,\n", + " sts_input2.dense_shape: dense_shape2,\n", + " })\n", + " return scores\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " scores = run_sts_benchmark(session)\n", + "\n", + "pearson_correlation = 
scipy.stats.pearsonr(scores, similarity_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IJhWonqQN7u0" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb new file mode 100644 index 00000000000..c33dce64c92 --- /dev/null +++ b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "CGyzr0tfeUTQ" + }, + "source": [ + "**Copyright 2021 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zV1OQAGReaGQ" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L5bsDhkRfTpq" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owWqOcw1e-RZ" + }, + "source": [ + "# Universal Sentence Encoder SentEval demo\n", + "This colab demostrates the [Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1) using the [SentEval](https://github.com/facebookresearch/SentEval) toolkit, which is a library for measuring the quality of sentence embeddings. The SentEval toolkit includes a diverse set of downstream tasks that are able to evaluate the generalization power of an embedding model and to evaluate the linguistic properties encoded.\n", + "\n", + "Run the first two code blocks to setup the environment, in the third code block you can pick a SentEval task to evaluate the model. A GPU runtime is recommended to run this Colab.\n", + "\n", + "To learn more about the Universal Sentence Encoder CMLM model, see https://openreview.net/forum?id=WDVD4lUCTzU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-CerULCLsjzV" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"\n", + "!pip install --quiet torch==1.8.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LjqkqD6aiZGU" + }, + "source": [ + "## Download SentEval and task data\n", + "This step download SentEval from github and execute the data script to download the task data. It may take up to 5 minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3UwhHQiKJmSc" + }, + "outputs": [], + "source": [ + "#@title Install SentEval and download task data\n", + "!rm -rf ./SentEval\n", + "!git clone https://github.com/facebookresearch/SentEval.git\n", + "!cd $PWD/SentEval/data/downstream && bash get_transfer_data.bash > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7a2ohPn8vMe2" + }, + "source": [ + "#Execute a SentEval evaluation task\n", + "The following code block executes a SentEval task and output the results, choose one of the following tasks to evaluate the USE CMLM model:\n", + "\n", + "```\n", + "MR\tCR\tSUBJ\tMPQA\tSST\tTREC\tMRPC\tSICK-E\n", + "```\n", + "\n", + "Select a model, params and task to run. 
The rapid prototyping params can be used for reducing computation time for faster result.\n", + "\n", + "It typically takes 5-15 mins to complete a task with the **'rapid prototyping'** params and up to an hour with the **'slower, best performance'** params.\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "```\n", + "\n", + "For better result, use the slower **'slower, best performance'** params, computation may take up to 1 hour:\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nenCcawjwowt" + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", + "\n", + "import sys\n", + "sys.path.append(f'{os.getcwd()}/SentEval')\n", + "\n", + "import tensorflow as tf\n", + "\n", + "# Prevent TF from claiming all GPU memory so there is some left for pytorch.\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " # Memory growth needs to be the same across GPUs.\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + "\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text\n", + "import senteval\n", + "import time\n", + "\n", + "PATH_TO_DATA = f'{os.getcwd()}/SentEval/data'\n", + "MODEL = 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1' #@param ['https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1', 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-large/1']\n", + "PARAMS = 'rapid prototyping' #@param ['slower, best performance', 'rapid prototyping']\n", + "TASK = 'CR' #@param ['CR','MR', 'MPQA', 'MRPC', 'SICKEntailment', 'SNLI', 'SST2', 'SUBJ', 'TREC']\n", + "\n", + "params_prototyping = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params_prototyping['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "\n", + "params_best = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params_best['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "\n", + "params = params_best if PARAMS == 'slower, best performance' else params_prototyping\n", + "\n", + "preprocessor = hub.KerasLayer(\n", + " \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3\")\n", + "encoder = hub.KerasLayer(\n", + " \"https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1\")\n", + "\n", + "inputs = tf.keras.Input(shape=tf.shape(''), dtype=tf.string)\n", + "outputs = encoder(preprocessor(inputs))\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)\n", + "\n", + "def prepare(params, samples):\n", + " return\n", + "\n", + "def batcher(_, batch):\n", + " batch = [' '.join(sent) if sent else '.' for sent in batch]\n", + " return model.predict(tf.constant(batch))[\"default\"]\n", + "\n", + "\n", + "se = senteval.engine.SE(params, batcher, prepare)\n", + "print(\"Evaluating task %s with %s parameters\" % (TASK, PARAMS))\n", + "start = time.time()\n", + "results = se.eval(TASK)\n", + "end = time.time()\n", + "print('Time took on task %s : %.1f. 
seconds' % (TASK, end - start))\n", + "print(results)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SNvsY6Hsvs0_" + }, + "source": [ + "#Learn More\n", + "\n", + "* Find more text embedding models on [TensorFlow Hub](https://tfhub.dev)\n", + "* See also the [Multilingual Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-base-br/1)\n", + "* Check out other [Universal Sentence Encoder models](https://tfhub.dev/google/collections/universal-sentence-encoder/1)\n", + "\n", + "## Reference\n", + "\n", + "* Ziyi Yang, Yinfei Yang, Daniel Cer, Jax Law, Eric Darve. [Universal Sentence Representations Learning with Conditional Masked Language Model. November 2020](https://openreview.net/forum?id=WDVD4lUCTzU)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "senteval_for_universal_sentence_encoder_cmlm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/spice.ipynb b/site/en/hub/tutorials/spice.ipynb new file mode 100644 index 00000000000..9ff6cd3bd62 --- /dev/null +++ b/site/en/hub/tutorials/spice.ipynb @@ -0,0 +1,937 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aXehiGc3Kr2I" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-6LKjmi8Ktoh" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sPQKw4x4bL8w" + }, + "source": [ + "# Pitch Detection with SPICE\n", + "\n", + "This colab will show you how to use the SPICE model downloaded from TensorFlow Hub." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rfKwZlPnPwD1" + }, + "outputs": [], + "source": [ + "!sudo apt-get install -q -y timidity libsndfile1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dYrIdOS8SW3b" + }, + "outputs": [], + "source": [ + "# All the imports to deal with sound data\n", + "!pip install pydub librosa music21" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p09o78LGYdnz" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import librosa\n", + "from librosa import display as librosadisplay\n", + "\n", + "import logging\n", + "import math\n", + "import statistics\n", + "import sys\n", + "\n", + "from IPython.display import Audio, Javascript\n", + "from scipy.io import wavfile\n", + "\n", + "from base64 import b64decode\n", + "\n", + "import music21\n", + "from pydub import AudioSegment\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.ERROR)\n", + "\n", + "print(\"tensorflow: %s\" % tf.__version__)\n", + "#print(\"librosa: %s\" % librosa.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wHxox8hXc3w1" + }, + "source": [ + "# The audio input file\n", + "Now the hardest part: Record your singing! :)\n", + "\n", + "We provide four methods to obtain an audio file:\n", + "\n", + "1. Record audio directly in colab\n", + "2. Upload from your computer\n", + "3. Use a file saved on Google Drive\n", + "4. Download the file from the web\n", + "\n", + "Choose one of the four methods below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "HaCAHOqiVu5B" + }, + "outputs": [], + "source": [ + "#@title [Run this] Definition of the JS code to record audio straight from the browser\n", + "\n", + "RECORD = \"\"\"\n", + "const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n", + "const b2text = blob => new Promise(resolve => {\n", + " const reader = new FileReader()\n", + " reader.onloadend = e => resolve(e.srcElement.result)\n", + " reader.readAsDataURL(blob)\n", + "})\n", + "var record = time => new Promise(async resolve => {\n", + " stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n", + " recorder = new MediaRecorder(stream)\n", + " chunks = []\n", + " recorder.ondataavailable = e => chunks.push(e.data)\n", + " recorder.start()\n", + " await sleep(time)\n", + " recorder.onstop = async ()=>{\n", + " blob = new Blob(chunks)\n", + " text = await b2text(blob)\n", + " resolve(text)\n", + " }\n", + " recorder.stop()\n", + "})\n", + "\"\"\"\n", + "\n", + "def record(sec=5):\n", + " try:\n", + " from google.colab import output\n", + " except ImportError:\n", + " print('No possible to import output from google.colab')\n", + " return ''\n", + " else:\n", + " print('Recording')\n", + " display(Javascript(RECORD))\n", + " s = output.eval_js('record(%d)' % (sec*1000))\n", + " fname = 'recorded_audio.wav'\n", + " print('Saving to', fname)\n", + " b = b64decode(s.split(',')[1])\n", + " with open(fname, 'wb') as f:\n", + " f.write(b)\n", + " return fname" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "sBpWWkTzfUYR" + }, + "outputs": [], + "source": [ + "#@title Select how to input your audio { run: \"auto\" }\n", + "INPUT_SOURCE = 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' #@param [\"https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav\", \"RECORD\", \"UPLOAD\", \"./drive/My Drive/YOUR_MUSIC_FILE.wav\"] {allow-input: true}\n", + "\n", + "print('You selected', INPUT_SOURCE)\n", + "\n", + "if INPUT_SOURCE == 'RECORD':\n", + " uploaded_file_name = record(5)\n", + "elif INPUT_SOURCE == 'UPLOAD':\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " uploaded = files.upload()\n", + " for fn in uploaded.keys():\n", + " print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", + " name=fn, length=len(uploaded[fn])))\n", + " uploaded_file_name = next(iter(uploaded))\n", + " print('Uploaded file: ' + uploaded_file_name)\n", + "elif INPUT_SOURCE.startswith('./drive/'):\n", + " try:\n", + " from google.colab import drive\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " drive.mount('/content/drive')\n", + " # don't forget to change the name of the file you\n", + " # will you here!\n", + " gdrive_audio_file = 'YOUR_MUSIC_FILE.wav'\n", + " uploaded_file_name = INPUT_SOURCE\n", + "elif INPUT_SOURCE.startswith('http'):\n", + " !wget --no-check-certificate 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' -O c-scale.wav\n", + " uploaded_file_name = 'c-scale.wav'\n", + "else:\n", + " print('Unrecognized input format!')\n", + " print('Please select \"RECORD\", \"UPLOAD\", or specify a file hosted on Google Drive or a file from 
the web to download')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4S2BvIoDf9nf" + }, + "source": [ + "# Preparing the audio data\n", + "\n", + "Now that we have the audio, let's convert it to the expected format and then listen to it!\n", + "\n", + "The SPICE model expects as input an audio file at a sampling rate of 16kHz with only one channel (mono). \n", + "\n", + "To help you with this part, we created a function (`convert_audio_for_model`) to convert any wav file you have to the model's expected format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bQ1362i-JoFI" + }, + "outputs": [], + "source": [ + "# Function that converts the user-created audio to the format that the model \n", + "# expects: a 16kHz sampling rate and only one channel (mono).\n", + "\n", + "EXPECTED_SAMPLE_RATE = 16000\n", + "\n", + "def convert_audio_for_model(user_file, output_file='converted_audio_file.wav'):\n", + " audio = AudioSegment.from_file(user_file)\n", + " audio = audio.set_frame_rate(EXPECTED_SAMPLE_RATE).set_channels(1)\n", + " audio.export(output_file, format=\"wav\")\n", + " return output_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oL9pftZ2nPm9" + }, + "outputs": [], + "source": [ + "# Convert to the expected format for the model.\n", + "# In all 4 input methods above, the uploaded file name is stored in\n", + "# the variable uploaded_file_name.\n", + "converted_audio_file = convert_audio_for_model(uploaded_file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TslkX2AOZN0p" + }, + "outputs": [], + "source": [ + "# Load audio samples from the wav file:\n", + "sample_rate, audio_samples = wavfile.read(converted_audio_file, 'rb')\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(audio_samples)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(audio_samples)}')\n", + "\n", + "# Let's listen to the wav file.\n", + "Audio(audio_samples, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iBicZu5AgcpR" + }, + "source": [ + "First, let's take a look at the waveform of our singing."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAa2M3CLZcWW" + }, + "outputs": [], + "source": [ + "# We can visualize the audio as a waveform.\n", + "_ = plt.plot(audio_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J1eI0b8qgn08" + }, + "source": [ + "A more informative visualization is the [spectrogram](https://en.wikipedia.org/wiki/Spectrogram), which shows frequencies present over time.\n", + "\n", + "Here, we use a logarithmic frequency scale, to make the singing more clearly visible.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fGR4UZtpZvWI" + }, + "outputs": [], + "source": [ + "MAX_ABS_INT16 = 32768.0\n", + "\n", + "def plot_stft(x, sample_rate, show_black_and_white=False):\n", + " x_stft = np.abs(librosa.stft(x, n_fft=2048))\n", + " fig, ax = plt.subplots()\n", + " fig.set_size_inches(20, 10)\n", + " x_stft_db = librosa.amplitude_to_db(x_stft, ref=np.max)\n", + " if(show_black_and_white):\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', \n", + " sr=sample_rate, cmap='gray_r')\n", + " else:\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', sr=sample_rate)\n", + "\n", + " plt.colorbar(format='%+2.0f dB')\n", + "\n", + "plot_stft(audio_samples / MAX_ABS_INT16 , sample_rate=EXPECTED_SAMPLE_RATE)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGCzo_cjjH-7" + }, + "source": [ + "We need one last conversion here. The audio samples are in int16 format. They need to be normalized to floats between -1 and 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dv4H4O1Xb8T8" + }, + "outputs": [], + "source": [ + "audio_samples = audio_samples / float(MAX_ABS_INT16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTdo_TwljVUV" + }, + "source": [ + "# Executing the Model\n", + "Now is the easy part, let's load the model with **TensorFlow Hub**, and feed the audio to it.\n", + "SPICE will give us two outputs: pitch and uncertainty\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xUptYSTAbc3I" + }, + "source": [ + "**TensorFlow Hub** is a library for the publication, discovery, and consumption of reusable parts of machine learning models. It makes easy to use machine learning to solve your challenges.\n", + "\n", + "To load the model you just need the Hub module and the URL pointing to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ri0A0DSXY_Yd" + }, + "outputs": [], + "source": [ + "# Loading the SPICE model is easy:\n", + "model = hub.load(\"https://tfhub.dev/google/spice/2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kQV5H6J4suMT" + }, + "source": [ + "**Note:** An interesting detail here is that all the model urls from Hub can be used for download and also to read the documentation, so if you point your browser to that link you can read documentation on how to use the model and learn more about how it was trained." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GUVICjIps9hI" + }, + "source": [ + "With the model loaded, data prepared, we need 3 lines to get the result: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tP55fXBYcBhb" + }, + "outputs": [], + "source": [ + "# We now feed the audio to the SPICE tf.hub model to obtain pitch and uncertainty outputs as tensors.\n", + "model_output = model.signatures[\"serving_default\"](tf.constant(audio_samples, tf.float32))\n", + "\n", + "pitch_outputs = model_output[\"pitch\"]\n", + "uncertainty_outputs = model_output[\"uncertainty\"]\n", + "\n", + "# 'Uncertainty' basically means the inverse of confidence.\n", + "confidence_outputs = 1.0 - uncertainty_outputs\n", + "\n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "plt.plot(pitch_outputs, label='pitch')\n", + "plt.plot(confidence_outputs, label='confidence')\n", + "plt.legend(loc=\"lower right\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "blJwFWR4kMul" + }, + "source": [ + "Let's make the results easier to understand by removing all pitch estimates with low confidence (confidence < 0.9) and plot the remaining ones.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d1MRmcm2cEkM" + }, + "outputs": [], + "source": [ + "confidence_outputs = list(confidence_outputs)\n", + "pitch_outputs = [ float(x) for x in pitch_outputs]\n", + "\n", + "indices = range(len (pitch_outputs))\n", + "confident_pitch_outputs = [ (i,p) \n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs) if c >= 0.9 ]\n", + "confident_pitch_outputs_x, confident_pitch_outputs_y = zip(*confident_pitch_outputs)\n", + " \n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "ax.set_ylim([0, 1])\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, )\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vNBZ7ZblkxOm" + }, + "source": [ + "The pitch values returned by SPICE are in the range from 0 to 1. Let's convert them to absolute pitch values in Hz." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n-CnpKzmcQi9" + }, + "outputs": [], + "source": [ + "def output2hz(pitch_output):\n", + " # Constants taken from https://tfhub.dev/google/spice/2\n", + " PT_OFFSET = 25.58\n", + " PT_SLOPE = 63.07\n", + " FMIN = 10.0;\n", + " BINS_PER_OCTAVE = 12.0;\n", + " cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET;\n", + " return FMIN * 2.0 ** (1.0 * cqt_bin / BINS_PER_OCTAVE)\n", + " \n", + "confident_pitch_values_hz = [ output2hz(p) for p in confident_pitch_outputs_y ]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "24yK0a6HjCSZ" + }, + "source": [ + "Now, let's see how good the prediction is: We will overlay the predicted pitches over the original spectrogram. To make the pitch predictions more visible, we changed the spectrogram to black and white." 
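Before plotting the overlay, here is a quick numeric sanity check of the `output2hz` conversion defined above, using a hypothetical model output of 0.5 and the same constants:

```python
# Worked example of the pitch-to-Hz mapping used by output2hz above.
PT_OFFSET = 25.58
PT_SLOPE = 63.07
FMIN = 10.0
BINS_PER_OCTAVE = 12.0

pitch_output = 0.5                                    # hypothetical SPICE output in [0, 1]
cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET         # 57.115
freq_hz = FMIN * 2.0 ** (cqt_bin / BINS_PER_OCTAVE)   # about 271 Hz
print(freq_hz)
```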
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L1kaAcX9rrDo" + }, + "outputs": [], + "source": [ + "plot_stft(audio_samples / MAX_ABS_INT16 , \n", + " sample_rate=EXPECTED_SAMPLE_RATE, show_black_and_white=True)\n", + "# Note: conveniently, since the plot is in log scale, the pitch outputs \n", + "# also get converted to the log scale automatically by matplotlib.\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_values_hz, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NskqpiHLxq6V" + }, + "source": [ + "# Converting to musical notes\n", + "\n", + "Now that we have the pitch values, let's convert them to notes!\n", + "This is part is challenging by itself. We have to take into account two things:\n", + "1. the rests (when there's no singing) \n", + "2. the size of each note (offsets) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KDOlm9PLTTjt" + }, + "source": [ + "### 1: Adding zeros to the output to indicate when there's no singing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9uSQ3bJmTZmo" + }, + "outputs": [], + "source": [ + "pitch_outputs_and_rests = [\n", + " output2hz(p) if c >= 0.9 else 0\n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9fM0UwlsTt4w" + }, + "source": [ + "### 2: Adding note offsets\n", + "\n", + "When a person sings freely, the melody may have an offset to the absolute pitch values that notes can represent.\n", + "Hence, to convert predictions to notes, one needs to correct for this possible offset.\n", + "This is what the following code computes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fsJu-P5ksdFW" + }, + "outputs": [], + "source": [ + "A4 = 440\n", + "C0 = A4 * pow(2, -4.75)\n", + "note_names = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n", + "\n", + "def hz2offset(freq):\n", + " # This measures the quantization error for a single note.\n", + " if freq == 0: # Rests always have zero error.\n", + " return None\n", + " # Quantized note.\n", + " h = round(12 * math.log2(freq / C0))\n", + " return 12 * math.log2(freq / C0) - h\n", + "\n", + "\n", + "# The ideal offset is the mean quantization error for all the notes\n", + "# (excluding rests):\n", + "offsets = [hz2offset(p) for p in pitch_outputs_and_rests if p != 0]\n", + "print(\"offsets: \", offsets)\n", + "\n", + "ideal_offset = statistics.mean(offsets)\n", + "print(\"ideal offset: \", ideal_offset)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K17It_qT2DtE" + }, + "source": [ + "We can now use some heuristics to try and estimate the most likely sequence of notes that were sung.\n", + "The ideal offset computed above is one ingredient - but we also need to know the speed (how many predictions make, say, an eighth?), and the time offset to start quantizing. To keep it simple, we'll just try different speeds and time offsets and measure the quantization error, using in the end the values that minimize this error." 
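As a minimal illustration of the note-naming step that this search builds on (ignoring the `ideal_offset` correction and reusing the `C0` and `note_names` definitions from the previous cell; the input frequency is just an example), a pitch of about 261.6 Hz quantizes to C4:

```python
import math

A4 = 440
C0 = A4 * pow(2, -4.75)
note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

freq = 261.63                           # hypothetical pitch in Hz
h = round(12 * math.log2(freq / C0))    # semitone index above C0 -> 48
octave, n = h // 12, h % 12
print(note_names[n] + str(octave))      # "C4"
```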
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eMULTI4L52ZHA" + }, + "outputs": [], + "source": [ + "def quantize_predictions(group, ideal_offset):\n", + " # Group values are either 0, or a pitch in Hz.\n", + " non_zero_values = [v for v in group if v != 0]\n", + " zero_values_count = len(group) - len(non_zero_values)\n", + "\n", + " # Create a rest if 80% is silent, otherwise create a note.\n", + " if zero_values_count > 0.8 * len(group):\n", + " # Interpret as a rest. Count each dropped note as an error, weighted a bit\n", + " # worse than a badly sung note (which would 'cost' 0.5).\n", + " return 0.51 * len(non_zero_values), \"Rest\"\n", + " else:\n", + " # Interpret as note, estimating as mean of non-rest predictions.\n", + " h = round(\n", + " statistics.mean([\n", + " 12 * math.log2(freq / C0) - ideal_offset for freq in non_zero_values\n", + " ]))\n", + " octave = h // 12\n", + " n = h % 12\n", + " note = note_names[n] + str(octave)\n", + " # Quantization error is the total difference from the quantized note.\n", + " error = sum([\n", + " abs(12 * math.log2(freq / C0) - ideal_offset - h)\n", + " for freq in non_zero_values\n", + " ])\n", + " return error, note\n", + "\n", + "\n", + "def get_quantization_and_error(pitch_outputs_and_rests, predictions_per_eighth,\n", + " prediction_start_offset, ideal_offset):\n", + " # Apply the start offset - we can just add the offset as rests.\n", + " pitch_outputs_and_rests = [0] * prediction_start_offset + \\\n", + " pitch_outputs_and_rests\n", + " # Collect the predictions for each note (or rest).\n", + " groups = [\n", + " pitch_outputs_and_rests[i:i + predictions_per_eighth]\n", + " for i in range(0, len(pitch_outputs_and_rests), predictions_per_eighth)\n", + " ]\n", + "\n", + " quantization_error = 0\n", + "\n", + " notes_and_rests = []\n", + " for group in groups:\n", + " error, note_or_rest = quantize_predictions(group, ideal_offset)\n", + " quantization_error += error\n", + " notes_and_rests.append(note_or_rest)\n", + "\n", + " return quantization_error, notes_and_rests\n", + "\n", + "\n", + "best_error = float(\"inf\")\n", + "best_notes_and_rests = None\n", + "best_predictions_per_note = None\n", + "\n", + "for predictions_per_note in range(20, 65, 1):\n", + " for prediction_start_offset in range(predictions_per_note):\n", + "\n", + " error, notes_and_rests = get_quantization_and_error(\n", + " pitch_outputs_and_rests, predictions_per_note,\n", + " prediction_start_offset, ideal_offset)\n", + "\n", + " if error < best_error: \n", + " best_error = error\n", + " best_notes_and_rests = notes_and_rests\n", + " best_predictions_per_note = predictions_per_note\n", + "\n", + "# At this point, best_notes_and_rests contains the best quantization.\n", + "# Since we don't need to have rests at the beginning, let's remove these:\n", + "while best_notes_and_rests[0] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[1:]\n", + "# Also remove silence at the end.\n", + "while best_notes_and_rests[-1] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[:-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMZbWA3aVqee" + }, + "source": [ + "Now let's write the quantized notes as sheet music score!\n", + "\n", + "To do it we will use two libraries: [music21](http://web.mit.edu/music21/) and [Open Sheet Music Display](https://github.com/opensheetmusicdisplay/opensheetmusicdisplay)\n", + "\n", + "**Note:** for simplicity, we assume here that all notes have the same 
duration (a half note)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yVrk_IOIzpQR" + }, + "outputs": [], + "source": [ + "# Creating the sheet music score.\n", + "sc = music21.stream.Score()\n", + "# Adjust the speed to match the actual singing.\n", + "bpm = 60 * 60 / best_predictions_per_note\n", + "print ('bpm: ', bpm)\n", + "a = music21.tempo.MetronomeMark(number=bpm)\n", + "sc.insert(0,a)\n", + "\n", + "for snote in best_notes_and_rests: \n", + " d = 'half'\n", + " if snote == 'Rest': \n", + " sc.append(music21.note.Rest(type=d))\n", + " else:\n", + " sc.append(music21.note.Note(snote, type=d))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "CEleCWHtG2s4" + }, + "outputs": [], + "source": [ + "#@title [Run this] Helper function to use Open Sheet Music Display (JS code) to show a music score\n", + "\n", + "from IPython.core.display import display, HTML, Javascript\n", + "import json, random\n", + "\n", + "def showScore(score):\n", + " xml = open(score.write('musicxml')).read()\n", + " showMusicXML(xml)\n", + " \n", + "def showMusicXML(xml):\n", + " DIV_ID = \"OSMD_div\"\n", + " display(HTML('
<div id=\"'+DIV_ID+'\">loading OpenSheetMusicDisplay</div>
    '))\n", + " script = \"\"\"\n", + " var div_id = %%DIV_ID%%;\n", + " function loadOSMD() { \n", + " return new Promise(function(resolve, reject){\n", + " if (window.opensheetmusicdisplay) {\n", + " return resolve(window.opensheetmusicdisplay)\n", + " }\n", + " // OSMD script has a 'define' call which conflicts with requirejs\n", + " var _define = window.define // save the define object \n", + " window.define = undefined // now the loaded script will ignore requirejs\n", + " var s = document.createElement( 'script' );\n", + " s.setAttribute( 'src', \"https://cdn.jsdelivr.net/npm/opensheetmusicdisplay@0.7.6/build/opensheetmusicdisplay.min.js\" );\n", + " //s.setAttribute( 'src', \"/custom/opensheetmusicdisplay.js\" );\n", + " s.onload=function(){\n", + " window.define = _define\n", + " resolve(opensheetmusicdisplay);\n", + " };\n", + " document.body.appendChild( s ); // browser will try to load the new script tag\n", + " }) \n", + " }\n", + " loadOSMD().then((OSMD)=>{\n", + " window.openSheetMusicDisplay = new OSMD.OpenSheetMusicDisplay(div_id, {\n", + " drawingParameters: \"compacttight\"\n", + " });\n", + " openSheetMusicDisplay\n", + " .load(%%data%%)\n", + " .then(\n", + " function() {\n", + " openSheetMusicDisplay.render();\n", + " }\n", + " );\n", + " })\n", + " \"\"\".replace('%%DIV_ID%%',DIV_ID).replace('%%data%%',json.dumps(xml))\n", + " display(Javascript(script))\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WTu4phq4WeAI" + }, + "outputs": [], + "source": [ + "# rendering the music score\n", + "showScore(sc)\n", + "print(best_notes_and_rests)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fGPXm6Z83U2g" + }, + "source": [ + "Let's convert the music notes to a MIDI file and listen to it.\n", + "\n", + "To create this file, we can use the stream we created before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "klYoWjgmPaod" + }, + "outputs": [], + "source": [ + "# Saving the recognized musical notes as a MIDI file\n", + "converted_audio_file_as_midi = converted_audio_file[:-4] + '.mid'\n", + "fp = sc.write('midi', fp=converted_audio_file_as_midi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tz7Mj3Qx1lpR" + }, + "outputs": [], + "source": [ + "wav_from_created_midi = converted_audio_file_as_midi.replace(' ', '_') + \"_midioutput.wav\"\n", + "print(wav_from_created_midi)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ahss5EOiWDDp" + }, + "source": [ + "To listen to it on colab, we need to convert it back to wav. An easy way of doing that is using Timidity." 
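As a rough sketch of the same conversion done from plain Python instead of a shell cell, one could call TiMidity through `subprocess`; this assumes the `timidity` binary is installed in the environment and simply reuses the `-Ow -o` flags of the shell command below.

```python
# Sketch only: convert a MIDI file to WAV by shelling out to TiMidity.
# Assumes `timidity` is installed (e.g. `apt-get install -y timidity`) and that
# `converted_audio_file_as_midi` / `wav_from_created_midi` are defined above.
import subprocess

def midi_to_wav(midi_path, wav_path):
    # -Ow writes RIFF WAVE output, -o sets the output file name.
    subprocess.run(["timidity", midi_path, "-Ow", "-o", wav_path], check=True)
```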
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XmeJ-UITV2nq" + }, + "outputs": [], + "source": [ + "!timidity $converted_audio_file_as_midi -Ow -o $wav_from_created_midi" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bnvwmyNj7kCC" + }, + "source": [ + "And finally, listen the audio, created from notes, created via MIDI from the predicted pitches, inferred by the model!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qNLBB0zJV6vN" + }, + "outputs": [], + "source": [ + "Audio(wav_from_created_midi)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "spice.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb new file mode 100644 index 00000000000..e2985bda51e --- /dev/null +++ b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ok9PfyoQ2rH_" + }, + "source": [ + "# How to solve a problem on Kaggle with TF-Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "556YQZLUO4Ih" + }, + "source": [ + "TF-Hub is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**. In this tutorial, we will use a TF-Hub text embedding module to train a simple sentiment classifier with a reasonable baseline accuracy. We will then submit the predictions to Kaggle.\n", + "\n", + "For more detailed tutorial on text classification with TF-Hub and further steps for improving the accuracy, take a look at [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9KyLct9rq0lo" + }, + "outputs": [], + "source": [ + "!pip install -q kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v7hy0bhngTUp" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import zipfile\n", + "\n", + "from sklearn import model_selection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JvgBdeMsuu_3" + }, + "source": [ + "Since this tutorial will be using a dataset from Kaggle, it requires [creating an API Token](https://github.com/Kaggle/kaggle-api) for your Kaggle account, and uploading it to the Colab environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nI7C-Zc4urOH" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# Upload the API token.\n", + "def get_kaggle():\n", + " try:\n", + " import kaggle\n", + " return kaggle\n", + " except OSError:\n", + " pass\n", + "\n", + " token_file = pathlib.Path(\"~/.kaggle/kaggle.json\").expanduser()\n", + " token_file.parent.mkdir(exist_ok=True, parents=True)\n", + "\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " raise ValueError(\"Could not find kaggle token.\")\n", + "\n", + " uploaded = files.upload()\n", + " token_content = uploaded.get('kaggle.json', None)\n", + " if token_content:\n", + " token_file.write_bytes(token_content)\n", + " token_file.chmod(0o600)\n", + " else:\n", + " raise ValueError('Need a file named \"kaggle.json\"')\n", + " \n", + " import kaggle\n", + " return kaggle\n", + "\n", + "\n", + "kaggle = get_kaggle()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6OPyVxHuiTEE" + }, + "source": [ + "# Getting started\n", + "\n", + "## Data\n", + "We will try to solve the [Sentiment Analysis on Movie Reviews](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) task from Kaggle. The dataset consists of syntactic subphrases of the Rotten Tomatoes movie reviews. 
The task is to label the phrases as **negative** or **positive** on the scale from 1 to 5.\n", + "\n", + "You must [accept the competition rules](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) before you can use the API to download the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "rKzc-fOGV72G" + }, + "outputs": [], + "source": [ + "SENTIMENT_LABELS = [\n", + " \"negative\", \"somewhat negative\", \"neutral\", \"somewhat positive\", \"positive\"\n", + "]\n", + "\n", + "# Add a column with readable values representing the sentiment.\n", + "def add_readable_labels_column(df, sentiment_value_column):\n", + " df[\"SentimentLabel\"] = df[sentiment_value_column].replace(\n", + " range(5), SENTIMENT_LABELS)\n", + " \n", + "# Download data from Kaggle and create a DataFrame.\n", + "def load_data_from_zip(path):\n", + " with zipfile.ZipFile(path, \"r\") as zip_ref:\n", + " name = zip_ref.namelist()[0]\n", + " with zip_ref.open(name) as zf:\n", + " return pd.read_csv(zf, sep=\"\\t\", index_col=0)\n", + "\n", + "\n", + "# The data does not come with a validation set so we'll create one from the\n", + "# training set.\n", + "def get_data(competition, train_file, test_file, validation_set_ratio=0.1):\n", + " data_path = pathlib.Path(\"data\")\n", + " kaggle.api.competition_download_files(competition, data_path)\n", + " competition_path = (data_path/competition)\n", + " competition_path.mkdir(exist_ok=True, parents=True)\n", + " competition_zip_path = competition_path.with_suffix(\".zip\")\n", + "\n", + " with zipfile.ZipFile(competition_zip_path, \"r\") as zip_ref:\n", + " zip_ref.extractall(competition_path)\n", + " \n", + " train_df = load_data_from_zip(competition_path/train_file)\n", + " test_df = load_data_from_zip(competition_path/test_file)\n", + "\n", + " # Add a human readable label.\n", + " add_readable_labels_column(train_df, \"Sentiment\")\n", + "\n", + " # We split by sentence ids, because we don't want to have phrases belonging\n", + " # to the same sentence in both training and validation set.\n", + " train_indices, validation_indices = model_selection.train_test_split(\n", + " np.unique(train_df[\"SentenceId\"]),\n", + " test_size=validation_set_ratio,\n", + " random_state=0)\n", + "\n", + " validation_df = train_df[train_df[\"SentenceId\"].isin(validation_indices)]\n", + " train_df = train_df[train_df[\"SentenceId\"].isin(train_indices)]\n", + " print(\"Split the training data into %d training and %d validation examples.\" %\n", + " (len(train_df), len(validation_df)))\n", + "\n", + " return train_df, validation_df, test_df\n", + "\n", + "\n", + "train_df, validation_df, test_df = get_data(\n", + " \"sentiment-analysis-on-movie-reviews\",\n", + " \"train.tsv.zip\", \"test.tsv.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DFq_EyS1BEyK" + }, + "source": [ + "Note: In this competition the task is not to rate entire reviews, but individual phrases from within the reviews. This is a much harder task." 
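To make the sentence-level split above concrete, here is a minimal sketch with toy data (not part of the competition files) showing one way to keep all phrases from the same sentence on one side of the split; `GroupShuffleSplit` is an alternative to the manual split on unique `SentenceId` values used above.

```python
# Toy illustration of a leakage-free split: phrases that share a SentenceId
# never end up in both the training and the validation set.
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit

toy_df = pd.DataFrame({
    "SentenceId": [1, 1, 2, 2, 2, 3],
    "Phrase": ["a fine film", "fine film", "dull plot", "plot", "dull", "great"],
    "Sentiment": [3, 3, 1, 2, 1, 4],
})

splitter = GroupShuffleSplit(n_splits=1, test_size=0.34, random_state=0)
train_idx, valid_idx = next(splitter.split(toy_df, groups=toy_df["SentenceId"]))
print(toy_df.iloc[train_idx], toy_df.iloc[valid_idx], sep="\n\n")
```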
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "42hgsiWNq5y9" + }, + "outputs": [], + "source": [ + "train_df.head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YPuHgx3BWBOg" + }, + "source": [ + "## Training an Model\n", + "\n", + "*Note: We could model this task also as a regression, see [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23U30yEkVq4w" + }, + "outputs": [], + "source": [ + "class MyModel(tf.keras.Model):\n", + " def __init__(self, hub_url):\n", + " super().__init__()\n", + " self.hub_url = hub_url\n", + " self.embed = hub.load(self.hub_url).signatures['default']\n", + " self.sequential = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(500),\n", + " tf.keras.layers.Dense(100),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + "\n", + " def call(self, inputs):\n", + " phrases = inputs['Phrase'][:,0]\n", + " embedding = 5*self.embed(phrases)['default']\n", + " return self.sequential(embedding)\n", + "\n", + " def get_config(self):\n", + " return {\"hub_url\":self.hub_url}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JE--GDMM2tSp" + }, + "outputs": [], + "source": [ + "model = MyModel(\"https://tfhub.dev/google/nnlm-en-dim128/1\")\n", + "model.compile(\n", + " loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=tf.optimizers.Adam(), \n", + " metrics = [tf.keras.metrics.SparseCategoricalAccuracy(name=\"accuracy\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SRr-lvhstiNw" + }, + "outputs": [], + "source": [ + "history = model.fit(x=dict(train_df), y=train_df['Sentiment'],\n", + " validation_data=(dict(validation_df), validation_df['Sentiment']),\n", + " epochs = 25)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s8j7YTRSe7Pj" + }, + "source": [ + "# Prediction\n", + "\n", + "Run predictions for the validation set and training set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGqVNSl87bgN" + }, + "outputs": [], + "source": [ + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbLg5LzGwAfC" + }, + "outputs": [], + "source": [ + "train_eval_result = model.evaluate(dict(train_df), train_df['Sentiment'])\n", + "validation_eval_result = model.evaluate(dict(validation_df), validation_df['Sentiment'])\n", + "\n", + "print(f\"Training set accuracy: {train_eval_result[1]}\")\n", + "print(f\"Validation set accuracy: {validation_eval_result[1]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DR2IsTF5vuAX" + }, + "source": [ + "## Confusion matrix\n", + "\n", + "Another very interesting statistic, especially for multiclass problems, is the [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). The confusion matrix allows visualization of the proportion of correctly and incorrectly labelled examples. We can easily see how much our classifier is biased and whether the distribution of labels makes sense. Ideally the largest fraction of predictions should be distributed along the diagonal." 
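Before plotting it, here is a tiny sketch with made-up labels of the row normalization used in the next cells: each row is divided by its total, so cell (i, j) reads as the fraction of examples of true class i that were predicted as class j.

```python
# Sketch with invented labels, purely to illustrate the normalization.
import tensorflow as tf

y_true = [0, 0, 1, 2, 2, 2]
y_pred = [0, 1, 1, 2, 2, 0]

cm = tf.math.confusion_matrix(y_true, y_pred)
cm_normalized = cm / tf.reduce_sum(cm, axis=1, keepdims=True)
print(cm_normalized.numpy())
```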
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKUnJFYY8bO_" + }, + "outputs": [], + "source": [ + "predictions = model.predict(dict(validation_df))\n", + "predictions = tf.argmax(predictions, axis=-1)\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fjAs8W_Z9BvP" + }, + "outputs": [], + "source": [ + "cm = tf.math.confusion_matrix(validation_df['Sentiment'], predictions)\n", + "cm = cm/cm.numpy().sum(axis=1)[:, tf.newaxis]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nT71CtArpsKz" + }, + "outputs": [], + "source": [ + "sns.heatmap(\n", + " cm, annot=True,\n", + " xticklabels=SENTIMENT_LABELS,\n", + " yticklabels=SENTIMENT_LABELS)\n", + "plt.xlabel(\"Predicted\")\n", + "plt.ylabel(\"True\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pic7o2m04weY" + }, + "source": [ + "We can easily submit the predictions back to Kaggle by pasting the following code to a code cell and executing it:\n", + "\n", + "``` python\n", + "test_predictions = model.predict(dict(test_df))\n", + "test_predictions = np.argmax(test_predictions, axis=-1)\n", + "\n", + "result_df = test_df.copy()\n", + "\n", + "result_df[\"Predictions\"] = test_predictions\n", + "\n", + "result_df.to_csv(\n", + " \"predictions.csv\",\n", + " columns=[\"Predictions\"],\n", + " header=[\"Sentiment\"])\n", + "kaggle.api.competition_submit(\"predictions.csv\", \"Submitted from Colab\",\n", + " \"sentiment-analysis-on-movie-reviews\")\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "50BLu-JX_dlm" + }, + "source": [ + "After submitting, [check the leaderboard](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/leaderboard) to see how you did." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_classification_with_tf_hub_on_kaggle.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_cookbook.md b/site/en/hub/tutorials/text_cookbook.md new file mode 100644 index 00000000000..dee9c1cf466 --- /dev/null +++ b/site/en/hub/tutorials/text_cookbook.md @@ -0,0 +1,101 @@ +# Text Cookbook + +This page lists a set of known guides and tools solving problems in the text +domain with TensorFlow Hub. It is a starting place for anybody who wants to +solve typical ML problems using pre-trained ML components rather than starting +from scratch. + +## Classification + +When we want to predict a class for a given example, for example **sentiment**, +**toxicity**, **article category**, or any other characteristic. + +![Text Classification Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-classification.png) + +The tutorials below are solving the same task from different perspectives and +using different tools. + +### Keras + +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) - +example for building an IMDB sentiment classifier with Keras and TensorFlow +Datasets. + +### Estimator + +[Text classification](https://github.com/tensorflow/docs/blob/master/g3doc/en/hub/tutorials/text_classification_with_tf_hub.ipynb) - +example for building an IMDB sentiment classifier with Estimator. Contains +multiple tips for improvement and a module comparison section. 
+ +### BERT +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) - +shows how to use a BERT module for classification. Includes use of `bert` +library for tokenization and preprocessing. + +### Kaggle + +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) - +shows how to easily interact with a Kaggle competition from a Colab, including +downloading the data and submitting the results. + + | Estimator | Keras | TF2 | TF Datasets | BERT | Kaggle APIs +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ----------- +[Text classification](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) + +### Bangla task with FastText embeddings +TensorFlow Hub does not currently offer a module in every language. The +following tutorial shows how to leverage TensorFlow Hub for fast experimentation +and modular ML development. + +[Bangla Article Classifier](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/bangla_article_classifier.ipynb) - +demonstrates how to create a reusable TensorFlow Hub text embedding, and use it +to train a Keras classifier for +[BARD Bangla Article dataset](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier). + +## Semantic similarity + +When we want to find out which sentences correlate with each other in zero-shot +setup (no training examples). 
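As a rough, self-contained sketch (assuming the universal-sentence-encoder module on tfhub.dev; see the tutorials below for full walkthroughs), zero-shot similarity boils down to embedding the sentences and comparing the vectors:

```python
import numpy as np
import tensorflow_hub as hub

# Assumes the universal-sentence-encoder module; any sentence encoder from
# tfhub.dev with the same calling convention would work similarly.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
sentences = ["How old are you?", "What is your age?", "The weather is nice today."]
embeddings = embed(sentences).numpy()

# The embeddings are approximately unit length, so inner products behave like
# cosine similarity scores.
print(np.inner(embeddings, embeddings))
```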
+ +![Semantic Similarity Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-similarity.png) + +### Basic + +[Semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb) - +shows how to use the sentence encoder module to compute sentence similarity. + +### Cross-lingual + +[Cross-lingual semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb) - +shows how to use one of the cross-lingual sentence encoders to compute sentence +similarity across languages. + +### Semantic retrieval + +[Semantic retrieval](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb) - +shows how to use Q/A sentence encoder to index a collection of documents for +retrieval based on semantic similarity. + +### SentencePiece input + +[Semantic similarity with universal encoder lite](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb) - +shows how to use sentence encoder modules that accept +[SentencePiece](https://github.com/google/sentencepiece) ids on input instead of +text. + +## Module creation +Instead of using only modules on [tfhub.dev](https://tfhub.dev), there are ways +to create own modules. This can be a useful tool for better ML codebase +modularity and for further sharing. + +### Wrapping existing pre-trained embeddings +[Text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) - +a tool to wrap an existing pre-trained embedding into a module. Shows how to +include text pre-processing ops into the module. This allows to create a +sentence embedding module from token embeddings. + +[Text embedding module exporter v2](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings_v2/export_v2.py) - +same as above, but compatible with TensorFlow 2 and eager execution. diff --git a/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb new file mode 100644 index 00000000000..52fb3c0e4ab --- /dev/null +++ b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JSGdaDHc_f4" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z2_BHI6XdJ30" + }, + "source": [ + "# Text-to-Video retrieval with S3D MIL-NCE" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rm0K9ZTgfISB" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bC_xJPpQd-LO" + }, + "outputs": [], + "source": [ + "!pip install -q opencv-python\n", + "\n", + "import os\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import cv2\n", + "from IPython import display\n", + "import math" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZxwaK-jf7qkW" + }, + "source": [ + "## Import TF-Hub model\n", + "\n", + "This tutorial demonstrates how to use the [S3D MIL-NCE model](https://tfhub.dev/deepmind/mil-nce/s3d/1) from TensorFlow Hub to do **text-to-video retrieval** to find the most similar videos for a given text query.\n", + "\n", + "The model has 2 signatures, one for generating *video embeddings* and one for generating *text embeddings*. We will use these embedding to find the nearest neighbors in the embedding space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nwv4ZQ4qmak5" + }, + "outputs": [], + "source": [ + "# Load the model once from TF-Hub.\n", + "hub_handle = 'https://tfhub.dev/deepmind/mil-nce/s3d/1'\n", + "hub_model = hub.load(hub_handle)\n", + "\n", + "def generate_embeddings(model, input_frames, input_words):\n", + " \"\"\"Generate embeddings from the model from video frames and input words.\"\"\"\n", + " # Input_frames must be normalized in [0, 1] and of the shape Batch x T x H x W x 3\n", + " vision_output = model.signatures['video'](tf.constant(tf.cast(input_frames, dtype=tf.float32)))\n", + " text_output = model.signatures['text'](tf.constant(input_words))\n", + " return vision_output['video_embedding'], text_output['text_embedding']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EOZzu9ddekEj" + }, + "outputs": [], + "source": [ + "# @title Define video loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "\n", + "def load_video(video_url, max_frames=32, resize=(224, 224)):\n", + " path = tf.keras.utils.get_file(os.path.basename(video_url)[-128:], video_url)\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + "\n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " frames = np.array(frames)\n", + " if len(frames) < max_frames:\n", + " n_repeat = int(math.ceil(max_frames / float(len(frames))))\n", + " frames = frames.repeat(n_repeat, axis=0)\n", + " frames = frames[:max_frames]\n", + " return frames / 255.0\n", + "\n", + "def display_video(urls):\n", + " html = ''\n", + " html += ''\n", + " for url in urls:\n", + " html += ''\n", + " html += '
    Video 1Video 2Video 3
    '\n", + " html += ''.format(url)\n", + " html += '
    '\n", + " return display.HTML(html)\n", + "\n", + "def display_query_and_results_video(query, urls, scores):\n", + " \"\"\"Display a text query and the top result videos and scores.\"\"\"\n", + " sorted_ix = np.argsort(-scores)\n", + " html = ''\n", + " html += '
<h2>Input query: <i>{}</i></h2>'.format(query)\n",
+ "  html += 'Results:'\n",
+ "  html += '<table>'\n",
+ "  html += '<tr><th>Rank #1, Score:{:.2f}</th>'.format(scores[sorted_ix[0]])\n",
+ "  html += '<th>Rank #2, Score:{:.2f}</th>'.format(scores[sorted_ix[1]])\n",
+ "  html += '<th>Rank #3, Score:{:.2f}</th></tr><tr>'.format(scores[sorted_ix[2]])\n",
+ "  for i, idx in enumerate(sorted_ix):\n",
+ "    url = urls[sorted_ix[i]]\n",
+ "    html += '<td><img src=\"{}\"></td>'.format(url)\n",
+ "  html += '</tr></table>
    '\n", + " return html\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ime5V4kDewh8" + }, + "outputs": [], + "source": [ + "# @title Load example videos and define text queries { display-mode: \"form\" }\n", + "\n", + "video_1_url = 'https://upload.wikimedia.org/wikipedia/commons/b/b0/YosriAirTerjun.gif' # @param {type:\"string\"}\n", + "video_2_url = 'https://upload.wikimedia.org/wikipedia/commons/e/e6/Guitar_solo_gif.gif' # @param {type:\"string\"}\n", + "video_3_url = 'https://upload.wikimedia.org/wikipedia/commons/3/30/2009-08-16-autodrift-by-RalfR-gif-by-wau.gif' # @param {type:\"string\"}\n", + "\n", + "video_1 = load_video(video_1_url)\n", + "video_2 = load_video(video_2_url)\n", + "video_3 = load_video(video_3_url)\n", + "all_videos = [video_1, video_2, video_3]\n", + "\n", + "query_1_video = 'waterfall' # @param {type:\"string\"}\n", + "query_2_video = 'playing guitar' # @param {type:\"string\"}\n", + "query_3_video = 'car drifting' # @param {type:\"string\"}\n", + "all_queries_video = [query_1_video, query_2_video, query_3_video]\n", + "all_videos_urls = [video_1_url, video_2_url, video_3_url]\n", + "display_video(all_videos_urls)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NCLKv_L_8Anc" + }, + "source": [ + "## Demonstrate text to video retrieval\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9oX8ItFUjybi" + }, + "outputs": [], + "source": [ + "# Prepare video inputs.\n", + "videos_np = np.stack(all_videos, axis=0)\n", + "\n", + "# Prepare text input.\n", + "words_np = np.array(all_queries_video)\n", + "\n", + "# Generate the video and text embeddings.\n", + "video_embd, text_embd = generate_embeddings(hub_model, videos_np, words_np)\n", + "\n", + "# Scores between video and text is computed by dot products.\n", + "all_scores = np.dot(text_embd, tf.transpose(video_embd))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d4AwYmODmE9Y" + }, + "outputs": [], + "source": [ + "# Display results.\n", + "html = ''\n", + "for i, words in enumerate(words_np):\n", + " html += display_query_and_results_video(words, all_videos_urls, all_scores[i, :])\n", + " html += '
    '\n", + "display.HTML(html)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_to_video_retrieval_with_s3d_milnce.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb new file mode 100644 index 00000000000..3a0cb09113e --- /dev/null +++ b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oXlcl8lqBgAD" + }, + "source": [ + "# Fast Style Transfer for Arbitrary Styles\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YeeuYzbZcJzs" + }, + "source": [ + "Based on the model code in [magenta](https://github.com/tensorflow/magenta/tree/master/magenta/models/arbitrary_image_stylization) and the publication:\n", + "\n", + "[Exploring the structure of a real-time, arbitrary neural artistic stylization\n", + "network](https://arxiv.org/abs/1705.06830).\n", + "*Golnaz Ghiasi, Honglak Lee,\n", + "Manjunath Kudlur, Vincent Dumoulin, Jonathon Shlens*,\n", + "Proceedings of the British Machine Vision Conference (BMVC), 2017.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaM8BVxrCA2E" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J65jog2ncJzt" + }, + "source": [ + "Let's start with importing TF2 and all relevant dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v-KXRY5XBu2u" + }, + "outputs": [], + "source": [ + "import functools\n", + "import os\n", + "\n", + "from matplotlib import gridspec\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF Version: \", tf.__version__)\n", + "print(\"TF Hub version: \", hub.__version__)\n", + "print(\"Eager mode enabled: \", tf.executing_eagerly())\n", + "print(\"GPU available: \", tf.config.list_physical_devices('GPU'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "# @title Define image loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "def crop_center(image):\n", + " \"\"\"Returns a cropped square image.\"\"\"\n", + " shape = image.shape\n", + " new_shape = min(shape[1], shape[2])\n", + " offset_y = max(shape[1] - shape[2], 0) // 2\n", + " offset_x = max(shape[2] - shape[1], 0) // 2\n", + " image = tf.image.crop_to_bounding_box(\n", + " image, offset_y, offset_x, new_shape, new_shape)\n", + " return image\n", + "\n", + "@functools.lru_cache(maxsize=None)\n", + "def load_image(image_url, image_size=(256, 256), preserve_aspect_ratio=True):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " image_path = tf.keras.utils.get_file(os.path.basename(image_url)[-128:], image_url)\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img = tf.io.decode_image(\n", + " tf.io.read_file(image_path),\n", + " channels=3, dtype=tf.float32)[tf.newaxis, ...]\n", + " img = crop_center(img)\n", + " img = tf.image.resize(img, image_size, preserve_aspect_ratio=True)\n", + " return img\n", + "\n", + "def show_n(images, titles=('',)):\n", + " n = len(images)\n", + " image_sizes = [image.shape[1] for image in images]\n", + " w = (image_sizes[0] * 6) // 320\n", + " plt.figure(figsize=(w * n, w))\n", + " gs = gridspec.GridSpec(1, n, width_ratios=image_sizes)\n", + " for i in range(n):\n", + " plt.subplot(gs[i])\n", + " plt.imshow(images[i][0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(titles[i] if len(titles) > i else '')\n", + " plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8etHh05-CJHc" + }, + "source": [ + "Let's get as well some images to play with." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dRc0vat3Alzo" + }, + "outputs": [], + "source": [ + "# @title Load example images { display-mode: \"form\" }\n", + "\n", + "content_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fd/Golden_Gate_Bridge_from_Battery_Spencer.jpg/640px-Golden_Gate_Bridge_from_Battery_Spencer.jpg' # @param {type:\"string\"}\n", + "style_image_url = 'https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg' # @param {type:\"string\"}\n", + "output_image_size = 384 # @param {type:\"integer\"}\n", + "\n", + "# The content image size can be arbitrary.\n", + "content_img_size = (output_image_size, output_image_size)\n", + "# The style prediction model was trained with image size 256 and it's the \n", + "# recommended image size for the style image (though, other sizes work as \n", + "# well but will lead to different results).\n", + "style_img_size = (256, 256) # Recommended to keep it at 256.\n", + "\n", + "content_image = load_image(content_image_url, content_img_size)\n", + "style_image = load_image(style_image_url, style_img_size)\n", + "style_image = tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME')\n", + "show_n([content_image, style_image], ['Content image', 'Style image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yL2Bn5ThR1nY" + }, + "source": [ + "## Import TF Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "467AVDSuzBPc" + }, + "outputs": [], + "source": [ + "# Load TF Hub module.\n", + "\n", + "hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'\n", + "hub_module = hub.load(hub_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uAR70_3wLEDB" + }, + "source": [ + "The signature of this hub module for image stylization is:\n", + "```\n", + "outputs = hub_module(content_image, style_image)\n", + "stylized_image = outputs[0]\n", + "```\n", + "Where `content_image`, `style_image`, and `stylized_image` are expected to be 4-D Tensors with shapes `[batch_size, image_height, image_width, 3]`.\n", + "\n", + "In the current example we provide only single images and therefore the batch dimension is 1, but one can use the same module to process more images at the same time.\n", + "\n", + "The input and output values of the images should be in the range [0, 1].\n", + "\n", + "The shapes of content and style image don't have to match. Output image shape\n", + "is the same as the content image shape." 
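As a small aside on the batch dimension mentioned above, a batched call might look like the following sketch; it assumes both inputs carry the same batch size and uses random tensors purely as stand-ins for real images with values in [0, 1].

```python
import tensorflow as tf
import tensorflow_hub as hub

hub_module = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')

# Four content images and four style images, batched along the first axis.
batch_content = tf.random.uniform((4, 384, 384, 3))
batch_style = tf.random.uniform((4, 256, 256, 3))

outputs = hub_module(tf.constant(batch_content), tf.constant(batch_style))
print(outputs[0].shape)  # expected: (4, 384, 384, 3), same as the content batch
```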
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEhYJno1R7rP" + }, + "source": [ + "## Demonstrate image stylization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnAv-F3O9fLV" + }, + "outputs": [], + "source": [ + "# Stylize content image with given style image.\n", + "# This is pretty fast within a few milliseconds on a GPU.\n", + "\n", + "outputs = hub_module(tf.constant(content_image), tf.constant(style_image))\n", + "stylized_image = outputs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OEAPEdq698gs" + }, + "outputs": [], + "source": [ + "# Visualize input images and the generated stylized image.\n", + "\n", + "show_n([content_image, style_image, stylized_image], titles=['Original content image', 'Style image', 'Stylized image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v-gYvjTWK-lx" + }, + "source": [ + "## Let's try it on more images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WSMaY0YBNfkK" + }, + "outputs": [], + "source": [ + "# @title To Run: Load more images { display-mode: \"form\" }\n", + "\n", + "content_urls = dict(\n", + " sea_turtle='https://upload.wikimedia.org/wikipedia/commons/d/d7/Green_Sea_Turtle_grazing_seagrass.jpg',\n", + " tuebingen='https://upload.wikimedia.org/wikipedia/commons/0/00/Tuebingen_Neckarfront.jpg',\n", + " grace_hopper='https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg',\n", + " )\n", + "style_urls = dict(\n", + " kanagawa_great_wave='https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg',\n", + " kandinsky_composition_7='https://upload.wikimedia.org/wikipedia/commons/b/b4/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg',\n", + " hubble_pillars_of_creation='https://upload.wikimedia.org/wikipedia/commons/6/68/Pillars_of_creation_2014_HST_WFC3-UVIS_full-res_denoised.jpg',\n", + " van_gogh_starry_night='https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg/1024px-Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg',\n", + " turner_nantes='https://upload.wikimedia.org/wikipedia/commons/b/b7/JMW_Turner_-_Nantes_from_the_Ile_Feydeau.jpg',\n", + " munch_scream='https://upload.wikimedia.org/wikipedia/commons/c/c5/Edvard_Munch%2C_1893%2C_The_Scream%2C_oil%2C_tempera_and_pastel_on_cardboard%2C_91_x_73_cm%2C_National_Gallery_of_Norway.jpg',\n", + " picasso_demoiselles_avignon='https://upload.wikimedia.org/wikipedia/en/4/4c/Les_Demoiselles_d%27Avignon.jpg',\n", + " picasso_violin='https://upload.wikimedia.org/wikipedia/en/3/3c/Pablo_Picasso%2C_1911-12%2C_Violon_%28Violin%29%2C_oil_on_canvas%2C_Kr%C3%B6ller-M%C3%BCller_Museum%2C_Otterlo%2C_Netherlands.jpg',\n", + " picasso_bottle_of_rum='https://upload.wikimedia.org/wikipedia/en/7/7f/Pablo_Picasso%2C_1911%2C_Still_Life_with_a_Bottle_of_Rum%2C_oil_on_canvas%2C_61.3_x_50.5_cm%2C_Metropolitan_Museum_of_Art%2C_New_York.jpg',\n", + " fire='https://upload.wikimedia.org/wikipedia/commons/3/36/Large_bonfire.jpg',\n", + " derkovits_woman_head='https://upload.wikimedia.org/wikipedia/commons/0/0d/Derkovits_Gyula_Woman_head_1922.jpg',\n", + " amadeo_style_life='https://upload.wikimedia.org/wikipedia/commons/8/8e/Untitled_%28Still_life%29_%281913%29_-_Amadeo_Souza-Cardoso_%281887-1918%29_%2817385824283%29.jpg',\n", + " derkovtis_talig='https://upload.wikimedia.org/wikipedia/commons/3/37/Derkovits_Gyula_Talig%C3%A1s_1920.jpg',\n", + " 
amadeo_cardoso='https://upload.wikimedia.org/wikipedia/commons/7/7d/Amadeo_de_Souza-Cardoso%2C_1915_-_Landscape_with_black_figure.jpg'\n", + ")\n", + "\n", + "content_image_size = 384\n", + "style_image_size = 256\n", + "content_images = {k: load_image(v, (content_image_size, content_image_size)) for k, v in content_urls.items()}\n", + "style_images = {k: load_image(v, (style_image_size, style_image_size)) for k, v in style_urls.items()}\n", + "style_images = {k: tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME') for k, style_image in style_images.items()}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dqB6aNTLNVkK" + }, + "outputs": [], + "source": [ + "#@title Specify the main content image and the style you want to use. { display-mode: \"form\" }\n", + "\n", + "content_name = 'sea_turtle' # @param ['sea_turtle', 'tuebingen', 'grace_hopper']\n", + "style_name = 'munch_scream' # @param ['kanagawa_great_wave', 'kandinsky_composition_7', 'hubble_pillars_of_creation', 'van_gogh_starry_night', 'turner_nantes', 'munch_scream', 'picasso_demoiselles_avignon', 'picasso_violin', 'picasso_bottle_of_rum', 'fire', 'derkovits_woman_head', 'amadeo_style_life', 'derkovtis_talig', 'amadeo_cardoso']\n", + "\n", + "stylized_image = hub_module(tf.constant(content_images[content_name]),\n", + " tf.constant(style_images[style_name]))[0]\n", + "\n", + "show_n([content_images[content_name], style_images[style_name], stylized_image],\n", + " titles=['Original content image', 'Style image', 'Stylized image'])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "tf2_arbitrary_image_stylization.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_image_retraining.ipynb b/site/en/hub/tutorials/tf2_image_retraining.ipynb new file mode 100644 index 00000000000..0266f4683c1 --- /dev/null +++ b/site/en/hub/tutorials/tf2_image_retraining.ipynb @@ -0,0 +1,605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oYM61xrTsP5d" + }, + "source": [ + "# Retraining an Image Classifier\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L1otmJgmbahf" + }, + "source": [ + "## Introduction\n", + "\n", + "Image classification models have millions of parameters. Training them from\n", + "scratch requires a lot of labeled training data and a lot of computing power. Transfer learning is a technique that shortcuts much of this by taking a piece of a model that has already been trained on a related task and reusing it in a new model.\n", + "\n", + "This Colab demonstrates how to build a Keras model for classifying five species of flowers by using a pre-trained TF2 SavedModel from TensorFlow Hub for image feature extraction, trained on the much larger and more general ImageNet dataset. Optionally, the feature extractor can be trained (\"fine-tuned\") alongside the newly added classifier.\n", + "\n", + "### Looking for a tool instead?\n", + "\n", + "This is a TensorFlow coding tutorial. If you want a tool that just builds the TensorFlow or TFLite model for, take a look at the [make_image_classifier](https://github.com/tensorflow/hub/tree/master/tensorflow_hub/tools/make_image_classifier) command-line tool that gets [installed](https://www.tensorflow.org/hub/installation) by the PIP package `tensorflow-hub[make_image_classifier]`, or at [this](https://colab.sandbox.google.com/github/tensorflow/examples/blob/master/tensorflow_examples/lite/model_maker/demo/image_classification.ipynb) TFLite colab.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bL54LWCHt5q5" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dlauq-4FWGZM" + }, + "outputs": [], + "source": [ + "import itertools\n", + "import os\n", + "\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF version:\", tf.__version__)\n", + "print(\"Hub version:\", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mmaHHH7Pvmth" + }, + "source": [ + "## Select the TF2 SavedModel module to use\n", + "\n", + "For starters, use [https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4](https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4). The same URL can be used in code to identify the SavedModel and in your browser to show its documentation. (Note that models in TF1 Hub format won't work here.)\n", + "\n", + "You can find more TF2 models that generate image feature vectors [here](https://tfhub.dev/s?module-type=image-feature-vector&tf-version=tf2).\n", + "\n", + "There are multiple possible models to try. All you need to do is select a different one on the cell below and follow up with the notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FlsEcKVeuCnf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "model_name = \"efficientnetv2-xl-21k\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2\",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/feature_vector/2\",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/feature_vector/2\",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2\",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2\",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2\",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/feature_vector/2\",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/feature_vector/2\",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2\",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2\",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2\",\n", + " \"efficientnetv2-xl-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2\",\n", + " \"efficientnetv2-b0-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2\",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/feature_vector/2\",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1\",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/feature-vector/1\",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/feature-vector/1\",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/feature-vector/1\",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/feature-vector/1\",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/feature-vector/1\",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/feature-vector/1\",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/feature-vector/1\",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/1\",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/feature-vector/4\",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/feature-vector/4\",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/feature-vector/4\",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/feature-vector/4\",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/feature-vector/4\",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/feature-vector/4\",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/feature-vector/4\",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/feature-vector/4\",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/4\",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4\",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/feature_vector/4\",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4\",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/feature_vector/4\",\n", + " \"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4\",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/feature_vector/5\",\n", + " \"mobilenet_v3_small_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/feature_vector/5\",\n", + " \"mobilenet_v3_large_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5\",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/feature_vector/5\",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " 
\"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"nasnet_large\": 331,\n", + " \"pnasnet_large\": 331,\n", + "}\n", + "\n", + "model_handle = model_handle_map.get(model_name)\n", + "pixels = model_image_size_map.get(model_name, 224)\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "IMAGE_SIZE = (pixels, pixels)\n", + "print(f\"Input size {IMAGE_SIZE}\")\n", + "\n", + "BATCH_SIZE = 16#@param {type:\"integer\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTY8qzyYv3vl" + }, + "source": [ + "## Set up the Flowers dataset\n", + "\n", + "Inputs are suitably resized for the selected module. Dataset augmentation (i.e., random distortions of an image each time it is read) improves training, esp. when fine-tuning." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WBtFK1hO8KsO" + }, + "outputs": [], + "source": [ + "data_dir = tf.keras.utils.get_file(\n", + " 'flower_photos',\n", + " 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',\n", + " untar=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "umB5tswsfTEQ" + }, + "outputs": [], + "source": [ + "def build_dataset(subset):\n", + " return tf.keras.preprocessing.image_dataset_from_directory(\n", + " data_dir,\n", + " validation_split=.20,\n", + " subset=subset,\n", + " label_mode=\"categorical\",\n", + " # Seed needs to provided when using validation_split and shuffle = True.\n", + " # A fixed seed is used so that the validation set is stable across runs.\n", + " seed=123,\n", + " image_size=IMAGE_SIZE,\n", + " batch_size=1)\n", + "\n", + "train_ds = build_dataset(\"training\")\n", + "class_names = tuple(train_ds.class_names)\n", + "train_size = train_ds.cardinality().numpy()\n", + "train_ds = train_ds.unbatch().batch(BATCH_SIZE)\n", + "train_ds = train_ds.repeat()\n", + "\n", + "normalization_layer = tf.keras.layers.Rescaling(1. 
/ 255)\n", + "preprocessing_model = tf.keras.Sequential([normalization_layer])\n", + "do_data_augmentation = False #@param {type:\"boolean\"}\n", + "if do_data_augmentation:\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomRotation(40))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0.2, 0))\n", + " # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),\n", + " # image sizes are fixed when reading, and then a random zoom is applied.\n", + " # If all training inputs are larger than image_size, one could also use\n", + " # RandomCrop with a batch size of 1 and rebatch later.\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomZoom(0.2, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomFlip(mode=\"horizontal\"))\n", + "train_ds = train_ds.map(lambda images, labels:\n", + " (preprocessing_model(images), labels))\n", + "\n", + "val_ds = build_dataset(\"validation\")\n", + "valid_size = val_ds.cardinality().numpy()\n", + "val_ds = val_ds.unbatch().batch(BATCH_SIZE)\n", + "val_ds = val_ds.map(lambda images, labels:\n", + " (normalization_layer(images), labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FS_gVStowW3G" + }, + "source": [ + "## Defining the model\n", + "\n", + "All it takes is to put a linear classifier on top of the `feature_extractor_layer` with the Hub module.\n", + "\n", + "For speed, we start out with a non-trainable `feature_extractor_layer`, but you can also enable fine-tuning for greater accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RaJW3XrPyFiF" + }, + "outputs": [], + "source": [ + "do_fine_tuning = False #@param {type:\"boolean\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50FYNIb1dmJH" + }, + "outputs": [], + "source": [ + "print(\"Building model with\", model_handle)\n", + "model = tf.keras.Sequential([\n", + " # Explicitly define the input shape so the model can be properly\n", + " # loaded by the TFLiteConverter\n", + " tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)),\n", + " hub.KerasLayer(model_handle, trainable=do_fine_tuning),\n", + " tf.keras.layers.Dropout(rate=0.2),\n", + " tf.keras.layers.Dense(len(class_names),\n", + " kernel_regularizer=tf.keras.regularizers.l2(0.0001))\n", + "])\n", + "model.build((None,)+IMAGE_SIZE+(3,))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u2e5WupIw2N2" + }, + "source": [ + "## Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9f3yBUvkd_VJ" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9), \n", + " loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w_YKX2Qnfg6x" + }, + "outputs": [], + "source": [ + "steps_per_epoch = train_size // BATCH_SIZE\n", + "validation_steps = valid_size // BATCH_SIZE\n", + "hist = model.fit(\n", + " train_ds,\n", + " epochs=5, steps_per_epoch=steps_per_epoch,\n", + " validation_data=val_ds,\n", + " validation_steps=validation_steps).history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CYOw0fTO1W4x" + }, + "outputs": [], + "source": [ + 
"plt.figure()\n", + "plt.ylabel(\"Loss (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,2])\n", + "plt.plot(hist[\"loss\"])\n", + "plt.plot(hist[\"val_loss\"])\n", + "\n", + "plt.figure()\n", + "plt.ylabel(\"Accuracy (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,1])\n", + "plt.plot(hist[\"accuracy\"])\n", + "plt.plot(hist[\"val_accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZ8DKKgeKv4-" + }, + "source": [ + "Try out the model on an image from the validation data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi1iCNB9K1Ai" + }, + "outputs": [], + "source": [ + "x, y = next(iter(val_ds))\n", + "image = x[0, :, :, :]\n", + "true_index = np.argmax(y[0])\n", + "plt.imshow(image)\n", + "plt.axis('off')\n", + "plt.show()\n", + "\n", + "# Expand the validation image to (1, 224, 224, 3) before predicting the label\n", + "prediction_scores = model.predict(np.expand_dims(image, axis=0))\n", + "predicted_index = np.argmax(prediction_scores)\n", + "print(\"True label: \" + class_names[true_index])\n", + "print(\"Predicted label: \" + class_names[predicted_index])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YCsAsQM1IRvA" + }, + "source": [ + "Finally, the trained model can be saved for deployment to TF Serving or TFLite (on mobile) as follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LGvTi69oIc2d" + }, + "outputs": [], + "source": [ + "saved_model_path = f\"/tmp/saved_flowers_model_{model_name}\"\n", + "tf.saved_model.save(model, saved_model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzW4oNRjILaq" + }, + "source": [ + "## Optional: Deployment to TensorFlow Lite\n", + "\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite) lets you deploy TensorFlow models to mobile and IoT devices. The code below shows how to convert the trained model to TFLite and apply post-training tools from the [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization). Finally, it runs it in the TFLite Interpreter to examine the resulting quality\n", + "\n", + " * Converting without optimization provides the same results as before (up to roundoff error).\n", + " * Converting with optimization without any data quantizes the model weights to 8 bits, but inference still uses floating-point computation for the neural network activations. This reduces model size almost by a factor of 4 and improves CPU latency on mobile devices.\n", + " * On top, computation of the neural network activations can be quantized to 8-bit integers as well if a small reference dataset is provided to calibrate the quantization range. On a mobile device, this accelerates inference further and makes it possible to run on accelerators like Edge TPU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Va1Vo92fSyV6" + }, + "outputs": [], + "source": [ + "#@title Optimization settings\n", + "optimize_lite_model = False #@param {type:\"boolean\"}\n", + "#@markdown Setting a value greater than zero enables quantization of neural network activations. 
A few dozen is already a useful amount.\n", + "num_calibration_examples = 60 #@param {type:\"slider\", min:0, max:1000, step:1}\n", + "representative_dataset = None\n", + "if optimize_lite_model and num_calibration_examples:\n", + " # Use a bounded number of training examples without labels for calibration.\n", + " # TFLiteConverter expects a list of input tensors, each with batch size 1.\n", + " representative_dataset = lambda: itertools.islice(\n", + " ([image[None, ...]] for batch, _ in train_ds for image in batch),\n", + " num_calibration_examples)\n", + "\n", + "converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)\n", + "if optimize_lite_model:\n", + " converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + " if representative_dataset: # This is optional, see above.\n", + " converter.representative_dataset = representative_dataset\n", + "lite_model_content = converter.convert()\n", + "\n", + "with open(f\"/tmp/lite_flowers_model_{model_name}.tflite\", \"wb\") as f:\n", + " f.write(lite_model_content)\n", + "print(\"Wrote %sTFLite model of %d bytes.\" %\n", + " (\"optimized \" if optimize_lite_model else \"\", len(lite_model_content)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wqEmD0xIqeG" + }, + "outputs": [], + "source": [ + "interpreter = tf.lite.Interpreter(model_content=lite_model_content)\n", + "# This little helper wraps the TFLite Interpreter as a numpy-to-numpy function.\n", + "def lite_model(images):\n", + " interpreter.allocate_tensors()\n", + " interpreter.set_tensor(interpreter.get_input_details()[0]['index'], images)\n", + " interpreter.invoke()\n", + " return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMMK-fZrKrk8" + }, + "outputs": [], + "source": [ + "#@markdown For rapid experimentation, start with a moderate number of examples.\n", + "num_eval_examples = 50 #@param {type:\"slider\", min:0, max:700}\n", + "eval_dataset = ((image, label) # TFLite expects batch size 1.\n", + " for batch in train_ds\n", + " for (image, label) in zip(*batch))\n", + "count = 0\n", + "count_lite_tf_agree = 0\n", + "count_lite_correct = 0\n", + "for image, label in eval_dataset:\n", + " probs_lite = lite_model(image[None, ...])[0]\n", + " probs_tf = model(image[None, ...]).numpy()[0]\n", + " y_lite = np.argmax(probs_lite)\n", + " y_tf = np.argmax(probs_tf)\n", + " y_true = np.argmax(label)\n", + " count +=1\n", + " if y_lite == y_tf: count_lite_tf_agree += 1\n", + " if y_lite == y_true: count_lite_correct += 1\n", + " if count >= num_eval_examples: break\n", + "print(\"TFLite model agrees with original model on %d of %d examples (%g%%).\" %\n", + " (count_lite_tf_agree, count, 100.0 * count_lite_tf_agree / count))\n", + "print(\"TFLite model is accurate on %d of %d examples (%g%%).\" %\n", + " (count_lite_correct, count, 100.0 * count_lite_correct / count))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "tf2_image_retraining.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_object_detection.ipynb b/site/en/hub/tutorials/tf2_object_detection.ipynb new file mode 100644 index 00000000000..d06ad401824 --- /dev/null +++ b/site/en/hub/tutorials/tf2_object_detection.ipynb @@ -0,0 +1,616 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "98rds-2OU-Rd" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "1c95xMGcU5_Z" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V1UUX8SUUiMO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rOvvWAVTkMR7" + }, + "source": [ + "# TensorFlow Hub Object Detection Colab\n", + "\n", + "Welcome to the TensorFlow Hub Object Detection Colab! This notebook will take you through the steps of running an \"out-of-the-box\" object detection model on images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IRImnk_7WOq1" + }, + "source": [ + "### More models\n", + "[This](https://tfhub.dev/tensorflow/collections/object_detection/1) collection contains TF2 object detection models that have been trained on the COCO 2017 dataset. [Here](https://tfhub.dev/s?module-type=image-object-detection) you can find all object detection models that are currently hosted on [tfhub.dev](https://tfhub.dev/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vPs64QA1Zdov" + }, + "source": [ + "## Imports and Setup\n", + "\n", + "Let's start with the base imports." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xk4FU-jx9kc3" + }, + "outputs": [], + "source": [ + "# This Colab requires a recent numpy version.\n", + "!pip install numpy==1.24.3\n", + "!pip install protobuf==3.20.3\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yn5_uV1HLvaz" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import io\n", + "import scipy.misc\n", + "import numpy as np\n", + "from six import BytesIO\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "from six.moves.urllib.request import urlopen\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "tf.get_logger().setLevel('ERROR')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IogyryF2lFBL" + }, + "source": [ + "## Utilities\n", + "\n", + "Run the following cell to create some utils that will be needed later:\n", + "\n", + "- Helper method to load an image\n", + "- Map of Model Name to TF Hub handle\n", + "- List of tuples with Human Keypoints for the COCO 2017 dataset. This is needed for models with keypoints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-y9R0Xllefec" + }, + "outputs": [], + "source": [ + "# @title Run this!!\n", + "\n", + "def load_image_into_numpy_array(path):\n", + " \"\"\"Load an image from file into a numpy array.\n", + "\n", + " Puts image into numpy array to feed into tensorflow graph.\n", + " Note that by convention we put it into a numpy array with shape\n", + " (height, width, channels), where channels=3 for RGB.\n", + "\n", + " Args:\n", + " path: the file path to the image\n", + "\n", + " Returns:\n", + " uint8 numpy array with shape (img_height, img_width, 3)\n", + " \"\"\"\n", + " image = None\n", + " if(path.startswith('http')):\n", + " response = urlopen(path)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " image = Image.open(image_data)\n", + " else:\n", + " image_data = tf.io.gfile.GFile(path, 'rb').read()\n", + " image = Image.open(BytesIO(image_data))\n", + "\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape(\n", + " (1, im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "\n", + "ALL_MODELS = {\n", + "'CenterNet HourGlass104 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512/1',\n", + "'CenterNet HourGlass104 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1',\n", + "'CenterNet HourGlass104 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024/1',\n", + "'CenterNet HourGlass104 Keypoints 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024_kpts/1',\n", + "'CenterNet Resnet50 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V1 FPN Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512_kpts/1',\n", + "'CenterNet Resnet101 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet101v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V2 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512/1',\n", + "'CenterNet Resnet50 V2 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1',\n", + "'EfficientDet D0 512x512' : 'https://tfhub.dev/tensorflow/efficientdet/d0/1',\n", + "'EfficientDet D1 640x640' : 'https://tfhub.dev/tensorflow/efficientdet/d1/1',\n", + "'EfficientDet D2 768x768' : 'https://tfhub.dev/tensorflow/efficientdet/d2/1',\n", + "'EfficientDet D3 896x896' : 'https://tfhub.dev/tensorflow/efficientdet/d3/1',\n", + "'EfficientDet D4 1024x1024' : 'https://tfhub.dev/tensorflow/efficientdet/d4/1',\n", + "'EfficientDet D5 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d5/1',\n", + "'EfficientDet D6 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d6/1',\n", + "'EfficientDet D7 1536x1536' : 'https://tfhub.dev/tensorflow/efficientdet/d7/1',\n", + "'SSD MobileNet v2 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2',\n", + "'SSD MobileNet V1 FPN 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1',\n", + "'SSD MobileNet V2 FPNLite 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1',\n", + "'SSD MobileNet V2 FPNLite 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_640x640/1',\n", + "'SSD ResNet50 V1 FPN 640x640 (RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1',\n", + "'SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)' : 
'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_1024x1024/1',\n", + "'SSD ResNet101 V1 FPN 640x640 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_640x640/1',\n", + "'SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_1024x1024/1',\n", + "'SSD ResNet152 V1 FPN 640x640 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_640x640/1',\n", + "'SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1',\n", + "'Faster R-CNN ResNet50 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_800x1333/1',\n", + "'Faster R-CNN ResNet101 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1',\n", + "'Faster R-CNN ResNet101 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet101 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_800x1333/1',\n", + "'Faster R-CNN ResNet152 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1',\n", + "'Faster R-CNN ResNet152 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet152 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_800x1333/1',\n", + "'Faster R-CNN Inception ResNet V2 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1',\n", + "'Faster R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1',\n", + "'Mask R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1'\n", + "}\n", + "\n", + "IMAGES_FOR_TEST = {\n", + " 'Beach' : 'models/research/object_detection/test_images/image2.jpg',\n", + " 'Dogs' : 'models/research/object_detection/test_images/image1.jpg',\n", + " # By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + " 'Naxos Taverna' : 'https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " 'Beatles' : 'https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg',\n", + " # By Américo Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " 'Phones' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " 'Birds' : 'https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg',\n", + "}\n", + "\n", + "COCO17_HUMAN_POSE_KEYPOINTS = [(0, 1),\n", + " (0, 2),\n", + " (1, 3),\n", + " (2, 4),\n", + " (0, 5),\n", + " (0, 6),\n", + " (5, 7),\n", + " (7, 9),\n", + " (6, 8),\n", + " (8, 10),\n", + " (5, 6),\n", + " (5, 11),\n", + " (6, 12),\n", + " (11, 12),\n", + " (11, 13),\n", + " (13, 
15),\n", + "                               (12, 14),\n", + "                               (14, 16)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "14bNk1gzh0TN" + }, + "source": [ + "## Visualization tools\n", + "\n", + "To visualize the images with the detected boxes, keypoints and segmentation masks, we will use the TensorFlow Object Detection API. To install it, we will clone the repo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi28cqGGFWnY" + }, + "outputs": [], + "source": [ + "# Clone the tensorflow models repository\n", + "!git clone --depth 1 https://github.com/tensorflow/models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yX3pb_pXDjYA" + }, + "source": [ + "Install the Object Detection API:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NwdsBdGhFanc" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt install -y protobuf-compiler\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install .\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3yDNgIx-kV7X" + }, + "source": [ + "Now we can import the dependencies we will need later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2JCeQU3fkayh" + }, + "outputs": [], + "source": [ + "from object_detection.utils import label_map_util\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "from object_detection.utils import ops as utils_ops\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NKtD0IeclbL5" + }, + "source": [ + "### Load label map data (for plotting)\n", + "\n", + "Label maps map index numbers to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.\n", + "\n", + "For simplicity, we load the label map from the same repository from which we cloned the Object Detection API code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5mucYUS6exUJ" + }, + "outputs": [], + "source": [ + "PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'\n", + "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6917xnUSlp9x" + }, + "source": [ + "## Build a detection model and load pre-trained model weights\n", + "\n", + "Here we choose which Object Detection model we will use.\n", + "Select the architecture and it will be loaded automatically.\n", + "If you want to try other architectures later, just change the next cell and execute the following ones.\n", + "\n", + "**Tip:** if you want to read more details about the selected model, you can follow the link (the model handle) and read additional documentation on TF Hub. After you select a model, we will print the handle to make it easier."
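As an aside, if you want to sanity-check a single handle before committing to it in the form below, a minimal sketch along these lines works. This is illustrative only: it assumes the `ALL_MODELS` map defined earlier, picks one entry, and downloads that model.

```python
import numpy as np
import tensorflow_hub as hub

# Illustrative only: pick one handle from the ALL_MODELS map defined above.
demo_handle = ALL_MODELS['SSD MobileNet v2 320x320']
demo_detector = hub.load(demo_handle)

# Detectors in this collection take a uint8 batch of shape (1, height, width, 3).
dummy_image = np.zeros((1, 320, 320, 3), dtype=np.uint8)
demo_outputs = demo_detector(dummy_image)

# Typical keys include 'detection_boxes', 'detection_scores', 'detection_classes'.
print(sorted(demo_outputs.keys()))
```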
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HtwrSqvakTNn" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\", run: \"auto\" }\n", + "model_display_name = 'CenterNet HourGlass104 Keypoints 512x512' # @param ['CenterNet HourGlass104 512x512','CenterNet HourGlass104 Keypoints 512x512','CenterNet HourGlass104 1024x1024','CenterNet HourGlass104 Keypoints 1024x1024','CenterNet Resnet50 V1 FPN 512x512','CenterNet Resnet50 V1 FPN Keypoints 512x512','CenterNet Resnet101 V1 FPN 512x512','CenterNet Resnet50 V2 512x512','CenterNet Resnet50 V2 Keypoints 512x512','EfficientDet D0 512x512','EfficientDet D1 640x640','EfficientDet D2 768x768','EfficientDet D3 896x896','EfficientDet D4 1024x1024','EfficientDet D5 1280x1280','EfficientDet D6 1280x1280','EfficientDet D7 1536x1536','SSD MobileNet v2 320x320','SSD MobileNet V1 FPN 640x640','SSD MobileNet V2 FPNLite 320x320','SSD MobileNet V2 FPNLite 640x640','SSD ResNet50 V1 FPN 640x640 (RetinaNet50)','SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)','SSD ResNet101 V1 FPN 640x640 (RetinaNet101)','SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)','SSD ResNet152 V1 FPN 640x640 (RetinaNet152)','SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)','Faster R-CNN ResNet50 V1 640x640','Faster R-CNN ResNet50 V1 1024x1024','Faster R-CNN ResNet50 V1 800x1333','Faster R-CNN ResNet101 V1 640x640','Faster R-CNN ResNet101 V1 1024x1024','Faster R-CNN ResNet101 V1 800x1333','Faster R-CNN ResNet152 V1 640x640','Faster R-CNN ResNet152 V1 1024x1024','Faster R-CNN ResNet152 V1 800x1333','Faster R-CNN Inception ResNet V2 640x640','Faster R-CNN Inception ResNet V2 1024x1024','Mask R-CNN Inception ResNet V2 1024x1024']\n", + "model_handle = ALL_MODELS[model_display_name]\n", + "\n", + "print('Selected model:'+ model_display_name)\n", + "print('Model Handle at TensorFlow Hub: {}'.format(model_handle))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "muhUt-wWL582" + }, + "source": [ + "## Loading the selected model from TensorFlow Hub\n", + "\n", + "Here we just need the model handle that was selected and use the Tensorflow Hub library to load it to memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rBuD07fLlcEO" + }, + "outputs": [], + "source": [ + "print('loading model...')\n", + "hub_model = hub.load(model_handle)\n", + "print('model loaded!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GIawRDKPPnd4" + }, + "source": [ + "## Loading an image\n", + "\n", + "Let's try the model on a simple image. To help with this, we provide a list of test images.\n", + "\n", + "Here are some simple things to try out if you are curious:\n", + "* Try running inference on your own images, just upload them to colab and load the same way it's done in the cell below.\n", + "* Modify some of the input images and see if detection still works. Some simple things to try out here include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).\n", + "\n", + "**Be careful:** when using images with an alpha channel, the model expect 3 channels images and the alpha will count as a 4th.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hX-AWUQ1wIEr" + }, + "outputs": [], + "source": [ + "#@title Image Selection (don't forget to execute the cell!) 
{ display-mode: \"form\"}\n", + "selected_image = 'Beach' # @param ['Beach', 'Dogs', 'Naxos Taverna', 'Beatles', 'Phones', 'Birds']\n", + "flip_image_horizontally = False #@param {type:\"boolean\"}\n", + "convert_image_to_grayscale = False #@param {type:\"boolean\"}\n", + "\n", + "image_path = IMAGES_FOR_TEST[selected_image]\n", + "image_np = load_image_into_numpy_array(image_path)\n", + "\n", + "# Flip horizontally\n", + "if(flip_image_horizontally):\n", + "  image_np[0] = np.fliplr(image_np[0]).copy()\n", + "\n", + "# Convert image to grayscale\n", + "if(convert_image_to_grayscale):\n", + "  image_np[0] = np.tile(\n", + "    np.mean(image_np[0], 2, keepdims=True), (1, 1, 3)).astype(np.uint8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FTHsFjR6HNwb" + }, + "source": [ + "## Doing the inference\n", + "\n", + "To run inference, we just need to call the TF Hub model we loaded.\n", + "\n", + "Things you can try:\n", + "* Print out `result['detection_boxes']` and try to match the box locations to the boxes in the image. Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).\n", + "* Inspect the other output keys present in the result. Full documentation can be found on the model's documentation page (point your browser to the model handle printed earlier)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gb_siXKcnnGC" + }, + "outputs": [], + "source": [ + "# Running inference\n", + "results = hub_model(image_np)\n", + "\n", + "# Different object detection models produce additional outputs;\n", + "# all of them are explained in the documentation.\n", + "result = {key:value.numpy() for key,value in results.items()}\n", + "print(result.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IZ5VYaBoeeFM" + }, + "source": [ + "## Visualizing the results\n", + "\n", + "Here is where we need the TensorFlow Object Detection API to draw the boxes from the inference step (and the keypoints, when available).\n", + "\n", + "The full documentation of this method can be seen [here](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py).\n", + "\n", + "Here you can, for example, set `min_score_thresh` to other values (between 0 and 1) to allow more detections through or to filter out more detections."
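To make the effect of `min_score_thresh` concrete, here is a small sketch (assuming the `result` dictionary computed above) that counts how many detections clear a given score cutoff:

```python
import numpy as np

def count_confident_detections(result, min_score_thresh=0.30):
    # result['detection_scores'] has shape (1, num_detections); index 0 is our single image.
    scores = np.asarray(result['detection_scores'][0])
    return int((scores >= min_score_thresh).sum())

# Raising the threshold keeps only the most confident boxes; lowering it lets more in.
print(count_confident_detections(result, 0.30))
print(count_confident_detections(result, 0.80))
```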
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2O7rV8g9s8Bz" + }, + "outputs": [], + "source": [ + "label_id_offset = 0\n", + "image_np_with_detections = image_np.copy()\n", + "\n", + "# Use keypoints if available in detections\n", + "keypoints, keypoint_scores = None, None\n", + "if 'detection_keypoints' in result:\n", + " keypoints = result['detection_keypoints'][0]\n", + " keypoint_scores = result['detection_keypoint_scores'][0]\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_detections[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " keypoints=keypoints,\n", + " keypoint_scores=keypoint_scores,\n", + " keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_detections[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qaw6Xi08NpEP" + }, + "source": [ + "## [Optional]\n", + "\n", + "Among the available object detection models there's Mask R-CNN and the output of this model allows instance segmentation.\n", + "\n", + "To visualize it we will use the same method we did before but adding an additional parameter: `instance_masks=output_dict.get('detection_masks_reframed', None)`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zl3qdtR1OvM_" + }, + "outputs": [], + "source": [ + "# Handle models with masks:\n", + "image_np_with_mask = image_np.copy()\n", + "\n", + "if 'detection_masks' in result:\n", + " # we need to convert np.arrays to tensors\n", + " detection_masks = tf.convert_to_tensor(result['detection_masks'][0])\n", + " detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])\n", + "\n", + " # Reframe the bbox mask to the image size.\n", + " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", + " detection_masks, detection_boxes,\n", + " image_np.shape[1], image_np.shape[2])\n", + " detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,\n", + " tf.uint8)\n", + " result['detection_masks_reframed'] = detection_masks_reframed.numpy()\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_mask[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " instance_masks=result.get('detection_masks_reframed', None),\n", + " line_thickness=8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_mask[0])\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..786065ff5a5 --- /dev/null +++ 
b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,790 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3T4d77AJaKte" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) model given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval.\n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed.\n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub model\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) to generate the embeddings from the TF-Hub model. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbor index." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nM17v_mEVSnd" + }, + "source": [ + "### More models\n", + "For models that have the same architecture but were trained on a different language, refer to [this](https://tfhub.dev/google/collections/nnlm/1) collection. [Here](https://tfhub.dev/s?module-type=text-embedding) you can find all text embeddings that are currently hosted on [tfhub.dev](https://tfhub.dev/). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install apache_beam\n", + "!pip install 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "from apache_beam.transforms import util\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Neural Network Language Model (NNLM)](https://tfhub.dev/google/nnlm-en-dim128/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam." 
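Before wiring the model into the Beam pipeline below, it can help to see what these embeddings give you directly. A minimal sketch, using the same NNLM handle as this tutorial but two made-up headlines (not taken from the dataset), that embeds two sentences and compares them with cosine similarity:

```python
import numpy as np
import tensorflow_hub as hub

# Load the NNLM sentence embedding module used throughout this tutorial.
embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/2')

# Two made-up headlines for illustration.
sentences = ["severe storms lash the queensland coast",
             "cyclone warning issued for northern australia"]
vectors = embed(sentences).numpy()  # shape: (2, 128)

def cosine_similarity(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Semantically related headlines should score noticeably higher than unrelated ones.
print(cosine_similarity(vectors[0], vectors[1]))
```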
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "embed_fn = None\n", + "\n", + "def generate_embeddings(text, model_url, random_projection_matrix=None):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global embed_fn\n", + " if embed_fn is None:\n", + " embed_fn = hub.load(model_url)\n", + " embedding = embed_fn(text).numpy()\n", + " if random_projection_matrix is not None:\n", + " embedding = embedding.dot(random_projection_matrix)\n", + " return text, embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g6pXBVxsVUbm" + }, + "source": [ + "### Convert to tf.Example method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMjqjWZNVVzd" + }, + "outputs": [], + "source": [ + "def to_tf_example(entries):\n", + " examples = []\n", + "\n", + " text_list, embedding_list = entries\n", + " for i in range(len(text_list)):\n", + " text = text_list[i]\n", + " embedding = embedding_list[i]\n", + "\n", + " features = {\n", + " 'text': tf.train.Feature(\n", + " bytes_list=tf.train.BytesList(value=[text.encode('utf-8')])),\n", + " 'embedding': tf.train.Feature(\n", + " float_list=tf.train.FloatList(value=embedding.tolist()))\n", + " }\n", + " \n", + " example = tf.train.Example(\n", + " features=tf.train.Features(\n", + " feature=features)).SerializeToString(deterministic=True)\n", + " \n", + " examples.append(example)\n", + " \n", + " return examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gDiV4uQCVYGH" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " (\n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Batch elements' >> util.BatchElements(\n", + " min_batch_size=args.batch_size, max_batch_size=args.batch_size)\n", + " | 'Generate embeddings' >> beam.Map(\n", + " generate_embeddings, args.model_url, args.random_projection_matrix)\n", + " | 'Encode to tf example' >> beam.FlatMap(to_tf_example)\n", + " | 'Write to TFRecords files' >> beam.io.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords')\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlbQdiYNVvne" + }, + "source": [ + "### Generating Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple, yet powerful technique used to reduce the dimensionality of a set of points which lie in Euclidean space. 
For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1yw1xgtNVv52" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was creates with shape of {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aJZUfT3NE7kj" + }, + "source": [ + "### Set parameters\n", + "If you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "77-Cow7uE74T" + }, + "outputs": [], + "source": [ + "model_url = 'https://tfhub.dev/google/nnlm-en-dim128/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = tempfile.mkdtemp()\n", + "original_dim = hub.load(model_url)(['']).shape[1]\n", + "random_projection_matrix = None\n", + "\n", + "if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'model_url': model_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..." 
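As a quick aside before inspecting the output files: the reason the random projection above is safe is that pairwise geometry is approximately preserved in the lower-dimensional space (the Johnson-Lindenstrauss lemma mentioned earlier). A self-contained NumPy sketch with synthetic vectors (not the headline embeddings) illustrates the idea:

```python
import numpy as np

rng = np.random.RandomState(42)
original_dim, projected_dim = 128, 64

# Two synthetic "embeddings": b is a noisy neighbour of a.
a = rng.normal(size=original_dim)
b = a + 0.3 * rng.normal(size=original_dim)

# Gaussian random projection matrix, scaled so vector lengths stay comparable.
projection = rng.normal(size=(original_dim, projected_dim)) / np.sqrt(projected_dim)

def cosine(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))

# Cosine similarity before and after projection should be close to each other.
print(cosine(a, b), cosine(a @ projection, b @ projection))
```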
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "\n", + "# Create a description of the features.\n", + "feature_description = {\n", + " 'text': tf.io.FixedLenFeature([], tf.string),\n", + " 'embedding': tf.io.FixedLenFeature([projected_dim], tf.float32)\n", + "}\n", + "\n", + "def _parse_example(example):\n", + " # Parse the input `tf.Example` proto using the dictionary above.\n", + " return tf.io.parse_single_example(example, feature_description)\n", + "\n", + "dataset = tf.data.TFRecordDataset(embed_file)\n", + "for record in dataset.take(sample).map(_parse_example):\n", + " print(\"{}: {}\".format(record['text'].numpy().decode('utf-8'), record['embedding'].numpy()[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations. If you are interested you can play along with other alternatives to ANNOY such as [NGT](https://github.com/yahoojapan/NGT), [FAISS](https://github.com/facebookresearch/faiss), etc. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.io.gfile.glob(embedding_files_pattern)\n", + " num_files = len(embed_files)\n", + " print('Found {} embedding file(s).'.format(num_files))\n", + "\n", + " item_counter = 0\n", + " for i, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(i+1, num_files))\n", + " dataset = tf.data.TFRecordDataset(embed_file)\n", + " for record in dataset.map(_parse_example):\n", + " text = record['text'].numpy().decode(\"utf-8\")\n", + " embedding = record['embedding'].numpy()\n", + " mapping[item_counter] = text\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, 
protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub model\n", + "print(\"Loading the TF-Hub model...\")\n", + "%time embed_fn = hub.load(model_url)\n", + "print(\"TF-Hub model is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0].numpy()\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "koINo8Du--8C" + }, + "source": [ + "### 
Enter a query to find the most similar items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkRSqs77tDuX" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub models at [tfhub.dev](https://tfhub.dev/) including more text embedding models and image feature vector models.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ACbjNjyO4f_8", + "g6pXBVxsVUbm" + ], + "name": "tf2_semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_text_classification.ipynb b/site/en/hub/tutorials/tf2_text_classification.ipynb new file mode 100644 index 00000000000..e2dae15bde0 --- /dev/null +++ b/site/en/hub/tutorials/tf2_text_classification.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ic4_occAAiAT" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "ioaprt5q5US7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yCl0eTNH5RS3" + }, + "outputs": [], + "source": [ + "#@title MIT License\n", + "#\n", + "# Copyright (c) 2017 François Chollet # IGNORE_COPYRIGHT: cleared by OSS licensing\n", + "#\n", + "# Permission is hereby granted, free of charge, to any person obtaining a\n", + "# copy of this software and associated documentation files (the \"Software\"),\n", + "# to deal in the Software without restriction, including without limitation\n", + "# the rights to use, copy, modify, merge, publish, distribute, sublicense,\n", + "# and/or sell copies of the Software, and to permit persons to whom the\n", + "# Software is furnished to do so, subject to the following conditions:\n", + "#\n", + "# The above copyright notice and this permission notice shall be included in\n", + "# all copies or substantial portions of the Software.\n", + "#\n", + "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", + "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", + "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n", + "# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", + "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n", + "# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n", + "# DEALINGS IN THE SOFTWARE." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ItXfxkxvosLH" + }, + "source": [ + "# Text Classification with Movie Reviews" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eg62Pmz3o83v" + }, + "source": [ + "This notebook classifies movie reviews as *positive* or *negative* using the text of the review. This is an example of *binary*—or two-class—classification, an important and widely applicable kind of machine learning problem. \n", + "\n", + "We'll use the [IMDB dataset](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb) that contains the text of 50,000 movie reviews from the [Internet Movie Database](https://www.imdb.com/). These are split into 25,000 reviews for training and 25,000 reviews for testing. The training and testing sets are *balanced*, meaning they contain an equal number of positive and negative reviews. \n", + "\n", + "This notebook uses [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), a high-level API to build and train models in TensorFlow, and [TensorFlow Hub](https://www.tensorflow.org/hub), a library and platform for transfer learning. For a more advanced text classification tutorial using `tf.keras`, see the [MLCC Text Classification Guide](https://developers.google.com/machine-learning/guides/text-classification/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qrk8NjzhSBh-" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=text-embedding) you can find more expressive or performant models that you could use to generate the text embedding." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ew7HTbPpCJH" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "print(\"Version: \", tf.__version__)\n", + "print(\"Eager mode: \", tf.executing_eagerly())\n", + "print(\"Hub version: \", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iAsKG535pHep" + }, + "source": [ + "## Download the IMDB dataset\n", + "\n", + "The IMDB dataset is available on [TensorFlow datasets](https://github.com/tensorflow/datasets). The following code downloads the IMDB dataset to your machine (or the colab runtime):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zXXx5Oc3pOmN" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n", + " batch_size=-1, as_supervised=True)\n", + "\n", + "train_examples, train_labels = tfds.as_numpy(train_data)\n", + "test_examples, test_labels = tfds.as_numpy(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l50X3GfjpU4r" + }, + "source": [ + "## Explore the data \n", + "\n", + "Let's take a moment to understand the format of the data. Each example is a sentence representing the movie review and a corresponding label. The sentence is not preprocessed in any way. The label is an integer value of either 0 or 1, where 0 is a negative review, and 1 is a positive review." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8qCnve_-lkO" + }, + "outputs": [], + "source": [ + "print(\"Training entries: {}, test entries: {}\".format(len(train_examples), len(test_examples)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RnKvHWW4-lkW" + }, + "source": [ + "Let's print the first 10 examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QtTS4kpEpjbi" + }, + "outputs": [], + "source": [ + "train_examples[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IFtaCHTdc-GY" + }, + "source": [ + "Let's also print the first 10 labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tvAjVXOWc6Mj" + }, + "outputs": [], + "source": [ + "train_labels[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LLC02j2g-llC" + }, + "source": [ + "## Build the model\n", + "\n", + "The neural network is created by stacking layers—this requires three main architectural decisions:\n", + "\n", + "* How to represent the text?\n", + "* How many layers to use in the model?\n", + "* How many *hidden units* to use for each layer?\n", + "\n", + "In this example, the input data consists of sentences. The labels to predict are either 0 or 1.\n", + "\n", + "One way to represent the text is to convert sentences into embedding vectors. We can use a pre-trained text embedding as the first layer, which will have two advantages:\n", + "* we don't have to worry about text preprocessing,\n", + "* we can benefit from transfer learning.\n", + "\n", + "For this example we will use a model from [TensorFlow Hub](https://www.tensorflow.org/hub) called [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2).\n", + "\n", + "There are two other models to test for the sake of this tutorial:\n", + "* [google/nnlm-en-dim50-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim50-with-normalization/2) - same as [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2), but with additional text normalization to remove punctuation. This can help to get better coverage of in-vocabulary embeddings for tokens in your input text.\n", + "* [google/nnlm-en-dim128-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2) - A larger model with an embedding dimension of 128 instead of the smaller 50." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "In2nDpTLkgKa" + }, + "source": [ + "Let's first create a Keras layer that uses a TensorFlow Hub model to embed the sentences, and try it out on a couple of input examples. Note that the output shape of the produced embeddings is as expected: `(num_examples, embedding_dimension)`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_NUbzVeYkgcO" + }, + "outputs": [], + "source": [ + "model = \"https://tfhub.dev/google/nnlm-en-dim50/2\"\n", + "hub_layer = hub.KerasLayer(model, input_shape=[], dtype=tf.string, trainable=True)\n", + "hub_layer(train_examples[:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfSbV6igl1EH" + }, + "source": [ + "Let's now build the full model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xpKOoWgu-llD" + }, + "outputs": [], + "source": [ + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(16, activation='relu'))\n", + "model.add(tf.keras.layers.Dense(1))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6PbKQ6mucuKL" + }, + "source": [ + "The layers are stacked sequentially to build the classifier:\n", + "\n", + "1. The first layer is a TensorFlow Hub layer. This layer uses a pre-trained Saved Model to map a sentence into its embedding vector. The model that we are using ([google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2)) splits the sentence into tokens, embeds each token and then combines the embeddings. The resulting dimensions are: `(num_examples, embedding_dimension)`.\n", + "2. This fixed-length output vector is piped through a fully-connected (`Dense`) layer with 16 hidden units.\n", + "3. The last layer is densely connected with a single output node. This outputs logits: the log-odds of the true class, according to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0XMwnDOp-llH" + }, + "source": [ + "### Hidden units\n", + "\n", + "The above model has two intermediate or \"hidden\" layers, between the input and output. The number of outputs (units, nodes, or neurons) is the dimension of the representational space for the layer. In other words, the amount of freedom the network is allowed when learning an internal representation.\n", + "\n", + "If a model has more hidden units (a higher-dimensional representation space), and/or more layers, then the network can learn more complex representations. However, it makes the network more computationally expensive and may lead to learning unwanted patterns—patterns that improve performance on training data but not on the test data. This is called *overfitting*, and we'll explore it later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4EqVWg4-llM" + }, + "source": [ + "### Loss function and optimizer\n", + "\n", + "A model needs a loss function and an optimizer for training. Since this is a binary classification problem and the model outputs logits (a single-unit layer with a linear activation), we'll use the `binary_crossentropy` loss function with `from_logits=True`. \n", + "\n", + "This isn't the only choice for a loss function; you could, for instance, choose `mean_squared_error`. 
But, generally, `binary_crossentropy` is better for dealing with probabilities—it measures the \"distance\" between probability distributions, or in our case, between the ground-truth distribution and the predictions.\n", + "\n", + "Later, when we are exploring regression problems (say, to predict the price of a house), we will see how to use another loss function called mean squared error.\n", + "\n", + "Now, configure the model to use an optimizer and a loss function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mr0GP-cQ-llN" + }, + "outputs": [], + "source": [ + "model.compile(optimizer='adam',\n", + " loss=tf.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCWYwkug-llQ" + }, + "source": [ + "## Create a validation set\n", + "\n", + "When training, we want to check the accuracy of the model on data it hasn't seen before. Create a *validation set* by setting apart 10,000 examples from the original training data. (Why not use the testing set now? Our goal is to develop and tune our model using only the training data, then use the test data just once to evaluate our accuracy)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-NpcXY9--llS" + }, + "outputs": [], + "source": [ + "x_val = train_examples[:10000]\n", + "partial_x_train = train_examples[10000:]\n", + "\n", + "y_val = train_labels[:10000]\n", + "partial_y_train = train_labels[10000:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "35jv_fzP-llU" + }, + "source": [ + "## Train the model\n", + "\n", + "Train the model for 40 epochs in mini-batches of 512 samples. This is 40 iterations over all samples in the `x_train` and `y_train` tensors. While training, monitor the model's loss and accuracy on the 10,000 samples from the validation set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXSGrjWZ-llW" + }, + "outputs": [], + "source": [ + "history = model.fit(partial_x_train,\n", + " partial_y_train,\n", + " epochs=40,\n", + " batch_size=512,\n", + " validation_data=(x_val, y_val),\n", + " verbose=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EEGuDVuzb5r" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "And let's see how the model performs. Two values will be returned. Loss (a number which represents our error, lower values are better), and accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOMKywn4zReN" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_examples, test_labels)\n", + "\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1iEXVTR0Z2t" + }, + "source": [ + "This fairly naive approach achieves an accuracy of about 87%. With more advanced approaches, the model should get closer to 95%." 
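One detail worth keeping in mind: the final `Dense(1)` layer outputs logits, so the raw predictions are not probabilities. Below is a minimal sketch (an illustrative addition, assuming the trained `model` and the `test_examples` array defined above) of turning a few predictions into probabilities with a sigmoid:

```python
# Convert the model's logits into probabilities for the first few test reviews.
logits = model.predict(test_examples[:5])
probs = tf.sigmoid(logits).numpy().flatten()
for review, p in zip(test_examples[:5], probs):
    print(f"P(positive) = {p:.3f} | {review[:60].decode('utf-8', errors='ignore')}")
```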
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5KggXVeL-llZ" + }, + "source": [ + "## Create a graph of accuracy and loss over time\n", + "\n", + "`model.fit()` returns a `History` object that contains a dictionary with everything that happened during training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VcvSXvhp-llb" + }, + "outputs": [], + "source": [ + "history_dict = history.history\n", + "history_dict.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nRKsqL40-lle" + }, + "source": [ + "There are four entries: one for each monitored metric during training and validation. We can use these to plot the training and validation loss for comparison, as well as the training and validation accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nGoYf2Js-lle" + }, + "outputs": [], + "source": [ + "acc = history_dict['accuracy']\n", + "val_acc = history_dict['val_accuracy']\n", + "loss = history_dict['loss']\n", + "val_loss = history_dict['val_loss']\n", + "\n", + "epochs = range(1, len(acc) + 1)\n", + "\n", + "# \"bo\" is for \"blue dot\"\n", + "plt.plot(epochs, loss, 'bo', label='Training loss')\n", + "# b is for \"solid blue line\"\n", + "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6hXx-xOv-llh" + }, + "outputs": [], + "source": [ + "plt.clf() # clear figure\n", + "\n", + "plt.plot(epochs, acc, 'bo', label='Training acc')\n", + "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", + "plt.title('Training and validation accuracy')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Accuracy')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oFEmZ5zq-llk" + }, + "source": [ + "In this plot, the dots represent the training loss and accuracy, and the solid lines are the validation loss and accuracy.\n", + "\n", + "Notice the training loss *decreases* with each epoch and the training accuracy *increases* with each epoch. This is expected when using a gradient descent optimization—it should minimize the desired quantity on every iteration.\n", + "\n", + "This isn't the case for the validation loss and accuracy—they seem to peak after about twenty epochs. This is an example of overfitting: the model performs better on the training data than it does on data it has never seen before. After this point, the model over-optimizes and learns representations *specific* to the training data that do not *generalize* to test data.\n", + "\n", + "For this particular case, we could prevent overfitting by simply stopping the training after twenty or so epochs. Later, you'll see how to do this automatically with a callback." 
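As a sketch of that callback (one reasonable configuration, not necessarily the exact one used later), `tf.keras.callbacks.EarlyStopping` can stop training once the validation loss stops improving, using the same model and data splits defined above:

```python
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',         # watch the validation loss
    patience=3,                 # tolerate a few epochs without improvement
    restore_best_weights=True)  # keep the weights from the best epoch

history = model.fit(partial_x_train, partial_y_train,
                    epochs=40, batch_size=512,
                    validation_data=(x_val, y_val),
                    callbacks=[early_stop], verbose=1)
```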
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_text_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_delf_module.ipynb b/site/en/hub/tutorials/tf_hub_delf_module.ipynb new file mode 100644 index 00000000000..b6dec2eae00 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_delf_module.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0DmDwGPOGfaQ" + }, + "source": [ + "# How to match images using DELF and TensorFlow Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f3nk38tIKytQ" + }, + "source": [ + "TensorFlow Hub (TF-Hub) is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**.\n", + "\n", + "In this colab, we will use a module that packages the [DELF](https://github.com/tensorflow/models/tree/master/research/delf) neural network and logic for processing images to identify keypoints and their descriptors. The weights of the neural network were trained on images of landmarks as described in [this paper](https://arxiv.org/abs/1612.06321)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lrKaWOB_cuS3" + }, + "outputs": [], + "source": [ + "!pip install scikit-image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SI7eVflHHxvi" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from PIL import Image, ImageOps\n", + "from scipy.spatial import cKDTree\n", + "from skimage.feature import plot_matched_features\n", + "from skimage.measure import ransac\n", + "from skimage.transform import AffineTransform\n", + "from six import BytesIO\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qquo2HiONiDK" + }, + "source": [ + "## The data\n", + "\n", + "In the next cell, we specify the URLs of two images we would like to process with DELF in order to match and compare them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "l93ye4WFIqIV" + }, + "outputs": [], + "source": [ + "#@title Choose images\n", + "images = \"Bridge of Sighs\" #@param [\"Bridge of Sighs\", \"Golden Gate\", \"Acropolis\", \"Eiffel tower\"]\n", + "if images == \"Bridge of Sighs\":\n", + " # from: https://commons.wikimedia.org/wiki/File:Bridge_of_Sighs,_Oxford.jpg\n", + " # by: N.H. Fischer\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/2/28/Bridge_of_Sighs%2C_Oxford.jpg'\n", + " # from https://commons.wikimedia.org/wiki/File:The_Bridge_of_Sighs_and_Sheldonian_Theatre,_Oxford.jpg\n", + " # by: Matthew Hoser\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/The_Bridge_of_Sighs_and_Sheldonian_Theatre%2C_Oxford.jpg'\n", + "elif images == \"Golden Gate\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/1/1e/Golden_gate2.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/3/3e/GoldenGateBridge.jpg'\n", + "elif images == \"Acropolis\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/ce/2006_01_21_Ath%C3%A8nes_Parth%C3%A9non.JPG'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/5/5c/ACROPOLIS_1969_-_panoramio_-_jean_melis.jpg'\n", + "else:\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/d/d8/Eiffel_Tower%2C_November_15%2C_2011.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/a/a8/Eiffel_Tower_from_immediately_beside_it%2C_Paris_May_2008.jpg'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ttlHtcmiN6QF" + }, + "source": [ + "Download, resize, save and display the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E6RMomGJSfeb" + }, + "outputs": [], + "source": [ + "def download_and_resize(name, url, new_width=256, new_height=256):\n", + " path = tf.keras.utils.get_file(url.split('/')[-1], url)\n", + " image = Image.open(path)\n", + " image = ImageOps.fit(image, (new_width, new_height), Image.LANCZOS)\n", + " return image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "reajtO7XSj7Y" + }, + "outputs": [], + "source": [ + "image1 = download_and_resize('image_1.jpg', IMAGE_1_URL)\n", + "image2 = download_and_resize('image_2.jpg', IMAGE_2_URL)\n", + "\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(image1)\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "leKqkoT9OP7r" + }, + "source": [ + "## Apply the DELF module to the data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3WoT1-SPoTI" + }, + "source": [ + "The DELF module takes an image as input and will describe noteworthy points with vectors. The following cell contains the core of this colab's logic." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pXr2tUhvp1Ue" + }, + "outputs": [], + "source": [ + "delf = hub.load('https://tfhub.dev/google/delf/1').signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pvAU_gUHoYcY" + }, + "outputs": [], + "source": [ + "def run_delf(image):\n", + " np_image = np.array(image)\n", + " float_image = tf.image.convert_image_dtype(np_image, tf.float32)\n", + "\n", + " return delf(\n", + " image=float_image,\n", + " score_threshold=tf.constant(100.0),\n", + " image_scales=tf.constant([0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0]),\n", + " max_feature_num=tf.constant(1000))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FEzgHAT0UDNP" + }, + "outputs": [], + "source": [ + "result1 = run_delf(image1)\n", + "result2 = run_delf(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NByyBA5yOL2b" + }, + "source": [ + "## Use the locations and description vectors to match the images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "mVaKXT3cMSib" + }, + "outputs": [], + "source": [ + "#@title TensorFlow is not needed for this post-processing and visualization\n", + "def match_images(image1, image2, result1, result2):\n", + " distance_threshold = 0.8\n", + "\n", + " # Read features.\n", + " num_features_1 = result1['locations'].shape[0]\n", + " print(\"Loaded image 1's %d features\" % num_features_1)\n", + " \n", + " num_features_2 = result2['locations'].shape[0]\n", + " print(\"Loaded image 2's %d features\" % num_features_2)\n", + "\n", + " # Find nearest-neighbor matches using a KD tree.\n", + " d1_tree = cKDTree(result1['descriptors'])\n", + " _, indices = d1_tree.query(\n", + " result2['descriptors'],\n", + " distance_upper_bound=distance_threshold)\n", + "\n", + " # Select feature locations for putative matches.\n", + " locations_2_to_use = np.array([\n", + " result2['locations'][i,]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + " locations_1_to_use = np.array([\n", + " result1['locations'][indices[i],]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + "\n", + " # Perform geometric verification using 
RANSAC.\n", + " _, inliers = ransac(\n", + " (locations_1_to_use, locations_2_to_use),\n", + " AffineTransform,\n", + " min_samples=3,\n", + " residual_threshold=20,\n", + " max_trials=1000)\n", + "\n", + " print('Found %d inliers' % sum(inliers))\n", + "\n", + " # Visualize correspondences.\n", + " _, ax = plt.subplots()\n", + " inlier_idxs = np.nonzero(inliers)[0]\n", + " plot_matched_features(\n", + " image1,\n", + " image2,\n", + " keypoints0=locations_1_to_use,\n", + " keypoints1=locations_2_to_use,\n", + " matches=np.column_stack((inlier_idxs, inlier_idxs)),\n", + " ax=ax,\n", + " )\n", + "\n", + " ax.axis('off')\n", + " ax.set_title('DELF correspondences')\n", + "\n", + " for line in ax.lines:\n", + " line.set_color('b')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tpEgqOvCYlPY" + }, + "outputs": [], + "source": [ + "match_images(image1, image2, result1, result2)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "tf_hub_delf_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_film_example.ipynb b/site/en/hub/tutorials/tf_hub_film_example.ipynb new file mode 100644 index 00000000000..83bcd4bd12c --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_film_example.ipynb @@ -0,0 +1,576 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qNLUPuRpkFv_" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DQcWZm0FkPk-" + }, + "outputs": [], + "source": [ + "#@title Copyright 2022 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Exbxve1rHlrF" + }, + "source": [ + "# Frame interpolation using the FILM model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jMWFVTlbrQ8m" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61H28S7ArUAZ" + }, + "source": [ + "Frame interpolation is the task of synthesizing many in-between images from a given set of images. The technique is often used for frame rate upsampling or creating slow-motion video effects.\n", + "\n", + "In this colab, you will use the FILM model to do frame interpolation. The colab also provides code snippets to create videos from the interpolated in-between images.\n", + "\n", + "For more information on FILM research, you can read more here:\n", + "- Google AI Blog: [Large Motion Frame Interpolation](https://ai.googleblog.com/2022/10/large-motion-frame-interpolation.html)\n", + "- Project Page: FILM: [Frame Interpolation for Large Motion](https://film-net.github.io/)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dVX7s6zMulsu" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi5t2OEJsGBW" + }, + "outputs": [], + "source": [ + "!pip install mediapy\n", + "!sudo apt-get install -y ffmpeg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BA1tq39MjOiF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "import numpy as np\n", + "\n", + "from typing import Generator, Iterable, List, Optional\n", + "import mediapy as media" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GTgXmeYGnT7q" + }, + "source": [ + "## Load the model from TFHub\n", + "\n", + "To load a model from TensorFlow Hub you need the tfhub library and the model handle which is its documentation url." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GojhvyAtjUt0" + }, + "outputs": [], + "source": [ + "model = hub.load(\"https://tfhub.dev/google/film/1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DOQJPsu2CwPk" + }, + "source": [ + "## Util function to load images from a url or locally\n", + "\n", + "This function loads an image and make it ready to be used by the model later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BPnh5uhQvFln" + }, + "outputs": [], + "source": [ + "_UINT8_MAX_F = float(np.iinfo(np.uint8).max)\n", + "\n", + "def load_image(img_url: str):\n", + " \"\"\"Returns an image with shape [height, width, num_channels], with pixels in [0..1] range, and type np.float32.\"\"\"\n", + "\n", + " if (img_url.startswith(\"https\")):\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image_data = response.content\n", + " else:\n", + " image_data = tf.io.read_file(img_url)\n", + "\n", + " image = tf.io.decode_image(image_data, channels=3)\n", + " image_numpy = tf.cast(image, dtype=tf.float32).numpy()\n", + " return image_numpy / _UINT8_MAX_F\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yjDFns1zp5y6" + }, + "source": [ + "FILM's model input is a dictionary with the keys `time`, `x0`, `x1`:\n", + "\n", + "- `time`: position of the interpolated frame. Midway is `0.5`.\n", + "- `x0`: is the initial frame.\n", + "- `x1`: is the final frame.\n", + "\n", + "Both frames need to be normalized (done in the function `load_image` above) where each pixel is in the range of `[0..1]`.\n", + "\n", + "`time` is a value between `[0..1]` and it says where the generated image should be. 
0.5 is midway between the input images.\n", + "\n", + "All three values need to have a batch dimension too." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VEQNQlHGsWSM" + }, + "outputs": [], + "source": [ + "# using images from the FILM repository (https://github.com/google-research/frame-interpolation/)\n", + "\n", + "image_1_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/one.png?raw=true\"\n", + "image_2_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/two.png?raw=true\"\n", + "\n", + "time = np.array([0.5], dtype=np.float32)\n", + "\n", + "image1 = load_image(image_1_url)\n", + "image2 = load_image(image_2_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r6_MQE9EuF_K" + }, + "outputs": [], + "source": [ + "input = {\n", + " 'time': np.expand_dims(time, axis=0), # adding the batch dimension to the time\n", + " 'x0': np.expand_dims(image1, axis=0), # adding the batch dimension to the image\n", + " 'x1': np.expand_dims(image2, axis=0) # adding the batch dimension to the image\n", + "}\n", + "mid_frame = model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nZkzYE2bptfD" + }, + "source": [ + "The model outputs a couple of results but what you'll use here is the `image` key, whose value is the interpolated frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eClVbNFhA5Py" + }, + "outputs": [], + "source": [ + "print(mid_frame.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rE2csH3u8ePe" + }, + "outputs": [], + "source": [ + "frames = [image1, mid_frame['image'][0].numpy(), image2]\n", + "\n", + "media.show_images(frames, titles=['input image one', 'generated image', 'input image two'], height=250)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fS1AT8kn-f_l" + }, + "source": [ + "Let's create a video from the generated frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oFc53B3p37SH" + }, + "outputs": [], + "source": [ + "media.show_video(frames, fps=3, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x5AOFNkj-lfO" + }, + "source": [ + "## Define a Frame Interpolator Library\n", + "\n", + "As you can see, the transition is not too smooth. \n", + "\n", + "To improve that you'll need many more interpolated frames.\n", + "\n", + "You could just keep running the model many times with intermediary images but there is a better solution.\n", + "\n", + "To generate many interpolated images and have a smoother video you'll create an interpolator library." 
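To see why a dedicated library helps, note how quickly the recursive midpoint scheme below grows the number of frames. The helper here is an illustration added for this write-up (it is not part of the FILM code, and `expected_frame_count` is a hypothetical name): each adjacent pair of input frames expands into `2**times_to_interpolate` frames, plus the final input frame.

```python
def expected_frame_count(num_input_frames: int, times_to_interpolate: int) -> int:
    # (pairs of adjacent inputs) * (frames generated per pair) + the final input frame.
    return (num_input_frames - 1) * 2**times_to_interpolate + 1

# With the two input images and times_to_interpolate = 6 used below,
# this gives 65 frames, a bit over two seconds of video at 30 fps.
print(expected_frame_count(2, 6))
```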
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "\"\"\"A wrapper class for running a frame interpolation based on the FILM model on TFHub\n", + "\n", + "Usage:\n", + " interpolator = Interpolator()\n", + " result_batch = interpolator(image_batch_0, image_batch_1, batch_dt)\n", + " Where image_batch_1 and image_batch_2 are numpy tensors with TF standard\n", + " (B,H,W,C) layout, batch_dt is the sub-frame time in range [0..1], (B,) layout.\n", + "\"\"\"\n", + "\n", + "\n", + "def _pad_to_align(x, align):\n", + " \"\"\"Pads image batch x so width and height divide by align.\n", + "\n", + " Args:\n", + " x: Image batch to align.\n", + " align: Number to align to.\n", + "\n", + " Returns:\n", + " 1) An image padded so width % align == 0 and height % align == 0.\n", + " 2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box\n", + " to undo the padding.\n", + " \"\"\"\n", + " # Input checking.\n", + " assert np.ndim(x) == 4\n", + " assert align > 0, 'align must be a positive number.'\n", + "\n", + " height, width = x.shape[-3:-1]\n", + " height_to_pad = (align - height % align) if height % align != 0 else 0\n", + " width_to_pad = (align - width % align) if width % align != 0 else 0\n", + "\n", + " bbox_to_pad = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height + height_to_pad,\n", + " 'target_width': width + width_to_pad\n", + " }\n", + " padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)\n", + " bbox_to_crop = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height,\n", + " 'target_width': width\n", + " }\n", + " return padded_x, bbox_to_crop\n", + "\n", + "\n", + "class Interpolator:\n", + " \"\"\"A class for generating interpolated frames between two input frames.\n", + "\n", + " Uses the Film model from TFHub\n", + " \"\"\"\n", + "\n", + " def __init__(self, align: int = 64) -> None:\n", + " \"\"\"Loads a saved model.\n", + "\n", + " Args:\n", + " align: 'If >1, pad the input size so it divides with this before\n", + " inference.'\n", + " \"\"\"\n", + " self._model = hub.load(\"https://tfhub.dev/google/film/1\")\n", + " self._align = align\n", + "\n", + " def __call__(self, x0: np.ndarray, x1: np.ndarray,\n", + " dt: np.ndarray) -> np.ndarray:\n", + " \"\"\"Generates an interpolated frame between given two batches of frames.\n", + "\n", + " All inputs should be np.float32 datatype.\n", + "\n", + " Args:\n", + " x0: First image batch. Dimensions: (batch_size, height, width, channels)\n", + " x1: Second image batch. Dimensions: (batch_size, height, width, channels)\n", + " dt: Sub-frame time. Range [0,1]. 
Dimensions: (batch_size,)\n", + "\n", + " Returns:\n", + " The result with dimensions (batch_size, height, width, channels).\n", + " \"\"\"\n", + " if self._align is not None:\n", + " x0, bbox_to_crop = _pad_to_align(x0, self._align)\n", + " x1, _ = _pad_to_align(x1, self._align)\n", + "\n", + " inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}\n", + " result = self._model(inputs, training=False)\n", + " image = result['image']\n", + "\n", + " if self._align is not None:\n", + " image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)\n", + " return image.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZeGYaNBd_7a5" + }, + "source": [ + "## Frame and Video Generation Utility Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gOJxup6s_1DP" + }, + "outputs": [], + "source": [ + "def _recursive_generator(\n", + " frame1: np.ndarray, frame2: np.ndarray, num_recursions: int,\n", + " interpolator: Interpolator) -> Generator[np.ndarray, None, None]:\n", + " \"\"\"Splits halfway to repeatedly generate more frames.\n", + "\n", + " Args:\n", + " frame1: Input image 1.\n", + " frame2: Input image 2.\n", + " num_recursions: How many times to interpolate the consecutive image pairs.\n", + " interpolator: The frame interpolator instance.\n", + "\n", + " Yields:\n", + " The interpolated frames, including the first frame (frame1), but excluding\n", + " the final frame2.\n", + " \"\"\"\n", + " if num_recursions == 0:\n", + " yield frame1\n", + " else:\n", + " # Adds the batch dimension to all inputs before calling the interpolator,\n", + " # and remove it afterwards.\n", + " time = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)\n", + " mid_frame = interpolator(\n", + " np.expand_dims(frame1, axis=0), np.expand_dims(frame2, axis=0), time)[0]\n", + " yield from _recursive_generator(frame1, mid_frame, num_recursions - 1,\n", + " interpolator)\n", + " yield from _recursive_generator(mid_frame, frame2, num_recursions - 1,\n", + " interpolator)\n", + "\n", + "\n", + "def interpolate_recursively(\n", + " frames: List[np.ndarray], num_recursions: int,\n", + " interpolator: Interpolator) -> Iterable[np.ndarray]:\n", + " \"\"\"Generates interpolated frames by repeatedly interpolating the midpoint.\n", + "\n", + " Args:\n", + " frames: List of input frames. Expected shape (H, W, 3). 
The colors should be\n", + " in the range[0, 1] and in gamma space.\n", + " num_recursions: Number of times to do recursive midpoint\n", + " interpolation.\n", + " interpolator: The frame interpolation model to use.\n", + "\n", + " Yields:\n", + " The interpolated frames (including the inputs).\n", + " \"\"\"\n", + " n = len(frames)\n", + " for i in range(1, n):\n", + " yield from _recursive_generator(frames[i - 1], frames[i],\n", + " times_to_interpolate, interpolator)\n", + " # Separately yield the final frame.\n", + " yield frames[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X1R2KjhEAHu0" + }, + "outputs": [], + "source": [ + "times_to_interpolate = 6\n", + "interpolator = Interpolator()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZUo8tg1AYvZ" + }, + "source": [ + "## Running the Interpolator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QMMNjs7sAWTG" + }, + "outputs": [], + "source": [ + "input_frames = [image1, image2]\n", + "frames = list(\n", + " interpolate_recursively(input_frames, times_to_interpolate,\n", + " interpolator))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s9mHHyCAAhrM" + }, + "outputs": [], + "source": [ + "print(f'video with {len(frames)} frames')\n", + "media.show_video(frames, fps=30, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_0AZKeMVFwAc" + }, + "source": [ + "For more information, you can visit [FILM's model repository](https://github.com/google-research/frame-interpolation).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8764ry3SGDks" + }, + "source": [ + "## Citation\n", + "\n", + "If you find this model and code useful in your works, please acknowledge it appropriately by citing:\n", + "\n", + "```\n", + "@inproceedings{reda2022film,\n", + " title = {FILM: Frame Interpolation for Large Motion},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " booktitle = {The European Conference on Computer Vision (ECCV)},\n", + " year = {2022}\n", + "}\n", + "```\n", + "\n", + "```\n", + "@misc{film-tf,\n", + " title = {Tensorflow 2 Implementation of \"FILM: Frame Interpolation for Large Motion\"},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " year = {2022},\n", + " publisher = {GitHub},\n", + " journal = {GitHub repository},\n", + " howpublished = {\\url{https://github.com/google-research/frame-interpolation}}\n", + "}\n", + "```" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_hub_film_example.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb new file mode 100644 index 00000000000..4937bc2eb22 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Generate Artificial Faces with CelebA Progressive GAN Model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF Hub module based on a generative adversarial network (GAN). The module maps from N-dimensional vectors, called latent space, to RGB images.\n", + "\n", + "Two examples are provided:\n", + "* **Mapping** from latent space to images, and\n", + "* Given a target image, **using gradient descent to find** a latent vector that generates an image similar to the target image." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Optional prerequisites\n", + "\n", + "* Familiarity with [low level Tensorflow concepts](https://www.tensorflow.org/guide/eager).\n", + "* [Generative Adversarial Network](https://en.wikipedia.org/wiki/Generative_adversarial_network) on Wikipedia.\n", + "* Paper on Progressive GANs: [Progressive Growing of GANs for Improved Quality, Stability, and Variation](https://arxiv.org/abs/1710.10196)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HK3Q2vIaVw56" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=image-generator) you can find all models currently hosted on [tfhub.dev](https://tfhub.dev/) that can generate images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KNM3kA0arrUu" + }, + "outputs": [], + "source": [ + "# Install imageio for creating animations. \n", + "!pip -q install imageio\n", + "!pip -q install scikit-image\n", + "!pip install git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "from absl import logging\n", + "\n", + "import imageio\n", + "import PIL.Image\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "tf.random.set_seed(0)\n", + "\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import time\n", + "\n", + "try:\n", + " from google.colab import files\n", + "except ImportError:\n", + " pass\n", + "\n", + "from IPython import display\n", + "from skimage import transform\n", + "\n", + "# We could retrieve this value from module.get_input_shapes() if we didn't know\n", + "# beforehand which module we will be using.\n", + "latent_dim = 512\n", + "\n", + "\n", + "# Interpolates between two vectors that are non-zero and don't both lie on a\n", + "# line going through origin. First normalizes v2 to have the same norm as v1. 
\n", + "# Then interpolates between the two vectors on the hypersphere.\n", + "def interpolate_hypersphere(v1, v2, num_steps):\n", + " v1_norm = tf.norm(v1)\n", + " v2_norm = tf.norm(v2)\n", + " v2_normalized = v2 * (v1_norm / v2_norm)\n", + "\n", + " vectors = []\n", + " for step in range(num_steps):\n", + " interpolated = v1 + (v2_normalized - v1) * step / (num_steps - 1)\n", + " interpolated_norm = tf.norm(interpolated)\n", + " interpolated_normalized = interpolated * (v1_norm / interpolated_norm)\n", + " vectors.append(interpolated_normalized)\n", + " return tf.stack(vectors)\n", + "\n", + "# Simple way to display an image.\n", + "def display_image(image):\n", + " image = tf.constant(image)\n", + " image = tf.image.convert_image_dtype(image, tf.uint8)\n", + " return PIL.Image.fromarray(image.numpy())\n", + "\n", + "# Given a set of images, show an animation.\n", + "def animate(images):\n", + " images = np.array(images)\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "logging.set_verbosity(logging.ERROR)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f5EESfBvukYI" + }, + "source": [ + "## Latent space interpolation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJb9gFmRvynZ" + }, + "source": [ + "### Random vectors\n", + "\n", + "Latent space interpolation between two randomly initialized vectors. We will use a TF Hub module [progan-128](https://tfhub.dev/google/progan-128/1) that contains a pre-trained Progressive GAN." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8StEe9x9wGma" + }, + "outputs": [], + "source": [ + "progan = hub.load(\"https://tfhub.dev/google/progan-128/1\").signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fZ0O5_5Jhwio" + }, + "outputs": [], + "source": [ + "def interpolate_between_vectors():\n", + " v1 = tf.random.normal([latent_dim])\n", + " v2 = tf.random.normal([latent_dim])\n", + " \n", + " # Creates a tensor with 25 steps of interpolation between v1 and v2.\n", + " vectors = interpolate_hypersphere(v1, v2, 50)\n", + "\n", + " # Uses module to generate images from the latent space.\n", + " interpolated_images = progan(vectors)['default']\n", + "\n", + " return interpolated_images\n", + "\n", + "interpolated_images = interpolate_between_vectors()\n", + "animate(interpolated_images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L9-uXoTHuXQC" + }, + "source": [ + "## Finding closest vector in latent space\n", + "Fix a target image. As an example use an image generated from the module or upload your own." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "phT4W66pMmko" + }, + "outputs": [], + "source": [ + "image_from_module_space = True # @param { isTemplate:true, type:\"boolean\" }\n", + "\n", + "def get_module_space_image():\n", + " vector = tf.random.normal([1, latent_dim])\n", + " images = progan(vector)['default'][0]\n", + " return images\n", + "\n", + "def upload_image():\n", + " uploaded = files.upload()\n", + " image = imageio.imread(uploaded[list(uploaded.keys())[0]])\n", + " return transform.resize(image, [128, 128])\n", + "\n", + "if image_from_module_space:\n", + " target_image = get_module_space_image()\n", + "else:\n", + " target_image = upload_image()\n", + "\n", + "display_image(target_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rBIt3Q4qvhuq" + }, + "source": [ + "After defining a loss function between the target image and the image generated by a latent space variable, we can use gradient descent to find variable values that minimize the loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cUGakLdbML2Q" + }, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "initial_vector = tf.random.normal([1, latent_dim])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u7MGzDE5MU20" + }, + "outputs": [], + "source": [ + "display_image(progan(initial_vector)['default'][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_4Z7tnyg-ZY" + }, + "outputs": [], + "source": [ + "def find_closest_latent_vector(initial_vector, num_optimization_steps,\n", + " steps_per_image):\n", + " images = []\n", + " losses = []\n", + "\n", + " vector = tf.Variable(initial_vector) \n", + " optimizer = tf.optimizers.Adam(learning_rate=0.01)\n", + " loss_fn = tf.losses.MeanAbsoluteError(reduction=\"sum\")\n", + "\n", + " for step in range(num_optimization_steps):\n", + " if (step % 100)==0:\n", + " print()\n", + " print('.', end='')\n", + " with tf.GradientTape() as tape:\n", + " image = progan(vector.read_value())['default'][0]\n", + " if (step % steps_per_image) == 0:\n", + " images.append(image.numpy())\n", + " target_image_difference = loss_fn(image, target_image[:,:,:3])\n", + " # The latent vectors were sampled from a normal distribution. 
We can get\n", + " # more realistic images if we regularize the length of the latent vector to \n", + " # the average length of vector from this distribution.\n", + " regularizer = tf.abs(tf.norm(vector) - np.sqrt(latent_dim))\n", + " \n", + " loss = target_image_difference + regularizer\n", + " losses.append(loss.numpy())\n", + " grads = tape.gradient(loss, [vector])\n", + " optimizer.apply_gradients(zip(grads, [vector]))\n", + " \n", + " return images, losses\n", + "\n", + "\n", + "num_optimization_steps=200\n", + "steps_per_image=5\n", + "images, loss = find_closest_latent_vector(initial_vector, num_optimization_steps, steps_per_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pRbeF2oSAcOB" + }, + "outputs": [], + "source": [ + "plt.plot(loss)\n", + "plt.ylim([0,max(plt.ylim())])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KnZkDy2FEsTt" + }, + "outputs": [], + "source": [ + "animate(np.stack(images))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GGKfuCdfPQKH" + }, + "source": [ + "Compare the result to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TK1P5z3bNuIl" + }, + "outputs": [], + "source": [ + "display_image(np.concatenate([images[-1], target_image], axis=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tDt15dLsJwMy" + }, + "source": [ + "### Playing with the above example\n", + "If image is from the module space, the descent is quick and converges to a reasonable sample. Try out descending to an image that is **not from the module space**. The descent will only converge if the image is reasonably close to the space of training images.\n", + "\n", + "How to make it descend faster and to a more realistic image? One can try:\n", + "* using different loss on the image difference, e.g., quadratic,\n", + "* using different regularizer on the latent vector,\n", + "* initializing from a random vector in multiple runs,\n", + "* etc.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "N6ZDpd9XzFeN" + ], + "name": "tf_hub_generative_image_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tweening_conv3d.ipynb b/site/en/hub/tutorials/tweening_conv3d.ipynb new file mode 100644 index 00000000000..8c53929021f --- /dev/null +++ b/site/en/hub/tutorials/tweening_conv3d.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wC0PtNm3Sa_T" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hgOqPjRKSa-7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oKAkxAYuONU6" + }, + "source": [ + "# Video Inbetweening using 3D Convolutions\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cvMgkVIBpT-Y" + }, + "source": [ + "Yunpeng Li, Dominik Roblek, and Marco Tagliasacchi. From Here to There: Video Inbetweening Using Direct 3D Convolutions, 2019.\n", + "\n", + "https://arxiv.org/abs/1905.10240\n", + "\n", + "\n", + "Current Hub characteristics:\n", + "- has models for BAIR Robot pushing videos and KTH action video dataset (though this colab uses only BAIR)\n", + "- BAIR dataset already available in Hub. However, KTH videos need to be supplied by the users themselves.\n", + "- only evaluation (video generation) for now\n", + "- batch size and frame size are hard-coded\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EsQFWvxrYrHg" + }, + "source": [ + "Since `tfds.load('bair_robot_pushing_small', split='test')` would download a 30GB archive that also contains the training data, we download a separated archive that only contains the 190MB test data. The used dataset has been published by [this paper](https://arxiv.org/abs/1710.05268) and is licensed as Creative Commons BY 4.0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GhIKakhc7JYL" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_datasets.core import SplitGenerator\n", + "from tensorflow_datasets.video.bair_robot_pushing import BairRobotPushingSmall\n", + "\n", + "import tempfile\n", + "import pathlib\n", + "\n", + "TEST_DIR = pathlib.Path(tempfile.mkdtemp()) / \"bair_robot_pushing_small/softmotion30_44k/test/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zBMz14GmYkwz" + }, + "outputs": [], + "source": [ + "# Download the test split to $TEST_DIR\n", + "!mkdir -p $TEST_DIR\n", + "!wget -nv https://storage.googleapis.com/download.tensorflow.org/data/bair_test_traj_0_to_255.tfrecords -O $TEST_DIR/traj_0_to_255.tfrecords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irRJ2Q0iYoW0" + }, + "outputs": [], + "source": [ + "# Since the dataset builder expects the train and test split to be downloaded,\n", + "# patch it so it only expects the test data to be available\n", + "builder = BairRobotPushingSmall()\n", + "test_generator = SplitGenerator(name='test', gen_kwargs={\"filedir\": str(TEST_DIR)})\n", + "builder._split_generators = lambda _: [test_generator]\n", + "builder.download_and_prepare()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iaGU8hhBPi_6" + }, + "source": [ + "## BAIR: Demo based on numpy array inputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "IgWmW8YzEiDo" + }, + "outputs": [], + "source": [ + "# @title Load some example data (BAIR).\n", + "batch_size = 16\n", + "\n", + "# If unable to download the dataset automatically due to \"not enough disk space\", please download manually to Google Drive and\n", + "# load using tf.data.TFRecordDataset.\n", + "ds = builder.as_dataset(split=\"test\")\n", + "test_videos = ds.batch(batch_size)\n", + "first_batch = next(iter(test_videos))\n", + "input_frames = first_batch['image_aux1'][:, ::15]\n", + "input_frames = tf.cast(input_frames, 
tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "96Jd5XefGHRr" + }, + "outputs": [], + "source": [ + "# @title Visualize loaded videos start and end frames.\n", + "\n", + "print('Test videos shape [batch_size, start/end frame, height, width, num_channels]: ', input_frames.shape)\n", + "sns.set_style('white')\n", + "plt.figure(figsize=(4, 2*batch_size))\n", + "\n", + "for i in range(batch_size)[:4]:\n", + " plt.subplot(batch_size, 2, 1 + 2*i)\n", + " plt.imshow(input_frames[i, 0] / 255.0)\n", + " plt.title('Video {}: First frame'.format(i))\n", + " plt.axis('off')\n", + " plt.subplot(batch_size, 2, 2 + 2*i)\n", + " plt.imshow(input_frames[i, 1] / 255.0)\n", + " plt.title('Video {}: Last frame'.format(i))\n", + " plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w0FFhkikQABy" + }, + "source": [ + "### Load Hub Module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cLAUiWfEQAB5" + }, + "outputs": [], + "source": [ + "hub_handle = 'https://tfhub.dev/google/tweening_conv3d_bair/1'\n", + "module = hub.load(hub_handle).signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVHTdXnhbGsK" + }, + "source": [ + "### Generate and show the videos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FHAwBW-zyegP" + }, + "outputs": [], + "source": [ + "filled_frames = module(input_frames)['default'] / 255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVesWHTnSW1Z" + }, + "outputs": [], + "source": [ + "# Show sequences of generated video frames.\n", + "\n", + "# Concatenate start/end frames and the generated filled frames for the new videos.\n", + "generated_videos = np.concatenate([input_frames[:, :1] / 255.0, filled_frames, input_frames[:, 1:] / 255.0], axis=1)\n", + "\n", + "for video_id in range(4):\n", + " fig = plt.figure(figsize=(10 * 2, 2))\n", + " for frame_id in range(1, 16):\n", + " ax = fig.add_axes([frame_id * 1 / 16., 0, (frame_id + 1) * 1 / 16., 1],\n", + " xmargin=0, ymargin=0)\n", + " ax.imshow(generated_videos[video_id, frame_id])\n", + " ax.axis('off')" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "Q4DN769E2O_R" + ], + "name": "tweening_conv3d.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb new file mode 100644 index 00000000000..879bdbd0edb --- /dev/null +++ b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb @@ -0,0 +1,984 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "yCs7P9JTMlzV" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jqn-HYw-Mkea" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stRetE8gMlmZ" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndG8MjmJeicp" + }, + "source": [ + "# Fine-tuning Wav2Vec2 with an LM head\n", + "\n", + "In this notebook, we will load the pre-trained wav2vec2 model from [TFHub](https://tfhub.dev) and will fine-tune it on [LibriSpeech dataset](https://huggingface.co/datasets/librispeech_asr) by appending Language Modeling head (LM) over the top of our pre-trained model. The underlying task is to build a model for **Automatic Speech Recognition** i.e. given some speech, the model should be able to transcribe it into text." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWk8nL6Ui-_0" + }, + "source": [ + "## Setting Up\n", + "\n", + "Before running this notebook, please ensure that you are on GPU runtime (`Runtime` > `Change runtime type` > `GPU`). The following cell will install [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package & its dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "seqTlMyeZvM4" + }, + "outputs": [], + "source": [ + "!pip3 install -q git+https://github.com/vasudevgupta7/gsoc-wav2vec2@main\n", + "!sudo apt-get install -y libsndfile1-dev\n", + "!pip3 install -q SoundFile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvuJL8-f0zn5" + }, + "source": [ + "## Model setup using `TFHub`\n", + "\n", + "We will start by importing some libraries/modules." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M3_fgx4eZvM7" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from wav2vec2 import Wav2Vec2Config\n", + "\n", + "config = Wav2Vec2Config()\n", + "\n", + "print(\"TF version:\", tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y0rVUxyWsS5f" + }, + "source": [ + "First, we will download our model from TFHub & will wrap our model signature with [`hub.KerasLayer`](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) to be able to use this model like any other Keras layer. Fortunately, `hub.KerasLayer` can do both in just 1 line.\n", + "\n", + "**Note:** When loading model with `hub.KerasLayer`, model becomes a bit opaque but sometimes we need finer controls over the model, then we can load the model with `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NO6QRC7KZvM9" + }, + "outputs": [], + "source": [ + "pretrained_layer = hub.KerasLayer(\"https://tfhub.dev/vasudevgupta7/wav2vec2/1\", trainable=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pCputyVBv2e9" + }, + "source": [ + "You can refer to this [script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/export2hub.py) in case you are interested in the model exporting script. Object `pretrained_layer` is the freezed version of [`Wav2Vec2Model`](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/wav2vec2/modeling.py). 
These pre-trained weights were converted from the HuggingFace PyTorch [pre-trained weights](https://huggingface.co/facebook/wav2vec2-base) using [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/convert_torch_to_tf.py).\n", + "\n", + "Originally, wav2vec2 was pre-trained with a masked language modelling approach with the objective of identifying the true quantized latent speech representation for a masked time step. You can read more about the training objective in the paper: [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SseDnCr7hyhC" + }, + "source": [ + "Now, we will define a few constants and hyper-parameters which will be useful in the next few cells. `AUDIO_MAXLEN` is intentionally set to `246000` as the model signature only accepts a static sequence length of `246000`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eiILuMBERxlO" + }, + "outputs": [], + "source": [ + "AUDIO_MAXLEN = 246000\n", + "LABEL_MAXLEN = 256\n", + "BATCH_SIZE = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1V4gTgGLgXvO" + }, + "source": [ + "In the following cell, we will wrap `pretrained_layer` & a dense layer (the LM head) with the [Keras Functional API](https://www.tensorflow.org/guide/keras/functional)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a3CUN1KEB10Q" + }, + "outputs": [], + "source": [ + "inputs = tf.keras.Input(shape=(AUDIO_MAXLEN,))\n", + "hidden_states = pretrained_layer(inputs)\n", + "outputs = tf.keras.layers.Dense(config.vocab_size)(hidden_states)\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zDXuoMXhDMo" + }, + "source": [ + "The dense layer (defined above) has an output dimension of `vocab_size` because we want to predict probabilities of each token in the vocabulary at each time step." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oPp18ZHRtnq-" + }, + "source": [ + "## Setting up training state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ATQy1ZK3vFr7" + }, + "source": [ + "In TensorFlow, model weights are built only when `model.call` or `model.build` is called for the first time, so the following cell will build the model weights for us. Further, we will be running `model.summary()` to check the total number of trainable parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZgL5wyaXZvM-" + }, + "outputs": [], + "source": [ + "model(tf.random.uniform(shape=(BATCH_SIZE, AUDIO_MAXLEN)))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EQxxA4Fevp7m" + }, + "source": [ + "Now, we need to define the `loss_fn` and optimizer to be able to train the model. The following cell will do that for us. We will be using the `Adam` optimizer for simplicity. `CTCLoss` is a common loss type that is used for tasks (like `ASR`) where input sub-parts can't be easily aligned with output sub-parts. You can read more about CTC loss in this amazing [blog post](https://distill.pub/2017/ctc/).\n", + "\n", + "\n", + "`CTCLoss` (from the [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package) accepts 3 arguments: `config`, `model_input_shape` & `division_factor`.
If `division_factor=1`, then loss will simply get summed, so pass `division_factor` accordingly to get mean over batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "glDepVEHZvM_" + }, + "outputs": [], + "source": [ + "from wav2vec2 import CTCLoss\n", + "\n", + "LEARNING_RATE = 5e-5\n", + "\n", + "loss_fn = CTCLoss(config, (BATCH_SIZE, AUDIO_MAXLEN), division_factor=BATCH_SIZE)\n", + "optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1mvTuOXpwsQe" + }, + "source": [ + "## Loading & Pre-processing data\n", + "\n", + "Let's now download the LibriSpeech dataset from the [official website](http://www.openslr.org/12) and set it up." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I4kIEC77cBCM" + }, + "outputs": [], + "source": [ + "!wget https://www.openslr.org/resources/12/dev-clean.tar.gz -P ./data/train/\n", + "!tar -xf ./data/train/dev-clean.tar.gz -C ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LsQpmpn6jrMI" + }, + "source": [ + "**Note:** We are using `dev-clean` configuration as this notebook is just for demonstration purposes, so we need a small amount of data. Complete training data can be easily downloaded from [LibriSpeech website](http://www.openslr.org/12)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ynxAjtGHGFpM" + }, + "outputs": [], + "source": [ + "ls ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yBMiORo0xJD0" + }, + "source": [ + "Our dataset lies in the LibriSpeech directory. Let's explore these files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jkIu_Wt4ZvNA" + }, + "outputs": [], + "source": [ + "data_dir = \"./data/train/LibriSpeech/dev-clean/2428/83705/\"\n", + "all_files = os.listdir(data_dir)\n", + "\n", + "flac_files = [f for f in all_files if f.endswith(\".flac\")]\n", + "txt_files = [f for f in all_files if f.endswith(\".txt\")]\n", + "\n", + "print(\"Transcription files:\", txt_files, \"\\nSound files:\", flac_files)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XEObi_Apk3ZD" + }, + "source": [ + "Alright, so each sub-directory has many `.flac` files and a `.txt` file. The `.txt` file contains text transcriptions for all the speech samples (i.e. `.flac` files) present in that sub-directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WYW6WKJflO2e" + }, + "source": [ + "We can load this text data as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cEBKxQblHPwq" + }, + "outputs": [], + "source": [ + "def read_txt_file(f):\n", + " with open(f, \"r\") as f:\n", + " samples = f.read().split(\"\\n\")\n", + " samples = {s.split()[0]: \" \".join(s.split()[1:]) for s in samples if len(s.split()) > 2}\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ldkf_ceb0_YW" + }, + "source": [ + "Similarly, we will define a function for loading a speech sample from a `.flac` file.\n", + "\n", + "`REQUIRED_SAMPLE_RATE` is set to `16000` as wav2vec2 was pre-trained with `16K` frequency and it's recommended to fine-tune it without any major change in data distribution due to frequency." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOJ3OzPsTyXv" + }, + "outputs": [], + "source": [ + "import soundfile as sf\n", + "\n", + "REQUIRED_SAMPLE_RATE = 16000\n", + "\n", + "def read_flac_file(file_path):\n", + " with open(file_path, \"rb\") as f:\n", + " audio, sample_rate = sf.read(f)\n", + " if sample_rate != REQUIRED_SAMPLE_RATE:\n", + " raise ValueError(\n", + " f\"sample rate (={sample_rate}) of your files must be {REQUIRED_SAMPLE_RATE}\"\n", + " )\n", + " file_id = os.path.split(file_path)[-1][:-len(\".flac\")]\n", + " return {file_id: audio}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2sxDN8P4nWkW" + }, + "source": [ + "Now, we will pick some random samples & will try to visualize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HI5J-2Dfm_wT" + }, + "outputs": [], + "source": [ + "from IPython.display import Audio\n", + "import random\n", + "\n", + "file_id = random.choice([f[:-len(\".flac\")] for f in flac_files])\n", + "flac_file_path, txt_file_path = os.path.join(data_dir, f\"{file_id}.flac\"), os.path.join(data_dir, \"2428-83705.trans.txt\")\n", + "\n", + "print(\"Text Transcription:\", read_txt_file(txt_file_path)[file_id], \"\\nAudio:\")\n", + "Audio(filename=flac_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M8jJ7Ed81p_A" + }, + "source": [ + "Now, we will combine all the speech & text samples and will define the function (in next cell) for that purpose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MI-5YCzaTsei" + }, + "outputs": [], + "source": [ + "def fetch_sound_text_mapping(data_dir):\n", + " all_files = os.listdir(data_dir)\n", + "\n", + " flac_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".flac\")]\n", + " txt_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".txt\")]\n", + "\n", + " txt_samples = {}\n", + " for f in txt_files:\n", + " txt_samples.update(read_txt_file(f))\n", + "\n", + " speech_samples = {}\n", + " for f in flac_files:\n", + " speech_samples.update(read_flac_file(f))\n", + "\n", + " assert len(txt_samples) == len(speech_samples)\n", + "\n", + " samples = [(speech_samples[file_id], txt_samples[file_id]) for file_id in speech_samples.keys() if len(speech_samples[file_id]) < AUDIO_MAXLEN]\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mx95Lxvu0nT4" + }, + "source": [ + "It's time to have a look at a few samples ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Ls7X_jqIz4R" + }, + "outputs": [], + "source": [ + "samples = fetch_sound_text_mapping(data_dir)\n", + "samples[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TUjhSWfsnlCL" + }, + "source": [ + "Note: We are loading this data into memory as we working with a small amount of dataset in this notebook. But for training on the complete dataset (~300 GBs), you will have to load data lazily. You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/data_utils.py) to know more on that." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xg8Zia1kzw0J" + }, + "source": [ + "Let's pre-process the data now !!!\n", + "\n", + "We will first define the tokenizer & processor using `gsoc-wav2vec2` package. Then, we will do very simple pre-processing. 
`processor` will normalize raw speech w.r.t. the frames axis and `tokenizer` will convert our model outputs into strings (using the defined vocabulary) & will take care of removing special tokens (depending on your tokenizer configuration)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gaat_hMLNVHF" + }, + "outputs": [], + "source": [ + "from wav2vec2 import Wav2Vec2Processor\n", + "tokenizer = Wav2Vec2Processor(is_tokenizer=True)\n", + "processor = Wav2Vec2Processor(is_tokenizer=False)\n", + "\n", + "def preprocess_text(text):\n", + " label = tokenizer(text)\n", + " return tf.constant(label, dtype=tf.int32)\n", + "\n", + "def preprocess_speech(audio):\n", + " audio = tf.constant(audio, dtype=tf.float32)\n", + " return processor(tf.transpose(audio))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GyKl8QP-zRFC" + }, + "source": [ + "Now, we will define a Python generator to call the preprocessing functions we defined in the cells above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PoQrRalwMpQ6" + }, + "outputs": [], + "source": [ + "def inputs_generator():\n", + " for speech, text in samples:\n", + " yield preprocess_speech(speech), preprocess_text(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Vlm3ySFULsG" + }, + "source": [ + "## Setting up `tf.data.Dataset`\n", + "\n", + "The following cell will set up the `tf.data.Dataset` object using its `.from_generator(...)` method. We will be using the `generator` object we defined in the above cell.\n", + "\n", + "**Note:** For distributed training (especially on TPUs), `.from_generator(...)` doesn't work currently and it is recommended to train on data stored in `.tfrecord` format (Note: The TFRecords should ideally be stored inside a GCS Bucket in order for the TPUs to work to the fullest extent).\n", + "\n", + "You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/make_tfrecords.py) for more details on how to convert LibriSpeech data into tfrecords." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbQ_dMwGO62h" + }, + "outputs": [], + "source": [ + "output_signature = (\n", + " tf.TensorSpec(shape=(None), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None), dtype=tf.int32),\n", + ")\n", + "\n", + "dataset = tf.data.Dataset.from_generator(inputs_generator, output_signature=output_signature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HXBbNsRyPyw3" + }, + "outputs": [], + "source": [ + "BUFFER_SIZE = len(flac_files)\n", + "SEED = 42\n", + "\n", + "dataset = dataset.shuffle(BUFFER_SIZE, seed=SEED)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9DAUmns3pXfr" + }, + "source": [ + "We will split the dataset into multiple batches, so let's prepare the batches in the following cell. All the sequences in a batch should be padded to a constant length. We will use the `.padded_batch(...)` method for that purpose."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Okhko1IWRida" + }, + "outputs": [], + "source": [ + "dataset = dataset.padded_batch(BATCH_SIZE, padded_shapes=(AUDIO_MAXLEN, LABEL_MAXLEN), padding_values=(0.0, 0))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A45CjQG5qSbV" + }, + "source": [ + "Accelerators (like GPUs/TPUs) are very fast and often data-loading (& pre-processing) becomes the bottleneck during training as the data-loading part happens on CPUs. This can increase the training time significantly especially when there is a lot of online pre-processing involved or data is streamed online from GCS buckets. To handle those issues, `tf.data.Dataset` offers the `.prefetch(...)` method. This method helps in preparing the next few batches in parallel (on CPUs) while the model is making predictions (on GPUs/TPUs) on the current batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f-bKu2YjRior" + }, + "outputs": [], + "source": [ + "dataset = dataset.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lqk2cs6LxVIh" + }, + "source": [ + "Since this notebook is made for demonstration purposes, we will be taking first `num_train_batches` and will perform training over only that. You are encouraged to train on the whole dataset though. Similarly, we will evaluate only `num_val_batches`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z6GO5oYUxXtz" + }, + "outputs": [], + "source": [ + "num_train_batches = 10\n", + "num_val_batches = 4\n", + "\n", + "train_dataset = dataset.take(num_train_batches)\n", + "val_dataset = dataset.skip(num_train_batches).take(num_val_batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CzAOI78tky08" + }, + "source": [ + "## Model training\n", + "\n", + "For training our model, we will be directly calling `.fit(...)` method after compiling our model with `.compile(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vuBY2sZElgwg" + }, + "outputs": [], + "source": [ + "model.compile(optimizer, loss=loss_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qswxafSl0HjO" + }, + "source": [ + "The above cell will set up our training state. Now we can initiate training with the `.fit(...)` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtuSfnj1l-I_" + }, + "outputs": [], + "source": [ + "history = model.fit(train_dataset, validation_data=val_dataset, epochs=3)\n", + "history.history" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ySvp8r2E1q_V" + }, + "source": [ + "Let's save our model with `.save(...)` method to be able to perform inference later. You can also export this SavedModel to TFHub by following [TFHub documentation](https://www.tensorflow.org/hub/publish)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0KEYcwydwjF" + }, + "outputs": [], + "source": [ + "save_dir = \"finetuned-wav2vec2\"\n", + "model.save(save_dir, include_optimizer=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MkOpp9rZ211t" + }, + "source": [ + "Note: We are setting `include_optimizer=False` as we want to use this model for inference only." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SJfPlTgezD0i" + }, + "source": [ + "## Evaluation\n", + "\n", + "Now we will be computing Word Error Rate over the validation dataset\n", + "\n", + "**Word error rate** (WER) is a common metric for measuring the performance of an automatic speech recognition system. The WER is derived from the Levenshtein distance, working at the word level. Word error rate can then be computed as: WER = (S + D + I) / N = (S + D + I) / (S + D + C) where S is the number of substitutions, D is the number of deletions, I is the number of insertions, C is the number of correct words, N is the number of words in the reference (N=S+D+C). This value indicates the percentage of words that were incorrectly predicted. \n", + "\n", + "You can refer to [this paper](https://www.isca-speech.org/archive_v0/interspeech_2004/i04_2765.html) to learn more about WER." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Io_91Y7-r3xu" + }, + "source": [ + "We will use `load_metric(...)` function from [HuggingFace datasets](https://huggingface.co/docs/datasets/) library. Let's first install the `datasets` library using `pip` and then define the `metric` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GW9F_oVDU1TZ" + }, + "outputs": [], + "source": [ + "!pip3 install -q datasets\n", + "\n", + "from datasets import load_metric\n", + "metric = load_metric(\"wer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssWXWc7CZvNB" + }, + "outputs": [], + "source": [ + "@tf.function(jit_compile=True)\n", + "def eval_fwd(batch):\n", + " logits = model(batch, training=False)\n", + " return tf.argmax(logits, axis=-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NFh1myg1x4ua" + }, + "source": [ + "It's time to run the evaluation on validation data now." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EQTFVjZghckJ" + }, + "outputs": [], + "source": [ + "from tqdm.auto import tqdm\n", + "\n", + "for speech, labels in tqdm(val_dataset, total=num_val_batches):\n", + " predictions = eval_fwd(speech)\n", + " predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + " references = [tokenizer.decode(label, group_tokens=False) for label in labels.numpy().tolist()]\n", + " metric.add_batch(references=references, predictions=predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WWCc8qBesv3e" + }, + "source": [ + "We are using the `tokenizer.decode(...)` method for decoding our predictions and labels back into the text and will add them to the metric for `WER` computation later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XI_URj8Wtb2g" + }, + "source": [ + "Now, let's calculate the metric value in following cell:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a83wekLgWMod" + }, + "outputs": [], + "source": [ + "metric.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c_cD1OgVEjl4" + }, + "source": [ + "**Note:** Here metric value doesn't make any sense as the model is trained on very small data and ASR-like tasks often require a large amount of data to learn a mapping from speech to text. You should probably train on large data to get some good results. This notebook gives you a template to fine-tune a pre-trained speech model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G14o706kdTE1" + }, + "source": [ + "## Inference\n", + "\n", + "Now that we are satisfied with the training process & have saved the model in `save_dir`, we will see how this model can be used for inference.\n", + "\n", + "First, we will load our model using `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wrTrExiUdaED" + }, + "outputs": [], + "source": [ + "finetuned_model = tf.keras.models.load_model(save_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luodSroz20SR" + }, + "source": [ + "Let's download some speech samples for performing inference. You can replace the following sample with your speech sample also." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HUE0shded6Ej" + }, + "outputs": [], + "source": [ + "!wget https://github.com/vasudevgupta7/gsoc-wav2vec2/raw/main/data/SA2.wav" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycBjU_U53FjL" + }, + "source": [ + "Now, we will read the speech sample using `soundfile.read(...)` and pad it to `AUDIO_MAXLEN` to satisfy the model signature. Then we will normalize that speech sample using the `Wav2Vec2Processor` instance & will feed it into the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z7CARje4d5_H" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "speech, _ = sf.read(\"SA2.wav\")\n", + "speech = np.pad(speech, (0, AUDIO_MAXLEN - len(speech)))\n", + "speech = tf.expand_dims(processor(tf.constant(speech)), 0)\n", + "\n", + "outputs = finetuned_model(speech)\n", + "outputs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lUSttSPa30qP" + }, + "source": [ + "Let's decode numbers back into text sequence using the `Wav2Vec2tokenizer` instance, we defined above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RYdJqxQ4llgI" + }, + "outputs": [], + "source": [ + "predictions = tf.argmax(outputs, axis=-1)\n", + "predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + "predictions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7DXC757bztJc" + }, + "source": [ + "This prediction is quite random as the model was never trained on large data in this notebook (as this notebook is not meant for doing complete training). You will get good predictions if you train this model on complete LibriSpeech dataset.\n", + "\n", + "Finally, we have reached an end to this notebook. But it's not an end of learning TensorFlow for speech-related tasks, this [repository](https://github.com/tulasiram58827/TTS_TFLite) contains some more amazing tutorials. In case you encountered any bug in this notebook, please create an issue [here](https://github.com/vasudevgupta7/gsoc-wav2vec2/issues)." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "rWk8nL6Ui-_0", + "wvuJL8-f0zn5", + "oPp18ZHRtnq-", + "1mvTuOXpwsQe", + "7Vlm3ySFULsG", + "CzAOI78tky08", + "SJfPlTgezD0i", + "G14o706kdTE1" + ], + "name": "wav2vec2_saved_model_finetuning.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wiki40b_lm.ipynb b/site/en/hub/tutorials/wiki40b_lm.ipynb new file mode 100644 index 00000000000..ad94ce0aab8 --- /dev/null +++ b/site/en/hub/tutorials/wiki40b_lm.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Oxb_tjw13y4G" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EAkh2aBJLg6q" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owAopeOtirc9" + }, + "source": [ + "# Wiki40B Language Models\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T-nCyGRri-KO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8eY9jkGpjf3d" + }, + "source": [ + "Generate Wikipedia-like text using the **Wiki40B language models** from [TensorFlow Hub](https://tfhub.dev)!\n", + "\n", + "This notebook illustrates how to:\n", + "* Load the 41 monolingual and 2 multilingual language models that are part of the [Wiki40b-LM collection](https://tfhub.dev/google/collections/wiki40b-lm/1) on TF-Hub\n", + "* Use the models to obtain perplexity, per layer activations, and word embeddings for a given piece of text\n", + "* Generate text token-by-token from a piece of seed text\n", + "\n", + "The language models are trained on the newly published, cleaned-up [Wiki40B dataset](https://www.tensorflow.org/datasets/catalog/wiki40b) available on TensorFlow Datasets. The training setup is based on the paper [“Wiki-40B: Multilingual Language Model Dataset”](https://research.google/pubs/pub49029/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK2YnrEhLjDf" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "sv2CmI7BdaML" + }, + "outputs": [], + "source": [ + "#@title Installing Dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "8uSkaQ-Vdon2" + }, + "outputs": [], + "source": [ + "#@title Imports\n", + "import numpy as np\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as tf_text\n", + "\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity(tf.logging.WARN)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d2MvP-cyL-BN" + }, + "source": [ + "## Choose Language\n", + "\n", + "Let's choose **which language model** to load from TF-Hub and the **length of text** to be generated. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "33zYlSXwMA_o" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "language = \"en\" #@param [\"en\", \"ar\", \"zh-cn\", \"zh-tw\", \"nl\", \"fr\", \"de\", \"it\", \"ja\", \"ko\", \"pl\", \"pt\", \"ru\", \"es\", \"th\", \"tr\", \"bg\", \"ca\", \"cs\", \"da\", \"el\", \"et\", \"fa\", \"fi\", \"he\", \"hi\", \"hr\", \"hu\", \"id\", \"lt\", \"lv\", \"ms\", \"no\", \"ro\", \"sk\", \"sl\", \"sr\", \"sv\", \"tl\", \"uk\", \"vi\", \"multilingual-64k\", \"multilingual-128k\"]\n", + "hub_module = \"https://tfhub.dev/google/wiki40b-lm-{}/1\".format(language)\n", + "max_gen_len = 20 #@param\n", + "\n", + "print(\"Using the {} model to generate sequences of max length {}.\".format(hub_module, max_gen_len))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgw2qW4xZbMj" + }, + "source": [ + "## Build the Model\n", + "\n", + "Okay, now that we've configured which pre-trained model to use, let's configure it to generate text up to `max_gen_len`. We will need to load the language model from TF-Hub, feed in a piece of starter text, and then iteratively feed in tokens as they are generated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pUypKuc3Mlpa" + }, + "outputs": [], + "source": [ + "#@title Load the language model pieces\n", + "g = tf.Graph()\n", + "n_layer = 12\n", + "model_dim = 768\n", + "\n", + "with g.as_default():\n", + " text = tf.placeholder(dtype=tf.string, shape=(1,))\n", + "\n", + " # Load the pretrained model from TF-Hub\n", + " module = hub.Module(hub_module)\n", + "\n", + " # Get the word embeddings, activations at each layer, negative log likelihood\n", + " # of the text, and calculate the perplexity.\n", + " embeddings = module(dict(text=text), signature=\"word_embeddings\", as_dict=True)[\"word_embeddings\"]\n", + " activations = module(dict(text=text), signature=\"activations\", as_dict=True)[\"activations\"]\n", + " neg_log_likelihood = module(dict(text=text), signature=\"neg_log_likelihood\", as_dict=True)[\"neg_log_likelihood\"]\n", + " ppl = tf.exp(tf.reduce_mean(neg_log_likelihood, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ZOS2Z2n0MsuC" + }, + "outputs": [], + "source": [ + "#@title Construct the per-token generation graph\n", + "def feedforward_step(module, inputs, mems):\n", + " \"\"\"Generate one step.\"\"\"\n", + " # Set up the input dict for one step of generation\n", + " inputs = tf.dtypes.cast(inputs, tf.int64)\n", + " generation_input_dict = dict(input_tokens=inputs)\n", + " mems_dict = {\"mem_{}\".format(i): mems[i] for i in range(n_layer)}\n", + " generation_input_dict.update(mems_dict)\n", + "\n", + " # Generate the tokens from the language model\n", + " generation_outputs = module(generation_input_dict, signature=\"prediction\", as_dict=True)\n", + "\n", + " # Get the probabilities and the inputs for the next steps\n", + " probs = generation_outputs[\"probs\"]\n", + " new_mems = [generation_outputs[\"new_mem_{}\".format(i)] for i in range(n_layer)]\n", + "\n", + " return probs, new_mems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "S9ss6amQMyVY" + }, + "outputs": [], + "source": [ + "#@title Build the statically unrolled graph for `max_gen_len` tokens\n", + "with g.as_default():\n", + " # Tokenization with the sentencepiece model.\n", + " token_ids = module(dict(text=text), signature=\"tokenization\", as_dict=True)[\"token_ids\"]\n", + " inputs_np = token_ids\n", + " # Generate text by statically unrolling the computational graph\n", + " mems_np = [np.zeros([1, 0, model_dim], dtype=np.float32) for _ in range(n_layer)]\n", + "\n", + " # Generate up to `max_gen_len` tokens\n", + " sampled_ids = []\n", + " for step in range(max_gen_len):\n", + " probs, mems_np = feedforward_step(module, inputs_np, mems_np)\n", + " sampled_id = tf.random.categorical(tf.math.log(probs[0]), num_samples=1, dtype=tf.int32)\n", + " sampled_id = tf.squeeze(sampled_id)\n", + " sampled_ids.append(sampled_id)\n", + " inputs_np = tf.reshape(sampled_id, [1, 1])\n", + "\n", + " # Transform the ids into text\n", + " sampled_ids = tf.expand_dims(sampled_ids, axis=0)\n", + " generated_text = module(dict(token_ids=sampled_ids), signature=\"detokenization\", as_dict=True)[\"text\"]\n", + "\n", + " init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K5SYcRrxM7vS" + }, + "source": [ + "## Generate some text\n", + "\n", + "Let's generate some text! 
We'll set a text `seed` to prompt the language model.\n", + "\n", + "You can use one of the **predefined** seeds or _optionally_ **enter your own**. This text will be used as seed for the language model to help prompt the language model for what to generate next.\n", + "\n", + "You can use the following special tokens precede special parts of the generated article. Use **`_START_ARTICLE_`** to indicate the beginning of the article, **`_START_SECTION_`** to indicate the beginning of a section, and **`_START_PARAGRAPH_`** to generate text in the article\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GmZxv7bzMIcL" + }, + "outputs": [], + "source": [ + "#@title Predefined Seeds\n", + "lang_to_seed = {\"en\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"ar\": \"\\n_START_ARTICLE_\\nأوليفيا كوك\\n_START_SECTION_\\nنشأتها والتعلي \\n_START_PARAGRAPH_\\nولدت أوليفيا كوك في أولدهام في مانشستر الكبرى لأسرة تتكون من أب يعمل كظابط شرطة، وأمها تعمل كممثلة مبيعات. عندما كانت صغيرة بدأت تأخذ دروساً في الباليه الجمباز. وفي المدرسة شاركت في المسرحيات المدرسية، إضافةً إلى عملها في مسرح سندريلا . وفي سن الرابعة عشر عاماً، حصلت على وكيلة لها في مانشستر وهي وقعت عقداً مع وكالة الفنانين المبدعين في مانشستر،\",\n", + " \"zh-cn\": \"\\n_START_ARTICLE_\\n上尾事件\\n_START_SECTION_\\n日本国铁劳资关系恶化\\n_START_PARAGRAPH_\\n由于日本国铁财政恶化,管理层开始重整人手安排,令工会及员工感到受威胁。但日本国铁作为公营企业,其雇员均受公营企业等劳资关系法规管——该法第17条规定公营企业员工不得发动任何罢工行为。为了规避该法例\",\n", + " \"zh-tw\": \"\\n_START_ARTICLE_\\n乌森\\n_START_PARAGRAPH_\\n烏森(法語:Houssen,發音:[usən];德語:Hausen;阿爾薩斯語:Hüse)是法國上萊茵省的一個市鎮,位於該省北部,屬於科爾馬-里博維萊區(Colmar-Ribeauvillé)第二科爾馬縣(Colmar-2)。該市鎮總面積6.7平方公里,2009年時的人口為\",\n", + " \"nl\": \"\\n_START_ARTICLE_\\n1001 vrouwen uit de Nederlandse geschiedenis\\n_START_SECTION_\\nSelectie van vrouwen\\n_START_PARAGRAPH_\\nDe 'oudste' biografie in het boek is gewijd aan de beschermheilige\",\n", + " \"fr\": \"\\n_START_ARTICLE_\\nꝹ\\n_START_SECTION_\\nUtilisation\\n_START_PARAGRAPH_\\nLe d insulaire est utilisé comme lettre additionnelle dans l’édition de 1941 du recueil de chroniques galloises Brut y Tywysogion\",\n", + " \"de\": \"\\n_START_ARTICLE_\\nÜnal Demirkıran\\n_START_SECTION_\\nLaufbahn\\n_START_PARAGRAPH_\\nDemirkıran debütierte als junges Talent am 25. September 1999 im Auswärtsspiel des SSV Ulm 1846 bei Werder Bremen (2:2) in der Bundesliga, als er kurz\",\n", + " \"it\": \"\\n_START_ARTICLE_\\n28th Street (linea IRT Lexington Avenue)\\n_START_SECTION_\\nStoria\\n_START_PARAGRAPH_\\nLa stazione, i cui lavori di costruzione ebbero inizio nel 1900, venne aperta il 27 ottobre 1904, come\",\n", + " \"ja\": \"\\n_START_ARTICLE_\\nしのぶ・まさみshow'05 恋してラララ\\n_START_SECTION_\\n概要\\n_START_PARAGRAPH_\\n『上海ルーキーSHOW』の打ち切り後に放送された年末特番で、同番組MCの大竹しのぶと久本雅美が恋愛にまつわるテーマでトークや音楽企画を展開していた。基本は女\",\n", + " \"ko\": \"\\n_START_ARTICLE_\\n녹턴, Op. 9 (쇼팽)\\n_START_SECTION_\\n녹턴 3번 나장조\\n_START_PARAGRAPH_\\n쇼팽의 녹턴 3번은 세도막 형식인 (A-B-A)형식을 취하고 있다. 첫 부분은 알레그레토(Allegretto)의 빠르기가 지시되어 있으며 물 흐르듯이 부드럽게 전개되나\",\n", + " \"pl\": \"\\n_START_ARTICLE_\\nAK-176\\n_START_SECTION_\\nHistoria\\n_START_PARAGRAPH_\\nPod koniec lat 60 XX w. 
w ZSRR dostrzeżono potrzebę posiadania lekkiej armaty uniwersalnej średniego kalibru o stosunkowo dużej mocy ogniowej, która\",\n", + " \"pt\": \"\\n_START_ARTICLE_\\nÁcido ribonucleico\\n_START_SECTION_\\nIntermediário da transferência de informação\\n_START_PARAGRAPH_\\nEm 1957 Elliot Volkin e Lawrence Astrachan fizeram uma observação significativa. Eles descobriram que uma das mais marcantes mudanças\",\n", + " \"ru\": \"\\n_START_ARTICLE_\\nАрнольд, Ремо\\n_START_SECTION_\\nКлубная карьера\\n_START_PARAGRAPH_\\nАрнольд перешёл в академию «Люцерна» в 12 лет. С 2014 года выступал за вторую команду, где провёл пятнадцать встреч. С сезона 2015/2016 находится в составе основной команды. 27 сентября 2015 года дебютировал\",\n", + " \"es\": \"\\n_START_ARTICLE_\\n(200012) 2007 LK20\\n_START_SECTION_\\nDesignación y nombre\\n_START_PARAGRAPH_\\nDesignado provisionalmente como 2007 LK20.\\n_START_SECTION_\\nCaracterísticas orbitales\\n_START_PARAGRAPH_\\n2007 LK20\",\n", + " \"th\": \"\\n_START_ARTICLE_\\nการนัดหยุดเรียนเพื่อภูมิอากาศ\\n_START_SECTION_\\nเกรียตา ทืนแบร์ย\\n_START_PARAGRAPH_\\nวันที่ 20 สิงหาคม 2561 เกรียตา ทืนแบร์ย นักกิจกรรมภูมิอากาศชาวสวีเดน ซึ่งขณะนั้นศึกษาอยู่ในชั้นเกรด 9 (เทียบเท่ามัธยมศึกษาปีที่ 3) ตัดสินใจไม่เข้าเรียนจนกระทั่งการเลือกตั้งทั่วไปในประเทศสวีเดนปี\",\n", + " \"tr\": \"\\n_START_ARTICLE_\\nİsrail'in Muhafazakar Dostları\\n_START_SECTION_\\nFaaliyetleri\\n_START_PARAGRAPH_\\nGrubun 2005 stratejisi ile aşağıdaki faaliyet alanları tespit edilmiştir:_NEWLINE_İsrail'i destekleme\",\n", + " \"bg\": \"\\n_START_ARTICLE_\\nАвтомобил с повишена проходимост\\n_START_SECTION_\\nОсобености на конструкцията\\n_START_PARAGRAPH_\\nВ исторически план леки автомобили с висока проходимост се произвеждат и имат военно\",\n", + " \"ca\": \"\\n_START_ARTICLE_\\nAuchy-la-Montagne\\n_START_SECTION_\\nPoblació\\n_START_PARAGRAPH_\\nEl 2007 la població de fet d'Auchy-la-Montagne era de 469 persones. Hi havia 160 famílies de les quals 28\",\n", + " \"cs\": \"\\n_START_ARTICLE_\\nŘemeslo\\n_START_PARAGRAPH_\\nŘemeslo je určitý druh manuální dovednosti, provozovaný za účelem obživy, resp. vytváření zisku. Pro řemeslné práce je charakteristický vysoký podíl ruční práce, spojený s používáním specializovaných nástrojů a pomůcek. Řemeslné práce\",\n", + " \"da\": \"\\n_START_ARTICLE_\\nÖrenäs slot\\n_START_PARAGRAPH_\\nÖrenäs slot (svensk: Örenäs slott) er et slot nær Glumslöv i Landskrona stad tæt på Øresunds-kysten i Skåne i Sverige._NEWLINE_Örenäs ligger\",\n", + " \"el\": \"\\n_START_ARTICLE_\\nΆλβαρο Ρεκόμπα\\n_START_SECTION_\\nΒιογραφικά στοιχεία\\n_START_PARAGRAPH_\\nΟ Άλβαρο Ρεκόμπα γεννήθηκε στις 17 Μαρτίου 1976 στο Μοντεβίδεο της Ουρουγουάης από\",\n", + " \"et\": \"\\n_START_ARTICLE_\\nAus deutscher Geistesarbeit\\n_START_PARAGRAPH_\\nAus deutscher Geistesarbeit (alapealkiri Wochenblatt für wissenschaftliche und kulturelle Fragen der Gegenwart) oli ajakiri, mis 1924–1934 ilmus Tallinnas. Ajakirja andis 1932–1934\",\n", + " \"fa\": \"\\n_START_ARTICLE_\\nتفسیر بغوی\\n_START_PARAGRAPH_\\nایرانی حسین بن مسعود بغوی است. این کتاب خلاصه ای از تفسیر الکشف و البیان عن تفسیر القرآن ابواسحاق احمد ثعلبی می‌باشد. این کتاب در ۴ جلد موجود می‌باش\",\n", + " \"fi\": \"\\n_START_ARTICLE_\\nBovesin verilöyly\\n_START_SECTION_\\nVerilöyly\\n_START_PARAGRAPH_\\n19. syyskuuta 1943 partisaaniryhmä saapui Bovesiin tarkoituksenaan ostaa leipää kylästä. 
Kylässä sattui olemaan kaksi SS-miestä, jotka\",\n", + " \"he\": \"\\n_START_ARTICLE_\\nאוגדה 85\\n_START_SECTION_\\nהיסטוריה\\n_START_PARAGRAPH_\\nהאוגדה הוקמה בהתחלה כמשלט העמקים בשנות השבעים. בשנות השמונים הפכה להיות אוגדה מרחבית עם שתי\",\n", + " \"hi\": \"\\n_START_ARTICLE_\\nऑडी\\n_START_SECTION_\\nऑडी इंडिया\\n_START_PARAGRAPH_\\nऑडी इंडिया की स्थापना मार्च 2007 में फोक्सवैगन ग्रुप सेल्स इंडिया के एक विभाजन के रूप में की गई थी। दुनिया भर में 110\",\n", + " \"hr\": \"\\n_START_ARTICLE_\\nČimariko (jezična porodica)\\n_START_PARAGRAPH_\\nChimarikan.-porodica sjevernoameričkih indijanskih jezika koja prema Powersu obuhvaća jezike Indijanaca Chimariko (Chemaŕeko) sa rijeke Trinity i Chimalakwe\",\n", + " \"hu\": \"\\n_START_ARTICLE_\\nÁllami Politikai Igazgatóság\\n_START_PARAGRAPH_\\nAz Állami Politikai Igazgatóság (rövidítve: GPU, oroszul: Государственное политическое управление), majd később Egyesített Állami Politikai Igazgatóság Szovjet-Oroszország\",\n", + " \"id\": \"\\n_START_ARTICLE_\\n(257195) 2008 QY41\\n_START_SECTION_\\nPembentukan\\n_START_PARAGRAPH_\\nSeperti asteroid secara keseluruhan, asteroid ini terbentuk dari nebula matahari primordial sebagai pecahan planetisimal, sesuatu di\",\n", + " \"lt\": \"\\n_START_ARTICLE_\\nŠavijos–Uardigo regionas\\n_START_SECTION_\\nGeografija\\n_START_PARAGRAPH_\\nŠavijos-Uardigo regionas yra Atlanto vandenynu pakrantės lygumoje\",\n", + " \"lv\": \"\\n_START_ARTICLE_\\nApatīts\\n_START_SECTION_\\nĪpašības\\n_START_PARAGRAPH_\\nApatīta kopējā ķīmiskā formula ir Ca₁₀(PO₄)₆(OH,F,Cl)₂, ir trīs atšķirīgi apatīta veidi: apatīts: Ca₁₀(PO₄)₆(OH)₂, fluorapatīts Ca₁₀(PO₄)₆(F)₂ un hlorapatīts: Ca₁₀(PO₄)₆(Cl)₂. Pēc sastāva\",\n", + " \"ms\": \"\\n_START_ARTICLE_\\nEdward C. Prescott\\n_START_PARAGRAPH_\\nEdward Christian Prescott (lahir 26 Disember 1940) ialah seorang ahli ekonomi Amerika. Beliau menerima Hadiah Peringatan Nobel dalam Sains Ekonomi pada tahun 2004, berkongsi\",\n", + " \"no\": \"\\n_START_ARTICLE_\\nAl-Minya\\n_START_SECTION_\\nEtymologi\\n_START_PARAGRAPH_\\nDet er sprikende forklaringer på bynavnet. Det kan komme fra gammelegyptisk Men'at Khufu, i betydning byen hvor Khufu ble ammet, noe som knytter byen til farao Khufu (Keops), som\",\n", + " \"ro\": \"\\n_START_ARTICLE_\\nDealurile Cernăuțiului\\n_START_PARAGRAPH_\\nDealurile Cernăuțiului sunt un lanț deluros striat, care se întinde în partea centrală a interfluviului dintre Prut și Siret, în cadrul regiunii Cernăuți din\",\n", + " \"sk\": \"\\n_START_ARTICLE_\\n10. peruť RAAF\\n_START_PARAGRAPH_\\n10. peruť RAAF je námorná hliadkovacia peruť kráľovských austrálskych vzdušných síl (Royal Australian Air Force – RAAF) založená na základni Edinburgh v Južnej Austrálii ako súčasť 92\",\n", + " \"sl\": \"\\n_START_ARTICLE_\\n105 Artemida\\n_START_SECTION_\\nOdkritje\\n_START_PARAGRAPH_\\nAsteroid je 16. septembra 1868 odkril James Craig Watson (1838 – 1880). Poimenovan je po Artemidi, boginji Lune iz grške\",\n", + " \"sr\": \"\\n_START_ARTICLE_\\nЉанос Морелос 1. Сексион (Истапангахоја)\\n_START_SECTION_\\nСтановништво\\n_START_PARAGRAPH_\\nПрема подацима из 2010. године у насељу је живело 212\",\n", + " \"sv\": \"\\n_START_ARTICLE_\\nÖstra Torps landskommun\\n_START_SECTION_\\nAdministrativ historik\\n_START_PARAGRAPH_\\nKommunen bildades i Östra Torps socken i Vemmenhögs härad i Skåne när 1862 års kommunalförordningar trädde i kraft. 
_NEWLINE_Vid kommunreformen\",\n", + " \"tl\": \"\\n_START_ARTICLE_\\nBésame Mucho\\n_START_PARAGRAPH_\\nAng Bésame Mucho ay isang awit na nasa Kastila. Isinulat ito ng Mehikanang si Consuelo Velázquez noong 1940, bago sumapit ang kanyang ika-16 na\",\n", + " \"uk\": \"\\n_START_ARTICLE_\\nІслам та інші релігії\\n_START_PARAGRAPH_\\nПротягом багатовікової ісламської історії мусульманські правителі, ісламські вчені і звичайні мусульмани вступали у різні відносини з представниками інших релігій. Стиль цих\",\n", + " \"vi\": \"\\n_START_ARTICLE_\\nĐường tỉnh 316\\n_START_PARAGRAPH_\\nĐường tỉnh 316 hay tỉnh lộ 316, viết tắt ĐT316 hay TL316, là đường tỉnh ở các huyện Thanh Sơn, Thanh Thủy, Tam Nông tỉnh Phú Thọ ._NEWLINE_ĐT316 bắt đầu từ xã Tinh Nhuệ\",\n", + " \"multilingual-64k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"multilingual-128k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\"}\n", + "\n", + "seed = lang_to_seed[language]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mZDGsSyUM_Mg" + }, + "outputs": [], + "source": [ + "#@title Enter your own seed (Optional).\n", + "user_seed = \"\" #@param { type: \"string\" }\n", + "if user_seed.strip():\n", + " seed = user_seed.strip()\n", + "\n", + "# The seed must start with \"_START_ARTICLE_\" or the generated text will be gibberish\n", + "START_ARTICLE = \"_START_ARTICLE_\"\n", + "if START_ARTICLE not in seed:\n", + " seed = \"\\n{}\\n{}\".format(START_ARTICLE, seed)\n", + "\n", + "print(\"Generating text from seed:\\n{}\".format(seed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5dMuShi3XuLd" + }, + "outputs": [], + "source": [ + "#@title Initialize session.\n", + "with tf.Session(graph=g).as_default() as session:\n", + " session.run(init_op)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aS53xjmbbw0Z" + }, + "outputs": [], + "source": [ + "#@title Generate text\n", + "\n", + "with session.as_default():\n", + " results = session.run([embeddings, neg_log_likelihood, ppl, activations, token_ids, generated_text], feed_dict={text: [seed]})\n", + " embeddings_result, neg_log_likelihood_result, ppl_result, activations_result, token_ids_result, generated_text_result = results\n", + " generated_text_output = generated_text_result[0].decode('utf-8')\n", + "\n", + "print(generated_text_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tjQf3N1wdND0" + }, + "source": [ + "We can also look at the other outputs of the model - the perplexity, the token ids, the intermediate activations, and the embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGfw3CQWNC_n" + }, + "outputs": [], + "source": [ + "ppl_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLlgJObFNEmj" + }, + "outputs": [], + "source": [ + "token_ids_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5SaH36M-NGXc" + }, + "outputs": [], + 
"source": [ + "activations_result.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9Eb_DPfQdUu" + }, + "outputs": [], + "source": [ + "embeddings_result" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "wiki40b_lm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/yamnet.ipynb b/site/en/hub/tutorials/yamnet.ipynb new file mode 100644 index 00000000000..e6c9fbca5a1 --- /dev/null +++ b/site/en/hub/tutorials/yamnet.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "laa9tRjJ59bl" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "T4ZHtBpK6Dom" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x2ep-q7k_5R-" + }, + "source": [ + "# Sound classification with YAMNet\n", + "\n", + "YAMNet is a deep net that predicts 521 audio event [classes](https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/yamnet_class_map.csv) from the [AudioSet-YouTube corpus](http://g.co/audioset) it was trained on. It employs the\n", + "[Mobilenet_v1](https://arxiv.org/pdf/1704.04861.pdf) depthwise-separable\n", + "convolution architecture." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bteu7pfkpt_f" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import numpy as np\n", + "import csv\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import Audio\n", + "from scipy.io import wavfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YSVs3zRrrYmY" + }, + "source": [ + "Load the Model from TensorFlow Hub.\n", + "\n", + "Note: to read the documentation just follow the model's [url](https://tfhub.dev/google/yamnet/1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VX8Vzs6EpwMo" + }, + "outputs": [], + "source": [ + "# Load the model.\n", + "model = hub.load('https://tfhub.dev/google/yamnet/1')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lxWx6tOdtdBP" + }, + "source": [ + "The labels file will be loaded from the models assets and is present at `model.class_map_path()`.\n", + "You will load it on the `class_names` variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHSToAW--o4U" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " class_names = []\n", + " with tf.io.gfile.GFile(class_map_csv_text) as csvfile:\n", + " reader = csv.DictReader(csvfile)\n", + " for row in reader:\n", + " class_names.append(row['display_name'])\n", + "\n", + " return class_names\n", + "\n", + "class_map_path = model.class_map_path().numpy()\n", + "class_names = class_names_from_csv(class_map_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mSFjRwkZ59lU" + }, + "source": [ + "Add a method to verify and convert a loaded audio is on the proper sample_rate (16K), otherwise it would affect the model's results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LizGwWjc5w6A" + }, + "outputs": [], + "source": [ + "def ensure_sample_rate(original_sample_rate, waveform,\n", + " desired_sample_rate=16000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " desired_length = int(round(float(len(waveform)) /\n", + " original_sample_rate * desired_sample_rate))\n", + " waveform = scipy.signal.resample(waveform, desired_length)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZEgCobA9bWl" + }, + "source": [ + "## Downloading and preparing the sound file\n", + "\n", + "Here you will download a wav file and listen to it.\n", + "If you have a file already available, just upload it to colab and use it instead.\n", + "\n", + "Note: The expected audio file should be a mono wav file at 16kHz sample rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WzZHvyTtsJrc" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D8LKmqvGzZzr" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/miaow_16k.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Wo9KJb-5zuz1" + }, + "outputs": [], + "source": [ + "# wav_file_name = 'speech_whistling2.wav'\n", + "wav_file_name = 'miaow_16k.wav'\n", + "sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')\n", + "sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(wav_data)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(wav_data)}')\n", + "\n", + "# Listening to the wav file.\n", + "Audio(wav_data, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P9I290COsMBm" + }, + "source": [ + "The `wav_data` needs to be normalized to values in `[-1.0, 1.0]` (as stated in the model's [documentation](https://tfhub.dev/google/yamnet/1))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bKr78aCBsQo3" + }, + "outputs": [], + "source": [ + "waveform = wav_data / tf.int16.max" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e_Xwd4GPuMsB" + }, + "source": [ + "## Executing the Model\n", + "\n", + "Now the easy part: using the data already prepared, you just call the model and get the: scores, embedding and the spectrogram.\n", + "\n", + "The score is the main result you will use.\n", + "The spectrogram you will use to do some visualizations later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BJGP6r-At_Jc" + }, + "outputs": [], + "source": [ + "# Run the model, check the output.\n", + "scores, embeddings, spectrogram = model(waveform)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vmo7griQprDk" + }, + "outputs": [], + "source": [ + "scores_np = scores.numpy()\n", + "spectrogram_np = spectrogram.numpy()\n", + "infered_class = class_names[scores_np.mean(axis=0).argmax()]\n", + "print(f'The main sound is: {infered_class}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uj2xLf-P_ndS" + }, + "source": [ + "## Visualization\n", + "\n", + "YAMNet also returns some additional information that we can use for visualization.\n", + "Let's take a look on the Waveform, spectrogram and the top classes inferred." 
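For reference, the steps above condense into a short standalone script. This is a minimal sketch, assuming the `miaow_16k.wav` file downloaded above is in the working directory and is mono; `scipy.signal` is imported explicitly here because the resampling step relies on `scipy.signal.resample`.

```python
# Condensed YAMNet inference sketch (assumes a mono miaow_16k.wav in the working directory).
import csv

import scipy.signal
import tensorflow as tf
import tensorflow_hub as hub
from scipy.io import wavfile

model = hub.load('https://tfhub.dev/google/yamnet/1')

# The class names ship with the model as a CSV asset.
with tf.io.gfile.GFile(model.class_map_path().numpy()) as f:
    class_names = [row['display_name'] for row in csv.DictReader(f)]

sample_rate, wav_data = wavfile.read('miaow_16k.wav')
if sample_rate != 16000:
    # Resample to the 16 kHz input the model expects.
    desired_length = int(round(float(len(wav_data)) / sample_rate * 16000))
    wav_data = scipy.signal.resample(wav_data, desired_length)

waveform = wav_data / tf.int16.max  # scale int16 samples to [-1.0, 1.0]
scores, embeddings, spectrogram = model(waveform)
print('Top class:', class_names[scores.numpy().mean(axis=0).argmax()])
```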
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QSTkmv7wr2M" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Plot the waveform.\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(waveform)\n", + "plt.xlim([0, len(waveform)])\n", + "\n", + "# Plot the log-mel spectrogram (returned by the model).\n", + "plt.subplot(3, 1, 2)\n", + "plt.imshow(spectrogram_np.T, aspect='auto', interpolation='nearest', origin='lower')\n", + "\n", + "# Plot and label the model output scores for the top-scoring classes.\n", + "mean_scores = np.mean(scores, axis=0)\n", + "top_n = 10\n", + "top_class_indices = np.argsort(mean_scores)[::-1][:top_n]\n", + "plt.subplot(3, 1, 3)\n", + "plt.imshow(scores_np[:, top_class_indices].T, aspect='auto', interpolation='nearest', cmap='gray_r')\n", + "\n", + "# patch_padding = (PATCH_WINDOW_SECONDS / 2) / PATCH_HOP_SECONDS\n", + "# values from the model documentation\n", + "patch_padding = (0.025 / 2) / 0.01\n", + "plt.xlim([-patch_padding-0.5, scores.shape[0] + patch_padding-0.5])\n", + "# Label the top_N classes.\n", + "yticks = range(0, top_n, 1)\n", + "plt.yticks(yticks, [class_names[top_class_indices[x]] for x in yticks])\n", + "_ = plt.ylim(-0.5 + np.array([top_n, 0]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "yamnet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/install/_index.yaml b/site/en/install/_index.yaml index 0946e24ef41..71bc660f81d 100644 --- a/site/en/install/_index.yaml +++ b/site/en/install/_index.yaml @@ -20,13 +20,14 @@ landing_page:
      -
    • Python 3.6–3.9
    • +
    • Python 3.9–3.12
    • Ubuntu 16.04 or later
    • Windows 7 or later (with C++ redistributable)
    • macOS 10.12.6 (Sierra) or later (no GPU support)
    • +
• WSL2 via Windows 10 19044 or higher, including GPU support (Experimental)
    @@ -40,7 +41,6 @@ landing_page:

    Install TensorFlow with Python's pip package manager.

    Official packages available for Ubuntu, Windows, and macOS.

    -

    See the GPU guide for CUDA®-enabled cards.

    buttons: - label: Read the pip install guide @@ -51,8 +51,10 @@ landing_page:
             # Requires the latest pip
             pip install --upgrade pip
    - # Current stable release for CPU and GPU + # Current stable release for CPU pip install tensorflow
    + # Current stable release for GPU (Linux / WSL2) + pip install tensorflow[and-cuda]
    # Or try the preview build (unstable) pip install tf-nightly
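Once a package is installed, a quick smoke test is the same one-line check used in the pip guide; a minimal sketch (the printed tensor value differs per run, and an empty GPU list simply means no GPU was detected):

```python
# Post-install smoke test: build a tiny graph and list the visible GPUs.
import tensorflow as tf

print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print(tf.config.list_physical_devices('GPU'))
```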
    @@ -66,8 +68,7 @@ landing_page: The TensorFlow Docker images are already configured to run TensorFlow. A Docker container runs in a - virtual environment and is the easiest way to set up GPU - support. + virtual environment and is the easiest way to set up GPU support.

             docker pull tensorflow/tensorflow:latest  # Download latest stable image
    diff --git a/site/en/install/_toc.yaml b/site/en/install/_toc.yaml index c8f60bde852..26cdb270bb8 100644 --- a/site/en/install/_toc.yaml +++ b/site/en/install/_toc.yaml @@ -7,8 +7,6 @@ toc: - title: Docker path: /install/docker - heading: Additional setup -- title: GPU support - path: /install/gpu - title: GPU device plugins path: /install/gpu_plugins - title: Problems diff --git a/site/en/install/docker.md b/site/en/install/docker.md index 30942924688..376ca0820a7 100644 --- a/site/en/install/docker.md +++ b/site/en/install/docker.md @@ -1,45 +1,43 @@ # Docker -[Docker](https://docs.docker.com/install/){:.external} uses *containers* to +[Docker](https://docs.docker.com/install/) uses *containers* to create virtual environments that isolate a TensorFlow installation from the rest of the system. TensorFlow programs are run *within* this virtual environment that can share resources with its host machine (access directories, use the GPU, connect to the Internet, etc.). The -[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} +[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/) are tested for each release. -Docker is the easiest way to enable TensorFlow [GPU support](./gpu.md) on Linux since only the -[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} +Docker is the easiest way to enable TensorFlow [GPU support](./pip.md) on Linux since only the +[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) is required on the *host* machine (the *NVIDIA® CUDA® Toolkit* does not need to be installed). ## TensorFlow Docker requirements -1. [Install Docker](https://docs.docker.com/install/){:.external} on +1. [Install Docker](https://docs.docker.com/install/) on your local *host* machine. -2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-docker){:.external}. +2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-container-toolkit). * Take note of your Docker version with `docker -v`. Versions __earlier than__ 19.03 require nvidia-docker2 and the `--runtime=nvidia` flag. On versions __including and after__ 19.03, you will use the `nvidia-container-toolkit` package and the `--gpus all` flag. Both options are documented on the page linked above. Note: To run the `docker` command without `sudo`, create the `docker` group and add your user. For details, see the -[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/){:.external}. +[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/). ## Download a TensorFlow Docker image The official TensorFlow Docker images are located in the -[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} -Docker Hub repository. Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external} +[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/) +Docker Hub repository. 
Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/) using the following format: | Tag | Description | |-------------|----------------------------------------------------------------------------------------------------------------------| | `latest` | The latest release of TensorFlow CPU binary image. Default. | | `nightly` | Nightly builds of the TensorFlow image. (Unstable.) | -| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.1.0* | -| `devel` | Nightly builds of a TensorFlow `master` development environment. Includes TensorFlow source code. | -| `custom-op` | Special experimental image for developing TF custom ops. More info [here](https://github.com/tensorflow/custom-op). | +| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.8.3* | Each base *tag* has variants that add or change functionality: @@ -66,7 +64,7 @@ To start a TensorFlow-configured container, use the following command form: docker run [-it] [--rm] [-p hostPort:containerPort] tensorflow/tensorflow[:tag] [command]
    -For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/){:.external}. +For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/). ### Examples using CPU-only images @@ -100,7 +98,7 @@ docker run -it --rm -v $PWD:/tmp -w /tmp tensorflow/tensorflow python ./script.p Permission issues can arise when files created within a container are exposed to the host. It's usually best to edit files on the host system. -Start a [Jupyter Notebook](https://jupyter.org/){:.external} server using +Start a [Jupyter Notebook](https://jupyter.org/) server using TensorFlow's nightly build:
    @@ -114,13 +112,13 @@ Follow the instructions and open the URL in your host web browser:
     ## GPU support
     
     Docker is the easiest way to run TensorFlow on a GPU since the *host* machine
    -only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external}
    +only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver)
     (the *NVIDIA® CUDA® Toolkit* is not required).
     
    -Install the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart){:.external} 
    +Install the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart) 
     to add NVIDIA® GPU support to Docker. `nvidia-container-runtime` is only
     available for Linux. See the `nvidia-container-runtime` 
    -[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support){:.external}
    +[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support)
     for details.
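After completing the checks below, you can also confirm from inside a GPU-enabled TensorFlow container that the device is visible to TensorFlow itself. A minimal sketch, assuming the `tensorflow/tensorflow:latest-gpu` image started with `--gpus all`:

```python
# Run inside a container started with:
#   docker run --gpus all -it --rm tensorflow/tensorflow:latest-gpu python
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
print('GPUs visible to TensorFlow:', gpus)  # an empty list means the GPU is not exposed to the container
```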
     
     Check if a GPU is available:
    @@ -132,7 +130,7 @@ lspci | grep -i nvidia
     Verify your `nvidia-docker` installation:
     
     
    -docker run --gpus all --rm nvidia/cuda nvidia-smi
    +docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
     
    Note: `nvidia-docker` v2 uses `--runtime=nvidia` instead of `--gpus all`. `nvidia-docker` v1 uses the `nvidia-docker` alias, diff --git a/site/en/install/errors.md b/site/en/install/errors.md index 0d52c00f898..938ba8b454f 100644 --- a/site/en/install/errors.md +++ b/site/en/install/errors.md @@ -1,8 +1,9 @@ # Build and install error messages -TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues){:.external} -and [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow){:.external} -to track and document build and installation problems. +TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues), +[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[TensorFlow Forum](https://discuss.tensorflow.org/c/general-discussion/6) +to track, document, and discuss build and installation problems. The following list links error messages to a solution or discussion. If you find an installation or build problem that is not listed, please search the GitHub @@ -13,10 +14,10 @@ question on Stack Overflow with the `tensorflow` tag.
    GitHub issue or Stack Overflow Error Message
    38896424 31058"No matching distribution found for tensorflow": + "No matching distribution found for tensorflow": Pip can't find a TensorFlow package compatible with your system. Check the system requirements and - python version + Python version
    36371137 and - here36371137
    libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207] A
       protocol message was rejected because it was too big (more than 67108864 bytes).
       To increase the limit (or to disable these warnings), see
    -  CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.
    35252888
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    42006320
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kFmEkitOFJSw" + }, + "source": [ + "TensorFlow provides a C API that can be used to build\n", + "[bindings for other languages](https://github.com/tensorflow/docs/tree/master/site/en/r1/guide/extend/bindings.md).\n", + "The API is defined in\n", + "c_api.h\n", + "and designed for simplicity and uniformity rather than convenience.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vk--31hqIwSV" + }, + "source": [ + "## Nightly libtensorflow C packages\n", + "\n", + "libtensorflow packages are built nightly and uploaded to GCS for all supported\n", + "platforms. They are uploaded to the\n", + "[libtensorflow-nightly GCS bucket](https://storage.googleapis.com/libtensorflow-nightly)\n", + "and are indexed by operating system and date built. For MacOS and Linux shared\n", + "objects, there is a\n", + "[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh)\n", + "that renames the `.so` files versioned to the current date copied into the\n", + "directory with the artifacts." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qowtdsijFMYZ" + }, + "source": [ + "## Supported Platforms\n", + "\n", + "TensorFlow for C is supported on the following systems:\n", + "\n", + "* Linux, 64-bit, x86\n", + "* macOS, Version 10.12.6 (Sierra) or higher\n", + "* Windows, 64-bit x86" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hnhAk8y-FSBN" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y50y01XUFVb2" + }, + "source": [ + "### Download and extract\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    TensorFlow C libraryURL
    Linux\n", + " \n", + "
    Linux CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-linux-x86_64.tar.gz
    Linux GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-gpu-linux-x86_64.tar.gz
    macOS\n", + " \n", + "
    macOS CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.16.2/libtensorflow-cpu-darwin-x86_64.tar.gz
    macOS ARM64 CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-darwin-arm64.tar.gz
    Windows\n", + " \n", + "
    Windows CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.1/libtensorflow-cpu-windows-x86_64.zip
    Windows GPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.10.0.zip

    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b4kWu6k0FaT9" + }, + "source": [ + "Extract the downloaded archive, which contains the header files to include in\n", + "your C program and the shared libraries to link against.\n", + "\n", + "On Linux and macOS, you may want to extract to `/usr/local/lib`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DrjVyjVJFcon" + }, + "outputs": [], + "source": [ + "%%bash\n", + "FILENAME=libtensorflow-cpu-linux-x86_64.tar.gz\n", + "wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/versions/2.18.1/${FILENAME}\n", + "sudo tar -C /usr/local -xzf ${FILENAME}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fcBJDdojJDyk" + }, + "source": [ + "### Linker\n", + "\n", + "On Linux/macOS, if you extract the TensorFlow C library to a system directory,\n", + "such as `/usr/local`, configure the linker with `ldconfig`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h0STAG82JDZs" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo ldconfig /usr/local/lib" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ix4HdnNGH6aF" + }, + "source": [ + "If you extract the TensorFlow C library to a non-system directory, such as\n", + "`~/mydir`, then configure the linker environmental variables:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E6E99eJzIJQs" + }, + "source": [ + "
    \n", + "
    \n", + "

    Linux

    \n", + "
    \n",
    +        "export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib\n",
    +        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/mydir/lib\n",
    +        "
    \n", + "
    \n", + "
    \n", + "

    macOS

    \n", + "
    \n",
    +        "export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib\n",
    +        "export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:~/mydir/lib\n",
    +        "
    \n", + "
    \n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qYVWjxqaJVPs" + }, + "source": [ + "## Build" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UoMUuMJrJXp8" + }, + "source": [ + "### Example program\n", + "\n", + "With the TensorFlow C library installed, create an example program with the\n", + "following source code (`hello_tf.c`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b5851f1b" + }, + "outputs": [], + "source": [ + "%%writefile hello_tf.c\n", + "#include \n", + "#include \n", + "\n", + "int main() {\n", + " printf(\"Hello from TensorFlow C library version %s\\n\", TF_Version());\n", + " return 0;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1GFidbrIWzU" + }, + "source": [ + "### Compile\n", + "\n", + "Compile the example program to create an executable, then run:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jph67SAjIX0M" + }, + "outputs": [], + "source": [ + "%%bash\n", + "gcc hello_tf.c -ltensorflow -o hello_tf\n", + "\n", + "./hello_tf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0qtHXROoJwoz" + }, + "source": [ + "Success: The TensorFlow C library is configured.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YbqbjqOSJ0IL" + }, + "source": [ + "If the program doesn't build, make sure that `gcc` can access the TensorFlow C\n", + "library. If extracted to `/usr/local`, explicitly pass the library location to\n", + "the compiler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CdPmM35VJ_77" + }, + "outputs": [], + "source": [ + "%%bash\n", + "gcc -I/usr/local/include -L/usr/local/lib hello_tf.c -ltensorflow -o hello_tf\n", + "\n", + "./hello_tf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ea5fd208" + }, + "source": [ + "## Build from source\n", + "\n", + "TensorFlow is open source. Read\n", + "[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md)\n", + "to build TensorFlow's C library from source code." + ] + } + ], + "metadata": { + "colab": { + "name": "lang_c.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/install/lang_c.md b/site/en/install/lang_c.md deleted file mode 100644 index 4b3e3d8ebe4..00000000000 --- a/site/en/install/lang_c.md +++ /dev/null @@ -1,144 +0,0 @@ -# Install TensorFlow for C - -TensorFlow provides a C API that can be used to build -[bindings for other languages](https://github.com/tensorflow/docs/tree/master/site/en/r1/guide/extend/bindings.md). -The API is defined in -c_api.h -and designed for simplicity and uniformity rather than convenience. - -## Nightly Libtensorflow C packages - -Libtensorflow packages are built nightly and uploaded to GCS for all supported -platforms. They are uploaded to the -[libtensorflow-nightly GCS bucket](https://storage.googleapis.com/libtensorflow-nightly) -and are indexed by operating system and date built. For MacOS and Linux shared -objects, we have a -[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh) -that renames the .so files versioned to the current date copied into the -directory with the artifacts. 
- -## Supported Platforms - -TensorFlow for C is supported on the following systems: - -* Linux, 64-bit, x86 -* macOS, Version 10.12.6 (Sierra) or higher -* Windows, 64-bit x86 - -## Setup - -### Download - - - - - - - - - - - - - - - - - - - - - - - - - - -
    TensorFlow C libraryURL
    Linux
    Linux CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.6.0.tar.gz
    Linux GPU supporthttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.6.0.tar.gz
    macOS
    macOS CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.6.0.tar.gz
    Windows
    Windows CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-windows-x86_64-2.6.0.zip
    Windows GPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.6.0.zip
    - -### Extract - -Extract the downloaded archive, which contains the header files to include in -your C program and the shared libraries to link against. - -On Linux and macOS, you may want to extract to `/usr/local/lib`: - -
    -sudo tar -C /usr/local -xzf (downloaded file)
    -
    - -### Linker - -On Linux/macOS, if you extract the TensorFlow C library to a system directory, -such as `/usr/local`, configure the linker with `ldconfig`: - -
    -sudo ldconfig
    -
    - -If you extract the TensorFlow C library to a non-system directory, such as -`~/mydir`, then configure the linker environmental variables: - -
    -
    -

    Linux

    -
    -export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib
    -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/mydir/lib
    -
    -
    -
    -

    macOS

    -
    -export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib
    -export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:~/mydir/lib
    -
    -
    -
    - - -## Build - -### Example program - -With the TensorFlow C library installed, create an example program with the -following source code (`hello_tf.c`): - -```c -#include -#include - -int main() { - printf("Hello from TensorFlow C library version %s\n", TF_Version()); - return 0; -} -``` - -### Compile - -Compile the example program to create an executable, then run: - -
    -gcc hello_tf.c -ltensorflow -o hello_tf
    -
    -./hello_tf
    -
    - -The command outputs: Hello from TensorFlow C library version number - -Success: The TensorFlow C library is configured. - -If the program doesn't build, make sure that `gcc` can access the TensorFlow C -library. If extracted to `/usr/local`, explicitly pass the library location to -the compiler: - -
    -gcc -I/usr/local/include -L/usr/local/lib hello_tf.c -ltensorflow -o hello_tf
    -
    - - -## Build from source - -TensorFlow is open source. Read -[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md){:.external} -to build TensorFlow's C library from source code. diff --git a/site/en/install/lang_java_legacy.md b/site/en/install/lang_java_legacy.md index af177dc0950..37341c36659 100644 --- a/site/en/install/lang_java_legacy.md +++ b/site/en/install/lang_java_legacy.md @@ -1,7 +1,7 @@ # Install TensorFlow for Java Warning: TensorFlow for Java is deprecated and will be removed in a future -version of TensorFlow once the replacement is stable. +version of TensorFlow once [the replacement](https://www.tensorflow.org/jvm) is stable. TensorFlow provides a [Java API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary)— @@ -27,7 +27,7 @@ To use TensorFlow on Android see [TensorFlow Lite](https://tensorflow.org/lite) ## TensorFlow with Apache Maven -To use TensorFlow with [Apache Maven](https://maven.apache.org){:.external}, +To use TensorFlow with [Apache Maven](https://maven.apache.org), add the dependency to the project's `pom.xml` file: ```xml @@ -40,7 +40,7 @@ add the dependency to the project's `pom.xml` file: ### GPU support -If your system has [GPU support](./gpu.md), add the following TensorFlow +If your system has [GPU support](./pip.md), add the following TensorFlow dependencies to the project's `pom.xml` file: ```xml @@ -167,11 +167,11 @@ system and processor support: Note: On Windows, the native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime. See the [Windows build from source](./source_windows.md) guide to install the -[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/){:.external}. +[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/). ### Compile -Using the `HelloTensorFlow.java` file from the [previous example](#example), +Using the `HelloTensorFlow.java` file from the [previous example](#example-program), compile a program that uses TensorFlow. Make sure the `libtensorflow.jar` is accessible to your `classpath`: @@ -203,5 +203,5 @@ Success: TensorFlow for Java is configured. ## Build from source TensorFlow is open source. Read -[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md){:.external} +[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md) to build TensorFlow's Java and native libraries from source code. diff --git a/site/en/install/pip.html b/site/en/install/pip.html deleted file mode 100644 index 3bd415aad9c..00000000000 --- a/site/en/install/pip.html +++ /dev/null @@ -1,350 +0,0 @@ - - - Install TensorFlow with pip - - - - - - -

    TensorFlow 2 packages are available

    -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows)
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    - - -

    Older versions of TensorFlow

    - -

    For TensorFlow 1.x, CPU and GPU packages are separate:

    - -
      -
    • tensorflow==1.15 —Release for CPU-only
    • -
    • tensorflow-gpu==1.15 —Release with GPU support (Ubuntu and Windows)
    • -
    - - -

    System requirements

    -
      -
    • Python 3.6–3.9 -
        -
      • Python 3.9 support requires TensorFlow 2.5 or later.
      • -
      • Python 3.8 support requires TensorFlow 2.2 or later.
      • -
      -
    • -
    • pip 19.0 or later (requires manylinux2010 support)
    • -
    • Ubuntu 16.04 or later (64-bit)
    • -
    • macOS 10.12.6 (Sierra) or later (64-bit) (no GPU support) -
        -
      • macOS requires pip 20.3 or later
      • -
      -
    • -
    • Windows 7 or later (64-bit) - -
    • -
    • GPU support requires a CUDA®-enabled card (Ubuntu and Windows)
    • -
    - - - -

    Hardware requirements

    -
      -
    • Starting with TensorFlow 1.6, binaries use AVX instructions which may not run on older CPUs.
    • -
    • Read the GPU support guide to set up a CUDA®-enabled GPU card on Ubuntu or Windows.
    • -
    - - -

    1. Install the Python development environment on your system

    - -

    - Check if your Python environment is already configured: -

    - - - -
    -python3 --version
    -pip3 --version
    -
    - -

    - If these packages are already installed, skip to the next step.
    - Otherwise, install Python, the - pip package manager, - and venv: -

    - -
    -
    -

    Ubuntu

    -
    -sudo apt update
    -sudo apt install python3-dev python3-pip python3-venv
    -
    -
    - -
    -

    macOS

    -

    Install using the Homebrew package manager:

    -
    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
    -brew update
    -brew install python  # Python 3
    -
    -
    - -
    -

    Windows

    -

    - Install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017, - and 2019. Starting with the TensorFlow 2.1.0 version, the msvcp140_1.dll - file is required from this package (which may not be provided from older redistributable packages). - The redistributable comes with Visual Studio 2019 but can be installed separately: -

    -
      -
    1. Go to the Microsoft Visual C++ downloads,
    2. -
    3. Scroll down the page to the Visual Studio 2015, 2017 and 2019 section.
    4. -
    5. Download and install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019 for your platform.
    6. -
    -

    Make sure long paths are enabled on Windows.

    -

    Install the 64-bit Python 3 release for Windows (select pip as an optional feature).

    -
    - -
    -

    Other

    -
    -curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
    -python get-pip.py
    -
    -
    -
    - - - - -

    2. Create a virtual environment (recommended)

    - -

    - Python virtual environments are used to isolate package installation from the system. -

    - -
    -
    -

    Ubuntu / macOS

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - ./venv directory to hold it: -

    -
    python3 -m venv --system-site-packages ./venv
    -

    - Activate the virtual environment using a shell-specific command: -

    -
    source ./venv/bin/activate  # sh, bash, or zsh
    -
    . ./venv/bin/activate.fish  # fish
    -
    source ./venv/bin/activate.csh  # csh or tcsh
    - -

    - When the virtual environment is active, your shell prompt is prefixed with (venv). -

    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Windows

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - .\venv directory to hold it: -

    -
    python -m venv --system-site-packages .\venv
    -

    - Activate the virtual environment: -

    -
    .\venv\Scripts\activate
    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Conda

    -

    -While the TensorFlow provided pip package is recommended, a -community-supported Anaconda package -is available. To install, read the Anaconda TensorFlow guide. -

    -
    -
    - - -

    3. Install the TensorFlow pip package

    - -

    - Choose one of the following TensorFlow packages to install from PyPI: -

    - -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows).
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    • tensorflow==1.15 —The final version of TensorFlow 1.x.
    • -
    - - - -
    -
    -

    Virtual environment install

    -
    pip install --upgrade tensorflow
    -

    Verify the install:

    -
    python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    - -
    -

    System install

    -
    pip3 install --user --upgrade tensorflow  # install in $HOME
    -

    Verify the install:

    -
    python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    -
    - - - -

    Package location

    - -

    - A few installation mechanisms require the URL of the TensorFlow Python package. - The value you specify depends on your Python version. -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    VersionURL
    Linux
    Python 3.6 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp36-cp36m-manylinux2010_x86_64.whl
    Python 3.6 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp36-cp36m-manylinux2010_x86_64.whl
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp39-cp39-manylinux2010_x86_64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp39-cp39-manylinux2010_x86_64.whl
    macOS (CPU-only)
    Python 3.6https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp36-cp36m-macosx_10_11_x86_64.whl
    Python 3.7https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp37-cp37m-macosx_10_11_x86_64.whl
    Python 3.8https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp38-cp38-macosx_10_11_x86_64.whl
    Python 3.9https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp39-cp39-macosx_10_11_x86_64.whl
    Windows
    Python 3.6 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp36-cp36m-win_amd64.whl
    Python 3.6 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp36-cp36m-win_amd64.whl
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp37-cp37m-win_amd64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp37-cp37m-win_amd64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp38-cp38-win_amd64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp38-cp38-win_amd64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp39-cp39-win_amd64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp39-cp39-win_amd64.whl
    - - - diff --git a/site/en/install/pip.md b/site/en/install/pip.md new file mode 100644 index 00000000000..a9e4bf4bf74 --- /dev/null +++ b/site/en/install/pip.md @@ -0,0 +1,658 @@ + +# Install TensorFlow with pip + + +This guide is for the latest stable version of TensorFlow. For the +preview build *(nightly)*, use the pip package named +`tf-nightly`. Refer to [these tables](./source#tested_build_configurations) for +older TensorFlow version requirements. For the CPU-only build, use the pip +package named `tensorflow-cpu`. + +Here are the quick versions of the install commands. Scroll down for the +step-by-step instructions. + +* {Linux} + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ```bash + python3 -m pip install 'tensorflow[and-cuda]' + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {MacOS} + + ```bash + # There is currently no official GPU support for MacOS. + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-wsl2), + or install `tensorflow` or `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + # Anything above 2.10 is not supported on the GPU on Windows Native + python -m pip install "tensorflow<2.11" + # Verify the installation: + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {Windows WSL2} + + Note: TensorFlow with GPU access is supported for WSL2 on Windows 10 19044 or + higher. This corresponds to Windows 10 version 21H2, the November 2021 + update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/software-download/windows10). + For instructions, see + [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + for CUDA in WSL. + + ```bash + python3 -m pip install tensorflow[and-cuda] + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {CPU} + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). 
+ Installing the Windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. Tensorflow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ```bash + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Nightly} + + ```bash + python3 -m pip install tf-nightly + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +## Hardware requirements + +Note: TensorFlow binaries use +[AVX instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX) +which may not run on older CPUs. + +The following GPU-enabled devices are supported: + +* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and + higher. See the list of + [CUDA®-enabled GPU cards](https://developer.nvidia.com/cuda-gpus). +* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation + from PTX, or to use different versions of the NVIDIA® libraries, see the + [Linux build from source](./source.md) guide. +* Packages do not contain PTX code except for the latest supported CUDA® + architecture; therefore, TensorFlow fails to load on older GPUs when + `CUDA_FORCE_PTX_JIT=1` is set. (See + [Application Compatibility](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#application-compatibility) + for details.) + +Note: The error message "Status: device kernel image is invalid" indicates that +the TensorFlow package does not contain PTX for your architecture. You can +enable compute capabilities by [building TensorFlow from source](./source.md). + +## System requirements + +* Ubuntu 16.04 or higher (64-bit) +* macOS 12.0 (Monterey) or higher (64-bit) *(no GPU support)* +* Windows Native - Windows 7 or higher (64-bit) *(no GPU support after TF 2.10)* +* Windows WSL2 - Windows 10 19044 or higher (64-bit) + +Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. + +## Software requirements + +* Python 3.9–3.12 +* pip version 19.0 or higher for Linux (requires `manylinux2014` support) and + Windows. pip version 20.3 or higher for macOS. +* Windows Native Requires + [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) + + +The following NVIDIA® software are only required for GPU support. + +* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers) + * >= 525.60.13 for Linux + * >= 528.33 for WSL on Windows +* [CUDA® Toolkit 12.3](https://developer.nvidia.com/cuda-toolkit-archive). +* [cuDNN SDK 8.9.7](https://developer.nvidia.com/cudnn). +* *(Optional)* + [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_7) + to improve latency and throughput for inference. + +## Step-by-step instructions + +* {Linux} + + ### 1. System requirements + + * Ubuntu 16.04 or higher (64-bit) + + TensorFlow only officially supports Ubuntu. 
However, the following + instructions may also work for other Linux distros. + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. Create a virtual environment with [venv](https://docs.python.org/3/library/venv.html){:.external} + + The venv module is part of Python’s standard library and is the officially recommended way to create virtual environments. + + Navigate to your desired virtual environments directory and create a new venv environment named `tf` with the following command. + + ```bash + python3 -m venv tf + ``` + + You can activate it with the following command. + + ```bash + source tf/bin/activate + ``` + + Make sure that the virtual environment is activated for the rest of the installation. + + ### 4. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + **Note:** Do not install TensorFlow with `conda`. It may not have the latest stable version. `pip` is recommended since TensorFlow is only officially released to PyPI. + + ### 6. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. **If not continue to the next step**. + + ### 6. [GPU only] Virtual environment configuration + + If the GPU test in the last section was unsuccessful, the most likely cause is that components aren't being detected, + and/or conflict with the existing system CUDA installation. So you need to add some symbolic links to fix this. + + * Create symbolic links to NVIDIA shared libraries: + + ```bash + pushd $(dirname $(python -c 'print(__import__("tensorflow").__file__)')) + ln -svf ../nvidia/*/lib/*.so* . 
+ popd + ``` + + * Create a symbolic link to ptxas: + + ```bash + ln -sf $(find $(dirname $(dirname $(python -c "import nvidia.cuda_nvcc; + print(nvidia.cuda_nvcc.__file__)"))/*/bin/) -name ptxas -print -quit) $VIRTUAL_ENV/bin/ptxas + ``` + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + + + +* {MacOS} + + ### 1. System requirements + + * macOS 10.12.6 (Sierra) or higher (64-bit) + + Note: While TensorFlow supports Apple Silicon (M1), packages that include + custom C++ extensions for TensorFlow also need to be compiled for Apple M1. + Some packages, like + [tensorflow_decision_forests](https://www.tensorflow.org/decision_forests), + publish M1-compatible versions, but many packages don't. To use those + libraries, you will have to use TensorFlow with x86 emulation and Rosetta. + + Currently, there is no official GPU support for running TensorFlow on + macOS. The following instructions are for running on CPU. + + ### 2. Check Python version + + Check if your Python environment is already configured: + + Note: Requires Python 3.9–3.11, and pip >= 20.3 for macOS. + + ```bash + python3 --version + python3 -m pip --version + ``` + + ### 3. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + pip install tensorflow + ``` + + ### 4. Verify the installation + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-[wsl2]), + or install `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-). + + ### 1. System requirements + + * Windows 7 or higher (64-bit) + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). + Installing the Windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. TensorFlow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ### 2. Install Microsoft Visual C++ Redistributable + + Install the *Microsoft Visual C++ Redistributable for Visual Studio 2015, + 2017, and 2019*. Starting with the TensorFlow 2.1.0 version, the + `msvcp140_1.dll` file is required from this package (which may not be + provided from older redistributable packages). The redistributable comes + with *Visual Studio 2019* but can be installed separately: + + 1. 
Go to the + [Microsoft Visual C++ downloads](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). + 2. Scroll down the page to the *Visual Studio 2015, 2017 and 2019* section. + 3. Download and install the *Microsoft Visual C++ Redistributable for + Visual Studio 2015, 2017 and 2019* for your platform. + + Make sure + [long paths are enabled](https://superuser.com/questions/1119883/windows-10-enable-ntfs-long-paths-policy-option-missing) + on Windows. + + ### 3. Install Miniconda + + [Miniconda](https://docs.conda.io/en/latest/miniconda.html) + is the recommended approach for installing TensorFlow with GPU support. + It creates a separate environment to avoid changing any installed + software in your system. This is also the easiest way to install the + required software especially for the GPU setup. + + Download the + [Miniconda Windows Installer](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe). + Double-click the downloaded file and follow the instructions on the screen. + + ### 4. Create a conda environment + + Create a new conda environment named `tf` with the following command. + + ```bash + conda create --name tf python=3.9 + ``` + + You can deactivate and activate it with the following commands. + + ```bash + conda deactivate + conda activate tf + ``` + + Make sure it is activated for the rest of the installation. + + ### 5. GPU setup + + You can skip this section if you only run TensorFlow on CPU. + + First install + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. + + Then install the CUDA, cuDNN with conda. + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + ``` + + ### 6. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + Note: Do not install TensorFlow with conda. It may not have the latest stable + version. pip is recommended since TensorFlow is only officially released to + PyPI. + + ```bash + # Anything above 2.10 is not supported on the GPU on Windows Native + pip install "tensorflow<2.11" + ``` + + ### 7. Verify the installation + + Verify the CPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + +* {Windows WSL2} + + ### 1. System requirements + + * Windows 10 19044 or higher (64-bit). This corresponds to Windows 10 + version 21H2, the November 2021 update. + + See the following documents to: + + * [Download the latest Windows 10 update](https://www.microsoft.com/software-download/windows10). + * [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + * [Setup NVIDIA® GPU support in WSL2](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. 
Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + ### 4. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + + +## Package location + +A few installation mechanisms require the URL of the TensorFlow Python package. +The value you specify depends on your Python version. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Version | URL |
| ------- | --- |
| **Linux x86** | |
| Python 3.9 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.9 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.10 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.10 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.11 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.11 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.12 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.12 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.13 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.13 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| **Linux Arm64 (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| **macOS x86 (CPU-only)** — Caution: TensorFlow 2.16 was the last TensorFlow release that supported macOS x86 | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp39-cp39-macosx_10_15_x86_64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp310-cp310-macosx_10_15_x86_64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp311-cp311-macosx_10_15_x86_64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp312-cp312-macosx_10_15_x86_64.whl |
| **macOS Arm64 (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-macosx_12_0_arm64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-macosx_12_0_arm64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-macosx_12_0_arm64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-macosx_12_0_arm64.whl |
| **Windows (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-win_amd64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-win_amd64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-win_amd64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-win_amd64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-win_amd64.whl |
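For example, a minimal sketch of installing directly from one of the URLs above with pip (assuming a Linux x86 machine, Python 3.11, and a CPU-only setup; pick the wheel that matches your own platform and Python version):

```bash
# Install the CPU-only wheel for Python 3.11 on Linux x86 directly from its URL.
python3 -m pip install "https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
# Verify the installation:
python3 -c "import tensorflow as tf; print(tf.__version__)"
```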
    diff --git a/site/en/install/source.md b/site/en/install/source.md index b2f8ee8cb0b..dc847f017e9 100644 --- a/site/en/install/source.md +++ b/site/en/install/source.md @@ -4,8 +4,8 @@ Build a TensorFlow *pip* package from source and install it on Ubuntu Linux and macOS. While the instructions might work for other systems, it is only tested and supported for Ubuntu and macOS. -Note: We already provide well-tested, pre-built -[TensorFlow packages](./pip.html) for Linux and macOS systems. +Note: Well-tested, pre-built [TensorFlow packages](./pip.md) for Linux and macOS +systems are already provided. ## Setup for Linux and macOS @@ -25,9 +25,6 @@ Install the following build tools to configure your development environment.

    Requires Xcode 9.2 or later.

    Install using the Homebrew package manager:

    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
     brew install python
     
    @@ -37,13 +34,12 @@ Install the TensorFlow *pip* package dependencies (if using a virtual environment, omit the `--user` argument):
    -pip install -U --user pip numpy wheel
    -pip install -U --user keras_preprocessing --no-deps
    +pip install -U --user pip
     
    Note: A `pip` version >19.0 is required to install the TensorFlow 2 `.whl` package. Additional required dependencies are listed in the -setup.py +setup.py.tpl file under `REQUIRED_PACKAGES`. ### Install Bazel @@ -54,32 +50,83 @@ Bazel and automatically downloads the correct Bazel version for TensorFlow. For ease of use, add Bazelisk as the `bazel` executable in your `PATH`. If Bazelisk is not available, you can manually -[install Bazel](https://docs.bazel.build/versions/master/install.html). Make -sure to install a supported Bazel version: any version between -`_TF_MIN_BAZEL_VERSION` and `_TF_MAX_BAZEL_VERSION` as specified in -`tensorflow/configure.py`. +[install Bazel](https://bazel.build/install). Make +sure to install the correct Bazel version from TensorFlow's +[.bazelversion](https://github.com/tensorflow/tensorflow/blob/master/.bazelversion) +file. + +### Install Clang (recommended, Linux only) + +Clang is a C/C++/Objective-C compiler that is compiled in C++ based on LLVM. It +is the default compiler to build TensorFlow starting with TensorFlow 2.13. The +current supported version is LLVM/Clang 17. + +[LLVM Debian/Ubuntu nightly packages](https://apt.llvm.org) provide an automatic +installation script and packages for manual installation on Linux. Make sure you +run the following command if you manually add llvm apt repository to your +package sources: + +
    +sudo apt-get update && sudo apt-get install -y llvm-17 clang-17
    +
+ +In this case, `/usr/lib/llvm-17/bin/clang` is the actual path to clang. + +Alternatively, you can download and unpack the pre-built +[Clang + LLVM 17](https://github.com/llvm/llvm-project/releases/tag/llvmorg-17.0.2). + +Below is an example of the steps you can take to set up the downloaded Clang + LLVM +17 binaries on Debian/Ubuntu operating systems: + +1. Change to the desired destination directory: `cd ` + +1. Download and extract an archive file (suitable for your architecture): +
    +    wget https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.2/clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    tar -xvf clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    
    + +1. Copy the extracted contents (directories and files) to `/usr` (you may need + sudo permissions, and the correct directory may vary by distribution). This + effectively installs Clang and LLVM, and adds it to the path. You should not + have to replace anything, unless you have a previous installation, in which + case you should replace the files: +
    +    cp -r clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04/* /usr
    +    
    + +1. Check the obtained Clang + LLVM 17 binaries version: +
    +    clang --version
    +    
    + +1. Now that `/usr/bin/clang` is the actual path to your new clang. You can run + the `./configure` script or manually set environment variables `CC` and + `BAZEL_COMPILER` to this path. ### Install GPU support (optional, Linux only) There is *no* GPU support for macOS. -Read the [GPU support](./gpu.md) guide to install the drivers and additional +Read the [GPU support](./pip.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. Note: It is easier to set up one of TensorFlow's GPU-enabled [Docker images](#docker_linux_builds). ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external}: +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow):
     git clone https://github.com/tensorflow/tensorflow.git
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -89,16 +136,21 @@ git checkout branch_name  # r2.2, r2.3, etc.
     
     ## Configure the build
     
    -Configure your system build by running the `./configure` at the root of your
    -TensorFlow source tree. This script prompts you for the location of TensorFlow
    -dependencies and asks for additional build configuration options (compiler
    -flags, for example).
    +TensorFlow builds are configured by the `.bazelrc` file in the repository's
    +root directory. The `./configure` or `./configure.py` scripts can be used to
    +adjust common settings.
    +
    +Please run the `./configure` script from the repository's root directory. This
+script will prompt you for the location of TensorFlow dependencies and ask for
    +additional build configuration options (compiler flags, for example). Refer to
    +the _Sample session_ section for details.
     
     
     ./configure
     
    -If using a virtual environment, `python configure.py` prioritizes paths +There is also a python version of this script, `./configure.py`. If using a +virtual environment, `python configure.py` prioritizes paths within the environment, whereas `./configure` prioritizes paths outside the environment. In both cases you can change the default. @@ -111,65 +163,47 @@ session may differ):

    View sample configuration session

     ./configure
    -You have bazel 3.0.0 installed.
    -Please specify the location of python. [Default is /usr/bin/python3]: 
    +You have bazel 6.1.0 installed.
    +Please specify the location of python. [Default is /Library/Frameworks/Python.framework/Versions/3.9/bin/python3]: 
     
     
     Found possible Python library paths:
    -  /usr/lib/python3/dist-packages
    -  /usr/local/lib/python3.6/dist-packages
    -Please input the desired Python library path to use.  Default is [/usr/lib/python3/dist-packages]
    -
    -Do you wish to build TensorFlow with OpenCL SYCL support? [y/N]: 
    -No OpenCL SYCL support will be enabled for TensorFlow.
    +  /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages
    +Please input the desired Python library path to use.  Default is [/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages]
     
    -Do you wish to build TensorFlow with ROCm support? [y/N]: 
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
     No ROCm support will be enabled for TensorFlow.
     
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    -
    -Do you wish to build TensorFlow with TensorRT support? [y/N]: 
    -No TensorRT support will be enabled for TensorFlow.
    -
    -Found CUDA 10.1 in:
    -    /usr/local/cuda-10.1/targets/x86_64-linux/lib
    -    /usr/local/cuda-10.1/targets/x86_64-linux/include
    -Found cuDNN 7 in:
    -    /usr/lib/x86_64-linux-gnu
    -    /usr/include
    -
    -
    -Please specify a list of comma-separated CUDA compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus. Each capability can be specified as "x.y" or "compute_xy" to include both virtual and binary GPU code, or as "sm_xy" to only include the binary code.
    -Please note that each additional compute capability significantly increases your build time and binary size, and that TensorFlow only supports compute capabilities >= 3.5 [Default is: 3.5,7.0]: 6.1
    -
    +Do you wish to build TensorFlow with CUDA support? [y/N]:
    +No CUDA support will be enabled for TensorFlow.
     
    -Do you want to use clang as CUDA compiler? [y/N]: 
    -nvcc will be used as CUDA compiler.
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Clang will be used to compile TensorFlow.
     
    -Please specify which gcc should be used by nvcc as the host compiler. [Default is /usr/bin/gcc]: 
    +Please specify the path to clang executable. [Default is /usr/lib/llvm-16/bin/clang]:
     
    +You have Clang 16.0.4 installed.
     
    -Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -march=native -Wno-sign-compare]: 
    +Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -Wno-sign-compare]:
     
     
    -Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: 
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: n
     Not configuring the WORKSPACE for Android builds.
     
    +Do you wish to build TensorFlow with iOS support? [y/N]: n
    +No iOS support will be enabled for TensorFlow.
    +
     Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
     	--config=mkl         	# Build with MKL support.
    +	--config=mkl_aarch64 	# Build with oneDNN and Compute Library for the Arm Architecture (ACL).
     	--config=monolithic  	# Config for mostly static monolithic build.
    -	--config=ngraph      	# Build with Intel nGraph support.
     	--config=numa        	# Build with NUMA support.
     	--config=dynamic_kernels	# (Experimental) Build kernels into separate shared objects.
    -	--config=v2          	# Build TensorFlow 2.x instead of 1.x.
    +	--config=v1          	# Build with TensorFlow 1 API instead of TF 2 API.
     Preconfigured Bazel build configs to DISABLE default on features:
    -	--config=noaws       	# Disable AWS S3 filesystem support.
     	--config=nogcp       	# Disable GCP support.
    -	--config=nohdfs      	# Disable HDFS support.
     	--config=nonccl      	# Disable NVIDIA NCCL support.
    -Configuration finished
    +
     
    @@ -177,7 +211,14 @@ Configuration finished #### GPU support -For [GPU support](./gpu.md), set `cuda=Y` during configuration and specify the +##### from v.2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the +versions of CUDA and cuDNN if required. Bazel will download CUDA and CUDNN +packages automatically or point to CUDA/CUDNN/NCCL redistributions on local file +system if required. + +##### before v.2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the versions of CUDA and cuDNN. If your system has multiple versions of CUDA or cuDNN installed, explicitly set the version instead of relying on the default. `./configure` creates symbolic links to your system's CUDA libraries—so if you @@ -188,8 +229,8 @@ building. For compilation optimization flags, the default (`-march=native`) optimizes the generated code for your machine's CPU type. However, if building TensorFlow for -a different CPU type, consider a more specific optimization flag. See the -[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html){:.external} +a different CPU type, consider a more specific optimization flag. Check the +[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html) for examples. #### Preconfigured configurations @@ -201,81 +242,55 @@ There are some preconfigured build configs available that can be added to the [CONTRIBUTING.md](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) for details. * `--config=mkl` —Support for the - [Intel® MKL-DNN](https://github.com/intel/mkl-dnn){:.external}. + [Intel® MKL-DNN](https://github.com/intel/mkl-dnn). * `--config=monolithic` —Configuration for a mostly static, monolithic build. -* `--config=v1` —Build TensorFlow 1.x instead of 2.x. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - - -## Build the pip package -### TensorFlow 2.x -[Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build` to create the TensorFlow 2.x package with *CPU-only* support: +## Build and install the pip package -
    -bazel build [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -Note: GPU support can be enabled with `cuda=Y` during the `./configure` stage. - -### GPU support - -To build a TensorFlow package builder with GPU support: - -
    -bazel build --config=cuda [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -### TensorFlow 1.x - -To build an older TensorFlow 1.x package, use the `--config=v1` option: - -
    -bazel build --config=v1 [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    +#### Bazel build options -### Bazel build options - -See the Bazel [command-line reference](https://docs.bazel.build/versions/master/command-line-reference.html) +Refer to the Bazel +[command-line reference](https://bazel.build/reference/command-line-reference) for -[build options](https://docs.bazel.build/versions/master/command-line-reference.html#build-options). +[build options](https://bazel.build/reference/command-line-reference#build-options). Building TensorFlow from source can use a lot of RAM. If your system is memory-constrained, limit Bazel's RAM usage with: `--local_ram_resources=2048`. -The [official TensorFlow packages](./pip.html) are built with a GCC 7.3 -toolchain that complies with the manylinux2010 package standard. - -For GCC 5 and later, compatibility with the older ABI can be built using: -`--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"`. ABI compatibility ensures that custom -ops built against the official TensorFlow package continue to work with the -GCC 5 built package. +The [official TensorFlow packages](./pip.md) are built with a Clang toolchain +that complies with the manylinux2014 package standard. ### Build the package -The `bazel build` command creates an executable named `build_pip_package`—this -is the program that builds the `pip` package. Run the executable as shown -below to build a `.whl` package in the `/tmp/tensorflow_pkg` directory. +To build pip package, you need to specify `--repo_env=WHEEL_NAME` flag. +depending on the provided name, package will be created, e.g: -To build from a release branch: +To build tensorflow CPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu
    +
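If the build fails because the machine runs out of memory, here is a minimal sketch of the same CPU build with Bazel's RAM capped, using the `--local_ram_resources=2048` figure mentioned in the build-options note above:

```bash
# CPU wheel build with Bazel's RAM usage limited (value in MB; adjust for your machine).
bazel build //tensorflow/tools/pip_package:wheel \
  --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu \
  --local_ram_resources=2048
```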
    +To build tensorflow GPU package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda --config=cuda_wheel
     
    -To build from master, use `--nightly_flag` to get the right dependencies: +To build tensorflow TPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_tpu --config=tpu
    +
    +To build nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build CPU nightly package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package --nightly_flag /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tf_nightly_cpu
     
-Although it is possible to build both CUDA and non-CUDA configurations under the -same source tree, it's recommended to run `bazel clean` when switching between -these two configurations in the same source tree. +As a result, the generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
    +
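For instance, a quick sketch of listing that directory to find the exact wheel filename before installing (the filename varies by TensorFlow version and platform):

```bash
ls bazel-bin/tensorflow/tools/pip_package/wheel_house/
```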
    ### Install the package @@ -283,7 +298,7 @@ The filename of the generated `.whl` file depends on the TensorFlow version and your platform. Use `pip install` to install the package, for example:
    -pip install /tmp/tensorflow_pkg/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Success: TensorFlow is now installed. @@ -293,17 +308,17 @@ Success: TensorFlow is now installed. TensorFlow's Docker development images are an easy way to set up an environment to build Linux packages from source. These images already contain the source -code and dependencies required to build TensorFlow. See the TensorFlow -[Docker guide](./docker.md) for installation and the -[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external}. +code and dependencies required to build TensorFlow. Go to the TensorFlow +[Docker guide](./docker.md) for installation instructions and the +[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/). ### CPU-only The following example uses the `:devel` image to build a CPU-only package from -the latest TensorFlow source code. See the [Docker guide](./docker.md) for +the latest TensorFlow source code. Check the [Docker guide](./docker.md) for available TensorFlow `-devel` tags. -Download the latest development image and start a Docker container that we'll +Download the latest development image and start a Docker container that you'll use to build the *pip* package:
    @@ -331,20 +346,20 @@ docker run -it -w /tensorflow -v /path/to/tensorflow:/tensorflow -v $
     With the source tree set up, build the TensorFlow package within the container's
     virtual environment:
     
    -1.  Configure the build—this prompts the user to answer build configuration
    -    questions.
    -2.  Build the tool used to create the *pip* package.
    -3.  Run the tool to create the *pip* package.
    -4.  Adjust the ownership permissions of the file for outside the container.
    +1.  Optional: Configure the build—this prompts the user to answer build
    +    configuration questions.
    +2.  Build the *pip* package.
    +3.  Adjust the ownership permissions of the file for outside the container.
     
     
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    -
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    -
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +./configure  # if necessary
    +
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu --config=opt
    +
+
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Install and verify the package within the container: @@ -352,7 +367,7 @@ Install and verify the package within the container:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(tf.__version__)"
     
    @@ -365,12 +380,15 @@ On your host machine, the TensorFlow *pip* package is in the current directory ### GPU support +Note: Starting from Tensorflow v.2.18.0 the wheels can be built from +source on a machine without GPUs and without NVIDIA driver installed. + Docker is the easiest way to build GPU support for TensorFlow since the *host* machine only requires the -[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} -(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). See the -[GPU support guide](./gpu.md) and the TensorFlow [Docker guide](./docker.md) to -set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker){:.external} +[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) +(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). Refer to the +[GPU support guide](./pip.md) and the TensorFlow [Docker guide](./docker.md) to +set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) (Linux only). The following example downloads the TensorFlow `:devel-gpu` image and uses @@ -388,13 +406,15 @@ Then, within the container's virtual environment, build the TensorFlow package with GPU support:
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
    +./configure  # if necessary
     
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda \
    +--config=cuda_wheel --config=opt
    +
     
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Install and verify the package within the container and check for a GPU: @@ -402,7 +422,7 @@ Install and verify the package within the container and check for a GPU:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(\"Num GPUs Available: \", len(tf.config.list_physical_devices('GPU')))"
     
    @@ -419,6 +439,20 @@ Success: TensorFlow is now installed. + + + + + + + + + + + + + + @@ -448,6 +482,20 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | GCC 7.3.1 | Bazel 3.1.0 |
    + + + + + + + + + + + + + + @@ -479,6 +527,16 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools | cuDNN | CUDA |
| ------- | -------------- | -------- | ----------- | ----- | ---- |
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 | 9.3 | 12.5 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.9 | 12.2 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.7 | 11.8 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 | 8.1 | 11.2 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 | 8.1 | 11.2 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 | 8.1 | 11.2 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 | 8.1 | 11.2 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.5.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.4.0 | 3.6-3.8 | GCC 7.3.1 | Bazel 3.1.0 | 8.0 | 11.0 |
    + + + + + + + + + + diff --git a/site/en/install/source_windows.md b/site/en/install/source_windows.md index cff252e0b9e..efc0f7a9286 100644 --- a/site/en/install/source_windows.md +++ b/site/en/install/source_windows.md @@ -1,9 +1,9 @@ # Build from source on Windows -Build a TensorFlow *pip* package from source and install it on Windows. +Build a TensorFlow *pip* package from the source and install it on Windows. Note: We already provide well-tested, pre-built -[TensorFlow packages](./pip.html) for Windows systems. +[TensorFlow packages](./pip.md) for Windows systems. ## Setup for Windows @@ -13,16 +13,16 @@ environment. ### Install Python and the TensorFlow package dependencies Install a -[Python 3.6.x 64-bit release for Windows](https://www.python.org/downloads/windows/){:.external}. +[Python 3.9+ 64-bit release for Windows](https://www.python.org/downloads/windows/). Select *pip* as an optional feature and add it to your `%PATH%` environmental variable. Install the TensorFlow *pip* package dependencies:
    -pip3 install six numpy wheel
    -pip3 install keras_applications==1.0.6 --no-deps
    -pip3 install keras_preprocessing==1.0.5 --no-deps
    +pip3 install -U pip
    +pip3 install -U six numpy wheel packaging
    +pip3 install -U keras_preprocessing --no-deps
     
    The dependencies are listed in the @@ -42,38 +42,53 @@ Add the location of the Bazel executable to your `%PATH%` environment variable. ### Install MSYS2 -[Install MSYS2](https://www.msys2.org/){:.external} for the bin tools needed to +[Install MSYS2](https://www.msys2.org/) for the bin tools needed to build TensorFlow. If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your `%PATH%` environment variable. Then, using `cmd.exe`, run:
    +pacman -Syu (requires a console restart)
     pacman -S git patch unzip
    +pacman -S git patch unzip rsync
     
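If `C:\msys64\usr\bin` is not already on your path, here is a minimal sketch of adding it for the current `cmd.exe` session (assuming the default `C:\msys64` install location mentioned above):

```
set PATH=C:\msys64\usr\bin;%PATH%
```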
    -### Install Visual C++ Build Tools 2019 +Note: Clang will be the preferred compiler to build TensorFlow CPU wheels on the Windows Platform starting with TF 2.16.1 The currently supported version is LLVM/clang 17.0.6. -Install the *Visual C++ build tools 2019*. This comes with *Visual Studio 2019* +Note: To build with Clang on Windows, it is required to install both LLVM and Visual C++ Build tools as although Windows uses clang-cl.exe as the compiler, Visual C++ Build tools are needed to link to Visual C++ libraries + +### Install Visual C++ Build Tools 2022 + +Install the *Visual C++ build tools 2022*. This comes with *Visual Studio Community 2022* but can be installed separately: 1. Go to the - [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/){:.external}, -2. Select *Redistributables and Build Tools*, + [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/), +2. Select *Tools for Visual Studio or Other Tools, Framework and Redistributables*, 3. Download and install: - - *Microsoft Visual C++ 2019 Redistributable* - - *Microsoft Build Tools 2019* + - *Build Tools for Visual Studio 2022* + - *Microsoft Visual C++ Redistributables for Visual Studio 2022* + +Note: TensorFlow is tested against the *Visual Studio Community 2022*. + +### Install LLVM + +1. Go to the + [LLVM downloads](https://github.com/llvm/llvm-project/releases/), +2. Download and install Windows-compatible LLVM in C:/Program Files/LLVM e.g., LLVM-17.0.6-win64.exe -Note: TensorFlow is tested against the *Visual Studio 2019*. ### Install GPU support (optional) See the Windows [GPU support](./gpu.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. +Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external} +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow) (`git` is installed with MSYS2):
    @@ -81,8 +96,8 @@ Use [Git](https://git-scm.com/){:.external} to clone the
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -92,11 +107,38 @@ git checkout branch_name  # r1.9, r1.10, etc.
     Key Point: If you're having build problems on the latest development branch, try
     a release branch that is known to work.
     
+## Optional: Environment Variable Set Up
    +Run the following commands before running the build command to avoid issues with package creation:
+(If these variables were already set while installing the packages, you can skip this step.) Run `set` to check that all the paths were set correctly, or run `echo %VARIABLE_NAME%` (e.g., `echo %BAZEL_VC%`) to check the path set for a specific environment variable.
    +
    + Python path set up issue [tensorflow:issue#59943](https://github.com/tensorflow/tensorflow/issues/59943),[tensorflow:issue#9436](https://github.com/tensorflow/tensorflow/issues/9436),[tensorflow:issue#60083](https://github.com/tensorflow/tensorflow/issues/60083)
    +
    +
    +set PATH=path/to/python;%PATH% # [e.g. (C:/Python311)]
    +set PATH=path/to/python/Scripts;%PATH% # [e.g. (C:/Python311/Scripts)] 
    +set PYTHON_BIN_PATH=path/to/python_virtualenv/Scripts/python.exe 
+set PYTHON_LIB_PATH=path/to/python_virtualenv/lib/site-packages 
    +set PYTHON_DIRECTORY=path/to/python_virtualenv/Scripts 
    +
    + +Bazel/MSVC/CLANG path set up issue [tensorflow:issue#54578](https://github.com/tensorflow/tensorflow/issues/54578) + +
    +set BAZEL_SH=C:/msys64/usr/bin/bash.exe 
    +set BAZEL_VS=C:/Program Files/Microsoft Visual Studio/2022/BuildTools 
    +set BAZEL_VC=C:/Program Files/Microsoft Visual Studio/2022/BuildTools/VC 
    +set Bazel_LLVM=C:/Program Files/LLVM (explicitly tell Bazel where LLVM is installed by BAZEL_LLVM, needed while using CLANG)
    +set PATH=C:/Program Files/LLVM/bin;%PATH% (Optional, needed while using CLANG as Compiler)
    +
    + +## Optional: Configure the build -## Configure the build +TensorFlow builds are configured by the `.bazelrc` file in the repository's +root directory. The `./configure` or `./configure.py` scripts can be used to +adjust common settings. -Configure your system build by running the following at the root of your -TensorFlow source tree: +If you need to change the configuration, run the `./configure` script from +the repository's root directory.
     python ./configure.py
    @@ -111,92 +153,99 @@ differ):
     

    View sample configuration session

     python ./configure.py
    -Starting local Bazel server and connecting to it...
    -................
    -You have bazel 0.15.0 installed.
    -Please specify the location of python. [Default is C:\python36\python.exe]:
    +You have bazel 6.5.0 installed.
    +Please specify the location of python. [Default is C:\Python311\python.exe]:
     
     Found possible Python library paths:
    -  C:\python36\lib\site-packages
    -Please input the desired Python library path to use.  Default is [C:\python36\lib\site-packages]
    -
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    +C:\Python311\lib\site-packages
    +Please input the desired Python library path to use.  Default is [C:\Python311\lib\site-packages]
     
    -Please specify the CUDA SDK version you want to use. [Leave empty to default to CUDA 9.0]:
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
    +No ROCm support will be enabled for TensorFlow.
     
    -Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]:
    +WARNING: Cannot build with CUDA support on Windows.
    +Starting in TF 2.11, CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2.
     
    -Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]: 7.0
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Add "--config=win_clang" to compile TensorFlow with CLANG.
     
    -Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]: C:\tools\cuda
    +Please specify the path to clang executable. [Default is C:\Program Files\LLVM\bin\clang.EXE]:
     
    -Please specify a list of comma-separated Cuda compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
    -Please note that each additional compute capability significantly increases your build time and binary size. [Default is: 3.5,7.0]: 3.7
    +You have Clang 17.0.6 installed.
     
     Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is /arch:AVX]:
     
     Would you like to override eigen strong inline for some C++ compilation to reduce the compilation time? [Y/n]:
     Eigen strong inline overridden.
     
    -Configuration finished
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]:
    +Not configuring the WORKSPACE for Android builds.
    +
    +Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
    +        --config=mkl            # Build with MKL support.
    +        --config=mkl_aarch64    # Build with oneDNN and Compute Library for the Arm Architecture (ACL).
    +        --config=monolithic     # Config for mostly static monolithic build.
    +        --config=numa           # Build with NUMA support.
    +        --config=dynamic_kernels        # (Experimental) Build kernels into separate shared objects.
    +        --config=v1             # Build with TensorFlow 1 API instead of TF 2 API.
    +Preconfigured Bazel build configs to DISABLE default on features:
    +        --config=nogcp          # Disable GCP support.
    +        --config=nonccl         # Disable NVIDIA NCCL support.
     
    -### Configuration options - -For [GPU support](./gpu.md), specify the versions of CUDA and cuDNN. If your -system has multiple versions of CUDA or cuDNN installed, explicitly set the -version instead of relying on the default. `./configure.py` creates symbolic -links to your system's CUDA libraries—so if you update your CUDA library paths, -this configuration step must be run again before building. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - +## Build and install the pip package -## Build the pip package +The pip package is built in two steps. A `bazel build` command creates a +"package-builder" program. You then run the package-builder to create the +package. -### TensorFlow 2.x +### Build the package-builder tensorflow:master repo has been updated to build 2.x by default. [Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build ` to create the TensorFlow package. +`bazel build ` to create the TensorFlow package-builder.
    -bazel build //tensorflow/tools/pip_package:build_pip_package
    +bazel build //tensorflow/tools/pip_package:wheel
     
    +#### CPU-only -### TensorFlow 1.x - -To build the 1.x version of TensorFlow from master, use -`bazel build --config=v1` to create a TensorFlow 1.x package. +Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with MSVC
    -bazel build --config=v1 //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=opt --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
-#### CPU-only - -Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with CLANG +Use `--config=win_clang` to build TensorFlow with the CLANG compiler:
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=win_clang --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
    #### GPU support +Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + To make the TensorFlow package builder with GPU support:
     bazel build --config=opt --config=cuda --define=no_tensorflow_py_deps=true //tensorflow/tools/pip_package:build_pip_package
     
    +Commands to clean the bazel cache to resolve errors due to invalid or outdated cached data, bazel clean with --expunge flag removes files permanently + +
    +bazel clean 
    +bazel clean --expunge  
    +
    + #### Bazel build options -Use this option when building to avoid issue with package creation: +Use this option when building to avoid issues with package creation: [tensorflow:issue#22390](https://github.com/tensorflow/tensorflow/issues/22390)
    @@ -215,30 +264,37 @@ to suppress nvcc warning messages.
     
     ### Build the package
     
    -The `bazel build` command creates an executable named `build_pip_package`—this
    -is the program that builds the `pip` package. For example, the following builds
    -a `.whl` package in the `C:/tmp/tensorflow_pkg` directory:
    +To build a pip package, you need to specify the --repo_env=WHEEL_NAME flag. 
    +Depending on the provided name, the package will be created. For example:
     
    -
    -bazel-bin\tensorflow\tools\pip_package\build_pip_package C:/tmp/tensorflow_pkg
    +To build tensorflow CPU package:
    +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
    +
    + +To build nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build CPU nightly package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tf_nightly_cpu
    +
    + +As a result, generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
     
    -Although it is possible to build both CUDA and non-CUDA configs under the -same source tree, we recommend running `bazel clean` when switching between -these two configurations in the same source tree. ### Install the package The filename of the generated `.whl` file depends on the TensorFlow version and -your platform. Use `pip3 install` to install the package, for example: +your platform. Use `pip install` to install the package, for example: -
    -pip3 install C:/tmp/tensorflow_pkg/tensorflow-version-cp36-cp36m-win_amd64.whl
    +
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Success: TensorFlow is now installed. - ## Build using the MSYS shell TensorFlow can also be built using the MSYS shell. Make the changes listed @@ -260,12 +316,12 @@ considered a Unix absolute path since it starts with a slash.) Add the Bazel and Python installation directories to your `$PATH` environmental variable. If Bazel is installed to `C:\tools\bazel.exe`, and Python to -`C:\Python36\python.exe`, set your `PATH` with: +`C:\Python\python.exe`, set your `PATH` with:
     # Use Unix-style with ':' as separator
     export PATH="/c/tools:$PATH"
    -export PATH="/c/Python36:$PATH"
    +export PATH="/c/path/to/Python:$PATH"
     
    For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: @@ -276,6 +332,8 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: export PATH="/c/tools/cuda/bin:$PATH"
    +Note: Starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + ## Tested build configurations @@ -283,6 +341,19 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.16.1 | 3.9-3.12 | Clang from Xcode 13.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang from Xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang from Xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang from Xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | Clang from Xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | Clang from Xcode 10.3 | Bazel 3.1.0 |
    + + + + + + + + + + + + + @@ -309,9 +380,14 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.20.0 | 3.9-3.13 | CLANG 18.1.4 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | CLANG 18.1.4 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.12.0 | 3.8-3.11 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | MSVC 2019 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | MSVC 2019 | Bazel 3.1.0 |
### GPU

+Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin
+
+
+
+
diff --git a/site/en/io/README.md b/site/en/io/README.md
deleted file mode 100644
index 24249b7ac03..00000000000
--- a/site/en/io/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow SIG IO
-
-These docs are available here: https://github.com/tensorflow/io/tree/master/docs
diff --git a/site/en/js/README.md b/site/en/js/README.md
deleted file mode 100644
index 5a3a34677b4..00000000000
--- a/site/en/js/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow.js
-
-These docs are available here: https://github.com/tensorflow/tfjs-website/tree/master/docs
diff --git a/site/en/lattice/README.md b/site/en/lattice/README.md
deleted file mode 100644
index 27ce3c8ce55..00000000000
--- a/site/en/lattice/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Lattice
-
-These docs are available here: https://github.com/tensorflow/lattice/tree/master/docs
diff --git a/site/en/lite/README.md b/site/en/lite/README.md
deleted file mode 100644
index 43c3249dc7b..00000000000
--- a/site/en/lite/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Lite
-
-These docs are available here:
-https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/g3doc
diff --git a/site/en/mlir/README.md b/site/en/mlir/README.md
deleted file mode 100644
index 614f9f693c8..00000000000
--- a/site/en/mlir/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow MLIR
-
-These docs are available here: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/mlir/g3doc
diff --git a/site/en/neural_structured_learning/README.md b/site/en/neural_structured_learning/README.md
deleted file mode 100644
index 85c905af170..00000000000
--- a/site/en/neural_structured_learning/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# Neural Structured Learning
-
-These docs are available here: https://github.com/tensorflow/neural-structured-learning/tree/master/g3doc
diff --git a/site/en/probability/README.md b/site/en/probability/README.md
deleted file mode 100644
index c17e5ba447b..00000000000
--- a/site/en/probability/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Probability
-
-These docs are available here: https://github.com/tensorflow/probability/tree/master/tensorflow_probability/g3doc
diff --git a/site/en/quantum/README.md b/site/en/quantum/README.md
deleted file mode 100644
index 78580b3dfd8..00000000000
--- a/site/en/quantum/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Quantum
-
-These docs are available here: https://github.com/tensorflow/quantum/tree/master/docs
diff --git a/site/en/r1/guide/autograph.ipynb b/site/en/r1/guide/autograph.ipynb
index 5d8d7c97999..64d631a52b3 100644
--- a/site/en/r1/guide/autograph.ipynb
+++ b/site/en/r1/guide/autograph.ipynb
@@ -66,7 +66,7 @@
       "source": [
         "> Note: This is an archived TF1 notebook. These are configured\n",
         "to run in TF2's \n",
-        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
+        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
         "but will run in TF1 as well. To use TF1 in Colab, use the\n",
         "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
         "magic."
       ]
     },
     {
@@ -78,7 +78,7 @@
       "id": "CydFK2CL7ZHA"
     },
     "source": [
-        "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md)."
+        "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/g3doc/reference/limitations.md)."
       ]
     },
     {
@@ -241,7 +241,7 @@
       "id": "m-jWmsCmByyw"
     },
     "source": [
-        "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph verson displayed in the following code blocks:"
+        "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph version displayed in the following code blocks:"
       ]
     },
     {
diff --git a/site/en/r1/guide/checkpoints.md b/site/en/r1/guide/checkpoints.md
index 682631449d5..41544f52b25 100644
--- a/site/en/r1/guide/checkpoints.md
+++ b/site/en/r1/guide/checkpoints.md
@@ -56,8 +56,8 @@ Suppose you call the Estimator's `train` method. For example:
 
 ```python
 classifier.train(
-        input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
-        steps=200)
+        input_fn=lambda: train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
 ```
 
 As suggested by the following diagrams, the first call to `train`
diff --git a/site/en/r1/guide/custom_estimators.md b/site/en/r1/guide/custom_estimators.md
index 87dce26a0dc..7bbf3573909 100644
--- a/site/en/r1/guide/custom_estimators.md
+++ b/site/en/r1/guide/custom_estimators.md
@@ -592,10 +592,10 @@ function for custom Estimators; everything else is the same.
 For more details, be sure to check out:
 
 * The
-  [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/r1/mnist),
+  [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist),
   which uses a custom estimator.
 * The TensorFlow
-  [official models repository](https://github.com/tensorflow/models/tree/master/official),
+  [official models repository](https://github.com/tensorflow/models/tree/r1.15/official),
   which contains more curated examples using custom estimators.
 * This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces
   TensorBoard.
diff --git a/site/en/r1/guide/datasets.md b/site/en/r1/guide/datasets.md
index b1ed1b6e113..d7c38bf2f92 100644
--- a/site/en/r1/guide/datasets.md
+++ b/site/en/r1/guide/datasets.md
@@ -437,7 +437,7 @@ dataset = dataset.batch(32)
 iterator = dataset.make_initializable_iterator()
 
 # You can feed the initializer with the appropriate filenames for the current
-# phase of execution, e.g. training vs. validation.
+# phase of execution, e.g., training vs. validation.
 
 # Initialize `iterator` with training data.
 training_filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"]
@@ -639,7 +639,7 @@ TODO(mrry): Add this section.
 The simplest form of batching stacks `n` consecutive elements of a dataset into
 a single element. The `Dataset.batch()` transformation does exactly this, with
 the same constraints as the `tf.stack()` operator, applied to each component
-of the elements: i.e. for each component *i*, all elements must have a tensor
+of the elements: i.e., for each component *i*, all elements must have a tensor
 of the exact same shape.
 
 ```python
diff --git a/site/en/r1/guide/debugger.md b/site/en/r1/guide/debugger.md
index 2b4b6497ec4..963765b97db 100644
--- a/site/en/r1/guide/debugger.md
+++ b/site/en/r1/guide/debugger.md
@@ -10,7 +10,7 @@ due to TensorFlow's computation-graph paradigm.
 This guide focuses on the command-line interface (CLI) of `tfdbg`. For guide on
 how to use the graphical user interface (GUI) of tfdbg, i.e., the
 **TensorBoard Debugger Plugin**, please visit
-[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md).
+[its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md).
 
 Note: The TensorFlow debugger uses a
 [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text
@@ -35,7 +35,7 @@ TensorFlow.
 Later sections of this document describe how to use **tfdbg** with
 higher-level APIs of TensorFlow, including `tf.estimator`, `tf.keras` / `keras`
 and `tf.contrib.slim`. To *observe* such an issue, run the following command
 without the debugger (the source code can be found
-[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py)):
+[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py)):
     python -m tensorflow.python.debug.examples.v1.debug_mnist
    @@ -64,7 +64,7 @@ numeric problem first surfaced.
     To add support for tfdbg in our example, all that is needed is to add the
     following lines of code and wrap the Session object with a debugger wrapper.
     This code is already added in
    -[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py),
    +[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py),
     so you can activate tfdbg CLI with the `--debug` flag at the command line.
     
     ```python
    @@ -370,7 +370,7 @@ traceback of the node's construction.
     
     From the traceback, you can see that the op is constructed at the following
     line:
    -[`debug_mnist.py`](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_mnist.py):
    +[`debug_mnist.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py):
     
     ```python
     diff = y_ * tf.log(y)
    @@ -457,7 +457,7 @@ accuracy_score = classifier.evaluate(eval_input_fn,
     predict_results = classifier.predict(predict_input_fn, hooks=hooks)
     ```
     
    -[debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
    +[debug_tflearn_iris.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
     contains a full example of how to use the tfdbg with `Estimator`s. To run this
     example, do:
     
    @@ -501,7 +501,7 @@ TensorFlow backend. You just need to replace `tf.keras.backend` with
     ## Debugging tf-slim with TFDBG
     
     TFDBG supports debugging of training and evaluation with
    -[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
    +[tf-slim](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/slim).
     As detailed below, training and evaluation require slightly different debugging
     workflows.
     
    @@ -605,7 +605,7 @@ The `watch_fn` argument accepts a `Callable` that allows you to configure what
     If your model code is written in C++ or other languages, you can also
     modify the `debug_options` field of `RunOptions` to generate debug dumps that
     can be inspected offline. See
    -[the proto definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/debug.proto)
    +[the proto definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/debug.proto)
     for more details.
     
     ### Debugging Remotely-Running Estimators
    @@ -648,7 +648,7 @@ python -m tensorflow.python.debug.cli.offline_analyzer \
            model, check out
     
        1. The profiling mode of tfdbg: `tfdbg> run -p`.
    -   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler)
    +   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/core/profiler)
           and other profiling tools for TensorFlow.
     
     **Q**: _How do I link tfdbg against my `Session` in Bazel? Why do I see an
    @@ -808,4 +808,4 @@ tensor dumps.
            and conditional breakpoints, and tying tensors to their
            graph-construction source code, all in the browser environment.
            To get started, please visit
    -       [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md).
    +       [its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md).
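
The debugger guide edited above centers on wrapping a `Session` with tfdbg and attaching debug hooks to Estimators. A minimal sketch of that workflow, assuming the TF 1.x `tensorflow.python.debug` module the guide refers to (the toy graph here is illustrative, not from the guide):

```python
import tensorflow.compat.v1 as tf
from tensorflow.python import debug as tf_debug

tf.disable_eager_execution()

x = tf.placeholder(tf.float32, name="x")
y = tf.log(x, name="y")  # produces -inf/nan for x <= 0, the kind of issue tfdbg surfaces

sess = tf.Session()
# Wrap the session so every run() call drops into the tfdbg CLI.
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
print(sess.run(y, feed_dict={x: 0.0}))

# For Estimators, the equivalent is attaching a debug hook, e.g.:
# hooks = [tf_debug.LocalCLIDebugHook()]
# classifier.train(input_fn=train_input_fn, hooks=hooks)
```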
    diff --git a/site/en/r1/guide/distribute_strategy.ipynb b/site/en/r1/guide/distribute_strategy.ipynb
    index f6d85912e16..4dd502d331b 100644
    --- a/site/en/r1/guide/distribute_strategy.ipynb
    +++ b/site/en/r1/guide/distribute_strategy.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -118,7 +118,7 @@
             "## Types of strategies\n",
             "`tf.distribute.Strategy` intends to cover a number of use cases along different axes. Some of these combinations are currently supported and others will be added in the future. Some of these axes are:\n",
             "\n",
    -        "* Syncronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
    +        "* Synchronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
             "* Hardware platform: Users may want to scale their training onto multiple GPUs on one machine, or multiple machines in a network (with 0 or more GPUs each), or on Cloud TPUs.\n",
             "\n",
             "In order to support these use cases, we have 4 strategies available. In the next section we will talk about which of these are supported in which scenarios in TF."
    @@ -223,7 +223,7 @@
             "id": "KY1nJHNkMl7b"
           },
           "source": [
    -        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggragated before being applied to variables."
    +        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggregated before being applied to variables."
           ]
         },
         {
    @@ -245,7 +245,7 @@
             "\n",
             "`tf.distribute.experimental.MultiWorkerMirroredStrategy` is very similar to `MirroredStrategy`. It implements synchronous distributed training across multiple workers, each with potentially multiple GPUs. Similar to `MirroredStrategy`, it creates copies of all variables in the model on each device across all workers.\n",
             "\n",
    -        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
    +        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
             "\n",
             "It also implements additional performance optimizations. For example, it includes a static optimization that converts multiple all-reductions on small tensors into fewer all-reductions on larger tensors. In addition, we are designing it to have a plugin architecture - so that in the future, users will be able to plugin algorithms that are better tuned for their hardware. Note that collective ops also implement other collective operations such as broadcast and all-gather.\n",
             "\n",
    @@ -371,7 +371,7 @@
             "id": "hQv1lm9UPDFy"
           },
           "source": [
    -        "So far we've talked about what are the different stategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
    +        "So far we've talked about what are the different strategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
           ]
         },
         {
    @@ -490,8 +490,8 @@
             "Here is a list of tutorials and examples that illustrate the above integration end to end with Keras:\n",
             "\n",
             "1. [Tutorial](../tutorials/distribute/keras.ipynb) to train MNIST with `MirroredStrategy`.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
           ]
         },
         {
    @@ -595,9 +595,9 @@
             "### Examples and Tutorials\n",
             "Here are some examples that show end to end usage of various strategies with Estimator:\n",
             "\n",
    -        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/master/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
    +        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/r1.15/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
           ]
         },
         {
    @@ -607,7 +607,7 @@
           },
           "source": [
             "## Using `tf.distribute.Strategy` with custom training loops\n",
    -        "As you've seen, using `tf.distrbute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distrbute.Strategy` can also be used by other users who are not using these frameworks.\n",
    +        "As you've seen, using `tf.distribute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distribute.Strategy` can also be used by other users who are not using these frameworks.\n",
             "\n",
             "TensorFlow is used for a wide variety of use cases and some users (such as researchers) require more flexibility and control over their training loops. This makes it hard for them to use the high level frameworks such as Estimator or Keras. For instance, someone using a GAN may want to take a different number of generator or discriminator steps each round. Similarly, the high level frameworks are not very suitable for Reinforcement Learning training. So these users will usually write their own training loops.\n",
             "\n",
    diff --git a/site/en/r1/guide/eager.ipynb b/site/en/r1/guide/eager.ipynb
    index 547e1b02977..f76acb4b702 100644
    --- a/site/en/r1/guide/eager.ipynb
    +++ b/site/en/r1/guide/eager.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -95,7 +95,7 @@
             "\n",
             "Eager execution supports most TensorFlow operations and GPU acceleration. For a\n",
             "collection of examples running in eager execution, see:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n",
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n",
             "\n",
             "Note: Some models may experience increased overhead with eager execution\n",
             "enabled. Performance improvements are ongoing, but please\n",
    @@ -702,7 +702,7 @@
           },
           "outputs": [],
           "source": [
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"gpu:0\"):\n",
             "    v = tf.Variable(tf.random_normal([1000, 1000]))\n",
             "    v = None  # v no longer takes up GPU memory"
    @@ -1116,7 +1116,7 @@
             "  print(\"CPU: {} secs\".format(measure(tf.random_normal(shape), steps)))\n",
             "\n",
             "# Run on GPU, if available:\n",
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"/gpu:0\"):\n",
             "    print(\"GPU: {} secs\".format(measure(tf.random_normal(shape), steps)))\n",
             "else:\n",
    @@ -1141,7 +1141,7 @@
           },
           "outputs": [],
           "source": [
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  x = tf.random_normal([10, 10])\n",
             "\n",
             "  x_gpu0 = x.gpu()\n",
    @@ -1160,7 +1160,7 @@
             "### Benchmarks\n",
             "\n",
             "For compute-heavy models, such as\n",
    -        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50)\n",
    +        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples/resnet50)\n",
             "training on a GPU, eager execution performance is comparable to graph execution.\n",
             "But this gap grows larger for models with less computation and there is work to\n",
             "be done for optimizing hot code paths for models with lots of small operations."
    @@ -1225,7 +1225,7 @@
             "production deployment. Use `tf.train.Checkpoint` to save and restore model\n",
             "variables, this allows movement between eager and graph execution environments.\n",
             "See the examples in:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n"
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n"
           ]
         },
         {
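
The eager-execution hunks above replace `tf.test.is_gpu_available()` with `tf.config.list_physical_devices('GPU')`. A small sketch of the updated device-placement check, in the same spirit as the notebook cells (assuming the archived notebook's `tensorflow.compat.v1` import):

```python
import tensorflow.compat.v1 as tf

tf.enable_eager_execution()

if tf.config.list_physical_devices('GPU'):
    with tf.device("gpu:0"):
        v = tf.Variable(tf.random_normal([1000, 1000]))
        v = None  # Dropping the reference frees the GPU memory.
else:
    print("No GPU available; running on CPU.")
```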
    diff --git a/site/en/r1/guide/extend/architecture.md b/site/en/r1/guide/extend/architecture.md
    index 1f2ac53066f..0753824e15e 100644
    --- a/site/en/r1/guide/extend/architecture.md
    +++ b/site/en/r1/guide/extend/architecture.md
    @@ -34,7 +34,7 @@ This document focuses on the following layers:
     *  **Client**:
        *  Defines the computation as a dataflow graph.
        *  Initiates graph execution using a [**session**](
    -      https://www.tensorflow.org/code/tensorflow/python/client/session.py).
    +      https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/client/session.py).
     *  **Distributed Master**
        *  Prunes a specific subgraph from the graph, as defined by the arguments
           to Session.run().
    @@ -144,8 +144,8 @@ The distributed master then ships the graph pieces to the distributed tasks.
     
     ### Code
     
    -*  [MasterService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/master_service.proto)
    -*  [Master interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/master_interface.h)
    +*  [MasterService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/master_service.proto)
    +*  [Master interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/master_interface.h)
     
     ## Worker Service
     
    @@ -178,7 +178,7 @@ For transfers between tasks, TensorFlow uses multiple protocols, including:
     
     We also have preliminary support for NVIDIA's NCCL library for multi-GPU
     communication, see:
    -[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nccl_ops.py).
    +[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/nccl_ops.py).
     
     Partitioned Graph
     
    @@ -186,9 +186,9 @@ communication, see:
     
     ### Code
     
    -*   [WorkerService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/worker_service.proto)
    -*   [Worker interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/worker_interface.h)
    -*   [Remote rendezvous (for Send and Recv implementations)](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
    +*   [WorkerService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/worker_service.proto)
    +*   [Worker interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/worker_interface.h)
    +*   [Remote rendezvous (for Send and Recv implementations)](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
     
     ## Kernel Implementations
     
    @@ -199,7 +199,7 @@ Many of the operation kernels are implemented using Eigen::Tensor, which uses
     C++ templates to generate efficient parallel code for multicore CPUs and GPUs;
     however, we liberally use libraries like cuDNN where a more efficient kernel
     implementation is possible. We have also implemented
    -[quantization](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
    +[quantization](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
     faster inference in environments such as mobile devices and high-throughput
     datacenter applications, and use the
     [gemmlowp](https://github.com/google/gemmlowp) low-precision matrix library to
    @@ -215,4 +215,4 @@ experimental implementation of automatic kernel fusion.
     
     ### Code
     
    -*   [`OpKernel` interface](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)
    +*   [`OpKernel` interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)
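
The architecture guide above describes the client building a dataflow graph and a session handing it to the (local or distributed) master and workers. A toy client-side sketch of that flow; the remote target mentioned in the comment is a made-up placeholder:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Client: define the computation as a dataflow graph.
a = tf.placeholder(tf.float32, name="a")
b = tf.placeholder(tf.float32, name="b")
c = tf.multiply(a, b, name="c")

# Session.run() prunes the needed subgraph and dispatches it for execution.
# For a remote cluster you would pass a target such as "grpc://host:2222".
with tf.Session() as sess:
    print(sess.run(c, feed_dict={a: 3.0, b: 4.0}))  # 12.0
```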
    diff --git a/site/en/r1/guide/extend/bindings.md b/site/en/r1/guide/extend/bindings.md
    index 9c10e90840f..7daa2212106 100644
    --- a/site/en/r1/guide/extend/bindings.md
    +++ b/site/en/r1/guide/extend/bindings.md
    @@ -112,11 +112,11 @@ There are a few ways to get a list of the `OpDef`s for the registered ops:
         to interpret the `OpDef` messages.
     -   The C++ function `OpRegistry::Global()->GetRegisteredOps()` returns the same
         list of all registered `OpDef`s (defined in
    -    [`tensorflow/core/framework/op.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op.h)). This can be used to write the generator
    +    [`tensorflow/core/framework/op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op.h)). This can be used to write the generator
         in C++ (particularly useful for languages that do not have protocol buffer
         support).
     -   The ASCII-serialized version of that list is periodically checked in to
    -    [`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt) by an automated process.
    +    [`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt) by an automated process.
     
     The `OpDef` specifies the following:
     
    @@ -159,7 +159,7 @@ between the generated code and the `OpDef`s checked into the repository, but is
     useful for languages where code is expected to be generated ahead of time like
     `go get` for Go and `cargo ops` for Rust. At the other end of the spectrum, for
     some languages the code could be generated dynamically from
    -[`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt).
    +[`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt).
     
     #### Handling Constants
     
    @@ -228,4 +228,4 @@ At this time, support for gradients, functions and control flow operations ("if"
     and "while") is not available in languages other than Python. This will be
     updated when the [C API] provides necessary support.
     
    -[C API]: https://www.tensorflow.org/code/tensorflow/c/c_api.h
    +[C API]: https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h
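
The language-bindings guide above points at `ops.pbtxt`, the ASCII-serialized list of registered `OpDef`s. One way to inspect it from Python, assuming you have a TensorFlow source checkout providing that file (the path below is illustrative):

```python
from google.protobuf import text_format
from tensorflow.core.framework import op_def_pb2

op_list = op_def_pb2.OpList()
with open("tensorflow/core/ops/ops.pbtxt") as f:  # path inside a TensorFlow checkout
    text_format.Merge(f.read(), op_list)

print(len(op_list.op), "registered ops")
for op in op_list.op[:3]:
    print(op.name, [arg.name for arg in op.input_arg])
```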
    diff --git a/site/en/r1/guide/extend/filesystem.md b/site/en/r1/guide/extend/filesystem.md
    index 4d34c07102e..2d6ea0c4645 100644
    --- a/site/en/r1/guide/extend/filesystem.md
    +++ b/site/en/r1/guide/extend/filesystem.md
    @@ -54,7 +54,7 @@ To implement a custom filesystem plugin, you must do the following:
     ### The FileSystem interface
     
     The `FileSystem` interface is an abstract C++ interface defined in
    -[file_system.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h).
    +[file_system.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h).
     An implementation of the `FileSystem` interface should implement all relevant
     the methods defined by the interface. Implementing the interface requires
     defining operations such as creating `RandomAccessFile`, `WritableFile`, and
    @@ -70,26 +70,26 @@ involves calling `stat()` on the file and then returns the filesize as reported
     by the return of the stat object. Similarly, for the `HDFSFileSystem`
     implementation, these calls simply delegate to the `libHDFS` implementation of
     similar functionality, such as `hdfsDelete` for
    -[DeleteFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
    +[DeleteFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
     
     We suggest looking through these code examples to get an idea of how different
     filesystem implementations call their existing libraries. Examples include:
     
     *   [POSIX
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/posix/posix_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/posix/posix_file_system.h)
     *   [HDFS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.h)
     *   [GCS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/cloud/gcs_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/cloud/gcs_file_system.h)
     *   [S3
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/s3/s3_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/s3/s3_file_system.h)
     
     #### The File interfaces
     
     Beyond operations that allow you to query and manipulate files and directories
     in a filesystem, the `FileSystem` interface requires you to implement factories
     that return implementations of abstract objects such as the
    -[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h#L223),
    +[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h#L223),
     the `WritableFile`, so that TensorFlow code and read and write to files in that
     `FileSystem` implementation.
     
    @@ -224,7 +224,7 @@ it will use the `FooBarFileSystem` implementation.
     
     Next, you must build a shared object containing this implementation. An example
     of doing so using bazel's `cc_binary` rule can be found
    -[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD#L244),
    +[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD#L244),
     but you may use any build system to do so. See the section on [building the op library](../extend/op.md#build_the_op_library) for similar
     instructions.
     
    @@ -236,7 +236,7 @@ passing the path to the shared object. Calling this in your client program loads
     the shared object in the process, thus registering your implementation as
     available for any file operations going through the `FileSystem` interface. You
     can see
    -[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/file_system_test.py)
    +[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/framework/file_system_test.py)
     for an example.
     
     ## What goes through this interface?
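
The filesystem guide above ends by loading the plugin's shared object so the new scheme is registered with the `FileSystem` interface. A hedged sketch of that client-side step in TF 1.x, reusing the guide's `foobar://` scheme with a made-up library path:

```python
import tensorflow.compat.v1 as tf

# Loading the shared object registers the FooBarFileSystem as a side effect.
tf.load_file_system_library("./foobar_filesystem.so")  # hypothetical plugin path

# Anything routed through the FileSystem interface can now resolve the scheme.
print(tf.gfile.Exists("foobar://bucket/path/to/file"))
with tf.gfile.GFile("foobar://bucket/path/to/file") as f:
    print(f.read())
```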
    diff --git a/site/en/r1/guide/extend/formats.md b/site/en/r1/guide/extend/formats.md
    index 3b7b4aafbd6..bdebee5487d 100644
    --- a/site/en/r1/guide/extend/formats.md
    +++ b/site/en/r1/guide/extend/formats.md
    @@ -28,11 +28,11 @@ individual records in a file. There are several examples of "reader" datasets
     that are already built into TensorFlow:
     
     *   `tf.data.TFRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.FixedLengthRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.TextLineDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     
     Each of these implementations comprises three related classes:
     
    @@ -279,7 +279,7 @@ if __name__ == "__main__":
     ```
     
     You can see some examples of `Dataset` wrapper classes in
    -[`tensorflow/python/data/ops/dataset_ops.py`](https://www.tensorflow.org/code/tensorflow/python/data/ops/dataset_ops.py).
    +[`tensorflow/python/data/ops/dataset_ops.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/data/ops/dataset_ops.py).
     
     ## Writing an Op for a record format
     
    @@ -297,7 +297,7 @@ Examples of Ops useful for decoding records:
     
     Note that it can be useful to use multiple Ops to decode a particular record
     format.  For example, you may have an image saved as a string in
    -[a `tf.train.Example` protocol buffer](https://www.tensorflow.org/code/tensorflow/core/example/example.proto).
    +[a `tf.train.Example` protocol buffer](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto).
     Depending on the format of that image, you might take the corresponding output
     from a `tf.parse_single_example` op and call `tf.image.decode_jpeg`,
     `tf.image.decode_png`, or `tf.decode_raw`.  It is common to take the output
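
The record-formats guide above combines a reader dataset with decoding ops such as `tf.parse_single_example` and `tf.image.decode_jpeg`. A small sketch of that pipeline; the feature key and filename are placeholders:

```python
import tensorflow.compat.v1 as tf

def _parse_record(example_proto):
    # "image_raw" is a hypothetical feature key for the serialized JPEG bytes.
    features = {"image_raw": tf.FixedLenFeature([], tf.string)}
    parsed = tf.parse_single_example(example_proto, features)
    return tf.image.decode_jpeg(parsed["image_raw"])

dataset = tf.data.TFRecordDataset(["/var/data/images.tfrecord"])  # placeholder file
dataset = dataset.map(_parse_record)
```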
    diff --git a/site/en/r1/guide/extend/model_files.md b/site/en/r1/guide/extend/model_files.md
    index 30e73a5169e..e590fcf1f27 100644
    --- a/site/en/r1/guide/extend/model_files.md
    +++ b/site/en/r1/guide/extend/model_files.md
    @@ -28,7 +28,7 @@ by calling `as_graph_def()`, which returns a `GraphDef` object.
     
     The GraphDef class is an object created by the ProtoBuf library from the
     definition in
    -[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto). The protobuf tools parse
    +[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto). The protobuf tools parse
     this text file, and generate the code to load, store, and manipulate graph
     definitions. If you see a standalone TensorFlow file representing a model, it's
     likely to contain a serialized version of one of these `GraphDef` objects
    @@ -87,7 +87,7 @@ for node in graph_def.node
     ```
     
     Each node is a `NodeDef` object, defined in
    -[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto). These
    +[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/node_def.proto). These
     are the fundamental building blocks of TensorFlow graphs, with each one defining
     a single operation along with its input connections. Here are the members of a
     `NodeDef`, and what they mean.
    @@ -107,7 +107,7 @@ This defines what operation to run, for example `"Add"`, `"MatMul"`, or
     `"Conv2D"`. When a graph is run, this op name is looked up in a registry to
     find an implementation. The registry is populated by calls to the
     `REGISTER_OP()` macro, like those in
    -[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc).
    +[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/nn_ops.cc).
     
     ### `input`
     
    @@ -133,7 +133,7 @@ size of filters for convolutions, or the values of constant ops. Because there
     can be so many different types of attribute values, from strings, to ints, to
     arrays of tensor values, there's a separate protobuf file defining the data
     structure that holds them, in
    -[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto).
    +[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto).
     
     Each attribute has a unique name string, and the expected attributes are listed
     when the operation is defined. If an attribute isn't present in a node, but it
    @@ -151,7 +151,7 @@ the file format during training. Instead, they're held in separate checkpoint
     files, and there are `Variable` ops in the graph that load the latest values
     when they're initialized. It's often not very convenient to have separate files
     when you're deploying to production, so there's the
    -[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
    +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
     of checkpoints and freezes them together into a single file.
     
     What this does is load the `GraphDef`, pull in the values for all the variables
    @@ -167,7 +167,7 @@ the most common problems is extracting and interpreting the weight values. A
     common way to store them, for example in graphs created by the freeze_graph
     script, is as `Const` ops containing the weights as `Tensors`. These are
     defined in
    -[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto), and contain information
    +[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto), and contain information
     about the size and type of the data, as well as the values themselves. In
     Python, you get a `TensorProto` object from a `NodeDef` representing a `Const`
     op by calling something like `some_node_def.attr['value'].tensor`.
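
The model-files guide above explains that frozen graphs store weights as `Const` nodes whose `value` attr holds a `TensorProto`. A short sketch of pulling those weights back out; the `.pb` path is a placeholder:

```python
import tensorflow.compat.v1 as tf
from tensorflow.core.framework import graph_pb2

graph_def = graph_pb2.GraphDef()
with tf.gfile.GFile("frozen_model.pb", "rb") as f:  # placeholder path
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    if node.op == "Const":
        # Convert the embedded TensorProto into a NumPy array.
        weights = tf.make_ndarray(node.attr["value"].tensor)
        print(node.name, weights.dtype, weights.shape)
```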
    diff --git a/site/en/r1/guide/extend/op.md b/site/en/r1/guide/extend/op.md
    index d006a6251d0..186d9c28c04 100644
    --- a/site/en/r1/guide/extend/op.md
    +++ b/site/en/r1/guide/extend/op.md
    @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to:
         test the op in C++. If you define gradients, you can verify them with the
         Python `tf.test.compute_gradient_error`.
         See
    -    [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as
    +    [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/kernel_tests/relu_op_test.py) as
         an example that tests the forward functions of Relu-like operators and
         their gradients.
     
    @@ -155,17 +155,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp);
     >   Important: Instances of your OpKernel may be accessed concurrently.
     >   Your `Compute` method must be thread-safe. Guard any access to class
     >   members with a mutex. Or better yet, don't share state via class members!
    ->   Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h)
    +>   Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/resource_mgr.h)
     >   to keep track of op state.
     
     ### Multi-threaded CPU kernels
     
     To write a multi-threaded CPU kernel, the Shard function in
    -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h)
    +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/work_sharder.h)
     can be used. This function shards a computation function across the
     threads configured to be used for intra-op threading (see
     intra_op_parallelism_threads in
    -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)).
    +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)).
     
     ### GPU kernels
     
    @@ -348,12 +348,13 @@ g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFL
     On macOS, the additional flag "-undefined dynamic_lookup" is required when
     building the `.so` file.
     
    ->   Note on `gcc` version `>=5`: gcc uses the new C++
    ->   [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip
    ->   packages available on the TensorFlow website are built with `gcc4` that uses
    ->   the older ABI. If you compile your op library with `gcc>=5`, add
    ->   `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library
    ->   compatible with the older abi.
    +> Note on `gcc` version `>=5`: gcc uses the new C++
    +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`.
    +> TensorFlow 2.8 and earlier were built with `gcc4` that uses the older ABI. If
    +> you are using these versions of TensorFlow and are trying to compile your op
    +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to
    +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are
    +> compatible with the newer ABI by default.
     
     ### Compile the op using bazel (TensorFlow source installation)
     
    @@ -485,13 +486,13 @@ This asserts that the input is a vector, and returns having set the
     
     *   The `context`, which can either be an `OpKernelContext` or
         `OpKernelConstruction` pointer (see
    -    [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)),
    +    [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)),
         for its `SetStatus()` method.
     *   The condition.  For example, there are functions for validating the shape
         of a tensor in
    -    [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h)
    +    [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.h)
     *   The error itself, which is represented by a `Status` object, see
    -    [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A
    +    [`tensorflow/core/lib/core/status.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/status.h). A
         `Status` has both a type (frequently `InvalidArgument`, but see the list of
         types) and a message.  Functions for constructing an error may be found in
         [`tensorflow/core/lib/core/errors.h`][validation-macros].
    @@ -632,7 +633,7 @@ define an attr with constraints, you can use the following ``s:
     
         The specific lists of types allowed by these are defined by the functions
         (like `NumberTypes()`) in
    -    [`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h).
    +    [`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.h).
         In this example the attr `t` must be one of the numeric types:
     
         ```c++
    @@ -1179,7 +1180,7 @@ There are several ways to preserve backwards-compatibility.
        type into a list of varying types).
     
     The full list of safe and unsafe changes can be found in
    -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc).
    +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_compatibility_test.cc).
     If you cannot make your change to an operation backwards compatible, then create
     a new operation with a new name with the new semantics.
     
    @@ -1189,23 +1190,23 @@ callers.  The Python API may be kept compatible by careful changes in a
     hand-written Python wrapper, by keeping the old signature except possibly adding
     new optional arguments to the end.  Generally incompatible changes may only be
     made when TensorFlow changes major versions, and must conform to the
    -[`GraphDef` version semantics](../guide/version_compat.md#compatibility_of_graphs_and_checkpoints).
    +[`GraphDef` version semantics](../version_compat.md).
     
     ### GPU Support
     
     You can implement different OpKernels and register one for CPU and another for
     GPU, just like you can [register kernels for different types](#polymorphism).
     There are several examples of kernels with GPU support in
    -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/).
    +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/).
     Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file
     ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file.
     
     For example, the `tf.pad` has
     everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op].
     The GPU kernel is in
    -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc),
    +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op_gpu.cu.cc),
     and the shared code is a templated class defined in
    -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h).
    +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.h).
     We organize the code this way for two reasons: it allows you to share common
     code among the CPU and GPU implementations, and it puts the GPU implementation
     into a separate file so that it can be compiled only by the GPU compiler.
    @@ -1226,16 +1227,16 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.:
     #### Compiling the kernel for the GPU device
     
     Look at
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     for an example that uses a CUDA kernel to implement an op. The
     `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source
     files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use
     with a binary installation of TensorFlow, the CUDA kernels have to be compiled
     with NVIDIA's `nvcc` compiler. Here is the sequence of commands you can use to
     compile the
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     and
    -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
    +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
     into a single dynamically loadable library:
     
     ```bash
    @@ -1360,7 +1361,7 @@ be set to the first input's shape. If the output is selected by its index as in
     
     There are a number of common shape functions
     that apply to many ops, such as `shape_inference::UnchangedShape` which can be
    -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows:
    +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/common_shape_fns.h) and used as follows:
     
     ```c++
     REGISTER_OP("ZeroOut")
    @@ -1407,7 +1408,7 @@ provides access to the attributes of the op).
     
     Since shape inference is an optional feature, and the shapes of tensors may vary
     dynamically, shape functions must be robust to incomplete shape information for
    -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h)
    +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/shape_inference.h)
     allows the caller to assert that two shapes are the same, even if either
     or both of them do not have complete information. Shape functions are defined
     for all of the core TensorFlow ops and provide many different usage examples.
    @@ -1432,7 +1433,7 @@ If you have a complicated shape function, you should consider adding a test for
     validating that various input shape combinations produce the expected output
     shape combinations.  You can see examples of how to write these tests in some
     our
    -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc).
    +[core ops tests](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops_test.cc).
     (The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be
     compact in representing input and output shape specifications in tests.  For
     now, see the surrounding comments in those tests to get a sense of the shape
    @@ -1445,20 +1446,20 @@ To build a `pip` package for your op, see the
     guide shows how to build custom ops from the TensorFlow pip package instead
     of building TensorFlow from source.
     
    -[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc
    -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py
    -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/
    -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/
    -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc
    -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py
    -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h
    -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h
    -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h
    -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc
    -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto
    -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto
    -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto
    +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops.cc
    +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/user_ops/user_ops.py
    +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/
    +[user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/user_ops/
    +[pad_op]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.cc
    +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/standard_ops.py
    +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/ops/standard_ops.h
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/errors.h
    +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.h
    +[register_types]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/register_types.h
    +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.cc
    +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.cc
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[types-proto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto
    +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto
    +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto
    diff --git a/site/en/r1/guide/feature_columns.md b/site/en/r1/guide/feature_columns.md
    index 5a4dfbbf46d..e4259f85e9f 100644
    --- a/site/en/r1/guide/feature_columns.md
    +++ b/site/en/r1/guide/feature_columns.md
    @@ -562,7 +562,7 @@ For more examples on feature columns, view the following:
     
 * The [Low Level Introduction](../guide/low_level_intro.md#feature_columns) demonstrates how
   to experiment directly with `feature_columns` using TensorFlow's low level APIs.
    -* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
       solves a binary classification problem using `feature_columns` on a variety of
       input data types.
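
A minimal sketch of the feature-column pattern these guides describe, using the TF1 API; the column names and vocabulary values below are hypothetical:

```python
import tensorflow.compat.v1 as tf

fc = tf.feature_column

# Numeric feature used as-is; categorical feature one-hot encoded.
age = fc.numeric_column('age')
embark_town = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list(
        'embark_town', ['Southampton', 'Cherbourg', 'Queenstown']))

# Feature columns plug directly into canned Estimators.
estimator = tf.estimator.LinearClassifier(feature_columns=[age, embark_town])
```
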
     
    diff --git a/site/en/r1/guide/graph_viz.md b/site/en/r1/guide/graph_viz.md
    index 1965378e03e..1e3780e7928 100644
    --- a/site/en/r1/guide/graph_viz.md
    +++ b/site/en/r1/guide/graph_viz.md
    @@ -251,7 +251,7 @@ is a snippet from the train and test section of a modification of the
     [Estimators MNIST tutorial](../tutorials/estimators/cnn.md), in which we have
     recorded summaries and
     runtime statistics. See the
    -[Tensorboard](https://tensorflow.org/tensorboard)
+[TensorBoard documentation](https://tensorflow.org/tensorboard)
     for details on how to record summaries.
     Full source is [here](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py).
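
A minimal sketch of recording summaries plus runtime statistics for TensorBoard in TF1; the toy loss and log directory are placeholders:

```python
import tensorflow.compat.v1 as tf

x = tf.Variable(3.0)
loss = tf.square(x - 2.0)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter('/tmp/logs', sess.graph)
    sess.run(tf.global_variables_initializer())
    # FULL_TRACE collects the runtime statistics shown in the graph visualizer.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    summary, _ = sess.run([merged, train_op],
                          options=run_options, run_metadata=run_metadata)
    writer.add_run_metadata(run_metadata, 'step_1')
    writer.add_summary(summary, 1)
    writer.close()
```
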
     
    diff --git a/site/en/r1/guide/keras.ipynb b/site/en/r1/guide/keras.ipynb
    index 08a778b60a5..3a0cd8e55c5 100644
    --- a/site/en/r1/guide/keras.ipynb
    +++ b/site/en/r1/guide/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1211,8 +1211,7 @@
         "colab": {
           "collapsed_sections": [],
           "name": "keras.ipynb",
    -      "provenance": [],
    -      "toc_visible": true
    +            "toc_visible": true
         },
         "kernelspec": {
           "display_name": "Python 3",
    diff --git a/site/en/r1/guide/performance/benchmarks.md b/site/en/r1/guide/performance/benchmarks.md
    index 8998c0723db..a56959ea416 100644
    --- a/site/en/r1/guide/performance/benchmarks.md
    +++ b/site/en/r1/guide/performance/benchmarks.md
    @@ -401,7 +401,7 @@ GPUs | InceptionV3 (batch size 32) | ResNet-50 (batch size 32)
     ## Methodology
     
     This
    -[script](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks)
    +[script](https://github.com/tensorflow/benchmarks/tree/r1.15/scripts/tf_cnn_benchmarks)
     was run on the various platforms to generate the above results.
     
     In order to create results that are as repeatable as possible, each test was run
    diff --git a/site/en/r1/guide/performance/overview.md b/site/en/r1/guide/performance/overview.md
    index af74f0f28c6..be7217f4b99 100644
    --- a/site/en/r1/guide/performance/overview.md
    +++ b/site/en/r1/guide/performance/overview.md
    @@ -19,9 +19,9 @@ Reading large numbers of small files significantly impacts I/O performance.
     One approach to get maximum I/O throughput is to preprocess input data into
     larger (~100MB) `TFRecord` files. For smaller data sets (200MB-1GB), the best
     approach is often to load the entire data set into memory. The document
    -[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#downloading-and-converting-to-tfrecord-format)
    +[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/r1.15/research/slim#downloading-and-converting-to-tfrecord-format)
     includes information and scripts for creating `TFRecord`s, and this
    -[script](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
    +[script](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
     converts the CIFAR-10 dataset into `TFRecord`s.
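
For reference, writing records into a `TFRecord` file with the TF1 API looks roughly like the sketch below; the example data is made up:

```python
import tensorflow.compat.v1 as tf

# Hypothetical (image_bytes, label) pairs; a real pipeline reads files from disk.
examples = [(b'raw_image_0', 3), (b'raw_image_1', 7)]

with tf.python_io.TFRecordWriter('/tmp/train.tfrecord') as writer:
    for image_bytes, label in examples:
        example = tf.train.Example(features=tf.train.Features(feature={
            'image': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[image_bytes])),
            'label': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[label])),
        }))
        writer.write(example.SerializeToString())
```
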
     
     While feeding data using a `feed_dict` offers a high level of flexibility, in
    @@ -122,7 +122,7 @@ tf.Session(config=config)
     Intel® has added optimizations to TensorFlow for Intel® Xeon® and Intel® Xeon
     Phi™ through the use of the Intel® Math Kernel Library for Deep Neural Networks
     (Intel® MKL-DNN) optimized primitives. The optimizations also provide speedups
    -for the consumer line of processors, e.g. i5 and i7 Intel processors. The Intel
    +for the consumer line of processors, e.g., i5 and i7 Intel processors. The Intel
     published paper
     [TensorFlow* Optimizations on Modern Intel® Architecture](https://software.intel.com/en-us/articles/tensorflow-optimizations-on-modern-intel-architecture)
     contains additional details on the implementation.
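
A hedged sketch of the session configuration this section refers to; the thread counts are illustrative starting points and should be tuned per machine:

```python
import tensorflow.compat.v1 as tf

config = tf.ConfigProto()
config.intra_op_parallelism_threads = 8  # threads used within a single op
config.inter_op_parallelism_threads = 2  # ops that may execute in parallel
sess = tf.Session(config=config)
```
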
    @@ -255,7 +255,7 @@ bazel build -c opt --copt=-march="broadwell" --config=cuda //tensorflow/tools/pi
       a docker container, the data is not cached and the penalty is paid each time
       TensorFlow starts. The best practice is to include the
       [compute capabilities](http://developer.nvidia.com/cuda-gpus)
    -  of the GPUs that will be used, e.g. P100: 6.0, Titan X (Pascal): 6.1,
    +  of the GPUs that will be used, e.g., P100: 6.0, Titan X (Pascal): 6.1,
       Titan X (Maxwell): 5.2, and K80: 3.7.
     * Use a version of `gcc` that supports all of the optimizations of the target
       CPU. The recommended minimum gcc version is 4.8.3. On macOS, upgrade to the
    diff --git a/site/en/r1/guide/ragged_tensors.ipynb b/site/en/r1/guide/ragged_tensors.ipynb
    index 61bce66ecfb..289d29ce82e 100644
    --- a/site/en/r1/guide/ragged_tensors.ipynb
    +++ b/site/en/r1/guide/ragged_tensors.ipynb
    @@ -57,7 +57,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1010,7 +1010,7 @@
             "    `tf.RaggedTensor.values`\n",
             "    and\n",
             "    `tf.RaggedTensor.row_splits`\n",
    -        "    properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths()`\n",
    +        "    properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths()`\n",
             "    and `tf.RaggedTensor.value_rowids()`."
           ]
         },
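
To make the row-partitioning properties mentioned above concrete, a short sketch with arbitrarily chosen values:

```python
import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

rt = tf.ragged.constant([[3, 1, 4], [], [5, 9], [2]])
print(rt.values)          # flat values: [3 1 4 5 9 2]
print(rt.row_splits)      # [0 3 3 5 6]
print(rt.row_lengths())   # [3 0 2 1]
print(rt.value_rowids())  # [0 0 0 2 2 3]
```
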
    diff --git a/site/en/r1/guide/saved_model.md b/site/en/r1/guide/saved_model.md
    index 623863a9df9..34447ffe861 100644
    --- a/site/en/r1/guide/saved_model.md
    +++ b/site/en/r1/guide/saved_model.md
    @@ -23,7 +23,7 @@ TensorFlow saves variables in binary *checkpoint files* that map variable
     names to tensor values.
     
     Caution: TensorFlow model files are code. Be careful with untrusted code.
    -See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md)
    +See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md)
     for details.
     
     ### Save variables
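
A minimal sketch of the checkpoint workflow described here; the variable shapes and checkpoint path are placeholders:

```python
import tensorflow.compat.v1 as tf

v1 = tf.get_variable('v1', shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable('v2', shape=[5], initializer=tf.ones_initializer)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    save_path = saver.save(sess, '/tmp/model.ckpt')
    print('Variables saved to:', save_path)
```
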
    @@ -148,7 +148,7 @@ Notes:
        `tf.variables_initializer` for more information.
     
     *  To inspect the variables in a checkpoint, you can use the
    -   [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py)
    +   [`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py)
        library, particularly the `print_tensors_in_checkpoint_file` function.
     
     *  By default, `Saver` uses the value of the `tf.Variable.name` property
    @@ -159,7 +159,7 @@ Notes:
     ### Inspect variables in a checkpoint
     
     We can quickly inspect variables in a checkpoint with the
    -[`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library.
    +[`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py) library.
     
     Continuing from the save/restore examples shown earlier:
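
Assuming a checkpoint was written to `/tmp/model.ckpt` as in the sketch above, a quick inspection looks roughly like this:

```python
from tensorflow.python.tools import inspect_checkpoint as chkp

# Print every tensor stored in the checkpoint.
chkp.print_tensors_in_checkpoint_file('/tmp/model.ckpt',
                                      tensor_name='', all_tensors=True)
```
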
     
    @@ -216,7 +216,7 @@ simple_save(session,
     
     This configures the `SavedModel` so it can be loaded by
     [TensorFlow serving](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) and supports the
    -[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto).
    +[Predict API](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/predict.proto).
     To access the classify, regress, or multi-inference APIs, use the manual
 `SavedModel` builder APIs or a `tf.estimator.Estimator`.
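
A hedged sketch of `simple_save` with placeholder input and output tensors:

```python
import tensorflow.compat.v1 as tf

x = tf.placeholder(tf.float32, shape=[None, 3], name='x')
y = tf.layers.dense(x, 1, name='y')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.saved_model.simple_save(sess, '/tmp/saved_model_dir',
                               inputs={'x': x}, outputs={'y': y})
```
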
     
    @@ -328,7 +328,7 @@ with tf.Session(graph=tf.Graph()) as sess:
     ### Load a SavedModel in C++
     
     The C++ version of the SavedModel
    -[loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h)
    +[loader](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/loader.h)
     provides an API to load a SavedModel from a path, while allowing
     `SessionOptions` and `RunOptions`.
     You have to specify the tags associated with the graph to be loaded.
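
For comparison, the Python counterpart (not shown in this hunk) loads a SavedModel along these lines; the export directory is a placeholder:

```python
import tensorflow.compat.v1 as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], '/tmp/saved_model_dir')
    # The graph and variables from the SavedModel are now available in `sess`.
```
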
    @@ -383,20 +383,20 @@ reuse and share across tools consistently.
     You may use sets of tags to uniquely identify a `MetaGraphDef` saved in a
     SavedModel. A subset of commonly used tags is specified in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/tag_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/tag_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/tag_constants.h)
     
     
     #### Standard SignatureDef constants
     
    -A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto)
    +A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/meta_graph.proto)
     is a protocol buffer that defines the signature of a computation
     supported by a graph.
     Commonly used input keys, output keys, and method names are
     defined in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/signature_constants.h)
     
     ## Using SavedModel with Estimators
     
    @@ -408,7 +408,7 @@ To prepare a trained Estimator for serving, you must export it in the standard
     SavedModel format. This section explains how to:
     
     * Specify the output nodes and the corresponding
    -  [APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto)
    +  [APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto)
       that can be served (Classify, Regress, or Predict).
     * Export your model to the SavedModel format.
     * Serve the model from a local server and request predictions.
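
These steps boil down to code along the following lines; `estimator` is assumed to be an already trained `tf.estimator.Estimator`, and the feature spec is illustrative:

```python
import tensorflow.compat.v1 as tf

feature_spec = {'x': tf.FixedLenFeature(shape=[3], dtype=tf.float32)}

def serving_input_receiver_fn():
    # Receives serialized tf.Example protos and parses them into features.
    serialized = tf.placeholder(dtype=tf.string, shape=[None],
                                name='input_example_tensor')
    features = tf.parse_example(serialized, feature_spec)
    return tf.estimator.export.ServingInputReceiver(
        features, {'examples': serialized})

# `estimator` is assumed to exist and to be trained already.
export_dir = estimator.export_savedmodel('/tmp/exports',
                                         serving_input_receiver_fn)
```
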
    @@ -506,7 +506,7 @@ Each `output` value must be an `ExportOutput` object  such as
     `tf.estimator.export.PredictOutput`.
     
     These output types map straightforwardly to the
    -[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto),
    +[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto),
     and so determine which request types will be honored.
     
     Note: In the multi-headed case, a `SignatureDef` will be generated for each
    @@ -515,7 +515,7 @@ the same keys.  These `SignatureDef`s differ only in their outputs, as
     provided by the corresponding `ExportOutput` entry.  The inputs are always
     those provided by the `serving_input_receiver_fn`.
     An inference request may specify the head by name.  One head must be named
    -using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tensorflow.org/code/tensorflow/python/saved_model/signature_constants.py)
    +using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
     indicating which `SignatureDef` will be served when an inference request
     does not specify one.
     
    @@ -566,9 +566,9 @@ Now you have a server listening for inference requests via gRPC on port 9000!
     ### Request predictions from a local server
     
     The server responds to gRPC requests according to the
    -[PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15)
    +[PredictionService](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto#L15)
     gRPC API service definition.  (The nested protocol buffers are defined in
    -various [neighboring files](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis)).
    +various [neighboring files](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis)).
     
     From the API service definition, the gRPC framework generates client libraries
     in various languages providing remote access to the API.  In a project using the
    @@ -620,7 +620,7 @@ The returned result in this example is a `ClassificationResponse` protocol
     buffer.
     
 This is a skeletal example; please see the [TensorFlow Serving](../deploy/index.md)
    -documentation and [examples](https://github.com/tensorflow/serving/tree/master/tensorflow_serving/example)
    +documentation and [examples](https://github.com/tensorflow/serving/tree/r1.15/tensorflow_serving/example)
     for more details.
     
     > Note: `ClassificationRequest` and `RegressionRequest` contain a
    diff --git a/site/en/r1/guide/using_tpu.md b/site/en/r1/guide/using_tpu.md
    index 74169092189..e3e338adf49 100644
    --- a/site/en/r1/guide/using_tpu.md
    +++ b/site/en/r1/guide/using_tpu.md
    @@ -7,8 +7,8 @@ changing the *hardware accelerator* in your notebook settings:
     TPU-enabled Colab notebooks are available to test:
     
       1. [A quick test, just to measure FLOPS](https://colab.research.google.com/notebooks/tpu.ipynb).
    -  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb).
    -  3. [An LSTM markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/shakespeare_with_tpu_and_keras.ipynb)
    +  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/fashion_mnist.ipynb).
+  3. [An LSTM Markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/shakespeare_with_tpu_and_keras.ipynb).
     
     ## TPUEstimator
     
    @@ -25,7 +25,7 @@ Cloud TPU is to define the model's inference phase (from inputs to predictions)
     outside of the `model_fn`. Then maintain separate implementations of the
     `Estimator` setup and `model_fn`, both wrapping this inference step. For an
     example of this pattern compare the `mnist.py` and `mnist_tpu.py` implementation in
    -[tensorflow/models](https://github.com/tensorflow/models/tree/master/official/r1/mnist).
    +[tensorflow/models](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist).
     
     ### Run a TPUEstimator locally
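
A rough sketch of running a `TPUEstimator` locally with `use_tpu=False`; `model_fn` and `train_input_fn` are assumed to be defined elsewhere, and the batch sizes are arbitrary:

```python
import tensorflow.compat.v1 as tf

run_config = tf.estimator.tpu.RunConfig()  # no TPU settings needed for local runs

estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=model_fn,        # assumed: a standard Estimator model_fn
    config=run_config,
    use_tpu=False,            # fall back to CPU/GPU while iterating locally
    train_batch_size=64,
    eval_batch_size=64)

estimator.train(input_fn=train_input_fn, max_steps=1000)  # assumed input_fn
```
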
     
    @@ -350,10 +350,10 @@ in bytes. A minimum of a few MB (`buffer_size=8*1024*1024`) is recommended so
     that data is available when needed.
     
     The TPU-demos repo includes
    -[a script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
+[a script](https://github.com/tensorflow/tpu/blob/r1.15/tools/datasets/imagenet_to_gcs.py)
     for downloading the imagenet dataset and converting it to an appropriate format.
     This together with the imagenet
    -[models](https://github.com/tensorflow/tpu/tree/master/models)
    +[models](https://github.com/tensorflow/tpu/tree/r1.15/models)
     included in the repo demonstrate all of these best-practices.
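
A sketch of an input pipeline that follows these recommendations; the GCS path and pipeline parameters are placeholders:

```python
import tensorflow.compat.v1 as tf

files = tf.data.Dataset.list_files('gs://my-bucket/train-*.tfrecord')
dataset = files.interleave(
    lambda f: tf.data.TFRecordDataset(f, buffer_size=8 * 1024 * 1024),
    cycle_length=4)
dataset = dataset.shuffle(2048).repeat()
dataset = dataset.batch(128, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
```
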
     
     ## Next steps
    diff --git a/site/en/r1/guide/version_compat.md b/site/en/r1/guide/version_compat.md
    index 6702f6e0819..a765620518d 100644
    --- a/site/en/r1/guide/version_compat.md
    +++ b/site/en/r1/guide/version_compat.md
    @@ -49,19 +49,19 @@ patch versions.  The public APIs consist of
       submodules, but is not documented, then it is **not** considered part of the
       public API.
     
    -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h).
    +* The [C API](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h).
     
     * The following protocol buffer files:
    -    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto)
    -    * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)
    -    * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto)
    -    * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto)
    -    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto)
    -    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto)
    -    * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto)
    -    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto)
    -    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto)
    -    * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto)
    +    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto)
    +    * [`config`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)
    +    * [`event`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/event.proto)
    +    * [`graph`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto)
    +    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def.proto)
    +    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/reader_base.proto)
    +    * [`summary`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/summary.proto)
    +    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto)
    +    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto)
    +    * [`types`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto)
     
     
     ## What is *not* covered
    @@ -79,7 +79,7 @@ backward incompatible ways between minor releases. These include:
         such as:
     
       - [C++](./extend/cc.md) (exposed through header files in
    -    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)).
    +    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/cc)).
       - [Java](../api_docs/java/reference/org/tensorflow/package-summary),
       - [Go](https://pkg.go.dev/github.com/tensorflow/tensorflow/tensorflow/go)
       - [JavaScript](https://js.tensorflow.org)
    @@ -209,7 +209,7 @@ guidelines for evolving `GraphDef` versions.
     There are different data versions for graphs and checkpoints. The two data
     formats evolve at different rates from each other and also at different rates
     from TensorFlow. Both versioning systems are defined in
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h).
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h).
     Whenever a new version is added, a note is added to the header detailing what
     changed and the date.
     
    @@ -224,7 +224,7 @@ We distinguish between the following kinds of data version information:
       (`min_producer`).
     
     Each piece of versioned data has a [`VersionDef
    -versions`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/versions.proto)
    +versions`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/versions.proto)
     field which records the `producer` that made the data, the `min_consumer`
     that it is compatible with, and a list of `bad_consumers` versions that are
     disallowed.
    @@ -239,7 +239,7 @@ accept a piece of data if the following are all true:
     *   `consumer` not in data's `bad_consumers`
     
     Since both producers and consumers come from the same TensorFlow code base,
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h)
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h)
     contains a main data version which is treated as either `producer` or
     `consumer` depending on context and both `min_consumer` and `min_producer`
     (needed by producers and consumers, respectively). Specifically,
    @@ -309,7 +309,7 @@ existing producer scripts will not suddenly use the new functionality.
     1.  Add a new similar op named `SomethingV2` or similar and go through the
         process of adding it and switching existing Python wrappers to use it.
         To ensure forward compatibility use the checks suggested in
    -    [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py)
    +    [compat.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/compat/compat.py)
         when changing the Python wrappers.
     2.  Remove the old op (Can only take place with a major version change due to
         backward compatibility).
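
The forward-compatibility check mentioned in step 1 typically looks something like the sketch below; the ops and cutoff date are purely illustrative stand-ins:

```python
import tensorflow.compat.v1 as tf

def something(x):
    # Emit the new kernel only after the forward-compatibility window has
    # passed; both branches are stand-ins for `SomethingV2` / `Something`.
    if tf.compat.forward_compatible(2019, 8, 1):
        return tf.multiply(x, 2.0)
    return tf.add(x, x)
```
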
    diff --git a/site/en/r1/tutorials/README.md b/site/en/r1/tutorials/README.md
    index b6d932041bd..9ff164ad77c 100644
    --- a/site/en/r1/tutorials/README.md
    +++ b/site/en/r1/tutorials/README.md
    @@ -10,7 +10,7 @@ desktop, mobile, web, and cloud. See the sections below to get started.
     
     The high-level Keras API provides building blocks to create and
     train deep learning models. Start with these beginner-friendly
    -notebook examples, then read the [TensorFlow Keras guide](../guide/keras.ipynb).
    +notebook examples, then read the [TensorFlow Keras guide](https://www.tensorflow.org/guide/keras).
     
     * [Basic classification](./keras/basic_classification.ipynb)
     * [Text classification](./keras/basic_text_classification.ipynb)
    @@ -68,4 +68,4 @@ implement common ML algorithms. See the
     * [Boosted trees](./estimators/boosted_trees.ipynb)
     * [Gradient Boosted Trees: Model understanding](./estimators/boosted_trees_model_understanding.ipynb)
     * [Build a Convolutional Neural Network using Estimators](./estimators/cnn.ipynb)
    -* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
    diff --git a/site/en/r1/tutorials/_index.ipynb b/site/en/r1/tutorials/_index.ipynb
    index e2fe960d125..eca1450964f 100644
    --- a/site/en/r1/tutorials/_index.ipynb
    +++ b/site/en/r1/tutorials/_index.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/keras.ipynb b/site/en/r1/tutorials/distribute/keras.ipynb
    index b8d3c87bfab..14e8bf739a9 100644
    --- a/site/en/r1/tutorials/distribute/keras.ipynb
    +++ b/site/en/r1/tutorials/distribute/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -86,7 +86,7 @@
             "Essentially, it copies all of the model's variables to each processor.\n",
             "Then, it uses [all-reduce](http://mpitutorial.com/tutorials/mpi-reduce-and-allreduce/) to combine the gradients from all processors and applies the combined value to all copies of the model.\n",
             "\n",
    -        "`MirroredStategy` is one of several distribution strategy available in TensorFlow core. You can read about more strategies at [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
+        "`MirroredStrategy` is one of several distribution strategies available in TensorFlow core. You can read about more strategies in the [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
           ]
         },
         {
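
A minimal sketch of the pattern this notebook uses; the model architecture, loss, and optimizer are placeholders:

```python
import tensorflow.compat.v1 as tf

strategy = tf.distribute.MirroredStrategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

# Build and compile the model inside the strategy scope so its variables
# are mirrored across the available devices.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
        tf.keras.layers.Dense(1)
    ])
    model.compile(loss='mse', optimizer='adam')
```
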
    @@ -345,7 +345,7 @@
           "source": [
             "The callbacks used here are:\n",
             "\n",
    -        "*   *Tensorboard*: This callback writes a log for Tensorboard which allows you to visualize the graphs.\n",
    +        "*   *TensorBoard*: This callback writes a log for TensorBoard which allows you to visualize the graphs.\n",
             "*   *Model Checkpoint*: This callback saves the model after every epoch.\n",
             "*   *Learning Rate Scheduler*: Using this callback, you can schedule the learning rate to change after every epoch/batch.\n",
             "\n",
    @@ -554,7 +554,7 @@
           },
           "outputs": [],
           "source": [
    -        "tf.keras.experimental.export_saved_model(model, path)"
    +        "model.save(path)"
           ]
         },
         {
    @@ -574,7 +574,7 @@
           },
           "outputs": [],
           "source": [
    -        "unreplicated_model = tf.keras.experimental.load_from_saved_model(path)\n",
    +        "unreplicated_model = tf.keras.models.load_model(path)\n",
             "\n",
             "unreplicated_model.compile(\n",
             "    loss='sparse_categorical_crossentropy',\n",
    diff --git a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    index 6d09d2623de..c61f893ca4c 100644
    --- a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    +++ b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/training_loops.ipynb b/site/en/r1/tutorials/distribute/training_loops.ipynb
    index 1343e8c8b6b..8eb72c13030 100644
    --- a/site/en/r1/tutorials/distribute/training_loops.ipynb
    +++ b/site/en/r1/tutorials/distribute/training_loops.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    index bbbb689a617..df843bac3b8 100644
    --- a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    +++ b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_layers.ipynb b/site/en/r1/tutorials/eager/custom_layers.ipynb
    index c82458cb857..48b55ed943e 100644
    --- a/site/en/r1/tutorials/eager/custom_layers.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_layers.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -230,7 +230,7 @@
           "source": [
             "## Models: composing layers\n",
             "\n",
    -        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut.\n",
    +        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a ResNet is a composition of convolutions, batch normalizations, and a shortcut.\n",
             "\n",
             "The main class used when creating a layer-like thing which contains other layers is tf.keras.Model. Implementing one is done by inheriting from tf.keras.Model."
           ]
    diff --git a/site/en/r1/tutorials/eager/custom_training.ipynb b/site/en/r1/tutorials/eager/custom_training.ipynb
    index 72beefe89ad..f0f7faffa7f 100644
    --- a/site/en/r1/tutorials/eager/custom_training.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    index a4839429827..3989f3e44bc 100644
    --- a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/eager_basics.ipynb b/site/en/r1/tutorials/eager/eager_basics.ipynb
    index 9a72f192385..acd00ec2e20 100644
    --- a/site/en/r1/tutorials/eager/eager_basics.ipynb
    +++ b/site/en/r1/tutorials/eager/eager_basics.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -236,7 +236,7 @@
             "x = tf.random.uniform([3, 3])\n",
             "\n",
             "print(\"Is there a GPU available: \"),\n",
    -        "print(tf.test.is_gpu_available())\n",
    +        "print(tf.config.list_physical_devices('GPU'))\n",
             "\n",
             "print(\"Is the Tensor on GPU #0:  \"),\n",
             "print(x.device.endswith('GPU:0'))"
    @@ -292,7 +292,7 @@
             "  time_matmul(x)\n",
             "\n",
             "# Force execution on GPU #0 if available\n",
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"GPU:0\"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.\n",
             "    x = tf.random_uniform([1000, 1000])\n",
             "    assert x.device.endswith(\"GPU:0\")\n",
    diff --git a/site/en/r1/tutorials/estimators/boosted_trees.ipynb b/site/en/r1/tutorials/estimators/boosted_trees.ipynb
    deleted file mode 100644
    index 7452d521095..00000000000
    --- a/site/en/r1/tutorials/estimators/boosted_trees.ipynb
    +++ /dev/null
    @@ -1,606 +0,0 @@
    -{
    -  "cells": [
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "7765UFHoyGx6"
    -      },
    -      "source": [
    -        "##### Copyright 2019 The TensorFlow Authors."
    -      ]
    -    },
    -    {
    -      "cell_type": "code",
    -      "execution_count": null,
    -      "metadata": {
    -        "cellView": "form",
    -        "id": "KVtTDrUNyL7x"
    -      },
    -      "outputs": [],
    -      "source": [
    -        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    -        "# you may not use this file except in compliance with the License.\n",
    -        "# You may obtain a copy of the License at\n",
    -        "#\n",
    -        "# https://www.apache.org/licenses/LICENSE-2.0\n",
    -        "#\n",
    -        "# Unless required by applicable law or agreed to in writing, software\n",
    -        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    -        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    -        "# See the License for the specific language governing permissions and\n",
    -        "# limitations under the License."
    -      ]
    -    },
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "xPYxZMrWyA0N"
    -      },
    -      "source": [
    -        "#How to train Boosted Trees models in TensorFlow"
    -      ]
    -    },
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "p_vOREjRx-Y0"
    -      },
    -      "source": [
    -        "
    \n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6lCDyX3HFWos" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "This tutorial is an end-to-end walkthrough of training a Gradient Boosting model using decision trees with the `tf.estimator` API. Boosted Trees models are among the most popular and effective machine learning approaches for both regression and classification. It is an ensemble technique that combines the predictions from several (think 10s, 100s or even 1000s) tree models.\n", - "\n", - "Boosted Trees models are popular with many machine learning practioners as they can achieve impressive performance with minimal hyperparameter tuning." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "from matplotlib import pyplot as plt\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow.compat.v1 as tf\n", - "\n", - "tf.logging.set_verbosity(tf.logging.ERROR)\n", - "tf.set_random_seed(123)\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "The dataset consists of a training set and an evaluation set:\n", - "\n", - "* `dftrain` and `y_train` are the *training set*—the data the model uses to learn.\n", - "* The model is tested against the *eval set*, `dfeval`, and `y_eval`.\n", - "\n", - "For training you will use the following features:\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Feature NameDescription
    sexGender of passenger
    ageAge of passenger
    n_siblings_spouses# siblings and partners aboard
    parch# of parents and children aboard
    fareFare passenger paid.
    classPassenger's class on ship
    deckWhich deck passenger was on
    embark_townWhich town passenger embarked from
    aloneIf passenger was alone
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AoPiWsJALr-k" - }, - "source": [ - "## Explore the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "slcat1yzmzw5" - }, - "source": [ - "Let's first preview some of the data and create summary statistics on the training set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "15PLelXBlxEW" - }, - "outputs": [], - "source": [ - "dftrain.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j2hiM4ETmqP0" - }, - "outputs": [], - "source": [ - "dftrain.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IR0e8V-LyJ4" - }, - "source": [ - "There are 627 and 264 examples in the training and evaluation sets, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_1NwYqGwDjFf" - }, - "outputs": [], - "source": [ - "dftrain.shape[0], dfeval.shape[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28UFJ4KSMK3V" - }, - "source": [ - "The majority of passengers are in their 20's and 30's." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CaVDmZtuDfux" - }, - "outputs": [], - "source": [ - "dftrain.age.hist(bins=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1pifWiCoMbR5" - }, - "source": [ - "There are approximately twice as male passengers as female passengers aboard." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-WazAq30MO5J" - }, - "outputs": [], - "source": [ - "dftrain.sex.value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7_XkxrpmmVU_" - }, - "source": [ - "The majority of passengers were in the \"third\" class." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zZ3PvVy4l4gI" - }, - "outputs": [], - "source": [ - "(dftrain['class']\n", - " .value_counts()\n", - " .plot(kind='barh'))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HM5SlwlxmZMT" - }, - "source": [ - "Most passengers embarked from Southampton." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RVTSrdr4mZaC" - }, - "outputs": [], - "source": [ - "(dftrain['embark_town']\n", - " .value_counts()\n", - " .plot(kind='barh'))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aTn1niLPob3x" - }, - "source": [ - "Females have a much higher chance of surviving vs. males. This will clearly be a predictive feature for the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eh3KW5oYkaNS" - }, - "outputs": [], - "source": [ - "ax = (pd.concat([dftrain, y_train], axis=1)\\\n", - " .groupby('sex')\n", - " .survived\n", - " .mean()\n", - " .plot(kind='barh'))\n", - "ax.set_xlabel('% survive')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns and input functions\n", - "The Gradient Boosting estimator can utilize both numeric and categorical features. Feature columns work with all TensorFlow estimators and their purpose is to define the features used for modeling. Additionally they provide some feature engineering capabilities like one-hot-encoding, normalization, and bucketization. 
In this tutorial, the fields in `CATEGORICAL_COLUMNS` are transformed from categorical columns to one-hot-encoded columns ([indicator column](https://www.tensorflow.org/api_docs/python/tf/feature_column/indicator_column)):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "74GNtFpStSAz" - }, - "source": [ - "You can view the transformation that a feature column produces. For example, here is the output when using the `indicator_column` on a single example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eaq79D9FtmF8" - }, - "outputs": [], - "source": [ - "example = dftrain.head(1)\n", - "class_fc = one_hot_cat_column('class', ('First', 'Second', 'Third'))\n", - "print('Feature value: \"{}\"'.format(example['class'].iloc[0]))\n", - "print('One-hot encoded: ', fc.input_layer(dict(example), [class_fc]).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YbCUn3nCusC3" - }, - "source": [ - "Additionally, you can view all of the feature column transformations together:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "omIYcsVws3g0" - }, - "outputs": [], - "source": [ - "fc.input_layer(dict(example), feature_columns).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Next you need to create the input functions. These will specify how data will be read into our model for both training and inference. You will use the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas. This is suitable for smaller, in-memory datasets. For larger datasets, the tf.data API supports a variety of file formats (including [csv](https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset)) so that you can process datasets that do not fit in memory." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = dataset.repeat(n_epochs)\n", - " # In memory training doesn't use batching.\n", - " dataset = dataset.batch(NUM_EXAMPLES)\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "## Train and evaluate the model\n", - "\n", - "Below you will do the following steps:\n", - "\n", - "1. Initialize the model, specifying the features and hyperparameters.\n", - "2. Feed the training data to the model using the `train_input_fn` and train the model using the `train` function.\n", - "3. You will assess model performance using the evaluation set—in this example, the `dfeval` DataFrame. You will verify that the predictions match the labels from the `y_eval` array.\n", - "\n", - "Before training a Boosted Trees model, let's first train a linear classifier (logistic regression model). It is best practice to start with simpler model to establish a benchmark." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JPOGpmmq3rJr" - }, - "outputs": [], - "source": [ - "linear_est = tf.estimator.LinearClassifier(feature_columns)\n", - "\n", - "# Train model.\n", - "linear_est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "results = linear_est.evaluate(eval_input_fn)\n", - "print('Accuracy : ', results['accuracy'])\n", - "print('Dummy model: ', results['accuracy_baseline'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BarkNXwA3rJu" - }, - "source": [ - "Next let's train a Boosted Trees model. For boosted trees, regression (`BoostedTreesRegressor`) and classification (`BoostedTreesClassifier`) are supported, along with using any twice differentiable custom loss (`BoostedTreesEstimator`). Since the goal is to predict a class - survive or not survive, you will use the `BoostedTreesClassifier`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "# Since data fits into memory, use entire dataset per layer. 
It will be faster.\n", - "# Above one batch is defined as the entire dataset.\n", - "n_batches = 1\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns,\n", - " n_batches_per_layer=n_batches)\n", - "\n", - "# The model will stop training once the specified number of trees is built, not\n", - "# based on the number of steps.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Eval.\n", - "results = est.evaluate(eval_input_fn)\n", - "print('Accuracy : ', results['accuracy'])\n", - "print('Dummy model: ', results['accuracy_baseline'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hEflwznXvuMP" - }, - "source": [ - "Now you can use the train model to make predictions on a passenger from the evaluation set. TensorFlow models are optimized to make predictions on a batch, or collection, of examples at once. Earlier, the `eval_input_fn` is defined using the entire evaluation set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6zmIjTr73rJ4" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.predict(eval_input_fn))\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "\n", - "probs.plot(kind='hist', bins=20, title='predicted probabilities')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mBUaNN1BzJHG" - }, - "source": [ - "Finally you can also look at the receiver operating characteristic (ROC) of the results, which will give us a better idea of the tradeoff between the true positive rate and false positive rate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NzxghvVz3rJ6" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import roc_curve\n", - "\n", - "fpr, tpr, _ = roc_curve(y_eval, probs)\n", - "plt.plot(fpr, tpr)\n", - "plt.title('ROC curve')\n", - "plt.xlabel('false positive rate')\n", - "plt.ylabel('true positive rate')\n", - "plt.xlim(0,)\n", - "plt.ylim(0,)\n", - "plt.show()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb b/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb deleted file mode 100644 index 6f3f2c2feb0..00000000000 --- a/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb +++ /dev/null @@ -1,1028 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r0_fqL3ayLHX" - }, - "source": [ - "# Gradient Boosted Trees: Model understanding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "For an end-to-end walkthrough of training a Gradient Boosting model check out the [boosted trees tutorial](https://www.tensorflow.org/r1/tutorials/estimators/boosted_trees). In this tutorial you will:\n", - "\n", - "* Learn how to interpret a Boosted Trees model both *locally* and *globally*\n", - "* Gain intution for how a Boosted Trees model fits a dataset\n", - "\n", - "## How to interpret Boosted Trees models both locally and globally\n", - "\n", - "Local interpretability refers to an understanding of a model’s predictions at the individual example level, while global interpretability refers to an understanding of the model as a whole. Such techniques can help machine learning (ML) practitioners detect bias and bugs during the model development stage\n", - "\n", - "For local interpretability, you will learn how to create and visualize per-instance contributions. To distinguish this from feature importances, we refer to these values as directional feature contributions (DFCs).\n", - "\n", - "For global interpretability you will retrieve and visualize gain-based feature importances, [permutation feature importances](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) and also show aggregated DFCs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow.compat.v1 as tf\n", - "\n", - "\n", - "tf.logging.set_verbosity(tf.logging.ERROR)\n", - "tf.set_random_seed(123)\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tfbt/titanic_train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tfbt/titanic_eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "For a description of the features, please review the prior tutorial." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns, input_fn, and the train the estimator" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JiJ6K3hr1lXW" - }, - "source": [ - "### Preprocess the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "udMytRJC05oW" - }, - "source": [ - "Create the feature columns, using the original numeric columns as is and one-hot-encoding categorical variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9rTefnXe1n0v" - }, - "source": [ - "### Build the input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Create the input functions using the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = (dataset\n", - " .repeat(n_epochs)\n", - " .batch(NUM_EXAMPLES))\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "params = {\n", - " 'n_trees': 50,\n", - " 'max_depth': 3,\n", - " 'n_batches_per_layer': 1,\n", - " # You must enable center_bias = True to get DFCs. This will force the model to\n", - " # make an initial prediction before using any features (e.g. use the mean of\n", - " # the training labels for regression or log odds for classification when\n", - " # using cross entropy loss).\n", - " 'center_bias': True\n", - "}\n", - "\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns, **params)\n", - "est.train(train_input_fn, max_steps=100)\n", - "results = est.evaluate(eval_input_fn)\n", - "pd.Series(results).to_frame()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cUrakbu6sqKe" - }, - "source": [ - "For performance reasons, when your data fits in memory, we recommend use the `boosted_trees_classifier_train_in_memory` function. 
However if training time is not of a concern or if you have a very large dataset and want to do distributed training, use the `tf.estimator.BoostedTrees` API shown above.\n", - "\n", - "\n", - "When using this method, you should not batch your input data, as the method operates on the entire dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-4_xz3b_D0W5" - }, - "outputs": [], - "source": [ - "in_memory_params = dict(params)\n", - "del in_memory_params['n_batches_per_layer']\n", - "# In-memory input_fn does not use batching.\n", - "def make_inmemory_train_input_fn(X, y):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " return dict(X), y\n", - " return input_fn\n", - "train_input_fn = make_inmemory_train_input_fn(dftrain, y_train)\n", - "\n", - "# Train the model.\n", - "est = tf.contrib.estimator.boosted_trees_classifier_train_in_memory(\n", - " train_input_fn,\n", - " feature_columns,\n", - " **in_memory_params)\n", - "print(est.evaluate(eval_input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TSZYqNcRuczV" - }, - "source": [ - "## Model interpretation and plotting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BjcfLiI3uczW" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns_colors = sns.color_palette('colorblind')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ywTtbBvBuczY" - }, - "source": [ - "## Local interpretability\n", - "Next you will output the directional feature contributions (DFCs) to explain individual predictions using the approach outlined in [Palczewska et al](https://arxiv.org/pdf/1312.1121.pdf) and by Saabas in [Interpreting Random Forests](http://blog.datadive.net/interpreting-random-forests/) (this method is also available in scikit-learn for Random Forests in the [`treeinterpreter`](https://github.com/andosa/treeinterpreter) package). The DFCs are generated with:\n", - "\n", - "`pred_dicts = list(est.experimental_predict_with_explanations(pred_input_fn))`\n", - "\n", - "(Note: The method is named experimental as we may modify the API before dropping the experimental prefix.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TIL93B4sDRqE" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tDPoRx_ZaY1E" - }, - "outputs": [], - "source": [ - "# Create DFC Pandas dataframe.\n", - "labels = y_eval.values\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])\n", - "df_dfc.describe().T" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EUKSaVoraY1C" - }, - "source": [ - "A nice property of DFCs is that the sum of the contributions + the bias is equal to the prediction for a given example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hd9VuizRaY1H" - }, - "outputs": [], - "source": [ - "# Sum of DFCs + bias == probabality.\n", - "bias = pred_dicts[0]['bias']\n", - "dfc_prob = df_dfc.sum(axis=1) + bias\n", - "np.testing.assert_almost_equal(dfc_prob.values,\n", - " probs.values)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uIC7qm1gaY1L" - }, - "source": [ - "Plot DFCs for an individual passenger." 
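Before the bar chart that follows, a quick tabular view can make the same information easier to scan. This is a small added sketch, not part of the original notebook; the index 182 simply matches the passenger plotted below:

```
# Show the eight largest-magnitude directional feature contributions for one
# passenger from the evaluation set, sorted by absolute value.
ID = 182  # same example index used in the plot below
example = df_dfc.iloc[ID]
example.reindex(example.abs().sort_values(ascending=False).index).head(8)
```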
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P3u971LsuczZ" - }, - "outputs": [], - "source": [ - "# Plot results.\n", - "ID = 182\n", - "example = df_dfc.iloc[ID] # Choose ith example from evaluation set.\n", - "TOP_N = 8 # View top 8 features.\n", - "sorted_ix = example.abs().sort_values()[-TOP_N:].index\n", - "ax = example[sorted_ix].plot(kind='barh', color=sns_colors[3])\n", - "ax.grid(False, axis='y')\n", - "\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "L4i4mjK66FYg" - }, - "source": [ - "The larger magnitude contributions have a larger impact on the model's prediction. Negative contributions indicate the feature value for this given example reduced the model's prediction, while positive values contribute an increase in the prediction." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tx5p4vEhuczg" - }, - "source": [ - "### Improved plotting\n", - "Let's make the plot nice by color coding based on the contributions' directionality and add the feature values on figure." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6z_Tq1Pquczj" - }, - "outputs": [], - "source": [ - "# Boilerplate code for plotting :)\n", - "def _get_color(value):\n", - " \"\"\"To make positive DFCs plot green, negative DFCs plot red.\"\"\"\n", - " green, red = sns.color_palette()[2:4]\n", - " if value >= 0: return green\n", - " return red\n", - "\n", - "def _add_feature_values(feature_values, ax):\n", - " \"\"\"Display feature's values on left of plot.\"\"\"\n", - " x_coord = ax.get_xlim()[0]\n", - " OFFSET = 0.15\n", - " for y_coord, (feat_name, feat_val) in enumerate(feature_values.items()):\n", - " t = plt.text(x_coord, y_coord - OFFSET, '{}'.format(feat_val), size=12)\n", - " t.set_bbox(dict(facecolor='white', alpha=0.5))\n", - " from matplotlib.font_manager import FontProperties\n", - " font = FontProperties()\n", - " font.set_weight('bold')\n", - " t = plt.text(x_coord, y_coord + 1 - OFFSET, 'feature\\nvalue',\n", - " fontproperties=font, size=12)\n", - "\n", - "def plot_example(example):\n", - " TOP_N = 8 # View top 8 features.\n", - " sorted_ix = example.abs().sort_values()[-TOP_N:].index # Sort by magnitude.\n", - " example = example[sorted_ix]\n", - " colors = example.map(_get_color).tolist()\n", - " ax = example.to_frame().plot(kind='barh',\n", - " color=[colors],\n", - " legend=None,\n", - " alpha=0.75,\n", - " figsize=(10,6))\n", - " ax.grid(False, axis='y')\n", - " ax.set_yticklabels(ax.get_yticklabels(), size=14)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - " return ax" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FlrsuOu8-Yds" - }, - "source": [ - "Plot example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ht1P2-1euczk" - }, - "outputs": [], - "source": [ - "example = df_dfc.iloc[ID] # Choose IDth example from evaluation set.\n", - "ax = plot_example(example)\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability', size=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0swvlkZFaY1Z" - }, - "source": [ - "You can also plot the example's DFCs compare with the entire distribution using a voilin plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zo7rNd1v_5e2" - }, - "outputs": [], - "source": [ - "# Boilerplate plotting code.\n", - "def dist_violin_plot(df_dfc, ID):\n", - " # Initialize plot.\n", - " fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", - "\n", - " # Create example dataframe.\n", - " TOP_N = 8 # View top 8 features.\n", - " example = df_dfc.iloc[ID]\n", - " ix = example.abs().sort_values()[-TOP_N:].index\n", - " example = example[ix]\n", - " example_df = example.to_frame(name='dfc')\n", - "\n", - " # Add contributions of entire distribution.\n", - " parts=ax.violinplot([df_dfc[w] for w in ix],\n", - " vert=False,\n", - " showextrema=False,\n", - " widths=0.7,\n", - " positions=np.arange(len(ix)))\n", - " face_color = sns_colors[0]\n", - " alpha = 0.15\n", - " for pc in parts['bodies']:\n", - " pc.set_facecolor(face_color)\n", - " pc.set_alpha(alpha)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - "\n", - " # Add local contributions.\n", - " ax.scatter(example,\n", - " np.arange(example.shape[0]),\n", - " color=sns.color_palette()[2],\n", - " s=100,\n", - " marker=\"s\",\n", - " label='contributions for example')\n", - "\n", - " # Legend\n", - " # Proxy plot, to show violinplot dist on legend.\n", - " ax.plot([0,0], [1,1], label='eval set contributions\\ndistributions',\n", - " color=face_color, alpha=alpha, linewidth=10)\n", - " legend = ax.legend(loc='lower right', shadow=True, fontsize='x-large',\n", - " frameon=True)\n", - " legend.get_frame().set_facecolor('white')\n", - "\n", - " # Format plot.\n", - " ax.set_yticks(np.arange(example.shape[0]))\n", - " ax.set_yticklabels(example.index)\n", - " ax.grid(False, axis='y')\n", - " ax.set_xlabel('Contribution to predicted probability', size=14)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PiLw2tlm_9aK" - }, - "source": [ - "Plot this example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VkCqraA2uczm" - }, - "outputs": [], - "source": [ - "dist_violin_plot(df_dfc, ID)\n", - "plt.title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TVJFM85SAWVq" - }, - "source": [ - "Finally, third-party tools, such as [LIME](https://github.com/marcotcr/lime) and [shap](https://github.com/slundberg/shap), can also help understand individual predictions for a model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PnNXH6mZuczr" - }, - "source": [ - "## Global feature importances\n", - "\n", - "Additionally, you might want to understand the model as a whole, rather than studying individual predictions. 
Below, you will compute and use:\n", - "\n", - "* Gain-based feature importances using `est.experimental_feature_importances`\n", - "* Permutation importances\n", - "* Aggregate DFCs using `est.experimental_predict_with_explanations`\n", - "\n", - "Gain-based feature importances measure the loss change when splitting on a particular feature, while permutation feature importances are computed by evaluating model performance on the evaluation set by shuffling each feature one-by-one and attributing the change in model performance to the shuffled feature.\n", - "\n", - "In general, permutation feature importance are preferred to gain-based feature importance, though both methods can be unreliable in situations where potential predictor variables vary in their scale of measurement or their number of categories and when features are correlated ([source](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-307)). Check out [this article](http://explained.ai/rf-importance/index.html) for an in-depth overview and great discussion on different feature importance types." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ocBcMatuczs" - }, - "source": [ - "### Gain-based feature importances" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gMaxCgPbBJ-j" - }, - "source": [ - "Gain-based feature importances are built into the TensorFlow Boosted Trees estimators using `est.experimental_feature_importances`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pPTxbAaeuczt" - }, - "outputs": [], - "source": [ - "importances = est.experimental_feature_importances(normalize=True)\n", - "df_imp = pd.Series(importances)\n", - "\n", - "# Visualize importances.\n", - "N = 8\n", - "ax = (df_imp.iloc[0:N][::-1]\n", - " .plot(kind='barh',\n", - " color=sns_colors[0],\n", - " title='Gain feature importances',\n", - " figsize=(10, 6)))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GvfAcBeGuczw" - }, - "source": [ - "### Average absolute DFCs\n", - "You can also average the absolute values of DFCs to understand impact at a global level." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JkvAWLWLuczx" - }, - "outputs": [], - "source": [ - "# Plot.\n", - "dfc_mean = df_dfc.abs().mean()\n", - "N = 8\n", - "sorted_ix = dfc_mean.abs().sort_values()[-N:].index # Average and sort by absolute.\n", - "ax = dfc_mean[sorted_ix].plot(kind='barh',\n", - " color=sns_colors[1],\n", - " title='Mean |directional feature contributions|',\n", - " figsize=(10, 6))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z0k_DvPLaY1o" - }, - "source": [ - "You can also see how DFCs vary as a feature value varies." 
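The regression plot below shows this for the continuous `fare` feature. For a categorical feature you can get a similar picture by averaging the contributions per category; the following is a small added sketch (not in the original notebook) that assumes `sex` is one of the DFC columns:

```
# Average directional feature contribution of 'sex', grouped by feature value.
pd.Series(df_dfc['sex'].values, index=dfeval['sex'].values).groupby(level=0).mean()
```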
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZcIfN1IpaY1o" - }, - "outputs": [], - "source": [ - "FEATURE = 'fare'\n", - "feature = pd.Series(df_dfc[FEATURE].values, index=dfeval[FEATURE].values).sort_index()\n", - "ax = sns.regplot(feature.index.values, feature.values, lowess=True)\n", - "ax.set_ylabel('contribution')\n", - "ax.set_xlabel(FEATURE)\n", - "ax.set_xlim(0, 100)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lbpG72ULucz0" - }, - "source": [ - "### Permutation feature importance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6esOw1VOucz0" - }, - "outputs": [], - "source": [ - "def permutation_importances(est, X_eval, y_eval, metric, features):\n", - " \"\"\"Column by column, shuffle values and observe effect on eval set.\n", - "\n", - " source: http://explained.ai/rf-importance/index.html\n", - " A similar approach can be done during training. See \"Drop-column importance\"\n", - " in the above article.\"\"\"\n", - " baseline = metric(est, X_eval, y_eval)\n", - " imp = []\n", - " for col in features:\n", - " save = X_eval[col].copy()\n", - " X_eval[col] = np.random.permutation(X_eval[col])\n", - " m = metric(est, X_eval, y_eval)\n", - " X_eval[col] = save\n", - " imp.append(baseline - m)\n", - " return np.array(imp)\n", - "\n", - "def accuracy_metric(est, X, y):\n", - " \"\"\"TensorFlow estimator accuracy.\"\"\"\n", - " eval_input_fn = make_input_fn(X,\n", - " y=y,\n", - " shuffle=False,\n", - " n_epochs=1)\n", - " return est.evaluate(input_fn=eval_input_fn)['accuracy']\n", - "features = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS\n", - "importances = permutation_importances(est, dfeval, y_eval, accuracy_metric,\n", - " features)\n", - "df_imp = pd.Series(importances, index=features)\n", - "\n", - "sorted_ix = df_imp.abs().sort_values().index\n", - "ax = df_imp[sorted_ix][-5:].plot(kind='barh', color=sns_colors[2], figsize=(10, 6))\n", - "ax.grid(False, axis='y')\n", - "ax.set_title('Permutation feature importance')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E236y3pVEzHg" - }, - "source": [ - "## Visualizing model fitting" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TrcQ-839EzZ6" - }, - "source": [ - "Lets first simulate/create training data using the following formula:\n", - "\n", - "\n", - "$$z=x* e^{-x^2 - y^2}$$\n", - "\n", - "\n", - "Where \\\\(z\\\\) is the dependent variable you are trying to predict and \\\\(x\\\\) and \\\\(y\\\\) are the features." 
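A note before the next cells: they import `griddata` from `matplotlib.mlab`, which was removed in Matplotlib 3.1 and later. If that import fails in your environment, a SciPy-based stand-in such as the sketch below could be used instead (an addition here, not part of the original notebook), and the failing import skipped:

```
try:
    from matplotlib.mlab import griddata  # removed in Matplotlib >= 3.1
except ImportError:
    from scipy.interpolate import griddata as _scipy_griddata

    def griddata(x, y, z, xi, yi, interp='linear'):
        """Stand-in for mlab.griddata: interpolate scattered (x, y, z) onto the grid (xi, yi).

        Points outside the data's convex hull are filled with 0 rather than masked.
        """
        return _scipy_griddata((x, y), z, (xi, yi), method=interp, fill_value=0.0)
```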
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e8woaj81GGE9" - }, - "outputs": [], - "source": [ - "from numpy.random import uniform, seed\n", - "from matplotlib.mlab import griddata\n", - "\n", - "# Create fake data\n", - "seed(0)\n", - "npts = 5000\n", - "x = uniform(-2, 2, npts)\n", - "y = uniform(-2, 2, npts)\n", - "z = x*np.exp(-x**2 - y**2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GRI3KHfLZsGP" - }, - "outputs": [], - "source": [ - "# Prep data for training.\n", - "df = pd.DataFrame({'x': x, 'y': y, 'z': z})\n", - "\n", - "xi = np.linspace(-2.0, 2.0, 200),\n", - "yi = np.linspace(-2.1, 2.1, 210),\n", - "xi,yi = np.meshgrid(xi, yi)\n", - "\n", - "df_predict = pd.DataFrame({\n", - " 'x' : xi.flatten(),\n", - " 'y' : yi.flatten(),\n", - "})\n", - "predict_shape = xi.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0JnH4IhZuAb" - }, - "outputs": [], - "source": [ - "def plot_contour(x, y, z, **kwargs):\n", - " # Grid the data.\n", - " plt.figure(figsize=(10, 8))\n", - " # Contour the gridded data, plotting dots at the nonuniform data points.\n", - " CS = plt.contour(x, y, z, 15, linewidths=0.5, colors='k')\n", - " CS = plt.contourf(x, y, z, 15,\n", - " vmax=abs(zi).max(), vmin=-abs(zi).max(), cmap='RdBu_r')\n", - " plt.colorbar() # Draw colorbar.\n", - " # Plot data points.\n", - " plt.xlim(-2, 2)\n", - " plt.ylim(-2, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KF7WsIcYGF_E" - }, - "source": [ - "You can visualize the function. Redder colors correspond to larger function values." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WrxuqaaXGFOK" - }, - "outputs": [], - "source": [ - "zi = griddata(x, y, z, xi, yi, interp='linear')\n", - "plot_contour(xi, yi, zi)\n", - "plt.scatter(df.x, df.y, marker='.')\n", - "plt.title('Contour on training data')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hoANr0f2GFrM" - }, - "outputs": [], - "source": [ - "fc = [tf.feature_column.numeric_column('x'),\n", - " tf.feature_column.numeric_column('y')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xVRWyoY3ayTK" - }, - "outputs": [], - "source": [ - "def predict(est):\n", - " \"\"\"Predictions from a given estimator.\"\"\"\n", - " predict_input_fn = lambda: tf.data.Dataset.from_tensors(dict(df_predict))\n", - " preds = np.array([p['predictions'][0] for p in est.predict(predict_input_fn)])\n", - " return preds.reshape(predict_shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uyPu5618GU7K" - }, - "source": [ - "First let's try to fit a linear model to the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zUIV2IVgGVSk" - }, - "outputs": [], - "source": [ - "train_input_fn = make_input_fn(df, df.z)\n", - "est = tf.estimator.LinearRegressor(fc)\n", - "est.train(train_input_fn, max_steps=500);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_u4WAcCqfbco" - }, - "outputs": [], - "source": [ - "plot_contour(xi, yi, predict(est))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XD_fMAUtSCSa" - }, - "source": [ - "It's not a very good fit. Next let's try to fit a GBDT model to it and try to understand how the model fits the function." 
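Before moving on, an optional added check (not in the original notebook) puts a number on how far off the linear model is by comparing its predictions against the true surface defined by the formula above:

```
# Evaluate the linear model against the known generating function
# z = x * exp(-x**2 - y**2) over the prediction grid.
z_true = xi * np.exp(-xi**2 - yi**2)
print('Linear model MSE over the grid:', np.mean((predict(est) - z_true)**2))
```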
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ka1GgvqmSCK7" - }, - "outputs": [], - "source": [ - "def create_bt_est(n_trees):\n", - " return tf.estimator.BoostedTreesRegressor(fc,\n", - " n_batches_per_layer=1,\n", - " n_trees=n_trees)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0s86Kq1R_Fc" - }, - "outputs": [], - "source": [ - "N_TREES = [1,2,3,4,10,20,50,100]\n", - "for n in N_TREES:\n", - " est = create_bt_est(n)\n", - " est.train(train_input_fn, max_steps=500)\n", - " plot_contour(xi, yi, predict(est))\n", - " plt.text(-1.8, 2.1, '# trees: {}'.format(n), color='w', backgroundcolor='black', size=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5WcZ9fubh1wT" - }, - "source": [ - "As you increase the number of trees, the model's predictions better approximates the underlying function." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SMKoEZnCdrsp" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZSZUSrjXdw9g" - }, - "source": [ - "In this tutorial you learned how to interpret Boosted Trees models using directional feature contributions and feature importance techniques. These techniques provide insight into how the features impact a model's predictions. Finally, you also gained intution for how a Boosted Tree model fits a complex function by viewing the decision surface for several models." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees_model_understanding.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/cnn.ipynb b/site/en/r1/tutorials/estimators/cnn.ipynb deleted file mode 100644 index 6ce033f2d30..00000000000 --- a/site/en/r1/tutorials/estimators/cnn.ipynb +++ /dev/null @@ -1,973 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "# Build a Convolutional Neural Network using Estimators\n", - "\n", - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xHxb-dlhMIzW" - }, - "source": [ - "The `tf.layers` module provides a high-level API that makes\n", - "it easy to construct a neural network. It provides methods that facilitate the\n", - "creation of dense (fully connected) layers and convolutional layers, adding\n", - "activation functions, and applying dropout regularization. In this tutorial,\n", - "you'll learn how to use `layers` to build a convolutional neural network model\n", - "to recognize the handwritten digits in the MNIST data set.\n", - "\n", - "![handwritten digits 0–9 from the MNIST data set](https://www.tensorflow.org/images/mnist_0-9.png)\n", - "\n", - "The [MNIST dataset](http://yann.lecun.com/exdb/mnist/) comprises 60,000\n", - "training examples and 10,000 test examples of the handwritten digits 0–9,\n", - "formatted as 28x28-pixel monochrome images." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wTe-6uXpP2Ts" - }, - "source": [ - "## Get Started\n", - "\n", - "Let's set up the imports for our TensorFlow program:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6-tpguHLP6Rm" - }, - "outputs": [], - "source": [ - "import tensorflow.compat.v1 as tf\n", - "\n", - "import numpy as np\n", - "\n", - "tf.logging.set_verbosity(tf.logging.INFO)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4j5yyyDFQgSB" - }, - "source": [ - "## Intro to Convolutional Neural Networks\n", - "\n", - "Convolutional neural networks (CNNs) are the current state-of-the-art model\n", - "architecture for image classification tasks. CNNs apply a series of filters to\n", - "the raw pixel data of an image to extract and learn higher-level features, which\n", - "the model can then use for classification. CNNs contains three components:\n", - "\n", - "* **Convolutional layers**, which apply a specified number of convolution\n", - " filters to the image. For each subregion, the layer performs a set of\n", - " mathematical operations to produce a single value in the output feature map.\n", - " Convolutional layers then typically apply a\n", - " [ReLU activation function](https://en.wikipedia.org/wiki/Rectifier_\\(neural_networks\\)) to\n", - " the output to introduce nonlinearities into the model.\n", - "\n", - "* **Pooling layers**, which\n", - " [downsample the image data](https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer)\n", - " extracted by the convolutional layers to reduce the dimensionality of the\n", - " feature map in order to decrease processing time. A commonly used pooling\n", - " algorithm is max pooling, which extracts subregions of the feature map\n", - " (e.g., 2x2-pixel tiles), keeps their maximum value, and discards all other\n", - " values.\n", - "\n", - "* **Dense (fully connected) layers**, which perform classification on the\n", - " features extracted by the convolutional layers and downsampled by the\n", - " pooling layers. 
In a dense layer, every node in the layer is connected to\n", - " every node in the preceding layer.\n", - "\n", - "Typically, a CNN is composed of a stack of convolutional modules that perform\n", - "feature extraction. Each module consists of a convolutional layer followed by a\n", - "pooling layer. The last convolutional module is followed by one or more dense\n", - "layers that perform classification. The final dense layer in a CNN contains a\n", - "single node for each target class in the model (all the possible classes the\n", - "model may predict), with a\n", - "[softmax](https://en.wikipedia.org/wiki/Softmax_function) activation function to\n", - "generate a value between 0–1 for each node (the sum of all these softmax values\n", - "is equal to 1). We can interpret the softmax values for a given image as\n", - "relative measurements of how likely it is that the image falls into each target\n", - "class.\n", - "\n", - "Note: For a more comprehensive walkthrough of CNN architecture, see Stanford University's [Convolutional Neural Networks for Visual Recognition course material](https://cs231n.github.io/convolutional-networks/)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j23E_Z0FQvZB" - }, - "source": [ - "## Building the CNN MNIST Classifier\n", - "\n", - "Let's build a model to classify the images in the MNIST dataset using the\n", - "following CNN architecture:\n", - "\n", - "1. **Convolutional Layer #1**: Applies 32 5x5 filters (extracting 5x5-pixel\n", - " subregions), with ReLU activation function\n", - "2. **Pooling Layer #1**: Performs max pooling with a 2x2 filter and stride of 2\n", - " (which specifies that pooled regions do not overlap)\n", - "3. **Convolutional Layer #2**: Applies 64 5x5 filters, with ReLU activation\n", - " function\n", - "4. **Pooling Layer #2**: Again, performs max pooling with a 2x2 filter and\n", - " stride of 2\n", - "5. **Dense Layer #1**: 1,024 neurons, with dropout regularization rate of 0.4\n", - " (probability of 0.4 that any given element will be dropped during training)\n", - "6. **Dense Layer #2 (Logits Layer)**: 10 neurons, one for each digit target\n", - " class (0–9).\n", - "\n", - "The `tf.layers` module contains methods to create each of the three layer types\n", - "above:\n", - "\n", - "* `conv2d()`. Constructs a two-dimensional convolutional layer. Takes number\n", - " of filters, filter kernel size, padding, and activation function as\n", - " arguments.\n", - "* `max_pooling2d()`. Constructs a two-dimensional pooling layer using the\n", - " max-pooling algorithm. Takes pooling filter size and stride as arguments.\n", - "* `dense()`. Constructs a dense layer. Takes number of neurons and activation\n", - " function as arguments.\n", - "\n", - "Each of these methods accepts a tensor as input and returns a transformed tensor\n", - "as output. This makes it easy to connect one layer to another: just take the\n", - "output from one layer-creation method and supply it as input to another.\n", - "\n", - "Add the following `cnn_model_fn` function, which\n", - "conforms to the interface expected by TensorFlow's Estimator API (more on this\n", - "later in [Create the Estimator](#create-the-estimator)). 
This function takes\n", - "MNIST feature data, labels, and mode (from\n", - "`tf.estimator.ModeKeys`: `TRAIN`, `EVAL`, `PREDICT`) as arguments;\n", - "configures the CNN; and returns predictions, loss, and a training operation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gMR-_3rkRKPa" - }, - "outputs": [], - "source": [ - "def cnn_model_fn(features, labels, mode):\n", - " \"\"\"Model function for CNN.\"\"\"\n", - " # Input Layer\n", - " input_layer = tf.reshape(features[\"x\"], [-1, 28, 28, 1])\n", - "\n", - " # Convolutional Layer #1\n", - " conv1 = tf.layers.conv2d(\n", - " inputs=input_layer,\n", - " filters=32,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "\n", - " # Pooling Layer #1\n", - " pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", - "\n", - " # Convolutional Layer #2 and Pooling Layer #2\n", - " conv2 = tf.layers.conv2d(\n", - " inputs=pool1,\n", - " filters=64,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - " pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", - "\n", - " # Dense Layer\n", - " pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", - " dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)\n", - " dropout = tf.layers.dropout(\n", - " inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)\n", - "\n", - " # Logits Layer\n", - " logits = tf.layers.dense(inputs=dropout, units=10)\n", - "\n", - " predictions = {\n", - " # Generate predictions (for PREDICT and EVAL mode)\n", - " \"classes\": tf.argmax(input=logits, axis=1),\n", - " # Add `softmax_tensor` to the graph. It is used for PREDICT and by the\n", - " # `logging_hook`.\n", - " \"probabilities\": tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - " }\n", - "\n", - " if mode == tf.estimator.ModeKeys.PREDICT:\n", - " return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n", - "\n", - " # Calculate Loss (for both TRAIN and EVAL modes)\n", - " loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n", - "\n", - " # Configure the Training Op (for TRAIN mode)\n", - " if mode == tf.estimator.ModeKeys.TRAIN:\n", - " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n", - " train_op = optimizer.minimize(\n", - " loss=loss,\n", - " global_step=tf.train.get_global_step())\n", - " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", - "\n", - " # Add evaluation metrics (for EVAL mode)\n", - " eval_metric_ops = {\n", - " \"accuracy\": tf.metrics.accuracy(\n", - " labels=labels, predictions=predictions[\"classes\"])\n", - " }\n", - " return tf.estimator.EstimatorSpec(\n", - " mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b7z8qC9FRSLB" - }, - "source": [ - "The following sections (with headings corresponding to each code block above)\n", - "dive deeper into the `tf.layers` code used to create each layer, as well as how\n", - "to calculate loss, configure the training op, and generate predictions. If\n", - "you're already experienced with CNNs and [TensorFlow `Estimator`s](../../guide/custom_estimators.md),\n", - "and find the above code intuitive, you may want to skim these sections or just\n", - "skip ahead to [\"Training and Evaluating the CNN MNIST Classifier\"](#train_eval_mnist)." 
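As a quick orientation before the layer-by-layer sections, here is a summary of the tensor shapes flowing through `cnn_model_fn` for a batch of N 28x28 monochrome images (added here; the shapes are the ones derived in the sections that follow):

```
# Shape walk-through for cnn_model_fn (N = batch size):
# input_layer: [N, 28, 28, 1]
# conv1:       [N, 28, 28, 32]   padding="same" preserves height/width
# pool1:       [N, 14, 14, 32]   2x2 max pooling with stride 2
# conv2:       [N, 14, 14, 64]
# pool2:       [N, 7, 7, 64]
# pool2_flat:  [N, 3136]         since 7 * 7 * 64 == 3136
# dense:       [N, 1024]
# dropout:     [N, 1024]
# logits:      [N, 10]
assert 7 * 7 * 64 == 3136
```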
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sFBXEYRlRUWu" - }, - "source": [ - "### Input Layer\n", - "\n", - "The methods in the `layers` module for creating convolutional and pooling layers\n", - "for two-dimensional image data expect input tensors to have a shape of\n", - "[batch_size, image_height, image_width,\n", - "channels] by default. This behavior can be changed using the\n", - "data_format parameter; defined as follows:\n", - "\n", - "* `batch_size` —Size of the subset of examples to use when performing\n", - " gradient descent during training.\n", - "* `image_height` —Height of the example images.\n", - "* `image_width` —Width of the example images.\n", - "* `channels` —Number of color channels in the example images. For color\n", - " images, the number of channels is 3 (red, green, blue). For monochrome\n", - " images, there is just 1 channel (black).\n", - "* `data_format` —A string, one of `channels_last` (default) or `channels_first`.\n", - " `channels_last` corresponds to inputs with shape\n", - " `(batch, ..., channels)` while `channels_first` corresponds to\n", - " inputs with shape `(batch, channels, ...)`.\n", - "\n", - "Here, our MNIST dataset is composed of monochrome 28x28 pixel images, so the\n", - "desired shape for our input layer is [batch_size, 28, 28,\n", - "1].\n", - "\n", - "To convert our input feature map (`features`) to this shape, we can perform the\n", - "following `reshape` operation:\n", - "\n", - "```\n", - "input_layer = tf.reshape(features[\"x\"], [-1, 28, 28, 1])\n", - "```\n", - "\n", - "Note that we've indicated `-1` for batch size, which specifies that this\n", - "dimension should be dynamically computed based on the number of input values in\n", - "`features[\"x\"]`, holding the size of all other dimensions constant. This allows\n", - "us to treat `batch_size` as a hyperparameter that we can tune. For example, if\n", - "we feed examples into our model in batches of 5, `features[\"x\"]` will contain\n", - "3,920 values (one value for each pixel in each image), and `input_layer` will\n", - "have a shape of `[5, 28, 28, 1]`. Similarly, if we feed examples in batches of\n", - "100, `features[\"x\"]` will contain 78,400 values, and `input_layer` will have a\n", - "shape of `[100, 28, 28, 1]`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iU8Jr1_JRiKA" - }, - "source": [ - "### Convolutional Layer #1\n", - "\n", - "In our first convolutional layer, we want to apply 32 5x5 filters to the input\n", - "layer, with a ReLU activation function. We can use the `conv2d()` method in the\n", - "`layers` module to create this layer as follows:\n", - "\n", - "```\n", - "conv1 = tf.layers.conv2d(\n", - " inputs=input_layer,\n", - " filters=32,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "```\n", - "\n", - "The `inputs` argument specifies our input tensor, which must have the shape\n", - "[batch_size, image_height, image_width,\n", - "channels]. Here, we're connecting our first convolutional layer\n", - "to `input_layer`, which has the shape [batch_size, 28, 28,\n", - "1].\n", - "\n", - "Note: `conv2d()` will instead accept a shape of `[batch_size, channels, image_height, image_width]` when passed the argument `data_format=channels_first`.\n", - "\n", - "The `filters` argument specifies the number of filters to apply (here, 32), and\n", - "`kernel_size` specifies the dimensions of the filters as `[height,\n", - "width] (here, [5, 5]`).\n", - "\n", - "

    TIP: If filter height and width have the same value, you can instead specify a\n", - "single integer for kernel_size—e.g., kernel_size=5.

    \n", - "\n", - "The `padding` argument specifies one of two enumerated values\n", - "(case-insensitive): `valid` (default value) or `same`. To specify that the\n", - "output tensor should have the same height and width values as the input tensor,\n", - "we set `padding=same` here, which instructs TensorFlow to add 0 values to the\n", - "edges of the input tensor to preserve height and width of 28. (Without padding,\n", - "a 5x5 convolution over a 28x28 tensor will produce a 24x24 tensor, as there are\n", - "24x24 locations to extract a 5x5 tile from a 28x28 grid.)\n", - "\n", - "The `activation` argument specifies the activation function to apply to the\n", - "output of the convolution. Here, we specify ReLU activation with\n", - "`tf.nn.relu`.\n", - "\n", - "Our output tensor produced by `conv2d()` has a shape of\n", - "[batch_size, 28, 28, 32]: the same height and width\n", - "dimensions as the input, but now with 32 channels holding the output from each\n", - "of the filters." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8qzx1ZMFRqt_" - }, - "source": [ - "### Pooling Layer #1\n", - "\n", - "Next, we connect our first pooling layer to the convolutional layer we just\n", - "created. We can use the `max_pooling2d()` method in `layers` to construct a\n", - "layer that performs max pooling with a 2x2 filter and stride of 2:\n", - "\n", - "```\n", - "pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", - "```\n", - "\n", - "Again, `inputs` specifies the input tensor, with a shape of\n", - "[batch_size, image_height, image_width,\n", - "channels]. Here, our input tensor is `conv1`, the output from\n", - "the first convolutional layer, which has a shape of [batch_size,\n", - "28, 28, 32].\n", - "\n", - "Note: As with conv2d(), max_pooling2d() will instead\n", - "accept a shape of [batch_size, channels,\n", - "image_height, image_width] when passed the argument\n", - "data_format=channels_first.\n", - "\n", - "The `pool_size` argument specifies the size of the max pooling filter as\n", - "[height, width] (here, `[2, 2]`). If both\n", - "dimensions have the same value, you can instead specify a single integer (e.g.,\n", - "`pool_size=2`).\n", - "\n", - "The `strides` argument specifies the size of the stride. Here, we set a stride\n", - "of 2, which indicates that the subregions extracted by the filter should be\n", - "separated by 2 pixels in both the height and width dimensions (for a 2x2 filter,\n", - "this means that none of the regions extracted will overlap). If you want to set\n", - "different stride values for height and width, you can instead specify a tuple or\n", - "list (e.g., `stride=[3, 6]`).\n", - "\n", - "Our output tensor produced by `max_pooling2d()` (`pool1`) has a shape of\n", - "[batch_size, 14, 14, 32]: the 2x2 filter reduces height and width by 50% each." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xXej53NlRzFh" - }, - "source": [ - "### Convolutional Layer #2 and Pooling Layer #2\n", - "\n", - "We can connect a second convolutional and pooling layer to our CNN using\n", - "`conv2d()` and `max_pooling2d()` as before. 
For convolutional layer #2, we\n", - "configure 64 5x5 filters with ReLU activation, and for pooling layer #2, we use\n", - "the same specs as pooling layer #1 (a 2x2 max pooling filter with stride of 2):\n", - "\n", - "```\n", - "conv2 = tf.layers.conv2d(\n", - " inputs=pool1,\n", - " filters=64,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "\n", - "pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", - "```\n", - "\n", - "Note that convolutional layer #2 takes the output tensor of our first pooling\n", - "layer (`pool1`) as input, and produces the tensor `conv2` as output. `conv2`\n", - "has a shape of [batch_size, 14, 14, 64], the same height and width as `pool1` (due to `padding=\"same\"`), and 64 channels for the 64\n", - "filters applied.\n", - "\n", - "Pooling layer #2 takes `conv2` as input, producing `pool2` as output. `pool2`\n", - "has shape [batch_size, 7, 7, 64] (50% reduction of height and width from `conv2`)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jjmLqVP7R7z6" - }, - "source": [ - "### Dense Layer\n", - "\n", - "Next, we want to add a dense layer (with 1,024 neurons and ReLU activation) to\n", - "our CNN to perform classification on the features extracted by the\n", - "convolution/pooling layers. Before we connect the layer, however, we'll flatten\n", - "our feature map (`pool2`) to shape [batch_size,\n", - "features], so that our tensor has only two dimensions:\n", - "\n", - "```\n", - "pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", - "```\n", - "\n", - "In the `reshape()` operation above, the `-1` signifies that the *`batch_size`*\n", - "dimension will be dynamically calculated based on the number of examples in our\n", - "input data. Each example has 7 (`pool2` height) * 7 (`pool2` width) * 64\n", - "(`pool2` channels) features, so we want the `features` dimension to have a value\n", - "of 7 * 7 * 64 (3136 in total). The output tensor, `pool2_flat`, has shape\n", - "[batch_size, 3136].\n", - "\n", - "Now, we can use the `dense()` method in `layers` to connect our dense layer as\n", - "follows:\n", - "\n", - "```\n", - "dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)\n", - "```\n", - "\n", - "The `inputs` argument specifies the input tensor: our flattened feature map,\n", - "`pool2_flat`. The `units` argument specifies the number of neurons in the dense\n", - "layer (1,024). The `activation` argument takes the activation function; again,\n", - "we'll use `tf.nn.relu` to add ReLU activation.\n", - "\n", - "To help improve the results of our model, we also apply dropout regularization\n", - "to our dense layer, using the `dropout` method in `layers`:\n", - "\n", - "```\n", - "dropout = tf.layers.dropout(\n", - " inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)\n", - "```\n", - "\n", - "Again, `inputs` specifies the input tensor, which is the output tensor from our\n", - "dense layer (`dense`).\n", - "\n", - "The `rate` argument specifies the dropout rate; here, we use `0.4`, which means\n", - "40% of the elements will be randomly dropped out during training.\n", - "\n", - "The `training` argument takes a boolean specifying whether or not the model is\n", - "currently being run in training mode; dropout will only be performed if\n", - "`training` is `True`. 
Here, we check if the `mode` passed to our model function\n", - "`cnn_model_fn` is `TRAIN` mode.\n", - "\n", - "Our output tensor `dropout` has shape [batch_size, 1024]." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rzUcwkCZSTF7" - }, - "source": [ - "### Logits Layer\n", - "\n", - "The final layer in our neural network is the logits layer, which will return the\n", - "raw values for our predictions. We create a dense layer with 10 neurons (one for\n", - "each target class 0–9), with linear activation (the default):\n", - "\n", - "```\n", - "logits = tf.layers.dense(inputs=dropout, units=10)\n", - "```\n", - "\n", - "Our final output tensor of the CNN, `logits`, has shape `[batch_size, 10]`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y3uJ0V1KSakc" - }, - "source": [ - "### Generate Predictions {#generate_predictions}\n", - "\n", - "The logits layer of our model returns our predictions as raw values in a\n", - "[batch_size, 10]-dimensional tensor. Let's convert these\n", - "raw values into two different formats that our model function can return:\n", - "\n", - "* The **predicted class** for each example: a digit from 0–9.\n", - "* The **probabilities** for each possible target class for each example: the\n", - " probability that the example is a 0, is a 1, is a 2, etc.\n", - "\n", - "For a given example, our predicted class is the element in the corresponding row\n", - "of the logits tensor with the highest raw value. We can find the index of this\n", - "element using the `tf.argmax`\n", - "function:\n", - "\n", - "```\n", - "tf.argmax(input=logits, axis=1)\n", - "```\n", - "\n", - "The `input` argument specifies the tensor from which to extract maximum\n", - "values—here `logits`. The `axis` argument specifies the axis of the `input`\n", - "tensor along which to find the greatest value. Here, we want to find the largest\n", - "value along the dimension with index of 1, which corresponds to our predictions\n", - "(recall that our logits tensor has shape [batch_size,\n", - "10]).\n", - "\n", - "We can derive probabilities from our logits layer by applying softmax activation\n", - "using `tf.nn.softmax`:\n", - "\n", - "```\n", - "tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - "```\n", - "\n", - "Note: We use the `name` argument to explicitly name this operation `softmax_tensor`, so we can reference it later. (We'll set up logging for the softmax values in [\"Set Up a Logging Hook\"](#set-up-a-logging-hook)).\n", - "\n", - "We compile our predictions in a dict, and return an `EstimatorSpec` object:\n", - "\n", - "```\n", - "predictions = {\n", - " \"classes\": tf.argmax(input=logits, axis=1),\n", - " \"probabilities\": tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - "}\n", - "if mode == tf.estimator.ModeKeys.PREDICT:\n", - " return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f2ks_tqSSucg" - }, - "source": [ - "### Calculate Loss {#calculating-loss}\n", - "\n", - "For both training and evaluation, we need to define a\n", - "[loss function](https://en.wikipedia.org/wiki/Loss_function)\n", - "that measures how closely the model's predictions match the target classes. For\n", - "multiclass classification problems like MNIST,\n", - "[cross entropy](https://en.wikipedia.org/wiki/Cross_entropy) is typically used\n", - "as the loss metric. 
The following code calculates cross entropy when the model\n", - "runs in either `TRAIN` or `EVAL` mode:\n", - "\n", - "```\n", - "loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n", - "```\n", - "\n", - "Let's take a closer look at what's happening above.\n", - "\n", - "Our `labels` tensor contains a list of prediction indices for our examples, e.g. `[1,\n", - "9, ...]`. `logits` contains the linear outputs of our last layer.\n", - "\n", - "`tf.losses.sparse_softmax_cross_entropy`, calculates the softmax crossentropy\n", - "(aka: categorical crossentropy, negative log-likelihood) from these two inputs\n", - "in an efficient, numerically stable way." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YgE7Ll3pS2FG" - }, - "source": [ - "### Configure the Training Op\n", - "\n", - "In the previous section, we defined loss for our CNN as the softmax\n", - "cross-entropy of the logits layer and our labels. Let's configure our model to\n", - "optimize this loss value during training. We'll use a learning rate of 0.001 and\n", - "[stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)\n", - "as the optimization algorithm:\n", - "\n", - "```\n", - "if mode == tf.estimator.ModeKeys.TRAIN:\n", - " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n", - " train_op = optimizer.minimize(\n", - " loss=loss,\n", - " global_step=tf.train.get_global_step())\n", - " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rEJPnXAzS6m9" - }, - "source": [ - "Note: For a more in-depth look at configuring training ops for Estimator model functions, see [\"Defining the training op for the model\"](../../guide/custom_estimators.md#defining-the-training-op-for-the-model) in the [\"Creating Estimations in tf.estimator\"](../../guide/custom_estimators.md) tutorial." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QQuGDWvHTAib" - }, - "source": [ - "### Add evaluation metrics\n", - "\n", - "To add accuracy metric in our model, we define `eval_metric_ops` dict in EVAL\n", - "mode as follows:\n", - "\n", - "```\n", - "eval_metric_ops = {\n", - " \"accuracy\": tf.metrics.accuracy(\n", - " labels=labels, predictions=predictions[\"classes\"])\n", - "}\n", - "return tf.estimator.EstimatorSpec(\n", - " mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y2Bwe-AdTRzX" - }, - "source": [ - "\n", - "## Training and Evaluating the CNN MNIST Classifier\n", - "\n", - "We've coded our MNIST CNN model function; now we're ready to train and evaluate\n", - "it." 
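Before loading any data, a tiny numeric aside can make the loss defined above more concrete. This is an added sketch with made-up logits (the values are illustrative only, not from the tutorial):

```
# What sparse softmax cross entropy computes for one example whose true label is 3:
example_logits = np.array([0.5, 1.2, -0.3, 2.0, 0.1, -1.0, 0.0, 0.3, 0.7, -0.5])
example_label = 3
probabilities = np.exp(example_logits) / np.sum(np.exp(example_logits))
print('Cross-entropy loss:', -np.log(probabilities[example_label]))
```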
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6EC9aOY2TTLU" - }, - "source": [ - "### Load Training and Test Data\n", - "\n", - "First, let's load our training and test data with the following code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ccobb0qETV-S" - }, - "outputs": [], - "source": [ - "# Load training and eval data\n", - "((train_data, train_labels),\n", - " (eval_data, eval_labels)) = tf.keras.datasets.mnist.load_data()\n", - "\n", - "train_data = train_data/np.float32(255)\n", - "train_labels = train_labels.astype(np.int32) # not required\n", - "\n", - "eval_data = eval_data/np.float32(255)\n", - "eval_labels = eval_labels.astype(np.int32) # not required" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8l84-IxSTZnO" - }, - "source": [ - "We store the training feature data (the raw pixel values for 55,000 images of\n", - "hand-drawn digits) and training labels (the corresponding value from 0–9 for\n", - "each image) as [numpy\n", - "arrays](https://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html)\n", - "in `train_data` and `train_labels`, respectively. Similarly, we store the\n", - "evaluation feature data (10,000 images) and evaluation labels in `eval_data`\n", - "and `eval_labels`, respectively." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S2_Isc7kTa45" - }, - "source": [ - "### Create the Estimator {#create-the-estimator}\n", - "\n", - "Next, let's create an `Estimator` (a TensorFlow class for performing high-level\n", - "model training, evaluation, and inference) for our model. Add the following code\n", - "to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yjC6HdwZTdg4" - }, - "outputs": [], - "source": [ - "# Create the Estimator\n", - "mnist_classifier = tf.estimator.Estimator(\n", - " model_fn=cnn_model_fn, model_dir=\"/tmp/mnist_convnet_model\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f78EBcg7TfTU" - }, - "source": [ - "The `model_fn` argument specifies the model function to use for training,\n", - "evaluation, and prediction; we pass it the `cnn_model_fn` we created in\n", - "[\"Building the CNN MNIST Classifier.\"](#building-the-cnn-mnist-classifier) The\n", - "`model_dir` argument specifies the directory where model data (checkpoints) will\n", - "be saved (here, we specify the temp directory `/tmp/mnist_convnet_model`, but\n", - "feel free to change to another directory of your choice).\n", - "\n", - "Note: For an in-depth walkthrough of the TensorFlow `Estimator` API, see the tutorial [Creating Estimators in tf.estimator](../../guide/custom_estimators.md)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_6ow7hVYTm3f" - }, - "source": [ - "### Set Up a Logging Hook {#set_up_a_logging_hook}\n", - "\n", - "Since CNNs can take a while to train, let's set up some logging so we can track\n", - "progress during training. We can use TensorFlow's `tf.train.SessionRunHook` to create a\n", - "`tf.train.LoggingTensorHook`\n", - "that will log the probability values from the softmax layer of our CNN. 
Add the\n", - "following to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S6T10kssTpdz" - }, - "outputs": [], - "source": [ - "# Set up logging for predictions\n", - "tensors_to_log = {\"probabilities\": \"softmax_tensor\"}\n", - "\n", - "logging_hook = tf.train.LoggingTensorHook(\n", - " tensors=tensors_to_log, every_n_iter=50)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RZdtZ6JQTsmg" - }, - "source": [ - "We store a dict of the tensors we want to log in `tensors_to_log`. Each key is a\n", - "label of our choice that will be printed in the log output, and the\n", - "corresponding value is the name of a `Tensor` in the TensorFlow graph. Here, our\n", - "`probabilities` can be found in `softmax_tensor`, the name we gave our softmax\n", - "operation earlier when we generated the probabilities in `cnn_model_fn`.\n", - "\n", - "Note: If you don't explicitly assign a name to an operation via the `name` argument, TensorFlow will assign a default name. A couple of easy ways to discover the names applied to operations are to visualize your graph on [TensorBoard](../../guide/graph_viz.md) or to enable the [TensorFlow Debugger (tfdbg)](../../guide/debugger.md).\n", - "\n", - "Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the\n", - "`tensors` argument. We set `every_n_iter=50`, which specifies that probabilities\n", - "should be logged after every 50 steps of training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "brVs1dRMT0NM" - }, - "source": [ - "### Train the Model\n", - "\n", - "Now we're ready to train our model, which we can do by creating `train_input_fn`\n", - "and calling `train()` on `mnist_classifier`. In the `numpy_input_fn` call, we pass the training feature data and labels to\n", - "`x` (as a dict) and `y`, respectively. We set a `batch_size` of `100` (which\n", - "means that the model will train on minibatches of 100 examples at each step).\n", - "`num_epochs=None` means that the model will train until the specified number of\n", - "steps is reached. We also set `shuffle=True` to shuffle the training data. Then train the model a single step and log the output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "h-dewpleT2sk" - }, - "outputs": [], - "source": [ - "# Train the model\n", - "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n", - " x={\"x\": train_data},\n", - " y=train_labels,\n", - " batch_size=100,\n", - " num_epochs=None,\n", - " shuffle=True)\n", - "\n", - "# train one step and display the probabilities\n", - "mnist_classifier.train(\n", - " input_fn=train_input_fn,\n", - " steps=1,\n", - " hooks=[logging_hook])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gyNSE3e-14Lq" - }, - "source": [ - "Now, without logging each step, set `steps=1000` to train the model longer while keeping this example's runtime reasonable. Training CNNs is computationally intensive. To increase the accuracy of your model, increase the number of `steps` passed to `train()`, for example to 20,000 steps."
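The string `"softmax_tensor"` in `tensors_to_log` refers to a graph name, not a Python variable, so the hook only works because the softmax op was created with that explicit name earlier in `cnn_model_fn`. As a reminder of the naming pattern (a one-line sketch of the earlier code, not something new to add):

```python
# Naming the op makes it addressable as "softmax_tensor" by hooks and TensorBoard.
probabilities = tf.nn.softmax(logits, name="softmax_tensor")
```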
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cri6zqcf2IXY" - }, - "outputs": [], - "source": [ - "mnist_classifier.train(input_fn=train_input_fn, steps=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4bQdkLMeUE5U" - }, - "source": [ - "### Evaluate the Model\n", - "\n", - "Once training is complete, we want to evaluate our model to determine its\n", - "accuracy on the MNIST test set. We call the `evaluate` method, which evaluates\n", - "the metrics we specified in `eval_metric_ops` argument in the `model_fn`.\n", - "Add the following to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "I0RGiqd0UF0N" - }, - "outputs": [], - "source": [ - "eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n", - " x={\"x\": eval_data},\n", - " y=eval_labels,\n", - " num_epochs=1,\n", - " shuffle=False)\n", - "\n", - "eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)\n", - "print(eval_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JIBVID6dUIXT" - }, - "source": [ - "To create `eval_input_fn`, we set `num_epochs=1`, so that the model evaluates\n", - "the metrics over one epoch of data and returns the result. We also set\n", - "`shuffle=False` to iterate through the data sequentially." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "htmLZ-zEUZZk" - }, - "source": [ - "## Additional Resources\n", - "\n", - "To learn more about TensorFlow Estimators and CNNs in TensorFlow, see the\n", - "following resources:\n", - "\n", - "* [Creating Estimators in tf.estimator](../../guide/custom_estimators.md)\n", - " provides an introduction to the TensorFlow Estimator API. It walks through\n", - " configuring an Estimator, writing a model function, calculating loss, and\n", - " defining a training op.\n", - "* [Advanced Convolutional Neural Networks](../../tutorials/images/deep_cnn.md) walks through how to build a MNIST CNN classification model\n", - " *without estimators* using lower-level TensorFlow operations." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "Tce3stUlHN0L" - ], - "name": "cnn.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/linear.ipynb b/site/en/r1/tutorials/estimators/linear.ipynb deleted file mode 100644 index 4155e0974a1..00000000000 --- a/site/en/r1/tutorials/estimators/linear.ipynb +++ /dev/null @@ -1,1260 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "MWW1TyjaecRh" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "mOtR1FzCef-u" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zr7KpBhMcYvE" - }, - "source": [ - "# Build a linear model with Estimators" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uJl4gaPFzxQz" - }, - "source": [ - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uJl4gaPFzxQy" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aETSYDcdoK" - }, - "source": [ - "This tutorial uses the `tf.estimator` API in TensorFlow to solve a benchmark binary classification problem. Estimators are TensorFlow's most scalable and production-oriented model type. For more information see the [Estimator guide](https://www.tensorflow.org/r1/guide/estimators).\n", - "\n", - "## Overview\n", - "\n", - "Using census data which contains data about a person's age, education, marital status, and occupation (the *features*), you will try to predict whether or not the person earns more than 50,000 dollars a year (the target *label*). You will train a *logistic regression* model that, given an individual's information, outputs a number between 0 and 1—this can be interpreted as the probability that the individual has an annual income of over 50,000 dollars.\n", - "\n", - "Key Point: As a modeler and developer, think about how this data is used and the potential benefits and harm a model's predictions can cause. A model like this could reinforce societal biases and disparities. Is each feature relevant to the problem you want to solve or will it introduce bias? For more information, read about [ML fairness](https://developers.google.com/machine-learning/fairness-overview/).\n", - "\n", - "## Setup\n", - "\n", - "Import TensorFlow, feature column support, and supporting modules:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NQgONe5ecYvE" - }, - "outputs": [], - "source": [ - "import tensorflow.compat.v1 as tf\n", - "\n", - "import tensorflow.feature_column as fc\n", - "\n", - "import os\n", - "import sys\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from IPython.display import clear_output" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Rpb1JSMj1nqk" - }, - "source": [ - "And let's enable [eager execution](https://www.tensorflow.org/r1/guide/eager) to inspect this program as you run it:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-MPr95UccYvL" - }, - "source": [ - "## Download the official implementation\n", - "\n", - "You will use the [wide and deep model](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/) available in TensorFlow's [model repository](https://github.com/tensorflow/models/). Download the code, add the root directory to your Python path, and jump to the `wide_deep` directory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tTwQzWcn8aBu" - }, - "outputs": [], - "source": [ - "! pip install requests\n", - "! 
git clone --depth 1 --branch r2.1.0 https://github.com/tensorflow/models" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sRpuysc73Eb-" - }, - "source": [ - "Add the root directory of the repository to your Python path:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yVvFyhnkcYvL" - }, - "outputs": [], - "source": [ - "models_path = os.path.join(os.getcwd(), 'models')\n", - "\n", - "sys.path.append(models_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "15Ethw-wcYvP" - }, - "source": [ - "Download the dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6QilS4-0cYvQ" - }, - "outputs": [], - "source": [ - "from official.r1.wide_deep import census_dataset\n", - "from official.r1.wide_deep import census_main\n", - "\n", - "census_dataset.download(\"/tmp/census_data/\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cD5e3ibAcYvS" - }, - "source": [ - "### Command line usage\n", - "\n", - "The repo includes a complete program for experimenting with this type of model.\n", - "\n", - "To execute the tutorial code from the command line first add the path to tensorflow/models to your `PYTHONPATH`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DYOkY8boUptJ" - }, - "outputs": [], - "source": [ - "#export PYTHONPATH=${PYTHONPATH}:\"$(pwd)/models\"\n", - "#running from python you need to set the `os.environ` or the subprocess will not see the directory.\n", - "\n", - "if \"PYTHONPATH\" in os.environ:\n", - " os.environ['PYTHONPATH'] += os.pathsep + models_path\n", - "else:\n", - " os.environ['PYTHONPATH'] = models_path" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5r0V9YUMUyoh" - }, - "source": [ - "Use `--help` to see what command line options are available:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1_3tBaLW4YM4" - }, - "outputs": [], - "source": [ - "!python -m official.r1.wide_deep.census_main --help" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RrMLazEN6DMj" - }, - "source": [ - "Now run the model:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "py7MarZl5Yh6" - }, - "outputs": [], - "source": [ - "!python -m official.r1.wide_deep.census_main --model_type=wide --train_epochs=2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AmZ4CpaOcYvV" - }, - "source": [ - "## Read the U.S. Census data\n", - "\n", - "This example uses the [U.S Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income) from 1994 and 1995. The [census_dataset.py](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_dataset.py) script is provided to download the data and perform a little cleanup.\n", - "\n", - "Since the task is a *binary classification problem*, you will construct a label column named \"label\" whose value is 1 if the income is over 50K, and 0 otherwise. 
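As a rough illustration of that label construction, the raw `income_bracket` strings can be thresholded with pandas. This is a sketch of the idea only, not the actual code in the repository's `input_fn`, which also handles details such as the trailing period on labels in the test file:

```python
import pandas as pd

# Sketch: derive a 0/1 label from the raw income_bracket strings.
raw_df = pd.read_csv("/tmp/census_data/adult.data",
                     names=census_dataset._CSV_COLUMNS)
raw_df["label"] = raw_df["income_bracket"].str.contains(">50K").astype(int)
print(raw_df[["income_bracket", "label"]].head())
```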
For reference, see the `input_fn` in [census_main.py](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_main.py).\n", - "\n", - "Let's look at the data to see which columns you can use to predict the target label:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N6Tgye8bcYvX" - }, - "outputs": [], - "source": [ - "!ls /tmp/census_data/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6y3mj9zKcYva" - }, - "outputs": [], - "source": [ - "train_file = \"/tmp/census_data/adult.data\"\n", - "test_file = \"/tmp/census_data/adult.test\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EO_McKgE5il2" - }, - "source": [ - "[pandas](https://pandas.pydata.org/) provides some convenient utilities for data analysis. Here's a list of columns available in the Census Income dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vkn1FNmpcYvb" - }, - "outputs": [], - "source": [ - "import pandas\n", - "\n", - "train_df = pandas.read_csv(train_file, names=census_dataset._CSV_COLUMNS)\n", - "test_df = pandas.read_csv(test_file, names=census_dataset._CSV_COLUMNS)\n", - "\n", - "train_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QZZtXes4cYvf" - }, - "source": [ - "The columns are grouped into two types: *categorical* and *continuous* columns:\n", - "\n", - "* A column is called *categorical* if its value can only be one of the categories in a finite set. For example, the relationship status of a person (wife, husband, unmarried, etc.) or the education level (high school, college, etc.) are categorical columns.\n", - "* A column is called *continuous* if its value can be any numerical value in a continuous range. For example, the capital gain of a person (e.g. $14,084) is a continuous column.\n", - "\n", - "## Converting Data into Tensors\n", - "\n", - "When building a `tf.estimator` model, the input data is specified by using an *input function* (or `input_fn`). This builder function returns a `tf.data.Dataset` of batches of `(features-dict, label)` pairs. It is not called until it is passed to `tf.estimator.Estimator` methods such as `train` and `evaluate`.\n", - "\n", - "The input builder function returns the following pair:\n", - "\n", - "1. `features`: A dict from feature names to `Tensors` or `SparseTensors` containing batches of features.\n", - "2. `labels`: A `Tensor` containing batches of labels.\n", - "\n", - "The keys of the `features` are used to configure the model's input layer.\n", - "\n", - "Note: The input function is called while constructing the TensorFlow graph, *not* while running the graph. 
It returns a representation of the input data as a sequence of TensorFlow graph operations.\n", - "\n", - "For small problems like this, it's easy to make a `tf.data.Dataset` by slicing the `pandas.DataFrame`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N7zNJflKcYvg" - }, - "outputs": [], - "source": [ - "def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n", - " label = df[label_key]\n", - " ds = tf.data.Dataset.from_tensor_slices((dict(df),label))\n", - "\n", - " if shuffle:\n", - " ds = ds.shuffle(10000)\n", - "\n", - " ds = ds.batch(batch_size).repeat(num_epochs)\n", - "\n", - " return ds" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WeEgNR9AcYvh" - }, - "source": [ - "Since you have eager execution enabled, it's easy to inspect the resulting dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ygaKuikecYvi" - }, - "outputs": [], - "source": [ - "ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n", - "\n", - "for feature_batch, label_batch in ds.take(1):\n", - " print('Some feature keys:', list(feature_batch.keys())[:5])\n", - " print()\n", - " print('A batch of Ages :', feature_batch['age'])\n", - " print()\n", - " print('A batch of Labels:', label_batch )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O_KZxQUucYvm" - }, - "source": [ - "But this approach has severely limited scalability. Larger datasets should be streamed from disk. The `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`:\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vUTeXaEUcYvn" - }, - "outputs": [], - "source": [ - "import inspect\n", - "print(inspect.getsource(census_dataset.input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yyGcv_e-cYvq" - }, - "source": [ - "This `input_fn` returns equivalent output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mv3as_CEcYvu" - }, - "outputs": [], - "source": [ - "ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)\n", - "\n", - "for feature_batch, label_batch in ds.take(1):\n", - " print('Feature keys:', list(feature_batch.keys())[:5])\n", - " print()\n", - " print('Age batch :', feature_batch['age'])\n", - " print()\n", - " print('Label batch :', label_batch )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "810fnfY5cYvz" - }, - "source": [ - "Because `Estimators` expect an `input_fn` that takes no arguments, you typically wrap a configurable input function into an object with the expected signature."
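For example, a small wrapper (or a lambda) is enough to turn the parameterized `census_dataset.input_fn` into the zero-argument callable an `Estimator` expects; the next cell does the same thing more compactly with `functools.partial`. A sketch, where the `train_inpf_example` name is only for illustration:

```python
# One way to get a no-argument input_fn: close over the configuration.
def make_input_fn(data_file, num_epochs, shuffle, batch_size):
  def input_fn():
    return census_dataset.input_fn(
        data_file, num_epochs=num_epochs, shuffle=shuffle, batch_size=batch_size)
  return input_fn

train_inpf_example = make_input_fn(train_file, num_epochs=2, shuffle=True, batch_size=64)
```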
For this notebook configure the `train_inpf` to iterate over the data twice:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wnQdpEcVcYv0" - }, - "outputs": [], - "source": [ - "import functools\n", - "\n", - "train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n", - "test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pboNpNWhcYv4" - }, - "source": [ - "## Selecting and Engineering Features for the Model\n", - "\n", - "Estimators use a system called [feature columns](https://www.tensorflow.org/r1/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and feature columns describe how the model should convert each feature.\n", - "\n", - "Selecting and crafting the right set of feature columns is key to learning an effective model. A *feature column* can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*).\n", - "\n", - "A feature column is an abstract concept of any raw or derived variable that can be used to predict the target label." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_hh-cWdU__Lq" - }, - "source": [ - "### Base Feature Columns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BKz6LA8_ACI7" - }, - "source": [ - "#### Numeric columns\n", - "\n", - "The simplest `feature_column` is `numeric_column`. This indicates that a feature is a numeric value that should be input to the model directly. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZX0r2T5OcYv6" - }, - "outputs": [], - "source": [ - "age = fc.numeric_column('age')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tnLUiaHxcYv-" - }, - "source": [ - "The model will use the `feature_column` definitions to build the model input. 
You can inspect the resulting output using the `input_layer` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kREtIPfwcYv_" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age]).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OPuLduCucYwD" - }, - "source": [ - "The following will train and evaluate a model using only the `age` feature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9R5eSJ1pcYwE" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=[age])\n", - "classifier.train(train_inpf)\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output() # used for display in notebook\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YDZGcdTdcYwI" - }, - "source": [ - "Similarly, you can define a `NumericColumn` for each continuous feature column\n", - "that you want to use in the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uqPbUqlxcYwJ" - }, - "outputs": [], - "source": [ - "education_num = tf.feature_column.numeric_column('education_num')\n", - "capital_gain = tf.feature_column.numeric_column('capital_gain')\n", - "capital_loss = tf.feature_column.numeric_column('capital_loss')\n", - "hours_per_week = tf.feature_column.numeric_column('hours_per_week')\n", - "\n", - "my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]\n", - "\n", - "fc.input_layer(feature_batch, my_numeric_columns).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cBGDN97IcYwQ" - }, - "source": [ - "You could retrain a model on these features by changing the `feature_columns` argument to the constructor:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XN8k5S95cYwR" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns)\n", - "classifier.train(train_inpf)\n", - "\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(result.items()):\n", - " print('%s: %s' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jBRq9_AzcYwU" - }, - "source": [ - "#### Categorical columns\n", - "\n", - "To define a feature column for a categorical feature, create a `CategoricalColumn` using one of the `tf.feature_column.categorical_column*` functions.\n", - "\n", - "If you know the set of all possible feature values of a column—and there are only a few of them—use `categorical_column_with_vocabulary_list`. Each key in the list is assigned an auto-incremented ID starting from 0. For example, for the `relationship` column you can assign the feature string `Husband` to an integer ID of 0 and \"Not-in-family\" to 1, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0IjqSi9tcYwV" - }, - "outputs": [], - "source": [ - "relationship = fc.categorical_column_with_vocabulary_list(\n", - " 'relationship',\n", - " ['Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried', 'Other-relative'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-RjoWv-7cYwW" - }, - "source": [ - "This creates a sparse one-hot vector from the raw input feature.\n", - "\n", - "The `input_layer` function you are using is designed for DNN models and expects dense inputs. 
To demonstrate the categorical column you must wrap it in a `tf.feature_column.indicator_column` to create the dense one-hot output (Linear `Estimators` can often skip this dense-step).\n", - "\n", - "Note: the other sparse-to-dense option is `tf.feature_column.embedding_column`.\n", - "\n", - "Run the input layer, configured with both the `age` and `relationship` columns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kI43CYlncYwY" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tTudP7WHcYwb" - }, - "source": [ - "If you don't know the set of possible values in advance, use the `categorical_column_with_hash_bucket` instead:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8pSBaliCcYwb" - }, - "outputs": [], - "source": [ - "occupation = tf.feature_column.categorical_column_with_hash_bucket(\n", - " 'occupation', hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fSAPrqQkcYwd" - }, - "source": [ - "Here, each possible value in the feature column `occupation` is hashed to an integer ID as you encounter them in training. The example batch has a few different occupations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dCvQNv36cYwe" - }, - "outputs": [], - "source": [ - "for item in feature_batch['occupation'].numpy():\n", - " print(item.decode())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KP5hN2rAcYwh" - }, - "source": [ - "If you run `input_layer` with the hashed column, you see that the output shape is `(batch_size, hash_bucket_size)`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0Y16peWacYwh" - }, - "outputs": [], - "source": [ - "occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n", - "\n", - "occupation_result.numpy().shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HMW2MzWAcYwk" - }, - "source": [ - "It's easier to see the actual results if you take the `tf.argmax` over the `hash_bucket_size` dimension. Notice how any duplicate occupations are mapped to the same pseudo-random index:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "q_ryRglmcYwk" - }, - "outputs": [], - "source": [ - "tf.argmax(occupation_result, axis=1).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1e5NfyKcYwn" - }, - "source": [ - "Note: Hash collisions are unavoidable, but often have minimal impact on model quality. The effect may be noticable if the hash buckets are being used to compress the input space.\n", - "\n", - "No matter how you choose to define a `SparseColumn`, each feature string is mapped into an integer ID by looking up a fixed mapping or by hashing. Under the hood, the `LinearModel` class is responsible for managing the mapping and creating `tf.Variable` to store the model parameters (model *weights*) for each feature ID. 
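Under the hood, `categorical_column_with_hash_bucket` assigns those IDs with a deterministic string hash, which is why equal strings always share a bucket. You can reproduce the effect directly with a string-hashing op; this is a small sketch, and the exact IDs are not guaranteed to match what `input_layer` produces on every TensorFlow version:

```python
# Equal strings always map to the same bucket; unrelated strings may collide.
tf.strings.to_hash_bucket_fast(
    ["Exec-managerial", "Craft-repair", "Exec-managerial"], num_buckets=1000)
```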
The model parameters are learned through the model training process described later.\n", - "\n", - "Let's do the similar trick to define the other categorical features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0Z5eUrd_cYwo" - }, - "outputs": [], - "source": [ - "education = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'education', [\n", - " 'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',\n", - " 'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',\n", - " '5th-6th', '10th', '1st-4th', 'Preschool', '12th'])\n", - "\n", - "marital_status = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'marital_status', [\n", - " 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',\n", - " 'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])\n", - "\n", - "workclass = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'workclass', [\n", - " 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n", - " 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n", - "\n", - "\n", - "my_categorical_columns = [relationship, occupation, education, marital_status, workclass]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ASQJM1pEcYwr" - }, - "source": [ - "It's easy to use both sets of columns to configure a model that uses all these features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_i_MLoo9cYws" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns)\n", - "classifier.train(train_inpf)\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(result.items()):\n", - " print('%s: %s' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zdKEqF6xcYwv" - }, - "source": [ - "### Derived feature columns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RgYaf_48FSU2" - }, - "source": [ - "#### Make Continuous Features Categorical through Bucketization\n", - "\n", - "Sometimes the relationship between a continuous feature and the label is not linear. For example, *age* and *income*—a person's income may grow in the early stage of their career, then the growth may slow at some point, and finally, the income decreases after retirement. In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can only learn one of the three cases:\n", - "\n", - "1. Income always increases at some rate as age grows (positive correlation),\n", - "2. Income always decreases at some rate as age grows (negative correlation), or\n", - "3. Income stays the same no matter at what age (no correlation).\n", - "\n", - "If you want to learn the fine-grained correlation between income and each age group separately, you can leverage *bucketization*. Bucketization is a process of dividing the entire range of a continuous feature into a set of consecutive buckets, and then converting the original numerical feature into a bucket ID (as a categorical feature) depending on which bucket that value falls into. 
So, you can define a `bucketized_column` over `age` as:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KT4pjD9AcYww" - }, - "outputs": [], - "source": [ - "age_buckets = tf.feature_column.bucketized_column(\n", - " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S-XOscrEcYwx" - }, - "source": [ - "`boundaries` is a list of bucket boundaries. In this case, there are 10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24, 25-29, ..., to 65 and over).\n", - "\n", - "With bucketing, the model sees each bucket as a one-hot feature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Lr40vm3qcYwy" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age, age_buckets]).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z_tQI9j8cYw1" - }, - "source": [ - "#### Learn complex relationships with crossed column\n", - "\n", - "Using each base feature column separately may not be enough to explain the data. For example, the correlation between education and the label (earning > 50,000 dollars) may be different for different occupations. Therefore, if you only learn a single model weight for `education=\"Bachelors\"` and `education=\"Masters\"`, you won't capture every education-occupation combination (e.g. distinguishing between `education=\"Bachelors\"` AND `occupation=\"Exec-managerial\"` AND `education=\"Bachelors\" AND occupation=\"Craft-repair\"`).\n", - "\n", - "To learn the differences between different feature combinations, you can add *crossed feature columns* to the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IAPhPzXscYw1" - }, - "outputs": [], - "source": [ - "education_x_occupation = tf.feature_column.crossed_column(\n", - " ['education', 'occupation'], hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UeTxMunbcYw5" - }, - "source": [ - "You can also create a `crossed_column` over more than two columns. Each constituent column can be either a base feature column that is categorical (`SparseColumn`), a bucketized real-valued feature column, or even another `CrossColumn`. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "y8UaBld9cYw7" - }, - "outputs": [], - "source": [ - "age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n", - " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HvKmW6U5cYw8" - }, - "source": [ - "These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HtjpheB6cYw9" - }, - "source": [ - "## Define the logistic regression model\n", - "\n", - "After processing the input data and defining all the feature columns, you can put them together and build a *logistic regression* model. 
The previous section showed several types of base and derived feature columns, including:\n", - "\n", - "* `CategoricalColumn`\n", - "* `NumericColumn`\n", - "* `BucketizedColumn`\n", - "* `CrossedColumn`\n", - "\n", - "All of these are subclasses of the abstract `FeatureColumn` class and can be added to the `feature_columns` field of a model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Klmf3OxpcYw-" - }, - "outputs": [], - "source": [ - "import tempfile\n", - "\n", - "base_columns = [\n", - " education, marital_status, relationship, workclass, occupation,\n", - " age_buckets,\n", - "]\n", - "\n", - "crossed_columns = [\n", - " tf.feature_column.crossed_column(\n", - " ['education', 'occupation'], hash_bucket_size=1000),\n", - " tf.feature_column.crossed_column(\n", - " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000),\n", - "]\n", - "\n", - "model = tf.estimator.LinearClassifier(\n", - " model_dir=tempfile.mkdtemp(),\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(learning_rate=0.1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jRhnPxUucYxC" - }, - "source": [ - "The model automatically learns a bias term, which controls the prediction made without observing any features. The learned model files are stored in `model_dir`.\n", - "\n", - "## Train and evaluate the model\n", - "\n", - "After adding all the features to the model, let's train the model. Training a model is just a single command using the `tf.estimator` API:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZlrIBuoecYxD" - }, - "outputs": [], - "source": [ - "train_inpf = functools.partial(census_dataset.input_fn, train_file,\n", - " num_epochs=40, shuffle=True, batch_size=64)\n", - "\n", - "model.train(train_inpf)\n", - "\n", - "clear_output() # used for notebook display" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IvY3a9pzcYxH" - }, - "source": [ - "After the model is trained, evaluate the accuracy of the model by predicting the labels of the holdout data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L9nVJEO8cYxI" - }, - "outputs": [], - "source": [ - "results = model.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(results.items()):\n", - " print('%s: %0.2f' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E0fAibNDcYxL" - }, - "source": [ - "The first line of the output should display something like: `accuracy: 0.84`, which means the accuracy is 84%. 
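If you prefer to drive the same train-then-evaluate cycle with a single call, `tf.estimator.train_and_evaluate` accepts the estimator plus a `TrainSpec` and an `EvalSpec`. A minimal sketch using the input functions defined earlier; the 5,000-step cap is arbitrary, and this is an optional alternative rather than what this notebook does:

```python
# Optional: one call that runs training with periodic evaluation.
train_spec = tf.estimator.TrainSpec(input_fn=train_inpf, max_steps=5000)
eval_spec = tf.estimator.EvalSpec(input_fn=test_inpf)

tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
```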
You can try using more features and transformations to see if you can do better!\n", - "\n", - "After the model is evaluated, you can use it to predict whether an individual has an annual income of over 50,000 dollars given an individual's information input.\n", - "\n", - "Let's look in more detail how the model performed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8R5bz5CxcYxL" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "predict_df = test_df[:20].copy()\n", - "\n", - "pred_iter = model.predict(\n", - " lambda:easy_input_function(predict_df, label_key='income_bracket',\n", - " num_epochs=1, shuffle=False, batch_size=10))\n", - "\n", - "classes = np.array(['<=50K', '>50K'])\n", - "pred_class_id = []\n", - "\n", - "for pred_dict in pred_iter:\n", - " pred_class_id.append(pred_dict['class_ids'])\n", - "\n", - "predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n", - "predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n", - "\n", - "clear_output()\n", - "\n", - "predict_df[['income_bracket','predicted_class', 'correct']]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "N_uCpFTicYxN" - }, - "source": [ - "For a working end-to-end example, download our [example code](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_main.py) and set the `model_type` flag to `wide`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oyKy1lM_3gkL" - }, - "source": [ - "## Adding Regularization to Prevent Overfitting\n", - "\n", - "Regularization is a technique used to avoid overfitting. Overfitting happens when a model performs well on the data it is trained on, but worse on test data that the model has not seen before. Overfitting can occur when a model is excessively complex, such as having too many parameters relative to the number of observed training data. Regularization allows you to control the model's complexity and make the model more generalizable to unseen data.\n", - "\n", - "You can add L1 and L2 regularizations to the model with the following code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lzMUSBQ03hHx" - }, - "outputs": [], - "source": [ - "model_l1 = tf.estimator.LinearClassifier(\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(\n", - " learning_rate=0.1,\n", - " l1_regularization_strength=10.0,\n", - " l2_regularization_strength=0.0))\n", - "\n", - "model_l1.train(train_inpf)\n", - "\n", - "results = model_l1.evaluate(test_inpf)\n", - "clear_output()\n", - "for key in sorted(results):\n", - " print('%s: %0.2f' % (key, results[key]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ofmPL212JIy2" - }, - "outputs": [], - "source": [ - "model_l2 = tf.estimator.LinearClassifier(\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(\n", - " learning_rate=0.1,\n", - " l1_regularization_strength=0.0,\n", - " l2_regularization_strength=10.0))\n", - "\n", - "model_l2.train(train_inpf)\n", - "\n", - "results = model_l2.evaluate(test_inpf)\n", - "clear_output()\n", - "for key in sorted(results):\n", - " print('%s: %0.2f' % (key, results[key]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Lp1Rfy_k4e7w" - }, - "source": [ - "These regularized models don't perform much better than the base model. 
Let's look at the model's weight distributions to better see the effect of the regularization:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Wb6093N04XlS" - }, - "outputs": [], - "source": [ - "def get_flat_weights(model):\n", - " weight_names = [\n", - " name for name in model.get_variable_names()\n", - " if \"linear_model\" in name and \"Ftrl\" not in name]\n", - "\n", - " weight_values = [model.get_variable_value(name) for name in weight_names]\n", - "\n", - " weights_flat = np.concatenate([item.flatten() for item in weight_values], axis=0)\n", - "\n", - " return weights_flat\n", - "\n", - "weights_flat = get_flat_weights(model)\n", - "weights_flat_l1 = get_flat_weights(model_l1)\n", - "weights_flat_l2 = get_flat_weights(model_l2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GskJmtfmL0p-" - }, - "source": [ - "The models have many zero-valued weights caused by unused hash bins (there are many more hash bins than categories in some columns). You can mask these weights when viewing the weight distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rM3agZe3MT3D" - }, - "outputs": [], - "source": [ - "weight_mask = weights_flat != 0\n", - "\n", - "weights_base = weights_flat[weight_mask]\n", - "weights_l1 = weights_flat_l1[weight_mask]\n", - "weights_l2 = weights_flat_l2[weight_mask]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NqBpxLLQNEBE" - }, - "source": [ - "Now plot the distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IdFK7wWa5_0K" - }, - "outputs": [], - "source": [ - "plt.figure()\n", - "_ = plt.hist(weights_base, bins=np.linspace(-3,3,30))\n", - "plt.title('Base Model')\n", - "plt.ylim([0,500])\n", - "\n", - "plt.figure()\n", - "_ = plt.hist(weights_l1, bins=np.linspace(-3,3,30))\n", - "plt.title('L1 - Regularization')\n", - "plt.ylim([0,500])\n", - "\n", - "plt.figure()\n", - "_ = plt.hist(weights_l2, bins=np.linspace(-3,3,30))\n", - "plt.title('L2 - Regularization')\n", - "_=plt.ylim([0,500])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Mv6knhFa5-iJ" - }, - "source": [ - "Both types of regularization squeeze the distribution of weights towards zero. L2 regularization has a greater effect in the tails of the distribution eliminating extreme weights. L1 regularization produces more exactly-zero values, in this case it sets ~200 to zero." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "MWW1TyjaecRh" - ], - "name": "linear.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/images/deep_cnn.md b/site/en/r1/tutorials/images/deep_cnn.md index 00a914d8976..885f3907aa7 100644 --- a/site/en/r1/tutorials/images/deep_cnn.md +++ b/site/en/r1/tutorials/images/deep_cnn.md @@ -80,15 +80,15 @@ for details. It consists of 1,068,298 learnable parameters and requires about ## Code Organization The code for this tutorial resides in -[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/). +[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/). 
File | Purpose --- | --- -[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets). -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. -[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. -[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. -[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. +[`cifar10_input.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets). +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. +[`cifar10_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. +[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. +[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. To run this tutorial, you will need to: @@ -99,7 +99,7 @@ pip install tensorflow-datasets ## CIFAR-10 Model The CIFAR-10 network is largely contained in -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py). +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py). The complete training graph contains roughly 765 operations. We find that we can make the code most reusable by constructing the graph with the following modules: @@ -108,7 +108,7 @@ reusable by constructing the graph with the following modules: operations that read and preprocess CIFAR images for evaluation and training, respectively. 1. [**Model prediction:**](#model-prediction) `inference()` -adds operations that perform inference, i.e. classification, on supplied images. +adds operations that perform inference, i.e., classification, on supplied images. 1. [**Model training:**](#model-training) `loss()` and `train()` add operations that compute the loss, gradients, variable updates and visualization summaries. @@ -405,7 +405,7 @@ a "tower". We must set two attributes for each tower: * A unique name for all operations within a tower. `tf.name_scope` provides this unique name by prepending a scope. For instance, all operations in -the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`. +the first tower are prepended with `tower_0`, e.g., `tower_0/conv1/Conv2D`. * A preferred hardware device to run the operation within a tower. 
`tf.device` specifies this. For diff --git a/site/en/r1/tutorials/images/hub_with_keras.ipynb b/site/en/r1/tutorials/images/hub_with_keras.ipynb index ece9c0fa4a9..f4e683e8936 100644 --- a/site/en/r1/tutorials/images/hub_with_keras.ipynb +++ b/site/en/r1/tutorials/images/hub_with_keras.ipynb @@ -60,7 +60,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -841,7 +841,7 @@ "t = time.time()\n", "\n", "export_path = \"/tmp/saved_models/{}\".format(int(t))\n", - "tf.keras.experimental.export_saved_model(model, export_path)\n", + "model.save(export_path)\n", "\n", "export_path" ] @@ -863,7 +863,7 @@ }, "outputs": [], "source": [ - "reloaded = tf.keras.experimental.load_from_saved_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})" + "reloaded = tf.keras.models.load_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})" ] }, { diff --git a/site/en/r1/tutorials/images/image_recognition.md b/site/en/r1/tutorials/images/image_recognition.md index 0be884de403..cb66e594629 100644 --- a/site/en/r1/tutorials/images/image_recognition.md +++ b/site/en/r1/tutorials/images/image_recognition.md @@ -140,13 +140,13 @@ score of 0.8. -Next, try it out on your own images by supplying the --image= argument, e.g. +Next, try it out on your own images by supplying the --image= argument, e.g., ```bash bazel-bin/tensorflow/examples/label_image/label_image --image=my_image.png ``` -If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc) +If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc) file, you can find out how it works. We hope this code will help you integrate TensorFlow into your own applications, so we will walk step by step through the main functions: @@ -164,7 +164,7 @@ training. If you have a graph that you've trained yourself, you'll just need to adjust the values to match whatever you used during your training process. You can see how they're applied to an image in the -[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L88) +[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L88) function. ```C++ @@ -334,7 +334,7 @@ The `PrintTopLabels()` function takes those sorted results, and prints them out friendly way. The `CheckTopLabel()` function is very similar, but just makes sure that the top label is the one we expect, for debugging purposes. -At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L252) +At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L252) ties together all of these calls. 
```C++ diff --git a/site/en/r1/tutorials/images/transfer_learning.ipynb b/site/en/r1/tutorials/images/transfer_learning.ipynb index bdb05a86382..25779babd17 100644 --- a/site/en/r1/tutorials/images/transfer_learning.ipynb +++ b/site/en/r1/tutorials/images/transfer_learning.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -364,7 +364,7 @@ }, "outputs": [], "source": [ - "model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),\n", + "model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),\n", " loss='binary_crossentropy',\n", " metrics=['accuracy'])" ] @@ -547,7 +547,7 @@ "\n", "# Freeze all the layers before the `fine_tune_at` layer\n", "for layer in base_model.layers[:fine_tune_at]:\n", - " layer.trainable = False" + " layer.trainable = False" ] }, { @@ -569,7 +569,7 @@ }, "outputs": [], "source": [ - "model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=2e-5),\n", + "model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate=2e-5),\n", " loss='binary_crossentropy',\n", " metrics=['accuracy'])" ] diff --git a/site/en/r1/tutorials/keras/README.md b/site/en/r1/tutorials/keras/README.md index 4da2f72dca9..47aca7e0052 100644 --- a/site/en/r1/tutorials/keras/README.md +++ b/site/en/r1/tutorials/keras/README.md @@ -4,7 +4,7 @@ This notebook collection is inspired by the book *[Deep Learning with Python](https://books.google.com/books?id=Yo3CAQAACAAJ)*. These tutorials use `tf.keras`, TensorFlow's high-level Python API for building and training deep learning models. To learn more about using Keras with -TensorFlow, see the [TensorFlow Keras Guide](../../guide/keras.ipynb). +TensorFlow, see the [TensorFlow Keras Guide](https://www.tensorflow.org/guide/keras). Publisher's note: *Deep Learning with Python* introduces the field of deep learning using the Python language and the powerful Keras library. Written by diff --git a/site/en/r1/tutorials/keras/basic_classification.ipynb b/site/en/r1/tutorials/keras/basic_classification.ipynb index be7f5e9e8b1..14950538ce4 100644 --- a/site/en/r1/tutorials/keras/basic_classification.ipynb +++ b/site/en/r1/tutorials/keras/basic_classification.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/basic_regression.ipynb b/site/en/r1/tutorials/keras/basic_regression.ipynb index 7d9cb711efa..4bffd62f982 100644 --- a/site/en/r1/tutorials/keras/basic_regression.ipynb +++ b/site/en/r1/tutorials/keras/basic_regression.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. 
To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/basic_text_classification.ipynb b/site/en/r1/tutorials/keras/basic_text_classification.ipynb index 0303d54d973..5424185bcbd 100644 --- a/site/en/r1/tutorials/keras/basic_text_classification.ipynb +++ b/site/en/r1/tutorials/keras/basic_text_classification.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb index a8f266f9869..8e35b06e556 100644 --- a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb +++ b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb index 7911e37e139..04cc94417a9 100644 --- a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb +++ b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -115,7 +115,7 @@ "\n", "Sharing this data helps others understand how the model works and try it themselves with new data.\n", "\n", - "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) for details.\n", + "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md) for details.\n", "\n", "### Options\n", "\n", @@ -698,7 +698,7 @@ "id": "B7qfpvpY9HCe" }, "source": [ - "Load the the saved model." + "Load the saved model." ] }, { diff --git a/site/en/r1/tutorials/load_data/images.ipynb b/site/en/r1/tutorials/load_data/images.ipynb index dbee204323b..923b95130d1 100644 --- a/site/en/r1/tutorials/load_data/images.ipynb +++ b/site/en/r1/tutorials/load_data/images.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. 
To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/load_data/tf_records.ipynb b/site/en/r1/tutorials/load_data/tf_records.ipynb index 8b57d3f2f1e..45635034c69 100644 --- a/site/en/r1/tutorials/load_data/tf_records.ipynb +++ b/site/en/r1/tutorials/load_data/tf_records.ipynb @@ -57,7 +57,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -141,7 +141,7 @@ "source": [ "Fundamentally a `tf.Example` is a `{\"string\": tf.train.Feature}` mapping.\n", "\n", - "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n", + "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n", "\n", "1. `tf.train.BytesList` (the following types can be coerced)\n", "\n", @@ -276,7 +276,7 @@ "\n", "1. We create a map (dictionary) from the feature name string to the encoded feature value produced in #1.\n", "\n", - "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)." + "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto#L85)." ] }, { @@ -365,7 +365,7 @@ "id": "XftzX9CN_uGT" }, "source": [ - "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message." + "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message." 
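(Aside on the `tf.Example` text in the hunk above: the wrapping it describes can be sketched directly with the `tf.train` protos. The observation `[False, 4, bytes('goat'), 0.9876]` comes from the tutorial text; the helper and feature names below are illustrative, not part of the notebook.)

```python
import tensorflow as tf

# Minimal sketch of the wrapping described above: raw values go into
# tf.train.Feature messages, those into a Features map, and the map into
# a tf.train.Example. Helper and key names are illustrative.
def _bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

# The observation [False, 4, b'goat', 0.9876] from the text above.
feature = {
    'feature0': _int64_feature(int(False)),   # bool is coerced to int64
    'feature1': _int64_feature(4),
    'feature2': _bytes_feature(b'goat'),
    'feature3': _float_feature(0.9876),
}
example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
print(example_proto)
```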
] }, { @@ -632,7 +632,7 @@ "source": [ "We can also read the TFRecord file using the `tf.data.TFRecordDataset` class.\n", "\n", - "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/r1/guide/datasets#consuming_tfrecord_data).\n", + "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/guide/data#consuming_tfrecord_data).\n", "\n", "Using `TFRecordDataset`s can be useful for standardizing input data and optimizing performance." ] diff --git a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb index 88177211896..bca8a142be4 100644 --- a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb +++ b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/non-ml/pdes.ipynb b/site/en/r1/tutorials/non-ml/pdes.ipynb index d2646daa8da..832fa450523 100644 --- a/site/en/r1/tutorials/non-ml/pdes.ipynb +++ b/site/en/r1/tutorials/non-ml/pdes.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/representation/kernel_methods.md b/site/en/r1/tutorials/representation/kernel_methods.md index 67adc4951c6..227fe81d515 100644 --- a/site/en/r1/tutorials/representation/kernel_methods.md +++ b/site/en/r1/tutorials/representation/kernel_methods.md @@ -24,7 +24,7 @@ following sources for an introduction: Currently, TensorFlow supports explicit kernel mappings for dense features only; TensorFlow will provide support for sparse features at a later release. -This tutorial uses [tf.contrib.learn](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn) +This tutorial uses [tf.contrib.learn](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/contrib/learn/python/learn) (TensorFlow's high-level Machine Learning API) Estimators for our ML models. If you are not familiar with this API, The [Estimator guide](../../guide/estimators.md) is a good place to start. We will use the MNIST dataset. The tutorial consists @@ -131,7 +131,7 @@ In addition to experimenting with the (training) batch size and the number of training steps, there are a couple other parameters that can be tuned as well. For instance, you can change the optimization method used to minimize the loss by explicitly selecting another optimizer from the collection of -[available optimizers](https://www.tensorflow.org/code/tensorflow/python/training). +[available optimizers](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/training). 
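(The hunk above notes that you can swap in a different optimizer, and the tutorial's own example just below constructs a LinearClassifier with FTRL. As a rough sketch of such a configuration using the TF1-era estimator and optimizer APIs; the feature column and hyperparameter values are illustrative assumptions, not taken from the tutorial.)

```python
import tensorflow as tf

# Hedged sketch: a linear classifier on MNIST-style inputs that minimizes its
# loss with the Follow-The-Regularized-Leader (FTRL) optimizer instead of the
# default. Feature columns and hyperparameters below are illustrative only.
image_column = tf.feature_column.numeric_column('images', shape=[784])

ftrl = tf.train.FtrlOptimizer(
    learning_rate=5.0,
    l2_regularization_strength=1.0)

estimator = tf.estimator.LinearClassifier(
    feature_columns=[image_column],
    n_classes=10,
    optimizer=ftrl)
```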
As an example, the following code constructs a LinearClassifier estimator that uses the Follow-The-Regularized-Leader (FTRL) optimization strategy with a specific learning rate and L2-regularization. diff --git a/site/en/r1/tutorials/representation/linear.md b/site/en/r1/tutorials/representation/linear.md index 5516672b34a..d996a13bc1f 100644 --- a/site/en/r1/tutorials/representation/linear.md +++ b/site/en/r1/tutorials/representation/linear.md @@ -12,7 +12,7 @@ those tools. It explains: Read this overview to decide whether the Estimator's linear model tools might be useful to you. Then work through the -[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep) +[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep) to give it a try. This overview uses code samples from the tutorial, but the tutorial walks through the code in greater detail. @@ -177,7 +177,7 @@ the name of a `FeatureColumn`. Each key's value is a tensor containing the values of that feature for all data instances. See [Premade Estimators](../../guide/premade_estimators.md#input_fn) for a more comprehensive look at input functions, and `input_fn` in the -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep) +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep) for an example implementation of an input function. The input function is passed to the `train()` and `evaluate()` calls that @@ -236,4 +236,4 @@ e = tf.estimator.DNNLinearCombinedClassifier( dnn_hidden_units=[100, 50]) ``` For more information, see the -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep). +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep). diff --git a/site/en/r1/tutorials/representation/unicode.ipynb b/site/en/r1/tutorials/representation/unicode.ipynb index 6762a483a42..f76977c3c92 100644 --- a/site/en/r1/tutorials/representation/unicode.ipynb +++ b/site/en/r1/tutorials/representation/unicode.ipynb @@ -57,7 +57,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -136,7 +136,7 @@ "id": "jsMPnjb6UDJ1" }, "source": [ - "Note: When using python to construct strings, the handling of unicode differs betweeen v2 and v3. In v2, unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are unicode-encoded by default." + "Note: When using python to construct strings, the handling of unicode differs between v2 and v3. In v2, unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are unicode-encoded by default." ] }, { @@ -425,7 +425,7 @@ "source": [ "### Character substrings\n", "\n", - "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" paremeters contain." 
+ "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" parameters contain." ] }, { @@ -587,7 +587,7 @@ "id": "CapnbShuGU8i" }, "source": [ - "First, we decode the sentences into character codepoints, and find the script identifeir for each character." + "First, we decode the sentences into character codepoints, and find the script identifier for each character." ] }, { diff --git a/site/en/r1/tutorials/representation/word2vec.md b/site/en/r1/tutorials/representation/word2vec.md index f6a27c68f3c..517a5dbc5c5 100644 --- a/site/en/r1/tutorials/representation/word2vec.md +++ b/site/en/r1/tutorials/representation/word2vec.md @@ -36,7 +36,7 @@ like to get your hands dirty with the details. Image and audio processing systems work with rich, high-dimensional datasets encoded as vectors of the individual raw pixel-intensities for image data, or -e.g. power spectral density coefficients for audio data. For tasks like object +e.g., power spectral density coefficients for audio data. For tasks like object or speech recognition we know that all the information required to successfully perform the task is encoded in the data (because humans can perform these tasks from the raw data). However, natural language processing systems traditionally @@ -109,7 +109,7 @@ $$ where \\(\text{score}(w_t, h)\\) computes the compatibility of word \\(w_t\\) with the context \\(h\\) (a dot product is commonly used). We train this model by maximizing its [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function) -on the training set, i.e. by maximizing +on the training set, i.e., by maximizing $$ \begin{align} @@ -176,7 +176,7 @@ As an example, let's consider the dataset We first form a dataset of words and the contexts in which they appear. We could define 'context' in any way that makes sense, and in fact people have looked at syntactic contexts (i.e. the syntactic dependents of the current -target word, see e.g. +target word, see e.g., [Levy et al.](https://levyomer.files.wordpress.com/2014/04/dependency-based-word-embeddings-acl-2014.pdf)), words-to-the-left of the target, words-to-the-right of the target, etc. For now, let's stick to the vanilla definition and define 'context' as the window @@ -204,7 +204,7 @@ where the goal is to predict `the` from `quick`. We select `num_noise` number of noisy (contrastive) examples by drawing from some noise distribution, typically the unigram distribution, \\(P(w)\\). For simplicity let's say `num_noise=1` and we select `sheep` as a noisy example. Next we compute the -loss for this pair of observed and noisy examples, i.e. the objective at time +loss for this pair of observed and noisy examples, i.e., the objective at time step \\(t\\) becomes $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) + @@ -212,7 +212,7 @@ $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) + The goal is to make an update to the embedding parameters \\(\theta\\) to improve (in this case, maximize) this objective function. We do this by deriving the -gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e. +gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e., \\(\frac{\partial}{\partial \theta} J_\text{NEG}\\) (luckily TensorFlow provides easy helper functions for doing this!). We then perform an update to the embeddings by taking a small step in the direction of the gradient. 
When this @@ -227,7 +227,7 @@ When we inspect these visualizations it becomes apparent that the vectors capture some general, and in fact quite useful, semantic information about words and their relationships to one another. It was very interesting when we first discovered that certain directions in the induced vector space specialize -towards certain semantic relationships, e.g. *male-female*, *verb tense* and +towards certain semantic relationships, e.g., *male-female*, *verb tense* and even *country-capital* relationships between words, as illustrated in the figure below (see also for example [Mikolov et al., 2013](https://www.aclweb.org/anthology/N13-1090)). @@ -327,7 +327,7 @@ for inputs, labels in generate_batch(...): ``` See the full example code in -[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py). +[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/tutorials/word2vec/word2vec_basic.py). ## Visualizing the learned embeddings @@ -341,7 +341,7 @@ t-SNE. Et voila! As expected, words that are similar end up clustering nearby each other. For a more heavyweight implementation of word2vec that showcases more of the advanced features of TensorFlow, see the implementation in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py). ## Evaluating embeddings: analogical reasoning @@ -357,7 +357,7 @@ Download the dataset for this task from To see how we do this evaluation, have a look at the `build_eval_graph()` and `eval()` functions in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py). The choice of hyperparameters can strongly influence the accuracy on this task. To achieve state-of-the-art performance on this task requires training over a diff --git a/site/en/r1/tutorials/sequences/audio_recognition.md b/site/en/r1/tutorials/sequences/audio_recognition.md index 8ad71b88a3c..0388514ec92 100644 --- a/site/en/r1/tutorials/sequences/audio_recognition.md +++ b/site/en/r1/tutorials/sequences/audio_recognition.md @@ -159,9 +159,9 @@ accuracy. If the training accuracy increases but the validation doesn't, that's a sign that overfitting is occurring, and your model is only learning things about the training clips, not broader patterns that generalize. -## Tensorboard +## TensorBoard -A good way to visualize how the training is progressing is using Tensorboard. By +A good way to visualize how the training is progressing is using TensorBoard. 
By default, the script saves out events to /tmp/retrain_logs, and you can load these by running: diff --git a/site/en/r1/tutorials/sequences/recurrent.md b/site/en/r1/tutorials/sequences/recurrent.md index 6654795d944..e7c1f8c0b16 100644 --- a/site/en/r1/tutorials/sequences/recurrent.md +++ b/site/en/r1/tutorials/sequences/recurrent.md @@ -2,7 +2,7 @@ ## Introduction -See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/){:.external} +See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/) for an introduction to recurrent neural networks and LSTMs. ## Language Modeling diff --git a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md index 435076f629c..d6a85377d17 100644 --- a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md +++ b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md @@ -109,7 +109,7 @@ This download will take a while and download a bit more than 23GB of data. To convert the `ndjson` files to [TFRecord](../../api_guides/python/python_io.md#TFRecords_Format_Details) files containing -[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +[`tf.train.Example`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto) protos run the following command. ```shell @@ -213,7 +213,7 @@ screen coordinates and normalize the size such that the drawing has unit height. Finally, we compute the differences between consecutive points and store these as a `VarLenFeature` in a -[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +[tensorflow.Example](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto) under the key `ink`. In addition we store the `class_index` as a single entry `FixedLengthFeature` and the `shape` of the `ink` as a `FixedLengthFeature` of length 2. diff --git a/site/en/r1/tutorials/sequences/text_generation.ipynb b/site/en/r1/tutorials/sequences/text_generation.ipynb index 5911d1c7673..84d942c8bd0 100644 --- a/site/en/r1/tutorials/sequences/text_generation.ipynb +++ b/site/en/r1/tutorials/sequences/text_generation.ipynb @@ -65,7 +65,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -77,9 +77,9 @@ "id": "BwpJ5IffzRG6" }, "source": [ - "This tutorial demonstrates how to generate text using a character-based RNN. We will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n", + "This tutorial demonstrates how to generate text using a character-based RNN. You will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). 
Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n", "\n", - "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware acclerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n", + "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n", "\n", "This tutorial includes runnable code implemented using [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager). The following is sample output when the model in this tutorial trained for 30 epochs, and started with the string \"Q\":\n", "\n", @@ -98,7 +98,7 @@ "To watch the next way with his father with his face?\n", "\n", "ESCALUS:\n", - "The cause why then we are all resolved more sons.\n", + "The cause why then us all resolved more sons.\n", "\n", "VOLUMNIA:\n", "O, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, it is no sin it should be dead,\n", @@ -248,7 +248,7 @@ "source": [ "### Vectorize the text\n", "\n", - "Before training, we need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters." + "Before training, you need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters." ] }, { @@ -272,7 +272,7 @@ "id": "tZfqhkYCymwX" }, "source": [ - "Now we have an integer representation for each character. Notice that we mapped the character as indexes from 0 to `len(unique)`." + "Now you have an integer representation for each character. Notice that you mapped the character as indexes from 0 to `len(unique)`." ] }, { @@ -316,7 +316,7 @@ "id": "wssHQ1oGymwe" }, "source": [ - "Given a character, or a sequence of characters, what is the most probable next character? This is the task we're training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output—the following character at each time step.\n", + "Given a character, or a sequence of characters, what is the most probable next character? This is the task you are training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.\n", "\n", "Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?\n" ] @@ -346,7 +346,7 @@ }, "outputs": [], "source": [ - "# The maximum length sentence we want for a single input in characters\n", + "# The maximum length sentence you want for a single input in characters\n", "seq_length = 100\n", "examples_per_epoch = len(text)//seq_length\n", "\n", @@ -458,7 +458,7 @@ "source": [ "### Create training batches\n", "\n", - "We used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, we need to shuffle the data and pack it into batches." + "You used `tf.data` to split the text into manageable sequences. 
But before feeding this data into the model, you need to shuffle the data and pack it into batches." ] }, { @@ -543,7 +543,7 @@ }, "outputs": [], "source": [ - "if tf.test.is_gpu_available():\n", + "if tf.config.list_physical_devices('GPU'):\n", " rnn = tf.keras.layers.CuDNNGRU\n", "else:\n", " import functools\n", @@ -650,7 +650,7 @@ "id": "uwv0gEkURfx1" }, "source": [ - "To get actual predictions from the model we need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n", + "To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n", "\n", "Note: It is important to _sample_ from this distribution as taking the _argmax_ of the distribution can easily get the model stuck in a loop.\n", "\n", @@ -746,7 +746,7 @@ "source": [ "The standard `tf.keras.losses.sparse_categorical_crossentropy` loss function works in this case because it is applied across the last dimension of the predictions.\n", "\n", - "Because our model returns logits, we need to set the `from_logits` flag.\n" + "Because our model returns logits, you need to set the `from_logits` flag.\n" ] }, { @@ -771,7 +771,7 @@ "id": "jeOXriLcymww" }, "source": [ - "Configure the training procedure using the `tf.keras.Model.compile` method. We'll use `tf.train.AdamOptimizer` with default arguments and the loss function." + "Configure the training procedure using the `tf.keras.Model.compile` method. You'll use `tf.train.AdamOptimizer` with default arguments and the loss function." ] }, { @@ -891,7 +891,7 @@ "\n", "Because of the way the RNN state is passed from timestep to timestep, the model only accepts a fixed batch size once built.\n", "\n", - "To run the model with a different `batch_size`, we need to rebuild the model and restore the weights from the checkpoint.\n" + "To run the model with a different `batch_size`, you need to rebuild the model and restore the weights from the checkpoint.\n" ] }, { @@ -992,7 +992,7 @@ " predictions = predictions / temperature\n", " predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()\n", "\n", - " # We pass the predicted word as the next input to the model\n", + " # You pass the predicted word as the next input to the model\n", " # along with the previous hidden state\n", " input_eval = tf.expand_dims([predicted_id], 0)\n", "\n", @@ -1035,11 +1035,11 @@ "\n", "So now that you've seen how to run the model manually let's unpack the training loop, and implement it ourselves. This gives a starting point, for example, to implement _curriculum learning_ to help stabilize the model's open-loop output.\n", "\n", - "We will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n", + "You will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n", "\n", "The procedure works as follows:\n", "\n", - "* First, initialize the RNN state. We do this by calling the `tf.keras.Model.reset_states` method.\n", + "* First, initialize the RNN state. 
You do this by calling the `tf.keras.Model.reset_states` method.\n", "\n", "* Next, iterate over the dataset (batch by batch) and calculate the *predictions* associated with each.\n", "\n", diff --git a/site/en/swift/README.md b/site/en/swift/README.md deleted file mode 100644 index 162a81fa7d3..00000000000 --- a/site/en/swift/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Welcome to the warp zone! - -# Swift for TensorFlow - -These docs are available here: -https://github.com/tensorflow/swift/tree/main/docs/site diff --git a/site/en/tensorboard/README.md b/site/en/tensorboard/README.md deleted file mode 100644 index 7e2126c23d4..00000000000 --- a/site/en/tensorboard/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# Tensorboard - -These docs are available here: https://github.com/tensorflow/tensorboard/tree/master/docs diff --git a/site/en/tfx/README.md b/site/en/tfx/README.md deleted file mode 100644 index c56ad2dbf01..00000000000 --- a/site/en/tfx/README.md +++ /dev/null @@ -1,10 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Extended (TFX) - -These docs are available here: - -* Data Validation: https://github.com/tensorflow/data-validation/tree/master/g3doc -* Model Analysis: https://github.com/tensorflow/model-analysis/tree/master/g3doc -* Transform: https://github.com/tensorflow/transform/tree/master/docs -* Serving: https://github.com/tensorflow/serving/tree/master/tensorflow_serving/g3doc diff --git a/site/en/tutorials/_index.yaml b/site/en/tutorials/_index.yaml index e2fc95aff1f..0d09f04c5c7 100644 --- a/site/en/tutorials/_index.yaml +++ b/site/en/tutorials/_index.yaml @@ -16,8 +16,9 @@ landing_page: - description: >

    The TensorFlow tutorials are written as Jupyter notebooks and run - directly in Google Colab—a hosted notebook environment that requires - no setup. Click the Run in Google Colab button. + directly in Google Colab—a hosted notebook environment that requires + no setup. At the top of each tutorial, you'll see a Run in Google Colab button. Click + the button to open the notebook and run the code yourself.

    - classname: devsite-landing-row-100 @@ -84,38 +85,16 @@ landing_page: - classname: devsite-landing-row-100 items: - description: > - - Subscribe to the - TensorFlow blog, - YouTube channel, - and Twitter - for the latest updates. + + Check out these videos for an introduction to machine learning with TensorFlow: - items: - - heading: "Intro to Machine Learning" - path: "https://www.youtube.com/watch?v=KNAWp2S3w94" + - heading: "TensorFlow ML Zero to Hero" + path: "https://www.youtube.com/watch?v=KNAWp2S3w94&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" youtube_id: "KNAWp2S3w94?rel=0&show_info=0" - - heading: "TensorFlow 2.0 and Keras" - path: "https://www.youtube.com/watch?v=wGI_VtE9CJM" - youtube_id: "wGI_VtE9CJM?rel=0&show_info=0" - - - classname: devsite-landing-row-cards - items: - - heading: "Looking Back at 2019" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - - heading: "TensorFlow 2 is now available" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - - heading: "Standardizing on Keras: Guidance on High-level APIs in TensorFlow 2" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html + - heading: "Basic Computer Vision with ML" + path: "https://www.youtube.com/watch?v=bemDFpNooA8&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" + youtube_id: "bemDFpNooA8?rel=0&show_info=0" - classname: devsite-landing-row-100 items: @@ -132,8 +111,8 @@ landing_page: - description: > path: /tensorboard icon: @@ -243,7 +222,7 @@ landing_page: path: /xla icon: @@ -295,3 +274,13 @@ landing_page: icon_name: chevron_right foreground: theme background: grey + + - classname: devsite-landing-row-100 + items: + - description: > + + Subscribe to the + TensorFlow blog, + YouTube channel, + and Twitter + for the latest updates. 
diff --git a/site/en/tutorials/_toc.yaml b/site/en/tutorials/_toc.yaml index 27c1d422823..a3907ffe9a4 100644 --- a/site/en/tutorials/_toc.yaml +++ b/site/en/tutorials/_toc.yaml @@ -35,6 +35,9 @@ toc: section: - title: "Images" path: /tutorials/load_data/images + - title: "Video" + path: /tutorials/load_data/video + status: new - title: "CSV" path: /tutorials/load_data/csv - title: "NumPy" @@ -74,6 +77,12 @@ toc: section: - title: "Distributed training with Keras" path: /tutorials/distribute/keras + - title: "Distributed training with DTensors" + path: /tutorials/distribute/dtensor_ml_tutorial + status: experimental + - title: "Using DTensors with Keras" + path: /tutorials/distribute/dtensor_keras_tutorial + status: experimental - title: "Custom training loops" path: /tutorials/distribute/custom_training - title: "Multi-worker training with Keras" @@ -88,9 +97,14 @@ toc: - title: "Distributed input" path: /tutorials/distribute/input -- title: "Images" +- title: "Vision" style: accordion section: + - title: "Computer vision" + path: /tutorials/images + - title: "KerasCV" + path: https://keras.io/keras_cv/ + status: external - title: "Convolutional Neural Network" path: /tutorials/images/cnn - title: "Image classification" @@ -104,31 +118,27 @@ toc: - title: "Image segmentation" path: /tutorials/images/segmentation - title: "Object detection with TF Hub" - path: https://github.com/tensorflow/hub/blob/master/examples/colab/tf2_object_detection.ipynb + path: /hub/tutorials/tf2_object_detection status: external + - title: "Video classification" + status: new + path: /tutorials/video/video_classification + - title: "Transfer learning with MoViNet" + status: new + path: /tutorials/video/transfer_learning_with_movinet - title: "Text" style: accordion section: - - title: "Word embeddings" - path: /text/guide/word_embeddings - status: external - - title: "Word2Vec" - path: /tutorials/text/word2vec - - title: "Text classification with an RNN" - path: /text/tutorials/text_classification_rnn - status: external - - title: "Classify Text with BERT" - path: /text/tutorials/classify_text_with_bert - status: external - - title: "Solve GLUE tasks using BERT on TPU" - path: /text/tutorials/bert_glue + - title: "Text and natural language processing" + path: /tutorials/text/index + - title: "Get started with KerasNLP" + path: https://keras.io/guides/keras_nlp/getting_started/ status: external - - title: "Neural machine translation with attention" - path: /text/tutorials/nmt_with_attention + - title: "Text and NLP guide" + path: /text status: external - - title: "Image captioning" - path: /tutorials/text/image_captioning + - title: "Audio" style: accordion section: @@ -136,10 +146,8 @@ toc: path: /tutorials/audio/simple_audio - title: "Transfer learning for audio recognition" path: /tutorials/audio/transfer_learning_audio - status: new - title: "Generate music with an RNN" path: /tutorials/audio/music_generation - status: new - title: "Structured data" style: accordion @@ -160,6 +168,9 @@ toc: - title: "Generative" style: accordion section: + - title: "Stable Diffusion" + status: new + path: /tutorials/generative/generate_images_with_stable_diffusion - title: "Neural style transfer" path: /tutorials/generative/style_transfer - title: "DeepDream" @@ -176,6 +187,17 @@ toc: path: /tutorials/generative/autoencoder - title: "Variational Autoencoder" path: /tutorials/generative/cvae + - title: "Lossy data compression" + path: /tutorials/generative/data_compression + +- title: "Model optimization" + style: 
accordion + section: + - title: "Scalable model compression with EPR" + path: /tutorials/optimization/compression + - title: "TensorFlow model optimization" + status: external + path: /model_optimization - title: "Model Understanding" style: accordion @@ -184,9 +206,10 @@ toc: path: /tutorials/interpretability/integrated_gradients - title: "Uncertainty quantification with SNGP" path: /tutorials/understanding/sngp - - title: "Probabalistic regression" + - title: "Probabilistic regression" path: /probability/examples/Probabilistic_Layers_Regression status: external + - title: "Reinforcement learning" style: accordion section: @@ -198,15 +221,12 @@ toc: - title: "tf.Estimator" style: accordion + status: deprecated section: - title: "Premade estimator" path: /tutorials/estimator/premade - title: "Linear model" path: /tutorials/estimator/linear - - title: "Boosted trees" - path: /tutorials/estimator/boosted_trees - - title: "Boosted trees model understanding" - path: /tutorials/estimator/boosted_trees_model_understanding - title: "Keras model to Estimator" path: /tutorials/estimator/keras_model_to_estimator - title: "Multi-worker training with Estimator" diff --git a/site/en/tutorials/audio/music_generation.ipynb b/site/en/tutorials/audio/music_generation.ipynb index 89802d0447b..e1423ef7cf2 100644 --- a/site/en/tutorials/audio/music_generation.ipynb +++ b/site/en/tutorials/audio/music_generation.ipynb @@ -68,9 +68,9 @@ "id": "hr78EkAY-FFg" }, "source": [ - "This tutorial shows you how to generate musical notes using a simple RNN. You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate a longer sequences of notes by calling the model repeatedly.\n", + "This tutorial shows you how to generate musical notes using a simple recurrent neural network (RNN). You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate longer sequences of notes by calling the model repeatedly.\n", "\n", - "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." + "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting the [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial." ] }, { @@ -145,7 +145,7 @@ "\n", "from IPython import display\n", "from matplotlib import pyplot as plt\n", - "from typing import Dict, List, Optional, Sequence, Tuple" + "from typing import Optional" ] }, { @@ -680,7 +680,7 @@ "id": "xIBLvj-cODWS" }, "source": [ - "Next, create a [tf.data.Dataset](https://www.tensorflow.org/datasets) from the parsed notes." + "Next, create a `tf.data.Dataset` from the parsed notes." ] }, { @@ -713,7 +713,7 @@ "id": "Sj9SXRCjt3I7" }, "source": [ - "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and next note as the label. In this way, the model will be trained to predict the next note in a sequence. 
You can find a diagram explaining this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", + "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and the next note as the label. In this way, the model will be trained to predict the next note in a sequence. You can find a diagram describing this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", "\n", "You can use the handy [window](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window) function with size `seq_length` to create the features and labels in this format." ] @@ -857,7 +857,7 @@ "id": "iGQn32q-hdK2" }, "source": [ - "The model will have three outputs, one for each note variable. For `pitch` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." + "The model will have three outputs, one for each note variable. For `step` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." ] }, { @@ -1056,7 +1056,7 @@ "source": [ "To use the model to generate notes, you will first need to provide a starting sequence of notes. The function below generates one note from a sequence of notes. \n", "\n", - "For note pitch, it draws a sample from softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", + "For note pitch, it draws a sample from the softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", "Always picking the note with the highest probability would lead to repetitive sequences of notes being generated.\n", "\n", "The `temperature` parameter can be used to control the randomness of notes generated. You can find more details on temperature in [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." @@ -1072,9 +1072,9 @@ "source": [ "def predict_next_note(\n", " notes: np.ndarray, \n", - " keras_model: tf.keras.Model, \n", - " temperature: float = 1.0) -> int:\n", - " \"\"\"Generates a note IDs using a trained sequence model.\"\"\"\n", + " model: tf.keras.Model, \n", + " temperature: float = 1.0) -> tuple[int, float, float]:\n", + " \"\"\"Generates a note as a tuple of (pitch, step, duration), using a trained sequence model.\"\"\"\n", "\n", " assert temperature > 0\n", "\n", @@ -1229,9 +1229,8 @@ "source": [ "In the above plots, you will notice the change in distribution of the note variables.\n", "Since there is a feedback loop between the model's outputs and inputs, the model tends to generate similar sequences of outputs to reduce the loss. \n", - "This is particularly relevant for `step` and `duration`, which has uses MSE loss.\n", - "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n", - "\n" + "This is particularly relevant for `step` and `duration`, which uses the MSE loss.\n", + "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n" ] }, { @@ -1244,7 +1243,7 @@ "\n", "This tutorial demonstrated the mechanics of using an RNN to generate sequences of notes from a dataset of MIDI files. 
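(Relating to the `temperature` parameter used by `predict_next_note` in the hunk above: a hedged sketch of temperature-scaled sampling. The tensor name and shape stand in for the model's pitch output and are assumptions for illustration.)

```python
import tensorflow as tf

# `pitch_logits` stands in for the model's pitch output; shape [1, num_pitches]
# is an illustrative assumption.
pitch_logits = tf.random.normal([1, 128])
temperature = 2.0

# Dividing the logits by the temperature flattens (>1.0) or sharpens (<1.0)
# the distribution; sampling rather than taking the argmax avoids the model
# getting stuck in repetitive loops.
scaled_logits = pitch_logits / temperature
pitch = tf.random.categorical(scaled_logits, num_samples=1)
pitch = int(tf.squeeze(pitch).numpy())
print(pitch)
```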
To learn more, you can visit the closely related [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial, which contains additional diagrams and explanations. \n", "\n", - "An alternative to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate a entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." + "One of the alternatives to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate an entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." ] } ], @@ -1253,7 +1252,6 @@ "colab": { "collapsed_sections": [], "name": "music_generation.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/audio/simple_audio.ipynb b/site/en/tutorials/audio/simple_audio.ipynb index 3d208668d4e..9d79742fbb7 100644 --- a/site/en/tutorials/audio/simple_audio.ipynb +++ b/site/en/tutorials/audio/simple_audio.ipynb @@ -74,7 +74,9 @@ "id": "SPfDNFlb66XF" }, "source": [ - "This tutorial will show you how to build a basic speech recognition network that recognizes ten different words. It's important to know that real speech and audio recognition systems are much more complex, but like MNIST for images, it should give you a basic understanding of the techniques involved. Once you've completed this tutorial, you'll have a model that tries to classify a one second audio clip as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\"." + "This tutorial demonstrates how to preprocess audio files in the WAV format and build and train a basic [automatic speech recognition](https://en.wikipedia.org/wiki/Speech_recognition) (ASR) model for recognizing ten different words. You will use a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) ([Warden, 2018](https://arxiv.org/abs/1804.03209)), which contains short (one-second or less) audio clips of commands, such as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\".\n", + "\n", + "Real-world speech and audio recognition [systems](https://ai.googleblog.com/search/label/Speech%20Recognition) are complex. But, like [image classification with the MNIST dataset](../quickstart/beginner.ipynb), this tutorial should give you a basic understanding of the techniques involved." ] }, { @@ -85,7 +87,18 @@ "source": [ "## Setup\n", "\n", - "Import necessary modules and dependencies." + "Import necessary modules and dependencies. You'll be using `tf.keras.utils.audio_dataset_from_directory` (introduced in TensorFlow 2.10), which helps generate audio classification datasets from directories of `.wav` files. You'll also need [seaborn](https://seaborn.pydata.org) for visualization in this tutorial." 
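(Since the setup text above notes that `tf.keras.utils.audio_dataset_from_directory` was introduced in TensorFlow 2.10, a quick version sanity check can avoid a confusing error later; this cell is an illustrative addition, not part of the notebook.)

```python
import tensorflow as tf

# audio_dataset_from_directory requires TF 2.10+ (per the text above).
major, minor = (int(v) for v in tf.__version__.split('.')[:2])
assert (major, minor) >= (2, 10), f"Need TensorFlow 2.10+, found {tf.__version__}"
```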
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hhNW45sjDEDe" + }, + "outputs": [], + "source": [ + "!pip install -U -q tensorflow tensorflow_datasets" ] }, { @@ -104,13 +117,11 @@ "import seaborn as sns\n", "import tensorflow as tf\n", "\n", - "from tensorflow.keras.layers.experimental import preprocessing\n", "from tensorflow.keras import layers\n", "from tensorflow.keras import models\n", "from IPython import display\n", "\n", - "\n", - "# Set seed for experiment reproducibility\n", + "# Set the seed value for experiment reproducibility.\n", "seed = 42\n", "tf.random.set_seed(seed)\n", "np.random.seed(seed)" @@ -122,11 +133,11 @@ "id": "yR0EdgrLCaWR" }, "source": [ - "## Import the Speech Commands dataset\n", + "## Import the mini Speech Commands dataset\n", "\n", - "You'll write a script to download a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands). The original dataset consists of over 105,000 WAV audio files of people saying thirty different words. This data was collected by Google and released under a CC BY license.\n", + "To save time with data loading, you will be working with a smaller version of the Speech Commands dataset. The [original dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) consists of over 105,000 audio files in the [WAV (Waveform) audio file format](https://www.aelius.com/njh/wavemetatools/doc/riffmci.pdf) of people saying 35 different words. This data was collected by Google and released under a CC BY license.\n", "\n", - "You'll be using a portion of the dataset to save time with data loading. Extract the `mini_speech_commands.zip` and load it in using the `tf.data` API." + "Download and extract the `mini_speech_commands.zip` file containing the smaller Speech Commands datasets with `tf.keras.utils.get_file`:" ] }, { @@ -137,7 +148,9 @@ }, "outputs": [], "source": [ - "data_dir = pathlib.Path('data/mini_speech_commands')\n", + "DATASET_PATH = 'data/mini_speech_commands'\n", + "\n", + "data_dir = pathlib.Path(DATASET_PATH)\n", "if not data_dir.exists():\n", " tf.keras.utils.get_file(\n", " 'mini_speech_commands.zip',\n", @@ -152,7 +165,7 @@ "id": "BgvFq3uYiS5G" }, "source": [ - "Check basic statistics about the dataset." + "The dataset's audio clips are stored in eight folders corresponding to each speech command: `no`, `yes`, `down`, `go`, `left`, `up`, `right`, and `stop`:" ] }, { @@ -164,178 +177,140 @@ "outputs": [], "source": [ "commands = np.array(tf.io.gfile.listdir(str(data_dir)))\n", - "commands = commands[commands != 'README.md']\n", + "commands = commands[(commands != 'README.md') & (commands != '.DS_Store')]\n", "print('Commands:', commands)" ] }, { "cell_type": "markdown", "metadata": { - "id": "aMvdU9SY8WXN" + "id": "TZ7GJjDvHqtt" }, "source": [ - "Extract the audio files into a list and shuffle it." + "Divided into directories this way, you can easily load the data using `keras.utils.audio_dataset_from_directory`. \n", + "\n", + "The audio clips are 1 second or less at 16kHz. The `output_sequence_length=16000` pads the short ones to exactly 1 second (and would trim longer ones) so that they can be easily batched." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "hlX685l1wD9k" + "id": "mFM4c3aMC8Qv" }, "outputs": [], "source": [ - "filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')\n", - "filenames = tf.random.shuffle(filenames)\n", - "num_samples = len(filenames)\n", - "print('Number of total examples:', num_samples)\n", - "print('Number of examples per label:',\n", - " len(tf.io.gfile.listdir(str(data_dir/commands[0]))))\n", - "print('Example file tensor:', filenames[0])" + "train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(\n", + " directory=data_dir,\n", + " batch_size=64,\n", + " validation_split=0.2,\n", + " seed=0,\n", + " output_sequence_length=16000,\n", + " subset='both')\n", + "\n", + "label_names = np.array(train_ds.class_names)\n", + "print()\n", + "print(\"label names:\", label_names)" ] }, { "cell_type": "markdown", "metadata": { - "id": "9vK3ymy23MCP" + "id": "cestp83qFnU5" }, "source": [ - "Split the files into training, validation and test sets using a 80:10:10 ratio, respectively." + "The dataset now contains batches of audio clips and integer labels. The audio clips have a shape of `(batch, samples, channels)`. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Cv_wts-l3KgD" + "id": "3yU6SQGIFb3H" }, "outputs": [], "source": [ - "train_files = filenames[:6400]\n", - "val_files = filenames[6400: 6400 + 800]\n", - "test_files = filenames[-800:]\n", - "\n", - "print('Training set size', len(train_files))\n", - "print('Validation set size', len(val_files))\n", - "print('Test set size', len(test_files))" + "train_ds.element_spec" ] }, { "cell_type": "markdown", "metadata": { - "id": "g2Cj9FyvfweD" + "id": "ppG9Dgq2Ex8R" }, "source": [ - "## Reading audio files and their labels" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1zjcWteOcBy" - }, - "source": [ - "The audio file will initially be read as a binary file, which you'll want to convert into a numerical tensor.\n", - "\n", - "To load an audio file, you will use [`tf.audio.decode_wav`](https://www.tensorflow.org/api_docs/python/tf/audio/decode_wav), which returns the WAV-encoded audio as a Tensor and the sample rate.\n", - "\n", - "A WAV file contains time series data with a set number of samples per second. \n", - "Each sample represents the amplitude of the audio signal at that specific time. In a 16-bit system, like the files in `mini_speech_commands`, the values range from -32768 to 32767. \n", - "The sample rate for this dataset is 16kHz.\n", - "Note that `tf.audio.decode_wav` will normalize the values to the range [-1.0, 1.0]." + "This dataset only contains single channel audio, so use the `tf.squeeze` function to drop the extra axis:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "9PjJ2iXYwftD" + "id": "Xl-tnniUIBlM" }, "outputs": [], "source": [ - "def decode_audio(audio_binary):\n", - " audio, _ = tf.audio.decode_wav(audio_binary)\n", - " return tf.squeeze(audio, axis=-1)" + "def squeeze(audio, labels):\n", + " audio = tf.squeeze(audio, axis=-1)\n", + " return audio, labels\n", + "\n", + "train_ds = train_ds.map(squeeze, tf.data.AUTOTUNE)\n", + "val_ds = val_ds.map(squeeze, tf.data.AUTOTUNE)" ] }, { "cell_type": "markdown", "metadata": { - "id": "GPQseZElOjVN" + "id": "DtsCSWZN5ILv" }, "source": [ - "The label for each WAV file is its parent directory." + "The `utils.audio_dataset_from_directory` function only returns up to two splits. 
It's a good idea to keep a test set separate from your validation set.\n", + "Ideally you'd keep it in a separate directory, but in this case you can use `Dataset.shard` to split the validation set into two halves. Note that iterating over **any** shard will load **all** the data, and only keep its fraction. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "8VTtX1nr3YT-" + "id": "u5UEGsqM5Gss" }, "outputs": [], "source": [ - "def get_label(file_path):\n", - " parts = tf.strings.split(file_path, os.path.sep)\n", - "\n", - " # Note: You'll use indexing here instead of tuple unpacking to enable this \n", - " # to work in a TensorFlow graph.\n", - " return parts[-2] " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E8Y9w_5MOsr-" - }, - "source": [ - "Let's define a method that will take in the filename of the WAV file and output a tuple containing the audio and labels for supervised training." + "test_ds = val_ds.shard(num_shards=2, index=0)\n", + "val_ds = val_ds.shard(num_shards=2, index=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "WdgUD5T93NyT" + "id": "xIeoJcwJH5h9" }, "outputs": [], "source": [ - "def get_waveform_and_label(file_path):\n", - " label = get_label(file_path)\n", - " audio_binary = tf.io.read_file(file_path)\n", - " waveform = decode_audio(audio_binary)\n", - " return waveform, label" + "for example_audio, example_labels in train_ds.take(1): \n", + " print(example_audio.shape)\n", + " print(example_labels.shape)" ] }, { "cell_type": "markdown", "metadata": { - "id": "nvN8W_dDjYjc" + "id": "voxGEwvuh2L7" }, "source": [ - "You will now apply `process_path` to build your training set to extract the audio-label pairs and check the results. You'll build the validation and test sets using a similar procedure later on." + "Let's plot a few audio waveforms:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "0SQl8yXl3kNP" + "id": "dYtGq2zYNHuT" }, "outputs": [], "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "files_ds = tf.data.Dataset.from_tensor_slices(train_files)\n", - "waveform_ds = files_ds.map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "voxGEwvuh2L7" - }, - "source": [ - "Let's examine a few audio waveforms with their corresponding labels." + "label_names[[1,1,3,0]]" ] }, { @@ -346,20 +321,17 @@ }, "outputs": [], "source": [ + "plt.figure(figsize=(16, 10))\n", "rows = 3\n", "cols = 3\n", - "n = rows*cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 12))\n", - "for i, (audio, label) in enumerate(waveform_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " ax = axes[r][c]\n", - " ax.plot(audio.numpy())\n", - " ax.set_yticks(np.arange(-1.2, 1.2, 0.2))\n", - " label = label.numpy().decode('utf-8')\n", - " ax.set_title(label)\n", - "\n", - "plt.show()" + "n = rows * cols\n", + "for i in range(n):\n", + " plt.subplot(rows, cols, i+1)\n", + " audio_signal = example_audio[i]\n", + " plt.plot(audio_signal)\n", + " plt.title(label_names[example_labels[i]])\n", + " plt.yticks(np.arange(-1.2, 1.2, 0.2))\n", + " plt.ylim([-1.1, 1.1])" ] }, { @@ -368,17 +340,17 @@ "id": "EWXPphxm0B4m" }, "source": [ - "## Spectrogram\n", + "## Convert waveforms to spectrograms\n", "\n", - "You'll convert the waveform into a spectrogram, which shows frequency changes over time and can be represented as a 2D image. 
This can be done by applying the short-time Fourier transform (STFT) to convert the audio into the time-frequency domain.\n", + "The waveforms in the dataset are represented in the time domain. Next, you'll transform the waveforms from the time-domain signals into the time-frequency-domain signals by computing the [short-time Fourier transform (STFT)](https://en.wikipedia.org/wiki/Short-time_Fourier_transform) to convert the waveforms to as [spectrograms](https://en.wikipedia.org/wiki/Spectrogram), which show frequency changes over time and can be represented as 2D images. You will feed the spectrogram images into your neural network to train the model.\n", "\n", - "A Fourier transform ([`tf.signal.fft`](https://www.tensorflow.org/api_docs/python/tf/signal/fft)) converts a signal to its component frequencies, but loses all time information. The STFT ([`tf.signal.stft`](https://www.tensorflow.org/api_docs/python/tf/signal/stft)) splits the signal into windows of time and runs a Fourier transform on each window, preserving some time information, and returning a 2D tensor that you can run standard convolutions on.\n", + "A Fourier transform (`tf.signal.fft`) converts a signal to its component frequencies, but loses all time information. In comparison, STFT (`tf.signal.stft`) splits the signal into windows of time and runs a Fourier transform on each window, preserving some time information, and returning a 2D tensor that you can run standard convolutions on.\n", "\n", - "STFT produces an array of complex numbers representing magnitude and phase. However, you'll only need the magnitude for this tutorial, which can be derived by applying `tf.abs` on the output of `tf.signal.stft`. \n", + "Create a utility function for converting waveforms to spectrograms:\n", "\n", - "Choose `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on STFT parameters choice, you can refer to [this video](https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe) on audio signal processing. \n", - "\n", - "You also want the waveforms to have the same length, so that when you convert it to a spectrogram image, the results will have similar dimensions. This can be done by simply zero padding the audio clips that are shorter than one second.\n" + "- The waveforms need to be of the same length, so that when you convert them to spectrograms, the results have similar dimensions. This can be done by simply zero-padding the audio clips that are shorter than one second (using `tf.zeros`).\n", + "- When calling `tf.signal.stft`, choose the `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on the STFT parameters choice, refer to [this Coursera video](https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe) on audio signal processing and STFT.\n", + "- The STFT produces an array of complex numbers representing magnitude and phase. However, in this tutorial you'll only use the magnitude, which you can derive by applying `tf.abs` on the output of `tf.signal.stft`." 
] }, { @@ -390,18 +362,15 @@ "outputs": [], "source": [ "def get_spectrogram(waveform):\n", - " # Padding for files with less than 16000 samples\n", - " zero_padding = tf.zeros([16000] - tf.shape(waveform), dtype=tf.float32)\n", - "\n", - " # Concatenate audio with padding so that all audio clips will be of the \n", - " # same length\n", - " waveform = tf.cast(waveform, tf.float32)\n", - " equal_length = tf.concat([waveform, zero_padding], 0)\n", + " # Convert the waveform to a spectrogram via a STFT.\n", " spectrogram = tf.signal.stft(\n", - " equal_length, frame_length=255, frame_step=128)\n", - " \n", + " waveform, frame_length=255, frame_step=128)\n", + " # Obtain the magnitude of the STFT.\n", " spectrogram = tf.abs(spectrogram)\n", - "\n", + " # Add a `channels` dimension, so that the spectrogram can be used\n", + " # as image-like input data with convolution layers (which expect\n", + " # shape (`batch_size`, `height`, `width`, `channels`).\n", + " spectrogram = spectrogram[..., tf.newaxis]\n", " return spectrogram" ] }, @@ -411,7 +380,7 @@ "id": "5rdPiPYJphs2" }, "source": [ - "Next, you will explore the data. Compare the waveform, the spectrogram and the actual audio of one example from the dataset." + "Next, start exploring the data. Print the shapes of one example's tensorized waveform and the corresponding spectrogram, and play the original audio:" ] }, { @@ -422,15 +391,25 @@ }, "outputs": [], "source": [ - "for waveform, label in waveform_ds.take(1):\n", - " label = label.numpy().decode('utf-8')\n", + "for i in range(3):\n", + " label = label_names[example_labels[i]]\n", + " waveform = example_audio[i]\n", " spectrogram = get_spectrogram(waveform)\n", "\n", - "print('Label:', label)\n", - "print('Waveform shape:', waveform.shape)\n", - "print('Spectrogram shape:', spectrogram.shape)\n", - "print('Audio playback')\n", - "display.display(display.Audio(waveform, rate=16000))" + " print('Label:', label)\n", + " print('Waveform shape:', waveform.shape)\n", + " print('Spectrogram shape:', spectrogram.shape)\n", + " print('Audio playback')\n", + " display.display(display.Audio(waveform, rate=16000))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xnSuqyxJ1isF" + }, + "source": [ + "Now, define a function for displaying a spectrogram:" ] }, { @@ -442,154 +421,136 @@ "outputs": [], "source": [ "def plot_spectrogram(spectrogram, ax):\n", - " # Convert to frequencies to log scale and transpose so that the time is\n", - " # represented in the x-axis (columns). 
An epsilon is added to avoid log of zero.\n", - " log_spec = np.log(spectrogram.T+np.finfo(float).eps)\n", + " if len(spectrogram.shape) > 2:\n", + " assert len(spectrogram.shape) == 3\n", + " spectrogram = np.squeeze(spectrogram, axis=-1)\n", + " # Convert the frequencies to log scale and transpose, so that the time is\n", + " # represented on the x-axis (columns).\n", + " # Add an epsilon to avoid taking a log of zero.\n", + " log_spec = np.log(spectrogram.T + np.finfo(float).eps)\n", " height = log_spec.shape[0]\n", " width = log_spec.shape[1]\n", " X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)\n", " Y = range(height)\n", - " ax.pcolormesh(X, Y, log_spec)\n", - "\n", - "\n", - "fig, axes = plt.subplots(2, figsize=(12, 8))\n", - "timescale = np.arange(waveform.shape[0])\n", - "axes[0].plot(timescale, waveform.numpy())\n", - "axes[0].set_title('Waveform')\n", - "axes[0].set_xlim([0, 16000])\n", - "plot_spectrogram(spectrogram.numpy(), axes[1])\n", - "axes[1].set_title('Spectrogram')\n", - "plt.show()" + " ax.pcolormesh(X, Y, log_spec)" ] }, { "cell_type": "markdown", "metadata": { - "id": "GyYXjW07jCHA" + "id": "baa5c91e8603" }, "source": [ - "Now transform the waveform dataset to have spectrogram images and their corresponding labels as integer IDs." + "Plot the example's waveform over time and the corresponding spectrogram (frequencies over time):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "43IS2IouEV40" + "id": "d2_CikgY1tjv" }, "outputs": [], "source": [ - "def get_spectrogram_and_label_id(audio, label):\n", - " spectrogram = get_spectrogram(audio)\n", - " spectrogram = tf.expand_dims(spectrogram, -1)\n", - " label_id = tf.argmax(label == commands)\n", - " return spectrogram, label_id" + "fig, axes = plt.subplots(2, figsize=(12, 8))\n", + "timescale = np.arange(waveform.shape[0])\n", + "axes[0].plot(timescale, waveform.numpy())\n", + "axes[0].set_title('Waveform')\n", + "axes[0].set_xlim([0, 16000])\n", + "\n", + "plot_spectrogram(spectrogram.numpy(), axes[1])\n", + "axes[1].set_title('Spectrogram')\n", + "plt.suptitle(label.title())\n", + "plt.show()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "yEVb_oK0oBLQ" + "id": "GyYXjW07jCHA" }, - "outputs": [], "source": [ - "spectrogram_ds = waveform_ds.map(\n", - " get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)" + "Now, create spectrogram datasets from the audio datasets:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "6gQpAAgMnyDi" + "id": "mAD0LpkgqtQo" }, + "outputs": [], "source": [ - "Examine the spectrogram \"images\" for different samples of the dataset." 
+ "def make_spec_ds(ds):\n", + " return ds.map(\n", + " map_func=lambda audio,label: (get_spectrogram(audio), label),\n", + " num_parallel_calls=tf.data.AUTOTUNE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "QUbHfTuon4iF" + "id": "yEVb_oK0oBLQ" }, "outputs": [], "source": [ - "rows = 3\n", - "cols = 3\n", - "n = rows*cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 10))\n", - "for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " ax = axes[r][c]\n", - " plot_spectrogram(np.squeeze(spectrogram.numpy()), ax)\n", - " ax.set_title(commands[label_id.numpy()])\n", - " ax.axis('off')\n", - " \n", - "plt.show()" + "train_spectrogram_ds = make_spec_ds(train_ds)\n", + "val_spectrogram_ds = make_spec_ds(val_ds)\n", + "test_spectrogram_ds = make_spec_ds(test_ds)" ] }, { "cell_type": "markdown", "metadata": { - "id": "z5KdY8IF8rkt" + "id": "6gQpAAgMnyDi" }, "source": [ - "## Build and train the model\n", - "\n", - "Now you can build and train your model. But before you do that, you'll need to repeat the training set preprocessing on the validation and test sets." + "Examine the spectrograms for different examples of the dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "10UI32QH_45b" + "id": "EaM2q5aGis-d" }, "outputs": [], "source": [ - "def preprocess_dataset(files):\n", - " files_ds = tf.data.Dataset.from_tensor_slices(files)\n", - " output_ds = files_ds.map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)\n", - " output_ds = output_ds.map(\n", - " get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)\n", - " return output_ds" + "for example_spectrograms, example_spect_labels in train_spectrogram_ds.take(1):\n", + " break" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "HNv4xwYkB2P6" + "id": "QUbHfTuon4iF" }, "outputs": [], "source": [ - "train_ds = spectrogram_ds\n", - "val_ds = preprocess_dataset(val_files)\n", - "test_ds = preprocess_dataset(test_files)" + "rows = 3\n", + "cols = 3\n", + "n = rows*cols\n", + "fig, axes = plt.subplots(rows, cols, figsize=(16, 9))\n", + "\n", + "for i in range(n):\n", + " r = i // cols\n", + " c = i % cols\n", + " ax = axes[r][c]\n", + " plot_spectrogram(example_spectrograms[i].numpy(), ax)\n", + " ax.set_title(label_names[example_spect_labels[i].numpy()])\n", + "\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": { - "id": "assnWo6SB3lR" - }, - "source": [ - "Batch the training and validation sets for model training." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UgY9WYzn61EX" + "id": "z5KdY8IF8rkt" }, - "outputs": [], "source": [ - "batch_size = 64\n", - "train_ds = train_ds.batch(batch_size)\n", - "val_ds = val_ds.batch(batch_size)" + "## Build and train the model" ] }, { @@ -598,7 +559,7 @@ "id": "GS1uIh6F_TN9" }, "source": [ - "Add dataset [`cache()`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#cache) and [`prefetch()`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch) operations to reduce read latency while training the model." 
+ "Add `Dataset.cache` and `Dataset.prefetch` operations to reduce read latency while training the model:" ] }, { @@ -609,8 +570,9 @@ }, "outputs": [], "source": [ - "train_ds = train_ds.cache().prefetch(AUTOTUNE)\n", - "val_ds = val_ds.cache().prefetch(AUTOTUNE)" + "train_spectrogram_ds = train_spectrogram_ds.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)\n", + "val_spectrogram_ds = val_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)\n", + "test_spectrogram_ds = test_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)" ] }, { @@ -620,11 +582,13 @@ }, "source": [ "For the model, you'll use a simple convolutional neural network (CNN), since you have transformed the audio files into spectrogram images.\n", - "The model also has the following additional preprocessing layers:\n", - "- A [`Resizing`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/Resizing) layer to downsample the input to enable the model to train faster.\n", - "- A [`Normalization`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/Normalization) layer to normalize each pixel in the image based on its mean and standard deviation.\n", "\n", - "For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (i.e. mean and standard deviation)." + "Your `tf.keras.Sequential` model will use the following Keras preprocessing layers:\n", + "\n", + "- `tf.keras.layers.Resizing`: to downsample the input to enable the model to train faster.\n", + "- `tf.keras.layers.Normalization`: to normalize each pixel in the image based on its mean and standard deviation.\n", + "\n", + "For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (that is, the mean and the standard deviation)." 
] }, { @@ -635,17 +599,21 @@ }, "outputs": [], "source": [ - "for spectrogram, _ in spectrogram_ds.take(1):\n", - " input_shape = spectrogram.shape\n", + "input_shape = example_spectrograms.shape[1:]\n", "print('Input shape:', input_shape)\n", - "num_labels = len(commands)\n", + "num_labels = len(label_names)\n", "\n", - "norm_layer = preprocessing.Normalization()\n", - "norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))\n", + "# Instantiate the `tf.keras.layers.Normalization` layer.\n", + "norm_layer = layers.Normalization()\n", + "# Fit the state of the layer to the spectrograms\n", + "# with `Normalization.adapt`.\n", + "norm_layer.adapt(data=train_spectrogram_ds.map(map_func=lambda spec, label: spec))\n", "\n", "model = models.Sequential([\n", " layers.Input(shape=input_shape),\n", - " preprocessing.Resizing(32, 32), \n", + " # Downsample the input.\n", + " layers.Resizing(32, 32),\n", + " # Normalize.\n", " norm_layer,\n", " layers.Conv2D(32, 3, activation='relu'),\n", " layers.Conv2D(64, 3, activation='relu'),\n", @@ -660,6 +628,15 @@ "model.summary()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "de52e5afa2f3" + }, + "source": [ + "Configure the Keras model with the Adam optimizer and the cross-entropy loss:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -675,6 +652,15 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "f42b9e3a4705" + }, + "source": [ + "Train the model over 10 epochs for demonstration purposes:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -685,8 +671,8 @@ "source": [ "EPOCHS = 10\n", "history = model.fit(\n", - " train_ds, \n", - " validation_data=val_ds, \n", + " train_spectrogram_ds,\n", + " validation_data=val_spectrogram_ds,\n", " epochs=EPOCHS,\n", " callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),\n", ")" @@ -698,7 +684,7 @@ "id": "gjpCDeQ4mUfS" }, "source": [ - "Let's check the training and validation loss curves to see how your model has improved during training." + "Let's plot the training and validation loss curves to check how your model has improved during training:" ] }, { @@ -710,9 +696,20 @@ "outputs": [], "source": [ "metrics = history.history\n", + "plt.figure(figsize=(16,6))\n", + "plt.subplot(1,2,1)\n", "plt.plot(history.epoch, metrics['loss'], metrics['val_loss'])\n", "plt.legend(['loss', 'val_loss'])\n", - "plt.show()" + "plt.ylim([0, max(plt.ylim())])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss [CrossEntropy]')\n", + "\n", + "plt.subplot(1,2,2)\n", + "plt.plot(history.epoch, 100*np.array(metrics['accuracy']), 100*np.array(metrics['val_accuracy']))\n", + "plt.legend(['accuracy', 'val_accuracy'])\n", + "plt.ylim([0, 100])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Accuracy [%]')" ] }, { @@ -721,54 +718,64 @@ "id": "5ZTt3kO3mfm4" }, "source": [ - "## Evaluate test set performance\n", + "## Evaluate the model performance\n", "\n", - "Let's run the model on the test set and check performance." 
+ "Run the model on the test set and check the model's performance:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "biU2MwzyAo8o" + "id": "FapuRT_SsWGQ" }, "outputs": [], "source": [ - "test_audio = []\n", - "test_labels = []\n", - "\n", - "for audio, label in test_ds:\n", - " test_audio.append(audio.numpy())\n", - " test_labels.append(label.numpy())\n", + "model.evaluate(test_spectrogram_ds, return_dict=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "en9Znt1NOabH" + }, + "source": [ + "### Display a confusion matrix\n", "\n", - "test_audio = np.array(test_audio)\n", - "test_labels = np.array(test_labels)" + "Use a [confusion matrix](https://developers.google.com/machine-learning/glossary#confusion-matrix) to check how well the model did classifying each of the commands in the test set:\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "ktUanr9mRZky" + "id": "5Y6vmWWQuuT1" }, "outputs": [], "source": [ - "y_pred = np.argmax(model.predict(test_audio), axis=1)\n", - "y_true = test_labels\n", - "\n", - "test_acc = sum(y_pred == y_true) / len(y_true)\n", - "print(f'Test set accuracy: {test_acc:.0%}')" + "y_pred = model.predict(test_spectrogram_ds)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "en9Znt1NOabH" + "id": "d6F0il82u7lW" }, + "outputs": [], "source": [ - "### Display a confusion matrix\n", - "\n", - "A confusion matrix is helpful to see how well the model did on each of the commands in the test set." + "y_pred = tf.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHSNoBYLvX81" + }, + "outputs": [], + "source": [ + "y_true = tf.concat(list(test_spectrogram_ds.map(lambda s,lab: lab)), axis=0)" ] }, { @@ -779,9 +786,11 @@ }, "outputs": [], "source": [ - "confusion_mtx = tf.math.confusion_matrix(y_true, y_pred) \n", + "confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)\n", "plt.figure(figsize=(10, 8))\n", - "sns.heatmap(confusion_mtx, xticklabels=commands, yticklabels=commands, \n", + "sns.heatmap(confusion_mtx,\n", + " xticklabels=label_names,\n", + " yticklabels=label_names,\n", " annot=True, fmt='g')\n", "plt.xlabel('Prediction')\n", "plt.ylabel('Label')\n", @@ -796,7 +805,7 @@ "source": [ "## Run inference on an audio file\n", "\n", - "Finally, verify the model's prediction output using an input audio file of someone saying \"no.\" How well does your model perform?" + "Finally, verify the model's prediction output using an input audio file of someone saying \"no\". How well does your model perform?" 
] }, { @@ -807,15 +816,21 @@ }, "outputs": [], "source": [ - "sample_file = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = tf.io.read_file(str(x))\n", + "x, sample_rate = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + "x = tf.squeeze(x, axis=-1)\n", + "waveform = x\n", + "x = get_spectrogram(x)\n", + "x = x[tf.newaxis,...]\n", "\n", - "sample_ds = preprocess_dataset([str(sample_file)])\n", + "prediction = model(x)\n", + "x_labels = ['no', 'yes', 'down', 'go', 'left', 'up', 'right', 'stop']\n", + "plt.bar(x_labels, tf.nn.softmax(prediction[0]))\n", + "plt.title('No')\n", + "plt.show()\n", "\n", - "for spectrogram, label in sample_ds.batch(1):\n", - " prediction = model(spectrogram)\n", - " plt.bar(commands, tf.nn.softmax(prediction[0]))\n", - " plt.title(f'Predictions for \"{commands[label[0]]}\"')\n", - " plt.show()" + "display.display(display.Audio(waveform, rate=16000))" ] }, { @@ -824,7 +839,107 @@ "id": "VgWICqdqQNaQ" }, "source": [ - "You can see that your model very clearly recognized the audio command as \"no.\"" + "As the output suggests, your model should have recognized the audio command as \"no\"." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h1icqlM3ISW0" + }, + "source": [ + "## Export the model with preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r7HX-MjgIbji" + }, + "source": [ + "The model's not very easy to use if you have to apply those preprocessing steps before passing data to the model for inference. So build an end-to-end version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2lIeXdWjIbDE" + }, + "outputs": [], + "source": [ + "class ExportModel(tf.Module):\n", + " def __init__(self, model):\n", + " self.model = model\n", + "\n", + " # Accept either a string-filename or a batch of waveforms.\n", + " # You could add additional signatures for a single wave, or a ragged-batch. \n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=(), dtype=tf.string))\n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=[None, 16000], dtype=tf.float32))\n", + "\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # If they pass a string, load the file and decode it. 
\n", + " if x.dtype == tf.string:\n", + " x = tf.io.read_file(x)\n", + " x, _ = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + " x = tf.squeeze(x, axis=-1)\n", + " x = x[tf.newaxis, :]\n", + " \n", + " x = get_spectrogram(x) \n", + " result = self.model(x, training=False)\n", + " \n", + " class_ids = tf.argmax(result, axis=-1)\n", + " class_names = tf.gather(label_names, class_ids)\n", + " return {'predictions':result,\n", + " 'class_ids': class_ids,\n", + " 'class_names': class_names}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtZBmUiB9HGY" + }, + "source": [ + "Test run the \"export\" model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1_8TYaCIRue" + }, + "outputs": [], + "source": [ + "export = ExportModel(model)\n", + "export(tf.constant(str(data_dir/'no/01bb6a2a_nohash_0.wav')))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1J6Iuz829Cxo" + }, + "source": [ + "Save and reload the model, the reloaded model gives identical output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTAg4vsn3oEb" + }, + "outputs": [], + "source": [ + "tf.saved_model.save(export, \"saved\")\n", + "imported = tf.saved_model.load(\"saved\")\n", + "imported(waveform[tf.newaxis, :])" ] }, { @@ -835,18 +950,20 @@ "source": [ "## Next steps\n", "\n", - "This tutorial showed how you could do simple audio classification using a convolutional neural network with TensorFlow and Python.\n", - "\n", - "* To learn how to use transfer learning for audio classification, check out the [Sound classification with YAMNet](https://www.tensorflow.org/hub/tutorials/yamnet) tutorial.\n", - "\n", - "* To build your own interactive web app for audio classification, consider taking the [TensorFlow.js - Audio recognition using transfer learning codelab](https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html#0).\n", + "This tutorial demonstrated how to carry out simple audio classification/automatic speech recognition using a convolutional neural network with TensorFlow and Python. To learn more, consider the following resources:\n", "\n", - "* TensorFlow also has additional support for [audio data preparation and augmentation](https://www.tensorflow.org/io/tutorials/audio) to help with your own audio-based projects.\n" + "- The [Sound classification with YAMNet](https://www.tensorflow.org/hub/tutorials/yamnet) tutorial shows how to use transfer learning for audio classification.\n", + "- The notebooks from [Kaggle's TensorFlow speech recognition challenge](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/overview).\n", + "- The \n", + "[TensorFlow.js - Audio recognition using transfer learning codelab](https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html#0) teaches how to build your own interactive web app for audio classification.\n", + "- [A tutorial on deep learning for music information retrieval](https://arxiv.org/abs/1709.04396) (Choi et al., 2017) on arXiv.\n", + "- TensorFlow also has additional support for [audio data preparation and augmentation](https://www.tensorflow.org/io/tutorials/audio) to help with your own audio-based projects.\n", + "- Consider using the [librosa](https://librosa.org/) library for music and audio analysis." 
] } ], "metadata": { - "accelerator": "GPU", + "accelerator": "CPU", "colab": { "collapsed_sections": [], "name": "simple_audio.ipynb", diff --git a/site/en/tutorials/audio/transfer_learning_audio.ipynb b/site/en/tutorials/audio/transfer_learning_audio.ipynb index 16c679aed61..160aeeb7103 100644 --- a/site/en/tutorials/audio/transfer_learning_audio.ipynb +++ b/site/en/tutorials/audio/transfer_learning_audio.ipynb @@ -99,7 +99,9 @@ }, "outputs": [], "source": [ - "!pip install tensorflow_io" + "!pip install -q \"tensorflow==2.11.*\"\n", + "# tensorflow_io 0.28 is compatible with TensorFlow 2.11\n", + "!pip install -q \"tensorflow_io==0.28.*\"" ] }, { @@ -235,7 +237,7 @@ "_ = plt.plot(testing_wav_data)\n", "\n", "# Play the audio file.\n", - "display.Audio(testing_wav_data,rate=16000)" + "display.Audio(testing_wav_data, rate=16000)" ] }, { @@ -286,7 +288,7 @@ "source": [ "scores, embeddings, spectrogram = yamnet_model(testing_wav_data)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = class_names[top_class]\n", "\n", "print(f'The main sound is: {inferred_class}')\n", @@ -736,7 +738,7 @@ "outputs": [], "source": [ "reloaded_results = reloaded_model(testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(reloaded_results)]\n", + "cat_or_dog = my_classes[tf.math.argmax(reloaded_results)]\n", "print(f'The main sound is: {cat_or_dog}')" ] }, @@ -758,7 +760,7 @@ "outputs": [], "source": [ "serving_results = reloaded_model.signatures['serving_default'](testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(serving_results['classifier'])]\n", + "cat_or_dog = my_classes[tf.math.argmax(serving_results['classifier'])]\n", "print(f'The main sound is: {cat_or_dog}')\n" ] }, @@ -805,13 +807,13 @@ "# Run the model, check the output.\n", "scores, embeddings, spectrogram = yamnet_model(waveform)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = class_names[top_class]\n", "top_score = class_scores[top_class]\n", "print(f'[YAMNet] The main sound is: {inferred_class} ({top_score})')\n", "\n", "reloaded_results = reloaded_model(waveform)\n", - "your_top_class = tf.argmax(reloaded_results)\n", + "your_top_class = tf.math.argmax(reloaded_results)\n", "your_inferred_class = my_classes[your_top_class]\n", "class_probabilities = tf.nn.softmax(reloaded_results, axis=-1)\n", "your_top_score = class_probabilities[your_top_class]\n", diff --git a/site/en/tutorials/customization/basics.ipynb b/site/en/tutorials/customization/basics.ipynb index 314738300e3..2df0840ad5e 100644 --- a/site/en/tutorials/customization/basics.ipynb +++ b/site/en/tutorials/customization/basics.ipynb @@ -70,10 +70,10 @@ "source": [ "This is an introductory TensorFlow tutorial that shows how to:\n", "\n", - "* Import the required package\n", - "* Create and use tensors\n", - "* Use GPU acceleration\n", - "* Demonstrate `tf.data.Dataset`" + "* Import the required package.\n", + "* Create and use tensors.\n", + "* Use GPU acceleration.\n", + "* Build a data pipeline with `tf.data.Dataset`." ] }, { @@ -84,7 +84,7 @@ "source": [ "## Import TensorFlow\n", "\n", - "To get started, import the `tensorflow` module. As of TensorFlow 2, eager execution is turned on by default. This enables a more interactive frontend to TensorFlow, the details of which we will discuss much later." 
+ "To get started, import the `tensorflow` module. As of TensorFlow 2, eager execution is turned on by default. Eager execution enables a more interactive frontend to TensorFlow, which you will later explore in more detail." ] }, { @@ -106,7 +106,7 @@ "source": [ "## Tensors\n", "\n", - "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations ([tf.add](https://www.tensorflow.org/api_docs/python/tf/add), [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul), [tf.linalg.inv](https://www.tensorflow.org/api_docs/python/tf/linalg/inv) etc.) that consume and produce `tf.Tensor`s. These operations automatically convert native Python types, for example:\n" + "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations (for example, `tf.math.add`, `tf.linalg.matmul`, and `tf.linalg.inv`) that consume and produce `tf.Tensor`s. These operations automatically convert built-in Python types. For example:\n" ] }, { @@ -118,13 +118,13 @@ }, "outputs": [], "source": [ - "print(tf.add(1, 2))\n", - "print(tf.add([1, 2], [3, 4]))\n", - "print(tf.square(5))\n", - "print(tf.reduce_sum([1, 2, 3]))\n", + "print(tf.math.add(1, 2))\n", + "print(tf.math.add([1, 2], [3, 4]))\n", + "print(tf.math.square(5))\n", + "print(tf.math.reduce_sum([1, 2, 3]))\n", "\n", "# Operator overloading is also supported\n", - "print(tf.square(2) + tf.square(3))" + "print(tf.math.square(2) + tf.math.square(3))" ] }, { @@ -144,7 +144,7 @@ }, "outputs": [], "source": [ - "x = tf.matmul([[1]], [[2, 3]])\n", + "x = tf.linalg.matmul([[1]], [[2, 3]])\n", "print(x)\n", "print(x.shape)\n", "print(x.dtype)" @@ -168,9 +168,9 @@ "id": "Dwi1tdW3JBw6" }, "source": [ - "### NumPy Compatibility\n", + "### NumPy compatibility\n", "\n", - "Converting between a TensorFlow `tf.Tensor`s and a NumPy `ndarray` is easy:\n", + "Converting between a TensorFlow `tf.Tensor` and a NumPy `ndarray` is easy:\n", "\n", "* TensorFlow operations automatically convert NumPy ndarrays to Tensors.\n", "* NumPy operations automatically convert Tensors to NumPy ndarrays.\n", @@ -191,11 +191,11 @@ "ndarray = np.ones([3, 3])\n", "\n", "print(\"TensorFlow operations convert numpy arrays to Tensors automatically\")\n", - "tensor = tf.multiply(ndarray, 42)\n", + "tensor = tf.math.multiply(ndarray, 42)\n", "print(tensor)\n", "\n", "\n", - "print(\"And NumPy operations convert Tensors to numpy arrays automatically\")\n", + "print(\"And NumPy operations convert Tensors to NumPy arrays automatically\")\n", "print(np.add(tensor, 1))\n", "\n", "print(\"The .numpy() method explicitly converts a Tensor to a numpy array\")\n", @@ -210,7 +210,7 @@ "source": [ "## GPU acceleration\n", "\n", - "Many TensorFlow operations are accelerated using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed, for example:" + "Many TensorFlow operations are accelerated using the GPU for computation. 
Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed. For example:" ] }, { @@ -237,7 +237,7 @@ "id": "vpgYzgVXW2Ud" }, "source": [ - "### Device Names\n", + "### Device names\n", "\n", "The `Tensor.device` property provides a fully qualified string name of the device hosting the contents of the tensor. This name encodes many details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of a TensorFlow program. The string ends with `GPU:` if the tensor is placed on the `N`-th GPU on the host." ] @@ -248,9 +248,11 @@ "id": "ZWZQCimzuqyP" }, "source": [ - "### Explicit Device Placement\n", + "### Explicit device placement\n", "\n", - "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed. However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager, for example:" + "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed.\n", + "\n", + "However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager. For example:" ] }, { @@ -266,7 +268,7 @@ "def time_matmul(x):\n", " start = time.time()\n", " for loop in range(10):\n", - " tf.matmul(x, x)\n", + " tf.linalg.matmul(x, x)\n", "\n", " result = time.time()-start\n", "\n", @@ -296,7 +298,7 @@ "source": [ "## Datasets\n", "\n", - "This section uses the [`tf.data.Dataset` API](https://www.tensorflow.org/guide/datasets) to build a pipeline for feeding data to your model. The `tf.data.Dataset` API is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops." + "This section uses the `tf.data.Dataset` API to build a pipeline for feeding data to your model. `tf.data.Dataset` is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops. (Refer to the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide to learn more.)" ] }, { @@ -307,7 +309,7 @@ "source": [ "### Create a source `Dataset`\n", "\n", - "Create a *source* dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices), or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Dataset guide](https://www.tensorflow.org/guide/datasets#reading_input_data) for more information." 
+ "Create a *source* dataset using one of the factory functions like `tf.data.Dataset.from_tensors`, `tf.data.Dataset.from_tensor_slices`, or using objects that read from files like `tf.data.TextLineDataset` or `tf.data.TFRecordDataset`. Refer to the _Reading input data_ section of the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide for more information." ] }, { @@ -341,7 +343,7 @@ "source": [ "### Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), and [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) to apply transformations to dataset records." + "Use the transformations functions like `tf.data.Dataset.map`, `tf.data.Dataset.batch`, and `tf.data.Dataset.shuffle` to apply transformations to dataset records." ] }, { @@ -352,7 +354,7 @@ }, "outputs": [], "source": [ - "ds_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2)\n", + "ds_tensors = ds_tensors.map(tf.math.square).shuffle(2).batch(2)\n", "\n", "ds_file = ds_file.batch(2)" ] diff --git a/site/en/tutorials/customization/custom_layers.ipynb b/site/en/tutorials/customization/custom_layers.ipynb index 97c0e8f8ba6..8bfe0a01b09 100644 --- a/site/en/tutorials/customization/custom_layers.ipynb +++ b/site/en/tutorials/customization/custom_layers.ipynb @@ -90,7 +90,7 @@ }, "outputs": [], "source": [ - "print(tf.test.is_gpu_available())" + "print(tf.config.list_physical_devices('GPU'))" ] }, { @@ -103,7 +103,7 @@ "\n", "Most of the time when writing code for machine learning models you want to operate at a higher level of abstraction than individual operations and manipulation of individual variables.\n", "\n", - "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as a well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", + "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", "\n", "TensorFlow includes the full [Keras](https://keras.io) API in the tf.keras package, and the Keras layers are very useful when building your own models.\n" ] @@ -256,7 +256,7 @@ "\n", "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut. 
Layers can be nested inside other layers.\n", "\n", - "Typically you inherit from `keras.Model` when you need the model methods like: `Model.fit`,`Model.evaluate`, and `Model.save` (see [Custom Keras layers and models](../../guide/keras/custom_layers_and_models.ipynb) for details).\n", + "Typically you inherit from `keras.Model` when you need the model methods like: `Model.fit`,`Model.evaluate`, and `Model.save` (see [Custom Keras layers and models](https://www.tensorflow.org/guide/keras/custom_layers_and_models) for details).\n", "\n", "One other feature provided by `keras.Model` (instead of `keras.layers.Layer`) is that in addition to tracking variables, a `keras.Model` also tracks its internal layers, making them easier to inspect.\n", "\n", diff --git a/site/en/tutorials/customization/custom_training_walkthrough.ipynb b/site/en/tutorials/customization/custom_training_walkthrough.ipynb index 45cc7e8c39d..9a05d864815 100644 --- a/site/en/tutorials/customization/custom_training_walkthrough.ipynb +++ b/site/en/tutorials/customization/custom_training_walkthrough.ipynb @@ -68,81 +68,20 @@ "id": "LDrzLFXE8T1l" }, "source": [ - "This guide uses machine learning to *categorize* Iris flowers by species. It uses TensorFlow to:\n", - "1. Build a model,\n", - "2. Train this model on example data, and\n", - "3. Use the model to make predictions about unknown data.\n", + "This tutorial shows you how to train a machine learning model with a custom training loop to *categorize* penguins by species. In this notebook, you use TensorFlow to accomplish the following:\n", "\n", - "## TensorFlow programming\n", - "\n", - "This guide uses these high-level TensorFlow concepts:\n", + "1. Import a dataset\n", + "2. Build a simple linear model\n", + "3. Train the model\n", + "4. Evaluate the model's effectiveness\n", + "5. Use the trained model to make predictions\n", "\n", - "* Use TensorFlow's default [eager execution](../../guide/eager.ipynb) development environment,\n", - "* Import data with the [Datasets API](../../guide/datasets.ipynb),\n", - "* Build models and layers with TensorFlow's [Keras API](../../guide/keras/overview.ipynb).\n", + "## TensorFlow programming\n", "\n", - "This tutorial is structured like many TensorFlow programs:\n", + "This tutorial demonstrates the following TensorFlow programming tasks:\n", "\n", - "1. Import and parse the dataset.\n", - "2. Select the type of model.\n", - "3. Train the model.\n", - "4. Evaluate the model's effectiveness.\n", - "5. Use the trained model to make predictions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yNr7H-AIoLOR" - }, - "source": [ - "## Setup program" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1J3AuPBT9gyR" - }, - "source": [ - "### Configure imports\n", - "\n", - "Import TensorFlow and the other required Python modules. By default, TensorFlow uses [eager execution](../../guide/eager.ipynb) to evaluate operations immediately, returning concrete values instead of creating a computational graph that is executed later. If you are used to a REPL or the `python` interactive console, this feels familiar." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jElLULrDhQZR" - }, - "outputs": [], - "source": [ - "import os\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bfV2Dai0Ow2o" - }, - "outputs": [], - "source": [ - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g4Wzg69bnwK2" - }, - "outputs": [], - "source": [ - "print(\"TensorFlow version: {}\".format(tf.__version__))\n", - "print(\"Eager execution: {}\".format(tf.executing_eagerly()))" + "* Importing data with the [TensorFlow Datasets API](https://www.tensorflow.org/datasets/overview#load_a_dataset)\n", + "* Building models and layers with the [Keras API](https://www.tensorflow.org/guide/keras/)\n" ] }, { @@ -151,293 +90,255 @@ "id": "Zx7wc0LuuxaJ" }, "source": [ - "## The Iris classification problem\n", + "## Penguin classification problem \n", "\n", - "Imagine you are a botanist seeking an automated way to categorize each Iris flower you find. Machine learning provides many algorithms to classify flowers statistically. For instance, a sophisticated machine learning program could classify flowers based on photographs. Our ambitions are more modest—we're going to classify Iris flowers based on the length and width measurements of their [sepals](https://en.wikipedia.org/wiki/Sepal) and [petals](https://en.wikipedia.org/wiki/Petal).\n", + "Imagine you are an ornithologist seeking an automated way to categorize each penguin you find. Machine learning provides many algorithms to classify penguins statistically. For instance, a sophisticated machine learning program could classify penguins based on photographs. The model you build in this tutorial is a little simpler. It classifies penguins based on their body weight, flipper length, and beaks, specifically the length and width measurements of their [culmen](https://en.wikipedia.org/wiki/Beak#Culmen).\n", "\n", - "The Iris genus entails about 300 species, but our program will only classify the following three:\n", + "There are 18 species of penguins, but in this tutorial you will only attempt to classify the following three:\n", "\n", - "* Iris setosa\n", - "* Iris virginica\n", - "* Iris versicolor\n", + "* Chinstrap penguins\n", + "* Gentoo penguins\n", + "* Adélie penguins\n", "\n", "\n", " \n", " \n", "
    \n", - " \"Petal\n", + " \"Illustration\n", "
    \n", - " Figure 1. Iris setosa (by Radomil, CC BY-SA 3.0), Iris versicolor, (by Dlanglois, CC BY-SA 3.0), and Iris virginica (by Frank Mayfield, CC BY-SA 2.0).
     \n", + " Figure 1. Chinstratp, Gentoo, and Adélie penguins (Artwork by @allison_horst, CC BY-SA 2.0).
     \n", "
    \n", "\n", - "Fortunately, someone has already created a [dataset of 120 Iris flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set) with the sepal and petal measurements. This is a classic dataset that is popular for beginner machine learning classification problems." + "Fortunately, a research team has already created and shared a [dataset of 334 penguins](https://allisonhorst.github.io/palmerpenguins/) with body weight, flipper length, beak measurements, and other data. This dataset is also conveniently available as the [penguins](https://www.tensorflow.org/datasets/catalog/penguins) TensorFlow Dataset. " ] }, { "cell_type": "markdown", "metadata": { - "id": "3Px6KAg0Jowz" + "id": "1J3AuPBT9gyR" }, "source": [ - "## Import and parse the training dataset\n", + "## Setup\n", "\n", - "Download the dataset file and convert it into a structure that can be used by this Python program.\n", - "\n", - "### Download the dataset\n", - "\n", - "Download the training dataset file using the `tf.keras.utils.get_file` function. This returns the file path of the downloaded file:" + "Install the `tfds-nightly` package for the penguins dataset. The `tfds-nightly` package is the nightly released version of the TensorFlow Datasets (TFDS). For more information on TFDS, see [TensorFlow Datasets overview](https://www.tensorflow.org/datasets/overview)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "J6c7uEU9rjRM" + "id": "4XXWn1eDZmET" }, "outputs": [], "source": [ - "train_dataset_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv\"\n", - "\n", - "train_dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(train_dataset_url),\n", - " origin=train_dataset_url)\n", - "\n", - "print(\"Local copy of the dataset file: {}\".format(train_dataset_fp))" + "!pip install -q tfds-nightly" ] }, { "cell_type": "markdown", "metadata": { - "id": "qnX1-aLors4S" + "id": "DtGeMicKRGzU" }, "source": [ - "### Inspect the data\n", + "Then select **Runtime > Restart Runtime** from the Colab menu to restart the Colab runtime.\n", "\n", - "This dataset, `iris_training.csv`, is a plain text file that stores tabular data formatted as comma-separated values (CSV). Use the `head -n5` command to take a peek at the first five entries:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FQvb_JYdrpPm" - }, - "outputs": [], - "source": [ - "!head -n5 {train_dataset_fp}" + "Do not proceed with the rest of this tutorial without first restarting the runtime." ] }, { "cell_type": "markdown", "metadata": { - "id": "kQhzD6P-uBoq" + "id": "G9onjGZWZbA-" }, "source": [ - "From this view of the dataset, notice the following:\n", - "\n", - "1. The first line is a header containing information about the dataset:\n", - " * There are 120 total examples. Each example has four features and one of three possible label names.\n", - "2. Subsequent rows are data records, one *[example](https://developers.google.com/machine-learning/glossary/#example)* per line, where:\n", - " * The first four fields are *[features](https://developers.google.com/machine-learning/glossary/#feature)*: these are the characteristics of an example. Here, the fields hold float numbers representing flower measurements.\n", - " * The last column is the *[label](https://developers.google.com/machine-learning/glossary/#label)*: this is the value we want to predict. 
For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a flower name.\n", - "\n", - "Let's write that out in code:" + "Import TensorFlow and the other required Python modules. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "9Edhevw7exl6" + "id": "jElLULrDhQZR" }, "outputs": [], "source": [ - "# column order in CSV file\n", - "column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']\n", - "\n", - "feature_names = column_names[:-1]\n", - "label_name = column_names[-1]\n", + "import os\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "import matplotlib.pyplot as plt\n", "\n", - "print(\"Features: {}\".format(feature_names))\n", - "print(\"Label: {}\".format(label_name))" + "print(\"TensorFlow version: {}\".format(tf.__version__))\n", + "print(\"TensorFlow Datasets version: \",tfds.__version__)" ] }, { "cell_type": "markdown", "metadata": { - "id": "CCtwLoJhhDNc" + "id": "3Px6KAg0Jowz" }, "source": [ - "Each label is associated with string name (for example, \"setosa\"), but machine learning typically relies on numeric values. The label numbers are mapped to a named representation, such as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica\n", + "## Import the dataset\n", "\n", - "For more information about features and labels, see the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sVNlJlUOhkoX" - }, - "outputs": [], - "source": [ - "class_names = ['Iris setosa', 'Iris versicolor', 'Iris virginica']" + "The default [penguins/processed](https://www.tensorflow.org/datasets/catalog/penguins) TensorFlow Dataset is already cleaned, normalized, and ready for building a model. Before you download the processed data, preview a simplified version to get familiar with the original penguin survey data.\n" ] }, { "cell_type": "markdown", "metadata": { - "id": "dqPkQExM2Pwt" + "id": "qnX1-aLors4S" }, "source": [ - "### Create a `tf.data.Dataset`\n", + "### Preview the data\n", "\n", - "TensorFlow's [Dataset API](../../guide/data.ipynb) handles many common cases for loading data into a model. This is a high-level API for reading data and transforming it into a form used for training.\n", - "\n", - "\n", - "Since the dataset is a CSV-formatted text file, use the `tf.data.experimental.make_csv_dataset` function to parse the data into a suitable format. Since this function generates data for training models, the default behavior is to shuffle the data (`shuffle=True, shuffle_buffer_size=10000`), and repeat the dataset forever (`num_epochs=None`). We also set the [batch_size](https://developers.google.com/machine-learning/glossary/#batch_size) parameter:" + "Download the simplified version of the penguins dataset (`penguins/simple`) using the TensorFlow Datasets [`tfds.load`](https://www.tensorflow.org/datasets/api_docs/python/tfds/load) method. There are 344 data records in this dataset. 
Extract the first five records into a [`DataFrame`](https://www.tensorflow.org/datasets/api_docs/python/tfds/as_dataframe) object to inspect a sample of the values in this dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "WsxHnz1ebJ2S" + "id": "FQvb_JYdrpPm" }, "outputs": [], "source": [ - "batch_size = 32\n", - "\n", - "train_dataset = tf.data.experimental.make_csv_dataset(\n", - " train_dataset_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=label_name,\n", - " num_epochs=1)" + "ds_preview, info = tfds.load('penguins/simple', split='train', with_info=True)\n", + "df = tfds.as_dataframe(ds_preview.take(5), info)\n", + "print(df)\n", + "print(info.features)" ] }, { "cell_type": "markdown", "metadata": { - "id": "gB_RSn62c-3G" + "id": "kQhzD6P-uBoq" }, "source": [ - "The `make_csv_dataset` function returns a `tf.data.Dataset` of `(features, label)` pairs, where `features` is a dictionary: `{'feature_name': value}`\n", - "\n", - "These `Dataset` objects are iterable. Let's look at a batch of features:" + "The numbered rows are data records, one _[example](https://developers.google.com/machine-learning/glossary/#example)_ per line, where:\n", + " * The first six fields are _[features](https://developers.google.com/machine-learning/glossary/#feature)_: these are the characteristics of an example. Here, the fields hold numbers representing penguin measurements.\n", + " * The last column is the _[label](https://developers.google.com/machine-learning/glossary/#label)_: this is the value you want to predict. For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a penguin species name." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "iDuG94H-C122" + "id": "CCtwLoJhhDNc" }, - "outputs": [], "source": [ - "features, labels = next(iter(train_dataset))\n", + "In the dataset, the label for the penguin species is represented as a number to make it easier to work with in the model you are building. These numbers correspond to the following penguin species:\n", + "\n", + "* `0`: Adélie penguin\n", + "* `1`: Chinstrap penguin\n", + "* `2`: Gentoo penguin\n", "\n", - "print(features)" + "Create a list containing the penguin species names in this order. You will use this list to interpret the output of the classification model:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "E63mArnQaAGz" + "id": "sVNlJlUOhkoX" }, + "outputs": [], "source": [ - "Notice that like-features are grouped together, or *batched*. Each example row's fields are appended to the corresponding feature array. Change the `batch_size` to set the number of examples stored in these feature arrays.\n", - "\n", - "You can start to see some clusters by plotting a few features from the batch:" + "class_names = ['Adélie', 'Chinstrap', 'Gentoo']" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "me5Wn-9FcyyO" + "id": "iav9kEgxpY0s" }, - "outputs": [], "source": [ - "plt.scatter(features['petal_length'],\n", - " features['sepal_length'],\n", - " c=labels,\n", - " cmap='viridis')\n", - "\n", - "plt.xlabel(\"Petal length\")\n", - "plt.ylabel(\"Sepal length\")\n", - "plt.show()" + "For more information about features and labels, refer to the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." 
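As a quick sanity check (a sketch that assumes the `ds_preview` dataset from the preview cell and that the label field is named `species`), you can map an integer label back to its species name:

```python
# Sketch: decode the first record's integer label into a species name.
for example in ds_preview.take(1):
  species_id = int(example['species'].numpy())
  print(species_id, '->', class_names[species_id])
```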
] }, { "cell_type": "markdown", "metadata": { - "id": "YlxpSyHlhT6M" + "id": "PD33PxSmCrtL" }, "source": [ - "To simplify the model building step, create a function to repackage the features dictionary into a single array with shape: `(batch_size, num_features)`.\n", + "### Download the preprocessed dataset\n", "\n", - "This function uses the `tf.stack` method which takes values from a list of tensors and creates a combined tensor at the specified dimension:" + "Now, download the preprocessed penguins dataset (`penguins/processed`) with the `tfds.load` method, which returns a list of `tf.data.Dataset` objects. Note that the `penguins/processed` dataset doesn't come with its own test set, so use an 80:20 split to [slice the full dataset](https://www.tensorflow.org/datasets/splits) into the training and test sets. You will use the test dataset later to verify your model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "jm932WINcaGU" + "id": "EVV96zIYYAi8" }, "outputs": [], "source": [ - "def pack_features_vector(features, labels):\n", - " \"\"\"Pack the features into a single array.\"\"\"\n", - " features = tf.stack(list(features.values()), axis=1)\n", - " return features, labels" + "ds_split, info = tfds.load(\"penguins/processed\", split=['train[:20%]', 'train[20%:]'], as_supervised=True, with_info=True)\n", + "\n", + "ds_test = ds_split[0]\n", + "ds_train = ds_split[1]\n", + "assert isinstance(ds_test, tf.data.Dataset)\n", + "\n", + "print(info.features)\n", + "df_test = tfds.as_dataframe(ds_test.take(5), info)\n", + "print(\"Test dataset sample: \")\n", + "print(df_test)\n", + "\n", + "df_train = tfds.as_dataframe(ds_train.take(5), info)\n", + "print(\"Train dataset sample: \")\n", + "print(df_train)\n", + "\n", + "ds_train_batch = ds_train.batch(32)" ] }, { "cell_type": "markdown", "metadata": { - "id": "V1Vuph_eDl8x" + "id": "xX2NfLyQOK1y" }, "source": [ - "Then use the `tf.data.Dataset#map` method to pack the `features` of each `(features,label)` pair into the training dataset:" + "Notice that this version of the dataset has been processed by reducing the data down to four normalized features and a species label. In this format, the data can be quickly used to train a model without further processing." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "ZbDkzGZIkpXf" + "id": "iDuG94H-C122" }, "outputs": [], "source": [ - "train_dataset = train_dataset.map(pack_features_vector)" + "features, labels = next(iter(ds_train_batch))\n", + "\n", + "print(features)\n", + "print(labels)" ] }, { "cell_type": "markdown", "metadata": { - "id": "NLy0Q1xCldVO" + "id": "E63mArnQaAGz" }, "source": [ - "The features element of the `Dataset` are now arrays with shape `(batch_size, num_features)`. 
Let's look at the first few examples:" + "You can visualize some clusters by plotting a few features from the batch:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "kex9ibEek6Tr" + "id": "me5Wn-9FcyyO" }, "outputs": [], "source": [ - "features, labels = next(iter(train_dataset))\n", + "plt.scatter(features[:,0],\n", + " features[:,2],\n", + " c=labels,\n", + " cmap='viridis')\n", "\n", - "print(features[:5])" + "plt.xlabel(\"Body Mass\")\n", + "plt.ylabel(\"Culmen Length\")\n", + "plt.show()" ] }, { @@ -446,29 +347,31 @@ "id": "LsaVrtNM3Tx5" }, "source": [ - "## Select the type of model\n", + "## Build a simple linear model\n", "\n", "### Why model?\n", "\n", - "A *[model](https://developers.google.com/machine-learning/crash-course/glossary#model)* is a relationship between features and the label. For the Iris classification problem, the model defines the relationship between the sepal and petal measurements and the predicted Iris species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", + "A *[model](https://developers.google.com/machine-learning/crash-course/glossary#model)* is a relationship between features and the label. For the penguin classification problem, the model defines the relationship between the body mass, flipper and culmen measurements and the predicted penguin species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", "\n", - "Could you determine the relationship between the four features and the Iris species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between petal and sepal measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program will figure out the relationships for you.\n", + "Could you determine the relationship between the four features and the penguin species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between body mass and culmen measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program figures out the relationships for you.\n", "\n", "### Select the model\n", "\n", - "We need to select the kind of model to train. There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the Iris classification problem. *[Neural networks](https://developers.google.com/machine-learning/glossary/#neural_network)* can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more *[hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer)*. 
Each hidden layer consists of one or more *[neurons](https://developers.google.com/machine-learning/glossary/#neuron)*. There are several categories of neural networks and this program uses a dense, or *[fully-connected neural network](https://developers.google.com/machine-learning/glossary/#fully_connected_layer)*: the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", + "Next you need to select the kind of model to train. There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the penguin classification problem. [*Neural networks*](https://developers.google.com/machine-learning/glossary/#neural_network) can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more [*hidden layers*](https://developers.google.com/machine-learning/glossary/#hidden_layer). Each hidden layer consists of one or more [*neurons*](https://developers.google.com/machine-learning/glossary/#neuron). There are several categories of neural networks and this program uses a dense, or [*fully-connected neural network*](https://developers.google.com/machine-learning/glossary/#fully_connected_layer): the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", + "\n", + "\n", "\n", "\n", " \n", " \n", "
    \n", - " \n", + " \n", "
    \n", " Figure 2. A neural network with features, hidden layers, and predictions.
     \n", "
    \n", "\n", - "When the model from Figure 2 is trained and fed an unlabeled example, it yields three predictions: the likelihood that this flower is the given Iris species. This prediction is called *[inference](https://developers.google.com/machine-learning/crash-course/glossary#inference)*. For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Iris setosa*, `0.95` for *Iris versicolor*, and `0.03` for *Iris virginica*. This means that the model predicts—with 95% probability—that an unlabeled example flower is an *Iris versicolor*." + "When you train the model from Figure 2 and feed it an unlabeled example, it yields three predictions: the likelihood that this penguin is the given penguin species. This prediction is called [*inference*](https://developers.google.com/machine-learning/crash-course/glossary#inference). For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Adelie*, `0.95` for *Chinstrap*, and `0.03` for *Gentoo* species. This means that the model predicts—with 95% probability—that an unlabeled example penguin is a *Chinstrap* penguin." ] }, { @@ -481,7 +384,7 @@ "\n", "The TensorFlow `tf.keras` API is the preferred way to create models and layers. This makes it easy to build models and experiment while Keras handles the complexity of connecting everything together.\n", "\n", - "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing our label predictions. The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" + "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing your label predictions. The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" ] }, { @@ -505,7 +408,7 @@ "id": "FHcbEzMpxbHL" }, "source": [ - "The *[activation function](https://developers.google.com/machine-learning/crash-course/glossary#activation_function)* determines the output shape of each node in the layer. These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", + "The [*activation function*](https://developers.google.com/machine-learning/crash-course/glossary#activation_function) determines the output shape of each node in the layer. These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", "\n", "The ideal number of hidden layers and neurons depends on the problem and the dataset. Like many aspects of machine learning, picking the best shape of the neural network requires a mixture of knowledge and experimentation. As a rule of thumb, increasing the number of hidden layers and neurons typically creates a more powerful model, which requires more data to train effectively." 
] @@ -516,7 +419,7 @@ "id": "2wFKnhWCpDSS" }, "source": [ - "### Using the model\n", + "### Use the model\n", "\n", "Let's have a quick look at what this model does to a batch of features:" ] @@ -561,7 +464,7 @@ "id": "uRZmchElo481" }, "source": [ - "Taking the `tf.argmax` across classes gives us the predicted class index. But, the model hasn't been trained yet, so these aren't good predictions:" + "Taking the `tf.math.argmax` across classes gives us the predicted class index. But, the model hasn't been trained yet, so these aren't good predictions:" ] }, { @@ -572,7 +475,7 @@ }, "outputs": [], "source": [ - "print(\"Prediction: {}\".format(tf.argmax(predictions, axis=1)))\n", + "print(\"Prediction: {}\".format(tf.math.argmax(predictions, axis=1)))\n", "print(\" Labels: {}\".format(labels))" ] }, @@ -584,9 +487,9 @@ "source": [ "## Train the model\n", "\n", - "*[Training](https://developers.google.com/machine-learning/crash-course/glossary#training)* is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called *[overfitting](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)*—it's like memorizing the answers instead of understanding how to solve a problem.\n", + "[*Training*](https://developers.google.com/machine-learning/crash-course/glossary#training) is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called [*overfitting*](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)—it's like memorizing the answers instead of understanding how to solve a problem.\n", "\n", - "The Iris classification problem is an example of *[supervised machine learning](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning)*: the model is trained from examples that contain labels. In *[unsupervised machine learning](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning)*, the examples don't contain labels. Instead, the model typically finds patterns among the features." + "The penguin classification problem is an example of [*supervised machine learning*](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning): the model is trained from examples that contain labels. In [*unsupervised machine learning*](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning), the examples don't contain labels. Instead, the model typically finds patterns among the features." ] }, { @@ -595,11 +498,11 @@ "id": "RaKp8aEjKX6B" }, "source": [ - "### Define the loss and gradient function\n", + "### Define the loss and gradients function\n", "\n", - "Both training and evaluation stages need to calculate the model's *[loss](https://developers.google.com/machine-learning/crash-course/glossary#loss)*. This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. 
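As a minimal sketch of the architecture described above, assuming the four input features and three species classes used in this tutorial (the notebook's own model cell, which is unchanged in this diff, may differ in detail):

```python
import tensorflow as tf

# Two hidden Dense layers with 10 nodes each and ReLU activations,
# plus an output layer with 3 nodes (one logit per penguin species).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,)),  # 4 input features
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(3)
])

model.summary()
```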
We want to minimize, or optimize, this value.\n", + "Both training and evaluation stages need to calculate the model's [*loss*](https://developers.google.com/machine-learning/crash-course/glossary#loss). This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. You want to minimize, or optimize, this value.\n", "\n", - "Our model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." + "Your model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." ] }, { @@ -628,7 +531,6 @@ "\n", " return loss_object(y_true=y, y_pred=y_)\n", "\n", - "\n", "l = loss(model, features, labels, training=False)\n", "print(\"Loss test: {}\".format(l))" ] @@ -639,7 +541,7 @@ "id": "3IcPqA24QM6B" }, "source": [ - "Use the `tf.GradientTape` context to calculate the *[gradients](https://developers.google.com/machine-learning/crash-course/glossary#gradient)* used to optimize your model:" + "Use the `tf.GradientTape` context to calculate the [*gradients*](https://developers.google.com/machine-learning/crash-course/glossary#gradient) used to optimize your model:" ] }, { @@ -664,7 +566,7 @@ "source": [ "### Create an optimizer\n", "\n", - "An *[optimizer](https://developers.google.com/machine-learning/crash-course/glossary#optimizer)* applies the computed gradients to the model's variables to minimize the `loss` function. You can think of the loss function as a curved surface (see Figure 3) and we want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so we'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, we'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize loss. And the lower the loss, the better the model's predictions.\n", + "An [*optimizer*](https://developers.google.com/machine-learning/crash-course/glossary#optimizer) applies the computed gradients to the model's parameters to minimize the `loss` function. You can think of the loss function as a curved surface (refer to Figure 3) and you want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so you'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, you'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize the loss. And the lower the loss, the better the model's predictions.\n", "\n", "\n", " \n", "
    \n", @@ -676,7 +578,7 @@ "
    \n", "\n", - "TensorFlow has many optimization algorithms available for training. This model uses the `tf.keras.optimizers.SGD` that implements the *[stochastic gradient descent](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent)* (SGD) algorithm. The `learning_rate` sets the step size to take for each iteration down the hill. This is a *hyperparameter* that you'll commonly adjust to achieve better results." + "TensorFlow has many optimization algorithms available for training. In this tutorial, you will use the `tf.keras.optimizers.SGD` that implements the [*stochastic gradient descent*](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent) (SGD) algorithm. The `learning_rate` parameter sets the step size to take for each iteration down the hill. This rate is a [*hyperparameter*](https://developers.google.com/machine-learning/glossary/#hyperparameter) that you'll commonly adjust to achieve better results." ] }, { @@ -685,7 +587,7 @@ "id": "XkUd6UiZa_dF" }, "source": [ - "Let's setup the optimizer:" + "Instantiate the optimizer with a [*learning rate*](https://developers.google.com/machine-learning/glossary#learning-rate) of `0.01`, a scalar value that is multiplied by the gradient at each iteration of the training:" ] }, { @@ -705,7 +607,7 @@ "id": "pJVRZ0hP52ZB" }, "source": [ - "We'll use this to calculate a single optimization step:" + "Then use this object to calculate a single optimization step:" ] }, { @@ -740,11 +642,11 @@ "1. Iterate each *epoch*. An epoch is one pass through the dataset.\n", "2. Within an epoch, iterate over each example in the training `Dataset` grabbing its *features* (`x`) and *label* (`y`).\n", "3. Using the example's features, make a prediction and compare it with the label. Measure the inaccuracy of the prediction and use that to calculate the model's loss and gradients.\n", - "4. Use an `optimizer` to update the model's variables.\n", + "4. Use an `optimizer` to update the model's parameters.\n", "5. Keep track of some stats for visualization.\n", "6. Repeat for each epoch.\n", "\n", - "The `num_epochs` variable is the number of times to loop over the dataset collection. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a *[hyperparameter](https://developers.google.com/machine-learning/glossary/#hyperparameter)* that you can tune. Choosing the right number usually requires both experience and experimentation:" + "The `num_epochs` variable is the number of times to loop over the dataset collection. In the code below, `num_epochs` is set to 201 which means this training loop will run 201 times. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a [*hyperparameter*](https://developers.google.com/machine-learning/glossary/#hyperparameter) that you can tune. 
Choosing the right number usually requires both experience and experimentation:" ] }, { @@ -755,7 +657,7 @@ }, "outputs": [], "source": [ - "## Note: Rerunning this cell uses the same model variables\n", + "## Note: Rerunning this cell uses the same model parameters\n", "\n", "# Keep results for plotting\n", "train_loss_results = []\n", @@ -768,7 +670,7 @@ " epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()\n", "\n", " # Training loop - using batches of 32\n", - " for x, y in train_dataset:\n", + " for x, y in ds_train_batch:\n", " # Optimize the model\n", " loss_value, grads = grad(model, x, y)\n", " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", @@ -790,6 +692,15 @@ " epoch_accuracy.result()))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Diep-ROEuKyl" + }, + "source": [ + "Alternatively, you could use the built-in Keras [`Model.fit(ds_train_batch)`](https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit) method to train your model. " + ] + }, { "cell_type": "markdown", "metadata": { @@ -805,9 +716,9 @@ "id": "j3wdbmtLVTyr" }, "source": [ - "While it's helpful to print out the model's training progress, it's often *more* helpful to see this progress. [TensorBoard](https://www.tensorflow.org/tensorboard) is a nice visualization tool that is packaged with TensorFlow, but we can create basic charts using the `matplotlib` module.\n", + "While it's helpful to print out the model's training progress, you can visualize the progress with [TensorBoard](https://www.tensorflow.org/tensorboard) - a visualization and metrics tool that is packaged with TensorFlow. For this simple example, you will create basic charts using the `matplotlib` module.\n", "\n", - "Interpreting these charts takes some experience, but you really want to see the *loss* go down and the *accuracy* go up:" + "Interpreting these charts takes some experience, but in general you want to see the *loss* decrease and the *accuracy* increase:" ] }, { @@ -838,9 +749,9 @@ "source": [ "## Evaluate the model's effectiveness\n", "\n", - "Now that the model is trained, we can get some statistics on its performance.\n", + "Now that the model is trained, you can get some statistics on its performance.\n", "\n", - "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at Iris classification, pass some sepal and petal measurements to the model and ask the model to predict what Iris species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an *[accuracy](https://developers.google.com/machine-learning/glossary/#accuracy)* of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", + "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at penguin classification, pass some measurements to the model and ask the model to predict what penguin species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an [*accuracy*](https://developers.google.com/machine-learning/glossary/#accuracy) of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", "\n", "\n", " \n", @@ -869,7 +780,7 @@ " \n", " \n", " \n", "
    5.52.54.01.311
    \n", - " Figure 4. An Iris classifier that is 80% accurate.
     \n", + " Figure 4. A penguin classifier that is 80% accurate.
     \n", "
    " ] @@ -880,44 +791,11 @@ "id": "z-EvK7hGL0d8" }, "source": [ - "### Setup the test dataset\n", + "### Set up the test set\n", "\n", "Evaluating the model is similar to training the model. The biggest difference is the examples come from a separate *[test set](https://developers.google.com/machine-learning/crash-course/glossary#test_set)* rather than the training set. To fairly assess a model's effectiveness, the examples used to evaluate a model must be different from the examples used to train the model.\n", "\n", - "The setup for the test `Dataset` is similar to the setup for training `Dataset`. Download the CSV text file and parse that values, then give it a little shuffle:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ps3_9dJ3Lodk" - }, - "outputs": [], - "source": [ - "test_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv\"\n", - "\n", - "test_fp = tf.keras.utils.get_file(fname=os.path.basename(test_url),\n", - " origin=test_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SRMWCu30bnxH" - }, - "outputs": [], - "source": [ - "test_dataset = tf.data.experimental.make_csv_dataset(\n", - " test_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name='species',\n", - " num_epochs=1,\n", - " shuffle=False)\n", - "\n", - "test_dataset = test_dataset.map(pack_features_vector)" + "The penguin dataset doesn't have a separate test dataset so in the previous Download the dataset section, you split the original dataset into test and train datasets. Use the `ds_test_batch` dataset for the evaluation." ] }, { @@ -928,7 +806,7 @@ "source": [ "### Evaluate the model on the test dataset\n", "\n", - "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. In the following code cell, we iterate over each example in the test set and compare the model's prediction against the actual label. This is used to measure the model's accuracy across the entire test set:" + "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. The following code iterates over each example in the test set and compare the model's prediction against the actual label. This comparison is used to measure the model's accuracy across the entire test set:" ] }, { @@ -940,24 +818,34 @@ "outputs": [], "source": [ "test_accuracy = tf.keras.metrics.Accuracy()\n", + "ds_test_batch = ds_test.batch(10)\n", "\n", - "for (x, y) in test_dataset:\n", + "for (x, y) in ds_test_batch:\n", " # training=False is needed only if there are layers with different\n", " # behavior during training versus inference (e.g. Dropout).\n", " logits = model(x, training=False)\n", - " prediction = tf.argmax(logits, axis=1, output_type=tf.int32)\n", + " prediction = tf.math.argmax(logits, axis=1, output_type=tf.int64)\n", " test_accuracy(prediction, y)\n", "\n", "print(\"Test set accuracy: {:.3%}\".format(test_accuracy.result()))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fel8ql2qzGlK" + }, + "source": [ + "You can also use the `model.evaluate(ds_test, return_dict=True)` keras function to get accuracy information on your test dataset. 
" + ] + }, { "cell_type": "markdown", "metadata": { "id": "HcKEZMtCOeK-" }, "source": [ - "We can see on the last batch, for example, the model is usually correct:" + "By inspecting the last batch, for example, you can observe that the model predictions are usually correct.\n" ] }, { @@ -979,13 +867,13 @@ "source": [ "## Use the trained model to make predictions\n", "\n", - "We've trained a model and \"proven\" that it's good—but not perfect—at classifying Iris species. Now let's use the trained model to make some predictions on [unlabeled examples](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not a label.\n", + "You've trained a model and \"proven\" that it's good—but not perfect—at classifying penguin species. Now let's use the trained model to make some predictions on [*unlabeled examples*](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not labels.\n", "\n", - "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For now, we're going to manually provide three unlabeled examples to predict their labels. Recall, the label numbers are mapped to a named representation as:\n", + "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For this tutorial, manually provide three unlabeled examples to predict their labels. Recall, the label numbers are mapped to a named representation as:\n", "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica" + "* `0`: Adélie penguin\n", + "* `1`: Chinstrap penguin\n", + "* `2`: Gentoo penguin" ] }, { @@ -997,9 +885,9 @@ "outputs": [], "source": [ "predict_dataset = tf.convert_to_tensor([\n", - " [5.1, 3.3, 1.7, 0.5,],\n", - " [5.9, 3.0, 4.2, 1.5,],\n", - " [6.9, 3.1, 5.4, 2.1]\n", + " [0.3, 0.8, 0.4, 0.5,],\n", + " [0.4, 0.1, 0.8, 0.5,],\n", + " [0.7, 0.9, 0.8, 0.4]\n", "])\n", "\n", "# training=False is needed only if there are layers with different\n", @@ -1007,7 +895,7 @@ "predictions = model(predict_dataset, training=False)\n", "\n", "for i, logits in enumerate(predictions):\n", - " class_idx = tf.argmax(logits).numpy()\n", + " class_idx = tf.math.argmax(logits).numpy()\n", " p = tf.nn.softmax(logits)[class_idx]\n", " name = class_names[class_idx]\n", " print(\"Example {} prediction: {} ({:4.1f}%)\".format(i, name, 100*p))" diff --git a/site/en/tutorials/customization/images/full_network_penguin.png b/site/en/tutorials/customization/images/full_network_penguin.png new file mode 100644 index 00000000000..3fb940bd8bf Binary files /dev/null and b/site/en/tutorials/customization/images/full_network_penguin.png differ diff --git a/site/en/tutorials/customization/images/penguins_ds_species.png b/site/en/tutorials/customization/images/penguins_ds_species.png new file mode 100644 index 00000000000..736ae89b686 Binary files /dev/null and b/site/en/tutorials/customization/images/penguins_ds_species.png differ diff --git a/site/en/tutorials/distribute/custom_training.ipynb b/site/en/tutorials/distribute/custom_training.ipynb index da45c340b1a..d14b0ac003c 100644 --- a/site/en/tutorials/distribute/custom_training.ipynb +++ b/site/en/tutorials/distribute/custom_training.ipynb @@ -68,9 +68,9 @@ "id": "FbVhjPpzn6BM" }, "source": [ - "This tutorial demonstrates how to use 
[`tf.distribute.Strategy`](https://www.tensorflow.org/guide/distributed_training) with custom training loops. We will train a simple CNN model on the fashion MNIST dataset. The fashion MNIST dataset contains 60000 train images of size 28 x 28 and 10000 test images of size 28 x 28.\n", + "This tutorial demonstrates how to use `tf.distribute.Strategy`—a TensorFlow API that provides an abstraction for [distributing your training](../../guide/distributed_training.ipynb) across multiple processing units (GPUs, multiple machines, or TPUs)—with custom training loops. In this example, you will train a simple convolutional neural network on the [Fashion MNIST dataset](https://github.com/zalandoresearch/fashion-mnist) containing 70,000 images of size 28 x 28.\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop." + "[Custom training loops](../customization/custom_training_walkthrough.ipynb) provide flexibility and a greater control on training. They also make it easier to debug the model and the training loop." ] }, { @@ -97,7 +97,7 @@ "id": "MM6W__qraV55" }, "source": [ - "## Download the fashion MNIST dataset" + "## Download the Fashion MNIST dataset" ] }, { @@ -112,14 +112,14 @@ "\n", "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n", "\n", - "# Adding a dimension to the array -> new shape == (28, 28, 1)\n", - "# We are doing this because the first layer in our model is a convolutional\n", + "# Add a dimension to the array -> new shape == (28, 28, 1)\n", + "# This is done because the first layer in our model is a convolutional\n", "# layer and it requires a 4D input (batch_size, height, width, channels).\n", "# batch_size dimension will be added later on.\n", "train_images = train_images[..., None]\n", "test_images = test_images[..., None]\n", "\n", - "# Getting the images in [0, 1] range.\n", + "# Scale the images to the [0, 1] range.\n", "train_images = train_images / np.float32(255)\n", "test_images = test_images / np.float32(255)" ] @@ -141,13 +141,13 @@ "source": [ "How does `tf.distribute.MirroredStrategy` strategy work?\n", "\n", - "* All the variables and the model graph is replicated on the replicas.\n", + "* All the variables and the model graph are replicated across the replicas.\n", "* Input is evenly distributed across the replicas.\n", "* Each replica calculates the loss and gradients for the input it received.\n", - "* The gradients are synced across all the replicas by summing them.\n", + "* The gradients are synced across all the replicas by **summing** them.\n", "* After the sync, the same update is made to the copies of the variables on each replica.\n", "\n", - "Note: You can put all the code below inside a single scope. We are dividing it into several code cells for illustration purposes.\n" + "Note: You can put all the code below inside a single scope. 
This example divides it into several code cells for illustration purposes.\n" ] }, { @@ -158,8 +158,8 @@ }, "outputs": [], "source": [ - "# If the list of devices is not specified in the\n", - "# `tf.distribute.MirroredStrategy` constructor, it will be auto-detected.\n", + "# If the list of devices is not specified in\n", + "# `tf.distribute.MirroredStrategy` constructor, they will be auto-detected.\n", "strategy = tf.distribute.MirroredStrategy()" ] }, @@ -171,7 +171,7 @@ }, "outputs": [], "source": [ - "print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))" + "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))" ] }, { @@ -183,15 +183,6 @@ "## Setup input pipeline" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "0Qb6nDgxiN_n" - }, - "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format. After your model is saved, you can load it with or without the scope." - ] - }, { "cell_type": "code", "execution_count": null, @@ -225,8 +216,8 @@ }, "outputs": [], "source": [ - "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE) \n", - "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE) \n", + "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)\n", "\n", "train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)\n", "test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)" @@ -240,7 +231,7 @@ "source": [ "## Create the model\n", "\n", - "Create a model using `tf.keras.Sequential`. You can also use the Model Subclassing API to do this." + "Create a model using `tf.keras.Sequential`. You can also use the [Model Subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models) or the [functional API](https://www.tensorflow.org/guide/keras/functional) to do this." ] }, { @@ -252,14 +243,21 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", - " tf.keras.layers.Conv2D(64, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(64, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(64, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(64,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])\n", "\n", " return model" @@ -286,25 +284,29 @@ "source": [ "## Define the loss function\n", "\n", - "Normally, on a single machine with 1 GPU/CPU, loss is divided by the number of examples in the batch of input.\n", + "Recall that the loss function consists of one or two parts:\n", "\n", - "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + " * The **prediction loss** measures how far off the model's predictions are from the training labels for a batch of training examples. 
It is computed for each labeled example and then reduced across the batch by computing the average value.\n", + " * Optionally, **regularization loss** terms can be added to the prediction loss, to steer the model away from overfitting the training data. A common choice is L2 regularization, which adds a small fixed multiple of the sum of squares of all model weights, independent of the number of examples. The model above uses L2 regularization to demonstrate its handling in the training loop below.\n", + "\n", + "For training on a single machine with a single GPU/CPU, this works as follows:\n", "\n", - "* For an example, let's say you have 4 GPU's and a batch size of 64. One batch of input is distributed\n", - "across the replicas (4 GPUs), each replica getting an input of size 16.\n", + " * The prediction loss is computed for each example in the batch, summed across the batch, and then divided by the batch size.\n", + " * The regularization loss is added to the prediction loss.\n", + " * The gradient of the total loss is computed w.r.t. each model weight, and the optimizer updates each model weight from the corresponding gradient.\n", + "\n", + "With `tf.distribute.Strategy`, the input batch is split between replicas.\n", + "For example, let's say you have 4 GPUs, each with one replica of the model. One batch of 256 input examples is distributed evenly across the 4 replicas, so each replica gets a batch of size 64: We have `256 = 4*64`, or generally `GLOBAL_BATCH_SIZE = num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`.\n", "\n", - "* The model on each replica does a forward pass with its respective input and calculates the loss. Now, instead of dividing the loss by the number of examples in its respective input (BATCH_SIZE_PER_REPLICA = 16), the loss should be divided by the GLOBAL_BATCH_SIZE (64)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OCIcsaeoIHJX" - }, - "source": [ - "*Why do this?*\n", + "Each replica computes the loss from the training examples it gets and computes the gradients of the loss w.r.t. each model weight. The optimizer takes care that these **gradients are summed up across replicas** before using them to update the copies of the model weights on each replica.\n", + "\n", + "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + "\n", + " * Each replica computes the prediction loss for all examples distributed to it, sums up the results and divides them by `num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`, or equivalently, `GLOBAL_BATCH_SIZE`.\n", + " * Each replica computes the regularization loss(es) and divides them by\n", + " `num_replicas_in_sync`.\n", + "\n", - "* This needs to be done because after the gradients are calculated on each replica, they are synced across the replicas by **summing** them." + "Compared to non-distributed training, all per-replica loss terms are scaled down by a factor of `1/num_replicas_in_sync`. On the other hand, all loss terms -- or rather, their gradients -- are summed across that number of replicas before the optimizer applies them. In effect, the optimizer on each replica uses the same gradients as if a non-distributed computation with `GLOBAL_BATCH_SIZE` had happened. This is consistent with the distributed and undistributed behavior of Keras `Model.fit`. See the [Distributed training with Keras](./keras.ipynb) tutorial on how a larger global batch size enables scaling up the learning rate."
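A tiny numeric sketch of this scaling rule, with made-up per-example loss values: each replica divides the sum of its own prediction losses by the global batch size, so summing the per-replica results reproduces the plain, non-distributed mean loss.

```python
import tensorflow as tf

# Made-up per-example prediction losses for a global batch of 4 examples,
# split across 2 replicas with 2 examples each (illustrative values only).
losses_replica_0 = tf.constant([2.0, 3.0])
losses_replica_1 = tf.constant([4.0, 5.0])
GLOBAL_BATCH_SIZE = 4

# Each replica divides the sum of its losses by the *global* batch size ...
scaled_0 = tf.reduce_sum(losses_replica_0) / GLOBAL_BATCH_SIZE  # 1.25
scaled_1 = tf.reduce_sum(losses_replica_1) / GLOBAL_BATCH_SIZE  # 2.25

# ... so the sum across replicas equals the non-distributed mean loss.
print((scaled_0 + scaled_1).numpy())  # 3.5
print(tf.reduce_mean(tf.concat([losses_replica_0, losses_replica_1], 0)).numpy())  # 3.5
```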
] }, { @@ -315,31 +317,18 @@ "source": [ "*How to do this in TensorFlow?*\n", "\n", - "* If you're writing a custom training loop, as in this tutorial, you should sum the per example losses and divide the sum by the GLOBAL_BATCH_SIZE: \n", - "`scale_loss = tf.reduce_sum(loss) * (1. / GLOBAL_BATCH_SIZE)`\n", - "or you can use `tf.nn.compute_average_loss` which takes the per example loss,\n", - "optional sample weights, and GLOBAL_BATCH_SIZE as arguments and returns the scaled loss.\n", - "\n", - "* If you are using regularization losses in your model then you need to scale\n", - "the loss value by number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function.\n", + " * Loss reduction and scaling is done automatically in Keras `Model.compile` and `Model.fit`\n", "\n", - "* Using `tf.reduce_mean` is not recommended. Doing so divides the loss by actual per replica batch size which may vary step to step.\n", + " * If you're writing a custom training loop, as in this tutorial, you should sum the per-example losses and divide the sum by the global batch size using `tf.nn.compute_average_loss`, which takes the per-example losses and\n", + "optional sample weights as arguments and returns the scaled loss.\n", "\n", - "* This reduction and scaling is done automatically in keras `model.compile` and `model.fit`\n", + " * If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. The default `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed outside `Model.fit`.\n", + " * `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case.\n", + " * `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So, instead, you need to do the reduction yourself explicitly.\n", "\n", - "* If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed when used with `tf.distribute.Strategy`. `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case. `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So instead we ask the user do the reduction themselves explicitly.\n", - "* If `labels` is multi-dimensional, then average the `per_example_loss` across the number of elements in each sample. For example, if the shape of `predictions` is `(batch_size, H, W, n_classes)` and `labels` is `(batch_size, H, W)`, you will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + " * If you're writing a custom training loop for a model with a non-empty list of `Model.losses` (e.g., weight regularizers), you should sum them up and divide the sum by the number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function. The model code itself remains unaware of the number of replicas.\n", "\n", - " Caution: **Verify the shape of your loss**. \n", - " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", - " return the average over the last dimension of the input. 
The loss\n", - " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", - " creating an instance of a loss class means \"no **additional** reduction\".\n", - " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", - " dimension is reduced. For pointwise losses like\n", - " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", - " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", - " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`.\n" + " However, models can define input-dependent regularization losses with Keras APIs such as `Layer.add_loss(...)` and `Layer(activity_regularizer=...)`. For `Layer.add_loss(...)`, it falls on the modeling code to perform the division of the summed per-example terms by the per-replica(!) batch size, e.g., by using `tf.math.reduce_mean()`." ] }, { @@ -351,14 +340,51 @@ "outputs": [], "source": [ "with strategy.scope():\n", - " # Set reduction to `none` so we can do the reduction afterwards and divide by\n", - " # global batch size.\n", + " # Set reduction to `NONE` so you can do the reduction yourself.\n", " loss_object = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", - " def compute_loss(labels, predictions):\n", + " def compute_loss(labels, predictions, model_losses):\n", " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)" + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " return loss" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6pM96bqQY52D" + }, + "source": [ + "### Special cases\n", + "\n", + "Advanced users should also consider the following special cases.\n", + "\n", + " * Input batches shorter than `GLOBAL_BATCH_SIZE` create unpleasant corner cases in several places. In practice, it often works best to avoid them by allowing batches to span epoch boundaries using `Dataset.repeat().batch()` and defining approximate epochs by step counts, not dataset ends. Alternatively, `Dataset.batch(drop_remainder=True)` maintains the notion of epoch but drops the last few examples.\n", + "\n", + " For illustration, this example goes the harder route and allows short batches, so that each training epoch contains each training example exactly once.\n", + " \n", + " Which denominator should be used by `tf.nn.compute_average_loss()`?\n", + "\n", + " * By default, in the example code above and equivalently in `Keras.fit()`, the sum of prediction losses is divided by `num_replicas_in_sync` times the actual batch size seen on the replica (with empty batches silently ignored). This preserves the balance between the prediction loss on the one hand and the regularization losses on the other hand. It is particularly appropriate for models that use input-dependent regularization losses. Plain L2 regularization just superimposes weight decay onto the gradients of the prediction loss and is less in need of such a balance.\n", + " * In practice, many custom training loops pass as a constant Python value into `tf.nn.compute_average_loss(..., global_batch_size=GLOBAL_BATCH_SIZE)` to use it as the denominator. This preserves the relative weighting of training examples between batches. 
Without it, the smaller denominator in short batches effectively upweights the examples in those. (Before TensorFlow 2.13, this was also needed to avoid NaNs in case some replica received an actual batch size of zero.)\n", + " \n", + " Both options are equivalent if short batches are avoided, as suggested above.\n", + "\n", + " * Multi-dimensional `labels` require you to average the `per_example_loss` across the number of predictions in each example. Consider a classification task for all pixels of an input image, with `predictions` of shape `(batch_size, H, W, n_classes)` and `labels` of shape `(batch_size, H, W)`. You will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + "\n", + " Caution: **Verify the shape of your loss**.\n", + " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", + " return the average over the last dimension of the input. The loss\n", + " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", + " creating an instance of a loss class means \"no **additional** reduction\".\n", + " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", + " dimension is reduced. For pointwise losses like\n", + " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", + " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", + " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`." ] }, { @@ -406,11 +432,11 @@ }, "outputs": [], "source": [ - "# model, optimizer, and checkpoint must be created under `strategy.scope`.\n", + "# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.\n", "with strategy.scope():\n", " model = create_model()\n", "\n", - " optimizer = tf.keras.optimizers.Adam()\n", + " optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n", "\n", " checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)" ] @@ -428,13 +454,13 @@ "\n", " with tf.GradientTape() as tape:\n", " predictions = model(images, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " loss = compute_loss(labels, predictions, model.losses)\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", " train_accuracy.update_state(labels, predictions)\n", - " return loss \n", + " return loss\n", "\n", "def test_step(inputs):\n", " images, labels = inputs\n", @@ -484,9 +510,9 @@ "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, \"\n", " \"Test Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss,\n", - " train_accuracy.result()*100, test_loss.result(),\n", - " test_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss,\n", + " train_accuracy.result() * 100, test_loss.result(),\n", + " test_accuracy.result() * 100))\n", "\n", " test_loss.reset_states()\n", " train_accuracy.reset_states()\n", @@ -499,12 +525,12 @@ "id": "Z1YvXqOpwy08" }, "source": [ - "Things to note in the example above:\n", + "### Things to note in the example above\n", "\n", - "* We are iterating over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", + "* Iterate over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", "* The scaled loss is the return value of the `distributed_train_step`. 
This value is aggregated across replicas using the `tf.distribute.Strategy.reduce` call and then across batches by summing the return value of the `tf.distribute.Strategy.reduce` calls.\n", "* `tf.keras.Metrics` should be updated inside `train_step` and `test_step` that gets executed by `tf.distribute.Strategy.run`.\n", - "*`tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" + "* `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" ] }, { @@ -570,8 +596,8 @@ "for images, labels in test_dataset:\n", " eval_step(images, labels)\n", "\n", - "print ('Accuracy after restoring the saved model without strategy: {}'.format(\n", - " eval_accuracy.result()*100))" + "print('Accuracy after restoring the saved model without strategy: {}'.format(\n", + " eval_accuracy.result() * 100))" ] }, { @@ -584,7 +610,7 @@ "\n", "### Using iterators\n", "\n", - "If you want to iterate over a given number of steps and not through the entire dataset you can create an iterator using the `iter` call and explicity call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the tf.function. Here is a small snippet demonstrating iteration of the dataset outside the tf.function using an iterator.\n" + "If you want to iterate over a given number of steps and not through the entire dataset, you can create an iterator using the `iter` call and explicitly call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the `tf.function`. Here is a small snippet demonstrating iteration of the dataset outside the `tf.function` using an iterator.\n" ] }, { @@ -606,7 +632,7 @@ " average_train_loss = total_loss / num_batches\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, average_train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, average_train_loss, train_accuracy.result() * 100))\n", " train_accuracy.reset_states()" ] }, @@ -616,8 +642,9 @@ "id": "GxVp48Oy0m6y" }, "source": [ - "### Iterating inside a tf.function\n", - "You can also iterate over the entire input `train_dist_dataset` inside a tf.function using the `for x in ...` construct or by creating iterators like we did above. The example below demonstrates wrapping one epoch of training in a tf.function and iterating over `train_dist_dataset` inside the function." + "### Iterating inside a `tf.function`\n", + "\n", + "You can also iterate over the entire input `train_dist_dataset` inside a `tf.function` using the `for x in ...` construct or by creating iterators like you did above. The example below demonstrates wrapping one epoch of training with a `@tf.function` decorator and iterating over `train_dist_dataset` inside the function." 
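As a minimal, self-contained sketch of the two ways to consume per-replica results mentioned above (a toy per-replica computation, not the tutorial's model):

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

@tf.function
def step(x):
  # Toy per-replica computation: the mean of this replica's shard of the batch.
  return tf.reduce_mean(x)

dataset = tf.data.Dataset.from_tensor_slices(tf.range(8.0)).batch(4)
dist_dataset = strategy.experimental_distribute_dataset(dataset)

for dist_x in dist_dataset:
  per_replica = strategy.run(step, args=(dist_x,))
  # Aggregate the per-replica values into a single number ...
  print(strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica, axis=None))
  # ... or inspect the raw result from each local replica.
  print(strategy.experimental_local_results(per_replica))
```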
] }, { @@ -643,7 +670,7 @@ " train_loss = distributed_train_epoch(train_dist_dataset)\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss, train_accuracy.result() * 100))\n", "\n", " train_accuracy.reset_states()" ] @@ -658,17 +685,18 @@ "\n", "Note: As a general rule, you should use `tf.keras.Metrics` to track per-sample values and avoid values that have been aggregated within a replica.\n", "\n", - "We do *not* recommend using `tf.metrics.Mean` to track the training loss across different replicas, because of the loss scaling computation that is carried out.\n", + "Because of the loss scaling computation that is carried out, it's not recommended to use `tf.keras.metrics.Mean` to track the training loss across different replicas.\n", "\n", "For example, if you run a training job with the following characteristics:\n", + "\n", "* Two replicas\n", "* Two samples are processed on each replica\n", "* Resulting loss values: [2, 3] and [4, 5] on each replica\n", "* Global batch size = 4\n", "\n", - "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`. \n", + "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`.\n", "\n", - "If you use `tf.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." + "If you use `tf.keras.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." ] }, { @@ -678,16 +706,17 @@ }, "source": [ "### Guide and examples\n", + "\n", "Here are some examples for using distribution strategy with custom training loops:\n", "\n", "1. [Distributed training guide](../../guide/distributed_training)\n", "2. [DenseNet](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/densenet/distributed_train.py) example using `MirroredStrategy`.\n", - "1. [BERT](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", + "1. [BERT](https://github.com/tensorflow/models/blob/master/official/legacy/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", "This example is particularly helpful for understanding how to load from a checkpoint and generate periodic checkpoints during distributed training etc.\n", "2. [NCF](https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_keras_main.py) example trained using `MirroredStrategy` that can be enabled using the `keras_use_ctl` flag.\n", "3. 
[NMT](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/nmt_with_attention/distributed_train.py) example trained using `MirroredStrategy`.\n", "\n", - "More examples listed in the [Distribution strategy guide](../../guide/distributed_training.ipynb#examples_and_tutorials)." + "You can find more examples listed under _Examples and tutorials_ in the [Distribution strategy guide](../../guide/distributed_training.ipynb)." ] }, { @@ -699,7 +728,8 @@ "## Next steps\n", "\n", "* Try out the new `tf.distribute.Strategy` API on your models.\n", - "* Visit the [Performance section](../../guide/function.ipynb) in the guide to learn more about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." + "* Visit the [Better performance with `tf.function`](../../guide/function.ipynb) and [TensorFlow Profiler](../../guide/profiler.md) guides to learn more about tools to optimize the performance of your TensorFlow models.\n", + "* Check out the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide, which provides an overview of the available distribution strategies." ] } ], @@ -707,7 +737,6 @@ "colab": { "collapsed_sections": [], "name": "custom_training.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb new file mode 100644 index 00000000000..84f6478c2b5 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb @@ -0,0 +1,760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MT-LkFOl2axM" + }, + "source": [ + "# Using DTensors with Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vTe9dcbUAwqx" + }, + "source": [ + "## Overview\n", + "\n", + "In this tutorial, you will learn how to use DTensors with Keras.\n", + "\n", + "Through DTensor integration with Keras, you can reuse your existing Keras layers and models to build and train distributed machine learning models.\n", + "\n", + "You will train a multi-layer classification model with the MNIST data. Setting the layout for subclassing model, Sequential model, and functional model will be demonstrated.\n", + "\n", + "This tutorial assumes that you have already read the [DTensor programing guide](/guide/dtensor_overview), and are familiar with basic DTensor concepts like `Mesh` and `Layout`.\n", + "\n", + "This tutorial is based on [Training a neural network on MNIST with Keras](https://www.tensorflow.org/datasets/keras_example)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "keIyP3IoA1o4" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4dHik7NYA5vm" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VttBMZngDx8x" + }, + "source": [ + "Next, import TensorFlow and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CodX6idGBGSm" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAtvrpasDpDD" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(\n", + " phy_devices[0], \n", + " [tf.config.LogicalDeviceConfiguration()] * ncpu)\n", + " \n", + "configure_virtual_cpus(8)\n", + "tf.config.list_logical_devices('CPU')\n", + "\n", + "devices = [f'CPU:{i}' for i in range(8)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ogULE1OHtyd9" + }, + "source": [ + "## Deterministic pseudo-random number generators\n", + "One thing you should note is that DTensor API requires each of the running client to have the same random seeds, so that it could have deterministic behavior for initializing the weights. You can achieve this by setting the global seeds in keras via `tf.keras.utils.set_random_seed()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9u85YypguL8N" + }, + "outputs": [], + "source": [ + "tf.keras.backend.experimental.enable_tf_random_generator()\n", + "tf.keras.utils.set_random_seed(1337)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tO11XvPDAu3_" + }, + "source": [ + "## Creating a Data Parallel Mesh\n", + "\n", + "This tutorial demonstrates Data Parallel training. Adapting to Model Parallel training and Spatial Parallel training can be as simple as switching to a different set of `Layout` objects. 
Refer to the [Distributed training with DTensors](dtensor_ml_tutorial.ipynb) tutorial for more information on distributed training beyond Data Parallel.\n", + "\n", + "Data Parallel training is a commonly used parallel training scheme, also used by, for example, `tf.distribute.MirroredStrategy`.\n", + "\n", + "With DTensor, a Data Parallel training loop uses a `Mesh` that consists of a single 'batch' dimension, where each device runs a replica of the model that receives a shard from the global batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6sT6s6z4j9H-" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=devices)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rouFcF6FE0aF" + }, + "source": [ + "As each device runs a full replica of the model, the model variables should be fully replicated across the mesh (unsharded). As an example, a fully replicated Layout for a rank-2 weight on this `Mesh` would be as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U8OxvkDKE1Nu" + }, + "outputs": [], + "source": [ + "example_weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)  # or\n", + "example_weight_layout = dtensor.Layout.replicated(mesh, rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Bnic98RE0xi" + }, + "source": [ + "A layout for a rank-2 data tensor on this `Mesh` would be sharded along the first dimension (sometimes known as `batch_sharded`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PhYp0EKBFfxt" + }, + "outputs": [], + "source": [ + "example_data_layout = dtensor.Layout(['batch', dtensor.UNSHARDED], mesh)  # or\n", + "example_data_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4U-6n0DericV" + }, + "source": [ + "## Create Keras layers with layout\n", + "\n", + "In the data parallel scheme, you usually create your model weights with a fully replicated layout, so that each replica of the model can do calculations with the sharded input data. \n", + "\n", + "In order to configure the layout information for your layers' weights, Keras has exposed an extra parameter in the layer constructor for most of the built-in layers.\n", + "\n", + "The following example builds a small image classification model with a fully replicated weight layout. You can specify the layout information for the `kernel` and `bias` weights in `tf.keras.layers.Dense` via the `kernel_layout` and `bias_layout` arguments. Most of the built-in Keras layers are ready for explicitly specifying the `Layout` for the layer weights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Koc5GlA1tFXY" + }, + "outputs": [], + "source": [ + "unsharded_layout_2d = dtensor.Layout.replicated(mesh, 2)\n", + "unsharded_layout_1d = dtensor.Layout.replicated(mesh, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GfOGTIxGs5Ql" + }, + "outputs": [], + "source": [ + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(128, \n", + " activation='relu',\n", + " name='d1',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d),\n", + " tf.keras.layers.Dense(10,\n", + " name='d2',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d)\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0frf3jsVtx_n" + }, + "source": [ + "You can check the layout information by examining the `layout` property on the weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z_nqv_VdwcXo" + }, + "outputs": [], + "source": [ + "for weight in model.weights:\n", + " print(f'Weight name: {weight.name} with layout: {weight.layout}')\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FMGB-QsxPtU" + }, + "source": [ + "## Load a dataset and build input pipeline\n", + "\n", + "Load a MNIST dataset and configure some pre-processing input pipeline for it. The dataset itself is not associated with any DTensor layout information." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zGt4kwltxOt4" + }, + "outputs": [], + "source": [ + "(ds_train, ds_test), ds_info = tfds.load(\n", + " 'mnist',\n", + " split=['train', 'test'],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HkUaOB_ryaLH" + }, + "outputs": [], + "source": [ + "def normalize_img(image, label):\n", + " \"\"\"Normalizes images: `uint8` -> `float32`.\"\"\"\n", + " return tf.cast(image, tf.float32) / 255., label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Efm2H1iqydan" + }, + "outputs": [], + "source": [ + "batch_size = 128\n", + "\n", + "ds_train = ds_train.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_train = ds_train.cache()\n", + "ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)\n", + "ds_train = ds_train.batch(batch_size)\n", + "ds_train = ds_train.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lcrg6QAtyis4" + }, + "outputs": [], + "source": [ + "ds_test = ds_test.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_test = ds_test.batch(batch_size)\n", + "ds_test = ds_test.cache()\n", + "ds_test = ds_test.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fHEZwib7lhqn" + }, + "source": [ + "## Define the training logic for the model\n", + "\n", + "Next, define the training and evaluation logic for the model. \n", + "\n", + "As of TensorFlow 2.9, you have to write a custom-training-loop for a DTensor-enabled Keras model. This is to pack the input data with proper layout information, which is not integrated with the standard `tf.keras.Model.fit()` or `tf.keras.Model.eval()` functions from Keras. 
you will get more `tf.data` support in the upcoming release. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CAx11gMjzzjs" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x, y, optimizer, metrics):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x, training=True)\n", + " # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + " # global loss (scalar).\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + " \n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "maSTWeRemO0P" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def eval_step(model, x, y, metrics):\n", + " logits = model(x, training=False)\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'eval_loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dt00axcLmvLr" + }, + "outputs": [], + "source": [ + "def pack_dtensor_inputs(images, labels, image_layout, label_layout):\n", + " num_local_devices = image_layout.mesh.num_local_devices()\n", + " images = tf.split(images, num_local_devices)\n", + " labels = tf.split(labels, num_local_devices)\n", + " images = dtensor.pack(images, image_layout)\n", + " labels = dtensor.pack(labels, label_layout)\n", + " return images, labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Eb-qIJGrxB9" + }, + "source": [ + "## Metrics and optimizers\n", + "\n", + "When using DTensor API with Keras `Metric` and `Optimizer`, you will need to provide the extra mesh information, so that any internal state variables and tensors can work with variables in the model.\n", + "\n", + "- For an optimizer, DTensor introduces a new experimental namespace `keras.dtensor.experimental.optimizers`, where many existing Keras Optimizers are extended to receive an additional `mesh` argument. In future releases, it may be merged with Keras core optimizers.\n", + "\n", + "- For metrics, you can directly specify the `mesh` to the constructor as an argument to make it a DTensor compatible `Metric`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1lu_0mz1sxrl" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.dtensor.experimental.optimizers.Adam(0.01, mesh=mesh)\n", + "metrics = {'accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}\n", + "eval_metrics = {'eval_accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzufrkistELx" + }, + "source": [ + "## Train the model\n", + "\n", + "The following example demonstrates how to shard the data from input pipeline on the batch dimension, and train with the model, which has fully replicated weights. 
\n", + "\n", + "After 3 epochs, the model should achieve about 97% of accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kZW568Dk0vvL" + }, + "outputs": [], + "source": [ + "num_epochs = 3\n", + "\n", + "image_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=4)\n", + "label_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "for epoch in range(num_epochs):\n", + " print(\"============================\") \n", + " print(\"Epoch: \", epoch)\n", + " for metric in metrics.values():\n", + " metric.reset_state()\n", + " step = 0\n", + " results = {}\n", + " pbar = tf.keras.utils.Progbar(target=None, stateful_metrics=[])\n", + " for input in ds_train:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + "\n", + " results.update(train_step(model, images, labels, optimizer, metrics))\n", + " for metric_name, metric in metrics.items():\n", + " results[metric_name] = metric.result()\n", + "\n", + " pbar.update(step, values=results.items(), finalize=False)\n", + " step += 1\n", + " pbar.update(step, values=results.items(), finalize=True)\n", + "\n", + " for metric in eval_metrics.values():\n", + " metric.reset_state()\n", + " for input in ds_test:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + " results.update(eval_step(model, images, labels, eval_metrics))\n", + "\n", + " for metric_name, metric in eval_metrics.items():\n", + " results[metric_name] = metric.result()\n", + " \n", + " for metric_name, metric in results.items():\n", + " print(f\"{metric_name}: {metric.numpy()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HYEXF6qCuoSr" + }, + "source": [ + "## Specify Layout for existing model code\n", + "\n", + "Often you have models that work well for your use case. Specifying `Layout` information to each individual layer within the model will be a large amount of work requiring a lot of edits.\n", + "\n", + "To help you easily convert your existing Keras model to work with DTensor API you can use the new `tf.keras.dtensor.experimental.LayoutMap` API that allow you to specify the `Layout` from a global point of view.\n", + "\n", + "First, you need to create a `LayoutMap` instance, which is a dictionary-like object that contains all the `Layout` you would like to specify for your model weights.\n", + "\n", + "`LayoutMap` needs a `Mesh` instance at init, which can be used to provide default replicated `Layout` for any weights that doesn't have Layout configured. In case you would like all your model weights to be just fully replicated, you can provide empty `LayoutMap`, and the default mesh will be used to create replicated `Layout`.\n", + "\n", + "`LayoutMap` uses a string as key and a `Layout` as value. There is a behavior difference between a normal Python dict and this class. The string key will be treated as a regex when retrieving the value." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCq5Nl-UP_dS" + }, + "source": [ + "### Subclassed Model\n", + "\n", + "Consider the following model defined using the Keras subclassing Model syntax." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LZ0hRFs8unu0" + }, + "outputs": [], + "source": [ + "class SubclassedModel(tf.keras.Model):\n", + "\n", + " def __init__(self, name=None):\n", + " super().__init__(name=name)\n", + " self.feature = tf.keras.layers.Dense(16)\n", + " self.feature_2 = tf.keras.layers.Dense(24)\n", + " self.dropout = tf.keras.layers.Dropout(0.1)\n", + "\n", + " def call(self, inputs, training=None):\n", + " x = self.feature(inputs)\n", + " x = self.dropout(x, training=training)\n", + " return self.feature_2(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1njxqPB-yS97" + }, + "source": [ + "There are 4 weights in this model, which are `kernel` and `bias` for two `Dense` layers. Each of them are mapped based on the object path:\n", + "\n", + "* `model.feature.kernel`\n", + "* `model.feature.bias`\n", + "* `model.feature_2.kernel`\n", + "* `model.feature_2.bias`\n", + "\n", + "Note: For subclassed Models, the attribute name, rather than the `.name` attribute of the layer, is used as the key to retrieve the Layout from the mapping. This is consistent with the convention followed by `tf.Module` checkpointing. For complex models with more than a few layers, you can [manually inspect checkpoints](https://www.tensorflow.org/guide/checkpoint#manually_inspecting_checkpoints) to view the attribute mappings. \n", + "\n", + "Now define the following `LayoutMap` and apply it to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goVX6iIZw468" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "with layout_map.scope():\n", + " subclassed_model = SubclassedModel()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M32HcSp_PyWs" + }, + "source": [ + "The model weights are created on the first call, so call the model with a DTensor input and confirm the weights have the expected layouts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c3CbD9l7qUNq" + }, + "outputs": [], + "source": [ + "dtensor_input = dtensor.copy_to_mesh(tf.zeros((16, 16)), layout=unsharded_layout_2d)\n", + "# Trigger the weights creation for subclass model\n", + "subclassed_model(dtensor_input)\n", + "\n", + "print(subclassed_model.feature.kernel.layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZyCnfd-4Q2jk" + }, + "source": [ + "With this, you can quickly map the `Layout` to your models without updating any of your existing code. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6GliUdWTQnKC" + }, + "source": [ + "### Sequential and Functional Models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6zzvTqAR2Teu" + }, + "source": [ + "For Keras Functional and Sequential models, you can use `tf.keras.dtensor.experimental.LayoutMap` as well.\n", + "\n", + "Note: For Functional and Sequential models, the mappings are slightly different. The layers in the model don't have a public attribute attached to the model (though you can access them via `Model.layers` as a list). Use the string name as the key in this case. The string name is guaranteed to be unique within a model." 
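As noted earlier, the string keys of a `LayoutMap` are treated as regular expressions when a value is retrieved, so a single entry can cover several weights. Below is a small illustrative sketch (not part of the original notebook) that assumes the `mesh` created above; the `demo_layout_map` name and the queried strings are examples only:

```python
import tensorflow as tf
from tensorflow.experimental import dtensor

# One regex-style key can cover every weight whose path matches the pattern.
demo_layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)
demo_layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)

# Both lookups match the 'feature.*kernel' pattern and return the
# batch-sharded layout configured above.
print(demo_layout_map['feature/kernel'])
print(demo_layout_map['feature_2/kernel'])
```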
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gXK2EquIRJCC" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cBzwJqrg2TH3" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " inputs = tf.keras.Input((16,), batch_size=16)\n", + " x = tf.keras.layers.Dense(16, name='feature')(inputs)\n", + " x = tf.keras.layers.Dropout(0.1)(x)\n", + " output = tf.keras.layers.Dense(32, name='feature_2')(x)\n", + " model = tf.keras.Model(inputs, output)\n", + "\n", + "print(model.layers[1].kernel.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pPuh1NlE3-wO" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(16, name='feature', input_shape=(16,)),\n", + " tf.keras.layers.Dropout(0.1),\n", + " tf.keras.layers.Dense(32, name='feature_2')\n", + " ])\n", + "\n", + "print(model.layers[2].kernel.layout)" + ] + } + ], + "metadata": { + "colab": { + "name": "dtensor_keras_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb new file mode 100644 index 00000000000..55557be6368 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb @@ -0,0 +1,1070 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "# Distributed training with DTensors" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kiF4jjX4O1mF" + }, + "source": [ + "## Overview\n", + "\n", + "DTensor provides a way for you to distribute the training of your model across devices to improve efficiency, reliability and scalability. For more details, check out the [DTensor concepts](../../guide/dtensor_overview.ipynb) guide.\n", + "\n", + "In this tutorial, you will train a sentiment analysis model using DTensors. The example demonstrates three distributed training schemes:\n", + "\n", + " - Data Parallel training, where the training samples are sharded (partitioned) to devices.\n", + " - Model Parallel training, where the model variables are sharded to devices.\n", + " - Spatial Parallel training, where the features of input data are sharded to devices (also known as [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus)).\n", + "\n", + "The training portion of this tutorial is inspired by a Kaggle notebook called [A Kaggle guide on sentiment analysis](https://www.kaggle.com/code/anasofiauzsoy/yelp-review-sentiment-analysis-tensorflow-tfds/notebook). To learn about the complete training and evaluation workflow (without DTensor), refer to that notebook.\n", + "\n", + "This tutorial will walk through the following steps:\n", + "\n", + "- Some data cleaning to obtain a `tf.data.Dataset` of tokenized sentences and their polarity.\n", + "- Then, building an MLP model with custom Dense and BatchNorm layers using a `tf.Module` to track the inference variables. The model constructor will take additional `Layout` arguments to control the sharding of variables.\n", + "- For training, you will first use data parallel training together with `tf.experimental.dtensor`'s checkpoint feature. Then, you will continue with Model Parallel Training and Spatial Parallel Training.\n", + "- The final section briefly describes the interaction between `tf.saved_model` and `tf.experimental.dtensor` as of TensorFlow 2.9." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YD80veeg7QtW" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-RKXLJN-7Yyb" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tcxP4_Zu7ciQ" + }, + "source": [ + "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dXcB26oP7dUd" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import numpy as np\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oHtO6MJLUXlz" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "omYd4jbF7j_I" + }, + "source": [ + "## Download the dataset\n", + "\n", + "Download the IMDB reviews data set to train the sentiment analysis model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fW4w4QlFVHhx" + }, + "outputs": [], + "source": [ + "train_data = tfds.load('imdb_reviews', split='train', shuffle_files=True, batch_size=64)\n", + "train_data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ki3mpfi4aZH8" + }, + "source": [ + "## Prepare the data\n", + "\n", + "First tokenize the text. Here use an extension of one-hot encoding, the `'tf_idf'` mode of `tf.keras.layers.TextVectorization`.\n", + "\n", + "- For the sake of speed, limit the number of tokens to 1200.\n", + "- To keep the `tf.Module` simple, run `TextVectorization` as a preprocessing step before the training.\n", + "\n", + "The final result of the data cleaning section is a `Dataset` with the tokenized text as `x` and label as `y`.\n", + "\n", + "**Note**: Running `TextVectorization` as a preprocessing step is **neither a usual practice nor a recommended one** as doing so assumes the training data fits into the client memory, which is not always the case.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zNpxjku_57Lg" + }, + "outputs": [], + "source": [ + "text_vectorization = tf.keras.layers.TextVectorization(output_mode='tf_idf', max_tokens=1200, output_sequence_length=None)\n", + "text_vectorization.adapt(data=train_data.map(lambda x: x['text']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q16bjngoVwQp" + }, + "outputs": [], + "source": [ + "def vectorize(features):\n", + " return text_vectorization(features['text']), features['label']\n", + "\n", + "train_data_vec = train_data.map(vectorize)\n", + "train_data_vec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atTqL9kE5wz4" + }, + "source": [ + "## Build a neural network with DTensor\n", + "\n", + "Now build a Multi-Layer Perceptron (MLP) network with `DTensor`. 
The network will use fully connected Dense and BatchNorm layers.\n", + "\n", + "`DTensor` expands TensorFlow through single-program multi-data (SPMD) expansion of regular TensorFlow Ops according to the `dtensor.Layout` attributes of their input `Tensor` and variables.\n", + "\n", + "Variables of `DTensor` aware layers are `dtensor.DVariable`, and the constructors of `DTensor` aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "Note: As of TensorFlow 2.9, Keras layers such as `tf.keras.layer.Dense`, and `tf.keras.layer.BatchNormalization` accepts `dtensor.Layout` arguments. Refer to the [DTensor Keras Integration Tutorial](/tutorials/distribute/dtensor_keras_tutorial) for more information using Keras with DTensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PMCt-Gj3b3Jy" + }, + "source": [ + "### Dense Layer\n", + "\n", + "The following custom Dense layer defines 2 layer variables: $W_{ij}$ is the variable for weights, and $b_i$ is the variable for the biases.\n", + "\n", + "$$\n", + "y_j = \\sigma(\\sum_i x_i W_{ij} + b_j)\n", + "$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nYlFUJWNjl4N" + }, + "source": [ + "### Layout deduction\n", + "\n", + "This result comes from the following observations:\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix dot product $t_j = \\sum_i x_i W_{ij}$ is to shard $\\mathbf{W}$ and $\\mathbf{x}$ the same way along the $i$-axis.\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix sum $t_j + b_j$, is to shard $\\mathbf{t}$ and $\\mathbf{b}$ the same way along the $j$-axis.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpKblz7Yb16G" + }, + "outputs": [], + "source": [ + "class Dense(tf.Module):\n", + "\n", + " def __init__(self, input_size, output_size,\n", + " init_seed, weight_layout, activation=None):\n", + " super().__init__()\n", + "\n", + " random_normal_initializer = tf.function(tf.random.stateless_normal)\n", + "\n", + " self.weight = dtensor.DVariable(\n", + " dtensor.call_with_layout(\n", + " random_normal_initializer, weight_layout,\n", + " shape=[input_size, output_size],\n", + " seed=init_seed\n", + " ))\n", + " if activation is None:\n", + " activation = lambda x:x\n", + " self.activation = activation\n", + " \n", + " # bias is sharded the same way as the last axis of weight.\n", + " bias_layout = weight_layout.delete([0])\n", + "\n", + " self.bias = dtensor.DVariable(\n", + " dtensor.call_with_layout(tf.zeros, bias_layout, [output_size]))\n", + "\n", + " def __call__(self, x):\n", + " y = tf.matmul(x, self.weight) + self.bias\n", + " y = self.activation(y)\n", + "\n", + " return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tfVY_vAKbxM0" + }, + "source": [ + "### BatchNorm\n", + "\n", + "A batch normalization layer helps avoid collapsing modes while training. In this case, adding batch normalization layers helps model training avoid producing a model that only produces zeros.\n", + "\n", + "The constructor of the custom `BatchNorm` layer below does not take a `Layout` argument. This is because `BatchNorm` has no layer variables. This still works with DTensor because 'x', the only input to the layer, is already a DTensor that represents the global batch.\n", + "\n", + "Note: With DTensor, the input Tensor 'x' always represents the global batch. Therefore `tf.nn.batch_normalization` is applied to the global batch. 
This differs from training with `tf.distribute.MirroredStrategy`, where Tensor 'x' only represents the per-replica shard of the batch (the local batch)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "riBA9pfhlPFq" + }, + "outputs": [], + "source": [ + "class BatchNorm(tf.Module):\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def __call__(self, x, training=True):\n", + " if not training:\n", + " # This branch is not used in the Tutorial.\n", + " pass\n", + " mean, variance = tf.nn.moments(x, axes=[0])\n", + " return tf.nn.batch_normalization(x, mean, variance, 0.0, 1.0, 1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4R4MPz5prh4" + }, + "source": [ + "A full featured batch normalization layer (such as `tf.keras.layers.BatchNormalization`) will need Layout arguments for its variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "unFcP99zprJj" + }, + "outputs": [], + "source": [ + "def make_keras_bn(bn_layout):\n", + " return tf.keras.layers.BatchNormalization(gamma_layout=bn_layout,\n", + " beta_layout=bn_layout,\n", + " moving_mean_layout=bn_layout,\n", + " moving_variance_layout=bn_layout,\n", + " fused=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v8Dj7AJ_lPs0" + }, + "source": [ + "### Putting Layers Together\n", + "\n", + "Next, build a Multi-layer perceptron (MLP) network with the building blocks above. The diagram below shows the axis relationships between the input `x` and the weight matrices for the two `Dense` layers without any DTensor sharding or replication applied." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "udFGAO-NrZw6" + }, + "source": [ + "\"The\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8DCQ0aQ5rQtB" + }, + "source": [ + "The output of the first `Dense` layer is passed into the input of the second `Dense` layer (after the `BatchNorm`). Therefore, the preferred DTensor sharding for the output of first `Dense` layer ($\\mathbf{W_1}$) and the input of second `Dense` layer ($\\mathbf{W_2}$) is to shard $\\mathbf{W_1}$ and $\\mathbf{W_2}$ the same way along the common axis $\\hat{j}$,\n", + "\n", + "$$\n", + "\\mathsf{Layout}[{W_{1,ij}}; i, j] = \\left[\\hat{i}, \\hat{j}\\right] \\\\\n", + "\\mathsf{Layout}[{W_{2,jk}}; j, k] = \\left[\\hat{j}, \\hat{k} \\right]\n", + "$$\n", + "\n", + "Even though the layout deduction shows that the 2 layouts are not independent, for the sake of simplicity of the model interface, `MLP` will take 2 `Layout` arguments, one per Dense layer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "junyS-965opl" + }, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "\n", + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, dense_layouts: Tuple[dtensor.Layout, dtensor.Layout]):\n", + " super().__init__()\n", + "\n", + " self.dense1 = Dense(\n", + " 1200, 48, (1, 2), dense_layouts[0], activation=tf.nn.relu)\n", + " self.bn = BatchNorm()\n", + " self.dense2 = Dense(48, 2, (3, 4), dense_layouts[1])\n", + "\n", + " def __call__(self, x):\n", + " y = x\n", + " y = self.dense1(y)\n", + " y = self.bn(y)\n", + " y = self.dense2(y)\n", + " return y\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dgLmebHhr7h" + }, + "source": [ + "The trade-off between correctness in layout deduction constraints and simplicity of API is a common design point of APIs that uses DTensor.\n", + "It is also possible to capture the dependency between `Layout`'s with a different API. For example, the `MLPStricter` class creates the `Layout` objects in the constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wEZR7UlihsYX" + }, + "outputs": [], + "source": [ + "class MLPStricter(tf.Module):\n", + "\n", + " def __init__(self, mesh, input_mesh_dim, inner_mesh_dim1, output_mesh_dim):\n", + " super().__init__()\n", + "\n", + " self.dense1 = Dense(\n", + " 1200, 48, (1, 2), dtensor.Layout([input_mesh_dim, inner_mesh_dim1], mesh),\n", + " activation=tf.nn.relu)\n", + " self.bn = BatchNorm()\n", + " self.dense2 = Dense(48, 2, (3, 4), dtensor.Layout([inner_mesh_dim1, output_mesh_dim], mesh))\n", + "\n", + "\n", + " def __call__(self, x):\n", + " y = x\n", + " y = self.dense1(y)\n", + " y = self.bn(y)\n", + " y = self.dense2(y)\n", + " return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcQi7D5mal2L" + }, + "source": [ + "To make sure the model runs, probe your model with fully replicated layouts and a fully replicated batch of `'x'` input." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOPuYeQwallh" + }, + "outputs": [], + "source": [ + "WORLD = dtensor.create_mesh([(\"world\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout.replicated(WORLD, rank=2),\n", + " dtensor.Layout.replicated(WORLD, rank=2)])\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x = dtensor.copy_to_mesh(sample_x, dtensor.Layout.replicated(WORLD, rank=2))\n", + "print(model(sample_x))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "akrjDstEpDv9" + }, + "source": [ + "## Moving data to the device\n", + "\n", + "Usually, `tf.data` iterators (and other data fetching methods) yield tensor objects backed by the local host device memory. This data must be transferred to the accelerator device memory that backs DTensor's component tensors.\n", + "\n", + "`dtensor.copy_to_mesh` is unsuitable for this situation because it replicates input tensors to all devices due to DTensor's global perspective. So in this tutorial, you will use a helper function `repack_local_tensor`, to facilitate the transfer of data. This helper function uses `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica.\n", + "\n", + "This simplified function assumes single-client. 
Determining the correct way to split the local tensor and the mapping between the pieces of the split and the local devices can be laborious in a multi-client application.\n", + "\n", + "Additional DTensor APIs to simplify `tf.data` integration are planned, supporting both single-client and multi-client applications. Please stay tuned." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3t5WvQR4Hvo4" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + "  \"\"\"Repacks a local Tensor-like to a DTensor with layout.\n", + "\n", + "  This function assumes a single-client application.\n", + "  \"\"\"\n", + "  x = tf.convert_to_tensor(x)\n", + "  sharded_dims = []\n", + "\n", + "  # For every sharded dimension, use tf.split to split the tensor along the dimension.\n", + "  # The result is a nested list of split-tensors in queue[0].\n", + "  queue = [x]\n", + "  for axis, dim in enumerate(layout.sharding_specs):\n", + "    if dim == dtensor.UNSHARDED:\n", + "      continue\n", + "    num_splits = layout.shape[axis]\n", + "    queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + "    sharded_dims.append(dim)\n", + "\n", + "  # Now we can build the list of component tensors by looking up the location in\n", + "  # the nested list of split-tensors created in queue[0].\n", + "  components = []\n", + "  for locations in layout.mesh.local_device_locations():\n", + "    t = queue[0]\n", + "    for dim in sharded_dims:\n", + "      split_index = locations[dim]  # Only valid on single-client mesh.\n", + "      t = t[split_index]\n", + "    components.append(t)\n", + "\n", + "  return dtensor.pack(components, layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2KKCDcjG7zj2" + }, + "source": [ + "## Data parallel training\n", + "\n", + "In this section, you will train your MLP model with data parallel training. The following sections will demonstrate model parallel training and spatial parallel training.\n", + "\n", + "Data parallel training is a commonly used scheme for distributed machine learning:\n", + "\n", + " - Model variables are replicated on each of the N devices.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - Gradients are reduced across replicas before the weight update is collectively applied on all replicas.\n", + "\n", + "Data parallel training provides nearly linear speedup with respect to the number of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UMsLUyTGq3oL" + }, + "source": [ + "### Creating a data parallel mesh\n", + "\n", + "A typical data parallelism training loop uses a DTensor `Mesh` that consists of a single `batch` dimension, where each device becomes a replica that receives a shard from the global batch.\n", + "\n", + "\"Data\n", + "\n", + "\n", + "The replicated model runs on the replica, therefore the model variables are fully replicated (unsharded)."
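To build intuition for how a global batch lands on such a mesh, here is a small, self-contained sketch (not part of the original notebook). It assumes the 8 virtual CPUs and the `DEVICES` list configured earlier; the `demo_*` names are illustrative only, and the actual training mesh is created in the next cell:

```python
import tensorflow as tf
from tensorflow.experimental import dtensor

# A throwaway 1-D 'batch' mesh and a batch-sharded layout for rank-2 data.
demo_mesh = dtensor.create_mesh([("batch", 8)], devices=DEVICES)
demo_layout = dtensor.Layout.batch_sharded(demo_mesh, 'batch', rank=2)

# A toy "global batch" of 8 examples with 2 features each.
global_batch = tf.reshape(tf.range(16, dtype=tf.float32), [8, 2])

# Split the host tensor into one component per local device, then pack.
components = tf.split(global_batch, demo_mesh.num_local_devices())
demo_batch = dtensor.pack(components, demo_layout)

print(dtensor.fetch_layout(demo_batch))
# Each device holds a single-row shard of the global batch.
for i, shard in enumerate(dtensor.unpack(demo_batch)):
  print(f'component {i}:', shard.numpy())
```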
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0IyOlxmeu4I" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),\n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OREKwBybo1gZ" + }, + "source": [ + "### Packing training data to DTensors\n", + "\n", + "The training data batch should be packed into DTensors sharded along the `'batch'`(first) axis, such that DTensor will evenly distribute the training data to the `'batch'` mesh dimension.\n", + "\n", + "**Note**: In DTensor, the `batch size` always refers to the global batch size. The batch size should be chosen such that it can be divided evenly by the size of the `batch` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8xMYkTpGocY8" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x, sample_y = repack_batch(sample_x, sample_y, mesh)\n", + "\n", + "print('x', sample_x[:, 0])\n", + "print('y', sample_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uONSiqOIkFL1" + }, + "source": [ + "### Training step\n", + "\n", + "This example uses a Stochastic Gradient Descent optimizer with the Custom Training Loop (CTL). Consult the [Custom Training Loop guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Walk through](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) for more information on those topics.\n", + "\n", + "The `train_step` is encapsulated as a `tf.function` to indicate this body is to be traced as a TensorFlow Graph. The body of `train_step` consists of a forward inference pass, a backward gradient pass, and the variable update.\n", + "\n", + "Note that the body of `train_step` does not contain any special DTensor annotations. Instead, `train_step` only contains high-level TensorFlow operations that process the input `x` and `y` from the global view of the input batch and the model. All of the DTensor annotations (`Mesh`, `Layout`) are factored out of the train step." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BwUFzLGDtQT6" + }, + "outputs": [], + "source": [ + "# Refer to the CTL (custom training loop guide)\n", + "@tf.function\n", + "def train_step(model, x, y, learning_rate=tf.constant(1e-4)):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x)\n", + " # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + " # global loss (scalar).\n", + " loss = tf.reduce_sum(\n", + " tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " logits=logits, labels=y))\n", + " parameters = model.trainable_variables\n", + " gradients = tape.gradient(loss, parameters)\n", + " for parameter, parameter_gradient in zip(parameters, gradients):\n", + " parameter.assign_sub(learning_rate * parameter_gradient)\n", + "\n", + " # Define some metrics\n", + " accuracy = 1.0 - tf.reduce_sum(tf.cast(tf.argmax(logits, axis=-1, output_type=tf.int64) != y, tf.float32)) / x.shape[0]\n", + " loss_per_sample = loss / len(x)\n", + " return {'loss': loss_per_sample, 'accuracy': accuracy}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0OYTu4j0evWT" + }, + "source": [ + "### Checkpointing\n", + "\n", + "You can checkpoint a DTensor model using `tf.train.Checkpoint` out of the box. Saving and restoring sharded DVariables will perform an efficient sharded save and restore. Currently, when using `tf.train.Checkpoint.save` and `tf.train.Checkpoint.restore`, all DVariables must be on the same host mesh, and DVariables and regular variables cannot be saved together. You can learn more about checkpointing in [this guide](../../guide/checkpoint.ipynb).\n", + "\n", + "When a DTensor checkpoint is restored, `Layout`s of variables can be different from when the checkpoint is saved. That is, saving DTensor models is layout- and mesh-agnostic, and only affects the efficiency of sharded saving. You can save a DTensor model with one mesh and layout and restore it on a different mesh and layout. This tutorial makes use of this feature to continue the training in the Model Parallel training and Spatial Parallel training sections.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rsInFFJg7x9t" + }, + "outputs": [], + "source": [ + "CHECKPOINT_DIR = tempfile.mkdtemp()\n", + "\n", + "def start_checkpoint_manager(model):\n", + " ckpt = tf.train.Checkpoint(root=model)\n", + " manager = tf.train.CheckpointManager(ckpt, CHECKPOINT_DIR, max_to_keep=3)\n", + "\n", + " if manager.latest_checkpoint:\n", + " print(\"Restoring a checkpoint\")\n", + " ckpt.restore(manager.latest_checkpoint).assert_consumed()\n", + " else:\n", + " print(\"New training\")\n", + " return manager\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9r77ky5Jgp1j" + }, + "source": [ + "### Training loop\n", + "\n", + "For the data parallel training scheme, train for epochs and report the progress. 3 epochs is insufficient for training the model -- an accuracy of 50% is as good as randomly guessing.\n", + "\n", + "Enable checkpointing so that you can pick up the training later. In the following section, you will load the checkpoint and train with a different parallel scheme." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UaLn-vGZgqbS" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()), stateful_metrics=[])\n", + " metrics = {'epoch': epoch}\n", + " for x,y in train_data_vec:\n", + "\n", + " x, y = repack_batch(x, y, mesh)\n", + "\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YRFJEhum7EGD" + }, + "source": [ + "## Model Parallel Training\n", + "\n", + "If you switch to a 2 dimensional `Mesh`, and shard the model variables along the second mesh dimension, then the training becomes Model Parallel.\n", + "\n", + "In Model Parallel training, each model replica spans multiple devices (2 in this case):\n", + "\n", + "- There are 4 model replicas, and the training data batch is distributed to the 4 replicas.\n", + "- The 2 devices within a single model replica receive replicated training data.\n", + "\n", + "\n", + "\"Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5gZE9IT5Dzwl" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 4), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, \"model\"], mesh), \n", + " dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ihof3DkMFKnf" + }, + "source": [ + "As the training data is still sharded along the batch dimension, you can reuse the same `repack_batch` function as the Data Parallel training case. DTensor will automatically replicate the per-replica batch to all devices inside the replica along the `\"model\"` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dZf56ynbE_p1" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UW3OXdhNFfpv" + }, + "source": [ + "Next run the training loop. The training loop reuses the same checkpoint manager as the Data Parallel training example, and the code looks identical.\n", + "\n", + "You can continue training the data parallel trained model under model parallel training." 
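Before running the loop, you can optionally confirm how the weights are now sharded. This is a small illustrative sketch (not part of the original notebook), assuming the model-parallel `model` created in the previous cell:

```python
# The Dense kernels should now be sharded along the 'model' mesh dimension,
# while the input batch remains sharded along 'batch'.
print(model.dense1.weight.layout)
print(model.dense2.weight.layout)
```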
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LLC0wgii7EgA" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + "  step = 0\n", + "  pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "  metrics = {'epoch': epoch}\n", + "  for x,y in train_data_vec:\n", + "    x, y = repack_batch(x, y, mesh)\n", + "    metrics.update(train_step(model, x, y, 1e-2))\n", + "    pbar.update(step, values=metrics.items(), finalize=False)\n", + "    step += 1\n", + "  manager.save()\n", + "  pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BZH-aMrVzi2L" + }, + "source": [ + "## Spatial Parallel Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-bK6IZ9GCS9" + }, + "source": [ + "When training with data of very high dimensionality (e.g. a very large image or a video), it may be desirable to shard along the feature dimension. This is called [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus), which was first introduced into TensorFlow for training models with large 3-d input samples.\n", + "\n", + "\"Spatial\n", + "\n", + "DTensor also supports this case. The only change you need to make is to create a Mesh that includes a `feature` dimension, and apply the corresponding `Layout`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jpc9mqURGpmK" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 2), (\"feature\", 2), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([\"feature\", \"model\"], mesh), \n", + "             dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i07Wrv-jHBc1" + }, + "source": [ + "Shard the input data along the `feature` dimension when packing the input tensors to DTensors. You do this with a slightly different repack function, `repack_batch_for_spt`, where `spt` stands for Spatial Parallel Training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DWR8qF6BGtFL" + }, + "outputs": [], + "source": [ + "def repack_batch_for_spt(x, y, mesh):\n", + "    # Shard data on the feature dimension, too.\n", + "    x = repack_local_tensor(x, layout=dtensor.Layout([\"batch\", 'feature'], mesh))\n", + "    y = repack_local_tensor(y, layout=dtensor.Layout([\"batch\"], mesh))\n", + "    return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ygl9dqMUHTVN" + }, + "source": [ + "Spatial parallel training can also continue from a checkpoint created with other parallel training schemes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p3NnpHSKo-hx" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "\n", + "manager = start_checkpoint_manager(model)\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " metrics = {'epoch': epoch}\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "\n", + " for x, y in train_data_vec:\n", + " x, y = repack_batch_for_spt(x, y, mesh)\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vp4L59CpJjYr" + }, + "source": [ + "## SavedModel and DTensor\n", + "\n", + "The integration of DTensor and SavedModel is still under development. \n", + "\n", + "As of TensorFlow `2.11`, `tf.saved_model` can save sharded and replicated DTensor models, and saving will do an efficient sharded save on different devices of the mesh. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors, not DTensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "49HfIq_SJZoj" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"world\", 1)], devices=DEVICES[:1])\n", + "mlp = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh), \n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)])\n", + "\n", + "manager = start_checkpoint_manager(mlp)\n", + "\n", + "model_for_saving = tf.keras.Sequential([\n", + " text_vectorization,\n", + " mlp\n", + "])\n", + "\n", + "@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n", + "def run(inputs):\n", + " return {'result': model_for_saving(inputs)}\n", + "\n", + "tf.saved_model.save(\n", + " model_for_saving, \"/tmp/saved_model\",\n", + " signatures=run)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6Csim_VMGxQ" + }, + "source": [ + "As of TensorFlow 2.9.0, you can only call a loaded signature with a regular Tensor, or a fully replicated DTensor (which will be converted to a regular Tensor)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HG_ASSzR4IWW" + }, + "outputs": [], + "source": [ + "sample_batch = train_data.take(1).get_single_element()\n", + "sample_batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qW8yKPrhKQ5b" + }, + "outputs": [], + "source": [ + "loaded = tf.saved_model.load(\"/tmp/saved_model\")\n", + "\n", + "run_sig = loaded.signatures[\"serving_default\"]\n", + "result = run_sig(sample_batch['text'])['result']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GahGbv0ZmkJb" + }, + "outputs": [], + "source": [ + "np.mean(tf.argmax(result, axis=-1) == sample_batch['label'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ks-Vs9qsH6jO" + }, + "source": [ + "## What's next?\n", + "\n", + "This tutorial demonstrated building and training an MLP sentiment analysis model with DTensor.\n", + "\n", + "Through `Mesh` and `Layout` primitives, DTensor can transform a TensorFlow `tf.function` to a distributed program suitable for a variety of training schemes.\n", + "\n", + "In a real-world machine learning application, evaluation and cross-validation should be applied to avoid producing an over-fitted model. The techniques introduced in this tutorial can also be applied to introduce parallelism to evaluation.\n", + "\n", + "Composing a model with `tf.Module` from scratch is a lot of work, and reusing existing building blocks such as layers and helper functions can drastically speed up model development.\n", + "As of TensorFlow 2.9, all Keras Layers under `tf.keras.layers` accepts DTensor layouts as their arguments, and can be used to build DTensor models. You can even directly reuse a Keras model with DTensor without modifying the model implementation. Refer to the [DTensor Keras Integration Tutorial](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial) for information on using DTensor Keras. " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_ml_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/input.ipynb b/site/en/tutorials/distribute/input.ipynb index e1cdca6788e..f779c4f19a6 100644 --- a/site/en/tutorials/distribute/input.ipynb +++ b/site/en/tutorials/distribute/input.ipynb @@ -73,7 +73,7 @@ "This guide will show you the different ways in which you can create distributed dataset and iterators using `tf.distribute` APIs. Additionally, the following topics will be covered:\n", "- Usage, sharding and batching options when using `tf.distribute.Strategy.experimental_distribute_dataset` and `tf.distribute.Strategy.distribute_datasets_from_function`.\n", "- Different ways in which you can iterate over the distributed dataset.\n", - "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well any limitations that users may come across in their usage.\n", + "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well as any limitations that users may come across in their usage.\n", "\n", "This guide does not cover usage of distributed input with Keras APIs." 
] @@ -84,7 +84,7 @@ "id": "MM6W__qraV55" }, "source": [ - "## Distributed Datasets" + "## Distributed datasets" ] }, { @@ -93,8 +93,8 @@ "id": "lNy9GxjSlMKQ" }, "source": [ - "To use `tf.distribute` APIs to scale, it is recommended that users use `tf.data.Dataset` to represent their input. `tf.distribute` has been made to work efficiently with `tf.data.Dataset` (for example, automatic prefetch of data onto each accelerator device) with performance optimizations being regularly incorporated into the implementation. If you have a use case for using something other than `tf.data.Dataset`, please refer a later [section](\"tensorinputs\") in this guide.\n", - "In a non distributed training loop, users first create a `tf.data.Dataset` instance and then iterate over the elements. For example:\n" + "To use `tf.distribute` APIs to scale, use `tf.data.Dataset` to represent their input. `tf.distribute` works efficiently with `tf.data.Dataset`—for example, via automatic prefetching onto each accelerator device and regular performance updates. If you have a use case for using something other than `tf.data.Dataset`, please refer to the [Tensor inputs section](#tensorinputs) in this guide.\n", + "In a non-distributed training loop, first create a `tf.data.Dataset` instance and then iterate over the elements. For example:\n" ] }, { @@ -114,6 +114,34 @@ "print(tf.__version__)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6cnilUtmKwpa" + }, + "outputs": [], + "source": [ + "# Simulate multiple CPUs with virtual devices\n", + "N_VIRTUAL_DEVICES = 2\n", + "physical_devices = tf.config.list_physical_devices(\"CPU\")\n", + "tf.config.set_logical_device_configuration(\n", + " physical_devices[0], [tf.config.LogicalDeviceConfiguration() for _ in range(N_VIRTUAL_DEVICES)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zd4l1ySeLRk1" + }, + "outputs": [], + "source": [ + "print(\"Available devices:\")\n", + "for i, device in enumerate(tf.config.list_logical_devices()):\n", + " print(\"%d) %s\" % (i, device))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -214,14 +242,14 @@ " * Replica 1:[0, 1]\n", " * Replica 2:[2, 3]\n", " * Batch 2:\n", - " * Replica 2: [4]\n", + " * Replica 1: [4]\n", " * Replica 2: [5]\n", "\n", "\n", "\n", "* `tf.data.Dataset.range(4).batch(4)`\n", " * Without distribution:\n", - " * Batch 1: [[0], [1], [2], [3]]\n", + " * Batch 1: [0, 1, 2, 3]\n", " * With distribution over 5 replicas:\n", " * Batch 1:\n", " * Replica 1: [0]\n", @@ -246,7 +274,7 @@ "\n", "Note: The above examples only illustrate how a global batch is split on different replicas. It is not advisable to depend on the actual values that might end up on each replica as it can change depending on the implementation.\n", "\n", - "Rebatching the dataset has a space complexity that increases linearly with the number of replicas. This means that for the multi worker training use case the input pipeline can run into OOM errors. " + "Rebatching the dataset has a space complexity that increases linearly with the number of replicas. This means that for the multi-worker training use case the input pipeline can run into OOM errors. " ] }, { @@ -257,7 +285,7 @@ "source": [ "##### Sharding\n", "\n", - "`tf.distribute` also autoshards the input dataset in multi worker training with `MultiWorkerMirroredStrategy` and `TPUStrategy`. Each dataset is created on the CPU device of the worker. 
Autosharding a dataset over a set of workers means that each worker is assigned a subset of the entire dataset (if the right `tf.data.experimental.AutoShardPolicy` is set). This is to ensure that at each step, a global batch size of non overlapping dataset elements will be processed by each worker. Autosharding has a couple of different options that can be specified using `tf.data.experimental.DistributeOptions`. Note that there is no autosharding in multi worker training with `ParameterServerStrategy`, and more information on dataset creation with this strategy can be found in the [Parameter Server Strategy tutorial](parameter_server_training.ipynb). " + "`tf.distribute` also autoshards the input dataset in multi-worker training with `MultiWorkerMirroredStrategy` and `TPUStrategy`. Each dataset is created on the CPU device of the worker. Autosharding a dataset over a set of workers means that each worker is assigned a subset of the entire dataset (if the right `tf.data.experimental.AutoShardPolicy` is set). This is to ensure that at each step, a global batch size of non-overlapping dataset elements will be processed by each worker. Autosharding has a couple of different options that can be specified using `tf.data.experimental.DistributeOptions`. Note that there is no autosharding in multi-worker training with `ParameterServerStrategy`, and more information on dataset creation with this strategy can be found in the [ParameterServerStrategy tutorial](parameter_server_training.ipynb). " ] }, { @@ -268,7 +296,7 @@ }, "outputs": [], "source": [ - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(64).batch(16)\n", "options = tf.data.Options()\n", "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA\n", "dataset = dataset.with_options(options)" @@ -358,7 +386,7 @@ "source": [ "#### Usage\n", "\n", - "This API takes an input function and returns a `tf.distribute.DistributedDataset` instance. The input function that users pass in has a `tf.distribute.InputContext` argument and should return a `tf.data.Dataset` instance. With this API, `tf.distribute` does not make any further changes to the user’s `tf.data.Dataset` instance returned from the input function. It is the responsibility of the user to batch and shard the dataset. `tf.distribute` calls the input function on the CPU device of each of the workers. Apart from allowing users to specify their own batching and sharding logic, this API also demonstrates better scalability and performance compared to `tf.distribute.Strategy.experimental_distribute_dataset` when used for multi worker training." + "This API takes an input function and returns a `tf.distribute.DistributedDataset` instance. The input function that users pass in has a `tf.distribute.InputContext` argument and should return a `tf.data.Dataset` instance. With this API, `tf.distribute` does not make any further changes to the user’s `tf.data.Dataset` instance returned from the input function. It is the responsibility of the user to batch and shard the dataset. `tf.distribute` calls the input function on the CPU device of each of the workers. Apart from allowing users to specify their own batching and sharding logic, this API also demonstrates better scalability and performance compared to `tf.distribute.Strategy.experimental_distribute_dataset` when used for multi-worker training." 
] }, { @@ -373,11 +401,11 @@ "\n", "def dataset_fn(input_context):\n", " batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n", - " dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16)\n", + " dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(64).batch(16)\n", " dataset = dataset.shard(\n", - " input_context.num_input_pipelines, input_context.input_pipeline_id)\n", + " input_context.num_input_pipelines, input_context.input_pipeline_id)\n", " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.prefetch(2) # This prefetches 2 batches per device.\n", + " dataset = dataset.prefetch(2) # This prefetches 2 batches per device.\n", " return dataset\n", "\n", "dist_dataset = mirrored_strategy.distribute_datasets_from_function(dataset_fn)" @@ -411,7 +439,7 @@ "source": [ "##### Sharding\n", "\n", - "The `tf.distribute.InputContext` object that is implicitly passed as an argument to the user’s input function is created by `tf.distribute` under the hood. It has information about the number of workers, current worker id etc. This input function can handle sharding as per policies set by the user using these properties that are part of the `tf.distribute.InputContext` object.\n" + "The `tf.distribute.InputContext` object that is implicitly passed as an argument to the user’s input function is created by `tf.distribute` under the hood. It has information about the number of workers, current worker ID etc. This input function can handle sharding as per policies set by the user using these properties that are part of the `tf.distribute.InputContext` object.\n" ] }, { @@ -422,7 +450,7 @@ "source": [ "##### Prefetching\n", "\n", - "`tf.distribute` does not add a prefetch transformation at the end of the `tf.data.Dataset` returned by the user provided input function." + "`tf.distribute` does not add a prefetch transformation at the end of the `tf.data.Dataset` returned by the user-provided input function, so you explicitly call `Dataset.prefetch` in the example above." ] }, { @@ -442,7 +470,7 @@ "id": "dL3XbI1gzEjO" }, "source": [ - "## Distributed Iterators" + "## Distributed iterators" ] }, { @@ -452,7 +480,7 @@ }, "source": [ "Similar to non-distributed `tf.data.Dataset` instances, you will need to create an iterator on the `tf.distribute.DistributedDataset` instances to iterate over it and access the elements in the `tf.distribute.DistributedDataset`.\n", - "The following are the ways in which you can create an `tf.distribute.DistributedIterator` and use it to train your model:\n" + "The following are the ways in which you can create a `tf.distribute.DistributedIterator` and use it to train your model:\n" ] }, { @@ -486,7 +514,7 @@ "global_batch_size = 16\n", "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "\n", "@tf.function\n", @@ -536,14 +564,16 @@ "id": "UpJXIlxjqPYg" }, "source": [ - "With `next()` or `tf.distribute.DistributedIterator.get_next()`, if the `tf.distribute.DistributedIterator` has reached its end, an OutOfRange error will be thrown. The client can catch the error on python side and continue doing other work such as checkpointing and evaluation. 
However, this will not work if you are using a host training loop (i.e., run multiple steps per `tf.function`), which looks like:\n",
+ "With `next` or `tf.distribute.DistributedIterator.get_next`, if the `tf.distribute.DistributedIterator` has reached its end, an OutOfRange error will be thrown. The client can catch the error on the Python side and continue doing other work such as checkpointing and evaluation. However, this will not work if you are using a host training loop (i.e., run multiple steps per `tf.function`), which looks like:\n",
+ "\n",
+ "```\n",
 "@tf.function\n",
 "def train_fn(iterator):\n",
 "  for _ in tf.range(steps_per_loop):\n",
 "    strategy.run(step_fn, args=(next(iterator),))\n",
 "```\n",
- " `train_fn` contains multiple steps by wrapping the step body inside a `tf.range`. In this case, different iterations in the loop with no dependency could start in parallel, so an OutOfRange error can be triggered in later iterations before the computation of previous iterations finishes. Once an OutOfRange error is thrown, all the ops in the function will be terminated right away. If this is some case that you would like to avoid, an alternative that does not throw an OutOfRange error is `tf.distribute.DistributedIterator.get_next_as_optional()`. `get_next_as_optional` returns a `tf.experimental.Optional` which contains the next element or no value if the `tf.distribute.DistributedIterator` has reached to an end."
+ "\n",
+ "This example `train_fn` contains multiple steps by wrapping the step body inside a `tf.range`. In this case, different iterations in the loop with no dependency could start in parallel, so an OutOfRange error can be triggered in later iterations before the computation of previous iterations finishes. Once an OutOfRange error is thrown, all the ops in the function will be terminated right away. If this is a case that you would like to avoid, an alternative that does not throw an OutOfRange error is `tf.distribute.DistributedIterator.get_next_as_optional`. `get_next_as_optional` returns a `tf.experimental.Optional` which contains the next element or no value if the `tf.distribute.DistributedIterator` has reached its end."
] }, { @@ -554,10 +584,10 @@ }, "outputs": [], "source": [ - "# You can break the loop with get_next_as_optional by checking if the Optional contains value\n", + "# You can break the loop with `get_next_as_optional` by checking if the `Optional` contains a value\n", "global_batch_size = 4\n", "steps_per_loop = 5\n", - "strategy = tf.distribute.MirroredStrategy(devices=[\"GPU:0\", \"CPU:0\"])\n", + "strategy = tf.distribute.MirroredStrategy()\n", "\n", "dataset = tf.data.Dataset.range(9).batch(global_batch_size)\n", "distributed_iterator = iter(strategy.experimental_distribute_dataset(dataset))\n", @@ -568,7 +598,7 @@ " optional_data = distributed_iterator.get_next_as_optional()\n", " if not optional_data.has_value():\n", " break\n", - " per_replica_results = strategy.run(lambda x:x, args=(optional_data.get_value(),))\n", + " per_replica_results = strategy.run(lambda x: x, args=(optional_data.get_value(),))\n", " tf.print(strategy.experimental_local_results(per_replica_results))\n", "train_fn(distributed_iterator)" ] @@ -579,7 +609,7 @@ "id": "LaclbKnqzLjf" }, "source": [ - "## Using `element_spec` property" + "## Using the `element_spec` property" ] }, { @@ -588,7 +618,7 @@ "id": "Z1YvXqOpwy08" }, "source": [ - "If you pass the elements of a distributed dataset to a `tf.function` and want a `tf.TypeSpec` guarantee, you can specify the `input_signature` argument of the `tf.function`. The output of a distributed dataset is `tf.distribute.DistributedValues` which can represent the input to a single device or multiple devices. To get the `tf.TypeSpec` corresponding to this distributed value you can use the `element_spec` property of the distributed dataset or distributed iterator object." + "If you pass the elements of a distributed dataset to a `tf.function` and want a `tf.TypeSpec` guarantee, you can specify the `input_signature` argument of the `tf.function`. The output of a distributed dataset is `tf.distribute.DistributedValues` which can represent the input to a single device or multiple devices. To get the `tf.TypeSpec` corresponding to this distributed value, you can use `tf.distribute.DistributedDataset.element_spec` or `tf.distribute.DistributedIterator.element_spec`." ] }, { @@ -604,7 +634,7 @@ "steps_per_epoch = 5\n", "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "\n", "@tf.function(input_signature=[dist_dataset.element_spec])\n", @@ -627,7 +657,246 @@ "id": "-OAa6svUzuWm" }, "source": [ - "## Partial Batches" + "## Data preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pSMrs3kJQexW" + }, + "source": [ + "So far, you have learned how to distribute a `tf.data.Dataset`. Yet before the data is ready for the model, it needs to be preprocessed, for example by cleansing, transforming, and augmenting it. Two sets of those handy tools are:\n", + "\n", + "* [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers): a set of Keras layers that allow developers to build Keras-native input processing pipelines. 
Some Keras preprocessing layers contain non-trainable states, which can be set on initialization or `adapt`ed (refer to the `adapt` section of the [Keras preprocessing layers guide](https://www.tensorflow.org/guide/keras/preprocessing_layers)). When distributing stateful preprocessing layers, the states should be replicated to all workers. To use these layers, you can either make them part of the model or apply them to the datasets.\n",
+ "\n",
+ "* [TensorFlow Transform (tf.Transform)](https://www.tensorflow.org/tfx/transform/get_started): a library for TensorFlow that allows you to define both instance-level and full-pass data transformation through data preprocessing pipelines. TensorFlow Transform has two phases. The first is the Analyze phase, where the raw training data is analyzed in a full-pass process to compute the statistics needed for the transformations, and the transformation logic is generated as instance-level operations. The second is the Transform phase, where the raw training data is transformed in an instance-level process.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pd4aUCFdVlZ1"
+ },
+ "source": [
+ "### Keras preprocessing layers vs. TensorFlow Transform\n",
+ "\n",
+ "Both TensorFlow Transform and Keras preprocessing layers provide a way to split out preprocessing during training and bundle preprocessing with a model during inference, reducing train/serve skew.\n",
+ "\n",
+ "TensorFlow Transform, deeply integrated with [TFX](https://www.tensorflow.org/tfx), provides a scalable map-reduce solution to analyzing and transforming datasets of any size in a job separate from the training pipeline. If you need to run an analysis on a dataset that cannot fit on a single machine, TensorFlow Transform should be your first choice.\n",
+ "\n",
+ "Keras preprocessing layers are more geared towards preprocessing applied during training, after reading data from disk. They fit seamlessly with model development in the Keras library. They support analysis of a smaller dataset via [`adapt`](https://www.tensorflow.org/guide/keras/preprocessing_layers#the_adapt_method) and support use cases like image data augmentation, where each pass over the input dataset will yield different examples for training.\n",
+ "\n",
+ "The two libraries can also be mixed, where TensorFlow Transform is used for analysis and static transformations of input data, and Keras preprocessing layers are used for train-time transformations (e.g., one-hot encoding or data augmentation).\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MReKhhZpHUpj"
+ },
+ "source": [
+ "### Best practices with tf.distribute\n",
+ "\n",
+ "Working with both tools involves initializing the transformation logic to apply to data, which might create TensorFlow resources. These resources or states should be replicated to all workers to save inter-worker or worker-coordinator communication. To do so, it is recommended that you create Keras preprocessing layers, `tft.TFTransformOutput.transform_features_layer`, or `tft.TransformFeaturesLayer` under `tf.distribute.Strategy.scope`, just like you would for any other Keras layers.\n",
+ "\n",
+ "The following examples demonstrate usage of the `tf.distribute.Strategy` API with the high-level Keras `Model.fit` API and with a custom training loop separately."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rwEGMWuoX7kJ"
+ },
+ "source": [
+ "#### Extra notes for Keras preprocessing layers users\n",
+ "\n",
+ "**Preprocessing layers and large vocabularies**\n",
+ "\n",
+ "When dealing with large vocabularies (over one gigabyte) in a multi-worker setting (for example, `tf.distribute.MultiWorkerMirroredStrategy`, `tf.distribute.experimental.ParameterServerStrategy`, `tf.distribute.TPUStrategy`), it is recommended to save the vocabulary to a static file accessible from all workers (for example, with Cloud Storage). This will reduce the time spent replicating the vocabulary to all workers during training.\n",
+ "\n",
+ "**Preprocessing in the `tf.data` pipeline versus in the model**\n",
+ "\n",
+ "While Keras preprocessing layers can be applied either as part of the model or directly to a `tf.data.Dataset`, each option comes with its own advantages:\n",
+ "\n",
+ "* Applying the preprocessing layers within the model makes your model portable, and it helps reduce the training/serving skew. (For more details, refer to the _Benefits of doing preprocessing inside the model at inference time_ section in the [Working with preprocessing layers guide](https://www.tensorflow.org/guide/keras/preprocessing_layers#benefits_of_doing_preprocessing_inside_the_model_at_inference_time))\n",
+ "* Applying within the `tf.data` pipeline allows prefetching or offloading to the CPU, which generally gives better performance when using accelerators.\n",
+ "\n",
+ "When running on one or more TPUs, users should almost always place Keras preprocessing layers in the `tf.data` pipeline, as not all layers support TPUs, and string ops do not execute on TPUs. (The two exceptions are `tf.keras.layers.Normalization` and `tf.keras.layers.Rescaling`, which run fine on TPUs and are commonly used as the first layer in an image model.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hNCYZ9L-BD2R"
+ },
+ "source": [
+ "### Preprocessing with `Model.fit`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NhRB2Xe8B6bX"
+ },
+ "source": [
+ "When using Keras `Model.fit`, you do not need to distribute the data yourself with either `tf.distribute.Strategy.experimental_distribute_dataset` or `tf.distribute.Strategy.distribute_datasets_from_function`. Check out the [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) guide and the [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras) guide for details. A shortened example may look like this:\n",
+ "\n",
+ "```\n",
+ "strategy = tf.distribute.MirroredStrategy()\n",
+ "with strategy.scope():\n",
+ "  # Create the layer(s) under scope.\n",
+ "  integer_preprocessing_layer = tf.keras.layers.IntegerLookup(vocabulary=FILE_PATH)\n",
+ "  model = ...\n",
+ "  model.compile(...)\n",
+ "dataset = dataset.map(lambda x, y: (integer_preprocessing_layer(x), y))\n",
+ "model.fit(dataset)\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3zL2vzJ-G0yg"
+ },
+ "source": [
+ "Users of `tf.distribute.experimental.ParameterServerStrategy` with the `Model.fit` API need to use a `tf.keras.utils.experimental.DatasetCreator` as the input. (Refer to the [Parameter Server Training](https://www.tensorflow.org/tutorials/distribute/parameter_server_training#parameter_server_training_with_modelfit_api) guide for more details.)\n",
+ "\n",
+ "```\n",
+ "strategy = tf.distribute.experimental.ParameterServerStrategy(\n",
+ "    cluster_resolver,\n",
+ "    variable_partitioner=variable_partitioner)\n",
+ "\n",
+ "with strategy.scope():\n",
+ "  preprocessing_layer = tf.keras.layers.StringLookup(vocabulary=FILE_PATH)\n",
+ "  model = ...\n",
+ "  model.compile(...)\n",
+ "\n",
+ "def dataset_fn(input_context):\n",
+ "  ...\n",
+ "  dataset = dataset.map(preprocessing_layer)\n",
+ "  ...\n",
+ "  return dataset\n",
+ "\n",
+ "dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)\n",
+ "model.fit(dataset_creator, epochs=5, steps_per_epoch=20, callbacks=callbacks)\n",
+ "\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "imZLQUOYBJyW"
+ },
+ "source": [
+ "### Preprocessing with a custom training loop"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "r2PX1QH_OwU3"
+ },
+ "source": [
+ "When writing a [custom training loop](https://www.tensorflow.org/tutorials/distribute/custom_training), you will distribute your data with either the `tf.distribute.Strategy.experimental_distribute_dataset` API or the `tf.distribute.Strategy.distribute_datasets_from_function` API. If you distribute your dataset through `tf.distribute.Strategy.experimental_distribute_dataset`, applying these preprocessing APIs in your data pipeline will lead to the resources being automatically co-located with the data pipeline, avoiding remote resource access. Thus, the examples here will all use `tf.distribute.Strategy.distribute_datasets_from_function`, in which case it is crucial to place the initialization of these APIs under `strategy.scope()` for efficiency:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wJS1UmcWQeab"
+ },
+ "outputs": [],
+ "source": [
+ "strategy = tf.distribute.MirroredStrategy()\n",
+ "vocab = [\"a\", \"b\", \"c\", \"d\", \"f\"]\n",
+ "\n",
+ "with strategy.scope():\n",
+ "  # Create the layer(s) under scope.\n",
+ "  layer = tf.keras.layers.StringLookup(vocabulary=vocab)\n",
+ "\n",
+ "def dataset_fn(input_context):\n",
+ "  # Create a tf.data.Dataset.\n",
+ "  dataset = tf.data.Dataset.from_tensor_slices([\"a\", \"c\", \"e\"]).repeat()\n",
+ "\n",
+ "  # Customize your batching, sharding, prefetching, etc.\n",
+ "  global_batch_size = 4\n",
+ "  batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n",
+ "  dataset = dataset.batch(batch_size)\n",
+ "  dataset = dataset.shard(\n",
+ "      input_context.num_input_pipelines,\n",
+ "      input_context.input_pipeline_id)\n",
+ "\n",
+ "  # Apply the preprocessing layer(s) to the tf.data.Dataset.\n",
+ "  def preprocess_with_kpl(input):\n",
+ "    return layer(input)\n",
+ "\n",
+ "  processed_ds = dataset.map(preprocess_with_kpl)\n",
+ "  return processed_ds\n",
+ "\n",
+ "distributed_dataset = strategy.distribute_datasets_from_function(dataset_fn)\n",
+ "\n",
+ "# Print out a few example batches.\n",
+ "distributed_dataset_iterator = iter(distributed_dataset)\n",
+ "for _ in range(3):\n",
+ "  print(next(distributed_dataset_iterator))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PVl1cblWQy8b"
+ },
+ "source": [
+ "Note that if you are training with `tf.distribute.experimental.ParameterServerStrategy`, you'll also call `tf.distribute.experimental.coordinator.ClusterCoordinator.create_per_worker_dataset`:\n",
+ "\n",
+ "```\n",
+ "@tf.function\n",
"@tf.function\n", + "def per_worker_dataset_fn():\n", + " return strategy.distribute_datasets_from_function(dataset_fn)\n", + "\n", + "per_worker_dataset = coordinator.create_per_worker_dataset(per_worker_dataset_fn)\n", + "per_worker_iterator = iter(per_worker_dataset)\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ol7SmPID1dAt" + }, + "source": [ + "For Tensorflow Transform, as mentioned above, the Analyze stage is done separately from training and thus omitted here. See the [tutorial](https://www.tensorflow.org/tfx/tutorials/transform/census) for a detailed how-to. Usually, this stage includes creating a `tf.Transform` preprocessing function and transforming the data in an [Apache Beam](https://beam.apache.org/) pipeline with this preprocessing function. At the end of the Analyze stage, the output can be exported as a TensorFlow graph which you can use for both training and serving. Our example covers only the training pipeline part:\n", + "\n", + "```\n", + "with strategy.scope():\n", + " # working_dir contains the tf.Transform output.\n", + " tf_transform_output = tft.TFTransformOutput(working_dir)\n", + " # Loading from working_dir to create a Keras layer for applying the tf.Transform output to data\n", + " tft_layer = tf_transform_output.transform_features_layer()\n", + " ...\n", + "\n", + "def dataset_fn(input_context):\n", + " ...\n", + " dataset.map(tft_layer, num_parallel_calls=tf.data.AUTOTUNE)\n", + " ...\n", + " return dataset\n", + "\n", + "distributed_dataset = strategy.distribute_datasets_from_function(dataset_fn)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3_IQxRXxQWof" + }, + "source": [ + "## Partial batches" ] }, { @@ -636,7 +905,7 @@ "id": "hW2_gVkiztUG" }, "source": [ - "Partial batches are encountered when `tf.data.Dataset` instances that users create may contain batch sizes that are not evenly divisible by the number of replicas or when the cardinality of the dataset instance is not divisible by the batch size. This means that when the dataset is distributed over multiple replicas, the `next` call on some iterators will result in an OutOfRangeError. To handle this use case, `tf.distribute` returns dummy batches of batch size 0 on replicas that do not have any more data to process.\n" + "Partial batches are encountered when: 1) `tf.data.Dataset` instances that users create may contain batch sizes that are not evenly divisible by the number of replicas; or 2) when the cardinality of the dataset instance is not divisible by the batch size. This means that when the dataset is distributed over multiple replicas, the `next` call on some iterators will result in an `tf.errors.OutOfRangeError`. To handle this use case, `tf.distribute` returns dummy batches of batch size `0` on replicas that do not have any more data to process.\n" ] }, { @@ -645,9 +914,9 @@ "id": "rqutdpqtPcCH" }, "source": [ - "For the single worker case, if data is not returned by the `next` call on the iterator, dummy batches of 0 batch size are created and used along with the real data in the dataset. In the case of partial batches, the last global batch of data will contain real data alongside dummy batches of data. The stopping condition for processing data now checks if any of the replicas have data. 
If there is no data on any of the replicas, an OutOfRange error is thrown.\n", + "For the single-worker case, if the data is not returned by the `next` call on the iterator, dummy batches of 0 batch size are created and used along with the real data in the dataset. In the case of partial batches, the last global batch of data will contain real data alongside dummy batches of data. The stopping condition for processing data now checks if any of the replicas have data. If there is no data on any of the replicas, you will get a `tf.errors.OutOfRangeError`.\n", "\n", - "For the multi worker case, the boolean value representing presence of data on each of the workers is aggregated using cross replica communication and this is used to identify if all the workers have finished processing the distributed dataset. Since this involves cross worker communication there is some performance penalty involved.\n" + "For the multi-worker case, the boolean value representing presence of data on each of the workers is aggregated using cross replica communication and this is used to identify if all the workers have finished processing the distributed dataset. Since this involves cross worker communication there is some performance penalty involved.\n" ] }, { @@ -665,11 +934,11 @@ "id": "Nx4jyN_Az-Dy" }, "source": [ - "* When using `tf.distribute.Strategy.experimental_distribute_dataset` APIs with a multiple worker setup, users pass a `tf.data.Dataset` that reads from files. If the `tf.data.experimental.AutoShardPolicy` is set to `AUTO` or `FILE`, the actual per step batch size may be smaller than the user defined global batch size. This can happen when the remaining elements in the file are less than the global batch size. Users can either exhaust the dataset without depending on the number of steps to run or set `tf.data.experimental.AutoShardPolicy` to `DATA` to work around it.\n", + "* When using `tf.distribute.Strategy.experimental_distribute_dataset` APIs with a multi-worker setup, you pass a `tf.data.Dataset` that reads from files. If the `tf.data.experimental.AutoShardPolicy` is set to `AUTO` or `FILE`, the actual per-step batch size may be smaller than the one you defined for the global batch size. This can happen when the remaining elements in the file are less than the global batch size. You can either exhaust the dataset without depending on the number of steps to run, or set `tf.data.experimental.AutoShardPolicy` to `DATA` to work around it.\n", "\n", "* Stateful dataset transformations are currently not supported with `tf.distribute` and any stateful ops that the dataset may have are currently ignored. For example, if your dataset has a `map_fn` that uses `tf.random.uniform` to rotate an image, then you have a dataset graph that depends on state (i.e the random seed) on the local machine where the python process is being executed.\n", "\n", - "* Experimental `tf.data.experimental.OptimizationOptions` that are disabled by default can in certain contexts -- such as when used together with `tf.distribute` -- cause a performance degradation. You should only enable them after you validate that they benefit the performance of your workload in a distribute setting.\n", + "* Experimental `tf.data.experimental.OptimizationOptions` that are disabled by default can in certain contexts—such as when used together with `tf.distribute`—cause a performance degradation. 
You should only enable them after you validate that they benefit the performance of your workload in a distribute setting.\n", "\n", "* Please refer to [this guide](https://www.tensorflow.org/guide/data_performance) for how to optimize your input pipeline with `tf.data` in general. A few additional tips:\n", " * If you have multiple workers and are using `tf.data.Dataset.list_files` to create a dataset from all files matching one or more glob patterns, remember to set the `seed` argument or set `shuffle=False` so that each worker shard the file consistently.\n", @@ -695,7 +964,7 @@ "source": [ "* The order in which the data is processed by the workers when using `tf.distribute.experimental_distribute_dataset` or `tf.distribute.distribute_datasets_from_function` is not guaranteed. This is typically required if you are using `tf.distribute` to scale prediction. You can however insert an index for each element in the batch and order outputs accordingly. The following snippet is an example of how to order outputs.\n", "\n", - "Note: `tf.distribute.MirroredStrategy()` is used here for the sake of convenience. We only need to reorder inputs when we are using multiple workers and `tf.distribute.MirroredStrategy` is used to distribute training on a single worker." + "Note: `tf.distribute.MirroredStrategy` is used here for the sake of convenience. You only need to reorder inputs when you are using multiple workers, but `tf.distribute.MirroredStrategy` is used to distribute training on a single worker." ] }, { @@ -740,7 +1009,7 @@ }, "source": [ "\n", - "## How do I distribute my data if I am not using a canonical tf.data.Dataset instance?" + "## Tensor inputs instead of tf.data" ] }, { @@ -756,8 +1025,8 @@ "### Use experimental_distribute_values_from_function for arbitrary tensor inputs\n", "`strategy.run` accepts `tf.distribute.DistributedValues` which is the output of\n", "`next(iterator)`. To pass the tensor values, use\n", - "`experimental_distribute_values_from_function` to construct\n", - "`tf.distribute.DistributedValues` from raw tensors." + "`tf.distribute.Strategy.experimental_distribute_values_from_function` to construct\n", + "`tf.distribute.DistributedValues` from raw tensors. The user will have to specify their own batching and sharding logic in the input function with this option, which can be done using the `tf.distribute.experimental.ValueContext` input object." 
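+ "\n",
+ "For instance, a minimal sketch of such an input function (hypothetical, assuming a `global_batch` tensor and a `global_batch_size` value that are already defined in your program) could slice a raw tensor per replica:\n",
+ "\n",
+ "```\n",
+ "def value_fn(ctx):\n",
+ "  # Illustrative sharding only: slice the in-memory global batch by replica ID.\n",
+ "  per_replica_batch_size = global_batch_size // ctx.num_replicas_in_sync\n",
+ "  start = ctx.replica_id_in_sync_group * per_replica_batch_size\n",
+ "  return global_batch[start:start + per_replica_batch_size]\n",
+ "\n",
+ "distributed_values = mirrored_strategy.experimental_distribute_values_from_function(value_fn)\n",
+ "```"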
] }, { @@ -769,14 +1038,13 @@ "outputs": [], "source": [ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", - "worker_devices = mirrored_strategy.extended.worker_devices\n", "\n", "def value_fn(ctx):\n", - " return tf.constant(1.0)\n", + " return tf.constant(ctx.replica_id_in_sync_group)\n", "\n", "distributed_values = mirrored_strategy.experimental_distribute_values_from_function(value_fn)\n", "for _ in range(4):\n", - " result = mirrored_strategy.run(lambda x:x, args=(distributed_values,))\n", + " result = mirrored_strategy.run(lambda x: x, args=(distributed_values,))\n", " print(result)" ] }, @@ -819,7 +1087,8 @@ "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "iterator = iter(dist_dataset)\n", "for _ in range(4):\n", - " mirrored_strategy.run(lambda x:x, args=(next(iterator),))" + " result = mirrored_strategy.run(lambda x: x, args=(next(iterator),))\n", + " print(result)" ] } ], @@ -827,8 +1096,7 @@ "colab": { "collapsed_sections": [], "name": "input.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/keras.ipynb b/site/en/tutorials/distribute/keras.ipynb index c75e5f88af5..b96656d4436 100644 --- a/site/en/tutorials/distribute/keras.ipynb +++ b/site/en/tutorials/distribute/keras.ipynb @@ -76,7 +76,7 @@ "\n", "You will use the `tf.keras` APIs to build the model and `Model.fit` for training it. (To learn about distributed training with a custom training loop and the `MirroredStrategy`, check out [this tutorial](custom_training.ipynb).)\n", "\n", - "`MirroredStrategy` trains your model on multiple GPUs on a single machine. For _synchronous training on many GPUs on multiple workers_, use the `tf.distribute.MultiWorkerMirroredStrategy` [with the Keras Model.fit](multi_worker_with_keras.ipynb) or [a custom training loop](multi_worker_with_ctl.ipynb). For other options, refer to the [Distributed training guide](../../guide/distributed_training.ipynb).\n", + "`MirroredStrategy` trains your model on multiple GPUs on a single machine. For _synchronous training on many GPUs on multiple workers_, use the `tf.distribute.MultiWorkerMirroredStrategy` with the [Keras Model.fit](multi_worker_with_keras.ipynb) or [a custom training loop](multi_worker_with_ctl.ipynb). For other options, refer to the [Distributed training guide](../../guide/distributed_training.ipynb).\n", "\n", "To learn about various other strategies, there is the [Distributed training with TensorFlow](../../guide/distributed_training.ipynb) guide." 
] @@ -280,7 +280,7 @@ "id": "4xsComp8Kz5H" }, "source": [ - "## Create the model" + "## Create the model and instantiate the optimizer" ] }, { @@ -289,7 +289,7 @@ "id": "1BnQYQTpB3YA" }, "source": [ - "Create and compile the Keras model in the context of `Strategy.scope`:" + "Within the context of `Strategy.scope`, create and compile the model using the Keras API:" ] }, { @@ -310,10 +310,21 @@ " ])\n", "\n", " model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " optimizer=tf.keras.optimizers.Adam(),\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n", " metrics=['accuracy'])" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "DCDKFcNJzdcd" + }, + "source": [ + "For this toy example with the MNIST dataset, you will be using the Adam optimizer's default learning rate of 0.001.\n", + "\n", + "For larger datasets, the key benefit of distributed training is to learn more in each training step, because each step processes more training data in parallel, which allows for a larger learning rate (within the limits of the model and dataset)." + ] + }, { "cell_type": "markdown", "metadata": { @@ -329,13 +340,16 @@ "id": "YOXO5nvvK3US" }, "source": [ - "Define the following `tf.keras.callbacks`:\n", + "Define the following [Keras Callbacks](https://www.tensorflow.org/guide/keras/train_and_evaluate):\n", "\n", "- `tf.keras.callbacks.TensorBoard`: writes a log for TensorBoard, which allows you to visualize the graphs.\n", "- `tf.keras.callbacks.ModelCheckpoint`: saves the model at a certain frequency, such as after every epoch.\n", + "- `tf.keras.callbacks.BackupAndRestore`: provides the fault tolerance functionality by backing up the model and current epoch number. Learn more in the _Fault tolerance_ section of the [Multi-worker training with Keras](multi_worker_with_keras.ipynb) tutorial.\n", "- `tf.keras.callbacks.LearningRateScheduler`: schedules the learning rate to change after, for example, every epoch/batch.\n", "\n", - "For illustrative purposes, add a custom callback called `PrintLR` to display the *learning rate* in the notebook." + "For illustrative purposes, add a [custom callback](https://www.tensorflow.org/guide/keras/custom_callback) called `PrintLR` to display the *learning rate* in the notebook.\n", + "\n", + "**Note:** Use the `BackupAndRestore` callback instead of `ModelCheckpoint` as the main mechanism to restore the training state upon a restart from a job failure. Since `BackupAndRestore` only supports eager mode, in graph mode consider using `ModelCheckpoint`." ] }, { @@ -349,7 +363,7 @@ "# Define the checkpoint directory to store the checkpoints.\n", "checkpoint_dir = './training_checkpoints'\n", "# Define the name of the checkpoint files.\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")" + "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch:04d}.weights.h5\")" ] }, { @@ -382,8 +396,7 @@ "# Define a callback for printing the learning rate at the end of each epoch.\n", "class PrintLR(tf.keras.callbacks.Callback):\n", " def on_epoch_end(self, epoch, logs=None):\n", - " print('\\nLearning rate for epoch {} is {}'.format(epoch + 1,\n", - " model.optimizer.lr.numpy()))" + " print('\\nLearning rate for epoch {} is {}'.format(epoch + 1, model.optimizer.learning_rate.numpy()))" ] }, { @@ -419,7 +432,7 @@ "id": "6EophnOAB3YD" }, "source": [ - "Now, train the model in the usual way by calling `Model.fit` on the model and passing in the dataset created at the beginning of the tutorial. 
This step is the same whether you are distributing the training or not." + "Now, train the model in the usual way by calling Keras `Model.fit` on the model and passing in the dataset created at the beginning of the tutorial. This step is the same whether you are distributing the training or not." ] }, { @@ -473,7 +486,10 @@ }, "outputs": [], "source": [ - "model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))\n", + "import pathlib\n", + "latest_checkpoint = sorted(pathlib.Path(checkpoint_dir).glob('*'))[-1]\n", + "\n", + "model.load_weights(latest_checkpoint)\n", "\n", "eval_loss, eval_acc = model.evaluate(eval_dataset)\n", "\n", @@ -526,7 +542,7 @@ "id": "kBLlogrDvMgg" }, "source": [ - "## Export to SavedModel" + "## Save the model" ] }, { @@ -535,7 +551,7 @@ "id": "Xa87y_A0vRma" }, "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format using `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." + "Save the model to a `.keras` zip archive using `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." ] }, { @@ -546,7 +562,7 @@ }, "outputs": [], "source": [ - "path = 'saved_model/'" + "path = 'my_model.keras'" ] }, { @@ -557,7 +573,7 @@ }, "outputs": [], "source": [ - "model.save(path, save_format='tf')" + "model.save(path)" ] }, { @@ -626,7 +642,7 @@ "\n", "More examples that use different distribution strategies with the Keras `Model.fit` API:\n", "\n", - "1. The [Solve GLUE tasks using BERT on TPU](https://www.tensorflow.org/text/tutorials/bert_glue) tutorial uses `tf.distribute.MirroredStrategy` for training on GPUs and `tf.distribute.TPUStrategy`—on TPUs.\n", + "1. The [Solve GLUE tasks using BERT on TPU](https://www.tensorflow.org/text/tutorials/bert_glue) tutorial uses `tf.distribute.MirroredStrategy` for training on GPUs and `tf.distribute.TPUStrategy` on TPUs.\n", "1. The [Save and load a model using a distribution strategy](save_and_load.ipynb) tutorial demonstates how to use the SavedModel APIs with `tf.distribute.Strategy`.\n", "1. The [official TensorFlow models](https://github.com/tensorflow/models/tree/master/official) can be configured to run multiple distribution strategies.\n", "\n", diff --git a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb index ef3b4a73201..0361eea9328 100644 --- a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb @@ -63,11 +63,9 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates multi-worker training with custom training loop API, distributed via MultiWorkerMirroredStrategy, so a Keras model designed to run on [single-worker](https://www.tensorflow.org/tutorials/distribute/custom_training) can seamlessly work on multiple workers with minimal code change.\n", + "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and with [custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) using the `tf.distribute.Strategy` API. The training loop is distributed via `tf.distribute.MultiWorkerMirroredStrategy`, such that a `tf.keras` model—designed to run on [single-worker](custom_training.ipynb)—can seamlessly work on multiple workers with minimal code changes. Custom training loops provide flexibility and a greater control on training, while also making it easier to debug the model. 
Learn more about [writing a basic training loop](../../guide/basic_training_loops.ipynb), [writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [custom training](../customization/custom_training_walkthrough.ipynb).\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop. More detailed information is available in [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", - "\n", - "If you are looking for how to use `MultiWorkerMirroredStrategy` with keras `model.fit`, refer to this [tutorial](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) instead.\n", + "If you are looking for how to use `MultiWorkerMirroredStrategy` with `tf.keras.Model.fit`, refer to this [tutorial](multi_worker_with_keras.ipynb) instead.\n", "\n", "[Distributed Training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports for those interested in a deeper understanding of `tf.distribute.Strategy` APIs." ] @@ -102,9 +100,8 @@ "id": "Zz0EY91y3mxy" }, "source": [ - "Before importing TensorFlow, make a few changes to the environment.\n", - "\n", - "Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. For a real application each worker would be on a different machine." + "Before importing TensorFlow, make a few changes to the environment:\n", + "* Disable all GPUs. This prevents errors caused by all workers trying to use the same GPU. In a real-world application, each worker would be on a different machine." ] }, { @@ -124,7 +121,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "Reset the `TF_CONFIG` environment variable, you'll see more about this later." + "* Reset the `'TF_CONFIG'` environment variable (you'll see more about this later)." ] }, { @@ -144,7 +141,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "Be sure that the current directory is on python's path. This allows the notebook to import the files written by `%%writefile` later.\n" + "* Make sure that the current directory is on Python's path. This allows the notebook to import the files written by `%%writefile` later.\n" ] }, { @@ -194,7 +191,7 @@ "id": "fLW6D2TzvC-4" }, "source": [ - "Next create an `mnist.py` file with a simple model and dataset setup. This python file will be used by the worker-processes in this tutorial:" + "Next, create an `mnist.py` file with a simple model and dataset setup. 
This Python file will be used by the worker-processes in this tutorial:" ] }, { @@ -230,13 +227,18 @@ " return dataset\n", "\n", "def build_cnn_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential([\n", " tf.keras.Input(shape=(28, 28)),\n", " tf.keras.layers.Reshape(target_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])" ] }, @@ -246,9 +248,9 @@ "id": "JmgZwwymxqt5" }, "source": [ - "## Multi-worker Configuration\n", + "## Multi-worker configuration\n", "\n", - "Now let's enter the world of multi-worker training. In TensorFlow, the `TF_CONFIG` environment variable is required for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` used below, is a JSON string used to specify the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module." + "Now let's enter the world of multi-worker training. In TensorFlow, the `'TF_CONFIG'` environment variable is required for training on multiple machines. Each machine may have a different role. The `'TF_CONFIG'` variable used below is a JSON string that specifies the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module. Learn more about setting up the `'TF_CONFIG'` variable in the [Distributed training guide](../../guide/distributed_training.ipynb)." ] }, { @@ -283,7 +285,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `'TF_CONFIG'` environment variable. Here is the same `'TF_CONFIG'` serialized as a JSON string:" ] }, { @@ -303,11 +305,11 @@ "id": "AUBmYRZqxthH" }, "source": [ - "There are two components of `TF_CONFIG`: `cluster` and `task`.\n", + "There are two components of `'TF_CONFIG'`: `'cluster'` and `'task'`.\n", "\n", - "* `cluster` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `worker`. In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `worker` that takes on a little more responsibility like saving checkpoint and writing summary file for TensorBoard in addition to what a regular `worker` does. Such a worker is referred to as the `chief` worker, and it is customary that the `worker` with `index` 0 is appointed as the chief `worker` (in fact this is how `tf.distribute.Strategy` is implemented).\n", + "* `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `'worker'`. 
In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on a little more responsibility like saving checkpoints and writing summary files for TensorBoard in addition to what a regular `'worker'` does. Such a worker is referred to as the `'chief'` worker, and it is customary that the `'worker'` with `'index'` 0 is appointed as the chief `worker`.\n", "\n", - "* `task` provides information of the current task and is different on each worker. It specifies the `type` and `index` of that worker." + "* `'task'` provides information of the current task and is different on each worker. It specifies the `'type'` and `'index'` of that worker." ] }, { @@ -316,7 +318,7 @@ "id": "8YFpxrcsZ2xG" }, "source": [ - "In this example, you set the task `type` to `\"worker\"` and the task `index` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `cluster` dict, but different task `type` or task `index` depending on what the roles of those machines are.\n" + "In this example, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `'TF_CONFIG'` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'` or task `'index'` depending on what the roles of those machines are.\n" ] }, { @@ -325,18 +327,9 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how one may set a `TF_CONFIG` with 2 workers on `localhost`. In practice, users would create multiple workers on external IP addresses/ports, and set `TF_CONFIG` on each worker appropriately.\n", + "For illustration purposes, this tutorial shows how one may set a `'TF_CONFIG'` with two workers on `'localhost'`. In practice, users would create multiple workers on external IP addresses/ports, and set `'TF_CONFIG'` on each worker appropriately.\n", "\n", - "In this example you will use 2 workers, the first worker's `TF_CONFIG` is shown above. For the second worker you would set `tf_config['task']['index']=1`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Above, `tf_config` is just a local variable in python. To actually use it to configure training, this dictionary needs to be serialized as JSON, and placed in the `TF_CONFIG` environment variable." + "This example uses two workers. The first worker's `'TF_CONFIG'` is shown above. For the second worker, set `tf_config['task']['index']=1`." ] }, { @@ -354,7 +347,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this `jupyter notebook` process:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -374,7 +367,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "You can access the environment variable from a subprocesses:" + "you can then access the environment variable from a subprocess:" ] }, { @@ -395,7 +388,7 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use this to pass the `TF_CONFIG` to the worker subprocesses. 
You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." + "In the next section, you'll use this to pass the `'TF_CONFIG'` to the worker subprocesses. You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." ] }, { @@ -406,7 +399,7 @@ "source": [ "## MultiWorkerMirroredStrategy\n", "\n", - "To train the model, use an instance of `tf.distribute.MultiWorkerMirroredStrategy`, which creates copies of all variables in the model's layers on each device across all workers. The [`tf.distribute.Strategy` guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "Before training the model, first create an instance of `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -426,7 +419,7 @@ "id": "N0iv7SyyAohc" }, "source": [ - "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy()` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created." + "Note: `'TF_CONFIG'` is parsed and TensorFlow's GRPC servers are started at the time you call `tf.distribute.MultiWorkerMirroredStrategy.` Therefore, you must set the `'TF_CONFIG'` environment variable before you instantiate a `tf.distribute.Strategy`. To save time in this illustrative example, this is not demonstrated in this tutorial, so that servers do not need to start. You can find a full example in the last section of this tutorial." ] }, { @@ -435,7 +428,7 @@ "id": "TS4S-faBHHam" }, "source": [ - "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. This puts you in the \"[cross-replica context](https://www.tensorflow.org/guide/distributed_training?hl=en#mirroredstrategy)\" for this strategy, which means the strategy is put in control of things like variable placement." + "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. This allows the strategy to control things like variable placement—it will create copies of all variables in the model's layers on each device across all workers." ] }, { @@ -459,9 +452,8 @@ }, "source": [ "## Auto-shard your data across workers\n", - "In multi-worker training, dataset sharding is not necessarily needed, however it gives you exactly once semantic which makes more training more reproducible, i.e. training on multiple workers should be the same as training on one worker. Note: performance can be affected in some cases.\n", "\n", - "See: [`distribute_datasets_from_function`](https://www.tensorflow.org/api_docs/python/tf/distribute/Strategy?version=nightly#distribute_datasets_from_function)" + "In multi-worker training, _dataset sharding_ is needed to ensure convergence and reproducibility. Sharding means handing each worker a subset of the entire dataset—it helps create the experience similar to training on a single worker. In the example below, you're relying on the default autosharding policy of `tf.distribute`. You can also customize it by setting the `tf.data.experimental.AutoShardPolicy` of the `tf.data.experimental.DistributeOptions`. To learn more, refer to the _Sharding_ section of the [Distributed input tutorial](input.ipynb)." 
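+ "\n",
+ "For example, a minimal sketch of overriding the default policy (assuming you want `DATA` sharding instead of the default, and that `dataset` is the `tf.data.Dataset` you are about to distribute) could look like:\n",
+ "\n",
+ "```\n",
+ "options = tf.data.Options()\n",
+ "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA\n",
+ "dataset = dataset.with_options(options)\n",
+ "```"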
] }, { @@ -487,8 +479,8 @@ "id": "rkNzSR3g60iP" }, "source": [ - "## Define Custom Training Loop and Train the model\n", - "Specify an optimizer" + "## Define a custom training loop and train the model\n", + "Specify an optimizer:" ] }, { @@ -500,7 +492,7 @@ "outputs": [], "source": [ "with strategy.scope():\n", - " # The creation of optimizer and train_accuracy will need to be in\n", + " # The creation of optimizer and train_accuracy needs to be in\n", " # `strategy.scope()` as well, since they create variables.\n", " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", " train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", @@ -513,7 +505,7 @@ "id": "RmrDcAii4B5O" }, "source": [ - "Define a training step with `tf.function`\n" + "Define a training step with `tf.function`:\n" ] }, { @@ -533,11 +525,13 @@ " x, y = inputs\n", " with tf.GradientTape() as tape:\n", " predictions = multi_worker_model(x, training=True)\n", - " per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", + " per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", - " loss = tf.nn.compute_average_loss(\n", - " per_batch_loss, global_batch_size=global_batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -558,7 +552,7 @@ "source": [ "### Checkpoint saving and restoring\n", "\n", - "Checkpointing implementation in a Custom Training Loop requires the user to handle it instead of using a keras callback. It allows you to save model's weights and restore them without having to save the whole model." + "As you write a custom training loop, you need to handle [checkpoint saving](../../guide/checkpoint.ipynb) manually instead of relying on a Keras callback. Note that for `MultiWorkerMirroredStrategy`, saving a checkpoint or a complete model requires the participation of all workers, because attempting to save only on the chief worker could lead to a deadlock. Workers also need to write to different paths to avoid overwriting each other. 
Here's an example of how to configure the directories:" ] }, { @@ -572,40 +566,34 @@ "from multiprocessing import util\n", "checkpoint_dir = os.path.join(util.get_temp_dir(), 'ckpt')\n", "\n", - "def _is_chief(task_type, task_id):\n", - " return task_type is None or task_type == 'chief' or (task_type == 'worker' and\n", - " task_id == 0)\n", + "def _is_chief(task_type, task_id, cluster_spec):\n", + " return (task_type is None\n", + " or task_type == 'chief'\n", + " or (task_type == 'worker'\n", + " and task_id == 0\n", + " and \"chief\" not in cluster_spec.as_dict()))\n", + "\n", "def _get_temp_dir(dirpath, task_id):\n", " base_dirpath = 'workertemp_' + str(task_id)\n", " temp_dir = os.path.join(dirpath, base_dirpath)\n", " tf.io.gfile.makedirs(temp_dir)\n", " return temp_dir\n", "\n", - "def write_filepath(filepath, task_type, task_id):\n", + "def write_filepath(filepath, task_type, task_id, cluster_spec):\n", " dirpath = os.path.dirname(filepath)\n", " base = os.path.basename(filepath)\n", - " if not _is_chief(task_type, task_id):\n", + " if not _is_chief(task_type, task_id, cluster_spec):\n", " dirpath = _get_temp_dir(dirpath, task_id)\n", " return os.path.join(dirpath, base)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "P7fabUIEW7-M" - }, - "source": [ - "Note: Checkpointing and Saving need to happen on each worker and they need to write to different paths as they would override each others.\n", - "If you chose to only checkpoint/save on the chief, this can lead to deadlock and is not recommended." - ] - }, { "cell_type": "markdown", "metadata": { "id": "nrcdPHtG4ObO" }, "source": [ - " Here, you'll create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager` so that only the latest checkpoint is preserved." + "Create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager`, so that only the latest checkpoints are preserved:" ] }, { @@ -623,11 +611,16 @@ " name='step_in_epoch')\n", "task_type, task_id = (strategy.cluster_resolver.task_type,\n", " strategy.cluster_resolver.task_id)\n", + "# Normally, you don't need to manually instantiate a `ClusterSpec`, but in this\n", + "# illustrative example you did not set `'TF_CONFIG'` before initializing the\n", + "# strategy. Check out the next section for \"real-world\" usage.\n", + "cluster_spec = tf.train.ClusterSpec(tf_config['cluster'])\n", "\n", "checkpoint = tf.train.Checkpoint(\n", " model=multi_worker_model, epoch=epoch, step_in_epoch=step_in_epoch)\n", "\n", - "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id)\n", + "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id,\n", + " cluster_spec)\n", "checkpoint_manager = tf.train.CheckpointManager(\n", " checkpoint, directory=write_checkpoint_dir, max_to_keep=1)" ] @@ -638,7 +631,7 @@ "id": "RO7cbN40XD5v" }, "source": [ - "Now, when you need to restore, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function." + "Now, when you need to restore a checkpoint, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function (or by calling `tf.train.CheckpointManager.restore_or_initialize`)." 
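+ "\n",
+ "For example, a minimal sketch (reusing the `checkpoint` and `checkpoint_dir` defined above) might look like:\n",
+ "\n",
+ "```\n",
+ "# Restore the latest checkpoint if one exists; otherwise start from scratch.\n",
+ "latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)\n",
+ "if latest_checkpoint:\n",
+ "  checkpoint.restore(latest_checkpoint)\n",
+ "```"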
] }, { @@ -693,7 +686,7 @@ "    # Once the `CheckpointManager` is set up, you're now ready to save, and remove\n", "    # the checkpoints non-chief workers saved.\n", "    checkpoint_manager.save()\n", -    "    if not _is_chief(task_type, task_id):\n", +    "    if not _is_chief(task_type, task_id, cluster_spec):\n", "      tf.io.gfile.rmtree(write_checkpoint_dir)\n", "\n", "    epoch.assign_add(1)\n", @@ -706,7 +699,7 @@ "id": "0W1Osks466DE" }, "source": [ -    "## Full code setup on workers" +    "## Complete code at a glance" ] }, { @@ -715,10 +708,11 @@ "id": "jfYpmIxO6Jck" }, "source": [ -    "To actually run with `MultiWorkerMirroredStrategy` you'll need to run worker processes and pass a `TF_CONFIG` to them.\n", +    "To sum up all the procedures discussed so far:\n", "\n", -    "Like the `mnist.py` file written earlier, here is the `main.py` that \n", -    "contain the same code we walked through step by step previously in this colab, we're just writing it to a file so each of the workers will run it:" +    "1. Create the worker processes.\n", +    "2. Pass `'TF_CONFIG'`s to the worker processes.\n", +    "3. Let each worker process run the script below that contains the training code." ] }, { @@ -746,19 +740,23 @@ "num_steps_per_epoch=70\n", "\n", "# Checkpoint saving and restoring\n", -    "def _is_chief(task_type, task_id):\n", -    "  return task_type is None or task_type == 'chief' or (task_type == 'worker' and\n", -    "                                                       task_id == 0)\n", +    "def _is_chief(task_type, task_id, cluster_spec):\n", +    "  return (task_type is None\n", +    "          or task_type == 'chief'\n", +    "          or (task_type == 'worker'\n", +    "              and task_id == 0\n", +    "              and 'chief' not in cluster_spec.as_dict()))\n", +    "\n", "def _get_temp_dir(dirpath, task_id):\n", "  base_dirpath = 'workertemp_' + str(task_id)\n", "  temp_dir = os.path.join(dirpath, base_dirpath)\n", "  tf.io.gfile.makedirs(temp_dir)\n", "  return temp_dir\n", "\n", -    "def write_filepath(filepath, task_type, task_id):\n", +    "def write_filepath(filepath, task_type, task_id, cluster_spec):\n", "  dirpath = os.path.dirname(filepath)\n", "  base = os.path.basename(filepath)\n", -    "  if not _is_chief(task_type, task_id):\n", +    "  if not _is_chief(task_type, task_id, cluster_spec):\n", "    dirpath = _get_temp_dir(dirpath, task_id)\n", "  return os.path.join(dirpath, base)\n", "\n", @@ -768,11 +766,11 @@ "strategy = tf.distribute.MultiWorkerMirroredStrategy()\n", "\n", "with strategy.scope():\n", -    "  # Model building/compiling need to be within `strategy.scope()`.\n", +    "  # Model building/compiling need to be within `tf.distribute.Strategy.scope`.\n", "  multi_worker_model = mnist.build_cnn_model()\n", "\n", "  multi_worker_dataset = strategy.distribute_datasets_from_function(\n", -    "      lambda input_context: mnist.dataset_fn(global_batch_size, input_context)) \n", +    "      lambda input_context: mnist.dataset_fn(global_batch_size, input_context))\n", "  optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", "  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", "      name='train_accuracy')\n", @@ -786,11 +784,13 @@ "    x, y = inputs\n", "    with tf.GradientTape() as tape:\n", "      predictions = multi_worker_model(x, training=True)\n", -    "      per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", +    "      per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", "          from_logits=True,\n", "          reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", -    "      loss = tf.nn.compute_average_loss(\n", -    "          per_batch_loss, global_batch_size=global_batch_size)\n", +    "      loss = tf.nn.compute_average_loss(per_example_loss)\n", +    "      
model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -809,13 +809,15 @@ " initial_value=tf.constant(0, dtype=tf.dtypes.int64),\n", " name='step_in_epoch')\n", "\n", - "task_type, task_id = (strategy.cluster_resolver.task_type,\n", - " strategy.cluster_resolver.task_id)\n", + "task_type, task_id, cluster_spec = (strategy.cluster_resolver.task_type,\n", + " strategy.cluster_resolver.task_id,\n", + " strategy.cluster_resolver.cluster_spec())\n", "\n", "checkpoint = tf.train.Checkpoint(\n", " model=multi_worker_model, epoch=epoch, step_in_epoch=step_in_epoch)\n", "\n", - "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id)\n", + "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id,\n", + " cluster_spec)\n", "checkpoint_manager = tf.train.CheckpointManager(\n", " checkpoint, directory=write_checkpoint_dir, max_to_keep=1)\n", "\n", @@ -838,11 +840,11 @@ " train_loss = total_loss / num_batches\n", " print('Epoch: %d, accuracy: %f, train_loss: %f.'\n", " %(epoch.numpy(), train_accuracy.result(), train_loss))\n", - " \n", + "\n", " train_accuracy.reset_states()\n", "\n", " checkpoint_manager.save()\n", - " if not _is_chief(task_type, task_id):\n", + " if not _is_chief(task_type, task_id, cluster_spec):\n", " tf.io.gfile.rmtree(write_checkpoint_dir)\n", "\n", " epoch.assign_add(1)\n", @@ -855,7 +857,6 @@ "id": "ItVOvPN1qnZ6" }, "source": [ - "## Train and Evaluate\n", "The current directory now contains both Python files:" ] }, @@ -877,7 +878,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "So JSON-serialize the `'TF_CONFIG'` and add it to the environment variables:" ] }, { @@ -897,7 +898,7 @@ "id": "MsY3dQLK7jdf" }, "source": [ - "Now, you can launch a worker process that will run the `main.py` and use the `TF_CONFIG`:" + "Now, you can launch a worker process that will run the `main.py` and use the `'TF_CONFIG'`:" ] }, { @@ -935,9 +936,9 @@ "1. It uses the `%%bash` which is a [notebook \"magic\"](https://ipython.readthedocs.io/en/stable/interactive/magics.html) to run some bash commands.\n", "2. It uses the `--bg` flag to run the `bash` process in the background, because this worker will not terminate. It waits for all the workers before it starts.\n", "\n", - "The backgrounded worker process won't print output to this notebook, so the `&>` redirects its output to a file, so you can see what happened.\n", + "The backgrounded worker process won't print the output to this notebook. 
The `&>` redirects its output to a file, so that you can inspect what happened.\n", "\n", -    "So, wait a few seconds for the process to start up:" +    "Wait a few seconds for the process to start up:" ] }, { @@ -958,7 +959,7 @@ "id": "ZFPoNxg_9_Mx" }, "source": [ -    "Now look what's been output to the worker's logfile so far:" +    "Now, check what has been output to the worker's log file so far:" ] }, { @@ -988,7 +989,7 @@ "id": "Pi8vPNNA_l4a" }, "source": [ -    "So update the `tf_config` for the second worker's process to pick up:" +    "Update the `tf_config` for the second worker's process to pick up:" ] }, { @@ -1030,7 +1031,7 @@ "id": "hX4FA2O2AuAn" }, "source": [ -    "Now if you recheck the logs written by the first worker you'll see that it participated in training that model:" +    "If you recheck the logs written by the first worker, notice that it participated in training that model:" ] }, { @@ -1053,7 +1054,7 @@ }, "outputs": [], "source": [ -    "# Delete the `TF_CONFIG`, and kill any background tasks so they don't affect the next section.\n", +    "# Delete the `'TF_CONFIG'`, and kill any background tasks so they don't affect the next section.\n", "os.environ.pop('TF_CONFIG', None)\n", "%killbgscripts" ] }, { @@ -1064,9 +1065,9 @@ "id": "bhxMXa0AaZkK" }, "source": [ -    "## Multi worker training in depth\n", +    "## Multi-worker training in depth\n", "\n", -    "This tutorial has demonstrated a `Custom Training Loop` workflow of the multi-worker setup. A detailed description of other topics is available in the [`model.fit's guide`](https://colab.sandbox.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/distribute/multi_worker_with_keras.ipynb) of the multi-worker setup and applicable to CTLs." +    "This tutorial has demonstrated a custom training loop workflow of the multi-worker setup. Detailed descriptions of other topics are available in the [Multi-worker training with Keras (`tf.keras.Model.fit`)](multi_worker_with_keras.ipynb) tutorial, and they also apply to custom training loops." ] }, { @@ -1075,10 +1076,11 @@ "id": "ega2hdOQEmy_" }, "source": [ -    "## See also\n", -    "1. [Distributed Training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", +    "## Learn more\n", +    "\n", +    "1. The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "2. [Official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", "3. 
The [Performance section](../../guide/function.ipynb) in the `tf.function` guide provides information about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models.\n" ] } ], @@ -1086,7 +1088,7 @@ "colab": { "collapsed_sections": [], "name": "multi_worker_with_ctl.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb index b4fffa60fb4..fcee0618854 100644 --- a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb @@ -186,7 +186,7 @@ "\n", "There are two components of `TF_CONFIG`: `cluster` and `task`. `cluster` provides information about the entire cluster, namely the workers and parameter servers in the cluster. `task` provides information about the current task. The first component `cluster` is the same for all workers and parameter servers in the cluster, and the second component `task` is different on each worker and parameter server and specifies its own `type` and `index`. In this example, the task `type` is `worker` and the task `index` is `0`.\n", "\n", - "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e. modify the task `index`.\n", + "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e., modify the task `index`.\n", "\n", "Warning: *Do not execute the following code in Colab.* TensorFlow's runtime will attempt to create a gRPC server at the specified IP address and port, which will likely fail. See the [keras version](multi_worker_with_keras.ipynb) of this tutorial for an example of how you can test run multiple workers on a single machine.\n", "\n", @@ -351,8 +351,7 @@ "Tce3stUlHN0L" ], "name": "multi_worker_with_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb index 1f00bb99e5b..c972e8b7fb6 100644 --- a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb @@ -63,13 +63,36 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.Strategy` API—specifically the `tf.distribute.MultiWorkerMirroredStrategy` class. 
With the help of this strategy, a Keras model that was designed to run on a single-worker can seamlessly work on multiple workers with minimal code changes.\n", -    "\n", -    "For those interested in a deeper understanding of `tf.distribute.Strategy` APIs, the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports.\n", +    "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.MultiWorkerMirroredStrategy` API. With the help of this strategy, a Keras model that was designed to run on a single-worker can seamlessly work on multiple workers with minimal code changes.\n", "\n", "To learn how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop, refer to [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb).\n", "\n", -    "Note that the purpose of this tutorial is to demonstrate a minimal multi-worker example with two workers." +    "This tutorial contains a minimal multi-worker example with two workers for demonstration purposes." ] }, { "cell_type": "markdown", "metadata": { "id": "JUdRerXg6yz3" }, "source": [ "### Choose the right strategy" ] }, { "cell_type": "markdown", "metadata": { "id": "YAiCV_oL63GM" }, "source": [ "Before you dive in, make sure that `tf.distribute.MultiWorkerMirroredStrategy` is the right choice for your accelerator(s) and training. These are two common ways of distributing training with data parallelism:\n", "\n", "* _Synchronous training_, where the steps of training are synced across the workers and replicas, such as `tf.distribute.MirroredStrategy`, `tf.distribute.TPUStrategy`, and `tf.distribute.MultiWorkerMirroredStrategy`. All workers train over different slices of input data in sync, aggregating gradients at each step.\n", "* _Asynchronous training_, where the training steps are not strictly synced, such as `tf.distribute.experimental.ParameterServerStrategy`. All workers are independently training over the input data and updating variables asynchronously.\n", "\n", "If you are looking for multi-worker synchronous training without TPUs, then `tf.distribute.MultiWorkerMirroredStrategy` is your choice. It creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. For those interested, check out the `tf.distribute.experimental.CommunicationOptions` parameter for the collective implementation options.\n", "\n", "For an overview of `tf.distribute.Strategy` APIs, refer to [Distributed training in TensorFlow](../../guide/distributed_training.ipynb)." ] }, { @@ -104,14 +127,14 @@ "source": [ "Before importing TensorFlow, make a few changes to the environment:\n", "\n", -    "1. Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. In a real-world application, each worker would be on a different machine." +    "* In a real-world application, each worker would be on a different machine. For the purposes of this tutorial, all the workers will run on **this** machine. Therefore, disable all GPUs to prevent errors caused by all workers trying to use the same GPU." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "685pbYEY3jGC" + "id": "rpEIVI5upIzM" }, "outputs": [], "source": [ @@ -124,7 +147,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "2. Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" + "* Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" ] }, { @@ -144,7 +167,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "3. Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" + "* Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" ] }, { @@ -162,10 +185,30 @@ { "cell_type": "markdown", "metadata": { - "id": "pDhHuMjb7bfU" + "id": "9hLpDZhAz2q-" }, "source": [ - "Now import TensorFlow:" + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-XqozLfzz30N" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "524e38dab658" + }, + "source": [ + "Finally, import TensorFlow:" ] }, { @@ -194,7 +237,7 @@ "id": "fLW6D2TzvC-4" }, "source": [ - "Next, create an `mnist.py` file with a simple model and dataset setup. This Python file will be used by the worker-processes in this tutorial:" + "Next, create an `mnist_setup.py` file with a simple model and dataset setup. This Python file will be used by the worker processes in this tutorial:" ] }, { @@ -205,7 +248,7 @@ }, "outputs": [], "source": [ - "%%writefile mnist.py\n", + "%%writefile mnist_setup.py\n", "\n", "import os\n", "import tensorflow as tf\n", @@ -256,11 +299,11 @@ }, "outputs": [], "source": [ - "import mnist\n", + "import mnist_setup\n", "\n", "batch_size = 64\n", - "single_worker_dataset = mnist.mnist_dataset(batch_size)\n", - "single_worker_model = mnist.build_and_compile_cnn_model()\n", + "single_worker_dataset = mnist_setup.mnist_dataset(batch_size)\n", + "single_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "single_worker_model.fit(single_worker_dataset, epochs=3, steps_per_epoch=70)" ] }, @@ -276,7 +319,7 @@ "\n", "### A cluster with jobs and tasks\n", "\n", - "In TensorFlow, distributed training involves: a `'cluster'`\n", + "In TensorFlow, distributed training involves a `'cluster'`\n", "with several jobs, and each of the jobs may have one or more `'task'`s.\n", "\n", "You will need the `TF_CONFIG` configuration environment variable for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` is a JSON string used to specify the cluster configuration for each worker that is part of the cluster.\n", @@ -284,10 +327,10 @@ "There are two components of a `TF_CONFIG` variable: `'cluster'` and `'task'`.\n", "\n", "* A `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs, such as `'worker'` or `'chief'`.\n", - " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. 
Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", - " - It is customary for the `'chief'` to have `'index'` `0` be appointed to (in fact, this is how `tf.distribute.Strategy` is implemented).\n", + " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on more responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", + " - It is customary for the worker with `'index'` `0` to be the `'chief'`.\n", "\n", - "* A `'task'` provides information of the current task and is different for each worker. It specifies the `'type'` and `'index'` of that worker.\n", + "* A `'task'` provides information on the current task and is different for each worker. It specifies the `'type'` and `'index'` of that worker.\n", "\n", "Below is an example configuration:" ] @@ -314,7 +357,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `TF_CONFIG` environment variable." ] }, { @@ -328,22 +371,13 @@ "json.dumps(tf_config)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Note that`tf_config` is just a local variable in Python. To be able to use it for a training configuration, this dict needs to be serialized as a JSON and placed in a `TF_CONFIG` environment variable." - ] - }, { "cell_type": "markdown", "metadata": { "id": "8YFpxrcsZ2xG" }, "source": [ - "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker and do more work than the others.\n", + "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker.\n", "\n", "Note: Other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'`s or task `'index'`es, depending on the roles of those machines." ] @@ -354,12 +388,8 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`.\n", - "\n", - "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly.\n", - "\n", - "In this tutorial, you will use two workers:\n", - "- The first (`'chief'`) worker's `TF_CONFIG` is shown above.\n", + "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly. 
For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`:\n", + "- The first (`'chief'`) worker's `TF_CONFIG` as shown above.\n", "- For the second worker, you will set `tf_config['task']['index']=1`" ] }, @@ -378,9 +408,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent.\n", - "\n", - "For example, you can set an environment variable in this Jupyter Notebook process as follows:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -400,7 +428,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "Then, you can access the environment variable from a subprocesses:" + "... then you can access the environment variable from the subprocesses:" ] }, { @@ -421,7 +449,16 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use a similar method to pass the `TF_CONFIG` to the worker subprocesses. In a real-world scenario, you wouldn't launch your jobs this way, but it's sufficient in this example." + "In the next section, you'll use this method to pass the `TF_CONFIG` to the worker subprocesses. You would never really launch your jobs this way in a real-world scenario—this tutorial is just showing how to do it with a minimal multi-worker example." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dnDJmaRA9qnf" + }, + "source": [ + "## Train the model" ] }, { @@ -430,16 +467,7 @@ "id": "UhNtHfuxCGVy" }, "source": [ - "## Choose the right strategy\n", - "\n", - "In TensorFlow, there are two main forms of distributed training:\n", - "\n", - "* _Synchronous training_, where the steps of training are synced across the workers and replicas, and\n", - "* _Asynchronous training_, where the training steps are not strictly synced (for example, [parameter server training](parameter_server_training.ipynb)).\n", - "\n", - "This tutorial demonstrates how to perform synchronous multi-worker training using an instance of `tf.distribute.MultiWorkerMirroredStrategy`.\n", - "\n", - "`MultiWorkerMirroredStrategy` creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. The [`tf.distribute.Strategy` guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "To train the model, firstly create an instance of the `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -459,16 +487,7 @@ "id": "N0iv7SyyAohc" }, "source": [ - "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy()` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. Since `TF_CONFIG` is not set yet, the above strategy is effectively single-worker training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FMy2VM4Akzpr" - }, - "source": [ - "`MultiWorkerMirroredStrategy` provides multiple implementations via the [`CommunicationOptions`](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CommunicationOptions) parameter: 1) `RING` implements ring-based collectives using gRPC as the cross-host communication layer; 2) `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives; and 3) `AUTO` defers the choice to the runtime. 
The best choice of collective implementation depends upon the number and kind of GPUs, and the network interconnect in the cluster." + "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. Since `TF_CONFIG` is not set yet, the above strategy is effectively single-worker training." ] }, { @@ -477,8 +496,6 @@ "id": "H47DDcOgfzm7" }, "source": [ - "## Train the model\n", - "\n", "With the integration of `tf.distribute.Strategy` API into `tf.keras`, the only change you will make to distribute the training to multiple-workers is enclosing the model building and `model.compile()` call inside `strategy.scope()`. The distribution strategy's scope dictates how and where the variables are created, and in the case of `MultiWorkerMirroredStrategy`, the variables created are `MirroredVariable`s, and they are replicated on each of the workers.\n" ] }, @@ -492,7 +509,7 @@ "source": [ "with strategy.scope():\n", " # Model building/compiling need to be within `strategy.scope()`.\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()" + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()" ] }, { @@ -512,7 +529,7 @@ "source": [ "To actually run with `MultiWorkerMirroredStrategy` you'll need to run worker processes and pass a `TF_CONFIG` to them.\n", "\n", - "Like the `mnist.py` file written earlier, here is the `main.py` that each of the workers will run:" + "Like the `mnist_setup.py` file written earlier, here is the `main.py` that each of the workers will run:" ] }, { @@ -529,7 +546,7 @@ "import json\n", "\n", "import tensorflow as tf\n", - "import mnist\n", + "import mnist_setup\n", "\n", "per_worker_batch_size = 64\n", "tf_config = json.loads(os.environ['TF_CONFIG'])\n", @@ -538,11 +555,11 @@ "strategy = tf.distribute.MultiWorkerMirroredStrategy()\n", "\n", "global_batch_size = per_worker_batch_size * num_workers\n", - "multi_worker_dataset = mnist.mnist_dataset(global_batch_size)\n", + "multi_worker_dataset = mnist_setup.mnist_dataset(global_batch_size)\n", "\n", "with strategy.scope():\n", " # Model building/compiling need to be within `strategy.scope()`.\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "\n", "\n", "multi_worker_model.fit(multi_worker_dataset, epochs=3, steps_per_epoch=70)" @@ -584,7 +601,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "Serialize the `TF_CONFIG` to JSON and add it to the environment variables:" ] }, { @@ -686,7 +703,7 @@ "id": "RqZhVF7L_KOy" }, "source": [ - "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready, and is waiting for all the other worker(s) to be ready to proceed." + "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready and is waiting for all the other worker(s) to be ready to proceed." ] }, { @@ -758,11 +775,7 @@ "id": "zL79ak5PMzEg" }, "source": [ - "Unsurprisingly, this ran _slower_ than the test run at the beginning of this tutorial.\n", - "\n", - "Running multiple workers on a single machine only adds overhead.\n", - "\n", - "The goal here was not to improve the training time, but only to give an example of multi-worker training." 
+ "Note: This may run slower than the test run at the beginning of this tutorial because running multiple workers on a single machine only adds overhead. The goal here is not to improve the training time but to give an example of multi-worker training.\n" ] }, { @@ -784,11 +797,16 @@ "id": "9j2FJVHoUIrE" }, "source": [ - "## Multi-worker training in depth\n", - "\n", - "So far, you have learned how to perform a basic multi-worker setup.\n", - "\n", - "During the rest of the tutorial, you will learn about other factors, which may be useful or important for real use cases, in detail." + "## Multi-worker training in depth\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C1hBks_dAZmT" + }, + "source": [ + "So far, you have learned how to perform a basic multi-worker setup. The rest of the tutorial goes over other factors, which may be useful or important for real use cases, in detail." ] }, { @@ -820,25 +838,41 @@ "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF\n", "\n", "global_batch_size = 64\n", - "multi_worker_dataset = mnist.mnist_dataset(batch_size=64)\n", + "multi_worker_dataset = mnist_setup.mnist_dataset(batch_size=64)\n", "dataset_no_auto_shard = multi_worker_dataset.with_options(options)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "z85hElxsBQsT" + }, + "source": [ + "### Evaluation" + ] + }, { "cell_type": "markdown", "metadata": { "id": "gmqvlh5LhAoU" }, "source": [ - "### Evaluation\n", - "\n", - "If you pass the `validation_data` into `Model.fit`, it will alternate between training and evaluation for each epoch. The evaluation taking the `validation_data` is distributed across the same set of workers and the evaluation results are aggregated and available for all workers.\n", + "If you pass the `validation_data` into `Model.fit` as well, it will alternate between training and evaluation for each epoch. The evaluation work is distributed across the same set of workers, and its results are aggregated and available to all workers.\n", "\n", "Similar to training, the validation dataset is automatically sharded at the file level. You need to set a global batch size in the validation dataset and set the `validation_steps`.\n", "\n", - "A repeated dataset is also recommended for evaluation.\n", + "A repeated dataset (by calling `tf.data.Dataset.repeat`) is recommended for evaluation.\n", "\n", - "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." + "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what an Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNkoxUPJBNTb" + }, + "source": [ + "### Performance" ] }, { @@ -847,25 +881,21 @@ "id": "XVk4ftYx6JAO" }, "source": [ - "### Performance\n", - "\n", - "You now have a Keras model that is all set up to run in multiple workers with the `MultiWorkerMirroredStrategy`.\n", - "\n", - "To tweak performance of multi-worker training, you can try the following:\n", + "To tweak the performance of multi-worker training, you can try the following:\n", "\n", "- `tf.distribute.MultiWorkerMirroredStrategy` provides multiple [collective communication implementations](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CommunicationImplementation):\n", " - `RING` implements ring-based collectives using gRPC as the cross-host communication layer.\n", " - `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives.\n", " - `AUTO` defers the choice to the runtime.\n", " \n", - " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnect in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", + " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnects in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", " \n", " ```python\n", - " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CollectiveCommunication.NCCL)\n", + " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CommunicationImplementation.NCCL)\n", " ```\n", "\n", "- Cast the variables to `tf.float` if possible:\n", - " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how this can be done." + " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how to do this." ] }, { @@ -882,7 +912,7 @@ "\n", "When a worker becomes unavailable, other workers will fail (possibly after a timeout). In such cases, the unavailable worker needs to be restarted, as well as other workers that have failed.\n", "\n", - "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. The TensorFlow team are introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, to also add the support to single worker training for a consistent experience, and removed fault tolerance functionality from existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new callback." + "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. 
The TensorFlow team is introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, which also adds support for single-worker training for a consistent experience, and removes the fault tolerance functionality from the existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new `BackupAndRestore` callback." ] }, { @@ -891,13 +921,13 @@ "id": "KvHPjGlyyFt6" }, "source": [ -    "#### ModelCheckpoint callback\n", +    "#### The `ModelCheckpoint` callback\n", "\n", "`ModelCheckpoint` callback no longer provides fault tolerance functionality, please use [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback instead.\n", "\n", "The `ModelCheckpoint` callback can still be used to save checkpoints. But with this, if training was interrupted or successfully finished, in order to continue training from the checkpoint, the user is responsible to load the model manually.\n", "\n", -    "Optionally the user can choose to save and restore model/weights outside `ModelCheckpoint` callback." +    "Optionally, users can choose to save and restore model/weights outside the `ModelCheckpoint` callback." ] }, { @@ -919,14 +949,14 @@ "\n", "You should have some cleanup logic that deletes the temporary directories created by the workers once your training has completed.\n", "\n", -    "The reason for saving on the chief and workers at the same time is because you might be aggregating variables during checkpointing which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", +    "The reason for saving on the chief and workers at the same time is that you might be aggregating variables during checkpointing, which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", "\n", -    "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", -    "- `task_type` tells you what the current job is (e.g. `'worker'`).\n", +    "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is the chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", +    "- `task_type` tells you what the current job is (for example, `'worker'`).\n", "- `task_id` tells you the identifier of the worker.\n", "- The worker with `task_id == 0` is designated as the chief worker.\n", "\n", -    "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the the worker's `task_id`:\n", +    "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the worker's `task_id`:\n", "\n", "- For the chief worker (with `task_id == 0`), it writes to the original file path. 
\n", "- For other workers, it creates a temporary directory—`temp_dir`—with the `task_id` in the directory path to write in:" @@ -943,14 +973,14 @@ "model_path = '/tmp/keras-model'\n", "\n", "def _is_chief(task_type, task_id):\n", - " # Note: there are two possible `TF_CONFIG` configuration.\n", + " # Note: there are two possible `TF_CONFIG` configurations.\n", " # 1) In addition to `worker` tasks, a `chief` task type is use;\n", " # in this case, this function should be modified to\n", " # `return task_type == 'chief'`.\n", " # 2) Only `worker` task type is used; in this case, worker 0 is\n", " # regarded as the chief. The implementation demonstrated here\n", " # is for this case.\n", - " # For the purpose of this Colab section, the `task_type is None` case\n", + " # For the purpose of this Colab section, the `task_type` is `None` case\n", " # is added because it is effectively run with only a single worker.\n", " return (task_type == 'worker' and task_id == 0) or task_type is None\n", "\n", @@ -981,6 +1011,15 @@ "With that, you're now ready to save:" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "XnToxeIcg_6O" + }, + "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." + ] + }, { "cell_type": "code", "execution_count": null, @@ -998,7 +1037,7 @@ "id": "8LXUVVl9_v5x" }, "source": [ - "As described above, later on the model should only be loaded from the path chief saved to, so let's remove the temporary ones the non-chief workers saved:" + "As described above, later on the model should only be loaded from the file path the chief worker saved to. Therefore, remove the temporary ones the non-chief workers have saved:" ] }, { @@ -1019,7 +1058,7 @@ "id": "Nr-2PKlHAPBT" }, "source": [ - "Now, when it's time to load, let's use convenient `tf.keras.models.load_model` API, and continue with further work.\n", + "Now, when it's time to load, use the convenient `tf.keras.models.load_model` API, and continue with further work.\n", "\n", "Here, assume only using single worker to load and continue training, in which case you do not call `tf.keras.models.load_model` within another `strategy.scope()` (note that `strategy = tf.distribute.MultiWorkerMirroredStrategy()`, as defined earlier):" ] @@ -1117,20 +1156,23 @@ "id": "kmH8uCUhfn4w" }, "source": [ - "#### BackupAndRestore callback\n", + "#### The `BackupAndRestore` callback\n", + "\n", + "The `tf.keras.callbacks.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current training state in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. \n", "\n", - "The `tf.keras.callbacks.experimental.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current epoch number in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. This is done at the end of each epoch.\n", + "Note: In Tensorflow 2.9, the current model and the training state is backed up at epoch boundaries. In the `tf-nightly` version and from TensorFlow 2.10, the `BackupAndRestore` callback can back up the model and the training state at epoch or step boundaries. `BackupAndRestore` accepts an optional `save_freq` argument. `save_freq` accepts either `'epoch'` or an `int` value. 
If `save_freq` is set to `'epoch'`, the model is backed up after every epoch. If `save_freq` is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches.\n", "\n", -    "Once jobs get interrupted and restart, the callback restores the last checkpoint, and training continues from the beginning of the interrupted epoch. Any partial training already done in the unfinished epoch before interruption will be thrown away, so that it doesn't affect the final model state.\n", +    "Once the jobs get interrupted and restarted, the `BackupAndRestore` callback restores the last checkpoint, and you can continue training from the beginning of the epoch and step at which the training state was last saved.\n", "\n", -    "To use it, provide an instance of `tf.keras.callbacks.experimental.BackupAndRestore` at the `Model.fit` call.\n", +    "To use it, provide an instance of `tf.keras.callbacks.BackupAndRestore` at the `Model.fit` call.\n", "\n", -    "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster pauses until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker rejoins the cluster. Then, every worker reads the checkpoint file that was previously saved and picks up its former state, thereby allowing the cluster to get back in sync. Then, the training continues.\n", +    "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster will pause until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker will rejoin the cluster. Then, every worker will read the checkpoint file that was previously saved and pick up its former state, thereby allowing the cluster to get back in sync. Then, the training will continue. The distributed dataset iterator state will be re-initialized and not restored.\n", "\n", "The `BackupAndRestore` callback uses the `CheckpointManager` to save and restore the training state, which generates a file called checkpoint that tracks existing checkpoints together with the latest one. For this reason, `backup_dir` should not be re-used to store other checkpoints in order to avoid name collision.\n", "\n", -    "Currently, the `BackupAndRestore` callback supports single worker with no strategy, MirroredStrategy, and multi-worker with MultiWorkerMirroredStrategy.\n", -    "Below are two examples for both multi-worker training and single worker training." +    "Currently, the `BackupAndRestore` callback supports single-worker training with no strategy, with `MirroredStrategy`, and multi-worker training with `MultiWorkerMirroredStrategy`.\n", +    "\n", +    "Below are two examples for both multi-worker training and single-worker training:" ] }, { @@ -1141,12 +1183,73 @@ }, "outputs": [], "source": [ -    "# Multi-worker training with MultiWorkerMirroredStrategy\n", -    "# and the BackupAndRestore callback.\n", +    "# Multi-worker training with `MultiWorkerMirroredStrategy`\n", +    "# and the `BackupAndRestore` callback. 
The training state \n", + "# is backed up at epoch boundaries by default.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f8e86TAp0Rsl" + }, + "source": [ + "If the `save_freq` argument in the `BackupAndRestore` callback is set to `'epoch'`, the model is backed up after every epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rZjQGPsF0aEI" + }, + "outputs": [], + "source": [ + "# The training state is backed up at epoch boundaries because `save_freq` is\n", + "# set to `epoch`.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p-r44kCM0jc6" + }, + "source": [ + "Note: The next code block uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "If the `save_freq` argument in the `BackupAndRestore` callback is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bSJUyLSF0moC" + }, + "outputs": [], + "source": [ + "# The training state is backed up at every 30 steps because `save_freq` is set\n", + "# to an integer value of `30`.\n", "\n", - "callbacks = [tf.keras.callbacks.experimental.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup', save_freq=30)]\n", "with strategy.scope():\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "multi_worker_model.fit(multi_worker_dataset,\n", " epochs=3,\n", " steps_per_epoch=70,\n", @@ -1161,7 +1264,7 @@ "source": [ "If you inspect the directory of `backup_dir` you specified in `BackupAndRestore`, you may notice some temporarily generated checkpoint files. Those files are needed for recovering the previously lost instances, and they will be removed by the library at the end of `Model.fit` upon successful exiting of your training.\n", "\n", - "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using [Save/Restore Model](#model_saving_and_loading) mentioned above, and by providing `initial_epoch` in `Model.fit`." + "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using `Model.save`/`tf.saved_model.save` and `tf.keras.models.load_model` for saving and restoring models, respectively, as described in the _Model saving and loading_ section above, and by providing `initial_epoch` in `Model.fit` during training." ] }, { @@ -1172,7 +1275,7 @@ "source": [ "## Additional resources\n", "\n", - "1. The [Distributed training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", + "1. 
The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "1. The [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) tutorial shows how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop.\n", "1. Check out the [official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", "1. The [Better performance with tf.function](../../guide/function.ipynb) guide provides information about other strategies and tools, such as the [TensorFlow Profiler](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." @@ -1181,9 +1284,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "multi_worker_with_keras.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/parameter_server_training.ipynb b/site/en/tutorials/distribute/parameter_server_training.ipynb index fae0a2d3576..2e6bb0cfce2 100644 --- a/site/en/tutorials/distribute/parameter_server_training.ipynb +++ b/site/en/tutorials/distribute/parameter_server_training.ipynb @@ -74,7 +74,7 @@ "\n", "A parameter server training cluster consists of _workers_ and _parameter servers_. Variables are created on parameter servers and they are read and updated by workers in each step. By default, workers read and update these variables independently without synchronizing with each other. This is why sometimes parameter server-style training is called _asynchronous training_.\n", "\n", - "In TensorFlow 2, parameter server training is powered by the `tf.distribute.experimental.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." + "In TensorFlow 2, parameter server training is powered by the `tf.distribute.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." ] }, { @@ -87,9 +87,9 @@ "\n", "There are two main supported training methods:\n", "\n", - "- The Keras `Model.fit` API, which is recommended when you prefer a high-level abstraction and handling of training.\n", - "- A custom training loop (you can refer to [Custom training](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough#train_the_model), [Writing a training loop from scratch\n", - "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_ctl) for more details.) Custom loop training is recommended when you prefer to define the details of their training loop." + "- The Keras `Model.fit` API: if you prefer a high-level abstraction and handling of training. 
This is generally recommended if you are training a `tf.keras.Model`.\n", + "- A custom training loop: if you prefer to define the details of your training loop (you can refer to guides on [Custom training](../customization/custom_training_walkthrough.ipynb), [Writing a training loop from scratch\n", + "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) for more details)." ] }, { @@ -100,15 +100,15 @@ "source": [ "### A cluster with jobs and tasks\n", "\n", - "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves: a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", + "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", "\n", "When using parameter server training, it is recommended to have:\n", "\n", "- One _coordinator_ job (which has the job name `chief`)\n", - "- Multiple _worker_ jobs (job name `worker`); and\n", + "- Multiple _worker_ jobs (job name `worker`)\n", "- Multiple _parameter server_ jobs (job name `ps`)\n", "\n", - "While the _coordinator_ creates resources, dispatches training tasks, writes checkpoints, and deals with task failures, _workers_ and _parameter servers_ run `tf.distribute.Server` that listen for requests from the coordinator." + "The _coordinator_ creates resources, dispatches training tasks, writes checkpoints, and deals with task failures. The _workers_ and _parameter servers_ run `tf.distribute.Server` instances that listen for requests from the coordinator." ] }, { @@ -117,10 +117,9 @@ "id": "oLV1FbpLtqtB" }, "source": [ - "### Parameter server training with `Model.fit` API\n", + "### Parameter server training with the `Model.fit` API\n", "\n", - "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.experimental.ParameterServerStrategy` object, and a `tf.keras.utils.experimental.DatasetCreator` as the input. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, followed by\n", - "a `Model.fit` call." + "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.ParameterServerStrategy` object. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, and calling `Model.fit`." 
] }, { @@ -131,12 +130,11 @@ "source": [ "### Parameter server training with a custom training loop\n", "\n", - "With custom training loops, the `tf.distribute.experimental.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", + "With custom training loops, the `tf.distribute.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", "\n", - "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.Strategy` object.\n", - "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training#training_loop).\n", + "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.ParameterServerStrategy` object.\n", + "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](custom_training.ipynb).\n", "- The `ClusterCoordinator` object then dispatches the execution of these training steps to remote workers.\n", - "- For parameter server training, the `ClusterCoordinator` needs to work with a `tf.distribute.experimental.ParameterServerStrategy`.\n", "\n", "The most important API provided by the `ClusterCoordinator` object is `schedule`:\n", "\n", @@ -144,7 +142,7 @@ "- The queued functions will be dispatched to remote workers in background threads and their `RemoteValue`s will be filled asynchronously.\n", "- Since `schedule` doesn’t require worker assignment, the `tf.function` passed in can be executed on any available worker.\n", "- If the worker it is executed on becomes unavailable before its completion, the function will be retried on another available worker.\n", - "- Because of this fact and the fact that function execution is not atomic, a function may be executed more than once.\n", + "- Because of this fact and the fact that function execution is not atomic, a single function call may be executed more than once.\n", "\n", "In addition to dispatching remote functions, the `ClusterCoordinator` also helps\n", "to create datasets on all the workers and rebuild these datasets when a worker recovers from failure." @@ -169,9 +167,7 @@ }, "outputs": [], "source": [ - "!pip install portpicker\n", - "!pip uninstall tensorflow keras -y\n", - "!pip install tf-nightly" + "!pip install portpicker" ] }, { @@ -187,8 +183,7 @@ "import os\n", "import random\n", "import portpicker\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers.experimental import preprocessing" + "import tensorflow as tf" ] }, { @@ -199,9 +194,9 @@ "source": [ "## Cluster setup\n", "\n", - "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs side-car evaluation (see the side-car evaluation section below). 
The requirements to set them up are:\n", + "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs sidecar evaluation (refer to the [sidecar evaluation section](#sidecar_evaluation) below). The requirements to set them up are:\n", "\n", - "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers except the evaluator.\n", + "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers, except the evaluator.\n", "- The workers and parameter servers need to know which port they need to listen to. For the sake of simplicity, you can usually pass in the complete cluster information when creating TensorFlow servers on these tasks.\n", "- The evaluator task doesn’t have to know the setup of the training cluster. If it does, it should not attempt to connect to the training cluster.\n", "- Workers and parameter servers should have task types as `\"worker\"` and `\"ps\"`, respectively. The coordinator should use `\"chief\"` as the task type for legacy reasons.\n", @@ -217,7 +212,7 @@ "source": [ "### In-process cluster\n", "\n", - "You will start by creating several TensorFlow servers in advance and connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." + "You will start by creating several TensorFlow servers in advance and you will connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." ] }, { @@ -279,9 +274,9 @@ "id": "pX_91OByt0J2" }, "source": [ - "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/7621d31921c2ed979f212da066631ddfda37adf5/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L437).\n", + "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/eb4c40fc91da260199fa2aed6fe67d36ad49fafd/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L447).\n", "\n", - "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for an example of this approach." + "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](multi_worker_with_keras.ipynb) for an example of this approach." ] }, { @@ -292,7 +287,7 @@ "source": [ "## Instantiate a ParameterServerStrategy\n", "\n", - "Before you dive into the training code, let's instantiate a `ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable-sharding)." + "Before you dive into the training code, let's instantiate a `tf.distribute.ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. 
The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable_sharding)." ] }, { @@ -308,7 +303,7 @@ " min_shard_bytes=(256 << 10),\n", " max_shards=NUM_PS))\n", "\n", - "strategy = tf.distribute.experimental.ParameterServerStrategy(\n", + "strategy = tf.distribute.ParameterServerStrategy(\n", " cluster_resolver,\n", " variable_partitioner=variable_partitioner)" ] @@ -331,7 +326,8 @@ "### Variable sharding\n", "\n", "Variable sharding refers to splitting a variable into multiple smaller\n", - "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers.\n", + "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers, for example, when using very large embeddings\n", + "that may not fit in a single machine's memory.\n", "\n", "To enable variable sharding, you can pass in a `variable_partitioner` when\n", "constructing a `ParameterServerStrategy` object. The `variable_partitioner` will\n", @@ -340,7 +336,7 @@ "`variable_partitioner`s are provided such as\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner`. It is recommended to use size-based partitioners like\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner` to avoid\n", - "partitioning small variables, which could have negative impact on model training\n", + "partitioning small variables, which could have a negative impact on model training\n", "speed." ] }, @@ -350,16 +346,16 @@ "id": "1--SxlxtsOb7" }, "source": [ - "When a `variable_partitioner` is passed in and if you create a variable directly\n", - "under `strategy.scope()`, it will become a container type with a `variables`\n", - "property which provides access to the list of shards. In most cases, this\n", + "When a `variable_partitioner` is passed in, and you create a variable directly\n", + "under `Strategy.scope`, the variable will become a container type with a `variables`\n", + "property, which provides access to the list of shards. In most cases, this\n", "container will be automatically converted to a Tensor by concatenating all the\n", "shards. As a result, it can be used as a normal variable. On the other hand,\n", "some TensorFlow methods such as `tf.nn.embedding_lookup` provide efficient\n", "implementation for this container type and in these methods automatic\n", "concatenation will be avoided.\n", "\n", - "Please see the API docs of `tf.distribute.experimental.ParameterServerStrategy` for more details." + "Refer to the API docs of `tf.distribute.ParameterServerStrategy` for more details." ] }, { @@ -371,7 +367,7 @@ "## Training with `Model.fit`\n", "\n", "\n", - "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of overridable `train_step`, and callbacks, which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used for other strategies with a simple swap of the strategy object." 
+ "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of an overridable `train_step`, and callbacks which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used with other strategies with a simple swap of the strategy object." ] }, { @@ -382,12 +378,14 @@ "source": [ "### Input data\n", "\n", - "`Model.fit` with parameter server training requires that the input data be\n", - "provided in a callable that takes a single argument of type `tf.distribute.InputContext`, and returns a `tf.data.Dataset`. Then, create a `tf.keras.utils.experimental.DatasetCreator` object that takes such `callable`, and an optional `tf.distribute.InputOptions` object via `input_options` argument.\n", + "Keras `Model.fit` with `tf.distribute.ParameterServerStrategy` can take input data in the form of a `tf.data.Dataset`, `tf.distribute.DistributedDataset`, or a `tf.keras.utils.experimental.DatasetCreator`, with `Dataset` being the recommended option for ease of use. If you encounter memory issues using `Dataset`, however, you may need to use `DatasetCreator` with a callable `dataset_fn` argument (refer to the `tf.keras.utils.experimental.DatasetCreator` API documentation for details).\n", "\n", - "Note that it is recommended to shuffle and repeat the data with parameter server training, and specify `steps_per_epoch` in `fit` call so the library knows the epoch boundaries.\n", + "If you transform your dataset into a `tf.data.Dataset`, you should use `Dataset.shuffle` and `Dataset.repeat`, as demonstrated in the code example below.\n", "\n", - "Please see the [Distributed input](https://www.tensorflow.org/tutorials/distribute/input#usage_2) tutorial for more information about the `InputContext` argument." + "- Keras `Model.fit` with parameter server training assumes that each worker receives the same dataset, except when it is shuffled differently. Therefore, by calling `Dataset.shuffle`, you ensure more even iterations over the data.\n", + "- Because workers do not synchronize, they may finish processing their datasets at different times. Therefore, the easiest way to define epochs with parameter server training is to use `Dataset.repeat`—which repeats a dataset indefinitely when called without an argument—and specify the `steps_per_epoch` argument in the `Model.fit` call.\n", + "\n", + "Refer to the \"Training workflows\" section of the [tf.data guide](../../guide/data.ipynb) for more details on `shuffle` and `repeat`." 
] }, { @@ -398,23 +396,14 @@ }, "outputs": [], "source": [ - "def dataset_fn(input_context):\n", - " global_batch_size = 64\n", - " batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n", - "\n", - " x = tf.random.uniform((10, 10))\n", - " y = tf.random.uniform((10,))\n", - "\n", - " dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", - " dataset = dataset.shard(\n", - " input_context.num_input_pipelines,\n", - " input_context.input_pipeline_id)\n", - " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.prefetch(2)\n", + "global_batch_size = 64\n", "\n", - " return dataset\n", + "x = tf.random.uniform((10, 10))\n", + "y = tf.random.uniform((10,))\n", "\n", - "dc = tf.keras.utils.experimental.DatasetCreator(dataset_fn)" + "dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", + "dataset = dataset.batch(global_batch_size)\n", + "dataset = dataset.prefetch(2)" ] }, { @@ -423,11 +412,18 @@ "id": "v_jhF70K7zON" }, "source": [ - "The code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n", - "\n", + "If you instead create your dataset with `tf.keras.utils.experimental.DatasetCreator`, the code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w60PuWrWwBD4" + }, + "source": [ "### Model construction and compiling\n", "\n", - "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, or parameters such as `steps_per_execution`:" + "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, and other parameters such as `steps_per_execution`:" ] }, { @@ -441,7 +437,7 @@ "with strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])\n", "\n", - "model.compile(tf.keras.optimizers.SGD(), loss='mse', steps_per_execution=10)" + " model.compile(tf.keras.optimizers.legacy.SGD(), loss=\"mse\", steps_per_execution=10)" ] }, { @@ -454,13 +450,13 @@ "\n", " \n", "\n", - "Before you call `model.fit` for the actual training, let's prepare the needed callbacks for common tasks, such as:\n", + "Before you call Keras `Model.fit` for the actual training, prepare any needed [callbacks](https://www.tensorflow.org/guide/keras/train_and_evaluate) for common tasks, such as:\n", "\n", - "- `ModelCheckpoint`: to save the model weights.\n", - "- `BackupAndRestore`: to make sure the training progress is automatically backed up, and recovered if the cluster experiences unavailability (such as abort or preemption); or\n", - "- `TensorBoard`: to save the progress reports into summary files, which get visualized in TensorBoard tool.\n", + "- `tf.keras.callbacks.ModelCheckpoint`: saves the model at a certain frequency, such as after every epoch.\n", + "- `tf.keras.callbacks.BackupAndRestore`: provides fault tolerance by backing up the model and current epoch number, if the cluster experiences unavailability (such as abort or preemption). 
You can then restore the training state upon a restart from a job failure, and continue training from the beginning of the interrupted epoch.\n", + "- `tf.keras.callbacks.TensorBoard`: periodically writes model logs in summary files that can be visualized in the TensorBoard tool.\n", "\n", - "Note: Due to performance consideration, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." + "Note: Due to performance considerations, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." ] }, { @@ -471,18 +467,18 @@ }, "outputs": [], "source": [ - "working_dir = '/tmp/my_working_dir'\n", - "log_dir = os.path.join(working_dir, 'log')\n", - "ckpt_filepath = os.path.join(working_dir, 'ckpt')\n", - "backup_dir = os.path.join(working_dir, 'backup')\n", + "working_dir = \"/tmp/my_working_dir\"\n", + "log_dir = os.path.join(working_dir, \"log\")\n", + "ckpt_filepath = os.path.join(working_dir, \"ckpt\")\n", + "backup_dir = os.path.join(working_dir, \"backup\")\n", "\n", "callbacks = [\n", " tf.keras.callbacks.TensorBoard(log_dir=log_dir),\n", " tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_filepath),\n", - " tf.keras.callbacks.experimental.BackupAndRestore(backup_dir=backup_dir),\n", + " tf.keras.callbacks.BackupAndRestore(backup_dir=backup_dir),\n", "]\n", "\n", - "model.fit(dc, epochs=5, steps_per_epoch=20, callbacks=callbacks)" + "model.fit(dataset, epochs=5, steps_per_epoch=20, callbacks=callbacks)" ] }, { @@ -493,7 +489,7 @@ "source": [ "### Direct usage with `ClusterCoordinator` (optional)\n", "\n", - "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.experimental.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. See the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." + "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. Refer to the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." ] }, { @@ -506,11 +502,11 @@ "\n", " \n", "\n", - "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.experimental.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", + "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. 
With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", "\n", - "Then, you will create a model, define a dataset and a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. You can find more details in the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n", + "Then, you will create a model, define a dataset, and define a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. You can find more details in the [Custom training with tf.distribute.Strategy](custom_training.ipynb) tutorial.\n", "\n", - "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](https://www.tensorflow.org/tutorials/distribute/parameter_server_training#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", + "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", "\n", "Let’s create these components in the following steps:\n" ] @@ -523,11 +519,13 @@ "source": [ "### Set up the data\n", "\n", - "First, write a function that creates a dataset that includes preprocessing logic implemented by [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", + "First, write a function that creates a dataset.\n", + "\n", + "If you would like to preprocess the data with [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) or [Tensorflow Transform layers](https://www.tensorflow.org/tfx/tutorials/transform/simple), create these layers **outside the `dataset_fn`** and **under `Strategy.scope`**, like you would do for any other Keras layers. This is because the `dataset_fn` will be wrapped into a `tf.function` and then executed on each worker to generate the data pipeline.\n", "\n", - "You will create these layers outside the `dataset_fn` but apply the transformation inside the `dataset_fn`, since you will wrap the `dataset_fn` into a `tf.function`, which doesn't allow variables to be created inside it.\n", + "If you don't follow the above procedure, creating the layers might create Tensorflow states which will be lifted out of the `tf.function` to the coordinator. Thus, accessing them on workers would incur repetitive RPC calls between coordinator and workers, and cause significant slowdown.\n", "\n", - "Note: There is a known performance implication when using lookup table resources, which layers, such as `tf.keras.layers.experimental.preprocessing.StringLookup`, employ. Refer to the [Known limitations](#known_limitations) section for more information." + "Placing the layers under `Strategy.scope` will instead create them on all workers. Then, you will apply the transformation inside the `dataset_fn` via `tf.data.Dataset.map`. 
Refer to _Data preprocessing_ in the [Distributed input](input.ipynb) tutorial for more information on data preprocessing with distributed input." ] }, { @@ -544,10 +542,10 @@ "label_vocab = [\"yes\", \"no\"]\n", "\n", "with strategy.scope():\n", - " feature_lookup_layer = preprocessing.StringLookup(\n", + " feature_lookup_layer = tf.keras.layers.StringLookup(\n", " vocabulary=feature_vocab,\n", " mask_token=None)\n", - " label_lookup_layer = preprocessing.StringLookup(\n", + " label_lookup_layer = tf.keras.layers.StringLookup(\n", " vocabulary=label_vocab,\n", " num_oov_indices=0,\n", " mask_token=None)\n", @@ -637,7 +635,7 @@ "source": [ "### Build the model\n", "\n", - "Next, create the model and other objects. Make sure to create all variables under `strategy.scope`." + "Next, create the model and other objects. Make sure to create all variables under `Strategy.scope`." ] }, { @@ -648,7 +646,7 @@ }, "outputs": [], "source": [ - "# These variables created under the `strategy.scope` will be placed on parameter\n", + "# These variables created under the `Strategy.scope` will be placed on parameter\n", "# servers in a round-robin fashion.\n", "with strategy.scope():\n", " # Create the model. The input needs to be compatible with Keras processing layers.\n", @@ -658,10 +656,13 @@ " emb_layer = tf.keras.layers.Embedding(\n", " input_dim=len(feature_lookup_layer.get_vocabulary()), output_dim=16384)\n", " emb_output = tf.reduce_mean(emb_layer(model_input), axis=1)\n", - " dense_output = tf.keras.layers.Dense(units=1, activation=\"sigmoid\")(emb_output)\n", + " dense_output = tf.keras.layers.Dense(\n", + " units=1, activation=\"sigmoid\",\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4),\n", + " )(emb_output)\n", " model = tf.keras.Model({\"features\": model_input}, dense_output)\n", "\n", - " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)\n", + " optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.1)\n", " accuracy = tf.keras.metrics.Accuracy()" ] }, @@ -671,7 +672,7 @@ "id": "iyuxiqCQU50m" }, "source": [ - "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and each shard was assigned to different parameter servers:" + "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and that each shard was assigned to a different parameter server:" ] }, { @@ -685,8 +686,9 @@ "assert len(emb_layer.weights) == 2\n", "assert emb_layer.weights[0].shape == (4, 16384)\n", "assert emb_layer.weights[1].shape == (4, 16384)\n", - "assert emb_layer.weights[0].device == \"/job:ps/replica:0/task:0/device:CPU:0\"\n", - "assert emb_layer.weights[1].device == \"/job:ps/replica:0/task:1/device:CPU:0\"" + "\n", + "print(emb_layer.weights[0].device)\n", + "print(emb_layer.weights[1].device)\n" ] }, { @@ -714,9 +716,12 @@ " with tf.GradientTape() as tape:\n", " pred = model(batch_data, training=True)\n", " per_example_loss = tf.keras.losses.BinaryCrossentropy(\n", - " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", + " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", " loss = tf.nn.compute_average_loss(per_example_loss)\n", - " gradients = tape.gradient(loss, model.trainable_variables)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", "\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", @@ -735,7 +740,7 @@ 
"id": "rvrYQUeYiLNy" }, "source": [ - "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas.\n" + "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas (GPUs). Their parallel calls to `tf.nn.compute_average_loss()` compute the average of the loss across the replicas (GPUs) of one worker, independent of the total number of workers." ] }, { @@ -747,7 +752,7 @@ "### Dispatch training steps to remote workers\n", " \n", "\n", - "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.experimental.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", + "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", "\n", "Let’s first create a `ClusterCoordinator` object and pass in the strategy object:" ] @@ -760,7 +765,7 @@ }, "outputs": [], "source": [ - "coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)" + "coordinator = tf.distribute.coordinator.ClusterCoordinator(strategy)" ] }, { @@ -769,7 +774,7 @@ "id": "-xRIgKxciOSe" }, "source": [ - "Then, create a per-worker dataset and an iterator. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." + "Then, create a per-worker dataset and an iterator using the `ClusterCoordinator.create_per_worker_dataset` API, which replicates the dataset to all workers. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." 
] }, { @@ -808,15 +813,15 @@ }, "outputs": [], "source": [ - "num_epoches = 4\n", + "num_epochs = 4\n", "steps_per_epoch = 5\n", - "for i in range(num_epoches):\n", + "for i in range(num_epochs):\n", " accuracy.reset_states()\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " # Wait at epoch boundaries.\n", " coordinator.join()\n", - " print (\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" + " print(\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" ] }, { @@ -837,7 +842,7 @@ "outputs": [], "source": [ "loss = coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", - "print (\"Final loss is %f\" % loss.fetch())" + "print(\"Final loss is %f\" % loss.fetch())" ] }, { @@ -857,7 +862,7 @@ " # Do something like logging metrics or writing checkpoints.\n", "```\n", "\n", - "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/distribute/parameter_server_training_test.py).\n" + "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/keras-team/keras/blob/master/keras/integration_test/parameter_server_keras_preprocessing_test.py).\n" ] }, { @@ -868,11 +873,11 @@ "source": [ "### More about dataset creation\n", "\n", - "The dataset in the above code is created using the `ClusterCoordinator.create_per_worker_dataset` API). It creates one dataset per worker and returns a container object. You can call the `iter` method on it to create a per-worker iterator. The per-worker iterator contains one iterator per worker and the corresponding slice of a worker will be substituted in the input argument of the function passed to the `ClusterCoordinator.schedule` method before the function is executed on a particular worker.\n", + "The dataset in the above code is created using the `ClusterCoordinator.create_per_worker_dataset` API. It creates one dataset per worker and returns a container object. You can call the `iter` method on it to create a per-worker iterator. The per-worker iterator contains one iterator per worker and the corresponding slice of a worker will be substituted in the input argument of the function passed to the `ClusterCoordinator.schedule` method before the function is executed on a particular worker.\n", "\n", - "Currently, the `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same except they may be shuffled differently if they contain a `Dataset.shuffle` operation. Because of this, it is also recommended that the datasets to be repeated indefinitely and you schedule a finite number of steps instead of relying on the `OutOfRangeError` from a dataset.\n", + "The `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same (except that they may be shuffled differently). Because of this, it is also recommended to repeat datasets, and schedule a finite number of steps instead of relying on receiving an `OutOfRangeError` from a dataset.\n", "\n", - "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`." 
+ "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`. The `create_per_worker_dataset` API can also directly take a `tf.data.Dataset` or `tf.distribute.DistributedDataset` as input." ] }, { @@ -883,7 +888,7 @@ "source": [ "## Evaluation\n", "\n", - "There is more than one way to define and run an evaluation loop in distributed training. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference." + "The two main approaches to performing evaluation with `tf.distribute.ParameterServerStrategy` training are inline evaluation and sidecar evaluation. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference. For users using `Model.fit`, `Model.evaluate` uses inline (distributed) evaluation under the hood." ] }, { @@ -894,12 +899,12 @@ "source": [ "### Inline evaluation\n", "\n", - "In this method, the coordinator alternates between training and evaluation and thus it is called it _inline evaluation_.\n", + "In this method, the coordinator alternates between training and evaluation, and thus it is called _inline evaluation_.\n", "\n", "There are several benefits of inline evaluation. For example:\n", "\n", "- It can support large evaluation models and evaluation datasets that a single task cannot hold.\n", - "- The evaluation results can be used to make decisions for training the next epoch.\n", + "- The evaluation results can be used to make decisions for training the next epoch, for example, whether to stop training early.\n", "\n", "There are two ways to implement inline evaluation: direct evaluation and distributed evaluation.\n", "\n", @@ -915,7 +920,7 @@ "outputs": [], "source": [ "eval_dataset = tf.data.Dataset.from_tensor_slices(\n", - " feature_and_label_gen(num_examples=16)).map(\n", + " feature_and_label_gen(num_examples=16)).map(\n", " lambda x: (\n", " {\"features\": feature_preprocess_stage(x[\"features\"])},\n", " label_preprocess_stage(x[\"label\"])\n", @@ -928,7 +933,7 @@ " actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64)\n", " eval_accuracy.update_state(labels, actual_pred)\n", "\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -976,7 +981,7 @@ "for _ in range(eval_steps_per_epoch):\n", " coordinator.schedule(eval_step, args=(per_worker_eval_iterator,))\n", "coordinator.join()\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -985,7 +990,23 @@ "id": "cKrQktZX5z7a" }, "source": [ - "Note: Currently, the `schedule` and `join` methods of `tf.distribute.experimental.coordinator.ClusterCoordinator` don’t support visitation guarantee or exactly-once semantics. In other words, there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times. Visitation guarantee on evaluation dataset is being worked on." + "#### Enabling exactly-once evaluation\n", + "\n", + "\n", + "The `schedule` and `join` methods of `tf.distribute.coordinator.ClusterCoordinator` don’t support visitation guarantees or exactly-once semantics by default. 
In other words, in the above example there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times.\n", + "\n", + "Exactly-once evaluation may be preferred to reduce the variance of evaluation across epochs, and improve model selection done via early stopping, hyperparameter tuning, or other methods. There are different ways to enable exactly-once evaluation:\n", + "\n", + "- With a `Model.fit/.evaluate` workflow, it can be enabled by adding an argument to `Model.compile`. Refer to docs for the `pss_evaluation_shards` argument.\n", + "- The `tf.data` service API can be used to provide exactly-once visitation for evaluation when using `ParameterServerStrategy` (refer to the _Dynamic Sharding_ section of the `tf.data.experimental.service` API documentation).\n", + "- [Sidecar evaluation](#sidecar_evaluation) provides exactly-once evaluation by default, since the evaluation happens on a single machine. However this can be much slower than performing evaluation distributed across many workers.\n", + "\n", + "The first option, using `Model.compile`, is the suggested solution for most users.\n", + "\n", + "Exactly-once evaluation has some limitations:\n", + "\n", + "- It is not supported to write a custom distributed evaluation loop with an exactly-once visitation guarantee. File a GitHub issue if you need support for this.\n", + "- It cannot automatically handle computation of metrics that use the `Layer.add_metric` API. These should be excluded from evaluation, or reworked into `Metric` objects." ] }, { @@ -994,9 +1015,69 @@ "id": "H40X-9Gs3i7_" }, "source": [ - "### Side-car evaluation\n", + "### Sidecar evaluation\n", + "\n", + "\n", + "Another method for defining and running an evaluation loop in `tf.distribute.ParameterServerStrategy` training is called _sidecar evaluation_, in which you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on the latest checkpoint (refer to [this guide](../../guide/checkpoint.ipynb) for more details on checkpointing). The coordinator and worker tasks do not spend any time on evaluation, so for a fixed number of iterations the overall training time should be shorter than using other evaluation methods. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HonyjnXK9-ys" + }, + "source": [ + "To write an evaluation loop for sidecar evaluation, you have two\n", + "options:\n", + "\n", + "1. Use the `tf.keras.utils.SidecarEvaluator` API.\n", + "2. Create a custom evaluation loop.\n", "\n", - "Another method is called _side-car evaluation_ where you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on a latest checkpoint. It allows your training program to finish early if you don't need to change your training loop based on evaluation results. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation. Following is a possible side-car evaluation loop:\n", + "Refer to the `tf.keras.utils.SidecarEvaluator` API documentation for more details on option 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_c0EiwB88OG" + }, + "source": [ + "Sidecar evaluation is supported only with a single task. This means:\n", + "\n", + "* It is guaranteed that each example is evaluated once. 
In the event the\n", + " evaluator is preempted or restarted, it simply restarts the\n", + " evaluation loop from the latest checkpoint, and the partial evaluation\n", + " progress made before the restart is discarded.\n", + "\n", + "* However, running evaluation on a single task implies that a full evaluation\n", + " can possibly take a long time.\n", + "\n", + "* If the size of the model is too large to fit into an evaluator's memory,\n", + " single sidecar evaluation is not applicable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VNJoWVc797B1" + }, + "source": [ + "Another caveat is that the `tf.keras.utils.SidecarEvaluator` implementation, and the custom\n", + "evaluation loop below, may skip some checkpoints because it always picks up the\n", + "latest checkpoint available, and during an evaluation epoch, multiple\n", + "checkpoints can be produced from the training cluster. You can write a custom\n", + "evaluation loop that evaluates every checkpoint, but it is not covered in this\n", + "tutorial. On the other hand, it may sit idle if checkpoints are produced less\n", + "frequently than how long it takes to run evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5jopxBd85Ji" + }, + "source": [ + "A custom evaluation loop provides more control over the details, such as choosing which checkpoint to evaluate, or providing any additional logic to run along with evaluation. The following is a possible custom sidecar evaluation loop:\n", "\n", "```python\n", "checkpoint_dir = ...\n", @@ -1016,7 +1097,7 @@ " eval_model.evaluate(eval_data)\n", "\n", " # Evaluation finishes when it has evaluated the last epoch.\n", - " if latest_checkpoint.endswith('-{}'.format(train_epoches)):\n", + " if latest_checkpoint.endswith('-{}'.format(train_epochs)):\n", " break\n", "```" ] @@ -1034,9 +1115,9 @@ "\n", "In a real production environment, you will run all tasks in different processes on different machines. The simplest way to configure cluster information on each task is to set `\"TF_CONFIG\"` environment variables and use a `tf.distribute.cluster_resolver.TFConfigClusterResolver` to parse `\"TF_CONFIG\"`.\n", "\n", - "For a general description about `\"TF_CONFIG\"` environment variables, refer to the [Distributed training](https://www.tensorflow.org/guide/distributed_training#setting_up_tf_config_environment_variable) guide.\n", + "For a general description of `\"TF_CONFIG\"` environment variables, refer to \"Setting up the `TF_CONFIG` environment variable\" in the [Distributed training](../../guide/distributed_training.ipynb) guide.\n", "\n", - "If you start your training tasks using Kubernetes or other configuration templates, it is very likely that these templates have already set `“TF_CONFIG\"` for you." + "If you start your training tasks using Kubernetes or other configuration templates, likely, these templates have already set `“TF_CONFIG\"` for you." ] }, { @@ -1047,7 +1128,7 @@ "source": [ "### Set the `\"TF_CONFIG\"` environment variable\n", "\n", - "Suppose you have 3 workers and 2 parameter servers, the `\"TF_CONFIG\"` of worker 1 can be:\n", + "Suppose you have 3 workers and 2 parameter servers. 
Then the `\"TF_CONFIG\"` of worker 1 can be:\n", "\n", "```python\n", "os.environ[\"TF_CONFIG\"] = json.dumps({\n", @@ -1089,12 +1170,12 @@ "if cluster_resolver.task_type in (\"worker\", \"ps\"):\n", " # Start a TensorFlow server and wait.\n", "elif cluster_resolver.task_type == \"evaluator\":\n", - " # Run side-car evaluation\n", + " # Run sidecar evaluation\n", "else:\n", " # Run the coordinator.\n", "```\n", "\n", - "The following code starts a TensorFlow server and waits:\n", + "The following code starts a TensorFlow server and waits, useful for the `\"worker\"` and `\"ps\"` roles:\n", "\n", "```python\n", "# Set the environment variable to allow reporting worker and ps failure to the\n", @@ -1128,7 +1209,7 @@ "source": [ "### Worker failure\n", "\n", - "`tf.distribute.experimental.coordinator.ClusterCoordinator` or `Model.fit` provide built-in fault tolerance for worker failure. Upon worker recovery, the previously provided dataset function (either to `ClusterCoordinator.create_per_worker_dataset` for a custom training loop, or `tf.keras.utils.experimental.DatasetCreator` for `Model.fit`) will be invoked on the workers to re-create the datasets." + "Both the `tf.distribute.coordinator.ClusterCoordinator` custom training loop and `Model.fit` approaches provide built-in fault tolerance for worker failure. Upon worker recovery, the `ClusterCoordinator` invokes dataset re-creation on the workers." ] }, { @@ -1172,7 +1253,7 @@ "global_steps = int(optimizer.iterations.numpy())\n", "starting_epoch = global_steps // steps_per_epoch\n", "\n", - "for _ in range(starting_epoch, num_epoches):\n", + "for _ in range(starting_epoch, num_epochs):\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " coordinator.join()\n", @@ -1212,17 +1293,21 @@ "source": [ "## Performance improvement\n", "\n", - "There are several possible reasons if you see performance issues when you train with `ParameterServerStrategy` and `ClusterResolver`.\n", + "There are several possible reasons you may experience performance issues when you train with `tf.distribute.ParameterServerStrategy` and `tf.distribute.coordinator.ClusterCoordinator`.\n", "\n", - "One common reason is parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", + "One common reason is that the parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", "\n", "1. Shard your large model variables via specifying a `variable_partitioner` when constructing a `ParameterServerStrategy`.\n", - "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step if possible. For example, use a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers since the default behavior is that the learning rate will become a variable placed on a particular parameter server and requested by all other parameter servers in each step.\n", + "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step, by both:\n", + "\n", + " 1) Using a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers. 
This is because the default behavior is that the learning rate will become a variable placed on a particular parameter server, and requested by all other parameter servers in each step); and\n", + "\n", + " 2) Using a `tf.keras.optimizers.legacy.Optimizer` (the standard `tf.keras.optimizers.Optimizer`s could still lead to hotspot variables).\n", "3. Shuffle your large vocabularies before passing them to Keras preprocessing layers.\n", "\n", - "Another possible reason for performance issues is the coordinator. Your first implementation of `schedule`/`join` is Python-based and thus may have threading overhead. Also the latency between the coordinator and the workers can be large. If this is the case,\n", + "Another possible reason for performance issues is the coordinator. The implementation of `schedule`/`join` is Python-based and thus may have threading overhead. Also, the latency between the coordinator and the workers can be large. If this is the case:\n", "\n", - "- For `Model.fit`, you can set `steps_per_execution` argument provided at `Model.compile` to a value larger than 1.\n", + "- For `Model.fit`, you can set the `steps_per_execution` argument provided at `Model.compile` to a value larger than 1.\n", "\n", "- For a custom training loop, you can pack multiple steps into a single `tf.function`:\n", "\n", @@ -1241,7 +1326,7 @@ "\n", "As the library is optimized further, hopefully most users won't have to manually pack steps in the future.\n", "\n", - "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the handling task failure section above." + "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the [handling task failure section](#handling_task_failure) above." ] }, { @@ -1261,22 +1346,35 @@ "- `os.environment[\"grpc_fail_fast\"]=\"use_caller\"` is needed on every task including the coordinator, to make fault tolerance work properly.\n", "- Synchronous parameter server training is not supported.\n", "- It is usually necessary to pack multiple steps into a single function to achieve optimal performance.\n", - "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work.\n", - "- It is not supported to load a checkpoint containing sharded optimizer slot variables into a different number of shards.\n", + "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work (refer to the [serving tutorial](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) for details).\n", "- It is not supported to recover from parameter server failure without restarting the coordinator task.\n", - "- Usage of `tf.lookup.StaticHashTable` (which is commonly employed by some `tf.keras.layers.experimental.preprocessing` layers, such as `IntegerLookup`, `StringLookup`, and `TextVectorization`) results in resources placed on the coordinator at this time with parameter server training. This has performance implications for lookup RPCs from workers to the coordinator. 
This is a current high priority to address.\n", - "\n", + "- Creation of `tf.lookup.StaticHashTable`, commonly employed by some Keras preprocessing layers, such as `tf.keras.layers.IntegerLookup`, `tf.keras.layers.StringLookup`, and `tf.keras.layers.TextVectorization`, should be placed under `Strategy.scope`. Otherwise, resources will be placed on the coordinator, and lookup RPCs from workers to the coordinator incur performance implications.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MKBF0RPSvzB" + }, + "source": [ "### `Model.fit` specifics\n", "\n", "- `steps_per_epoch` argument is required in `Model.fit`. You can select a value that provides appropriate intervals in an epoch.\n", "- `ParameterServerStrategy` does not have support for custom callbacks that have batch-level calls for performance reasons. You should convert those calls into epoch-level calls with suitably picked `steps_per_epoch`, so that they are called every `steps_per_epoch` number of steps. Built-in callbacks are not affected: their batch-level calls have been modified to be performant. Supporting batch-level calls for `ParameterServerStrategy` is being planned.\n", - "- For the same reason, unlike other strategies, progress bar and metrics are logged only at epoch boundaries.\n", - "- `run_eagerly` is not supported.\n", - "\n", + "- For the same reason, unlike other strategies, progress bars and metrics are logged only at epoch boundaries.\n", + "- `run_eagerly` is not supported.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvY-mg35Sx5L" + }, + "source": [ "### Custom training loop specifics\n", "\n", - "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset.\n", - "- When `ClusterCoordinator.create_per_worker_dataset` is used, the whole dataset must be created inside the function passed to it.\n", + "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset in general, although a visitation guarantee for evaluation is possible through `Model.fit/.evaluate`. See [Enabling exactly-once evaluation](#exactly_once_evaluation).\n", + "- When `ClusterCoordinator.create_per_worker_dataset` is used with a callable as input, the whole dataset must be created inside the function passed to it.\n", "- `tf.data.Options` is ignored in a dataset created by `ClusterCoordinator.create_per_worker_dataset`." ] } @@ -1284,9 +1382,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "parameter_server_training.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/save_and_load.ipynb b/site/en/tutorials/distribute/save_and_load.ipynb index 7317b277c45..c53a9b8bf0b 100644 --- a/site/en/tutorials/distribute/save_and_load.ipynb +++ b/site/en/tutorials/distribute/save_and_load.ipynb @@ -71,7 +71,12 @@ "source": [ "## Overview\n", "\n", - "It's common to save and load a model during training. There are two sets of APIs for saving and loading a keras model: a high-level API, and a low-level API. This tutorial demonstrates how you can use the SavedModel APIs when using `tf.distribute.Strategy`. To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](../../guide/keras/save_and_serialize.ipynb). 
Let's start with a simple example: " + "This tutorial demonstrates how you can save and load models in a SavedModel format with `tf.distribute.Strategy` during or after training. There are two kinds of APIs for saving and loading a Keras model: high-level (`tf.keras.Model.save` and `tf.keras.models.load_model`) and low-level (`tf.saved_model.save` and `tf.saved_model.load`).\n", + "\n", + "To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](https://www.tensorflow.org/guide/keras/save_and_serialize). Let's start with a simple example.\n", + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n", + "\n" ] }, { @@ -102,7 +107,7 @@ "id": "qqapWj98ptNV" }, "source": [ - "Prepare the data and model using `tf.distribute.Strategy`:" + "Load and prepare the data with TensorFlow Datasets and `tf.data`, and create the model using `tf.distribute.MirroredStrategy`:" ] }, { @@ -116,7 +121,7 @@ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", "def get_data():\n", - " datasets, ds_info = tfds.load(name='mnist', with_info=True, as_supervised=True)\n", + " datasets = tfds.load(name='mnist', as_supervised=True)\n", " mnist_train, mnist_test = datasets['train'], datasets['test']\n", "\n", " BUFFER_SIZE = 10000\n", @@ -157,7 +162,7 @@ "id": "qmU4Y3feS9Na" }, "source": [ - "Train the model: " + "Train the model with `tf.keras.Model.fit`: " ] }, { @@ -181,11 +186,11 @@ "source": [ "## Save and load the model\n", "\n", - "Now that you have a simple model to work with, let's take a look at the saving/loading APIs. \n", - "There are two sets of APIs available:\n", + "Now that you have a simple model to work with, let's explore the saving/loading APIs. \n", + "There are two kinds of APIs available:\n", "\n", - "* High level keras `model.save` and `tf.keras.models.load_model`\n", - "* Low level `tf.saved_model.save` and `tf.saved_model.load`\n" + "* High-level (Keras): `Model.save` and `tf.keras.models.load_model` (`.keras` zip archive format)\n", + "* Low-level: `tf.saved_model.save` and `tf.saved_model.load` (TF SavedModel format)\n" ] }, { @@ -194,7 +199,7 @@ "id": "FX_IF2F1tvFs" }, "source": [ - "### The Keras APIs" + "### The Keras API" ] }, { @@ -203,7 +208,7 @@ "id": "O8xfceg4Z3H_" }, "source": [ - "Here is an example of saving and loading a model with the Keras APIs:" + "Here is an example of saving and loading a model with the Keras API:" ] }, { @@ -214,7 +219,7 @@ }, "outputs": [], "source": [ - "keras_model_path = \"/tmp/keras_save\"\n", + "keras_model_path = '/tmp/keras_save.keras'\n", "model.save(keras_model_path)" ] }, @@ -245,9 +250,9 @@ "id": "gYAnskzorda-" }, "source": [ - "After restoring the model, you can continue training on it, even without needing to call `compile()` again, since it is already compiled before saving. The model is saved in the TensorFlow's standard `SavedModel` proto format. For more information, please refer to [the guide to `saved_model` format](../../guide/saved_model.ipynb).\n", + "After restoring the model, you can continue training on it, even without needing to call `Model.compile` again, since it was already compiled before saving. The model is saved a Keras zip archive format, marked by the `.keras` extension. 
For more information, please refer to [the guide on Keras saving](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "\n", - "Now to load the model and train it using a `tf.distribute.Strategy`:" + "Now, restore the model and train it using a `tf.distribute.Strategy`:" ] }, { @@ -258,7 +263,7 @@ }, "outputs": [], "source": [ - "another_strategy = tf.distribute.OneDeviceStrategy(\"/cpu:0\")\n", + "another_strategy = tf.distribute.OneDeviceStrategy('/cpu:0')\n", "with another_strategy.scope():\n", " restored_keras_model_ds = tf.keras.models.load_model(keras_model_path)\n", " restored_keras_model_ds.fit(train_dataset, epochs=2)" @@ -270,7 +275,7 @@ "id": "PdiiPmL5tQk5" }, "source": [ - "As you can see, loading works as expected with `tf.distribute.Strategy`. The strategy used here does not have to be the same strategy used before saving. " + "As the `Model.fit` output shows, loading works as expected with `tf.distribute.Strategy`. The strategy used here does not have to be the same strategy used before saving. " ] }, { @@ -279,7 +284,7 @@ "id": "3CrXIbmFt0f6" }, "source": [ - "### The `tf.saved_model` APIs" + "### The `tf.saved_model` API" ] }, { @@ -288,7 +293,7 @@ "id": "HtGzPp6et4Em" }, "source": [ - "Now let's take a look at the lower level APIs. Saving the model is similar to the keras API:" + "Saving the model with lower-level API is similar to the Keras API:" ] }, { @@ -300,7 +305,7 @@ "outputs": [], "source": [ "model = get_model() # get a fresh model\n", - "saved_model_path = \"/tmp/tf_save\"\n", + "saved_model_path = '/tmp/tf_save'\n", "tf.saved_model.save(model, saved_model_path)" ] }, @@ -310,7 +315,7 @@ "id": "q1QNRYcwuRll" }, "source": [ - "Loading can be done with `tf.saved_model.load()`. However, since it is an API that is on the lower level (and hence has a wider range of use cases), it does not return a Keras model. Instead, it returns an object that contain functions that can be used to do inference. For example:" + "Loading can be done with `tf.saved_model.load`. However, since it is a lower-level API (and hence has a wider range of use cases), it does not return a Keras model. Instead, it returns an object that contain functions that can be used to do inference. For example:" ] }, { @@ -321,7 +326,7 @@ }, "outputs": [], "source": [ - "DEFAULT_FUNCTION_KEY = \"serving_default\"\n", + "DEFAULT_FUNCTION_KEY = 'serving_default'\n", "loaded = tf.saved_model.load(saved_model_path)\n", "inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]" ] @@ -332,7 +337,7 @@ "id": "x65l7AaHUZCA" }, "source": [ - "The loaded object may contain multiple functions, each associated with a key. The `\"serving_default\"` is the default key for the inference function with a saved Keras model. To do an inference with this function: " + "The loaded object may contain multiple functions, each associated with a key. The `\"serving_default\"` key is the default key for the inference function with a saved Keras model. To do inference with this function: " ] }, { @@ -375,7 +380,9 @@ "\n", " # Calling the function in a distributed manner\n", " for batch in dist_predict_dataset:\n", - " another_strategy.run(inference_func,args=(batch,))" + " result = another_strategy.run(inference_func, args=(batch,))\n", + " print(result)\n", + " break" ] }, { @@ -384,7 +391,7 @@ "id": "hWGSukoyw3fF" }, "source": [ - "Calling the restored function is just a forward pass on the saved model (predict). What if yout want to continue training the loaded function? Or embed the loaded function into a bigger model? 
A common practice is to wrap this loaded object to a Keras layer to achieve this. Luckily, [TF Hub](https://www.tensorflow.org/hub) has [hub.KerasLayer](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py) for this purpose, shown here:" + "Calling the restored function is just a forward pass on the saved model (`tf.keras.Model.predict`). What if you want to continue training the loaded function? Or what if you need to embed the loaded function into a bigger model? A common practice is to wrap this loaded object into a Keras layer to achieve this. Luckily, [TF Hub](https://www.tensorflow.org/hub) has [`hub.KerasLayer`](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py) for this purpose, shown here:" ] }, { @@ -421,7 +428,7 @@ "id": "Oe1z_OtSJlu2" }, "source": [ - "As you can see, `hub.KerasLayer` wraps the result loaded back from `tf.saved_model.load()` into a Keras layer that can be used to build another model. This is very useful for transfer learning. " + "In the above example, Tensorflow Hub's `hub.KerasLayer` wraps the result loaded back from `tf.saved_model.load` into a Keras layer that is used to build another model. This is very useful for transfer learning. " ] }, { @@ -439,11 +446,11 @@ "id": "GC6GQ9HDLxD6" }, "source": [ - "For saving, if you are working with a keras model, it is almost always recommended to use the Keras's `model.save()` API. If what you are saving is not a Keras model, then the lower level API is your only choice. \n", + "For saving, if you are working with a Keras model, use the Keras `Model.save` API unless you need the additional control allowed by the low-level API. If what you are saving is not a Keras model, then the lower-level API, `tf.saved_model.save`, is your only choice. \n", "\n", - "For loading, which API you use depends on what you want to get from the loading API. If you cannot (or do not want to) get a Keras model, then use `tf.saved_model.load()`. Otherwise, use `tf.keras.models.load_model()`. Note that you can get a Keras model back only if you saved a Keras model. \n", + "For loading, your API choice depends on what you want to get from the model loading API. If you cannot (or do not want to) get a Keras model, then use `tf.saved_model.load`. Otherwise, use `tf.keras.models.load_model`. Note that you can get a Keras model back only if you saved a Keras model. \n", "\n", - "It is possible to mix and match the APIs. You can save a Keras model with `model.save`, and load a non-Keras model with the low-level API, `tf.saved_model.load`. " + "It is possible to mix and match the APIs. You can save a Keras model with `Model.save`, and load a non-Keras model with the low-level API, `tf.saved_model.load`. 
" ] }, { @@ -456,13 +463,13 @@ "source": [ "model = get_model()\n", "\n", - "# Saving the model using Keras's save() API\n", - "model.save(keras_model_path) \n", + "# Saving the model using Keras `Model.save`\n", + "model.save(saved_model_path)\n", "\n", "another_strategy = tf.distribute.MirroredStrategy()\n", - "# Loading the model using lower level API\n", + "# Loading the model using the lower-level API\n", "with another_strategy.scope():\n", - " loaded = tf.saved_model.load(keras_model_path)" + " loaded = tf.saved_model.load(saved_model_path)" ] }, { @@ -471,7 +478,7 @@ "id": "0Z7lSj8nZiW5" }, "source": [ - "### Saving/Loading from local device" + "### Saving/Loading from a local device" ] }, { @@ -480,7 +487,7 @@ "id": "NVAjWcosZodw" }, "source": [ - "When saving and loading from a local io device while running remotely, for example using a cloud TPU, the option `experimental_io_device` must be used to set the io device to localhost." + "When saving and loading from a local I/O device while training on remote devices—for example, when using a Cloud TPU—you must use the option `experimental_io_device` in `tf.saved_model.SaveOptions` and `tf.saved_model.LoadOptions` to set the I/O device to `localhost`. For example:" ] }, { @@ -494,7 +501,7 @@ "model = get_model()\n", "\n", "# Saving the model to a path on localhost.\n", - "saved_model_path = \"/tmp/tf_save\"\n", + "saved_model_path = '/tmp/tf_save'\n", "save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')\n", "model.save(saved_model_path, options=save_options)\n", "\n", @@ -517,14 +524,10 @@ { "cell_type": "markdown", "metadata": { - "id": "Tzog2ti7YYgy" + "id": "2cCSZrD7VCxe" }, "source": [ - "A special case is when you have a Keras model that does not have well-defined inputs. For example, a Sequential model can be created without any input shapes (`Sequential([Dense(3), ...]`). Subclassed models also do not have well-defined inputs after initialization. In this case, you should stick with the lower level APIs on both saving and loading, otherwise you will get an error. \n", - "\n", - "To check if your model has well-defined inputs, just check if `model.inputs` is `None`. If it is not `None`, you are all good. Input shapes are automatically defined when the model is used in `.fit`, `.evaluate`, `.predict`, or when calling the model (`model(inputs)`). \n", - "\n", - "Here is an example:" + "One special case is when you create Keras models in certain ways, and then save them before training. For example:" ] }, { @@ -536,6 +539,7 @@ "outputs": [], "source": [ "class SubclassedModel(tf.keras.Model):\n", + " \"\"\"Example model defined by subclassing `tf.keras.Model`.\"\"\"\n", "\n", " output_name = 'output_layer'\n", "\n", @@ -548,8 +552,89 @@ " return self._dense_layer(inputs)\n", "\n", "my_model = SubclassedModel()\n", - "# my_model.save(keras_model_path) # ERROR! \n", - "tf.saved_model.save(my_model, saved_model_path)" + "try:\n", + " my_model.save(saved_model_path)\n", + "except ValueError as e:\n", + " print(f'{type(e).__name__}: ', *e.args)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D4qMyXFDSPDO" + }, + "source": [ + "A SavedModel saves the `tf.types.experimental.ConcreteFunction` objects generated when you trace a `tf.function` (check _When is a Function tracing?_ in the [Introduction to graphs and tf.function](../../guide/intro_to_graphs.ipynb) guide to learn more). 
If you get a `ValueError` like this it's because `Model.save` was not able to find or create a traced `ConcreteFunction`.\n", + "\n", + "**Caution:** You shouldn't save a model without at least one `ConcreteFunction`, since the low-level API will otherwise generate a SavedModel with no `ConcreteFunction` signatures ([learn more](../../guide/saved_model.ipynb) about the SavedModel format). For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "064SE47mYDj8" + }, + "outputs": [], + "source": [ + "tf.saved_model.save(my_model, saved_model_path)\n", + "x = tf.saved_model.load(saved_model_path)\n", + "x.signatures" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LRTxlASJX-cY" + }, + "source": [ + "\n", + "Usually the model's forward pass—the `call` method—will be traced automatically when the model is called for the first time, often via the Keras `Model.fit` method. A `ConcreteFunction` can also be generated by the Keras [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) and [Functional](https://www.tensorflow.org/guide/keras/functional) APIs, if you set the input shape, for example, by making the first layer either a `tf.keras.layers.InputLayer` or another layer type, and passing it the `input_shape` keyword argument. \n", + "\n", + "To verify if your model has any traced `ConcreteFunction`s, check if `Model.save_spec` is `None`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xAXise4eR0YJ" + }, + "outputs": [], + "source": [ + "print(my_model.save_spec() is None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G2G_FQrWJAO3" + }, + "source": [ + "Let's use `tf.keras.Model.fit` to train the model, and notice that the `save_spec` gets defined and model saving will work:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cv5LTi0zDkKS" + }, + "outputs": [], + "source": [ + "BATCH_SIZE_PER_REPLICA = 4\n", + "BATCH_SIZE = BATCH_SIZE_PER_REPLICA * mirrored_strategy.num_replicas_in_sync\n", + "\n", + "dataset_size = 100\n", + "dataset = tf.data.Dataset.from_tensors(\n", + " (tf.range(5, dtype=tf.float32), tf.range(5, dtype=tf.float32))\n", + " ).repeat(dataset_size).batch(BATCH_SIZE)\n", + "\n", + "my_model.compile(optimizer='adam', loss='mean_squared_error')\n", + "my_model.fit(dataset, epochs=2)\n", + "\n", + "print(my_model.save_spec() is None)\n", + "my_model.save(saved_model_path)" ] } ], diff --git a/site/en/tutorials/estimator/boosted_trees.ipynb b/site/en/tutorials/estimator/boosted_trees.ipynb deleted file mode 100644 index 4c1bb1890c0..00000000000 --- a/site/en/tutorials/estimator/boosted_trees.ipynb +++ /dev/null @@ -1,612 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xPYxZMrWyA0N" - }, - "source": [ - "# Boosted trees using Estimators" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p_vOREjRx-Y0" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6gWdn5lrlkhR" - }, - "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees] (https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qNW3c_rop5J8" - }, - "source": [ - "**Note**: Modern Keras based implementations of many state of the art decision forest algorithms are available in [TensorFlow Decision Forests](https://tensorflow.org/decision_forests)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "This tutorial is an end-to-end walkthrough of training a Gradient Boosting model using decision trees with the `tf.estimator` API. Boosted Trees models are among the most popular and effective machine learning approaches for both regression and classification. It is an ensemble technique that combines the predictions from several (think 10s, 100s or even 1000s) tree models.\n", - "\n", - "Boosted Trees models are popular with many machine learning practitioners as they can achieve impressive performance with minimal hyperparameter tuning." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from IPython.display import clear_output\n", - "from matplotlib import pyplot as plt\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NFtnFm1T0kMf" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "tf.random.set_seed(123)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "The dataset consists of a training set and an evaluation set:\n", - "\n", - "* `dftrain` and `y_train` are the *training set*—the data the model uses to learn.\n", - "* The model is tested against the *eval set*, `dfeval`, and `y_eval`.\n", - "\n", - "For training you will use the following features:\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Feature Name | Description
sex | Gender of passenger
age | Age of passenger
n_siblings_spouses | Number of siblings and partners aboard
parch | Number of parents and children aboard
fare | Fare passenger paid.
class | Passenger's class on ship
deck | Which deck passenger was on
embark_town | Which town passenger embarked from
alone | If passenger was alone
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AoPiWsJALr-k" - }, - "source": [ - "## Explore the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "slcat1yzmzw5" - }, - "source": [ - "Let's first preview some of the data and create summary statistics on the training set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "15PLelXBlxEW" - }, - "outputs": [], - "source": [ - "dftrain.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j2hiM4ETmqP0" - }, - "outputs": [], - "source": [ - "dftrain.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IR0e8V-LyJ4" - }, - "source": [ - "There are 627 and 264 examples in the training and evaluation sets, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_1NwYqGwDjFf" - }, - "outputs": [], - "source": [ - "dftrain.shape[0], dfeval.shape[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28UFJ4KSMK3V" - }, - "source": [ - "The majority of passengers are in their 20's and 30's." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CaVDmZtuDfux" - }, - "outputs": [], - "source": [ - "dftrain.age.hist(bins=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1pifWiCoMbR5" - }, - "source": [ - "There are approximately twice as male passengers as female passengers aboard." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-WazAq30MO5J" - }, - "outputs": [], - "source": [ - "dftrain.sex.value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7_XkxrpmmVU_" - }, - "source": [ - "The majority of passengers were in the \"third\" class." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zZ3PvVy4l4gI" - }, - "outputs": [], - "source": [ - "dftrain['class'].value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HM5SlwlxmZMT" - }, - "source": [ - "Most passengers embarked from Southampton." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RVTSrdr4mZaC" - }, - "outputs": [], - "source": [ - "dftrain['embark_town'].value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aTn1niLPob3x" - }, - "source": [ - "Females have a much higher chance of surviving vs. males. This will clearly be a predictive feature for the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eh3KW5oYkaNS" - }, - "outputs": [], - "source": [ - "pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns and input functions\n", - "The Gradient Boosting estimator can utilize both numeric and categorical features. Feature columns work with all TensorFlow estimators and their purpose is to define the features used for modeling. Additionally they provide some feature engineering capabilities like one-hot-encoding, normalization, and bucketization. 
In this tutorial, the fields in `CATEGORICAL_COLUMNS` are transformed from categorical columns to one-hot-encoded columns ([indicator column](https://www.tensorflow.org/api_docs/python/tf/feature_column/indicator_column)):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return tf.feature_column.indicator_column(\n", - " tf.feature_column.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(tf.feature_column.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "74GNtFpStSAz" - }, - "source": [ - "You can view the transformation that a feature column produces. For example, here is the output when using the `indicator_column` on a single example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eaq79D9FtmF8" - }, - "outputs": [], - "source": [ - "example = dict(dftrain.head(1))\n", - "class_fc = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_vocabulary_list('class', ('First', 'Second', 'Third')))\n", - "print('Feature value: \"{}\"'.format(example['class'].iloc[0]))\n", - "print('One-hot encoded: ', tf.keras.layers.DenseFeatures([class_fc])(example).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YbCUn3nCusC3" - }, - "source": [ - "Additionally, you can view all of the feature column transformations together:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "omIYcsVws3g0" - }, - "outputs": [], - "source": [ - "tf.keras.layers.DenseFeatures(feature_columns)(example).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Next you need to create the input functions. These will specify how data will be read into our model for both training and inference. You will use the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas. This is suitable for smaller, in-memory datasets. For larger datasets, the tf.data API supports a variety of file formats (including [csv](https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset)) so that you can process datasets that do not fit in memory." 
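The notebook itself only uses the in-memory `from_tensor_slices` path shown in the next cell. As a hedged illustration of the larger-than-memory alternative mentioned above, `tf.data.experimental.make_csv_dataset` can stream the same kind of CSV from disk; the local file name here is an assumption, not part of the original notebook.

```python
import tensorflow as tf

# Stream the Titanic training data from a CSV file instead of holding a
# DataFrame in memory. `label_name` picks the column returned as the label.
csv_dataset = tf.data.experimental.make_csv_dataset(
    'titanic_train.csv',   # assumed local copy of the training CSV
    batch_size=32,
    label_name='survived',
    num_epochs=1,
    shuffle=True)

for features, labels in csv_dataset.take(1):
  print(labels.numpy()[:5])
  print({name: values.numpy()[:2] for name, values in features.items()})
```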
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = dataset.repeat(n_epochs)\n", - " # In memory training doesn't use batching.\n", - " dataset = dataset.batch(NUM_EXAMPLES)\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "## Train and evaluate the model\n", - "\n", - "Below you will do the following steps:\n", - "\n", - "1. Initialize the model, specifying the features and hyperparameters.\n", - "2. Feed the training data to the model using the `train_input_fn` and train the model using the `train` function.\n", - "3. You will assess model performance using the evaluation set—in this example, the `dfeval` DataFrame. You will verify that the predictions match the labels from the `y_eval` array.\n", - "\n", - "Before training a Boosted Trees model, let's first train a linear classifier (logistic regression model). It is best practice to start with a simpler model to establish a benchmark." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JPOGpmmq3rJr" - }, - "outputs": [], - "source": [ - "linear_est = tf.estimator.LinearClassifier(feature_columns)\n", - "\n", - "# Train model.\n", - "linear_est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "result = linear_est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "print(pd.Series(result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BarkNXwA3rJu" - }, - "source": [ - "Next let's train a Boosted Trees model. For boosted trees, regression (`BoostedTreesRegressor`) and classification (`BoostedTreesClassifier`) are supported. Since the goal is to predict a class - survive or not survive, you will use the `BoostedTreesClassifier`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "# Since data fits into memory, use entire dataset per layer. It will be faster.\n", - "# Above one batch is defined as the entire dataset.\n", - "n_batches = 1\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns,\n", - " n_batches_per_layer=n_batches)\n", - "\n", - "# The model will stop training once the specified number of trees is built, not\n", - "# based on the number of steps.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Eval.\n", - "result = est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "print(pd.Series(result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hEflwznXvuMP" - }, - "source": [ - "Now you can use the train model to make predictions on a passenger from the evaluation set. TensorFlow models are optimized to make predictions on a batch, or collection, of examples at once. 
Earlier, the `eval_input_fn` is defined using the entire evaluation set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6zmIjTr73rJ4" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.predict(eval_input_fn))\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "\n", - "probs.plot(kind='hist', bins=20, title='predicted probabilities')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mBUaNN1BzJHG" - }, - "source": [ - "Finally you can also look at the receiver operating characteristic (ROC) of the results, which will give us a better idea of the tradeoff between the true positive rate and false positive rate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NzxghvVz3rJ6" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import roc_curve\n", - "\n", - "fpr, tpr, _ = roc_curve(y_eval, probs)\n", - "plt.plot(fpr, tpr)\n", - "plt.title('ROC curve')\n", - "plt.xlabel('false positive rate')\n", - "plt.ylabel('true positive rate')\n", - "plt.xlim(0,)\n", - "plt.ylim(0,)\n", - "plt.show()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb b/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb deleted file mode 100644 index c437574a13a..00000000000 --- a/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb +++ /dev/null @@ -1,1027 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r0_fqL3ayLHX" - }, - "source": [ - "# Gradient Boosted Trees: Model understanding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pV4mnvs7l40o" - }, - "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f4L1ffaFp2gT" - }, - "source": [ - "**Note**: Modern Keras based implementations of many state of the art decision forest algorithms are available in [TensorFlow Decision Forests](https://tensorflow.org/decision_forests)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "For an end-to-end walkthrough of training a Gradient Boosting model check out the [boosted trees tutorial](./boosted_trees). In this tutorial you will:\n", - "\n", - "* Learn how to interpret a Boosted Trees model both *locally* and *globally*\n", - "* Gain intution for how a Boosted Trees model fits a dataset\n", - "\n", - "## How to interpret Boosted Trees models both locally and globally\n", - "\n", - "Local interpretability refers to an understanding of a model’s predictions at the individual example level, while global interpretability refers to an understanding of the model as a whole. Such techniques can help machine learning (ML) practitioners detect bias and bugs during the model development stage.\n", - "\n", - "For local interpretability, you will learn how to create and visualize per-instance contributions. To distinguish this from feature importances, we refer to these values as directional feature contributions (DFCs).\n", - "\n", - "For global interpretability you will retrieve and visualize gain-based feature importances, [permutation feature importances](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) and also show aggregated DFCs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "132V3PZ8V8VA" - }, - "outputs": [], - "source": [ - "!pip install statsmodels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from IPython.display import clear_output\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sp1ShjJJeyH3" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "tf.random.set_seed(123)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "For a description of the features, please review the prior tutorial." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns, input_fn, and the train the estimator" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JiJ6K3hr1lXW" - }, - "source": [ - "### Preprocess the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "udMytRJC05oW" - }, - "source": [ - "Create the feature columns, using the original numeric columns as is and one-hot-encoding categorical variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9rTefnXe1n0v" - }, - "source": [ - "### Build the input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Create the input functions using the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = (dataset\n", - " .repeat(n_epochs)\n", - " .batch(NUM_EXAMPLES))\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "params = {\n", - " 'n_trees': 50,\n", - " 'max_depth': 3,\n", - " 'n_batches_per_layer': 1,\n", - " # You must enable center_bias = True to get DFCs. This will force the model to\n", - " # make an initial prediction before using any features (e.g. 
use the mean of\n", - " # the training labels for regression or log odds for classification when\n", - " # using cross entropy loss).\n", - " 'center_bias': True\n", - "}\n", - "\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns, **params)\n", - "# Train model.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "results = est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "pd.Series(results).to_frame()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JgAz3jDa_tRA" - }, - "source": [ - "For performance reasons, when your data fits in memory, we recommend use the arg `train_in_memory=True` in the `tf.estimator.BoostedTreesClassifier` function. However if training time is not of a concern or if you have a very large dataset and want to do distributed training, use the `tf.estimator.BoostedTrees` API shown above.\n", - "\n", - "\n", - "When using this method, you should not batch your input data, as the method operates on the entire dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "y7ztzoSk_vjY" - }, - "outputs": [], - "source": [ - "in_memory_params = dict(params)\n", - "in_memory_params['n_batches_per_layer'] = 1\n", - "# In-memory input_fn does not use batching.\n", - "def make_inmemory_train_input_fn(X, y):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " return dict(X), y\n", - " return input_fn\n", - "train_input_fn = make_inmemory_train_input_fn(dftrain, y_train)\n", - "\n", - "# Train the model.\n", - "est = tf.estimator.BoostedTreesClassifier(\n", - " feature_columns, \n", - " train_in_memory=True, \n", - " **in_memory_params)\n", - "\n", - "est.train(train_input_fn)\n", - "print(est.evaluate(eval_input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TSZYqNcRuczV" - }, - "source": [ - "## Model interpretation and plotting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BjcfLiI3uczW" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns_colors = sns.color_palette('colorblind')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ywTtbBvBuczY" - }, - "source": [ - "## Local interpretability\n", - "Next you will output the directional feature contributions (DFCs) to explain individual predictions using the approach outlined in [Palczewska et al](https://arxiv.org/pdf/1312.1121.pdf) and by Saabas in [Interpreting Random Forests](http://blog.datadive.net/interpreting-random-forests/) (this method is also available in scikit-learn for Random Forests in the [`treeinterpreter`](https://github.com/andosa/treeinterpreter) package). 
The DFCs are generated with:\n", - "\n", - "`pred_dicts = list(est.experimental_predict_with_explanations(pred_input_fn))`\n", - "\n", - "(Note: The method is named experimental as we may modify the API before dropping the experimental prefix.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TIL93B4sDRqE" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tDPoRx_ZaY1E" - }, - "outputs": [], - "source": [ - "# Create DFC Pandas dataframe.\n", - "labels = y_eval.values\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])\n", - "df_dfc.describe().T" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EUKSaVoraY1C" - }, - "source": [ - "A nice property of DFCs is that the sum of the contributions + the bias is equal to the prediction for a given example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hd9VuizRaY1H" - }, - "outputs": [], - "source": [ - "# Sum of DFCs + bias == probabality.\n", - "bias = pred_dicts[0]['bias']\n", - "dfc_prob = df_dfc.sum(axis=1) + bias\n", - "np.testing.assert_almost_equal(dfc_prob.values,\n", - " probs.values)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tx5p4vEhuczg" - }, - "source": [ - "Plot DFCs for an individual passenger. Let's make the plot nice by color coding based on the contributions' directionality and add the feature values on figure." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6z_Tq1Pquczj" - }, - "outputs": [], - "source": [ - "# Boilerplate code for plotting :)\n", - "def _get_color(value):\n", - " \"\"\"To make positive DFCs plot green, negative DFCs plot red.\"\"\"\n", - " green, red = sns.color_palette()[2:4]\n", - " if value >= 0: return green\n", - " return red\n", - "\n", - "def _add_feature_values(feature_values, ax):\n", - " \"\"\"Display feature's values on left of plot.\"\"\"\n", - " x_coord = ax.get_xlim()[0]\n", - " OFFSET = 0.15\n", - " for y_coord, (feat_name, feat_val) in enumerate(feature_values.items()):\n", - " t = plt.text(x_coord, y_coord - OFFSET, '{}'.format(feat_val), size=12)\n", - " t.set_bbox(dict(facecolor='white', alpha=0.5))\n", - " from matplotlib.font_manager import FontProperties\n", - " font = FontProperties()\n", - " font.set_weight('bold')\n", - " t = plt.text(x_coord, y_coord + 1 - OFFSET, 'feature\\nvalue',\n", - " fontproperties=font, size=12)\n", - "\n", - "def plot_example(example):\n", - " TOP_N = 8 # View top 8 features.\n", - " sorted_ix = example.abs().sort_values()[-TOP_N:].index # Sort by magnitude.\n", - " example = example[sorted_ix]\n", - " colors = example.map(_get_color).tolist()\n", - " ax = example.to_frame().plot(kind='barh',\n", - " color=colors,\n", - " legend=None,\n", - " alpha=0.75,\n", - " figsize=(10,6))\n", - " ax.grid(False, axis='y')\n", - " ax.set_yticklabels(ax.get_yticklabels(), size=14)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - " return ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ht1P2-1euczk" - }, - "outputs": [], - "source": [ - "# Plot results.\n", - "ID = 182\n", - "example = df_dfc.iloc[ID] # Choose ith example from evaluation set.\n", - "TOP_N = 8 # View top 8 
features.\n", - "sorted_ix = example.abs().sort_values()[-TOP_N:].index\n", - "ax = plot_example(example)\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability', size=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aPXgWyFcfzAc" - }, - "source": [ - "The larger magnitude contributions have a larger impact on the model's prediction. Negative contributions indicate the feature value for this given example reduced the model's prediction, while positive values contribute an increase in the prediction." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0swvlkZFaY1Z" - }, - "source": [ - "You can also plot the example's DFCs compare with the entire distribution using a voilin plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zo7rNd1v_5e2" - }, - "outputs": [], - "source": [ - "# Boilerplate plotting code.\n", - "def dist_violin_plot(df_dfc, ID):\n", - " # Initialize plot.\n", - " fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", - "\n", - " # Create example dataframe.\n", - " TOP_N = 8 # View top 8 features.\n", - " example = df_dfc.iloc[ID]\n", - " ix = example.abs().sort_values()[-TOP_N:].index\n", - " example = example[ix]\n", - " example_df = example.to_frame(name='dfc')\n", - "\n", - " # Add contributions of entire distribution.\n", - " parts=ax.violinplot([df_dfc[w] for w in ix],\n", - " vert=False,\n", - " showextrema=False,\n", - " widths=0.7,\n", - " positions=np.arange(len(ix)))\n", - " face_color = sns_colors[0]\n", - " alpha = 0.15\n", - " for pc in parts['bodies']:\n", - " pc.set_facecolor(face_color)\n", - " pc.set_alpha(alpha)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - "\n", - " # Add local contributions.\n", - " ax.scatter(example,\n", - " np.arange(example.shape[0]),\n", - " color=sns.color_palette()[2],\n", - " s=100,\n", - " marker=\"s\",\n", - " label='contributions for example')\n", - "\n", - " # Legend\n", - " # Proxy plot, to show violinplot dist on legend.\n", - " ax.plot([0,0], [1,1], label='eval set contributions\\ndistributions',\n", - " color=face_color, alpha=alpha, linewidth=10)\n", - " legend = ax.legend(loc='lower right', shadow=True, fontsize='x-large',\n", - " frameon=True)\n", - " legend.get_frame().set_facecolor('white')\n", - "\n", - " # Format plot.\n", - " ax.set_yticks(np.arange(example.shape[0]))\n", - " ax.set_yticklabels(example.index)\n", - " ax.grid(False, axis='y')\n", - " ax.set_xlabel('Contribution to predicted probability', size=14)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PiLw2tlm_9aK" - }, - "source": [ - "Plot this example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VkCqraA2uczm" - }, - "outputs": [], - "source": [ - "dist_violin_plot(df_dfc, ID)\n", - "plt.title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TVJFM85SAWVq" - }, - "source": [ - "Finally, third-party tools, such as [LIME](https://github.com/marcotcr/lime) and [shap](https://github.com/slundberg/shap), can also help understand individual predictions for a model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PnNXH6mZuczr" - }, - "source": [ - "## Global feature importances\n", - "\n", - "Additionally, you might want to understand the model as a whole, rather than studying individual predictions. Below, you will compute and use:\n", - "\n", - "* Gain-based feature importances using `est.experimental_feature_importances`\n", - "* Permutation importances\n", - "* Aggregate DFCs using `est.experimental_predict_with_explanations`\n", - "\n", - "Gain-based feature importances measure the loss change when splitting on a particular feature, while permutation feature importances are computed by evaluating model performance on the evaluation set by shuffling each feature one-by-one and attributing the change in model performance to the shuffled feature.\n", - "\n", - "In general, permutation feature importance are preferred to gain-based feature importance, though both methods can be unreliable in situations where potential predictor variables vary in their scale of measurement or their number of categories and when features are correlated ([source](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-307)). Check out [this article](http://explained.ai/rf-importance/index.html) for an in-depth overview and great discussion on different feature importance types." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ocBcMatuczs" - }, - "source": [ - "### Gain-based feature importances" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gMaxCgPbBJ-j" - }, - "source": [ - "Gain-based feature importances are built into the TensorFlow Boosted Trees estimators using `est.experimental_feature_importances`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pPTxbAaeuczt" - }, - "outputs": [], - "source": [ - "importances = est.experimental_feature_importances(normalize=True)\n", - "df_imp = pd.Series(importances)\n", - "\n", - "# Visualize importances.\n", - "N = 8\n", - "ax = (df_imp.iloc[0:N][::-1]\n", - " .plot(kind='barh',\n", - " color=sns_colors[0],\n", - " title='Gain feature importances',\n", - " figsize=(10, 6)))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GvfAcBeGuczw" - }, - "source": [ - "### Average absolute DFCs\n", - "You can also average the absolute values of DFCs to understand impact at a global level." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JkvAWLWLuczx" - }, - "outputs": [], - "source": [ - "# Plot.\n", - "dfc_mean = df_dfc.abs().mean()\n", - "N = 8\n", - "sorted_ix = dfc_mean.abs().sort_values()[-N:].index # Average and sort by absolute.\n", - "ax = dfc_mean[sorted_ix].plot(kind='barh',\n", - " color=sns_colors[1],\n", - " title='Mean |directional feature contributions|',\n", - " figsize=(10, 6))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z0k_DvPLaY1o" - }, - "source": [ - "You can also see how DFCs vary as a feature value varies." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZcIfN1IpaY1o" - }, - "outputs": [], - "source": [ - "FEATURE = 'fare'\n", - "feature = pd.Series(df_dfc[FEATURE].values, index=dfeval[FEATURE].values).sort_index()\n", - "ax = sns.regplot(feature.index.values, feature.values, lowess=True)\n", - "ax.set_ylabel('contribution')\n", - "ax.set_xlabel(FEATURE)\n", - "ax.set_xlim(0, 100)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lbpG72ULucz0" - }, - "source": [ - "### Permutation feature importance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6esOw1VOucz0" - }, - "outputs": [], - "source": [ - "def permutation_importances(est, X_eval, y_eval, metric, features):\n", - " \"\"\"Column by column, shuffle values and observe effect on eval set.\n", - "\n", - " source: http://explained.ai/rf-importance/index.html\n", - " A similar approach can be done during training. See \"Drop-column importance\"\n", - " in the above article.\"\"\"\n", - " baseline = metric(est, X_eval, y_eval)\n", - " imp = []\n", - " for col in features:\n", - " save = X_eval[col].copy()\n", - " X_eval[col] = np.random.permutation(X_eval[col])\n", - " m = metric(est, X_eval, y_eval)\n", - " X_eval[col] = save\n", - " imp.append(baseline - m)\n", - " return np.array(imp)\n", - "\n", - "def accuracy_metric(est, X, y):\n", - " \"\"\"TensorFlow estimator accuracy.\"\"\"\n", - " eval_input_fn = make_input_fn(X,\n", - " y=y,\n", - " shuffle=False,\n", - " n_epochs=1)\n", - " return est.evaluate(input_fn=eval_input_fn)['accuracy']\n", - "features = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS\n", - "importances = permutation_importances(est, dfeval, y_eval, accuracy_metric,\n", - " features)\n", - "df_imp = pd.Series(importances, index=features)\n", - "\n", - "sorted_ix = df_imp.abs().sort_values().index\n", - "ax = df_imp[sorted_ix][-5:].plot(kind='barh', color=sns_colors[2], figsize=(10, 6))\n", - "ax.grid(False, axis='y')\n", - "ax.set_title('Permutation feature importance')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E236y3pVEzHg" - }, - "source": [ - "## Visualizing model fitting" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TrcQ-839EzZ6" - }, - "source": [ - "Lets first simulate/create training data using the following formula:\n", - "\n", - "\n", - "$$z=x* e^{-x^2 - y^2}$$\n", - "\n", - "\n", - "Where \\(z\\) is the dependent variable you are trying to predict and \\(x\\) and \\(y\\) are the features." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e8woaj81GGE9" - }, - "outputs": [], - "source": [ - "from numpy.random import uniform, seed\n", - "from scipy.interpolate import griddata\n", - "\n", - "# Create fake data\n", - "seed(0)\n", - "npts = 5000\n", - "x = uniform(-2, 2, npts)\n", - "y = uniform(-2, 2, npts)\n", - "z = x*np.exp(-x**2 - y**2)\n", - "xy = np.zeros((2,np.size(x)))\n", - "xy[0] = x\n", - "xy[1] = y\n", - "xy = xy.T" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GRI3KHfLZsGP" - }, - "outputs": [], - "source": [ - "# Prep data for training.\n", - "df = pd.DataFrame({'x': x, 'y': y, 'z': z})\n", - "\n", - "xi = np.linspace(-2.0, 2.0, 200),\n", - "yi = np.linspace(-2.1, 2.1, 210),\n", - "xi,yi = np.meshgrid(xi, yi)\n", - "\n", - "df_predict = pd.DataFrame({\n", - " 'x' : xi.flatten(),\n", - " 'y' : yi.flatten(),\n", - "})\n", - "predict_shape = xi.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0JnH4IhZuAb" - }, - "outputs": [], - "source": [ - "def plot_contour(x, y, z, **kwargs):\n", - " # Grid the data.\n", - " plt.figure(figsize=(10, 8))\n", - " # Contour the gridded data, plotting dots at the nonuniform data points.\n", - " CS = plt.contour(x, y, z, 15, linewidths=0.5, colors='k')\n", - " CS = plt.contourf(x, y, z, 15,\n", - " vmax=abs(zi).max(), vmin=-abs(zi).max(), cmap='RdBu_r')\n", - " plt.colorbar() # Draw colorbar.\n", - " # Plot data points.\n", - " plt.xlim(-2, 2)\n", - " plt.ylim(-2, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KF7WsIcYGF_E" - }, - "source": [ - "You can visualize the function. Redder colors correspond to larger function values." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WrxuqaaXGFOK" - }, - "outputs": [], - "source": [ - "zi = griddata(xy, z, (xi, yi), method='linear', fill_value='0')\n", - "plot_contour(xi, yi, zi)\n", - "plt.scatter(df.x, df.y, marker='.')\n", - "plt.title('Contour on training data')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hoANr0f2GFrM" - }, - "outputs": [], - "source": [ - "fc = [tf.feature_column.numeric_column('x'),\n", - " tf.feature_column.numeric_column('y')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xVRWyoY3ayTK" - }, - "outputs": [], - "source": [ - "def predict(est):\n", - " \"\"\"Predictions from a given estimator.\"\"\"\n", - " predict_input_fn = lambda: tf.data.Dataset.from_tensors(dict(df_predict))\n", - " preds = np.array([p['predictions'][0] for p in est.predict(predict_input_fn)])\n", - " return preds.reshape(predict_shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uyPu5618GU7K" - }, - "source": [ - "First let's try to fit a linear model to the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zUIV2IVgGVSk" - }, - "outputs": [], - "source": [ - "train_input_fn = make_input_fn(df, df.z)\n", - "est = tf.estimator.LinearRegressor(fc)\n", - "est.train(train_input_fn, max_steps=500);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_u4WAcCqfbco" - }, - "outputs": [], - "source": [ - "plot_contour(xi, yi, predict(est))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XD_fMAUtSCSa" - }, - "source": [ - "It's not a very good fit. 
Next let's try to fit a GBDT model to it and try to understand how the model fits the function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-dHlKFlFgHDQ" - }, - "outputs": [], - "source": [ - "n_trees = 37 #@param {type: \"slider\", min: 1, max: 80, step: 1}\n", - "\n", - "est = tf.estimator.BoostedTreesRegressor(fc, n_batches_per_layer=1, n_trees=n_trees)\n", - "est.train(train_input_fn, max_steps=500)\n", - "clear_output()\n", - "plot_contour(xi, yi, predict(est))\n", - "plt.text(-1.8, 2.1, '# trees: {}'.format(n_trees), color='w', backgroundcolor='black', size=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5WcZ9fubh1wT" - }, - "source": [ - "As you increase the number of trees, the model's predictions better approximates the underlying function." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cj8u3NCG-IKX" - }, - "source": [ - "![](https://www.tensorflow.org/images/boosted_trees/boosted_trees_ntrees.gif)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SMKoEZnCdrsp" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZSZUSrjXdw9g" - }, - "source": [ - "In this tutorial you learned how to interpret Boosted Trees models using directional feature contributions and feature importance techniques. These techniques provide insight into how the features impact a model's predictions. Finally, you also gained intution for how a Boosted Tree model fits a complex function by viewing the decision surface for several models." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "boosted_trees_model_understanding.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb index e41380908f5..be97a38b6eb 100644 --- a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb +++ b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb @@ -68,7 +68,7 @@ "id": "Dhcq8Ds4mCtm" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." 
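The updated warning points readers at the migration guide rather than describing the change. As a rough, illustrative sketch (not taken from this notebook), converting off `tf.keras.estimator.model_to_estimator` usually amounts to compiling the Keras model and calling `Model.fit` directly:

```python
import tensorflow as tf

# A small Keras model, similar in spirit to the one this tutorial wraps.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(3),
])
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])

# Estimator style (deprecated):
#   estimator = tf.keras.estimator.model_to_estimator(keras_model=model)
#   estimator.train(input_fn=train_input_fn, steps=100)
# Keras style: feed a tf.data.Dataset straight to Model.fit.
features = tf.random.normal([32, 4])
labels = tf.random.uniform([32], maxval=3, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(8)
model.fit(dataset, epochs=1)
```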
] }, { @@ -272,8 +272,7 @@ "colab": { "collapsed_sections": [], "name": "keras_model_to_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/linear.ipynb b/site/en/tutorials/estimator/linear.ipynb index ea46d41ede1..a26ffe2df4f 100644 --- a/site/en/tutorials/estimator/linear.ipynb +++ b/site/en/tutorials/estimator/linear.ipynb @@ -61,7 +61,7 @@ "id": "JOccPOFMm5Tc" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -293,14 +293,31 @@ "pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "qCHvgeorEsHa" + }, + "source": [ + "## Feature Engineering for the Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dhcq8Ds4mCtm" + }, + "source": [ + "> Warning: The tf.feature_columns module described in this tutorial is not recommended for new code. Keras preprocessing layers cover this functionality, for migration instructions see the [Migrating feature columns guide](https://www.tensorflow.org/guide/migrate/migrating_feature_columns). The tf.feature_columns module was designed for use with TF1 Estimators. It does fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities." + ] + }, { "cell_type": "markdown", "metadata": { "id": "VqDKQLZn8L-B" }, "source": [ - "## Feature Engineering for the Model\n", - "Estimators use a system called [feature columns](https://www.tensorflow.org/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", + "Estimators use a system called [feature columns](https://www.tensorflow.org/tutorials/structured_data/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", "\n", "Selecting and crafting the right set of feature columns is key to learning an effective model. 
A feature column can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*).\n", "\n", @@ -583,8 +600,7 @@ "colab": { "collapsed_sections": [], "name": "linear.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/premade.ipynb b/site/en/tutorials/estimator/premade.ipynb index a34096ea2b8..dc81847c7cd 100644 --- a/site/en/tutorials/estimator/premade.ipynb +++ b/site/en/tutorials/estimator/premade.ipynb @@ -68,7 +68,7 @@ "id": "stQiPWL6ni6_" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { diff --git a/site/en/tutorials/generative/autoencoder.ipynb b/site/en/tutorials/generative/autoencoder.ipynb index d2af1c3a345..1b2a6fcd2a8 100644 --- a/site/en/tutorials/generative/autoencoder.ipynb +++ b/site/en/tutorials/generative/autoencoder.ipynb @@ -6,9 +6,16 @@ "id": "Ndo4ERqnwQOU" }, "source": [ - "##### Copyright 2020 The TensorFlow Authors." + "##### Copyright 2024 The TensorFlow Authors." ] }, + { + "metadata": { + "id": "13rwRG5Jec7n" + }, + "cell_type": "markdown", + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -76,7 +83,7 @@ "source": [ "This tutorial introduces autoencoders with three examples: the basics, image denoising, and anomaly detection.\n", "\n", - "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error. \n", + "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error.\n", "\n", "To learn more about autoencoders, please consider reading chapter 14 from [Deep Learning](https://www.deeplearningbook.org/) by Ian Goodfellow, Yoshua Bengio, and Aaron Courville." ] @@ -117,7 +124,7 @@ }, "source": [ "## Load the dataset\n", - "To start, you will train the basic autoencoder using the Fashon MNIST dataset. Each image in this dataset is 28x28 pixels. " + "To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels." 
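The cell that loads the data is unchanged and therefore absent from this diff. For context, it is essentially the standard Keras dataset call; this is a sketch matching the usual tutorial setup rather than a quote of the notebook.

```python
import tensorflow as tf

# Download Fashion MNIST and scale pixels to [0, 1]. The labels are discarded
# because an autoencoder reconstructs its own input.
(x_train, _), (x_test, _) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
print(x_train.shape)  # (60000, 28, 28)
print(x_test.shape)   # (10000, 28, 28)
```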
] }, { @@ -159,27 +166,29 @@ }, "outputs": [], "source": [ - "latent_dim = 64 \n", - "\n", "class Autoencoder(Model):\n", - " def __init__(self, latent_dim):\n", + " def __init__(self, latent_dim, shape):\n", " super(Autoencoder, self).__init__()\n", - " self.latent_dim = latent_dim \n", + " self.latent_dim = latent_dim\n", + " self.shape = shape\n", " self.encoder = tf.keras.Sequential([\n", " layers.Flatten(),\n", " layers.Dense(latent_dim, activation='relu'),\n", " ])\n", " self.decoder = tf.keras.Sequential([\n", - " layers.Dense(784, activation='sigmoid'),\n", - " layers.Reshape((28, 28))\n", + " layers.Dense(tf.math.reduce_prod(shape).numpy(), activation='sigmoid'),\n", + " layers.Reshape(shape)\n", " ])\n", "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", " return decoded\n", - " \n", - "autoencoder = Autoencoder(latent_dim) " + "\n", + "\n", + "shape = x_test.shape[1:]\n", + "latent_dim = 64\n", + "autoencoder = Autoencoder(latent_dim, shape)\n" ] }, { @@ -329,8 +338,8 @@ "outputs": [], "source": [ "noise_factor = 0.2\n", - "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape) \n", - "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape) \n", + "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape)\n", + "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape)\n", "\n", "x_train_noisy = tf.clip_by_value(x_train_noisy, clip_value_min=0., clip_value_max=1.)\n", "x_test_noisy = tf.clip_by_value(x_test_noisy, clip_value_min=0., clip_value_max=1.)" @@ -492,7 +501,7 @@ }, "outputs": [], "source": [ - "encoded_imgs = autoencoder.encoder(x_test).numpy()\n", + "encoded_imgs = autoencoder.encoder(x_test_noisy).numpy()\n", "decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()" ] }, @@ -655,7 +664,7 @@ "id": "wVcTBDo-CqFS" }, "source": [ - "Plot a normal ECG. " + "Plot a normal ECG." ] }, { @@ -719,12 +728,12 @@ " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(8, activation=\"relu\")])\n", - " \n", + "\n", " self.decoder = tf.keras.Sequential([\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(140, activation=\"sigmoid\")])\n", - " \n", + "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", @@ -761,8 +770,8 @@ }, "outputs": [], "source": [ - "history = autoencoder.fit(normal_train_data, normal_train_data, \n", - " epochs=20, \n", + "history = autoencoder.fit(normal_train_data, normal_train_data,\n", + " epochs=20,\n", " batch_size=512,\n", " validation_data=(test_data, test_data),\n", " shuffle=True)" @@ -906,7 +915,7 @@ "id": "uEGlA1Be50Nj" }, "source": [ - "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial. " + "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial." ] }, { @@ -915,7 +924,7 @@ "id": "zpLSDAeb51D_" }, "source": [ - "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. 
By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier. " + "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier." ] }, { @@ -990,8 +999,18 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], - "name": "autoencoder.ipynb", + "gpuType": "T4", + "private_outputs": true, + "provenance": [ + { + "file_id": "17gKB2bKebV2DzoYIMFzyEXA5uDnwWOvT", + "timestamp": 1712793165979 + }, + { + "file_id": "https://github.com/tensorflow/docs/blob/master/site/en/tutorials/generative/autoencoder.ipynb", + "timestamp": 1712792176273 + } + ], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/generative/cyclegan.ipynb b/site/en/tutorials/generative/cyclegan.ipynb index 7136dd143ef..313be519591 100644 --- a/site/en/tutorials/generative/cyclegan.ipynb +++ b/site/en/tutorials/generative/cyclegan.ipynb @@ -154,7 +154,7 @@ "This is similar to what was done in [pix2pix](https://www.tensorflow.org/tutorials/generative/pix2pix#load_the_dataset)\n", "\n", "* In random jittering, the image is resized to `286 x 286` and then randomly cropped to `256 x 256`.\n", - "* In random mirroring, the image is randomly flipped horizontally i.e left to right." + "* In random mirroring, the image is randomly flipped horizontally i.e., left to right." ] }, { @@ -634,7 +634,7 @@ "source": [ "## Training\n", "\n", - "Note: This example model is trained for fewer epochs (40) than the paper (200) to keep training time reasonable for this tutorial. Predictions may be less accurate. " + "Note: This example model is trained for fewer epochs (10) than the paper (200) to keep training time reasonable for this tutorial. The generated images will have much lower quality." ] }, { @@ -645,7 +645,7 @@ }, "outputs": [], "source": [ - "EPOCHS = 40" + "EPOCHS = 10" ] }, { @@ -830,8 +830,7 @@ "colab": { "collapsed_sections": [], "name": "cyclegan.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/generative/data_compression.ipynb b/site/en/tutorials/generative/data_compression.ipynb new file mode 100644 index 00000000000..f756f088acd --- /dev/null +++ b/site/en/tutorials/generative/data_compression.ipynb @@ -0,0 +1,901 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Compression Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# Learned data compression" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xHxb-dlhMIzW" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook shows how to do lossy data compression using neural networks and [TensorFlow Compression](https://github.com/tensorflow/compression).\n", + "\n", + "Lossy compression involves making a trade-off between **rate**, the expected number of bits needed to encode a sample, and **distortion**, the expected error in the reconstruction of the sample.\n", + "\n", + "The examples below use an autoencoder-like model to compress images from the MNIST dataset. The method is based on the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704).\n", + "\n", + "More background on learned data compression can be found in [this paper](https://arxiv.org/abs/2007.03034) targeted at people familiar with classical data compression, or [this survey](https://arxiv.org/abs/2202.06533) targeted at a machine learning audience.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUXex9ctTuDB" + }, + "source": [ + "## Setup\n", + "\n", + "Install Tensorflow Compression via `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K489KsEgxuLI" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# Installs the latest version of TFC compatible with the installed TF version.\n", + "\n", + "read MAJOR MINOR <<< \"$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d+)\\.(\\d+).*/\\1 \\2/sg')\"\n", + "pip install \"tensorflow-compression<$MAJOR.$(($MINOR+1))\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfVAmHCVxpTS" + }, + "source": [ + "Import library dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IqR2PQG4ZaZ0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "import tensorflow_compression as tfc\n", + "import tensorflow_datasets as tfds\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wsncKT2iymgQ" + }, + "source": [ + "## Define the trainer model.\n", + "\n", + "Because the model resembles an autoencoder, and we need to perform a different set of functions during training and inference, the setup is a little different from, say, a classifier.\n", + "\n", + "The training model consists of three parts:\n", + "- the **analysis** (or encoder) transform, converting from the image into a latent space,\n", + "- the **synthesis** (or decoder) transform, converting from the latent space back into image space, and\n", + "- a **prior** and entropy model, modeling the marginal probabilities of the latents.\n", + "\n", + "First, define the transforms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8yZESLgW-vp1" + }, + "outputs": [], + "source": [ + "def make_analysis_transform(latent_dims):\n", + " \"\"\"Creates the analysis (encoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Conv2D(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2D(\n", + " 50, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " tf.keras.layers.Flatten(),\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " latent_dims, use_bias=True, activation=None, 
name=\"fc_2\"),\n", + " ], name=\"analysis_transform\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sHdYBzF2xcu" + }, + "outputs": [], + "source": [ + "def make_synthesis_transform():\n", + " \"\"\"Creates the synthesis (decoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " 2450, use_bias=True, activation=\"leaky_relu\", name=\"fc_2\"),\n", + " tf.keras.layers.Reshape((7, 7, 50)),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 1, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " ], name=\"synthesis_transform\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lYC8tHhkxTlK" + }, + "source": [ + "The trainer holds an instance of both transforms, as well as the parameters of the prior.\n", + "\n", + "Its `call` method is set up to compute:\n", + "- **rate**, an estimate of the number of bits needed to represent the batch of digits, and\n", + "- **distortion**, the mean absolute difference between the pixels of the original digits and their reconstructions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ROn2DbzsBirI" + }, + "outputs": [], + "source": [ + "class MNISTCompressionTrainer(tf.keras.Model):\n", + " \"\"\"Model that trains a compressor/decompressor for MNIST.\"\"\"\n", + "\n", + " def __init__(self, latent_dims):\n", + " super().__init__()\n", + " self.analysis_transform = make_analysis_transform(latent_dims)\n", + " self.synthesis_transform = make_synthesis_transform()\n", + " self.prior_log_scales = tf.Variable(tf.zeros((latent_dims,)))\n", + "\n", + " @property\n", + " def prior(self):\n", + " return tfc.NoisyLogistic(loc=0., scale=tf.exp(self.prior_log_scales))\n", + "\n", + " def call(self, x, training):\n", + " \"\"\"Computes rate and distortion losses.\"\"\"\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " x = tf.reshape(x, (-1, 28, 28, 1))\n", + "\n", + " # Compute latent space representation y, perturb it and model its entropy,\n", + " # then compute the reconstructed pixel-level representation x_hat.\n", + " y = self.analysis_transform(x)\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " self.prior, coding_rank=1, compression=False)\n", + " y_tilde, rate = entropy_model(y, training=training)\n", + " x_tilde = self.synthesis_transform(y_tilde)\n", + "\n", + " # Average number of bits per MNIST digit.\n", + " rate = tf.reduce_mean(rate)\n", + "\n", + " # Mean absolute difference across pixels.\n", + " distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "\n", + " return dict(rate=rate, distortion=distortion)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vEXbp9RV3kRX" + }, + "source": [ + "### Compute rate and distortion.\n", + "\n", + "Let's walk through this step by step, using one image from the training set. 
Load the MNIST dataset for training and validation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FV99WTrIBen" + }, + "outputs": [], + "source": [ + "training_dataset, validation_dataset = tfds.load(\n", + " \"mnist\",\n", + " split=[\"train\", \"test\"],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=False,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwKgNTg_QfjH" + }, + "source": [ + "And extract one image $x$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O-BSdeHcPBBf" + }, + "outputs": [], + "source": [ + "(x, _), = validation_dataset.take(1)\n", + "\n", + "plt.imshow(tf.squeeze(x))\n", + "print(f\"Data type: {x.dtype}\")\n", + "print(f\"Shape: {x.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V8IvuFkrRJIa" + }, + "source": [ + "To get the latent representation $y$, we need to cast it to `float32`, add a batch dimension, and pass it through the analysis transform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jA0DOWq23lEq" + }, + "outputs": [], + "source": [ + "x = tf.cast(x, tf.float32) / 255.\n", + "x = tf.reshape(x, (-1, 28, 28, 1))\n", + "y = make_analysis_transform(10)(x)\n", + "\n", + "print(\"y:\", y)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rTojJQvZT8SX" + }, + "source": [ + "The latents will be quantized at test time. To model this in a differentiable way during training, we add uniform noise in the interval $(-.5, .5)$ and call the result $\\tilde y$. This is the same terminology as used in the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Spr3503OUOFQ" + }, + "outputs": [], + "source": [ + "y_tilde = y + tf.random.uniform(y.shape, -.5, .5)\n", + "\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7hRN89R7SA3U" + }, + "source": [ + "The \"prior\" is a probability density that we train to model the marginal distribution of the noisy latents. For example, it could be a set of independent [logistic distributions](https://en.wikipedia.org/wiki/Logistic_distribution) with different scales for each latent dimension. `tfc.NoisyLogistic` accounts for the fact that the latents have additive noise. As the scale approaches zero, a logistic distribution approaches a dirac delta (spike), but the added noise causes the \"noisy\" distribution to approach the uniform distribution instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2tmA1Bw7ReMY" + }, + "outputs": [], + "source": [ + "prior = tfc.NoisyLogistic(loc=0., scale=tf.linspace(.01, 2., 10))\n", + "\n", + "_ = tf.linspace(-6., 6., 501)[:, None]\n", + "plt.plot(_, prior.prob(_));\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2NSWtBZmUvVY" + }, + "source": [ + "During training, `tfc.ContinuousBatchedEntropyModel` adds uniform noise, and uses the noise and the prior to compute a (differentiable) upper bound on the rate (the average number of bits necessary to encode the latent representation). That bound can be minimized as a loss." 
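As a rough illustration of the paragraph above (ignoring the tail-mass and likelihood-bounding details the entropy model handles internally), the per-example rate is approximately the negative log2-likelihood of the noisy latents under the prior, summed over the latent dimensions; a sketch assuming `prior` and `y_tilde` from the cells above:

approx_bits = -tf.reduce_sum(
    tf.math.log(prior.prob(y_tilde)), axis=-1) / tf.math.log(2.)
print("approximate rate in bits:", approx_bits)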
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hFuGlyJuThBC" + }, + "outputs": [], + "source": [ + "entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " prior, coding_rank=1, compression=False)\n", + "y_tilde, rate = entropy_model(y, training=True)\n", + "\n", + "print(\"rate:\", rate)\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cyr8DGgmWd32" + }, + "source": [ + "Lastly, the noisy latents are passed back through the synthesis transform to produce an image reconstruction $\\tilde x$. Distortion is the error between original image and reconstruction. Obviously, with the transforms untrained, the reconstruction is not very useful." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtmI0xGEVym0" + }, + "outputs": [], + "source": [ + "x_tilde = make_synthesis_transform()(y_tilde)\n", + "\n", + "# Mean absolute difference across pixels.\n", + "distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "print(\"distortion:\", distortion)\n", + "\n", + "x_tilde = tf.saturate_cast(x_tilde[0] * 255, tf.uint8)\n", + "plt.imshow(tf.squeeze(x_tilde))\n", + "print(f\"Data type: {x_tilde.dtype}\")\n", + "print(f\"Shape: {x_tilde.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVz3I7E8ecij" + }, + "source": [ + "For every batch of digits, calling the `MNISTCompressionTrainer` produces the rate and distortion as an average over that batch:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ICJnjj1LeB8L" + }, + "outputs": [], + "source": [ + "(example_batch, _), = validation_dataset.batch(32).take(1)\n", + "trainer = MNISTCompressionTrainer(10)\n", + "example_output = trainer(example_batch)\n", + "\n", + "print(\"rate: \", example_output[\"rate\"])\n", + "print(\"distortion: \", example_output[\"distortion\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lgdfRtmee5Mn" + }, + "source": [ + "In the next section, we set up the model to do gradient descent on these two losses." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fKGVwv5MAq6w" + }, + "source": [ + "## Train the model.\n", + "\n", + "We compile the trainer in a way that it optimizes the rate–distortion Lagrangian, that is, a sum of rate and distortion, where one of the terms is weighted by Lagrange parameter $\\lambda$.\n", + "\n", + "This loss function affects the different parts of the model differently:\n", + "- The analysis transform is trained to produce a latent representation that achieves the desired trade-off between rate and distortion.\n", + "- The synthesis transform is trained to minimize distortion, given the latent representation.\n", + "- The parameters of the prior are trained to minimize the rate given the latent representation. This is identical to fitting the prior to the marginal distribution of latents in a maximum likelihood sense." 
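In other words, the scalar being minimized is a weighted sum of the two outputs; a small sketch matching the `loss_weights` used in the next cell:

def rate_distortion_loss(rate, distortion, lmbda):
  # Same weighting as loss_weights=dict(rate=1., distortion=lmbda) below.
  return rate + lmbda * distortion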
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k5mm1aDkcgAf" + }, + "outputs": [], + "source": [ + "def pass_through_loss(_, x):\n", + " # Since rate and distortion are unsupervised, the loss doesn't need a target.\n", + " return x\n", + "\n", + "def make_mnist_compression_trainer(lmbda, latent_dims=50):\n", + " trainer = MNISTCompressionTrainer(latent_dims)\n", + " trainer.compile(\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n", + " # Just pass through rate and distortion as losses/metrics.\n", + " loss=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " metrics=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " loss_weights=dict(rate=1., distortion=lmbda),\n", + " )\n", + " return trainer\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DPwd4DTs3Mfr" + }, + "source": [ + "Next, train the model. The human annotations are not necessary here, since we just want to compress the images, so we drop them using a `map` and instead add \"dummy\" targets for rate and distortion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QNBpCTgzAV7M" + }, + "outputs": [], + "source": [ + "def add_rd_targets(image, label):\n", + " # Training is unsupervised, so labels aren't necessary here. However, we\n", + " # need to add \"dummy\" targets for rate and distortion.\n", + " return image, dict(rate=0., distortion=0.)\n", + "\n", + "def train_mnist_model(lmbda):\n", + " trainer = make_mnist_compression_trainer(lmbda)\n", + " trainer.fit(\n", + " training_dataset.map(add_rd_targets).batch(128).prefetch(8),\n", + " epochs=15,\n", + " validation_data=validation_dataset.map(add_rd_targets).batch(128).cache(),\n", + " validation_freq=1,\n", + " verbose=1,\n", + " )\n", + " return trainer\n", + "\n", + "trainer = train_mnist_model(lmbda=2000)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Td4xuttmCd7T" + }, + "source": [ + "## Compress some MNIST images.\n", + "\n", + "For compression and decompression at test time, we split the trained model in two parts:\n", + "\n", + "- The encoder side consists of the analysis transform and the entropy model.\n", + "- The decoder side consists of the synthesis transform and the same entropy model.\n", + "\n", + "At test time, the latents will not have additive noise, but they will be quantized and then losslessly compressed, so we give them new names. We call them and the image reconstruction $\\hat x$ and $\\hat y$, respectively (following [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704))." 
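Conceptually, the quantization that replaces the training-time noise is rounding, and it is the rounded latents that get range-coded losslessly; a one-line sketch assuming `y` from the walk-through above (the entropy model may also apply an offset internally):

y_hat = tf.round(y)  # stand-in for the values that compress()/decompress() round-trip exactly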
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sBRAPa5jksss" + }, + "outputs": [], + "source": [ + "class MNISTCompressor(tf.keras.Model):\n", + " \"\"\"Compresses MNIST images to strings.\"\"\"\n", + "\n", + " def __init__(self, analysis_transform, entropy_model):\n", + " super().__init__()\n", + " self.analysis_transform = analysis_transform\n", + " self.entropy_model = entropy_model\n", + "\n", + " def call(self, x):\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " y = self.analysis_transform(x)\n", + " # Also return the exact information content of each digit.\n", + " _, bits = self.entropy_model(y, training=False)\n", + " return self.entropy_model.compress(y), bits\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sSZ0X2xPnkN-" + }, + "outputs": [], + "source": [ + "class MNISTDecompressor(tf.keras.Model):\n", + " \"\"\"Decompresses MNIST images from strings.\"\"\"\n", + "\n", + " def __init__(self, entropy_model, synthesis_transform):\n", + " super().__init__()\n", + " self.entropy_model = entropy_model\n", + " self.synthesis_transform = synthesis_transform\n", + "\n", + " def call(self, string):\n", + " y_hat = self.entropy_model.decompress(string, ())\n", + " x_hat = self.synthesis_transform(y_hat)\n", + " # Scale and cast back to 8-bit integer.\n", + " return tf.saturate_cast(tf.round(x_hat * 255.), tf.uint8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GI7rxeOUDnaC" + }, + "source": [ + "When instantiated with `compression=True`, the entropy model converts the learned prior into tables for a range coding algorithm. When calling `compress()`, this algorithm is invoked to convert the latent space vector into bit sequences. The length of each binary string approximates the information content of the latent (the negative log likelihood of the latent under the prior).\n", + "\n", + "The entropy model for compression and decompression must be the same instance, because the range coding tables need to be exactly identical on both sides. Otherwise, decoding errors can occur." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dnm_p7mbnigo" + }, + "outputs": [], + "source": [ + "def make_mnist_codec(trainer, **kwargs):\n", + " # The entropy model must be created with `compression=True` and the same\n", + " # instance must be shared between compressor and decompressor.\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " trainer.prior, coding_rank=1, compression=True, **kwargs)\n", + " compressor = MNISTCompressor(trainer.analysis_transform, entropy_model)\n", + " decompressor = MNISTDecompressor(entropy_model, trainer.synthesis_transform)\n", + " return compressor, decompressor\n", + "\n", + "compressor, decompressor = make_mnist_codec(trainer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SYu5sVVH3YMv" + }, + "source": [ + "Grab 16 images from the validation dataset. You can select a different subset by changing the argument to `skip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qAxArlU728K5" + }, + "outputs": [], + "source": [ + "(originals, _), = validation_dataset.batch(16).skip(3).take(1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHeN_ny929YS" + }, + "source": [ + "Compress them to strings, and keep track of each of their information content in bits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "smOk42gQ3IXv" + }, + "outputs": [], + "source": [ + "strings, entropies = compressor(originals)\n", + "\n", + "print(f\"String representation of first digit in hexadecimal: 0x{strings[0].numpy().hex()}\")\n", + "print(f\"Number of bits actually needed to represent it: {entropies[0]:0.2f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5j9R4bTT3Qhl" + }, + "source": [ + "Decompress the images back from the strings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yOP6pEqU3P0w" + }, + "outputs": [], + "source": [ + "reconstructions = decompressor(strings)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JWo0Q-vy23tt" + }, + "source": [ + "Display each of the 16 original digits together with its compressed binary representation, and the reconstructed digit." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "jU5IqzZzeEpf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def display_digits(originals, strings, entropies, reconstructions):\n", + " \"\"\"Visualizes 16 digits together with their reconstructions.\"\"\"\n", + " fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(12.5, 5))\n", + " axes = axes.ravel()\n", + " for i in range(len(axes)):\n", + " image = tf.concat([\n", + " tf.squeeze(originals[i]),\n", + " tf.zeros((28, 14), tf.uint8),\n", + " tf.squeeze(reconstructions[i]),\n", + " ], 1)\n", + " axes[i].imshow(image)\n", + " axes[i].text(\n", + " .5, .5, f\"→ 0x{strings[i].numpy().hex()} →\\n{entropies[i]:0.2f} bits\",\n", + " ha=\"center\", va=\"top\", color=\"white\", fontsize=\"small\",\n", + " transform=axes[i].transAxes)\n", + " axes[i].axis(\"off\")\n", + " plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "km9PqVEtPJPc" + }, + "outputs": [], + "source": [ + "display_digits(originals, strings, entropies, reconstructions)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EzlrIOiYOzJc" + }, + "source": [ + "Note that the length of the encoded string differs from the information content of each digit.\n", + "\n", + "This is because the range coding process works with discrete probabilities, and has a small amount of overhead. So, especially for short strings, the correspondence is only approximate. However, range coding is **asymptotically optimal**: in the limit, the expected bit count will approach the cross entropy (the expected information content), for which the rate term in the training model is an upper bound." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "78qIG8t8FvJW" + }, + "source": [ + "## The rate–distortion trade-off\n", + "\n", + "Above, the model was trained for a specific trade-off (given by `lmbda=2000`) between the average number of bits used to represent each digit and the incurred error in the reconstruction.\n", + "\n", + "What happens when we repeat the experiment with different values?\n", + "\n", + "Let's start by reducing $\\lambda$ to 500." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1iFcAD0WF78p" + }, + "outputs": [], + "source": [ + "def train_and_visualize_model(lmbda):\n", + " trainer = train_mnist_model(lmbda=lmbda)\n", + " compressor, decompressor = make_mnist_codec(trainer)\n", + " strings, entropies = compressor(originals)\n", + " reconstructions = decompressor(strings)\n", + " display_digits(originals, strings, entropies, reconstructions)\n", + "\n", + "train_and_visualize_model(lmbda=500)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uy5OkgJMObMc" + }, + "source": [ + "The bit rate of our code goes down, as does the fidelity of the digits. However, most of the digits remain recognizable.\n", + "\n", + "Let's reduce $\\lambda$ further." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NQp9_9_5GcxH" + }, + "outputs": [], + "source": [ + "train_and_visualize_model(lmbda=300)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ELLMANN1OwMQ" + }, + "source": [ + "The strings begin to get much shorter now, on the order of one byte per digit. However, this comes at a cost. More digits are becoming unrecognizable.\n", + "\n", + "This demonstrates that this model is agnostic to human perceptions of error, it just measures the absolute deviation in terms of pixel values. To achieve a better perceived image quality, we would need to replace the pixel loss with a perceptual loss." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v9cWHtH0LP_r" + }, + "source": [ + "## Use the decoder as a generative model.\n", + "\n", + "If we feed the decoder random bits, this will effectively sample from the distribution that the model learned to represent digits.\n", + "\n", + "First, re-instantiate the compressor/decompressor without a sanity check that would detect if the input string isn't completely decoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qnic8YsM0_ke" + }, + "outputs": [], + "source": [ + "compressor, decompressor = make_mnist_codec(trainer, decode_sanity_check=False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "86uc9_Is1eeo" + }, + "source": [ + "Now, feed long enough random strings into the decompressor so that it can decode/sample digits from them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o4fP7BkqKCHY" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "strings = tf.constant([os.urandom(8) for _ in range(16)])\n", + "samples = decompressor(strings)\n", + "\n", + "fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(5, 5))\n", + "axes = axes.ravel()\n", + "for i in range(len(axes)):\n", + " axes[i].imshow(tf.squeeze(samples[i]))\n", + " axes[i].axis(\"off\")\n", + "plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "data_compression.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/generative/deepdream.ipynb b/site/en/tutorials/generative/deepdream.ipynb index e6d0d85fc17..e4a675ebed6 100644 --- a/site/en/tutorials/generative/deepdream.ipynb +++ b/site/en/tutorials/generative/deepdream.ipynb @@ -103,9 +103,7 @@ "import matplotlib as mpl\n", "\n", "import IPython.display as display\n", - "import PIL.Image\n", - "\n", - "from tensorflow.keras.preprocessing import image" + "import PIL.Image" ] }, { @@ -514,19 +512,20 @@ " @tf.function(\n", " input_signature=(\n", " tf.TensorSpec(shape=[None,None,3], dtype=tf.float32),\n", + " tf.TensorSpec(shape=[2], dtype=tf.int32),\n", " tf.TensorSpec(shape=[], dtype=tf.int32),)\n", " )\n", - " def __call__(self, img, tile_size=512):\n", + " def __call__(self, img, img_size, tile_size=512):\n", " shift, img_rolled = random_roll(img, tile_size)\n", "\n", " # Initialize the image gradients to zero.\n", " gradients = tf.zeros_like(img_rolled)\n", " \n", " # Skip the last tile, unless there's only one tile.\n", - " xs = tf.range(0, img_rolled.shape[0], tile_size)[:-1]\n", + " xs = tf.range(0, img_size[1], tile_size)[:-1]\n", " if not tf.cast(len(xs), bool):\n", " xs = tf.constant([0])\n", - " ys = tf.range(0, img_rolled.shape[1], tile_size)[:-1]\n", + " ys = tf.range(0, img_size[0], tile_size)[:-1]\n", " if not tf.cast(len(ys), bool):\n", " ys = tf.constant([0])\n", "\n", @@ -539,7 +538,7 @@ " tape.watch(img_rolled)\n", "\n", " # Extract a tile out of the image.\n", - " img_tile = img_rolled[x:x+tile_size, y:y+tile_size]\n", + " img_tile = img_rolled[y:y+tile_size, x:x+tile_size]\n", " loss = calc_loss(img_tile, self.model)\n", "\n", " # Update the image gradients for this tile.\n", @@ -585,7 +584,7 @@ "def run_deep_dream_with_octaves(img, steps_per_octave=100, step_size=0.01, \n", " octaves=range(-2,3), octave_scale=1.3):\n", " base_shape = tf.shape(img)\n", - " img = tf.keras.preprocessing.image.img_to_array(img)\n", + " img = tf.keras.utils.img_to_array(img)\n", " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", "\n", " initial_shape = img.shape[:-1]\n", @@ -593,10 +592,11 @@ " for octave in octaves:\n", " # Scale the image based on the octave\n", " new_size = tf.cast(tf.convert_to_tensor(base_shape[:-1]), tf.float32)*(octave_scale**octave)\n", - " img = tf.image.resize(img, tf.cast(new_size, tf.int32))\n", + " new_size = tf.cast(new_size, tf.int32)\n", + " img = tf.image.resize(img, new_size)\n", "\n", " for step in range(steps_per_octave):\n", - " gradients = get_tiled_gradients(img)\n", + " gradients = get_tiled_gradients(img, new_size)\n", " img = img + gradients*step_size\n", " img = tf.clip_by_value(img, -1, 1)\n", "\n", diff --git 
a/site/en/tutorials/generative/pix2pix.ipynb b/site/en/tutorials/generative/pix2pix.ipynb index 82cf08dfab7..e380924d04d 100644 --- a/site/en/tutorials/generative/pix2pix.ipynb +++ b/site/en/tutorials/generative/pix2pix.ipynb @@ -72,13 +72,13 @@ "source": [ "This tutorial demonstrates how to build and train a conditional generative adversarial network (cGAN) called pix2pix that learns a mapping from input images to output images, as described in [Image-to-image translation with conditional adversarial networks](https://arxiv.org/abs/1611.07004) by Isola et al. (2017). pix2pix is not application specific—it can be applied to a wide range of tasks, including synthesizing photos from label maps, generating colorized photos from black and white images, turning Google Maps photos into aerial images, and even transforming sketches into photos.\n", "\n", - "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy]((https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/)) of this dataset created by the pix2pix authors.\n", + "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy](https://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) of this dataset created by the pix2pix authors.\n", "\n", "In the pix2pix cGAN, you condition on input images and generate corresponding output images. cGANs were first proposed in [Conditional Generative Adversarial Nets](https://arxiv.org/abs/1411.1784) (Mirza and Osindero, 2014)\n", "\n", "The architecture of your network will contain:\n", "\n", - "- A generator with a [U-Net]([U-Net](https://arxiv.org/abs/1505.04597))-based architecture.\n", + "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597)-based architecture.\n", "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004)).\n", "\n", "Note that each epoch can take around 15 seconds on a single V100 GPU.\n", @@ -125,7 +125,7 @@ "source": [ "## Load the dataset\n", "\n", - "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB). " + "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB in size). 
" ] }, { @@ -156,7 +156,9 @@ "\n", "path_to_zip = pathlib.Path(path_to_zip)\n", "\n", - "PATH = path_to_zip.parent/dataset_name" + "extraction_dir = f'{dataset_name}_extracted/{dataset_name}'\n", + "\n", + "PATH = path_to_zip.parent/extraction_dir" ] }, { @@ -226,7 +228,7 @@ "def load(image_file):\n", " # Read and decode an image file to a uint8 tensor\n", " image = tf.io.read_file(image_file)\n", - " image = tf.image.decode_jpeg(image)\n", + " image = tf.io.decode_jpeg(image)\n", "\n", " # Split each image tensor into two tensors:\n", " # - one with a real building facade image\n", @@ -280,7 +282,7 @@ "\n", "1. Resize each `256 x 256` image to a larger height and width—`286 x 286`.\n", "2. Randomly crop it back to `256 x 256`.\n", - "3. Randomly flip the image horizontally i.e. left to right (random mirroring).\n", + "3. Randomly flip the image horizontally i.e., left to right (random mirroring).\n", "4. Normalize the images to the `[-1, 1]` range." ] }, @@ -490,7 +492,7 @@ "source": [ "## Build the generator\n", "\n", - "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](https://www.tensorflow.org/tutorials/images/segmentation) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", + "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](../images/segmentation.ipynb) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", "\n", "- Each block in the encoder is: Convolution -> Batch normalization -> Leaky ReLU\n", "- Each block in the decoder is: Transposed convolution -> Batch normalization -> Dropout (applied to the first 3 blocks) -> ReLU\n", @@ -1007,8 +1009,7 @@ "id": "Rb0QQFHF-JfS" }, "source": [ - "Note: The `training=True` is intentional here since\n", - "you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." + "Note: The `training=True` is intentional here since you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." ] }, { @@ -1181,7 +1182,8 @@ "\n", "If you work on a local machine, you would launch a separate TensorBoard process. 
When working in a notebook, launch the viewer before starting the training to monitor with TensorBoard.\n", "\n", - "To launch the viewer paste the following into a code-cell:" + "Launch the TensorBoard viewer (Sorry, this doesn't\n", + "display on tensorflow.org):" ] }, { @@ -1199,72 +1201,30 @@ { "cell_type": "markdown", "metadata": { - "id": "Pe0-8Bzg22ox" - }, - "source": [ - "Finally, run the training loop:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a1zZmKmvOH85" - }, - "outputs": [], - "source": [ - "fit(train_dataset, test_dataset, steps=40000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oeq9sByu86-B" - }, - "source": [ - "If you want to share the TensorBoard results _publicly_, you can upload the logs to [TensorBoard.dev](https://tensorboard.dev/) by copying the following into a code-cell.\n", - "\n", - "Note: This requires a Google account.\n", - "\n", - "```\n", - "!tensorboard dev upload --logdir {log_dir}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l-kT7WHRKz-E" + "id": "fyjixlMlBybN" }, "source": [ - "Caution: This command does not terminate. It's designed to continuously upload the results of long-running experiments. Once your data is uploaded you need to stop it using the \"interrupt execution\" option in your notebook tool." + "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/)." ] }, { "cell_type": "markdown", "metadata": { - "id": "-lGhS_LfwQoL" + "id": "Pe0-8Bzg22ox" }, "source": [ - "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).\n", - "\n", - "TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone.\n", - "\n", - "It can also included inline using an `