diff --git a/.circleci/config.yml b/.circleci/config.yml index 7e497d755a14..19428b7bb9c2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,14 @@ jobs: parallelism: 1 steps: - checkout + - run: if [[ "$CIRCLE_PULL_REQUEST" == "" && "$CIRCLE_BRANCH" != "main" && "$CIRCLE_BRANCH" != *-release ]]; then echo "Not a PR, not the main branch and not a release branch, skip test!"; circleci-agent step halt; fi + - run: 'curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/${CIRCLE_PULL_REQUEST##*/} >> github.txt' + - run: cat github.txt + - run: (python3 -c 'import json; from datetime import datetime; fp = open("github.txt"); data = json.load(fp); fp.close(); f = "%Y-%m-%dT%H:%M:%SZ"; created = datetime.strptime(data["created_at"], f); updated = datetime.strptime(data["updated_at"], f); s = (updated - created).total_seconds(); print(int(s))' || true) > elapsed.txt + - run: if [ "$(cat elapsed.txt)" == "" ]; then echo 60 > elapsed.txt; fi + - run: cat elapsed.txt + - run: if [ "$(cat elapsed.txt)" -lt "30" ]; then echo "PR is just opened, wait some actions from GitHub"; sleep 30; fi + - run: 'if grep -q "\"draft\": true," github.txt; then echo "draft mode, skip test!"; circleci-agent step halt; fi' - run: uv pip install -U -e . - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation @@ -146,7 +154,7 @@ jobs: path: ~/transformers/installed.txt - run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) - run: ruff check examples tests src utils - - run: ruff format tests src utils --check + - run: ruff format examples tests src utils --check - run: python utils/custom_init_isort.py --check_only - run: python utils/sort_auto_mappings.py --check_only - run: python utils/check_doc_toc.py @@ -170,8 +178,7 @@ jobs: - store_artifacts: path: ~/transformers/installed.txt - run: python utils/check_copies.py - - run: python utils/check_modular_conversion.py --num_workers 4 - - run: python utils/check_table.py + - run: python utils/check_modular_conversion.py - run: python utils/check_dummies.py - run: python utils/check_repo.py - run: python utils/check_inits.py @@ -181,7 +188,6 @@ jobs: - run: make deps_table_check_updated - run: python utils/update_metadata.py --check-only - run: python utils/check_docstrings.py - - run: python utils/check_support_list.py workflows: version: 2 diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 71c75dac2ff0..2e87b4c2e1a8 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -28,13 +28,30 @@ "TRANSFORMERS_IS_CI": True, "PYTEST_TIMEOUT": 120, "RUN_PIPELINE_TESTS": False, - "RUN_PT_TF_CROSS_TESTS": False, - "RUN_PT_FLAX_CROSS_TESTS": False, } # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical -COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None} +COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None} DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}] +# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures` +# to rerun the tests that match these patterns. 
+FLAKY_TEST_FAILURE_PATTERNS = [ + "OSError", # Machine/connection transient error + "Timeout", # Machine/connection transient error + "ConnectionError", # Connection transient error + "FileNotFoundError", # Raised by `datasets` on Hub failures + "PIL.UnidentifiedImageError", # Raised by `PIL.Image.open` on connection issues + "HTTPError", # Also catches HfHubHTTPError + "AssertionError: Tensor-likes are not close!", # `torch.testing.assert_close`, we might have unlucky random values + # TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle + # them under a single message. + "TypeError: expected str, bytes or os.PathLike object, not NoneType", + "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType", + "Converting from Tiktoken failed", + "KeyError: > ' steps = [ "checkout", @@ -152,9 +171,10 @@ def to_dict(self): "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" } }, + {"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}}, {"run": { "name": "Run tests", - "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} + "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} }, {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, {"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, @@ -177,23 +197,6 @@ def job_name(self): # JOBS -torch_and_tf_job = CircleCIJob( - "torch_and_tf", - docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], - additional_env={"RUN_PT_TF_CROSS_TESTS": True}, - marker="is_pt_tf_cross_test", - pytest_options={"rA": None, "durations": 0}, -) - - -torch_and_flax_job = CircleCIJob( - "torch_and_flax", - additional_env={"RUN_PT_FLAX_CROSS_TESTS": True}, - docker_image=[{"image":"huggingface/transformers-torch-jax-light"}], - marker="is_pt_flax_cross_test", - pytest_options={"rA": None, "durations": 0}, -) - torch_job = CircleCIJob( "torch", docker_image=[{"image": "huggingface/transformers-torch-light"}], @@ -204,6 +207,9 @@ def job_name(self): generate_job = CircleCIJob( "generate", docker_image=[{"image": "huggingface/transformers-torch-light"}], + # networkx==3.3 (after #36957) cause some issues + # TODO: remove this once it works directly + install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"], marker="generate", parallelism=6, ) @@ -267,6 +273,7 @@ def job_name(self): docker_image=[{"image":"huggingface/transformers-examples-torch"}], # TODO @ArthurZucker remove this once docker is easier to build install_steps=["uv venv && uv pip install . 
&& uv pip install -r examples/pytorch/_tests_requirements.txt"], + pytest_num_workers=4, ) @@ -274,6 +281,7 @@ def job_name(self): "examples_tensorflow", additional_env={"OMP_NUM_THREADS": 8}, docker_image=[{"image":"huggingface/transformers-examples-tf"}], + pytest_num_workers=2, ) @@ -324,6 +332,9 @@ def job_name(self): non_model_job = CircleCIJob( "non_model", docker_image=[{"image": "huggingface/transformers-torch-light"}], + # networkx==3.3 (after #36957) cause some issues + # TODO: remove this once it works directly + install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"], marker="not generate", parallelism=6, ) @@ -353,9 +364,9 @@ def job_name(self): pytest_num_workers=1, ) -REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip -EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job] -PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job] +REGULAR_TESTS = [torch_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip +EXAMPLES_TESTS = [examples_torch_job] +PIPELINE_TESTS = [pipelines_torch_job] REPO_UTIL_TESTS = [repo_utils_job] DOC_TESTS = [doc_test_job] ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 9b2c00bac50d..6d5eca4f1ec8 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,21 +38,21 @@ body: - text models: @ArthurZucker - vision models: @amyeroberts, @qubvel - - speech models: @ylacombe, @eustlb + - speech models: @eustlb - graph models: @clefourrier Library: - - flax: @sanchit-gandhi + - flax: @gante and @Rocketknight1 - generate: @zucchini-nlp (visual-language models) or @gante (all others) - pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker and @itazap - - trainer: @muellerzr @SunMarc + - trainer: @zach-huggingface @SunMarc Integrations: - - deepspeed: HF Trainer/Accelerate: @muellerzr + - deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber @@ -72,7 +72,7 @@ body: Maintained examples (not research project or legacy): - - Flax: @sanchit-gandhi + - Flax: @Rocketknight1 - PyTorch: See Models above and tag the person corresponding to the modality of the example. 
- TensorFlow: @Rocketknight1 diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index ee7a7eaae113..439ab02ebc92 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -41,22 +41,22 @@ Models: - text models: @ArthurZucker - vision models: @amyeroberts, @qubvel -- speech models: @ylacombe, @eustlb +- speech models: @eustlb - graph models: @clefourrier Library: -- flax: @sanchit-gandhi +- flax: @gante and @Rocketknight1 - generate: @zucchini-nlp (visual-language models) or @gante (all others) - pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker -- trainer: @muellerzr and @SunMarc +- trainer: @zach-huggingface and @SunMarc - chat templates: @Rocketknight1 Integrations: -- deepspeed: HF Trainer/Accelerate: @muellerzr +- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber @@ -72,7 +72,7 @@ HF projects: Maintained examples (not research project or legacy): -- Flax: @sanchit-gandhi +- Flax: @Rocketknight1 - PyTorch: See Models above and tag the person corresponding to the modality of the example. - TensorFlow: @Rocketknight1 diff --git a/.github/scripts/assign_reviewers.py b/.github/scripts/assign_reviewers.py new file mode 100644 index 000000000000..548ea3cb49b0 --- /dev/null +++ b/.github/scripts/assign_reviewers.py @@ -0,0 +1,102 @@ +# coding=utf-8 +# Copyright 2025 the HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import github +import json +from github import Github +import re +from collections import Counter +from pathlib import Path + +def pattern_to_regex(pattern): + if pattern.startswith("/"): + start_anchor = True + pattern = re.escape(pattern[1:]) + else: + start_anchor = False + pattern = re.escape(pattern) + # Replace `*` with "any number of non-slash characters" + pattern = pattern.replace(r"\*", "[^/]*") + if start_anchor: + pattern = r"^\/?" + pattern # Allow an optional leading slash after the start of the string + return pattern + +def get_file_owners(file_path, codeowners_lines): + # Process lines in reverse (last matching pattern takes precedence) + for line in reversed(codeowners_lines): + # Skip comments and empty lines, strip inline comments + line = line.split('#')[0].strip() + if not line: + continue + + # Split into pattern and owners + parts = line.split() + pattern = parts[0] + # Can be empty, e.g. for dummy files with explicitly no owner! + owners = [owner.removeprefix("@") for owner in parts[1:]] + + # Check if file matches pattern + file_regex = pattern_to_regex(pattern) + if re.search(file_regex, file_path) is not None: + return owners # Remember, can still be empty! 
+ return [] # Should never happen, but just in case + +def main(): + script_dir = Path(__file__).parent.absolute() + with open(script_dir / "codeowners_for_review_action") as f: + codeowners_lines = f.readlines() + + g = Github(os.environ['GITHUB_TOKEN']) + repo = g.get_repo("huggingface/transformers") + with open(os.environ['GITHUB_EVENT_PATH']) as f: + event = json.load(f) + + # The PR number is available in the event payload + pr_number = event['pull_request']['number'] + pr = repo.get_pull(pr_number) + pr_author = pr.user.login + + existing_reviews = list(pr.get_reviews()) + if existing_reviews: + print(f"Already has reviews: {[r.user.login for r in existing_reviews]}") + return + + users_requested, teams_requested = pr.get_review_requests() + users_requested = list(users_requested) + if users_requested: + print(f"Reviewers already requested: {users_requested}") + return + + locs_per_owner = Counter() + for file in pr.get_files(): + owners = get_file_owners(file.filename, codeowners_lines) + for owner in owners: + locs_per_owner[owner] += file.changes + + # Assign the top 2 based on locs changed as reviewers, but skip the owner if present + locs_per_owner.pop(pr_author, None) + top_owners = locs_per_owner.most_common(2) + print("Top owners", top_owners) + top_owners = [owner[0] for owner in top_owners] + try: + pr.create_review_request(top_owners) + except github.GithubException as e: + print(f"Failed to request review for {top_owners}: {e}") + + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/codeowners_for_review_action b/.github/scripts/codeowners_for_review_action new file mode 100644 index 000000000000..7325b0f570cc --- /dev/null +++ b/.github/scripts/codeowners_for_review_action @@ -0,0 +1,370 @@ +# Top-level rules are matched only if nothing else matches +* @Rocketknight1 @ArthurZucker # if no one is pinged based on the other rules, he will do the dispatch +*.md @stevhliu +*tokenization* @ArthurZucker +docs/ @stevhliu +/benchmark/ @McPatate +/docker/ @ydshieh @ArthurZucker + +# More high-level globs catch cases when specific rules later don't apply +/src/transformers/models/*/processing* @molbap @yonigozlan @qubvel +/src/transformers/models/*/image_processing* @qubvel +/src/transformers/models/*/image_processing_*_fast* @yonigozlan + +# Owners of subsections of the library +/src/transformers/generation/ @gante +/src/transformers/pipeline/ @Rocketknight1 @yonigozlan +/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface +/src/transformers/quantizers/ @SunMarc @MekkCyber +tests/ @ydshieh +tests/generation/ @gante + +/src/transformers/models/auto/ @ArthurZucker +/src/transformers/utils/ @ArthurZucker @Rocketknight1 +/src/transformers/loss/ @ArthurZucker +/src/transformers/onnx/ @michaelbenayoun + +# Specific files come after the sections/globs, so they take priority +/.circleci/config.yml @ArthurZucker @ydshieh +/utils/tests_fetcher.py @ydshieh +trainer.py @zach-huggingface @SunMarc +trainer_utils.py @zach-huggingface @SunMarc +/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker + +# Owners of individual models are specific / high priority, and so they come last +# mod* captures modeling and modular files + +# Text models +/src/transformers/models/albert/mod*_albert* @ArthurZucker +/src/transformers/models/bamba/mod*_bamba* @ArthurZucker +/src/transformers/models/bart/mod*_bart* @ArthurZucker +/src/transformers/models/barthez/mod*_barthez* @ArthurZucker +/src/transformers/models/bartpho/mod*_bartpho* @ArthurZucker 
+/src/transformers/models/bert/mod*_bert* @ArthurZucker +/src/transformers/models/bert_generation/mod*_bert_generation* @ArthurZucker +/src/transformers/models/bert_japanese/mod*_bert_japanese* @ArthurZucker +/src/transformers/models/bertweet/mod*_bertweet* @ArthurZucker +/src/transformers/models/big_bird/mod*_big_bird* @ArthurZucker +/src/transformers/models/bigbird_pegasus/mod*_bigbird_pegasus* @ArthurZucker +/src/transformers/models/biogpt/mod*_biogpt* @ArthurZucker +/src/transformers/models/blenderbot/mod*_blenderbot* @ArthurZucker +/src/transformers/models/blenderbot_small/mod*_blenderbot_small* @ArthurZucker +/src/transformers/models/bloom/mod*_bloom* @ArthurZucker +/src/transformers/models/bort/mod*_bort* @ArthurZucker +/src/transformers/models/byt5/mod*_byt5* @ArthurZucker +/src/transformers/models/camembert/mod*_camembert* @ArthurZucker +/src/transformers/models/canine/mod*_canine* @ArthurZucker +/src/transformers/models/codegen/mod*_codegen* @ArthurZucker +/src/transformers/models/code_llama/mod*_code_llama* @ArthurZucker +/src/transformers/models/cohere/mod*_cohere* @ArthurZucker +/src/transformers/models/cohere2/mod*_cohere2* @ArthurZucker +/src/transformers/models/convbert/mod*_convbert* @ArthurZucker +/src/transformers/models/cpm/mod*_cpm* @ArthurZucker +/src/transformers/models/cpmant/mod*_cpmant* @ArthurZucker +/src/transformers/models/ctrl/mod*_ctrl* @ArthurZucker +/src/transformers/models/dbrx/mod*_dbrx* @ArthurZucker +/src/transformers/models/deberta/mod*_deberta* @ArthurZucker +/src/transformers/models/deberta_v2/mod*_deberta_v2* @ArthurZucker +/src/transformers/models/dialogpt/mod*_dialogpt* @ArthurZucker +/src/transformers/models/diffllama/mod*_diffllama* @ArthurZucker +/src/transformers/models/distilbert/mod*_distilbert* @ArthurZucker +/src/transformers/models/dpr/mod*_dpr* @ArthurZucker +/src/transformers/models/electra/mod*_electra* @ArthurZucker +/src/transformers/models/encoder_decoder/mod*_encoder_decoder* @ArthurZucker +/src/transformers/models/ernie/mod*_ernie* @ArthurZucker +/src/transformers/models/ernie_m/mod*_ernie_m* @ArthurZucker +/src/transformers/models/esm/mod*_esm* @ArthurZucker +/src/transformers/models/falcon/mod*_falcon* @ArthurZucker +/src/transformers/models/falcon3/mod*_falcon3* @ArthurZucker +/src/transformers/models/falcon_mamba/mod*_falcon_mamba* @ArthurZucker +/src/transformers/models/fastspeech2_conformer/mod*_fastspeech2_conformer* @ArthurZucker +/src/transformers/models/flan_t5/mod*_flan_t5* @ArthurZucker +/src/transformers/models/flan_ul2/mod*_flan_ul2* @ArthurZucker +/src/transformers/models/flaubert/mod*_flaubert* @ArthurZucker +/src/transformers/models/fnet/mod*_fnet* @ArthurZucker +/src/transformers/models/fsmt/mod*_fsmt* @ArthurZucker +/src/transformers/models/funnel/mod*_funnel* @ArthurZucker +/src/transformers/models/fuyu/mod*_fuyu* @ArthurZucker +/src/transformers/models/gemma/mod*_gemma* @ArthurZucker +/src/transformers/models/gemma2/mod*_gemma2* @ArthurZucker +/src/transformers/models/glm/mod*_glm* @ArthurZucker +/src/transformers/models/openai_gpt/mod*_openai_gpt* @ArthurZucker +/src/transformers/models/gpt_neo/mod*_gpt_neo* @ArthurZucker +/src/transformers/models/gpt_neox/mod*_gpt_neox* @ArthurZucker +/src/transformers/models/gpt_neox_japanese/mod*_gpt_neox_japanese* @ArthurZucker +/src/transformers/models/gptj/mod*_gptj* @ArthurZucker +/src/transformers/models/gpt2/mod*_gpt2* @ArthurZucker +/src/transformers/models/gpt_bigcode/mod*_gpt_bigcode* @ArthurZucker +/src/transformers/models/gptsan_japanese/mod*_gptsan_japanese* 
@ArthurZucker +/src/transformers/models/gpt_sw3/mod*_gpt_sw3* @ArthurZucker +/src/transformers/models/granite/mod*_granite* @ArthurZucker +/src/transformers/models/granitemoe/mod*_granitemoe* @ArthurZucker +/src/transformers/models/herbert/mod*_herbert* @ArthurZucker +/src/transformers/models/ibert/mod*_ibert* @ArthurZucker +/src/transformers/models/jamba/mod*_jamba* @ArthurZucker +/src/transformers/models/jetmoe/mod*_jetmoe* @ArthurZucker +/src/transformers/models/jukebox/mod*_jukebox* @ArthurZucker +/src/transformers/models/led/mod*_led* @ArthurZucker +/src/transformers/models/llama/mod*_llama* @ArthurZucker @Cyrilvallez +/src/transformers/models/longformer/mod*_longformer* @ArthurZucker +/src/transformers/models/longt5/mod*_longt5* @ArthurZucker +/src/transformers/models/luke/mod*_luke* @ArthurZucker +/src/transformers/models/m2m_100/mod*_m2m_100* @ArthurZucker +/src/transformers/models/madlad_400/mod*_madlad_400* @ArthurZucker +/src/transformers/models/mamba/mod*_mamba* @ArthurZucker +/src/transformers/models/mamba2/mod*_mamba2* @ArthurZucker +/src/transformers/models/marian/mod*_marian* @ArthurZucker +/src/transformers/models/markuplm/mod*_markuplm* @ArthurZucker +/src/transformers/models/mbart/mod*_mbart* @ArthurZucker +/src/transformers/models/mega/mod*_mega* @ArthurZucker +/src/transformers/models/megatron_bert/mod*_megatron_bert* @ArthurZucker +/src/transformers/models/megatron_gpt2/mod*_megatron_gpt2* @ArthurZucker +/src/transformers/models/mistral/mod*_mistral* @ArthurZucker +/src/transformers/models/mixtral/mod*_mixtral* @ArthurZucker +/src/transformers/models/mluke/mod*_mluke* @ArthurZucker +/src/transformers/models/mobilebert/mod*_mobilebert* @ArthurZucker +/src/transformers/models/modernbert/mod*_modernbert* @ArthurZucker +/src/transformers/models/mpnet/mod*_mpnet* @ArthurZucker +/src/transformers/models/mpt/mod*_mpt* @ArthurZucker +/src/transformers/models/mra/mod*_mra* @ArthurZucker +/src/transformers/models/mt5/mod*_mt5* @ArthurZucker +/src/transformers/models/mvp/mod*_mvp* @ArthurZucker +/src/transformers/models/myt5/mod*_myt5* @ArthurZucker +/src/transformers/models/nemotron/mod*_nemotron* @ArthurZucker +/src/transformers/models/nezha/mod*_nezha* @ArthurZucker +/src/transformers/models/nllb/mod*_nllb* @ArthurZucker +/src/transformers/models/nllb_moe/mod*_nllb_moe* @ArthurZucker +/src/transformers/models/nystromformer/mod*_nystromformer* @ArthurZucker +/src/transformers/models/olmo/mod*_olmo* @ArthurZucker +/src/transformers/models/olmo2/mod*_olmo2* @ArthurZucker +/src/transformers/models/olmoe/mod*_olmoe* @ArthurZucker +/src/transformers/models/open_llama/mod*_open_llama* @ArthurZucker +/src/transformers/models/opt/mod*_opt* @ArthurZucker +/src/transformers/models/pegasus/mod*_pegasus* @ArthurZucker +/src/transformers/models/pegasus_x/mod*_pegasus_x* @ArthurZucker +/src/transformers/models/persimmon/mod*_persimmon* @ArthurZucker +/src/transformers/models/phi/mod*_phi* @ArthurZucker +/src/transformers/models/phi3/mod*_phi3* @ArthurZucker +/src/transformers/models/phimoe/mod*_phimoe* @ArthurZucker +/src/transformers/models/phobert/mod*_phobert* @ArthurZucker +/src/transformers/models/plbart/mod*_plbart* @ArthurZucker +/src/transformers/models/prophetnet/mod*_prophetnet* @ArthurZucker +/src/transformers/models/qdqbert/mod*_qdqbert* @ArthurZucker +/src/transformers/models/qwen2/mod*_qwen2* @ArthurZucker +/src/transformers/models/qwen2_moe/mod*_qwen2_moe* @ArthurZucker +/src/transformers/models/rag/mod*_rag* @ArthurZucker +/src/transformers/models/realm/mod*_realm* 
@ArthurZucker +/src/transformers/models/recurrent_gemma/mod*_recurrent_gemma* @ArthurZucker +/src/transformers/models/reformer/mod*_reformer* @ArthurZucker +/src/transformers/models/rembert/mod*_rembert* @ArthurZucker +/src/transformers/models/retribert/mod*_retribert* @ArthurZucker +/src/transformers/models/roberta/mod*_roberta* @ArthurZucker +/src/transformers/models/roberta_prelayernorm/mod*_roberta_prelayernorm* @ArthurZucker +/src/transformers/models/roc_bert/mod*_roc_bert* @ArthurZucker +/src/transformers/models/roformer/mod*_roformer* @ArthurZucker +/src/transformers/models/rwkv/mod*_rwkv* @ArthurZucker +/src/transformers/models/splinter/mod*_splinter* @ArthurZucker +/src/transformers/models/squeezebert/mod*_squeezebert* @ArthurZucker +/src/transformers/models/stablelm/mod*_stablelm* @ArthurZucker +/src/transformers/models/starcoder2/mod*_starcoder2* @ArthurZucker +/src/transformers/models/switch_transformers/mod*_switch_transformers* @ArthurZucker +/src/transformers/models/t5/mod*_t5* @ArthurZucker +/src/transformers/models/t5v1.1/mod*_t5v1.1* @ArthurZucker +/src/transformers/models/tapex/mod*_tapex* @ArthurZucker +/src/transformers/models/transfo_xl/mod*_transfo_xl* @ArthurZucker +/src/transformers/models/ul2/mod*_ul2* @ArthurZucker +/src/transformers/models/umt5/mod*_umt5* @ArthurZucker +/src/transformers/models/xmod/mod*_xmod* @ArthurZucker +/src/transformers/models/xglm/mod*_xglm* @ArthurZucker +/src/transformers/models/xlm/mod*_xlm* @ArthurZucker +/src/transformers/models/xlm_prophetnet/mod*_xlm_prophetnet* @ArthurZucker +/src/transformers/models/xlm_roberta/mod*_xlm_roberta* @ArthurZucker +/src/transformers/models/xlm_roberta_xl/mod*_xlm_roberta_xl* @ArthurZucker +/src/transformers/models/xlm_v/mod*_xlm_v* @ArthurZucker +/src/transformers/models/xlnet/mod*_xlnet* @ArthurZucker +/src/transformers/models/yoso/mod*_yoso* @ArthurZucker +/src/transformers/models/zamba/mod*_zamba* @ArthurZucker + +# Vision models +/src/transformers/models/beit/mod*_beit* @amyeroberts @qubvel +/src/transformers/models/bit/mod*_bit* @amyeroberts @qubvel +/src/transformers/models/conditional_detr/mod*_conditional_detr* @amyeroberts @qubvel +/src/transformers/models/convnext/mod*_convnext* @amyeroberts @qubvel +/src/transformers/models/convnextv2/mod*_convnextv2* @amyeroberts @qubvel +/src/transformers/models/cvt/mod*_cvt* @amyeroberts @qubvel +/src/transformers/models/deformable_detr/mod*_deformable_detr* @amyeroberts @qubvel +/src/transformers/models/deit/mod*_deit* @amyeroberts @qubvel +/src/transformers/models/depth_anything/mod*_depth_anything* @amyeroberts @qubvel +/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @amyeroberts @qubvel +/src/transformers/models/deta/mod*_deta* @amyeroberts @qubvel +/src/transformers/models/detr/mod*_detr* @amyeroberts @qubvel +/src/transformers/models/dinat/mod*_dinat* @amyeroberts @qubvel +/src/transformers/models/dinov2/mod*_dinov2* @amyeroberts @qubvel +/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @amyeroberts @qubvel +/src/transformers/models/dit/mod*_dit* @amyeroberts @qubvel +/src/transformers/models/dpt/mod*_dpt* @amyeroberts @qubvel +/src/transformers/models/efficientformer/mod*_efficientformer* @amyeroberts @qubvel +/src/transformers/models/efficientnet/mod*_efficientnet* @amyeroberts @qubvel +/src/transformers/models/focalnet/mod*_focalnet* @amyeroberts @qubvel +/src/transformers/models/glpn/mod*_glpn* @amyeroberts @qubvel +/src/transformers/models/hiera/mod*_hiera* @amyeroberts @qubvel 
+/src/transformers/models/ijepa/mod*_ijepa* @amyeroberts @qubvel +/src/transformers/models/imagegpt/mod*_imagegpt* @amyeroberts @qubvel +/src/transformers/models/levit/mod*_levit* @amyeroberts @qubvel +/src/transformers/models/mask2former/mod*_mask2former* @amyeroberts @qubvel +/src/transformers/models/maskformer/mod*_maskformer* @amyeroberts @qubvel +/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @amyeroberts @qubvel +/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @amyeroberts @qubvel +/src/transformers/models/mobilevit/mod*_mobilevit* @amyeroberts @qubvel +/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @amyeroberts @qubvel +/src/transformers/models/nat/mod*_nat* @amyeroberts @qubvel +/src/transformers/models/poolformer/mod*_poolformer* @amyeroberts @qubvel +/src/transformers/models/pvt/mod*_pvt* @amyeroberts @qubvel +/src/transformers/models/pvt_v2/mod*_pvt_v2* @amyeroberts @qubvel +/src/transformers/models/regnet/mod*_regnet* @amyeroberts @qubvel +/src/transformers/models/resnet/mod*_resnet* @amyeroberts @qubvel +/src/transformers/models/rt_detr/mod*_rt_detr* @amyeroberts @qubvel +/src/transformers/models/segformer/mod*_segformer* @amyeroberts @qubvel +/src/transformers/models/seggpt/mod*_seggpt* @amyeroberts @qubvel +/src/transformers/models/superpoint/mod*_superpoint* @amyeroberts @qubvel +/src/transformers/models/swiftformer/mod*_swiftformer* @amyeroberts @qubvel +/src/transformers/models/swin/mod*_swin* @amyeroberts @qubvel +/src/transformers/models/swinv2/mod*_swinv2* @amyeroberts @qubvel +/src/transformers/models/swin2sr/mod*_swin2sr* @amyeroberts @qubvel +/src/transformers/models/table_transformer/mod*_table_transformer* @amyeroberts @qubvel +/src/transformers/models/textnet/mod*_textnet* @amyeroberts @qubvel +/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @amyeroberts @qubvel +/src/transformers/models/upernet/mod*_upernet* @amyeroberts @qubvel +/src/transformers/models/van/mod*_van* @amyeroberts @qubvel +/src/transformers/models/vit/mod*_vit* @amyeroberts @qubvel +/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @amyeroberts @qubvel +/src/transformers/models/vitdet/mod*_vitdet* @amyeroberts @qubvel +/src/transformers/models/vit_mae/mod*_vit_mae* @amyeroberts @qubvel +/src/transformers/models/vitmatte/mod*_vitmatte* @amyeroberts @qubvel +/src/transformers/models/vit_msn/mod*_vit_msn* @amyeroberts @qubvel +/src/transformers/models/vitpose/mod*_vitpose* @amyeroberts @qubvel +/src/transformers/models/yolos/mod*_yolos* @amyeroberts @qubvel +/src/transformers/models/zoedepth/mod*_zoedepth* @amyeroberts @qubvel + +# Audio models +/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb +/src/transformers/models/bark/mod*_bark* @eustlb +/src/transformers/models/clap/mod*_clap* @eustlb +/src/transformers/models/dac/mod*_dac* @eustlb +/src/transformers/models/encodec/mod*_encodec* @eustlb +/src/transformers/models/hubert/mod*_hubert* @eustlb +/src/transformers/models/mctct/mod*_mctct* @eustlb +/src/transformers/models/mimi/mod*_mimi* @eustlb +/src/transformers/models/mms/mod*_mms* @eustlb +/src/transformers/models/moshi/mod*_moshi* @eustlb +/src/transformers/models/musicgen/mod*_musicgen* @eustlb +/src/transformers/models/musicgen_melody/mod*_musicgen_melody* @eustlb +/src/transformers/models/pop2piano/mod*_pop2piano* @eustlb +/src/transformers/models/seamless_m4t/mod*_seamless_m4t* @eustlb +/src/transformers/models/seamless_m4t_v2/mod*_seamless_m4t_v2* @eustlb +/src/transformers/models/sew/mod*_sew* 
@eustlb +/src/transformers/models/sew_d/mod*_sew_d* @eustlb +/src/transformers/models/speech_to_text/mod*_speech_to_text* @eustlb +/src/transformers/models/speech_to_text_2/mod*_speech_to_text_2* @eustlb +/src/transformers/models/speecht5/mod*_speecht5* @eustlb +/src/transformers/models/unispeech/mod*_unispeech* @eustlb +/src/transformers/models/unispeech_sat/mod*_unispeech_sat* @eustlb +/src/transformers/models/univnet/mod*_univnet* @eustlb +/src/transformers/models/vits/mod*_vits* @eustlb +/src/transformers/models/wav2vec2/mod*_wav2vec2* @eustlb +/src/transformers/models/wav2vec2_bert/mod*_wav2vec2_bert* @eustlb +/src/transformers/models/wav2vec2_conformer/mod*_wav2vec2_conformer* @eustlb +/src/transformers/models/wav2vec2_phoneme/mod*_wav2vec2_phoneme* @eustlb +/src/transformers/models/wavlm/mod*_wavlm* @eustlb +/src/transformers/models/whisper/mod*_whisper* @eustlb +/src/transformers/models/xls_r/mod*_xls_r* @eustlb +/src/transformers/models/xlsr_wav2vec2/mod*_xlsr_wav2vec2* @eustlb + +# Video models +/src/transformers/models/timesformer/mod*_timesformer* @Rocketknight1 +/src/transformers/models/videomae/mod*_videomae* @Rocketknight1 +/src/transformers/models/vivit/mod*_vivit* @Rocketknight1 + +# Multimodal models +/src/transformers/models/align/mod*_align* @zucchini-nlp +/src/transformers/models/altclip/mod*_altclip* @zucchini-nlp +/src/transformers/models/aria/mod*_aria* @zucchini-nlp +/src/transformers/models/blip/mod*_blip* @zucchini-nlp +/src/transformers/models/blip_2/mod*_blip_2* @zucchini-nlp +/src/transformers/models/bridgetower/mod*_bridgetower* @zucchini-nlp +/src/transformers/models/bros/mod*_bros* @zucchini-nlp +/src/transformers/models/chameleon/mod*_chameleon* @zucchini-nlp +/src/transformers/models/chinese_clip/mod*_chinese_clip* @zucchini-nlp +/src/transformers/models/clip/mod*_clip* @zucchini-nlp +/src/transformers/models/clipseg/mod*_clipseg* @zucchini-nlp +/src/transformers/models/clvp/mod*_clvp* @zucchini-nlp +/src/transformers/models/colpali/mod*_colpali* @zucchini-nlp @yonigozlan +/src/transformers/models/data2vec/mod*_data2vec* @zucchini-nlp +/src/transformers/models/deplot/mod*_deplot* @zucchini-nlp +/src/transformers/models/donut/mod*_donut* @zucchini-nlp +/src/transformers/models/flava/mod*_flava* @zucchini-nlp +/src/transformers/models/git/mod*_git* @zucchini-nlp +/src/transformers/models/grounding_dino/mod*_grounding_dino* @qubvel +/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp +/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp +/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp +/src/transformers/models/idefics3/mod*_idefics3* @zucchini-nlp +/src/transformers/models/instructblip/mod*_instructblip* @zucchini-nlp +/src/transformers/models/instructblipvideo/mod*_instructblipvideo* @zucchini-nlp +/src/transformers/models/kosmos_2/mod*_kosmos_2* @zucchini-nlp +/src/transformers/models/layoutlm/mod*_layoutlm* @NielsRogge +/src/transformers/models/layoutlmv2/mod*_layoutlmv2* @NielsRogge +/src/transformers/models/layoutlmv3/mod*_layoutlmv3* @NielsRogge +/src/transformers/models/layoutxlm/mod*_layoutxlm* @NielsRogge +/src/transformers/models/lilt/mod*_lilt* @zucchini-nlp +/src/transformers/models/llava/mod*_llava* @zucchini-nlp @arthurzucker +/src/transformers/models/llava_next/mod*_llava_next* @zucchini-nlp +/src/transformers/models/llava_next_video/mod*_llava_next_video* @zucchini-nlp +/src/transformers/models/llava_onevision/mod*_llava_onevision* @zucchini-nlp +/src/transformers/models/lxmert/mod*_lxmert* @zucchini-nlp 
+/src/transformers/models/matcha/mod*_matcha* @zucchini-nlp +/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp +/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp +/src/transformers/models/nougat/mod*_nougat* @NielsRogge +/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @qubvel @yonigozlan +/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp +/src/transformers/models/owlvit/mod*_owlvit* @qubvel +/src/transformers/models/owlv2/mod*_owlv2* @qubvel +/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap +/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp +/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp +/src/transformers/models/pixtral/mod*_pixtral* @zucchini-nlp @ArthurZucker +/src/transformers/models/qwen2_audio/mod*_qwen2_audio* @zucchini-nlp @ArthurZucker +/src/transformers/models/qwen2_vl/mod*_qwen2_vl* @zucchini-nlp @ArthurZucker +/src/transformers/models/sam/mod*_sam* @zucchini-nlp @ArthurZucker +/src/transformers/models/siglip/mod*_siglip* @zucchini-nlp +/src/transformers/models/speech_encoder_decoder/mod*_speech_encoder_decoder* @zucchini-nlp +/src/transformers/models/tapas/mod*_tapas* @NielsRogge +/src/transformers/models/trocr/mod*_trocr* @zucchini-nlp +/src/transformers/models/tvlt/mod*_tvlt* @zucchini-nlp +/src/transformers/models/tvp/mod*_tvp* @zucchini-nlp +/src/transformers/models/udop/mod*_udop* @zucchini-nlp +/src/transformers/models/video_llava/mod*_video_llava* @zucchini-nlp +/src/transformers/models/vilt/mod*_vilt* @zucchini-nlp +/src/transformers/models/vipllava/mod*_vipllava* @zucchini-nlp +/src/transformers/models/vision_encoder_decoder/mod*_vision_encoder_decoder* @Rocketknight1 +/src/transformers/models/vision_text_dual_encoder/mod*_vision_text_dual_encoder* @Rocketknight1 +/src/transformers/models/visual_bert/mod*_visual_bert* @zucchini-nlp +/src/transformers/models/xclip/mod*_xclip* @zucchini-nlp + +# Reinforcement learning models +/src/transformers/models/decision_transformer/mod*_decision_transformer* @Rocketknight1 +/src/transformers/models/trajectory_transformer/mod*_trajectory_transformer* @Rocketknight1 + +# Time series models +/src/transformers/models/autoformer/mod*_autoformer* @Rocketknight1 +/src/transformers/models/informer/mod*_informer* @Rocketknight1 +/src/transformers/models/patchtsmixer/mod*_patchtsmixer* @Rocketknight1 +/src/transformers/models/patchtst/mod*_patchtst* @Rocketknight1 +/src/transformers/models/time_series_transformer/mod*_time_series_transformer* @Rocketknight1 + +# Graph models +/src/transformers/models/graphormer/mod*_graphormer* @clefourrier + +# Finally, files with no owners that shouldn't generate pings, usually automatically generated and checked in the CI +utils/dummy* \ No newline at end of file diff --git a/.github/workflows/assign-reviewers.yml b/.github/workflows/assign-reviewers.yml new file mode 100644 index 000000000000..46bcb52a169f --- /dev/null +++ b/.github/workflows/assign-reviewers.yml @@ -0,0 +1,26 @@ +name: Assign PR Reviewers +on: + pull_request_target: + branches: + - main + types: [ready_for_review] + +jobs: + assign_reviewers: + permissions: + pull-requests: write + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install PyGithub + - name: Run assignment script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python 
.github/scripts/assign_reviewers.py \ No newline at end of file diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index bb5281778bf2..6b5555097c09 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -64,7 +64,7 @@ jobs: commit_id=$GITHUB_SHA fi commit_msg=$(git show -s --format=%s | cut -c1-70) - python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg" + python3 benchmark/benchmarks_entrypoint.py "$BRANCH_NAME" "$commit_id" "$commit_msg" env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} # Enable this to see debug logs @@ -73,3 +73,4 @@ jobs: PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }} PGUSER: transformers_benchmarks PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} diff --git a/.github/workflows/build-ci-docker-images.yml b/.github/workflows/build-ci-docker-images.yml index 9d947684ee86..5606668531da 100644 --- a/.github/workflows/build-ci-docker-images.yml +++ b/.github/workflows/build-ci-docker-images.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: - file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"] + file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch", "examples-tf"] continue-on-error: true steps: @@ -34,11 +34,11 @@ jobs: name: Set tag run: | if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then - echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV" + echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV" echo "setting it to DEV!" else echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV" - + fi - name: Set up Docker Buildx diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index f698f860b2f9..cbf7caa84e87 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -15,4 +15,3 @@ jobs: pr_number: ${{ github.event.number }} package: transformers languages: ar de en es fr hi it ko pt tr zh ja te - custom_container: huggingface/transformers-doc-builder diff --git a/.github/workflows/change_pr_to_draft.yml b/.github/workflows/change_pr_to_draft.yml new file mode 100644 index 000000000000..c8132d2f49ea --- /dev/null +++ b/.github/workflows/change_pr_to_draft.yml @@ -0,0 +1,25 @@ +name: Change PR to draft + +on: + pull_request_target: + types: [opened, reopened] + +jobs: + convert_pr_to_draft: + runs-on: ubuntu-22.04 + name: Convert PR to draft + permissions: + pull-requests: write + contents: write + if: github.event.pull_request.draft == false + steps: + - name: Convert PR to draft + shell: bash + env: + PR_NUMBER: ${{ github.event.number }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + run: | + echo $PR_NUMBER + gh pr ready $PR_NUMBER --repo $REPO --undo + gh pr comment $PR_NUMBER --repo $REPO --body "Hi 👋, thank you for opening this pull request! The pull request is converted to draft by default. The CI will be paused while the PR is in draft mode. When it is ready for review, please click the \`Ready for review\` button (at the bottom of the PR page). This will assign reviewers and trigger CI." 
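As a side note, a minimal sketch (PR number and repo are placeholders) of how an author can take a PR back out of draft from the command line once it is ready; this is the same `gh pr ready` command the workflow above runs with `--undo`, just without that flag:

```bash
# Hypothetical example: mark PR 12345 as ready for review, which un-pauses CI
# and lets the reviewer-assignment workflow fire on the ready_for_review event.
gh pr ready 12345 --repo huggingface/transformers
```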
diff --git a/.github/workflows/check_failed_model_tests.yml b/.github/workflows/check_failed_model_tests.yml index f3ea8646900a..5963523fd76c 100644 --- a/.github/workflows/check_failed_model_tests.yml +++ b/.github/workflows/check_failed_model_tests.yml @@ -22,7 +22,6 @@ env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml index 001e2c531d9b..0997a1112ad7 100644 --- a/.github/workflows/model_jobs.yml +++ b/.github/workflows/model_jobs.yml @@ -30,7 +30,6 @@ env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 jobs: diff --git a/.github/workflows/model_jobs_amd.yml b/.github/workflows/model_jobs_amd.yml index a7e6c7b1ccd5..c90181ec6f1b 100644 --- a/.github/workflows/model_jobs_amd.yml +++ b/.github/workflows/model_jobs_amd.yml @@ -30,7 +30,6 @@ env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 jobs: diff --git a/.github/workflows/new_model_pr_merged_notification.yml b/.github/workflows/new_model_pr_merged_notification.yml new file mode 100644 index 000000000000..6282528c0b74 --- /dev/null +++ b/.github/workflows/new_model_pr_merged_notification.yml @@ -0,0 +1,68 @@ +# Used to notify core maintainers about new model PR being merged +name: New model PR merged notification + +on: + push: + branches: + - main + paths: + - 'src/transformers/models/*/modeling_*' + +jobs: + notify_new_model: + name: Notify new model + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Check new model + shell: bash + run: | + python -m pip install gitpython + python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt + echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV + echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV + + - name: print commit sha + if: ${{ env.NEW_MODEL != ''}} + shell: bash + run: | + echo "$COMMIT_SHA" + + - name: print new model + if: ${{ env.NEW_MODEL != ''}} + shell: bash + run: | + echo "$NEW_MODEL" + + - name: Notify + if: ${{ env.NEW_MODEL != ''}} + uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001 + with: + # Slack channel id, channel name, or user id to post message. 
+ # See also: https://api.slack.com/methods/chat.postMessage#channels + channel-id: transformers-new-model-notification + # For posting a rich message using Block Kit + payload: | + { + "blocks": [ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "New model!", + "emoji": true + } + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": " GH_ArthurZucker, GH_lysandrejik, GH_ydshieh" + } + } + ] + } + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/push-important-models.yml b/.github/workflows/push-important-models.yml index 7294777655e1..099ded8018e9 100644 --- a/.github/workflows/push-important-models.yml +++ b/.github/workflows/push-important-models.yml @@ -7,14 +7,13 @@ on: env: OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA" HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true jobs: get_modified_models: @@ -25,13 +24,13 @@ jobs: steps: - name: Check out code uses: actions/checkout@v4 - + - name: Get changed files id: changed-files - uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42 + uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c with: files: src/transformers/models/** - + - name: Run step if only the files listed above change if: steps.changed-files.outputs.any_changed == 'true' id: set-matrix @@ -60,41 +59,41 @@ jobs: if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }} strategy: fail-fast: false - matrix: + matrix: model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }} steps: - name: Check out code uses: actions/checkout@v4 - + - name: Install locally transformers & other libs run: | apt install sudo sudo -H pip install --upgrade pip - sudo -H pip uninstall -y transformers - sudo -H pip install -U -e ".[testing]" + sudo -H pip uninstall -y transformers + sudo -H pip install -U -e ".[testing]" MAX_JOBS=4 pip install flash-attn --no-build-isolation pip install bitsandbytes - + - name: NVIDIA-SMI run: | nvidia-smi - + - name: Show installed libraries and their versions run: pip freeze - + - name: Run FA2 tests id: run_fa2_tests run: pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_* - + - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests" if: ${{ always() }} uses: actions/upload-artifact@v4 with: name: ${{ matrix.model-name }}_fa2_tests path: /transformers/reports/${{ matrix.model-name }}_fa2_tests - + - name: Post to Slack if: always() uses: huggingface/hf-workflows/.github/actions/post-slack@main @@ -103,13 +102,13 @@ jobs: title: 🤗 Results of the 
FA2 tests - ${{ matrix.model-name }} status: ${{ steps.run_fa2_tests.conclusion}} slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }} - + - name: Run integration tests id: run_integration_tests if: always() run: pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_* - + - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}" if: ${{ always() }} uses: actions/upload-artifact@v4 @@ -119,7 +118,7 @@ jobs: - name: Post to Slack if: always() - uses: huggingface/hf-workflows/.github/actions/post-slack@main + uses: huggingface/hf-workflows/.github/actions/post-slack@main with: slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }} title: 🤗 Results of the Integration tests - ${{ matrix.model-name }} diff --git a/.github/workflows/self-comment-ci.yml b/.github/workflows/self-comment-ci.yml index 7adad7551641..8defab44b2b0 100644 --- a/.github/workflows/self-comment-ci.yml +++ b/.github/workflows/self-comment-ci.yml @@ -22,7 +22,6 @@ env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 jobs: @@ -30,7 +29,7 @@ jobs: runs-on: ubuntu-22.04 name: Get PR number # For security: only allow team members to run - if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} + if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} outputs: PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} steps: diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 6931c2f3eadc..621061988949 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -14,7 +14,6 @@ env: MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 60 TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} jobs: diff --git a/.github/workflows/self-push-caller.yml b/.github/workflows/self-push-caller.yml index 59adde4c54e0..56299f30e517 100644 --- a/.github/workflows/self-push-caller.yml +++ b/.github/workflows/self-push-caller.yml @@ -25,7 +25,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v41 + uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c - name: Was setup changed id: was_changed @@ -51,4 +51,4 @@ jobs: needs: build-docker-containers steps: - name: Trigger push CI via workflow_run - run: echo "Trigger push CI via workflow_run" \ No newline at end of file + run: echo "Trigger push CI via workflow_run" diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 940495c28753..3b3be41e3e9b 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -24,7 +24,6 @@ env: MKL_NUM_THREADS: 8 PYTEST_TIMEOUT: 60 
TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 jobs: @@ -293,7 +292,7 @@ jobs: echo "$machine_type" echo "machine_type=$machine_type" >> $GITHUB_ENV - + - name: Update clone using environment variables working-directory: /transformers run: | @@ -406,7 +405,7 @@ jobs: echo "$machine_type" echo "machine_type=$machine_type" >> $GITHUB_ENV - + - name: Update clone using environment variables working-directory: /workspace/transformers run: | @@ -516,7 +515,7 @@ jobs: echo "$machine_type" echo "machine_type=$machine_type" >> $GITHUB_ENV - + - name: Update clone using environment variables working-directory: /workspace/transformers run: | @@ -648,6 +647,6 @@ jobs: # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. run: | pip install huggingface_hub - pip install slack_sdk + pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml index a33b6e579c0e..4c6284a78cda 100644 --- a/.github/workflows/self-scheduled-amd-mi250-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml @@ -15,7 +15,7 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main with: job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-amd" + slack_report_channel: "#amd-hf-ci" runner: mi250 docker: huggingface/transformers-pytorch-amd-gpu ci_event: Scheduled CI (AMD) - mi250 @@ -26,7 +26,7 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main with: job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-amd" + slack_report_channel: "#amd-hf-ci" runner: mi250 docker: huggingface/transformers-pytorch-amd-gpu ci_event: Scheduled CI (AMD) - mi250 @@ -37,7 +37,7 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main with: job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-amd" + slack_report_channel: "#amd-hf-ci" runner: mi250 docker: huggingface/transformers-pytorch-amd-gpu ci_event: Scheduled CI (AMD) - mi250 @@ -48,7 +48,7 @@ jobs: uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main with: job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-amd" + slack_report_channel: "#amd-hf-ci" runner: mi250 docker: huggingface/transformers-pytorch-deepspeed-amd-gpu ci_event: Scheduled CI (AMD) - mi250 diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 48731f1c2ed3..78971820d146 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -40,7 +40,6 @@ env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 CUDA_VISIBLE_DEVICES: 0,1 NUM_SLICES: 2 @@ -571,4 +570,4 @@ jobs: with: docker: ${{ inputs.docker }} start_sha: ${{ github.sha }} - secrets: inherit \ No newline at end of file + secrets: inherit diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml index 02b022698b0c..e648883f191e 100644 --- a/.github/workflows/ssh-runner.yml +++ b/.github/workflows/ssh-runner.yml @@ -5,7 +5,7 @@ on: inputs: runner_type: description: 'Type of runner to test (a10 or t4)' - required: true + required: true docker_image: description: 
'Name of the Docker image' required: true @@ -15,15 +15,14 @@ on: env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - TF_FORCE_GPU_ALLOW_GROWTH: true + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true CUDA_VISIBLE_DEVICES: 0,1 - RUN_PT_TF_CROSS_TESTS: 1 jobs: get_runner: @@ -78,7 +77,7 @@ jobs: - name: Show installed libraries and their versions working-directory: /transformers run: pip freeze - + - name: NVIDIA-SMI run: | nvidia-smi diff --git a/.github/workflows/update_metdata.yml b/.github/workflows/update_metdata.yml index 90cd73077ac0..d55b6e336c09 100644 --- a/.github/workflows/update_metdata.yml +++ b/.github/workflows/update_metdata.yml @@ -19,7 +19,7 @@ jobs: - name: Setup environment run: | pip install --upgrade pip - pip install datasets pandas==2.0.3 + pip install datasets pandas pip install .[torch,tf,flax] - name: Update metadata diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9eeea9971540..c4804c69fb03 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -221,10 +221,10 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide. If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check - make sure you install the documentation builder: + make sure you install the [documentation builder](https://github.com/huggingface/doc-builder). ```bash - pip install ".[docs]" + pip install hf-doc-builder ``` Run the following command from the root of the repository: @@ -343,8 +343,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t Like the slow tests, there are other environment variables available which are not enabled by default during testing: - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers. -- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration. -- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration. More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py). diff --git a/ISSUES.md b/ISSUES.md index a5969a3027f8..3b4e587a6d12 100644 --- a/ISSUES.md +++ b/ISSUES.md @@ -263,9 +263,9 @@ You are not required to read the following guidelines before opening an issue. H But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying it. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like: ``` - > How big is your gpu cluster? + > How big is your GPU cluster? - Our cluster is made of 256 gpus. 
+ Our cluster is made of 256 GPUs. ``` If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment. diff --git a/Makefile b/Makefile index 710c555b74f6..21152e985082 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,6 @@ autogenerate_code: deps_table_update repo-consistency: python utils/check_copies.py python utils/check_modular_conversion.py - python utils/check_table.py python utils/check_dummies.py python utils/check_repo.py python utils/check_inits.py @@ -46,7 +45,6 @@ repo-consistency: python utils/check_doctest_list.py python utils/update_metadata.py --check-only python utils/check_docstrings.py - python utils/check_support_list.py # this target runs checks on all files @@ -82,7 +80,6 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency fix-copies: python utils/check_copies.py --fix_and_overwrite python utils/check_modular_conversion.py --fix_and_overwrite - python utils/check_table.py --fix_and_overwrite python utils/check_dummies.py --fix_and_overwrite python utils/check_doctest_list.py --fix_and_overwrite python utils/check_docstrings.py --fix_and_overwrite diff --git a/README.md b/README.md index 6bbcdbc82f8b..e51a2c51bb55 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ limitations under the License.

+ Checkpoints on Hub Build GitHub Documentation @@ -54,275 +55,254 @@ limitations under the License.

-State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
+State-of-the-art pretrained models for inference and training

-🤗 Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio. +Transformers is a library of pretrained text, computer vision, audio, video, and multimodal models for inference and training. Use Transformers to fine-tune models on your data, build inference applications, and for generative AI use cases across multiple modalities. -These models can be applied on: +There are over 500K+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use. -* 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, and text generation, in over 100 languages. -* 🖼️ Images, for tasks like image classification, object detection, and segmentation. -* 🗣️ Audio, for tasks like speech recognition and audio classification. +Explore the [Hub](https://huggingface.com/) today to find a model and use Transformers to help you get started right away. -Transformer models can also perform tasks on **several modalities combined**, such as table question answering, optical character recognition, information extraction from scanned documents, video classification, and visual question answering. +## Installation -🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets and then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each python module defining an architecture is fully standalone and can be modified to enable quick research experiments. +Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.0+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+. -🤗 Transformers is backed by the three most popular deep learning libraries — [Jax](https://jax.readthedocs.io/en/latest/), [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/) — with a seamless integration between them. It's straightforward to train your models with one before loading them for inference with the other. +Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager. -## Online demos +```py +# venv +python -m venv .my-env +source .my-env/bin/activate -You can test most of our models directly on their pages from the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, & an inference API](https://huggingface.co/pricing) for public and private models. +# uv +uv venv .my-env +source .my-env/bin/activate +``` -Here are a few examples: +Install Transformers in your virtual environment. 
-In Natural Language Processing: -- [Masked word completion with BERT](https://huggingface.co/google-bert/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France) -- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city) -- [Text generation with Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) -- [Natural Language Inference with RoBERTa](https://huggingface.co/FacebookAI/roberta-large-mnli?text=The+dog+was+lost.+Nobody+lost+any+animal) -- [Summarization with BART](https://huggingface.co/facebook/bart-large-cnn?text=The+tower+is+324+metres+%281%2C063+ft%29+tall%2C+about+the+same+height+as+an+81-storey+building%2C+and+the+tallest+structure+in+Paris.+Its+base+is+square%2C+measuring+125+metres+%28410+ft%29+on+each+side.+During+its+construction%2C+the+Eiffel+Tower+surpassed+the+Washington+Monument+to+become+the+tallest+man-made+structure+in+the+world%2C+a+title+it+held+for+41+years+until+the+Chrysler+Building+in+New+York+City+was+finished+in+1930.+It+was+the+first+structure+to+reach+a+height+of+300+metres.+Due+to+the+addition+of+a+broadcasting+aerial+at+the+top+of+the+tower+in+1957%2C+it+is+now+taller+than+the+Chrysler+Building+by+5.2+metres+%2817+ft%29.+Excluding+transmitters%2C+the+Eiffel+Tower+is+the+second+tallest+free-standing+structure+in+France+after+the+Millau+Viaduct) -- [Question answering with DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species) -- [Translation with T5](https://huggingface.co/google-t5/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin) +```py +# pip +pip install transformers -In Computer Vision: -- [Image classification with ViT](https://huggingface.co/google/vit-base-patch16-224) -- [Object Detection with DETR](https://huggingface.co/facebook/detr-resnet-50) -- [Semantic Segmentation with SegFormer](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512) -- [Panoptic Segmentation with Mask2Former](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic) -- [Depth Estimation with Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) -- [Video Classification with 
VideoMAE](https://huggingface.co/docs/transformers/model_doc/videomae) -- [Universal Segmentation with OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_dinat_large) +# uv +uv pip install transformers +``` -In Audio: -- [Automatic Speech Recognition with Whisper](https://huggingface.co/openai/whisper-large-v3) -- [Keyword Spotting with Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks) -- [Audio Classification with Audio Spectrogram Transformer](https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593) +Install Transformers from source if you want the latest changes in the library or are interested in contributing. However, the *latest* version may not be stable. Feel free to open an [issue](https://github.com/huggingface/transformers/issues) if you encounter an error. -In Multimodal tasks: -- [Table Question Answering with TAPAS](https://huggingface.co/google/tapas-base-finetuned-wtq) -- [Visual Question Answering with ViLT](https://huggingface.co/dandelin/vilt-b32-finetuned-vqa) -- [Image captioning with LLaVa](https://huggingface.co/llava-hf/llava-1.5-7b-hf) -- [Zero-shot Image Classification with SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384) -- [Document Question Answering with LayoutLM](https://huggingface.co/impira/layoutlm-document-qa) -- [Zero-shot Video Classification with X-CLIP](https://huggingface.co/docs/transformers/model_doc/xclip) -- [Zero-shot Object Detection with OWLv2](https://huggingface.co/docs/transformers/en/model_doc/owlv2) -- [Zero-shot Image Segmentation with CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg) -- [Automatic Mask Generation with SAM](https://huggingface.co/docs/transformers/model_doc/sam) +```shell +git clone https://github.com/huggingface/transformers.git +cd transformers +pip install . +``` +## Quickstart -## 100 projects using Transformers +Get started with Transformers right away with the [Pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) API. The `Pipeline` is a high-level inference class that supports text, audio, vision, and multimodal tasks. It handles preprocessing the input and returns the appropriate output. -Transformers is more than a toolkit to use pretrained models: it's a community of projects built around it and the -Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone -else to build their dream projects. +Instantiate a pipeline and specify model to use for text generation. The model is downloaded and cached so you can easily reuse it again. Finally, pass some text to prompt the model. -In order to celebrate the 100,000 stars of transformers, we have decided to put the spotlight on the -community, and we have created the [awesome-transformers](./awesome-transformers.md) page which lists 100 -incredible projects built in the vicinity of transformers. +```py +from transformers import pipeline -If you own or use a project that you believe should be part of the list, please open a PR to add it! +pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-1.5B") +pipeline("the secret to baking a really good cake is ") +[{'generated_text': 'the secret to baking a really good cake is 1) to use the right ingredients and 2) to follow the recipe exactly. the recipe for the cake is as follows: 1 cup of sugar, 1 cup of flour, 1 cup of milk, 1 cup of butter, 1 cup of eggs, 1 cup of chocolate chips. if you want to make 2 cakes, how much sugar do you need? 
To make 2 cakes, you will need 2 cups of sugar.'}] +``` -## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub. +To chat with a model, the usage pattern is the same. The only difference is you need to construct a chat history (the input to `Pipeline`) between you and the system. - - Hugging Face Enterprise Hub -
- -## Quick tour +> [!TIP] +> You can also chat with a model directly from the command line. +> ```shell +> transformers-cli chat --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct +> ``` -To immediately use a model on a given input (text, image, audio, ...), we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model's training. Here is how to quickly use a pipeline to classify positive versus negative texts: +```py +import torch +from transformers import pipeline -```python ->>> from transformers import pipeline +chat = [ + {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."}, + {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"} +] -# Allocate a pipeline for sentiment-analysis ->>> classifier = pipeline('sentiment-analysis') ->>> classifier('We are very happy to introduce pipeline to the transformers repository.') -[{'label': 'POSITIVE', 'score': 0.9996980428695679}] +pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto") +response = pipeline(chat, max_new_tokens=512) +print(response[0]["generated_text"][-1]["content"]) ``` -The second line of code downloads and caches the pretrained model used by the pipeline, while the third evaluates it on the given text. Here, the answer is "positive" with a confidence of 99.97%. - -Many tasks have a pre-trained `pipeline` ready to go, in NLP but also in computer vision and speech. For example, we can easily extract detected objects in an image: - -``` python ->>> import requests ->>> from PIL import Image ->>> from transformers import pipeline - -# Download an image with cute cats ->>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png" ->>> image_data = requests.get(url, stream=True).raw ->>> image = Image.open(image_data) - -# Allocate a pipeline for object detection ->>> object_detector = pipeline('object-detection') ->>> object_detector(image) -[{'score': 0.9982201457023621, - 'label': 'remote', - 'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}}, - {'score': 0.9960021376609802, - 'label': 'remote', - 'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}}, - {'score': 0.9954745173454285, - 'label': 'couch', - 'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}}, - {'score': 0.9988006353378296, - 'label': 'cat', - 'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}}, - {'score': 0.9986783862113953, - 'label': 'cat', - 'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}] +Expand the examples below to see how `Pipeline` works for different modalities and tasks. + +
+Automatic speech recognition + +```py +from transformers import pipeline + +pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v3") +pipeline("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac") +{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'} ``` -Here, we get a list of objects detected in the image, with a box surrounding the object and a confidence score. Here is the original image on the left, with the predictions displayed on the right: +
+ +
+Image classification

- - +

-You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/docs/transformers/task_summary). +```py +from transformers import pipeline + +pipeline = pipeline(task="image-classification", model="facebook/dinov2-small-imagenet1k-1-layer") +pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png") +[{'label': 'macaw', 'score': 0.997848391532898}, + {'label': 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', + 'score': 0.0016551691805943847}, + {'label': 'lorikeet', 'score': 0.00018523589824326336}, + {'label': 'African grey, African gray, Psittacus erithacus', + 'score': 7.85409429227002e-05}, + {'label': 'quail', 'score': 5.502637941390276e-05}] +``` -In addition to `pipeline`, to download and use any of the pretrained models on your given task, all it takes is three lines of code. Here is the PyTorch version: -```python ->>> from transformers import AutoTokenizer, AutoModel +
->>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased") ->>> model = AutoModel.from_pretrained("google-bert/bert-base-uncased") +
+Visual question answering ->>> inputs = tokenizer("Hello world!", return_tensors="pt") ->>> outputs = model(**inputs) -``` -And here is the equivalent code for TensorFlow: -```python ->>> from transformers import AutoTokenizer, TFAutoModel +

+ +

->>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased") ->>> model = TFAutoModel.from_pretrained("google-bert/bert-base-uncased") +```py +from transformers import pipeline ->>> inputs = tokenizer("Hello world!", return_tensors="tf") ->>> outputs = model(**inputs) +pipeline = pipeline(task="visual-question-answering", model="Salesforce/blip-vqa-base") +pipeline( + image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg", + question="What is in the image?", +) +[{'answer': 'statue of liberty'}] ``` -The tokenizer is responsible for all the preprocessing the pretrained model expects and can be called directly on a single string (as in the above examples) or a list. It will output a dictionary that you can use in downstream code or simply directly pass to your model using the ** argument unpacking operator. - -The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) (depending on your backend) which you can use as usual. [This tutorial](https://huggingface.co/docs/transformers/training) explains how to integrate such a model into a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune on a new dataset. +
-## Why should I use transformers? +## Why should I use Transformers? 1. Easy-to-use state-of-the-art models: - - High performance on natural language understanding & generation, computer vision, and audio tasks. - - Low barrier to entry for educators and practitioners. + - High performance on natural language understanding & generation, computer vision, audio, video, and multimodal tasks. + - Low barrier to entry for researchers, engineers, and developers. - Few user-facing abstractions with just three classes to learn. - A unified API for using all our pretrained models. 1. Lower compute costs, smaller carbon footprint: - - Researchers can share trained models instead of always retraining. - - Practitioners can reduce compute time and production costs. - - Dozens of architectures with over 400,000 pretrained models across all modalities. + - Share trained models instead of training from scratch. + - Reduce compute time and production costs. + - Dozens of model architectures with 1M+ pretrained checkpoints across all modalities. -1. Choose the right framework for every part of a model's lifetime: +1. Choose the right framework for every part of a models lifetime: - Train state-of-the-art models in 3 lines of code. - - Move a single model between TF2.0/PyTorch/JAX frameworks at will. - - Seamlessly pick the right framework for training, evaluation, and production. + - Move a single model between PyTorch/JAX/TF2.0 frameworks at will. + - Pick the right framework for training, evaluation, and production. 1. Easily customize a model or an example to your needs: - We provide examples for each architecture to reproduce the results published by its original authors. - Model internals are exposed as consistently as possible. - Model files can be used independently of the library for quick experiments. -## Why shouldn't I use transformers? - -- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files. -- The training API is not intended to work on any model but is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library (possibly, [Accelerate](https://huggingface.co/docs/accelerate)). -- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/main/examples) are just that: examples. It is expected that they won't work out-of-the-box on your specific problem and that you will be required to change a few lines of code to adapt them to your needs. - -## Installation - -### With pip - -This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+. - -You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). - -First, create a virtual environment with the version of Python you're going to use and activate it. - -**macOS/Linux** - -```python -m venv env -source env/bin/activate -``` - -**Windows** - -``` python -m venv env -env\Scripts\activate -``` + + Hugging Face Enterprise Hub +
-To use 🤗 Transformers, you must install at least one of Flax, PyTorch, or TensorFlow. Refer to the official installation guides for platform-specific commands: +## Why shouldn't I use Transformers? -[TensorFlow installation page](https://www.tensorflow.org/install/), -[PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) +- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files. +- The training API is optimized to work with PyTorch models provided by Transformers. For generic machine learning loops, you should use another library like [Accelerate](https://huggingface.co/docs/accelerate). +- The [example scripts]((https://github.com/huggingface/transformers/tree/main/examples)) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work. -When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows: +## 100 projects using Transformers -``` -pip install transformers -``` +Transformers is more than a toolkit to use pretrained models, it's a community of projects built around it and the +Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone +else to build their dream projects. -If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source). +In order to celebrate Transformers 100,000 stars, we wanted to put the spotlight on the +community with the [awesome-transformers](./awesome-transformers.md) page which lists 100 +incredible projects built with Transformers. -``` -git clone https://github.com/huggingface/transformers.git -cd transformers -pip install . -``` +If you own or use a project that you believe should be part of the list, please open a PR to add it! -### With conda +## Example models -🤗 Transformers can be installed using conda as follows: +You can test most of our models directly on their [Hub model pages](https://huggingface.co/models). -```shell script -conda install conda-forge::transformers -``` +Expand each modality below to see a few example models for various use cases. -> **_NOTE:_** Installing `transformers` from the `huggingface` channel is deprecated. +
+Audio -Follow the installation pages of Flax, PyTorch or TensorFlow to see how to install them with conda. +- Audio classification with [Whisper](https://huggingface.co/openai/whisper-large-v3-turbo) +- Automatic speech recognition with [Moonshine](https://huggingface.co/UsefulSensors/moonshine) +- Keyword spotting with [Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks) +- Speech to speech generation with [Moshi](https://huggingface.co/kyutai/moshiko-pytorch-bf16) +- Text to audio with [MusicGen](https://huggingface.co/facebook/musicgen-large) +- Text to speech with [Bark](https://huggingface.co/suno/bark) -> **_NOTE:_** On Windows, you may be prompted to activate Developer Mode in order to benefit from caching. If this is not an option for you, please let us know in [this issue](https://github.com/huggingface/huggingface_hub/issues/1062). +
-## Model architectures +
+Computer vision -**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co/models), where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations). +- Automatic mask generation with [SAM](https://huggingface.co/facebook/sam-vit-base) +- Depth estimation with [DepthPro](https://huggingface.co/apple/DepthPro-hf) +- Image classification with [DINO v2](https://huggingface.co/facebook/dinov2-base) +- Keypoint detection with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor) +- Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue) +- Object detection with [RT-DETRv2](https://huggingface.co/PekingU/rtdetr_v2_r50vd) +- Pose Estimation with [VitPose](https://huggingface.co/usyd-community/vitpose-base-simple) +- Universal segmentation with [OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_swin_large) +- Video classification with [VideoMAE](https://huggingface.co/MCG-NJU/videomae-large) -Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen) +
-🤗 Transformers currently provides the following architectures: see [here](https://huggingface.co/docs/transformers/model_summary) for a high-level summary of each them. +
+Multimodal

- Audio or text to text with [Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B)
- Document question answering with [LayoutLMv3](https://huggingface.co/microsoft/layoutlmv3-base)
- Image or text to text with [Qwen-VL](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- Image captioning with [BLIP-2](https://huggingface.co/Salesforce/blip2-opt-2.7b)
- OCR-based document understanding with [GOT-OCR2](https://huggingface.co/stepfun-ai/GOT-OCR-2.0-hf)
- Table question answering with [TAPAS](https://huggingface.co/google/tapas-base)
- Unified multimodal understanding and generation with [Emu3](https://huggingface.co/BAAI/Emu3-Gen)
- Vision to text with [Llava-OneVision](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)
- Visual question answering with [Llava](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- Visual referring expression segmentation with [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224)
+
+NLP -## Learn more +- Masked word completion with [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base) +- Named entity recognition with [Gemma](https://huggingface.co/google/gemma-2-2b) +- Question answering with [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) +- Summarization with [BART](https://huggingface.co/facebook/bart-large-cnn) +- Translation with [T5](https://huggingface.co/google-t5/t5-base) +- Text generation with [Llama](https://huggingface.co/meta-llama/Llama-3.2-1B) +- Text classification with [Qwen](https://huggingface.co/Qwen/Qwen2.5-0.5B) -| Section | Description | -|-|-| -| [Documentation](https://huggingface.co/docs/transformers/) | Full API documentation and tutorials | -| [Task summary](https://huggingface.co/docs/transformers/task_summary) | Tasks supported by 🤗 Transformers | -| [Preprocessing tutorial](https://huggingface.co/docs/transformers/preprocessing) | Using the `Tokenizer` class to prepare data for the models | -| [Training and fine-tuning](https://huggingface.co/docs/transformers/training) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and the `Trainer` API | -| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/main/examples) | Example scripts for fine-tuning models on a wide range of tasks | -| [Model sharing and uploading](https://huggingface.co/docs/transformers/model_sharing) | Upload and share your fine-tuned models with the community | +
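Any of the checkpoints listed above drops into the same `Pipeline` API shown in the Quickstart. As a minimal sketch, masked word completion with the ModernBERT checkpoint from the NLP list could look like the following (the task string and the example sentence are illustrative choices, not taken from this README):

```py
from transformers import pipeline

# Masked word completion, reusing the ModernBERT checkpoint listed above.
fill_mask = pipeline(task="fill-mask", model="answerdotai/ModernBERT-base")

# The pipeline returns one dict per candidate token, sorted by score.
predictions = fill_mask("Plants create energy through a process known as [MASK].")
print([p["token_str"] for p in predictions])
```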
## Citation diff --git a/awesome-transformers.md b/awesome-transformers.md index d706498a08eb..29f50184ec3d 100644 --- a/awesome-transformers.md +++ b/awesome-transformers.md @@ -29,7 +29,7 @@ Keywords: inpainting, SD, Stable Diffusion ## [flair](https://github.com/flairNLP/flair) -FLAIR is a powerful PyTorch NLP framework, convering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things. +FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things. Keywords: NLP, text embedding, document embedding, biomedical, NER, PoS, sentiment-analysis @@ -47,7 +47,7 @@ Keywords: LLMs, Large Language Models, Agents, Chains ## [LlamaIndex](https://github.com/run-llama/llama_index) -[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retreival mechanisms to perform different LLM tasks and obtain knowledge-augmented results. +[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results. Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation diff --git a/benchmark/README.md b/benchmark/README.md index a827da444f08..3935f02b389d 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -12,7 +12,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, ## Writing metrics to the database -`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements. +`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements. cf [`llama.py`](./llama.py) to see an example of this in practice. 
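To make the thread-safety note above concrete, a minimal sketch of that pattern is shown below; the `collect_device_measurements(benchmark_id)` call is an assumed interface used only for illustration, not the exact signature from `llama.py`:

```py
import threading
import time

def run_with_device_sampling(metrics_recorder, benchmark_id, run_model_measurements):
    """Sample device metrics in a background thread while the main thread runs the model."""
    stop = threading.Event()

    def sample_device():
        while not stop.is_set():
            # Hypothetical recorder call; the real method name/arguments may differ.
            metrics_recorder.collect_device_measurements(benchmark_id)
            time.sleep(0.1)

    sampler = threading.Thread(target=sample_device, daemon=True)
    sampler.start()                  # device readings start in the background
    try:
        run_model_measurements()     # main thread is free to time the model
    finally:
        stop.set()
        sampler.join()
```

Because the recorder is thread-safe, the sampling thread and the main measurement code can write to it concurrently without additional locking.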
diff --git a/benchmark/benchmarks_entrypoint.py b/benchmark/benchmarks_entrypoint.py index 7925e2902834..6c036fdd6939 100644 --- a/benchmark/benchmarks_entrypoint.py +++ b/benchmark/benchmarks_entrypoint.py @@ -3,7 +3,6 @@ import logging import os from typing import Dict -import psycopg2 import sys from psycopg2.extras import Json @@ -136,7 +135,7 @@ def import_from_path(module_name, file_path): continue logger.debug(f"loading: {entry.name}") module = import_from_path(entry.name.split(".")[0], entry.path) - logger.info(f"runnning benchmarks in: {entry.name}") + logger.info(f"running benchmarks in: {entry.name}") module.run_benchmark(logger, branch, commit_id, commit_msg) except ImportModuleException as e: logger.error(e) diff --git a/benchmark/llama.py b/benchmark/llama.py index bbe1afefd5ef..1857dee3d66b 100644 --- a/benchmark/llama.py +++ b/benchmark/llama.py @@ -118,7 +118,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): with torch.no_grad(): past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + num_tokens_to_generate, @@ -144,7 +144,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + num_tokens_to_generate, @@ -187,7 +187,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): # TODO use decode_one_token(model, input_id.clone(), cache_position) for verification past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + num_tokens_to_generate + 10, @@ -204,7 +204,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): time_to_first_token = end - start logger.info(f"completed first compile generation in: {time_to_first_token}s") cache_position += 1 - all_generated_tokens += next_token.clone().detach().cpu().tolist() + all_generated_tokens += next_token.tolist() cache_position = torch.tensor([seq_length], device=device) ### First compile, decoding @@ -215,9 +215,9 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): torch.cuda.synchronize() end = perf_counter() time_to_second_token = end - start - logger.info(f"completed second compile generation in: {time_to_first_token}s") + logger.info(f"completed second compile generation in: {time_to_second_token}s") cache_position += 1 - all_generated_tokens += next_token.clone().detach().cpu().tolist() + all_generated_tokens += next_token.tolist() ### Second compile, decoding start = perf_counter() @@ -227,15 +227,15 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): torch.cuda.synchronize() end = perf_counter() time_to_third_token = end - start - logger.info(f"completed third compile forward in: {time_to_first_token}s") + logger.info(f"completed third compile forward in: {time_to_third_token}s") cache_position += 1 - all_generated_tokens += next_token.clone().detach().cpu().tolist() + all_generated_tokens += next_token.tolist() ### Using cuda graphs decoding start = perf_counter() for _ in range(1, num_tokens_to_generate): - all_generated_tokens += next_token.clone().detach().cpu().tolist() + all_generated_tokens += next_token.tolist() next_token = decode_one_token( model, next_token.clone(), cache_position=cache_position, 
past_key_values=past_key_values ) @@ -254,7 +254,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + 128, @@ -271,7 +271,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + 128, @@ -287,7 +287,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + 128, @@ -298,12 +298,12 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): output = model.generate(**inputs, past_key_values=past_key_values) end = perf_counter() third_compile_generate_time = end - start - logger.info(f"completed second compile generation in: {third_compile_generate_time}s") + logger.info(f"completed third compile generation in: {third_compile_generate_time}s") logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") past_key_values = StaticCache( model.config, - batch_size=batch_size, + max_batch_size=batch_size, device=device, dtype=torch.float16, max_cache_len=seq_length + 128, @@ -313,7 +313,7 @@ def decode_one_token(model, cur_token, cache_position, past_key_values): output = model.generate(**inputs, past_key_values=past_key_values) end = perf_counter() fourth_compile_generate_time = end - start - logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s") + logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s") logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}") metrics_recorder.collect_model_measurements( diff --git a/conftest.py b/conftest.py index 40e43f25e893..ee012215e070 100644 --- a/conftest.py +++ b/conftest.py @@ -46,10 +46,6 @@ "test_keep_in_fp32_modules", "test_gradient_checkpointing_backward_compatibility", "test_gradient_checkpointing_enable_disable", - "test_save_load_fast_init_from_base", - "test_fast_init_context_manager", - "test_fast_init_tied_embeddings", - "test_save_load_fast_init_to_base", "test_torch_save_load", "test_initialization", "test_forward_signature", @@ -61,7 +57,6 @@ "test_load_save_without_tied_weights", "test_tied_weights_keys", "test_model_weights_reload_no_missing_tied_weights", - "test_pt_tf_model_equivalence", "test_mismatched_shapes_have_properly_initialized_weights", "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist", "test_model_is_small", @@ -85,12 +80,6 @@ def pytest_configure(config): - config.addinivalue_line( - "markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested" - ) - config.addinivalue_line( - "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested" - ) config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested") config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment") config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate") diff --git a/docker/README.md b/docker/README.md index 2a71ab6fb6ec..5410a2839e37 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,8 +2,8 
@@ In this folder you will find various docker files, and some subfolders. - dockerfiles (ex: `consistency.dockerfile`) present under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need CPU. For example `torch-light` is a very light weights container (703MiB). -- subfloder contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs) +- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs) Note that in both case, you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the need of our CI: we checkout a new branch each time, and the `transformers` code is thus updated. -We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs: \ No newline at end of file +We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs: diff --git a/docker/consistency.dockerfile b/docker/consistency.dockerfile index a564d76c9bb0..5b35a5f85dcd 100644 --- a/docker/consistency.dockerfile +++ b/docker/consistency.dockerfile @@ -5,12 +5,12 @@ ARG REF=main RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs ENV UV_PYTHON=/usr/local/bin/python RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython -RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu # tensorflow pin matching setup.py RUN uv pip install --no-cache-dir pypi-kenlm RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16" RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]" RUN git lfs install -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean diff --git a/docker/custom-tokenizers.dockerfile b/docker/custom-tokenizers.dockerfile index 87601bcf3f41..a0a9f5ea23b7 100644 --- a/docker/custom-tokenizers.dockerfile +++ b/docker/custom-tokenizers.dockerfile @@ -1,5 +1,6 @@ FROM python:3.9-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler ENV UV_PYTHON=/usr/local/bin/python @@ -16,11 +17,11 @@ RUN make install -j 10 RUN uv pip install --no-cache --upgrade 'torch' --index-url 
https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir "transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite +RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite # spacy is not used so not tested. Causes to failures. TODO fix later RUN python3 -m unidic download -RUN pip uninstall -y transformers +RUN uv pip uninstall transformers RUN apt-get clean && rm -rf /var/lib/apt/lists/* -RUN apt remove -y g++ cmake xz-utils libprotobuf-dev protobuf-compiler \ No newline at end of file +RUN apt remove -y g++ cmake xz-utils libprotobuf-dev protobuf-compiler diff --git a/docker/examples-tf.dockerfile b/docker/examples-tf.dockerfile index ffbaafd8b86b..306d00fdea57 100644 --- a/docker/examples-tf.dockerfile +++ b/docker/examples-tf.dockerfile @@ -1,12 +1,13 @@ FROM python:3.9-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git RUN apt-get install -y g++ cmake ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval -RUN pip install --upgrade --no-cache-dir "transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" -RUN uv pip install --no-cache-dir "protobuf==3.20.3" -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file +RUN uv pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" +RUN uv pip install --no-cache-dir "protobuf==3.20.3" +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/examples-torch.dockerfile b/docker/examples-torch.dockerfile index f9c7257b9cca..e1029f6ddf1f 100644 --- a/docker/examples-torch.dockerfile +++ b/docker/examples-torch.dockerfile @@ -1,11 +1,12 @@ FROM python:3.9-slim ENV PYTHONDONTWRITEBYTECODE=1 +ARG REF=main USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval 
albumentations jiwer +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/exotic-models.dockerfile b/docker/exotic-models.dockerfile index dd40476064a3..32491888e800 100644 --- a/docker/exotic-models.dockerfile +++ b/docker/exotic-models.dockerfile @@ -5,13 +5,13 @@ USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-cache-dir --no-deps timm accelerate RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk # RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels -RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset' +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset' # RUN git clone https://github.com/facebookresearch/detectron2.git # RUN python3 -m pip install --no-cache-dir -e detectron2 -RUN pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' -RUN pip uninstall -y transformers +RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation +RUN uv pip uninstall transformers RUN apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/jax-light.dockerfile b/docker/jax-light.dockerfile index df1e1144c0e1..c2a73e98ca98 100644 --- a/docker/jax-light.dockerfile +++ b/docker/jax-light.dockerfile @@ -5,6 +5,6 @@ USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]" -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file +RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]" +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean diff --git a/docker/pipeline-tf.dockerfile b/docker/pipeline-tf.dockerfile index dd38b52acbb3..61a442a55945 100644 --- a/docker/pipeline-tf.dockerfile +++ b/docker/pipeline-tf.dockerfile @@ -5,6 +5,6 @@ USER root RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++ ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir 
"git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]" +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]" RUN uv pip install --no-cache-dir "protobuf==3.20.3" tensorflow_probability -RUN apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file +RUN apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/pipeline-torch.dockerfile b/docker/pipeline-torch.dockerfile index 2b4014b4fff1..10b6450b2dfc 100644 --- a/docker/pipeline-torch.dockerfile +++ b/docker/pipeline-torch.dockerfile @@ -5,7 +5,7 @@ USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" -RUN pip uninstall -y transformers \ No newline at end of file +RUN uv pip uninstall transformers diff --git a/docker/quality.dockerfile b/docker/quality.dockerfile index 7e6999e40f20..e2421efe00b4 100644 --- a/docker/quality.dockerfile +++ b/docker/quality.dockerfile @@ -6,4 +6,4 @@ RUN apt-get update && apt-get install -y time git ENV UV_PYTHON=/usr/local/bin/python RUN pip install uv && uv venv RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3 -RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/* \ No newline at end of file +RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/tf-light.dockerfile b/docker/tf-light.dockerfile index 67dc928c22fa..0206c7406390 100644 --- a/docker/tf-light.dockerfile +++ b/docker/tf-light.dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de RUN apt-get install -y cmake ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" -RUN uv pip install --no-cache-dir "protobuf==3.20.3" -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file +RUN uv pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]" +RUN uv pip install --no-cache-dir "protobuf==3.20.3" +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean diff --git a/docker/torch-jax-light.dockerfile 
b/docker/torch-jax-light.dockerfile index b779fcbfac9a..6394bc76afc2 100644 --- a/docker/torch-jax-light.dockerfile +++ b/docker/torch-jax-light.dockerfile @@ -6,11 +6,11 @@ RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN uv pip install --no-deps accelerate -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu -RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]" # RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]" -RUN pip uninstall -y transformers +RUN uv pip uninstall transformers RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean diff --git a/docker/torch-light.dockerfile b/docker/torch-light.dockerfile index c1a8f8ac0f51..a13d855a53e7 100644 --- a/docker/torch-light.dockerfile +++ b/docker/torch-light.dockerfile @@ -5,7 +5,7 @@ USER root RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]" -RUN pip uninstall -y transformers \ No newline at end of file +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]" +RUN uv pip uninstall transformers diff --git a/docker/torch-tf-light.dockerfile b/docker/torch-tf-light.dockerfile index 0556b79ffdb3..63512328f129 100644 --- a/docker/torch-tf-light.dockerfile +++ b/docker/torch-tf-light.dockerfile @@ -7,13 +7,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu RUN git lfs install RUN uv pip install --no-cache-dir pypi-kenlm -RUN pip install --no-cache-dir 
"git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]" RUN uv pip install --no-cache-dir "protobuf==3.20.3" librosa -RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file +RUN uv pip uninstall transformers +RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index ebc7ae0d2115..1eb50ee4ad7f 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -57,7 +57,8 @@ RUN python3 -m pip uninstall -y ninja # For `dinat` model # The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent) -RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels +# pin `0.17.4` otherwise `cannot import name 'natten2dav' from 'natten.functional'` +RUN python3 -m pip install --no-cache-dir natten==0.17.4+torch250cu121 -f https://shi-labs.com/natten/wheels # For `nougat` tokenizer RUN python3 -m pip install --no-cache-dir python-Levenshtein diff --git a/docker/transformers-pytorch-amd-gpu/Dockerfile b/docker/transformers-pytorch-amd-gpu/Dockerfile index 4313c2242199..a71043dc8215 100644 --- a/docker/transformers-pytorch-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-amd-gpu/Dockerfile @@ -12,7 +12,7 @@ RUN git lfs install RUN python3 -m pip install --no-cache-dir --upgrade pip numpy -RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2 +RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4 RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0" diff --git a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile index a8edb8ff03eb..f70b15494100 100644 --- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile @@ -2,10 +2,10 @@ FROM rocm/dev-ubuntu-22.04:6.2.4 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive -ARG PYTORCH='2.5.1' -ARG TORCH_VISION='0.20.0' -ARG TORCH_AUDIO='2.5.0' -ARG ROCM='6.2' +ARG PYTORCH='2.6.0' +ARG TORCH_VISION='0.21.0' +ARG TORCH_AUDIO='2.6.0' +ARG ROCM='6.2.4' RUN apt update && \ apt install -y --no-install-recommends \ @@ -16,9 +16,11 @@ RUN apt update && \ python-is-python3 \ rocrand-dev \ rocthrust-dev \ + rocblas-dev \ + hipsolver-dev \ hipsparse-dev \ hipblas-dev \ - rocblas-dev && \ + hipblaslt-dev && \ apt clean && \ rm -rf /var/lib/apt/lists/* diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile index 700df877d10f..33d8b10b02ee 100755 --- a/docker/transformers-quantization-latest-gpu/Dockerfile +++ b/docker/transformers-quantization-latest-gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 +FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 LABEL maintainer="Hugging Face" ARG 
DEBIAN_FRONTEND=noninteractive @@ -9,9 +9,9 @@ SHELL ["sh", "-lc"] # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # to be used as arguments for docker build (so far). -ARG PYTORCH='2.5.1' +ARG PYTORCH='2.6.0' # Example: `cu102`, `cu113`, etc. -ARG CUDA='cu118' +ARG CUDA='cu121' RUN apt update RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg @@ -26,8 +26,6 @@ RUN echo torch=$VERSION # Currently, let's just use their latest releases (when `torch` is installed with a release version) RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA -RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch] - RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate # needed in bnb and awq @@ -36,10 +34,9 @@ RUN python3 -m pip install --no-cache-dir einops # Add bitsandbytes for mixed int8 testing RUN python3 -m pip install --no-cache-dir bitsandbytes -# Add auto-gptq for gtpq quantization testing, installed from source for pytorch==2.5.1 compatibility -# TORCH_CUDA_ARCH_LIST="7.5+PTX" is added to make the package compile for Tesla T4 gpus available for the CI. -RUN pip install gekko -RUN git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ && TORCH_CUDA_ARCH_LIST="7.5+PTX" python3 setup.py install +# Add gptqmodel for gptq quantization testing, installed from source for pytorch==2.6.0 compatibility +RUN python3 -m pip install lm_eval +RUN git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel && pip install -v . --no-build-isolation # Add optimum for gptq quantization testing RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum @@ -51,10 +48,11 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 # Add vptq for quantization testing -RUN python3 -m pip install --no-cache-dir vptq +RUN pip install vptq # Add spqr for quantization testing -RUN python3 -m pip install --no-cache-dir spqr_quant[gpu] +# Commented out for now: no matching distribution is found, and we need to reach out to the authors +# RUN python3 -m pip install --no-cache-dir spqr_quant[gpu] # Add hqq for quantization testing RUN python3 -m pip install --no-cache-dir hqq @@ -63,18 +61,29 @@ RUN python3 -m pip install --no-cache-dir hqq RUN python3 -m pip install --no-cache-dir gguf # Add autoawq for quantization testing -# >=v0.2.7 needed for compatibility with transformers > 4.46 -RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.7.post2/autoawq-0.2.7.post2-py3-none-any.whl +# New release v0.2.8 +RUN python3 -m pip install --no-cache-dir autoawq[kernels] # Add quanto for quantization testing RUN python3 -m pip install --no-cache-dir optimum-quanto # Add eetq for quantization testing -RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git +RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install .
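For context, the gptqmodel and optimum dependencies installed above are exercised through the standard Transformers quantization API. A minimal sketch of that call path, assuming a small placeholder causal LM checkpoint and calibration dataset (neither is part of this Dockerfile):

```py
# Hedged sketch of the GPTQ test path enabled by the gptqmodel/optimum installs above.
# The checkpoint name and calibration dataset are placeholders.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "facebook/opt-125m"  # placeholder small model
tokenizer = AutoTokenizer.from_pretrained(model_id)
gptq_config = GPTQConfig(bits=4, dataset="c4", tokenizer=tokenizer)

# Quantizes the weights to 4-bit GPTQ on load using the installed backend.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=gptq_config,
)
```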
+ +# # Add flute-kernel and fast_hadamard_transform for quantization testing +# # Commented for now as they cause issues with the build +# # TODO: create a new workflow to test them +# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1 +# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git -# Add flute-kernel and fast_hadamard_transform for quantization testing -RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118 -RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1 +# Add compressed-tensors for quantization testing +RUN python3 -m pip install --no-cache-dir compressed-tensors + +# Add AMD Quark for quantization testing +RUN python3 -m pip install --no-cache-dir amd-quark + +# Add transformers in editable mode +RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch] # When installing in editable mode, `transformers` is not recognized as a package. # this line must be added in order for python to be aware of transformers. diff --git a/docs/source/ar/agents.md b/docs/source/ar/agents.md index 1213b3500860..c7efd8f02f48 100644 --- a/docs/source/ar/agents.md +++ b/docs/source/ar/agents.md @@ -195,7 +195,7 @@ You have access to the following tools: To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use. -Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence. +Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence. During each intermediate step, you can use 'print()' to save whatever important information you will then need. These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step. @@ -205,7 +205,7 @@ Here are a few examples using notional tools: --- {examples} -Above example were using notional tools that might not exist for you. You only have acces to those tools: +Above example were using notional tools that might not exist for you. You only have access to those tools: <> You also can perform computations in the python code you generate. diff --git a/docs/source/ar/bertology.md b/docs/source/ar/bertology.md index d3f95e20d7df..d12d7838906e 100644 --- a/docs/source/ar/bertology.md +++ b/docs/source/ar/bertology.md @@ -15,4 +15,4 @@ - الوصول إلى جميع أوزان الانتباه لكل رأس في BERT/GPT/GPT-2، - استرجاع قيم ومشتقات مخرجات الرأس لحساب درجة أهمية الرأس وحذفه كما هو موضح في https://arxiv.org/abs/1905.10650. -ولمساعدتك على فهم واستخدام هذه الميزات بسهولة، أضفنا مثالًا برمجيًا محددًا: [bertology.py](https://github.com/huggingface/transformers/tree/main/examples/research_projects/bertology/run_bertology.py) أثناء استخراج المعلومات وتقليص من نموذج تم تدريبه مسبقًا على GLUE. \ No newline at end of file +ولمساعدتك على فهم واستخدام هذه الميزات بسهولة، أضفنا مثالًا برمجيًا محددًا: [bertology.py](https://github.com/huggingface/transformers-research-projects/tree/main/bertology/run_bertology.py) أثناء استخراج المعلومات وتقليص من نموذج تم تدريبه مسبقًا على GLUE. 
\ No newline at end of file diff --git a/docs/source/ar/run_scripts.md b/docs/source/ar/run_scripts.md index 593d4aec85fc..c7aea4eb9611 100644 --- a/docs/source/ar/run_scripts.md +++ b/docs/source/ar/run_scripts.md @@ -2,7 +2,7 @@ بالإضافة إلى دفاتر الملاحظات [notebooks](./notebooks) الخاصة بـ 🤗 Transformers، هناك أيضًا نصوص برمجية توضيحية تُظهر كيفية تدريب نموذج لمهمة باستخدام [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch) أو [TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow) أو [JAX/Flax](https://github.com/huggingface/transformers/tree/main/examples/flax). -كما ستجد النصوص البرمجية التي استخدمناها في [مشاريع الأبحاث](https://github.com/huggingface/transformers/tree/main/examples/research_projects) و [الأمثلة القديمة](https://github.com/huggingface/transformers/tree/main/examples/legacy) والتي ساهم بها المجتمع بشكل أساسي. هذه النصوص البرمجية غير مدعومة بشكل نشط وقد تتطلب إصدارًا محددًا من مكتبة 🤗 Transformers والذي من المحتمل أن يكون غير متوافق مع الإصدار الأحدث من المكتبة. +كما ستجد النصوص البرمجية التي استخدمناها في [مشاريع الأبحاث](https://github.com/huggingface/transformers-research-projects/) و [الأمثلة القديمة](https://github.com/huggingface/transformers/tree/main/examples/legacy) والتي ساهم بها المجتمع بشكل أساسي. هذه النصوص البرمجية غير مدعومة بشكل نشط وقد تتطلب إصدارًا محددًا من مكتبة 🤗 Transformers والذي من المحتمل أن يكون غير متوافق مع الإصدار الأحدث من المكتبة. لا يُتوقع أن تعمل النصوص البرمجية التوضيحية بشكل مباشر على كل مشكلة، وقد تحتاج إلى تكييف النص البرمجي مع المشكلة التي تحاول حلها. ولمساعدتك في ذلك، تعرض معظم النصوص البرمجية كيفية معالجة البيانات قبل التدريب بشكل كامل، مما يتيح لك تحريرها حسب الحاجة لحالتك الاستخدام. diff --git a/docs/source/ar/serialization.md b/docs/source/ar/serialization.md index 2df620d86239..6f437dea0681 100644 --- a/docs/source/ar/serialization.md +++ b/docs/source/ar/serialization.md @@ -116,11 +116,11 @@ optimum-cli export onnx --model keras-io/transformers-qa distilbert_base_cased_s -لم يعد يتم دعم `tranformers.onnx` يُرجى تصدير النماذج باستخدام 🤗 Optimum كما هو موضح أعلاه. سيتم إزالة هذا القسم في الإصدارات القادمة. +لم يعد يتم دعم `transformers.onnx` يُرجى تصدير النماذج باستخدام 🤗 Optimum كما هو موضح أعلاه. سيتم إزالة هذا القسم في الإصدارات القادمة. -لتصدير نموذج 🤗 Transformers إلى ONNX باستخدام `tranformers.onnx`، ثبّت التبعيات الإضافية: +لتصدير نموذج 🤗 Transformers إلى ONNX باستخدام `transformers.onnx`، ثبّت التبعيات الإضافية: ```bash pip install transformers[onnx] diff --git a/docs/source/ar/trainer.md b/docs/source/ar/trainer.md index 7da7cbf4e171..e70dbb255eac 100644 --- a/docs/source/ar/trainer.md +++ b/docs/source/ar/trainer.md @@ -673,6 +673,29 @@ tpu_use_sudo: false use_cpu: false ``` + + + +```yml +compute_environment: LOCAL_MACHINE +tp_config: + tp_size: 4 +distributed_type: TP +downcast_bf16: 'no' +machine_rank: 0 +main_training_function: main +mixed_precision: 'no' +num_machines: 1 +num_processes: 4 +rdzv_backend: static +same_network: true +tpu_env: [] +tpu_use_cluster: false +tpu_use_sudo: false +use_cpu: false + +``` + يُعد أمر [`accelerate_launch`](https://huggingface.co/docs/accelerate/package_reference/cli#accelerate-launch) هو الطريقة المُوصى بها لتشغيل نص البرمجى للتدريب على نظام موزع باستخدام Accelerate و [`Trainer`] مع المعلمات المحددة في `config_file.yaml`. 
يتم حفظ هذا الملف في مجلد ذاكرة التخزين المؤقت لـ Accelerate ويتم تحميله تلقائيًا عند تشغيل `accelerate_launch`. diff --git a/docs/source/de/contributing.md b/docs/source/de/contributing.md index d014dd67c83a..61ee8c3fc4e3 100644 --- a/docs/source/de/contributing.md +++ b/docs/source/de/contributing.md @@ -283,8 +283,6 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t Wie bei den langsamen Tests gibt es auch andere Umgebungsvariablen, die standardmäßig beim Testen nicht gesetzt sind: * `RUN_CUSTOM_TOKENIZERS`: Aktiviert Tests für benutzerdefinierte Tokenizer. -* `RUN_PT_FLAX_CROSS_TESTS`: Aktiviert Tests für die Integration von PyTorch + Flax. -* `RUN_PT_TF_CROSS_TESTS`: Aktiviert Tests für die Integration von TensorFlow + PyTorch. Weitere Umgebungsvariablen und zusätzliche Informationen finden Sie in der [testing_utils.py](src/transformers/testing_utils.py). diff --git a/docs/source/de/index.md b/docs/source/de/index.md index 5ddabb4e7382..8aaaa5952c07 100644 --- a/docs/source/de/index.md +++ b/docs/source/de/index.md @@ -88,7 +88,7 @@ Die Bibliothek enthält derzeit JAX-, PyTorch- und TensorFlow-Implementierungen, 1. **[DeiT](model_doc/deit)** (from Facebook) released with the paper [Training data-efficient image transformers & distillation through attention](https://arxiv.org/abs/2012.12877) by Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Hervé Jégou. 1. **[DETR](model_doc/detr)** (from Facebook) released with the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, Sergey Zagoruyko. 1. **[DialoGPT](model_doc/dialogpt)** (from Microsoft Research) released with the paper [DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation](https://arxiv.org/abs/1911.00536) by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan. -1. **[DistilBERT](model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation) and a German version of DistilBERT. +1. **[DistilBERT](model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers-research-projects/tree/main/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers-research-projects/tree/main/distillation) and a German version of DistilBERT. 1. 
**[DiT](model_doc/dit)** (from Microsoft Research) released with the paper [DiT: Self-supervised Pre-training for Document Image Transformer](https://arxiv.org/abs/2203.02378) by Junlong Li, Yiheng Xu, Tengchao Lv, Lei Cui, Cha Zhang, Furu Wei. 1. **[DPR](model_doc/dpr)** (from Facebook) released with the paper [Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906) by Vladimir Karpukhin, Barlas Oğuz, Sewon Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 1. **[DPT](master/model_doc/dpt)** (from Intel Labs) released with the paper [Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413) by René Ranftl, Alexey Bochkovskiy, Vladlen Koltun. diff --git a/docs/source/de/quicktour.md b/docs/source/de/quicktour.md index c01609207fec..856ba546b977 100644 --- a/docs/source/de/quicktour.md +++ b/docs/source/de/quicktour.md @@ -156,7 +156,7 @@ Die [`pipeline`] kann jedes Modell aus dem [Model Hub](https://huggingface.co/mo -Use the [`AutoModelForSequenceClassification`] and [`AutoTokenizer`] to load the pretrained model and it's associated tokenizer (more on an `AutoClass` below): +Use the [`AutoModelForSequenceClassification`] and [`AutoTokenizer`] to load the pretrained model and its associated tokenizer (more on an `AutoClass` below): ```py >>> from transformers import AutoTokenizer, AutoModelForSequenceClassification @@ -166,7 +166,7 @@ Use the [`AutoModelForSequenceClassification`] and [`AutoTokenizer`] to load the ``` -Use the [`TFAutoModelForSequenceClassification`] and [`AutoTokenizer`] to load the pretrained model and it's associated tokenizer (more on an `TFAutoClass` below): +Use the [`TFAutoModelForSequenceClassification`] and [`AutoTokenizer`] to load the pretrained model and its associated tokenizer (more on an `TFAutoClass` below): ```py >>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification @@ -222,7 +222,7 @@ Anschließend wandelt der Tokenizer die Token in Zahlen um, um einen Tensor als Der Tokenizer gibt ein Wörterbuch zurück, das Folgendes enthält: * [input_ids](./glossary#input-ids): numerische Repräsentationen Ihrer Token. -* [atttention_mask](.glossary#attention-mask): gibt an, welche Token beachtet werden sollen. +* [attention_mask](.glossary#attention-mask): gibt an, welche Token beachtet werden sollen. Genau wie die [`pipeline`] akzeptiert der Tokenizer eine Liste von Eingaben. Darüber hinaus kann der Tokenizer den Text auch auffüllen und kürzen, um einen Stapel mit einheitlicher Länge zurückzugeben: diff --git a/docs/source/de/run_scripts.md b/docs/source/de/run_scripts.md index 17b725827dd7..4b62c73276e0 100644 --- a/docs/source/de/run_scripts.md +++ b/docs/source/de/run_scripts.md @@ -18,7 +18,7 @@ rendered properly in your Markdown viewer. Neben den 🤗 Transformers [notebooks](./notebooks) gibt es auch Beispielskripte, die zeigen, wie man ein Modell für eine Aufgabe mit [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch), [TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow) oder [JAX/Flax](https://github.com/huggingface/transformers/tree/main/examples/flax) trainiert. 
-Sie werden auch Skripte finden, die wir in unseren [Forschungsprojekten](https://github.com/huggingface/transformers/tree/main/examples/research_projects) und [Legacy-Beispielen](https://github.com/huggingface/transformers/tree/main/examples/legacy) verwendet haben und die größtenteils von der Community stammen. Diese Skripte werden nicht aktiv gepflegt und erfordern eine bestimmte Version von 🤗 Transformers, die höchstwahrscheinlich nicht mit der neuesten Version der Bibliothek kompatibel ist. +Sie werden auch Skripte finden, die wir in unseren [Forschungsprojekten](https://github.com/huggingface/transformers-research-projects/) und [Legacy-Beispielen](https://github.com/huggingface/transformers/tree/main/examples/legacy) verwendet haben und die größtenteils von der Community stammen. Diese Skripte werden nicht aktiv gepflegt und erfordern eine bestimmte Version von 🤗 Transformers, die höchstwahrscheinlich nicht mit der neuesten Version der Bibliothek kompatibel ist. Es wird nicht erwartet, dass die Beispielskripte bei jedem Problem sofort funktionieren. Möglicherweise müssen Sie das Skript an das Problem anpassen, das Sie zu lösen versuchen. Um Ihnen dabei zu helfen, legen die meisten Skripte vollständig offen, wie die Daten vorverarbeitet werden, so dass Sie sie nach Bedarf für Ihren Anwendungsfall bearbeiten können. diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index dc259103ae2e..6c4b7498b3da 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -1,291 +1,310 @@ - sections: - local: index - title: 🤗 Transformers - - local: quicktour - title: Quick tour + title: Transformers - local: installation title: Installation - - local: add_new_model - title: Adding a new model to `transformers` + - local: quicktour + title: Quickstart title: Get started -- sections: - - local: pipeline_tutorial - title: Run inference with pipelines - - local: autoclass_tutorial - title: Write portable code with AutoClass - - local: preprocessing - title: Preprocess data - - local: training - title: Fine-tune a pretrained model - - local: run_scripts - title: Train with a script - - local: accelerate - title: Set up distributed training with 🤗 Accelerate - - local: peft - title: Load and train adapters with 🤗 PEFT - - local: model_sharing - title: Share your model - - local: agents - title: Agents 101 - - local: agents_advanced - title: Agents, supercharged - Multi-agents, External tools, and more - - local: llm_tutorial - title: Generation with LLMs - - local: conversations - title: Chatting with Transformers - title: Tutorials -- sections: - - isExpanded: false - sections: - - local: tasks/sequence_classification - title: Text classification - - local: tasks/token_classification - title: Token classification - - local: tasks/question_answering - title: Question answering - - local: tasks/language_modeling - title: Causal language modeling - - local: tasks/masked_language_modeling - title: Masked language modeling - - local: tasks/translation - title: Translation - - local: tasks/summarization - title: Summarization - - local: tasks/multiple_choice - title: Multiple choice - title: Natural Language Processing - - isExpanded: false - sections: - - local: tasks/audio_classification - title: Audio classification - - local: tasks/asr - title: Automatic speech recognition - title: Audio - - isExpanded: false - sections: - - local: tasks/image_classification - title: Image classification - - local: 
tasks/semantic_segmentation - title: Image segmentation - - local: tasks/video_classification - title: Video classification - - local: tasks/object_detection - title: Object detection - - local: tasks/zero_shot_object_detection - title: Zero-shot object detection - - local: tasks/zero_shot_image_classification - title: Zero-shot image classification - - local: tasks/monocular_depth_estimation - title: Depth estimation - - local: tasks/image_to_image - title: Image-to-Image - - local: tasks/image_feature_extraction - title: Image Feature Extraction - - local: tasks/mask_generation - title: Mask Generation - - local: tasks/keypoint_detection - title: Keypoint Detection - - local: tasks/knowledge_distillation_for_image_classification - title: Knowledge Distillation for Computer Vision - title: Computer Vision - - isExpanded: false - sections: - - local: tasks/image_captioning - title: Image captioning - - local: tasks/document_question_answering - title: Document Question Answering - - local: tasks/visual_question_answering - title: Visual Question Answering - - local: tasks/text-to-speech - title: Text to speech - - local: tasks/image_text_to_text - title: Image-text-to-text - - local: tasks/video_text_to_text - title: Video-text-to-text - title: Multimodal - - isExpanded: false - sections: +- isExpanded: false + sections: + - sections: + - local: models + title: Loading models + - local: custom_models + title: Customizing models + - local: how_to_hack_models + title: Customizing model components + - local: model_sharing + title: Sharing + - local: add_new_model + title: Adding a new model to Transformers + - local: modular_transformers + title: Modular Transformers + - local: task_summary + title: What 🤗 Transformers can do + - local: tasks_explained + title: How 🤗 Transformers solve tasks + - local: model_summary + title: The Transformer model family + - local: attention + title: Attention mechanisms + - local: attention_interface + title: Customizing attention function + title: Models + - sections: + - local: fast_tokenizers + title: Tokenizers + - local: image_processors + title: Image processors + - local: backbones + title: Backbones + - local: feature_extractors + title: Feature extractors + - local: processors + title: Processors + - local: tokenizer_summary + title: Summary of the tokenizers + - local: pad_truncation + title: Padding and truncation + title: Preprocessors + title: Base classes +- isExpanded: false + sections: + - sections: + - local: pipeline_tutorial + title: Pipeline + - local: pipeline_gradio + title: Machine learning apps + - local: pipeline_webserver + title: Web server inference + - local: add_new_pipeline + title: Adding a new pipeline + title: Pipeline API + - sections: + - local: llm_tutorial + title: Text generation - local: generation_strategies - title: Customize the generation strategy - - local: kv_cache - title: Best Practices for Generation with Cache - title: Generation - - isExpanded: false - sections: - - local: chat_template_basics - title: Getting Started with Chat Templates for Text LLMs - - local: chat_template_multimodal - title: Multimodal Chat Templates for Vision and Audio LLMs - - local: chat_template_tools_and_documents - title: Expanding Chat Templates with Tools and Documents - - local: chat_template_advanced - title: Advanced Usage and Customizing Your Chat Templates - title: Chat Templates - - isExpanded: false - sections: - - local: tasks/idefics - title: Image tasks with IDEFICS + title: Generation strategies + - local: 
generation_features + title: Generation features - local: tasks/prompting - title: LLM prompting guide - title: Prompting - title: Task Guides -- sections: - - local: fast_tokenizers - title: Use fast tokenizers from 🤗 Tokenizers - - local: multilingual - title: Run inference with multilingual models - - local: create_a_model - title: Use model-specific APIs - - local: custom_models - title: Share a custom model - - local: trainer - title: Trainer - - local: sagemaker - title: Run training on Amazon SageMaker - - local: serialization - title: Export to ONNX - - local: tflite - title: Export to TFLite - - local: torchscript - title: Export to TorchScript - - local: notebooks - title: Notebooks with examples - - local: community - title: Community resources - - local: troubleshooting - title: Troubleshoot - - local: gguf - title: Interoperability with GGUF files - - local: tiktoken - title: Interoperability with TikToken files - - local: modular_transformers - title: Modularity in `transformers` - - local: how_to_hack_models - title: Model Hacking (overwriting a class to your usage) - title: Developer guides -- sections: + title: Prompt engineering + - local: llm_optims + title: Optimizing inference + - local: kv_cache + title: KV cache strategies + - local: serving + title: Serving + - local: cache_explanation + title: Caching + - local: llm_tutorial_optimization + title: Getting the most out of LLMs + - local: perplexity + title: Perplexity of fixed-length models + title: LLMs + - sections: + - local: conversations + title: Chat basics + - local: chat_templating + title: Templates + - local: chat_templating_multimodal + title: Multimodal templates + - local: chat_templating_writing + title: Template writing + - local: chat_extras + title: Tools and RAG + title: Chat with models + - sections: + - local: perf_torch_compile + title: torch.compile + - local: perf_infer_gpu_one + title: GPU + - local: perf_infer_gpu_multi + title: Distributed GPU inference + - local: perf_infer_cpu + title: CPU + - local: tf_xla + title: XLA + title: Optimization + - local: agents + title: Agents + - local: tools + title: Tools + title: Inference +- isExpanded: false + sections: + - sections: + - local: trainer + title: Trainer + - local: training + title: Fine-tuning + - local: optimizers + title: Optimizers + - local: hpo_train + title: Hyperparameter search + title: Trainer API + - sections: + - local: gpu_selection + title: GPU selection + - local: accelerate + title: Accelerate + - local: fsdp + title: FullyShardedDataParallel + - local: deepspeed + title: DeepSpeed + - local: debugging + title: Multi-GPU debugging + - local: perf_train_cpu_many + title: Distributed CPUs + - local: perf_train_gpu_many + title: Parallelism methods + title: Distributed training + - sections: + - local: perf_train_gpu_one + title: GPU + - local: perf_train_cpu + title: CPU + - local: perf_train_tpu_tf + title: TPU + - local: perf_train_special + title: Apple Silicon + - local: perf_hardware + title: Build your own machine + title: Hardware + - local: peft + title: PEFT + - local: model_memory_anatomy + title: Model training anatomy + title: Training +- isExpanded: false + sections: - local: quantization/overview - title: Getting started - - local: quantization/bitsandbytes - title: bitsandbytes - - local: quantization/gptq - title: GPTQ - - local: quantization/awq - title: AWQ + title: Overview - local: quantization/aqlm title: AQLM - - local: quantization/vptq - title: SpQR - - local: quantization/spqr - title: VPTQ - - 
local: quantization/quanto - title: Quanto + - local: quantization/awq + title: AWQ + - local: quantization/bitnet + title: BitNet + - local: quantization/bitsandbytes + title: bitsandbytes + - local: quantization/compressed_tensors + title: compressed-tensors - local: quantization/eetq title: EETQ + - local: quantization/fbgemm_fp8 + title: FBGEMM + - local: quantization/finegrained_fp8 + title: Fine-grained FP8 + - local: gguf + title: GGUF + - local: quantization/gptq + title: GPTQ - local: quantization/higgs title: HIGGS - local: quantization/hqq title: HQQ - - local: quantization/fbgemm_fp8 - title: FBGEMM_FP8 - local: quantization/optimum title: Optimum + - local: quantization/quanto + title: Quanto + - local: quantization/quark + title: Quark - local: quantization/torchao - title: TorchAO - - local: quantization/bitnet - title: BitNet - - local: quantization/compressed_tensors - title: compressed-tensors - - local: quantization/finegrained_fp8 - title: Fine-grained FP8 + title: torchao + - local: quantization/spqr + title: SpQR + - local: quantization/vptq + title: VPTQ - local: quantization/contribute - title: Contribute new quantization method - title: Quantization Methods -- sections: - - local: performance - title: Overview - - local: llm_optims - title: LLM inference optimization - - sections: - - local: perf_train_gpu_one - title: Methods and tools for efficient training on a single GPU - - local: perf_train_gpu_many - title: Multiple GPUs and parallelism - - local: fsdp - title: Fully Sharded Data Parallel - - local: deepspeed - title: DeepSpeed - - local: perf_train_cpu - title: Efficient training on CPU - - local: perf_train_cpu_many - title: Distributed CPU training - - local: perf_train_tpu_tf - title: Training on TPU with TensorFlow - - local: perf_train_special - title: PyTorch training on Apple silicon - - local: perf_hardware - title: Custom hardware for training - - local: hpo_train - title: Hyperparameter Search using Trainer API - title: Efficient training techniques + title: Contribute + title: Quantization +- isExpanded: false + sections: + - local: serialization + title: ONNX + - local: tflite + title: LiteRT + - local: executorch + title: ExecuTorch + - local: torchscript + title: TorchScript + title: Export to production +- isExpanded: false + sections: - sections: - - local: perf_infer_cpu - title: CPU inference - - local: perf_infer_gpu_one - title: GPU inference - - local: perf_infer_gpu_multi - title: Multi-GPU inference - title: Optimizing inference - - local: big_models - title: Instantiate a big model - - local: debugging - title: Debugging - - local: tf_xla - title: XLA Integration for TensorFlow Models - - local: perf_torch_compile - title: Optimize inference using `torch.compile()` - title: Performance and scalability -- sections: + - sections: + - local: tasks/sequence_classification + title: Text classification + - local: tasks/token_classification + title: Token classification + - local: tasks/question_answering + title: Question answering + - local: tasks/language_modeling + title: Causal language modeling + - local: tasks/masked_language_modeling + title: Masked language modeling + - local: tasks/translation + title: Translation + - local: tasks/summarization + title: Summarization + - local: tasks/multiple_choice + title: Multiple choice + title: Natural language processing + - sections: + - local: tasks/audio_classification + title: Audio classification + - local: tasks/asr + title: Automatic speech recognition + title: Audio + - sections: + - 
local: tasks/image_classification + title: Image classification + - local: tasks/semantic_segmentation + title: Image segmentation + - local: tasks/video_classification + title: Video classification + - local: tasks/object_detection + title: Object detection + - local: tasks/zero_shot_object_detection + title: Zero-shot object detection + - local: tasks/zero_shot_image_classification + title: Zero-shot image classification + - local: tasks/monocular_depth_estimation + title: Depth estimation + - local: tasks/image_to_image + title: Image-to-Image + - local: tasks/image_feature_extraction + title: Image Feature Extraction + - local: tasks/mask_generation + title: Mask Generation + - local: tasks/keypoint_detection + title: Keypoint detection + - local: tasks/knowledge_distillation_for_image_classification + title: Knowledge Distillation for Computer Vision + title: Computer vision + - sections: + - local: tasks/image_captioning + title: Image captioning + - local: tasks/document_question_answering + title: Document Question Answering + - local: tasks/visual_question_answering + title: Visual Question Answering + - local: tasks/text-to-speech + title: Text to speech + - local: tasks/idefics + title: Image tasks with IDEFICS + - local: tasks/image_text_to_text + title: Image-text-to-text + - local: tasks/video_text_to_text + title: Video-text-to-text + title: Multimodal + title: Task recipes + - local: run_scripts + title: Training scripts + - local: glossary + title: Glossary + - local: philosophy + title: Philosophy + - local: notebooks + title: Notebooks with examples + - local: community + title: Community resources + - local: troubleshooting + title: Troubleshoot + title: Resources +- isExpanded: false + sections: - local: contributing - title: How to contribute to 🤗 Transformers? - - local: add_new_model - title: How to add a model to 🤗 Transformers? - - local: add_new_pipeline - title: How to add a pipeline to 🤗 Transformers? 
+ title: Contribute to Transformers - local: testing - title: Testing + title: Transformers model tests - local: pr_checks - title: Checks on a Pull Request + title: Pull request checks title: Contribute -- sections: - - local: philosophy - title: Philosophy - - local: glossary - title: Glossary - - local: task_summary - title: What 🤗 Transformers can do - - local: tasks_explained - title: How 🤗 Transformers solve tasks - - local: model_summary - title: The Transformer model family - - local: tokenizer_summary - title: Summary of the tokenizers - - local: attention - title: Attention mechanisms - - local: pad_truncation - title: Padding and truncation - - local: bertology - title: BERTology - - local: perplexity - title: Perplexity of fixed-length models - - local: pipeline_webserver - title: Pipelines for webserver inference - - local: model_memory_anatomy - title: Model training anatomy - - local: llm_tutorial_optimization - title: Getting the most out of LLMs - title: Conceptual guides -- sections: +- isExpanded: false + sections: - sections: - local: main_classes/agent title: Agents and Tools @@ -313,6 +332,8 @@ title: Optimization - local: main_classes/output title: Model outputs + - local: main_classes/peft + title: PEFT - local: main_classes/pipelines title: Pipelines - local: main_classes/processors @@ -331,10 +352,9 @@ title: Feature Extractor - local: main_classes/image_processor title: Image Processor - title: Main Classes + title: Main classes - sections: - - isExpanded: false - sections: + - sections: - local: model_doc/albert title: ALBERT - local: model_doc/bamba @@ -395,6 +415,8 @@ title: DeBERTa - local: model_doc/deberta-v2 title: DeBERTa-v2 + - local: model_doc/deepseek_v3 + title: DeepSeek-V3 - local: model_doc/dialogpt title: DialoGPT - local: model_doc/diffllama @@ -461,6 +483,8 @@ title: Granite - local: model_doc/granitemoe title: GraniteMoe + - local: model_doc/granitemoeshared + title: GraniteMoeShared - local: model_doc/granitevision title: GraniteVision - local: model_doc/helium @@ -483,6 +507,8 @@ title: Llama2 - local: model_doc/llama3 title: Llama3 + - local: model_doc/llama4 + title: Llama4 - local: model_doc/longformer title: Longformer - local: model_doc/longt5 @@ -511,6 +537,8 @@ title: MegatronGPT2 - local: model_doc/mistral title: Mistral + - local: model_doc/mistral3 + title: Mistral3 - local: model_doc/mixtral title: Mixtral - local: model_doc/mluke @@ -561,6 +589,8 @@ title: Phi - local: model_doc/phi3 title: Phi-3 + - local: model_doc/phi4_multimodal + title: Phi4 Multimodal - local: model_doc/phimoe title: PhiMoE - local: model_doc/phobert @@ -575,6 +605,10 @@ title: Qwen2 - local: model_doc/qwen2_moe title: Qwen2MoE + - local: model_doc/qwen3 + title: Qwen3 + - local: model_doc/qwen3_moe + title: Qwen3MoE - local: model_doc/rag title: RAG - local: model_doc/realm @@ -642,8 +676,7 @@ - local: model_doc/zamba2 title: Zamba2 title: Text models - - isExpanded: false - sections: + - sections: - local: model_doc/beit title: BEiT - local: model_doc/bit @@ -714,6 +747,8 @@ title: NAT - local: model_doc/poolformer title: PoolFormer + - local: model_doc/prompt_depth_anything + title: Prompt Depth Anything - local: model_doc/pvt title: Pyramid Vision Transformer (PVT) - local: model_doc/pvt_v2 @@ -771,8 +806,7 @@ - local: model_doc/zoedepth title: ZoeDepth title: Vision models - - isExpanded: false - sections: + - sections: - local: model_doc/audio-spectrogram-transformer title: Audio Spectrogram Transformer - local: model_doc/bark @@ -842,8 +876,7 @@ - 
local: model_doc/xlsr_wav2vec2 title: XLSR-Wav2Vec2 title: Audio models - - isExpanded: false - sections: + - sections: - local: model_doc/timesformer title: TimeSformer - local: model_doc/videomae @@ -851,14 +884,15 @@ - local: model_doc/vivit title: ViViT title: Video models - - isExpanded: false - sections: + - sections: - local: model_doc/align title: ALIGN - local: model_doc/altclip title: AltCLIP - local: model_doc/aria title: Aria + - local: model_doc/aya_vision + title: AyaVision - local: model_doc/blip title: BLIP - local: model_doc/blip-2 @@ -889,6 +923,8 @@ title: Emu3 - local: model_doc/flava title: FLAVA + - local: model_doc/gemma3 + title: Gemma3 - local: model_doc/git title: GIT - local: model_doc/got_ocr2 @@ -961,8 +997,14 @@ title: Qwen2VL - local: model_doc/sam title: Segment Anything + - local: model_doc/shieldgemma2 + title: ShieldGemma2 - local: model_doc/siglip title: SigLIP + - local: model_doc/siglip2 + title: SigLIP2 + - local: model_doc/smolvlm + title: SmolVLM - local: model_doc/speech-encoder-decoder title: Speech Encoder Decoder Models - local: model_doc/tapas @@ -990,15 +1032,13 @@ - local: model_doc/xclip title: X-CLIP title: Multimodal models - - isExpanded: false - sections: + - sections: - local: model_doc/decision_transformer title: Decision Transformer - local: model_doc/trajectory_transformer title: Trajectory Transformer title: Reinforcement learning models - - isExpanded: false - sections: + - sections: - local: model_doc/autoformer title: Autoformer - local: model_doc/informer @@ -1010,8 +1050,7 @@ - local: model_doc/time_series_transformer title: Time Series Transformer title: Time series models - - isExpanded: false - sections: + - sections: - local: model_doc/graphormer title: Graphormer title: Graph models @@ -1019,6 +1058,8 @@ - sections: - local: internal/modeling_utils title: Custom Layers and Utilities + - local: internal/model_debugging_utils + title: Utilities for Model Debugging - local: internal/pipelines_utils title: Utilities for pipelines - local: internal/tokenization_utils @@ -1035,5 +1076,5 @@ title: General Utilities - local: internal/time_series_utils title: Utilities for Time Series - title: Internal Helpers + title: Internal helpers title: API diff --git a/docs/source/en/accelerate.md b/docs/source/en/accelerate.md index e0a7a9c65623..c0ad46f8ac91 100644 --- a/docs/source/en/accelerate.md +++ b/docs/source/en/accelerate.md @@ -1,4 +1,4 @@ - -# Distributed training with 🤗 Accelerate +# Accelerate -As models get bigger, parallelism has emerged as a strategy for training larger models on limited hardware and accelerating training speed by several orders of magnitude. At Hugging Face, we created the [🤗 Accelerate](https://huggingface.co/docs/accelerate) library to help users easily train a 🤗 Transformers model on any type of distributed setup, whether it is multiple GPU's on one machine or multiple GPU's across several machines. In this tutorial, learn how to customize your native PyTorch training loop to enable training in a distributed environment. +[Accelerate](https://hf.co/docs/accelerate/index) is a library designed to simplify distributed training on any type of setup with PyTorch by uniting the most common frameworks ([Fully Sharded Data Parallel (FSDP)](https://pytorch.org/blog/introducing-pytorch-fully-sharded-data-parallel-api/) and [DeepSpeed](https://www.deepspeed.ai/)) for it into a single interface. [`Trainer`] is powered by Accelerate under the hood, enabling loading big models and distributed training. 
-## Setup - -Get started by installing 🤗 Accelerate: +This guide will show you two ways to use Accelerate with Transformers, using FSDP as the backend. The first method demonstrates distributed training with [`Trainer`], and the second method demonstrates adapting a PyTorch training loop. For more detailed information about Accelerate, please refer to the [documentation](https://hf.co/docs/accelerate/index). ```bash pip install accelerate ``` -Then import and create an [`~accelerate.Accelerator`] object. The [`~accelerate.Accelerator`] will automatically detect your type of distributed setup and initialize all the necessary components for training. You don't need to explicitly place your model on a device. +Start by running [accelerate config](https://hf.co/docs/accelerate/main/en/package_reference/cli#accelerate-config) in the command line to answer a series of prompts about your training system. This creates and saves a configuration file to help Accelerate correctly set up training based on your setup. -```py ->>> from accelerate import Accelerator +```bash +accelerate config +``` ->>> accelerator = Accelerator() +Depending on your setup and the answers you provide, an example configuration file for distributing training with FSDP on one machine with two GPUs may look like the following. + +```yaml +compute_environment: LOCAL_MACHINE +debug: false +distributed_type: FSDP +downcast_bf16: 'no' +fsdp_config: + fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP + fsdp_backward_prefetch_policy: BACKWARD_PRE + fsdp_forward_prefetch: false + fsdp_cpu_ram_efficient_loading: true + fsdp_offload_params: false + fsdp_sharding_strategy: FULL_SHARD + fsdp_state_dict_type: SHARDED_STATE_DICT + fsdp_sync_module_states: true + fsdp_transformer_layer_cls_to_wrap: BertLayer + fsdp_use_orig_params: true +machine_rank: 0 +main_training_function: main +mixed_precision: bf16 +num_machines: 1 +num_processes: 2 +rdzv_backend: static +same_network: true +tpu_env: [] +tpu_use_cluster: false +tpu_use_sudo: false +use_cpu: false ``` -## Prepare to accelerate +## Trainer -The next step is to pass all the relevant training objects to the [`~accelerate.Accelerator.prepare`] method. This includes your training and evaluation DataLoaders, a model and an optimizer: +Pass the path to the saved configuration file to [`TrainingArguments`], and from there, pass your [`TrainingArguments`] to [`Trainer`]. ```py ->>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( -... train_dataloader, eval_dataloader, model, optimizer -... ) +from transformers import TrainingArguments, Trainer + +training_args = TrainingArguments( + output_dir="your-model", + learning_rate=2e-5, + per_device_train_batch_size=16, + per_device_eval_batch_size=16, + num_train_epochs=2, + fsdp_config="path/to/fsdp_config", + fsdp_strategy="full_shard", + weight_decay=0.01, + eval_strategy="epoch", + save_strategy="epoch", + load_best_model_at_end=True, + push_to_hub=True, +) + +trainer = Trainer( + model=model, + args=training_args, + train_dataset=dataset["train"], + eval_dataset=dataset["test"], + processing_class=tokenizer, + data_collator=data_collator, + compute_metrics=compute_metrics, +) + +trainer.train() ``` -## Backward +## Native PyTorch -The last addition is to replace the typical `loss.backward()` in your training loop with 🤗 Accelerate's [`~accelerate.Accelerator.backward`] method: +Accelerate can also be added to any PyTorch training loop to enable distributed training. 
The [`~accelerate.Accelerator`] is the main entry point for adapting your PyTorch code to work with Accelerate. It automatically detects your distributed training setup and initializes all the necessary components for training. You don't need to explicitly place your model on a device because [`~accelerate.Accelerator`] knows which device to move your model to. ```py ->>> for epoch in range(num_epochs): -... for batch in train_dataloader: -... outputs = model(**batch) -... loss = outputs.loss -... accelerator.backward(loss) - -... optimizer.step() -... lr_scheduler.step() -... optimizer.zero_grad() -... progress_bar.update(1) +from accelerate import Accelerator + +accelerator = Accelerator() +device = accelerator.device ``` -As you can see in the following code, you only need to add four additional lines of code to your training loop to enable distributed training! +All PyTorch objects (model, optimizer, scheduler, dataloaders) should be passed to the [`~accelerate.Accelerator.prepare`] method now. This method moves your model to the appropriate device or devices, adapts the optimizer and scheduler to use [`~accelerate.optimizer.AcceleratedOptimizer`] and [`~accelerate.scheduler.AcceleratedScheduler`], and creates a new shardable dataloader. -```diff -+ from accelerate import Accelerator - from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler +```py +train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( + train_dataloader, eval_dataloader, model, optimizer +) +``` -+ accelerator = Accelerator() +Replace `loss.backward` in your training loop with Accelerates [`~accelerate.Accelerator.backward`] method to scale the gradients and determine the appropriate `backward` method to use depending on your framework (for example, DeepSpeed or Megatron). - model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2) - optimizer = AdamW(model.parameters(), lr=3e-5) +```py +for epoch in range(num_epochs): + for batch in train_dataloader: + outputs = model(**batch) + loss = outputs.loss + accelerator.backward(loss) + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + progress_bar.update(1) +``` -- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") -- model.to(device) +Combine everything into a function and make it callable as a script. 
-+ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( -+ train_dataloader, eval_dataloader, model, optimizer -+ ) +```py +from accelerate import Accelerator + +def main(): + accelerator = Accelerator() - num_epochs = 3 - num_training_steps = num_epochs * len(train_dataloader) - lr_scheduler = get_scheduler( - "linear", - optimizer=optimizer, - num_warmup_steps=0, - num_training_steps=num_training_steps + model, optimizer, training_dataloader, scheduler = accelerator.prepare( + model, optimizer, training_dataloader, scheduler ) - progress_bar = tqdm(range(num_training_steps)) - - model.train() - for epoch in range(num_epochs): - for batch in train_dataloader: -- batch = {k: v.to(device) for k, v in batch.items()} - outputs = model(**batch) - loss = outputs.loss -- loss.backward() -+ accelerator.backward(loss) - - optimizer.step() - lr_scheduler.step() - optimizer.zero_grad() - progress_bar.update(1) + for batch in training_dataloader: + optimizer.zero_grad() + inputs, targets = batch + outputs = model(inputs) + loss = loss_function(outputs, targets) + accelerator.backward(loss) + optimizer.step() + scheduler.step() + +if __name__ == "__main__": + main() ``` -## Train - -Once you've added the relevant lines of code, launch your training in a script or a notebook like Colaboratory. +From the command line, call [accelerate launch](https://hf.co/docs/accelerate/main/en/package_reference/cli#accelerate-launch) to run your training script. Any additional arguments or parameters can be passed here as well. -### Train with a script - -If you are running your training from a script, run the following command to create and save a configuration file: - -```bash -accelerate config -``` - -Then launch your training with: +To launch your training script on two GPUs, add the `--num_processes` argument. ```bash -accelerate launch train.py -``` - -### Train with a notebook - -🤗 Accelerate can also run in a notebook if you're planning on using Colaboratory's TPUs. Wrap all the code responsible for training in a function, and pass it to [`~accelerate.notebook_launcher`]: - -```py ->>> from accelerate import notebook_launcher - ->>> notebook_launcher(training_function) +accelerate launch --num_processes=2 your_script.py ``` -For more information about 🤗 Accelerate and its rich features, refer to the [documentation](https://huggingface.co/docs/accelerate). +Refer to the [Launching Accelerate scripts](https://hf.co/docs/accelerate/main/en/basic_tutorials/launch) for more details. diff --git a/docs/source/en/add_new_model.md b/docs/source/en/add_new_model.md index 9aab36bb6fbe..419b1dced412 100644 --- a/docs/source/en/add_new_model.md +++ b/docs/source/en/add_new_model.md @@ -1,4 +1,4 @@ - -# How to add a model to 🤗 Transformers? +# Adding a new model to Transformers -The 🤗 Transformers library is often able to offer new models thanks to community contributors. But this can be a challenging project and requires an in-depth knowledge of the 🤗 Transformers library and the model to implement. At Hugging Face, we're trying to empower more of the community to actively add models and we've put together this guide to walk you through the process of adding a PyTorch model (make sure you have [PyTorch installed](https://pytorch.org/get-started/locally/)). +> [!TIP] +> Try adding new models with a more [modular](./modular_transformers) approach first. This makes it significantly easier to contribute a model to Transformers! 
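As a rough illustration of that modular approach (hypothetical file and class names, assuming the new architecture can reuse the existing Llama implementation), a `modular_*.py` file can be little more than a handful of subclasses, which the modular conversion utility then expands into a complete `modeling_*.py` file:

```py
# Hypothetical modular_brand_new_llama.py sketch: reuse the existing Llama
# implementation and only override what actually differs. The modular
# conversion utility generates the full modeling file from these subclasses.
from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaModel


class BrandNewLlamaConfig(LlamaConfig):
    model_type = "brand_new_llama"


class BrandNewLlamaModel(LlamaModel):
    pass


class BrandNewLlamaForCausalLM(LlamaForCausalLM):
    pass
```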
-Along the way, you'll: +Many of the models in Transformers are contributed by developers and researchers. As an open-source first project, we're invested in empowering the community to actively and independently add more models. -- get insights into open-source best practices -- understand the design principles behind one of the most popular deep learning libraries -- learn how to efficiently test large models -- learn how to integrate Python utilities like `black`, `ruff`, and `make fix-copies` to ensure clean and readable code +When you add a model to Transformers, you'll learn: -A Hugging Face team member will be available to help you along the way so you'll never be alone. 🤗 ❤️ +- more about open-source best practices +- about a models architecture +- about Transformers' design principles +- how to efficiently test large models +- how to use Python utilities like [Black](https://black.readthedocs.io/en/stable/) and [Ruff](https://docs.astral.sh/ruff/) to create clean and readable code -To get started, open a [New model addition](https://github.com/huggingface/transformers/issues/new?assignees=&labels=New+model&template=new-model-addition.yml) issue for the model you want to see in 🤗 Transformers. If you're not especially picky about contributing a specific model, you can filter by the [New model label](https://github.com/huggingface/transformers/labels/New%20model) to see if there are any unclaimed model requests and work on it. +It is a challenging but rewarding process. -Once you've opened a new model request, the first step is to get familiar with 🤗 Transformers if you aren't already! +This guide will walk you through adding an example BrandNewLlama PyTorch model to Transformers. Before you begin, it is a good idea to familiarize yourself with the library. -## General overview of 🤗 Transformers +## Transformers overview -First, you should get a general overview of 🤗 Transformers. 🤗 Transformers is a very opinionated library, so there is a -chance that you don't agree with some of the library's philosophies or design choices. From our experience, however, we -found that the fundamental design choices and philosophies of the library are crucial to efficiently scale 🤗 -Transformers while keeping maintenance costs at a reasonable level. +Transformers is an opinionated library with its own unique philosophy and design choices. These choices help us sustainably scale and maintain Transformers. -A good first starting point to better understand the library is to read the [documentation of our philosophy](philosophy). As a result of our way of working, there are some choices that we try to apply to all models: +> [!TIP] +> Learn more about our design principles on the [Philosophy](./philosophy) doc. -- Composition is generally favored over-abstraction -- Duplicating code is not always bad if it strongly improves the readability or accessibility of a model -- Model files are as self-contained as possible so that when you read the code of a specific model, you ideally only - have to look into the respective `modeling_....py` file. +Some of these design choices are: -In our opinion, the library's code is not just a means to provide a product, *e.g.* the ability to use BERT for -inference, but also as the very product that we want to improve. Hence, when adding a model, the user is not only the -person who will use your model, but also everybody who will read, try to understand, and possibly tweak your code. 
+- composition > over-abstraction +- duplicate code isn't always bad if it greatly improves readability and accessibility +- model files are self-contained and all the necessary model code is found in the `modeling_mymodel.py` file -With this in mind, let's go a bit deeper into the general library design. +These design choices are important *for everyone* interacting with the model. It is easier to read, understand, and modify. -### Overview of models +This section describes how the model and configuration classes interact and the Transformers code style. -To successfully add a model, it is important to understand the interaction between your model and its config, -[`PreTrainedModel`], and [`PretrainedConfig`]. For exemplary purposes, we will -call the model to be added to 🤗 Transformers `BrandNewBert`. +### Model and configuration -Let's take a look: +All Transformers' models inherit from a base [`PreTrainedModel`] and [`PretrainedConfig`] class. The configuration is the models blueprint. - +There is never more than two levels of abstraction for any model to keep the code readable. The example model here, BrandNewLlama, inherits from `BrandNewLlamaPreTrainedModel` and [`PreTrainedModel`]. It is important that a new model only depends on [`PreTrainedModel`] so that it can use the [`~PreTrainedModel.from_pretrained`] and [`~PreTrainedModel.save_pretrained`] methods. -As you can see, we do make use of inheritance in 🤗 Transformers, but we keep the level of abstraction to an absolute -minimum. There are never more than two levels of abstraction for any model in the library. `BrandNewBertModel` -inherits from `BrandNewBertPreTrainedModel` which in turn inherits from [`PreTrainedModel`] and -that's it. As a general rule, we want to make sure that a new model only depends on -[`PreTrainedModel`]. The important functionalities that are automatically provided to every new -model are [`~PreTrainedModel.from_pretrained`] and -[`~PreTrainedModel.save_pretrained`], which are used for serialization and deserialization. All of the -other important functionalities, such as `BrandNewBertModel.forward` should be completely defined in the new -`modeling_brand_new_bert.py` script. Next, we want to make sure that a model with a specific head layer, such as -`BrandNewBertForMaskedLM` does not inherit from `BrandNewBertModel`, but rather uses `BrandNewBertModel` -as a component that can be called in its forward pass to keep the level of abstraction low. Every new model requires a -configuration class, called `BrandNewBertConfig`. This configuration is always stored as an attribute in -[`PreTrainedModel`], and thus can be accessed via the `config` attribute for all classes -inheriting from `BrandNewBertPreTrainedModel`: +Other important functions like the forward method are defined in the `modeling.py` file. -```python -model = BrandNewBertModel.from_pretrained("brandy/brand_new_bert") -model.config # model has access to its config +Specific model heads (for example, sequence classification or language modeling) should call the base model in the forward pass rather than inheriting from it to keep abstraction low. + +New models require a configuration, for example `BrandNewLlamaConfig`, that is stored as an attribute of [`PreTrainedModel`]. + +```py +model = BrandNewLlamaModel.from_pretrained("username/brand_new_llama") +model.config ``` -Similar to the model, the configuration inherits basic serialization and deserialization functionalities from -[`PretrainedConfig`]. 
Note that the configuration and the model are always serialized into two -different formats - the model to a *pytorch_model.bin* file and the configuration to a *config.json* file. Calling -the model's [`~PreTrainedModel.save_pretrained`] will automatically call -the config's [`~PretrainedConfig.save_pretrained`], so that both model and configuration are saved. +[`PretrainedConfig`] provides the [`~PretrainedConfig.from_pretrained`] and [`~PretrainedConfig.save_pretrained`] methods. + +When you use [`PreTrainedModel.save_pretrained`], it automatically calls [`PretrainedConfig.save_pretrained`] so that both the model and configuration are saved together. +A model is saved to a `model.safetensors` file and a configuration is saved to a `config.json` file. ### Code style -When coding your new model, keep in mind that Transformers is an opinionated library and we have a few quirks of our -own regarding how code should be written :-) - -1. The forward pass of your model should be fully written in the modeling file while being fully independent of other - models in the library. If you want to reuse a block from another model, copy the code and paste it with a - `# Copied from` comment on top (see [here](https://github.com/huggingface/transformers/blob/v4.17.0/src/transformers/models/roberta/modeling_roberta.py#L160) - for a good example and [there](pr_checks#check-copies) for more documentation on Copied from). -2. The code should be fully understandable, even by a non-native English speaker. This means you should pick - descriptive variable names and avoid abbreviations. As an example, `activation` is preferred to `act`. - One-letter variable names are strongly discouraged unless it's an index in a for loop. -3. More generally we prefer longer explicit code to short magical one. -4. Avoid subclassing `nn.Sequential` in PyTorch but subclass `nn.Module` and write the forward pass, so that anyone - using your code can quickly debug it by adding print statements or breaking points. -5. Your function signature should be type-annotated. For the rest, good variable names are way more readable and - understandable than type annotations. - -### Overview of tokenizers - -Not quite ready yet :-( This section will be added soon! - -## Step-by-step recipe to add a model to 🤗 Transformers - -Everyone has different preferences of how to port a model so it can be very helpful for you to take a look at summaries -of how other contributors ported models to Hugging Face. Here is a list of community blog posts on how to port a model: - -1. [Porting GPT2 Model](https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28) by [Thomas](https://huggingface.co/thomwolf) -2. [Porting WMT19 MT Model](https://huggingface.co/blog/porting-fsmt) by [Stas](https://huggingface.co/stas) - -From experience, we can tell you that the most important things to keep in mind when adding a model are: - -- Don't reinvent the wheel! Most parts of the code you will add for the new 🤗 Transformers model already exist - somewhere in 🤗 Transformers. Take some time to find similar, already existing models and tokenizers you can copy - from. [grep](https://www.gnu.org/software/grep/) and [rg](https://github.com/BurntSushi/ripgrep) are your - friends. Note that it might very well happen that your model's tokenizer is based on one model implementation, and - your model's modeling code on another one. *E.g.* FSMT's modeling code is based on BART, while FSMT's tokenizer code - is based on XLM. 
-- It's more of an engineering challenge than a scientific challenge. You should spend more time creating an - efficient debugging environment rather than trying to understand all theoretical aspects of the model in the paper. -- Ask for help, when you're stuck! Models are the core component of 🤗 Transformers so we at Hugging Face are more - than happy to help you at every step to add your model. Don't hesitate to ask if you notice you are not making - progress. - -In the following, we try to give you a general recipe that we found most useful when porting a model to 🤗 Transformers. - -The following list is a summary of everything that has to be done to add a model and can be used by you as a To-Do -List: - -☐ (Optional) Understood the model's theoretical aspects
-☐ Prepared 🤗 Transformers dev environment
-☐ Set up debugging environment of the original repository
-☐ Created script that successfully runs the `forward()` pass using the original repository and checkpoint
-☐ Successfully added the model skeleton to 🤗 Transformers
-☐ Successfully converted original checkpoint to 🤗 Transformers checkpoint
-☐ Successfully ran `forward()` pass in 🤗 Transformers that gives identical output to original checkpoint
-☐ Finished model tests in 🤗 Transformers
-☐ Successfully added tokenizer in 🤗 Transformers
-☐ Run end-to-end integration tests
-☐ Finished docs
-☐ Uploaded model weights to the Hub
-☐ Submitted the pull request
-☐ (Optional) Added a demo notebook - -To begin with, we usually recommend starting by getting a good theoretical understanding of `BrandNewBert`. However, -if you prefer to understand the theoretical aspects of the model *on-the-job*, then it is totally fine to directly dive -into the `BrandNewBert`'s code-base. This option might suit you better if your engineering skills are better than -your theoretical skill, if you have trouble understanding `BrandNewBert`'s paper, or if you just enjoy programming -much more than reading scientific papers. - -### 1. (Optional) Theoretical aspects of BrandNewBert - -You should take some time to read *BrandNewBert's* paper, if such descriptive work exists. There might be large -sections of the paper that are difficult to understand. If this is the case, this is fine - don't worry! The goal is -not to get a deep theoretical understanding of the paper, but to extract the necessary information required to -effectively re-implement the model in 🤗 Transformers. That being said, you don't have to spend too much time on the -theoretical aspects, but rather focus on the practical ones, namely: - -- What type of model is *brand_new_bert*? BERT-like encoder-only model? GPT2-like decoder-only model? BART-like - encoder-decoder model? Look at the [model_summary](model_summary) if you're not familiar with the differences between those. -- What are the applications of *brand_new_bert*? Text classification? Text generation? Seq2Seq tasks, *e.g.,* - summarization? -- What is the novel feature of the model that makes it different from BERT/GPT-2/BART? -- Which of the already existing [🤗 Transformers models](https://huggingface.co/transformers/#contents) is most - similar to *brand_new_bert*? -- What type of tokenizer is used? A sentencepiece tokenizer? Word piece tokenizer? Is it the same tokenizer as used - for BERT or BART? - -After you feel like you have gotten a good overview of the architecture of the model, you might want to write to the -Hugging Face team with any questions you might have. This might include questions regarding the model's architecture, -its attention layer, etc. We will be more than happy to help you. - -### 2. Next prepare your environment - -1. Fork the [repository](https://github.com/huggingface/transformers) by clicking on the ‘Fork' button on the - repository's page. This creates a copy of the code under your GitHub user account. - -2. Clone your `transformers` fork to your local disk, and add the base repository as a remote: - - ```bash - git clone https://github.com/[your Github handle]/transformers.git - cd transformers - git remote add upstream https://github.com/huggingface/transformers.git - ``` - -3. Set up a development environment, for instance by running the following command: - - ```bash - python -m venv .env - source .env/bin/activate - pip install -e ".[dev]" - ``` - - Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a - failure with this command. If that's the case make sure to install the Deep Learning framework you are working with - (PyTorch, TensorFlow and/or Flax) then do: - - ```bash - pip install -e ".[quality]" - ``` - - which should be enough for most use cases. You can then return to the parent directory - - ```bash - cd .. - ``` - -4. We recommend adding the PyTorch version of *brand_new_bert* to Transformers. To install PyTorch, please follow the - instructions on https://pytorch.org/get-started/locally/. 
- - **Note:** You don't need to have CUDA installed. Making the new model work on CPU is sufficient. - -5. To port *brand_new_bert*, you will also need access to its original repository: - - ```bash - git clone https://github.com/org_that_created_brand_new_bert_org/brand_new_bert.git - cd brand_new_bert - pip install -e . - ``` - -Now you have set up a development environment to port *brand_new_bert* to 🤗 Transformers. - -### 3.-4. Run a pretrained checkpoint using the original repository - -At first, you will work on the original *brand_new_bert* repository. Often, the original implementation is very -“researchy”. Meaning that documentation might be lacking and the code can be difficult to understand. But this should -be exactly your motivation to reimplement *brand_new_bert*. At Hugging Face, one of our main goals is to *make people -stand on the shoulders of giants* which translates here very well into taking a working model and rewriting it to make -it as **accessible, user-friendly, and beautiful** as possible. This is the number-one motivation to re-implement -models into 🤗 Transformers - trying to make complex new NLP technology accessible to **everybody**. - -You should start thereby by diving into the original repository. - -Successfully running the official pretrained model in the original repository is often **the most difficult** step. -From our experience, it is very important to spend some time getting familiar with the original code-base. You need to -figure out the following: - -- Where to find the pretrained weights? -- How to load the pretrained weights into the corresponding model? -- How to run the tokenizer independently from the model? -- Trace one forward pass so that you know which classes and functions are required for a simple forward pass. Usually, - you only have to reimplement those functions. -- Be able to locate the important components of the model: Where is the model's class? Are there model sub-classes, - *e.g.* EncoderModel, DecoderModel? Where is the self-attention layer? Are there multiple different attention layers, - *e.g.* *self-attention*, *cross-attention*...? -- How can you debug the model in the original environment of the repo? Do you have to add *print* statements, can you - work with an interactive debugger like *ipdb*, or should you use an efficient IDE to debug the model, like PyCharm? +Transformers prefers a clean and readable code over a more abstracted code style. Some of the code style choices include: -It is very important that before you start the porting process, you can **efficiently** debug code in the original -repository! Also, remember that you are working with an open-source library, so do not hesitate to open an issue, or -even a pull request in the original repository. The maintainers of this repository are most likely very happy about -someone looking into their code! - -At this point, it is really up to you which debugging environment and strategy you prefer to use to debug the original -model. We strongly advise against setting up a costly GPU environment, but simply work on a CPU both when starting to -dive into the original repository and also when starting to write the 🤗 Transformers implementation of the model. Only -at the very end, when the model has already been successfully ported to 🤗 Transformers, one should verify that the -model also works as expected on GPU. +- The code should be accessible to non-English users. Pick descriptive variable names and avoid abbreviations. 
For example, "activation" is preferred over "act". One-letter variable names are highly discouraged unless it's an index in a for loop.

-In general, there are two possible debugging environments for running the original model
+- Explicit code is preferred - even if it's longer - over shorter code.

-- [Jupyter notebooks](https://jupyter.org/) / [google colab](https://colab.research.google.com/notebooks/intro.ipynb)
-- Local python scripts.
+- Avoid subclassing [nn.Sequential](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html). Subclass [nn.Module](https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module) instead so the code can be quickly debugged with print statements or breakpoints.

-Jupyter notebooks have the advantage that they allow for cell-by-cell execution which can be helpful to better split
-logical components from one another and to have faster debugging cycles as intermediate results can be stored. Also,
-notebooks are often easier to share with other contributors, which might be very helpful if you want to ask the Hugging
-Face team for help. If you are familiar with Jupyter notebooks, we strongly recommend you work with them.
+- Function signatures should be type-annotated. Otherwise, use good variable names so they're more understandable.

-The obvious disadvantage of Jupyter notebooks is that if you are not used to working with them you will have to spend
-some time adjusting to the new programming environment and you might not be able to use your known debugging tools
-anymore, like `ipdb`.
+## New model addition issue

-For each code-base, a good first step is always to load a **small** pretrained checkpoint and to be able to reproduce a
-single forward pass using a dummy integer vector of input IDs as an input. Such a script could look like this (in
-pseudocode):
+Open a [New model addition](https://github.com/huggingface/transformers/issues/new?assignees=&labels=New+model&template=new-model-addition.yml) issue to add a specific model.

-```python
-model = BrandNewBertModel.load_pretrained_checkpoint("/path/to/checkpoint/")
-input_ids = [0, 4, 5, 2, 3, 7, 9] # vector of input ids
-original_output = model.predict(input_ids)
-```
-
-Next, regarding the debugging strategy, there are generally a few from which to choose from:
+> [!TIP]
+> Filter by the [New model](https://github.com/huggingface/transformers/labels/New%20model) label on GitHub to view and add any existing model requests.

-- Decompose the original model into many small testable components and run a forward pass on each of those for
-  verification
-- Decompose the original model only into the original *tokenizer* and the original *model*, run a forward pass on
-  those, and use intermediate print statements or breakpoints for verification
+Now is a good time to get familiar with BrandNewLlama. It is helpful to read the model's research paper to understand its technical design and implementation. You don't necessarily have to worry too much about the theoretical details. Instead, focus on the practical ones. Use the questions below to guide your reading.

-Again, it is up to you which strategy to choose. Often, one or the other is advantageous depending on the original code
-base.
+- What type of model is BrandNewLlama? Is it an encoder, decoder, or encoder-decoder model?
+- What tasks can BrandNewLlama be used for?
+- What makes BrandNewLlama different from other models?
+- What models in Transformers are most similar to BrandNewLlama?
+- What tokenizer does BrandNewLlama use?

-If the original code-base allows you to decompose the model into smaller sub-components, *e.g.* if the original
-code-base can easily be run in eager mode, it is usually worth the effort to do so. There are some important advantages
-to taking the more difficult road in the beginning:
+In addition to learning more about your model, use the tips below to help you add a model faster.

-- at a later stage when comparing the original model to the Hugging Face implementation, you can verify automatically
-  for each component individually that the corresponding component of the 🤗 Transformers implementation matches instead
-  of relying on visual comparison via print statements
-- it can give you some rope to decompose the big problem of porting a model into smaller problems of just porting
-  individual components and thus structure your work better
-- separating the model into logical meaningful components will help you to get a better overview of the model's design
-  and thus to better understand the model
-- at a later stage those component-by-component tests help you to ensure that no regression occurs as you continue
-  changing your code
+> [!TIP]
+> Each contributor has a unique style and workflow for adding models to Transformers. For an example, take a look at how [Gemma](https://github.com/huggingface/transformers/pull/29167) was added.

-[Lysandre's](https://gist.github.com/LysandreJik/db4c948f6b4483960de5cbac598ad4ed) integration checks for ELECTRA
-gives a nice example of how this can be done.
+- Don't reinvent the wheel! Take your time to explore existing models and tokenizers to see what you can copy and reuse. [Grep](https://www.gnu.org/software/grep/) and [ripgrep](https://github.com/BurntSushi/ripgrep) are great tools for this.
+- This is more of an engineering than a science challenge. Focus on the practical aspects (for example, setting up an efficient debugging environment) instead of the theoretical aspects of the model.
+- Don't be shy to ask for help! We are here to support you. 🤗

-However, if the original code-base is very complex or only allows intermediate components to be run in a compiled mode,
-it might be too time-consuming or even impossible to separate the model into smaller testable sub-components. A good
-example is [T5's MeshTensorFlow](https://github.com/tensorflow/mesh/tree/master/mesh_tensorflow) library which is
-very complex and does not offer a simple way to decompose the model into its sub-components. For such libraries, one
-often relies on verifying print statements.
+## Dev environment

-No matter which strategy you choose, the recommended procedure is often the same that you should start to debug the
-starting layers first and the ending layers last.
+Click on the **Fork** button on the [Transformers](https://github.com/huggingface/transformers) repository to create your own copy to work on. Clone the repository to your local disk and add the base repository as the remote.

-It is recommended that you retrieve the output, either by print statements or sub-component functions, of the following
-layers in the following order:
+```bash
+git clone https://github.com/[your Github handle]/transformers.git
+cd transformers
+git remote add upstream https://github.com/huggingface/transformers.git
+```

-1. Retrieve the input IDs passed to the model
-2. Retrieve the word embeddings
-3. 
Retrieve the input of the first Transformer layer
-4. Retrieve the output of the first Transformer layer
-5. Retrieve the output of the following n - 1 Transformer layers
-6. Retrieve the output of the whole BrandNewBert Model
+Create a virtual environment and perform an [editable install](./installation#editable-install) of the library with the "dev" or development dependencies.

-Input IDs should thereby consists of an array of integers, *e.g.* `input_ids = [0, 4, 4, 3, 2, 4, 1, 7, 19]`
+```bash
+python -m venv .env
+source .env/bin/activate
+pip install -e ".[dev]"
+```

-The outputs of the following layers often consist of multi-dimensional float arrays and can look like this:
+The number of optional dependencies grows as Transformers grows, so this command may fail. If it does, install the "quality" dependencies instead. Also make sure you have a deep learning framework installed.
+```bash
+pip install -e ".[quality]"
```
-[[
- [-0.1465, -0.6501,  0.1993,  ...,  0.1451,  0.3430,  0.6024],
- [-0.4417, -0.5920,  0.3450,  ..., -0.3062,  0.6182,  0.7132],
- [-0.5009, -0.7122,  0.4548,  ..., -0.3662,  0.6091,  0.7648],
- ...,
- [-0.5613, -0.6332,  0.4324,  ..., -0.3792,  0.7372,  0.9288],
- [-0.5416, -0.6345,  0.4180,  ..., -0.3564,  0.6992,  0.9191],
- [-0.5334, -0.6403,  0.4271,  ..., -0.3339,  0.6533,  0.8694]]],
+
+Return to the parent directory and clone and install the original BrandNewLlama repository.
+
+```bash
+git clone https://github.com/org_that_created_brand_new_llama_org/brand_new_llama.git
+cd brand_new_llama
+pip install -e .
```

-We expect that every model added to 🤗 Transformers passes a couple of integration tests, meaning that the original
-model and the reimplemented version in 🤗 Transformers have to give the exact same output up to a precision of 0.001!
-Since it is normal that the exact same model written in different libraries can give a slightly different output
-depending on the library framework, we accept an error tolerance of 1e-3 (0.001). It is not enough if the model gives
-nearly the same output, they have to be almost identical. Therefore, you will certainly compare the intermediate
-outputs of the 🤗 Transformers version multiple times against the intermediate outputs of the original implementation of
-*brand_new_bert* in which case an **efficient** debugging environment of the original repository is absolutely
-important. Here is some advice to make your debugging environment as efficient as possible.
-
-- Find the best way of debugging intermediate results. Is the original repository written in PyTorch? Then you should
-  probably take the time to write a longer script that decomposes the original model into smaller sub-components to
-  retrieve intermediate values. Is the original repository written in Tensorflow 1? Then you might have to rely on
-  TensorFlow print operations like [tf.print](https://www.tensorflow.org/api_docs/python/tf/print) to output
-  intermediate values. Is the original repository written in Jax? Then make sure that the model is **not jitted** when
-  running the forward pass, *e.g.* check-out [this link](https://github.com/google/jax/issues/196).
-- Use the smallest pretrained checkpoint you can find. The smaller the checkpoint, the faster your debug cycle
-  becomes. It is not efficient if your pretrained model is so big that your forward pass takes more than 10 seconds.
-  In case only very large checkpoints are available, it might make more sense to create a dummy model in the new
-  environment with randomly initialized weights and save those weights for comparison with the 🤗 Transformers version
-  of your model
-- Make sure you are using the easiest way of calling a forward pass in the original repository. Ideally, you want to
-  find the function in the original repository that **only** calls a single forward pass, *i.e.* that is often called
-  `predict`, `evaluate`, `forward` or `__call__`. You don't want to debug a function that calls `forward`
-  multiple times, *e.g.* to generate text, like `autoregressive_sample`, `generate`.
-- Try to separate the tokenization from the model's *forward* pass. If the original repository shows examples where
-  you have to input a string, then try to find out where in the forward call the string input is changed to input ids
-  and start from this point. This might mean that you have to possibly write a small script yourself or change the
-  original code so that you can directly input the ids instead of an input string.
-- Make sure that the model in your debugging setup is **not** in training mode, which often causes the model to yield
-  random outputs due to multiple dropout layers in the model. Make sure that the forward pass in your debugging
-  environment is **deterministic** so that the dropout layers are not used. Or use *transformers.utils.set_seed*
-  if the old and new implementations are in the same framework.
-
-The following section gives you more specific details/tips on how you can do this for *brand_new_bert*.
-
-### 5.-14. Port BrandNewBert to 🤗 Transformers
-
-Next, you can finally start adding new code to 🤗 Transformers. Go into the clone of your 🤗 Transformers' fork:
+Return to your clone of Transformers to begin porting BrandNewLlama.

```bash
cd transformers
```

-In the special case that you are adding a model whose architecture exactly matches the model architecture of an
-existing model you only have to add a conversion script as described in [this section](#write-a-conversion-script).
-In this case, you can just re-use the whole model architecture of the already existing model.
+There are two possible debugging environments for running the original model: a notebook ([Google Colab](https://colab.research.google.com/notebooks/intro.ipynb) or [Jupyter](https://jupyter.org/)) or a local Python script.

-Otherwise, let's start generating a new model. We recommend using the following script to add a model starting from
-an existing model:
+> [!WARNING]
+> We don't recommend setting up a GPU environment to run the original model because it can be expensive. Instead, work in a CPU environment first to verify the model works in Transformers. Once it does, you can verify it on a GPU.
+
+Notebooks are great for executing code cell-by-cell, which can help split logical components from one another. They can also accelerate debugging cycles because intermediate results can be stored. You can also share notebooks when working with other contributors.
+
+The downside is that if you aren't used to working in notebooks, it may take some time to adjust to them.
+
+> [!TIP]
+> If the model architecture is identical to an existing model, skip ahead to add a [conversion script](#conversion-script), because you can reuse the architecture of the existing model.
+
+Run the command below to start and complete the questionnaire with some basic information about the new model.
This command jumpstarts the process by automatically generating some model code that you'll need to adapt.

```bash
transformers-cli add-new-model-like
```

-You will be prompted with a questionnaire to fill in the basic information of your model.
+## Create a pull request

-**Open a Pull Request on the main huggingface/transformers repo**
+Before you start adapting the code, create a pull request to track your progress and get feedback from the Transformers team. Title your pull request **[WIP] Add BrandNewLlama** so it's clear that this is a work in progress.

-Before starting to adapt the automatically generated code, now is the time to open a “Work in progress (WIP)” pull
-request, *e.g.* “[WIP] Add *brand_new_bert*”, in 🤗 Transformers so that you and the Hugging Face team can work
-side-by-side on integrating the model into 🤗 Transformers.
+Create a branch with a descriptive name from your main branch.

-You should do the following:
+```bash
+git checkout -b add_brand_new_llama
+```

-1. Create a branch with a descriptive name from your main branch
+Commit the code, and then fetch and rebase on the main branch.

-   ```bash
-   git checkout -b add_brand_new_bert
-   ```
+```bash
+git add .
+git commit
+git fetch upstream
+git rebase upstream/main
+```

-2. Commit the automatically generated code:
+Push any changes to your branch and click on **Compare & pull request** to open a pull request on GitHub. Open the pull request as a *draft* to indicate it's a work in progress.

-   ```bash
-   git add .
-   git commit
-   ```
+```bash
+git push -u origin add_brand_new_llama
+```

-3. Fetch and rebase to current main
+Include relevant Hugging Face team members by adding their GitHub handles in the pull request for questions, feedback, comments, and reviews. Direct team members to specific parts of the code you want them to look at by clicking on the **Files changed** tab, and then clicking on **+** to the left of the line number to add a comment. When a question or problem is solved, click on **Resolve** to indicate the issue is resolved. This keeps the conversation organized and clean.

-   ```bash
-   git fetch upstream
-   git rebase upstream/main
-   ```
+Remember to periodically commit and push your work, and update your work with the current main branch.

-4. Push the changes to your account using:
+```bash
+git fetch upstream
+git merge upstream/main
+```

-   ```bash
-   git push -u origin a-descriptive-name-for-my-changes
-   ```
+## Original checkpoint

-5. Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull request”. Make sure to add the
-   GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for
-   future changes.
+Take some time to work on the original model implementation first to understand how it works.

-6. Change the PR into a draft by clicking on “Convert to draft” on the right of the GitHub pull request web page.
+This can be difficult if the original model repository is lacking documentation or if the codebase is complex. But you should use this as your motivation to implement the model in Transformers. Your contribution makes it more accessible and user-friendly to everyone!

-In the following, whenever you have made some progress, don't forget to commit your work and push it to your account so
-that it shows in the pull request. Additionally, you should make sure to update your work with the current main from
-time to time by doing:
+Orient yourself with the original repository by doing the following.
-
-```bash
-git fetch upstream
-git merge upstream/main
+- Locate the pretrained weights.
+- Figure out how to load the pretrained weights into the model.
+- Figure out how to run the tokenizer independently of the model.
+- Trace one forward pass to understand which classes and functions are required. These are probably the only classes and functions you'll have to implement.
+- Locate all the important components (model class, model subclasses, self-attention layer, etc.) of the model.
+- Figure out how to debug the model in the original repository. Add print statements, use interactive debuggers like [ipdb](https://github.com/gotcha/ipdb), or an efficient integrated development environment (IDE) like [PyCharm](https://www.jetbrains.com/pycharm/).
+
+The last point is especially important because you'll need a thorough understanding of what's happening inside the original model before you can reimplement it in Transformers. Feel free to open issues and pull requests in the original repository if you encounter any issues.
+
+A good first step is to load a *small* pretrained checkpoint and try to reproduce a single forward pass with an example integer vector of inputs. For example, in pseudocode, this could look like the following.
+
+```py
+model = BrandNewLlamaModel.load_pretrained_checkpoint("/path/to/checkpoint/")
+input_ids = [0, 4, 5, 2, 3, 7, 9] # vector of input ids
+original_output = model.generate(input_ids)
```

-In general, all questions you might have regarding the model or your implementation should be asked in your PR and
-discussed/solved in the PR. This way, the Hugging Face team will always be notified when you are committing new code or
-if you have a question. It is often very helpful to point the Hugging Face team to your added code so that the Hugging
-Face team can efficiently understand your problem or question.
+### Debugging

-To do so, you can go to the “Files changed” tab where you see all of your changes, go to a line regarding which you
-want to ask a question, and click on the “+” symbol to add a comment. Whenever a question or problem has been solved,
-you can click on the “Resolve” button of the created comment.
+If you run into issues, you'll need to choose one of the following debugging strategies depending on the original model's codebase.

-In the same way, the Hugging Face team will open comments when reviewing your code. We recommend asking most questions
-on GitHub on your PR. For some very general questions that are not very useful for the public, feel free to ping the
-Hugging Face team by Slack or email.
+
+

-**5. Adapt the generated models code for brand_new_bert**
+This strategy relies on breaking the original model into smaller sub-components, such as when the code can be easily run in eager mode. While more difficult, there are some advantages to this approach.

-At first, we will focus only on the model itself and not care about the tokenizer. All the relevant code should be
-found in the generated files `src/transformers/models/brand_new_bert/modeling_brand_new_bert.py` and
-`src/transformers/models/brand_new_bert/configuration_brand_new_bert.py`.
+1. It is easier later to compare the original model to your implementation. You can automatically verify that each individual component matches its corresponding component in the Transformers implementation. This is better than relying on a visual comparison based on print statements.
+2. It is easier to port individual components instead of the entire model.
+3. 
It is easier to understand how a model works by breaking it up into smaller parts.
+4. It is easier to prevent regressions at a later stage when you change your code thanks to component-by-component tests.

-Now you can finally start coding :). The generated code in
-`src/transformers/models/brand_new_bert/modeling_brand_new_bert.py` will either have the same architecture as BERT if
-it's an encoder-only model or BART if it's an encoder-decoder model. At this point, you should remind yourself what
-you've learned in the beginning about the theoretical aspects of the model: *How is the model different from BERT or
-BART?*". Implement those changes which often means changing the *self-attention* layer, the order of the normalization
-layer, etc… Again, it is often useful to look at the similar architecture of already existing models in Transformers to
-get a better feeling of how your model should be implemented.
+> [!TIP]
+> Refer to the ELECTRA [integration checks](https://gist.github.com/LysandreJik/db4c948f6b4483960de5cbac598ad4ed) for a good example of how to decompose a model into smaller components.

-**Note** that at this point, you don't have to be very sure that your code is fully correct or clean. Rather, it is
-advised to add a first *unclean*, copy-pasted version of the original code to
-`src/transformers/models/brand_new_bert/modeling_brand_new_bert.py` until you feel like all the necessary code is
-added. From our experience, it is much more efficient to quickly add a first version of the required code and
-improve/correct the code iteratively with the conversion script as described in the next section. The only thing that
-has to work at this point is that you can instantiate the 🤗 Transformers implementation of *brand_new_bert*, *i.e.* the
-following command should work:
+
+

-```python
-from transformers import BrandNewBertModel, BrandNewBertConfig
+This strategy is viable when the original codebase is too complex, only allows intermediate components to be run in compiled mode, or if it's too time-consuming (maybe even impossible) to separate the model into smaller sub-components.
+
+For example, the MeshTensorFlow implementation of [T5](https://github.com/tensorflow/mesh/tree/master/mesh_tensorflow) is too complex and doesn't offer a simple way to decompose the model into its sub-components. In this situation, you'll have to rely on verifying print statements.

-model = BrandNewBertModel(BrandNewBertConfig())
+
+
+
+Whichever strategy you choose, it is recommended to debug the initial layers first and the final layers last. Retrieve the output, either with print statements or sub-component functions, of the following layers in this order.
+
+1. input ids passed to the model
+2. word embeddings
+3. input of the first Transformer layer
+4. output of the first Transformer layer
+5. output of the following n-1 Transformer layers
+6. output of the whole model
+
+The input ids should just be an array of integers like `input_ids = [0, 4, 4, 3, 2, 4, 1, 7, 19]`.
+
+Layer outputs often consist of multi-dimensional float arrays.
+ +```py +[[ + [-0.1465, -0.6501, 0.1993, ..., 0.1451, 0.3430, 0.6024], + [-0.4417, -0.5920, 0.3450, ..., -0.3062, 0.6182, 0.7132], + [-0.5009, -0.7122, 0.4548, ..., -0.3662, 0.6091, 0.7648], + ..., + [-0.5613, -0.6332, 0.4324, ..., -0.3792, 0.7372, 0.9288], + [-0.5416, -0.6345, 0.4180, ..., -0.3564, 0.6992, 0.9191], + [-0.5334, -0.6403, 0.4271, ..., -0.3339, 0.6533, 0.8694]]], ``` -The above command will create a model according to the default parameters as defined in `BrandNewBertConfig()` with -random weights, thus making sure that the `init()` methods of all components works. +Every Transformers model output should have a precision or error tolerance of *1e-3*. This accounts for any output differences that arise from using a different library framework. Compare the intermediate outputs of the original model with the Transformers implementation to ensure they're nearly identical. Having an *efficient* debugging environment is crucial for this step. + +Here are some tips for an efficient debugging environment. + +- To debug intermediate results, it depends on the machine learning framework the original model repository is using. For PyTorch, you should write a script to decompose the original model into smaller sub-components to retrieve the intermediate values. For TensorFlow, you may need to use [tf.print](https://www.tensorflow.org/api_docs/python/tf/print). For Flax, make sure the model is *not jitted* during the forward pass (refer to this GitHub [Issue](https://github.com/google/jax/issues/196) for more details). + +- It is faster to debug with a smaller pretrained checkpoint versus a larger checkpoint where the forward pass takes more than 10 seconds. If only large checkpoints are available, create a dummy model with randomly initialized weights and save those weights to compare against the Transformers implementation. + +- Find the easiest way to call the model's forward pass. Ideally, this function (may be called `predict`, `evaluate`, `forward`, or `__call__`) should only call the forward pass *once*. It is more difficult to debug a function that calls the forward pass multiple times. + +- Separate tokenization from the forward pass. Locate where a string input is changed to input ids in the forward pass and start here. You may need to create a small script or modify the original code to directly input the input ids instead of an input string. + +- Ensure the model is *not* in training mode. This can produce random outputs due to multiple dropout layers in a model. The forward pass in your debugging environment should be *deterministic* so that the dropout layers aren't used. + +Once you're able to run the original checkpoint, you're ready to start adapting the model code for Transformers. + +## Adapt the model code + +The `transformers-cli add-new-model-like` command should have generated a model and configuration file. + +- `src/transformers/models/brand_new_llama/modeling_brand_new_llama.py` +- `src/transformers/models/brand_new_llama/configuration_brand_new_llama.py` + +The automatically generated code in the `modeling.py` file has the same architecture as Llama if you answered it's a decoder-only model or it will have the same architecture as BART if you answered it's an encoder-decoder model. The generated code is just a starting point. Based on your research on the new model, you'll need to implement those specific changes by adapting the generated code. This may involve changes to the self-attention layer, the order of the normalization layer, and so on. 
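+
+The snippet below is only an illustrative sketch of the kind of local edits this step usually involves. The class, its attributes, and the sizes are hypothetical and simplified, not the actual generated code, but they show where changes to the attention block, the feedforward block, and the normalization order typically happen.
+
+```py
+import torch
+from torch import nn
+
+class BrandNewLlamaDecoderLayer(nn.Module):
+    """Hypothetical, simplified decoder layer used only to illustrate typical adaptations."""
+
+    def __init__(self, hidden_size: int = 64, intermediate_size: int = 256, num_heads: int = 4):
+        super().__init__()
+        # adapt the attention module if BrandNewLlama differs from Llama (heads, biases, positional embeddings, ...)
+        self.self_attn = nn.MultiheadAttention(hidden_size, num_heads, batch_first=True)
+        # adapt the feedforward block if the activation or projection layout differs
+        self.up_proj = nn.Linear(hidden_size, intermediate_size)
+        self.down_proj = nn.Linear(intermediate_size, hidden_size)
+        self.activation = nn.GELU()
+        # swap the normalization type or move it if the paper uses post-norm instead of pre-norm
+        self.input_layernorm = nn.LayerNorm(hidden_size)
+        self.post_attention_layernorm = nn.LayerNorm(hidden_size)
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        # pre-norm ordering is shown here; reorder these calls if BrandNewLlama normalizes after the residual
+        residual = hidden_states
+        hidden_states = self.input_layernorm(hidden_states)
+        attn_output, _ = self.self_attn(hidden_states, hidden_states, hidden_states)
+        hidden_states = residual + attn_output
+
+        residual = hidden_states
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = residual + self.down_proj(self.activation(self.up_proj(hidden_states)))
+        return hidden_states
+```
+
+In practice, the generated file already contains most of this structure, so you usually only tweak a few of these lines rather than writing a layer from scratch.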
+
+### Model initialization
+
+At this point, your code doesn't have to be clean or even fully correct. It is more efficient to quickly create a first draft and then iteratively improve on it. The most important thing is that your model can be instantiated from Transformers. The command below creates a model from the configuration with random weights, verifying that the `__init__` method works.
+
+```py
+from transformers import BrandNewLlamaModel, BrandNewLlamaConfig
+model = BrandNewLlamaModel(BrandNewLlamaConfig())
+```

-Note that all random initialization should happen in the `_init_weights` method of your `BrandnewBertPreTrainedModel`
-class. It should initialize all leaf modules depending on the variables of the config. Here is an example with the
-BERT `_init_weights` method:
+Random initialization occurs in the `_init_weights` method of `BrandNewLlamaPreTrainedModel`. All leaf modules are initialized depending on the configuration's variables.

```py
def _init_weights(self, module):
@@ -520,9 +326,9 @@ def _init_weights(self, module):
        module.weight.data.fill_(1.0)
```

-You can have some more custom schemes if you need a special initialization for some modules. For instance, in
-`Wav2Vec2ForPreTraining`, the last two linear layers need to have the initialization of the regular PyTorch `nn.Linear`
-but all the other ones should use an initialization as above. This is coded like this:
+The initialization scheme can look different if you need to adapt it to your model. For example, [`Wav2Vec2ForPreTraining`] initializes [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) in its last two linear layers.
+
+The `_is_hf_initialized` flag makes sure a submodule is only initialized once. Setting it to `True` for `module.project_q` and `module.project_hid` ensures the custom initialization is not overridden later, because `_init_weights` won't be applied to these modules.

```py
def _init_weights(self, module):
@@ -538,30 +344,34 @@ def _init_weights(self, module):
        module.bias.data.zero_()
```

-The `_is_hf_initialized` flag is internally used to make sure we only initialize a submodule once. By setting it to
-`True` for `module.project_q` and `module.project_hid`, we make sure the custom initialization we did is not overridden later on,
-the `_init_weights` function won't be applied to them.
+### Convert checkpoints to Transformers

-**6. Write a conversion script**
+The original checkpoint must be converted to a Transformers-compatible checkpoint.

-Next, you should write a conversion script that lets you convert the checkpoint you used to debug *brand_new_bert* in
-the original repository to a checkpoint compatible with your just created 🤗 Transformers implementation of
-*brand_new_bert*. It is not advised to write the conversion script from scratch, but rather to look through already
-existing conversion scripts in 🤗 Transformers for one that has been used to convert a similar model that was written in
-the same framework as *brand_new_bert*. Usually, it is enough to copy an already existing conversion script and
-slightly adapt it for your use case. Don't hesitate to ask the Hugging Face team to point you to a similar already
-existing conversion script for your model.
+> [!TIP]
+> Try looking for an existing conversion script to copy, adapt, and reuse for your model!
+>
+> - If you're porting a model from TensorFlow to PyTorch, a good starting point may be the BERT [conversion script](https://github.com/huggingface/transformers/blob/7acfa95afb8194f8f9c1f4d2c6028224dbed35a2/src/transformers/models/bert/modeling_bert.py#L91).
+> - If you're porting a model from PyTorch to PyTorch, a good starting point may be the BART [conversion script](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py).

-- If you are porting a model from TensorFlow to PyTorch, a good starting point might be BERT's conversion script [here](https://github.com/huggingface/transformers/blob/7acfa95afb8194f8f9c1f4d2c6028224dbed35a2/src/transformers/models/bert/modeling_bert.py#L91)
-- If you are porting a model from PyTorch to PyTorch, a good starting point might be BART's conversion script [here](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py)
+Make sure **all** required weights are initialized and print out all the checkpoint weights that weren't used for initialization to make sure the model has been converted correctly.

-In the following, we'll quickly explain how PyTorch models store layer weights and define layer names. In PyTorch, the
-name of a layer is defined by the name of the class attribute you give the layer. Let's define a dummy model in
-PyTorch, called `SimpleModel` as follows:
+You may encounter errors about wrong shapes or wrong name assignments during the conversion. This is most likely because of incorrect parameters in `BrandNewLlamaConfig`, the wrong architecture, a bug in the `__init__` method of your implementation, or because you need to transpose one of the checkpoint weights.

-```python
-from torch import nn
+Keep iterating on the [Adapt the model code](#adapt-the-model-code) section until all the checkpoint weights are correctly loaded. Once you can load a checkpoint in your model, save it to a folder. This should contain a `model.safetensors` file and a `config.json` file.
+```py
+model.save_pretrained("/path/to/converted/checkpoint/folder")
+```
+
+To help with conversion, the next section briefly describes how PyTorch models store and define layer weights and names.
+
+#### PyTorch layer weights and names
+
+It is helpful to create a basic PyTorch model to understand how layer names are defined and weights are initialized.
+
+```py
+from torch import nn

class SimpleModel(nn.Module):
    def __init__(self):
@@ -571,18 +381,11 @@ class SimpleModel(nn.Module):
        self.layer_norm = nn.LayerNorm(10)
```

-Now we can create an instance of this model definition which will fill all weights: `dense`, `intermediate`,
-`layer_norm` with random weights. We can print the model to see its architecture
+PyTorch layer names are defined by the class attribute name of the layer (`dense`, `intermediate`, `layer_norm`). Create an instance of `SimpleModel` to fill all the layers with random weights.

-```python
+```py
model = SimpleModel()
-
print(model)
-```
-
-This will print out the following:
-
-```
SimpleModel(
  (dense): Linear(in_features=10, out_features=10, bias=True)
  (intermediate): Linear(in_features=10, out_features=10, bias=True)
@@ -590,16 +393,10 @@ SimpleModel(
)
```

-We can see that the layer names are defined by the name of the class attribute in PyTorch.
You can print out the weight
-values of a specific layer:
+The weight values of a specific layer are randomly initialized.

-```python
+```py
print(model.dense.weight.data)
-```
-
-to see that the weights were randomly initialized
-
-```
tensor([[-0.0818,  0.2207, -0.0749, -0.0030,  0.0045, -0.1569, -0.1598,  0.0212,
         -0.2077,  0.2157],
        [ 0.1044,  0.0201,  0.0990,  0.2482,  0.3116,  0.2509,  0.2866, -0.2190,
@@ -622,339 +419,247 @@ tensor([[-0.0818,  0.2207, -0.0749, -0.0030,  0.0045, -0.1569, -0.1598,  0.0212,
          0.2220,  0.2358]]).
```

-In the conversion script, you should fill those randomly initialized weights with the exact weights of the
-corresponding layer in the checkpoint. *E.g.*
+In the conversion script, the random weights should be replaced with the exact weights from the corresponding layer in the original checkpoint.

-```python
-# retrieve matching layer weights, e.g. by
-# recursive algorithm
+```py
+# retrieve matching layer weights with recursive algorithm
layer_name = "dense"
pretrained_weight = array_of_dense_layer
model_pointer = getattr(model, "dense")
-
model_pointer.weight.data = torch.from_numpy(pretrained_weight)
```

-While doing so, you must verify that each randomly initialized weight of your PyTorch model and its corresponding
-pretrained checkpoint weight exactly match in both **shape and name**. To do so, it is **necessary** to add assert
-statements for the shape and print out the names of the checkpoints weights. E.g. you should add statements like:
+Verify the randomly initialized weights and their corresponding pretrained checkpoint weights have the identical **shape** and **name**. Add assert statements for the shape and print out the checkpoint weight names.

-```python
+```py
assert (
    model_pointer.weight.shape == pretrained_weight.shape
), f"Pointer shape of random weight {model_pointer.shape} and array shape of checkpoint weight {pretrained_weight.shape} mismatched"
+
+logger.info(f"Initialize PyTorch weight {layer_name} from {pretrained_weight.name}")
```

-Besides, you should also print out the names of both weights to make sure they match, *e.g.*
+When the shape or name doesn't match, you may have assigned the incorrect checkpoint weight to a randomly initialized layer. An incorrect shape may be because the `BrandNewLlama` parameters don't exactly match the original model's parameters. But it could also be that the PyTorch layer implementation requires the weights to be transposed first.

-```python
-logger.info(f"Initialize PyTorch weight {layer_name} from {pretrained_weight.name}")
+### Implement the forward pass
+
+Once the model loads correctly, implement the forward pass next. It takes some inputs and returns the model output.
+
+```py
+model = BrandNewLlamaModel.from_pretrained("/path/to/converted/checkpoint/folder")
+input_ids = [0, 4, 4, 3, 2, 4, 1, 7, 19]
+output = model(input_ids).last_hidden_state
```

-If either the shape or the name doesn't match, you probably assigned the wrong checkpoint weight to a randomly
-initialized layer of the 🤗 Transformers implementation.
+Don't be discouraged if your forward pass isn't identical to the output of the original model or if it returns an error. First, make sure the forward pass doesn't throw any errors. Errors are often caused by wrong dimensions or by using the wrong data type ([torch.long](https://pytorch.org/docs/stable/generated/torch.Tensor.long.html) instead of [torch.float32](https://pytorch.org/docs/stable/tensors.html)).
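+
+As a sanity check before comparing values, the sketch below shows one way to run a deterministic forward pass with correctly shaped and typed inputs, which avoids the most common dimension and dtype errors. The checkpoint path is a placeholder.
+
+```py
+import torch
+from transformers import BrandNewLlamaModel
+
+model = BrandNewLlamaModel.from_pretrained("/path/to/converted/checkpoint/folder")
+model.eval()  # disable dropout so the output is deterministic
+
+# input ids must be a batched LongTensor of shape (batch_size, sequence_length)
+input_ids = torch.tensor([[0, 4, 4, 3, 2, 4, 1, 7, 19]], dtype=torch.long)
+
+with torch.no_grad():
+    output = model(input_ids).last_hidden_state
+
+print(output.shape, output.dtype)  # expect (1, 9, hidden_size) and torch.float32
+```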
-An incorrect shape is most likely due to an incorrect setting of the config parameters in `BrandNewBertConfig()` that -do not exactly match those that were used for the checkpoint you want to convert. However, it could also be that -PyTorch's implementation of a layer requires the weight to be transposed beforehand. +Your output should have a precision of *1e-3*. Ensure the output shapes and output values are identical. Common reasons for why the outputs aren't identical include: -Finally, you should also check that **all** required weights are initialized and print out all checkpoint weights that -were not used for initialization to make sure the model is correctly converted. It is completely normal, that the -conversion trials fail with either a wrong shape statement or a wrong name assignment. This is most likely because either -you used incorrect parameters in `BrandNewBertConfig()`, have a wrong architecture in the 🤗 Transformers -implementation, you have a bug in the `init()` functions of one of the components of the 🤗 Transformers -implementation or you need to transpose one of the checkpoint weights. +- Some layers were not added (activation layer or a residual connection). +- The word embedding matrix is not tied. +- The wrong positional embeddings are used because the original implementation includes an offset. +- Dropout is applied during the forward pass. Fix this error by making sure `model.training` is `False` and passing `self.training` to [torch.nn.functional.dropout](https://pytorch.org/docs/stable/nn.functional.html?highlight=dropout#torch.nn.functional.dropout). -This step should be iterated with the previous step until all weights of the checkpoint are correctly loaded in the -Transformers model. Having correctly loaded the checkpoint into the 🤗 Transformers implementation, you can then save -the model under a folder of your choice `/path/to/converted/checkpoint/folder` that should then contain both a -`pytorch_model.bin` file and a `config.json` file: +Compare the forward pass of the original model and your implementation to check if there are any differences. Ideally, debug and print out the intermediate outputs of both implementations of the forward pass to pinpoint where the original implementation differs from yours. -```python -model.save_pretrained("/path/to/converted/checkpoint/folder") -``` +1. Make sure the hardcoded `input_ids` in both implementations are identical. +2. Verify the outputs of the first transformation of `input_ids` (usually the word embeddings) are identical, and work your way through to the last layer. -**7. Implement the forward pass** +Any difference between the two implementations should point to the bug in your implementation. -Having managed to correctly load the pretrained weights into the 🤗 Transformers implementation, you should now make -sure that the forward pass is correctly implemented. In [Get familiar with the original repository](#3-4-run-a-pretrained-checkpoint-using-the-original-repository), you have already created a script that runs a forward -pass of the model using the original repository. Now you should write an analogous script using the 🤗 Transformers -implementation instead of the original one. It should look as follows: +One of the best strategies is to add many print statements to the same positions in both implementations, and then successively remove them when they output identical values for the intermediate outputs. 
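+
+One way to structure that comparison is a small helper like the sketch below. The function name and the captured tensors are placeholders; the only assumption is that you can dump the same intermediate tensors from both implementations.
+
+```py
+import torch
+
+def compare_intermediates(name, original, ours, atol=1e-3):
+    """Print how far apart two intermediate tensors from the two implementations are."""
+    original = torch.as_tensor(original)
+    ours = torch.as_tensor(ours)
+    if original.shape != ours.shape:
+        print(f"{name}: shape mismatch {tuple(original.shape)} vs {tuple(ours.shape)}")
+        return
+    max_diff = (original - ours).abs().max().item()
+    status = "ok" if max_diff < atol else "MISMATCH"
+    print(f"{name}: max abs diff {max_diff:.2e} ({status})")
+
+# toy usage with dummy tensors; in practice pass the tensors captured from each forward pass
+original_layer_0 = torch.randn(1, 9, 64)
+transformers_layer_0 = original_layer_0 + 1e-5 * torch.randn(1, 9, 64)
+compare_intermediates("layer_0_output", original_layer_0, transformers_layer_0)
+```
+
+Working through the layers in order with a check like this usually narrows the first mismatch down to a single module.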
-```python -model = BrandNewBertModel.from_pretrained("/path/to/converted/checkpoint/folder") -input_ids = [0, 4, 4, 3, 2, 4, 1, 7, 19] -output = model(input_ids).last_hidden_states -``` - -It is very likely that the 🤗 Transformers implementation and the original model implementation don't give the exact -same output the very first time or that the forward pass throws an error. Don't be disappointed - it's expected! First, -you should make sure that the forward pass doesn't throw any errors. It often happens that the wrong dimensions are -used leading to a *Dimensionality mismatch* error or that the wrong data type object is used, *e.g.* `torch.long` -instead of `torch.float32`. Don't hesitate to ask the Hugging Face team for help, if you don't manage to solve -certain errors. - -The final part to make sure the 🤗 Transformers implementation works correctly is to ensure that the outputs are -equivalent to a precision of `1e-3`. First, you should ensure that the output shapes are identical, *i.e.* -`outputs.shape` should yield the same value for the script of the 🤗 Transformers implementation and the original -implementation. Next, you should make sure that the output values are identical as well. This one of the most difficult -parts of adding a new model. Common mistakes why the outputs are not identical are: - -- Some layers were not added, *i.e.* an *activation* layer was not added, or the residual connection was forgotten -- The word embedding matrix was not tied -- The wrong positional embeddings are used because the original implementation uses on offset -- Dropout is applied during the forward pass. To fix this make sure *model.training is False* and that no dropout - layer is falsely activated during the forward pass, *i.e.* pass *self.training* to [PyTorch's functional dropout](https://pytorch.org/docs/stable/nn.functional.html?highlight=dropout#torch.nn.functional.dropout) - -The best way to fix the problem is usually to look at the forward pass of the original implementation and the 🤗 -Transformers implementation side-by-side and check if there are any differences. Ideally, you should debug/print out -intermediate outputs of both implementations of the forward pass to find the exact position in the network where the 🤗 -Transformers implementation shows a different output than the original implementation. First, make sure that the -hard-coded `input_ids` in both scripts are identical. Next, verify that the outputs of the first transformation of -the `input_ids` (usually the word embeddings) are identical. And then work your way up to the very last layer of the -network. At some point, you will notice a difference between the two implementations, which should point you to the bug -in the 🤗 Transformers implementation. From our experience, a simple and efficient way is to add many print statements -in both the original implementation and 🤗 Transformers implementation, at the same positions in the network -respectively, and to successively remove print statements showing the same values for intermediate presentations. - -When you're confident that both implementations yield the same output, verify the outputs with -`torch.allclose(original_output, output, atol=1e-3)`, you're done with the most difficult part! Congratulations - the -work left to be done should be a cakewalk 😊. - -**8. Adding all necessary model tests** - -At this point, you have successfully added a new model. However, it is very much possible that the model does not yet -fully comply with the required design. 
To make sure, the implementation is fully compatible with 🤗 Transformers, all -common tests should pass. The Cookiecutter should have automatically added a test file for your model, probably under -the same `tests/models/brand_new_bert/test_modeling_brand_new_bert.py`. Run this test file to verify that all common -tests pass: +When both implementations produce the same output, verify the outputs are within a precision of *1e-3*. -```bash -pytest tests/models/brand_new_bert/test_modeling_brand_new_bert.py +```py +torch.allclose(original_output, output, atol=1e-3) ``` -Having fixed all common tests, it is now crucial to ensure that all the nice work you have done is well tested, so that +This is typically the most difficult part of the process. Congratulations if you've made it this far! + +And if you're stuck or struggling with this step, don't hesitate to ask for help on your pull request. -- a) The community can easily understand your work by looking at specific tests of *brand_new_bert* -- b) Future changes to your model will not break any important feature of the model. +### Add model tests -At first, integration tests should be added. Those integration tests essentially do the same as the debugging scripts -you used earlier to implement the model to 🤗 Transformers. A template of those model tests has already added by the -Cookiecutter, called `BrandNewBertModelIntegrationTests` and only has to be filled out by you. To ensure that those -tests are passing, run +While the model works, you still need to add tests to ensure it is compatible with Transformers. Tests are important because they help users understand your work by looking at specific tests, and because they prevent your model from breaking in the future if any changes are made. + +[Cookiecutter](https://cookiecutter.readthedocs.io/en/stable/) should have added a test file for your model. Run the test file below to make sure all common tests pass. ```bash -RUN_SLOW=1 pytest -sv tests/models/brand_new_bert/test_modeling_brand_new_bert.py::BrandNewBertModelIntegrationTests +pytest tests/models/brand_new_llama/test_modeling_brand_new_llama.py ``` - +The integration tests should be added first because they serve the same purpose as the debugging scripts you used earlier to implement the new model in Transformers. A template of those model tests, `BrandNewLlamaModelIntegrationTests`, was added by Cookiecutter and should be filled out. To ensure it passes, run the following command. + + + + +```bash +RUN_SLOW=1 pytest -sv tests/models/brand_new_llama/test_modeling_brand_new_llama.py::BrandNewLlamaModelIntegrationTests +``` -In case you are using Windows, you should replace `RUN_SLOW=1` with `SET RUN_SLOW=1` + + - +```bash +SET RUN_SLOW=1 pytest -sv tests/models/brand_new_llama/test_modeling_brand_new_llama.py::BrandNewLlamaModelIntegrationTests +``` -Second, all features that are special to *brand_new_bert* should be tested additionally in a separate test under -`BrandNewBertModelTester`/`BrandNewBertModelTest`. This part is often forgotten but is extremely useful in two -ways: + + -- It helps to transfer the knowledge you have acquired during the model addition to the community by showing how the - special features of *brand_new_bert* should work. -- Future contributors can quickly test changes to the model by running those special tests. +All features unique to BrandNewLlama should be tested in a separate test under `BrandNewLlamaModelTester/BrandNewLlamaModelTest`. 
This test is often overlooked, but it is extremely important because: +- it helps transfer knowledge you acquired during the process to the community by showing how the model's novel features work +- future contributors can quickly test changes to the model by running these special tests -**9. Implement the tokenizer** +## Implement tokenizer -Next, we should add the tokenizer of *brand_new_bert*. Usually, the tokenizer is equivalent to or very similar to an -already existing tokenizer of 🤗 Transformers. +> [!TIP] +> We recommend adding a fast tokenizer ([`PreTrainedTokenizerFast`]) to give users the best performance. Feel free to tag [@ArthurZucker](https://github.com/ArthurZucker) or [@itazap](https://github.com/itazap) in your PR for help on how to add [`PreTrainedTokenizerFast`]. -It is very important to find/extract the original tokenizer file and to manage to load this file into the 🤗 -Transformers' implementation of the tokenizer. +With the model out of the way, it's time to focus on the tokenizer. The tokenizer should be identical or very similar to an existing tokenizer in Transformers. -To ensure that the tokenizer works correctly, it is recommended to first create a script in the original repository -that inputs a string and returns the `input_ids`. It could look similar to this (in pseudo-code): +Find and load the original tokenizer file into your implementation. Create a script in the original repository that inputs a string and returns the `input_ids`. The pseudocode should look similar to the code below. -```python +```py input_str = "This is a long example input string containing special characters .$?-, numbers 2872 234 12 and words." -model = BrandNewBertModel.load_pretrained_checkpoint("/path/to/checkpoint/") +model = BrandNewLlamaModel.load_pretrained_checkpoint("/path/to/checkpoint/") input_ids = model.tokenize(input_str) ``` -You might have to take a deeper look again into the original repository to find the correct tokenizer function or you -might even have to do changes to your clone of the original repository to only output the `input_ids`. Having written -a functional tokenization script that uses the original repository, an analogous script for 🤗 Transformers should be -created. It should look similar to this: +You may need to search the original repository to find the correct tokenizer function or modify the existing tokenizer in your clone of the original repository to only return the `input_ids`. The script for your tokenizer should look similar to the following. -```python -from transformers import BrandNewBertTokenizer +```py +from transformers import BrandNewLlamaTokenizer input_str = "This is a long example input string containing special characters .$?-, numbers 2872 234 12 and words." - -tokenizer = BrandNewBertTokenizer.from_pretrained("/path/to/tokenizer/folder/") - +tokenizer = BrandNewLlamaTokenizer.from_pretrained("/path/to/tokenizer/folder/") input_ids = tokenizer(input_str).input_ids ``` -When both `input_ids` yield the same values, as a final step a tokenizer test file should also be added. +When both implementations have the same `input_ids`, add a tokenizer test file. This file is analogous to the modeling test files. The tokenizer test files should contain a couple of hardcoded integration tests.
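+For example, a hardcoded tokenizer integration test could be a minimal sketch like the one below, assuming the hypothetical `BrandNewLlamaTokenizer` and that the expected ids are filled in from the original tokenizer.
+
+```py
+import unittest
+
+from transformers import BrandNewLlamaTokenizer
+
+
+class BrandNewLlamaTokenizationIntegrationTest(unittest.TestCase):
+    def test_tokenizer_matches_original(self):
+        tokenizer = BrandNewLlamaTokenizer.from_pretrained("/path/to/tokenizer/folder/")
+        input_str = "This is a long example input string containing special characters .$?-, numbers 2872 234 12 and words."
+        # fill in with the ids returned by the original tokenizer for the same string
+        expected_input_ids = [...]
+        self.assertEqual(tokenizer(input_str).input_ids, expected_input_ids)
+```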
+ +## Implement image processor + +> [!TIP] +> Fast image processors use the [torchvision](https://pytorch.org/vision/stable/index.html) library and can perform image processing on the GPU, significantly improving processing speed. +> We recommend adding a fast image processor ([`BaseImageProcessorFast`]) in addition to the "slow" image processor ([`BaseImageProcessor`]) to provide users with the best performance. Feel free to tag [@yonigozlan](https://github.com/yonigozlan) for help adding a [`BaseImageProcessorFast`]. -Analogous to the modeling test files of *brand_new_bert*, the tokenization test files of *brand_new_bert* should -contain a couple of hard-coded integration tests. +While this example doesn't include an image processor, you may need to implement one if your model requires image inputs. The image processor is responsible for converting images into a format suitable for your model. Before implementing a new one, check whether an existing image processor in the Transformers library can be reused, as many models share similar image processing techniques. Note that you can also use [modular](./modular_transformers) for image processors to reuse existing components. -**10. Run End-to-end integration tests** +If you do need to implement a new image processor, refer to an existing image processor to understand the expected structure. Slow image processors ([`BaseImageProcessor`]) and fast image processors ([`BaseImageProcessorFast`]) are designed differently, so make sure you follow the correct structure based on the processor type you're implementing. -Having added the tokenizer, you should also add a couple of end-to-end integration tests using both the model and the -tokenizer to `tests/models/brand_new_bert/test_modeling_brand_new_bert.py` in 🤗 Transformers. -Such a test should show on a meaningful -text-to-text sample that the 🤗 Transformers implementation works as expected. A meaningful text-to-text sample can -include *e.g.* a source-to-target-translation pair, an article-to-summary pair, a question-to-answer pair, etc… If none -of the ported checkpoints has been fine-tuned on a downstream task it is enough to simply rely on the model tests. In a -final step to ensure that the model is fully functional, it is advised that you also run all tests on GPU. It can -happen that you forgot to add some `.to(self.device)` statements to internal tensors of the model, which in such a -test would show in an error. In case you have no access to a GPU, the Hugging Face team can take care of running those -tests for you. +Run the following command (only if you haven't already created the fast image processor with the `transformers-cli add-new-model-like` command) to generate the necessary imports and to create a prefilled template for the fast image processor. Modify the template to fit your model. -**11. Add Docstring** +```bash +transformers-cli add-fast-image-processor --model-name your_model_name +``` -Now, all the necessary functionality for *brand_new_bert* is added - you're almost done! The only thing left to add is -a nice docstring and a doc page. The Cookiecutter should have added a template file called -`docs/source/model_doc/brand_new_bert.md` that you should fill out. Users of your model will usually first look at -this page before using your model. Hence, the documentation must be understandable and concise. It is very useful for -the community to add some *Tips* to show how the model should be used. 
Don't hesitate to ping the Hugging Face team -regarding the docstrings. +This command will generate the necessary imports and provide a pre-filled template for the fast image processor. You can then modify it to fit your model's needs. -Next, make sure that the docstring added to `src/transformers/models/brand_new_bert/modeling_brand_new_bert.py` is -correct and included all necessary inputs and outputs. We have a detailed guide about writing documentation and our docstring format [here](writing-documentation). It is always good to remind oneself that documentation should -be treated at least as carefully as the code in 🤗 Transformers since the documentation is usually the first contact -point of the community with the model. +Add tests for the image processor in `tests/models/your_model_name/test_image_processing_your_model_name.py`. These tests should be similar to those for other image processors and should verify that the image processor correctly handles image inputs. If your image processor includes unique features or processing methods, ensure you add specific tests for those as well. -**Code refactor** +## Implement processor -Great, now you have added all the necessary code for *brand_new_bert*. At this point, you should correct some potential -incorrect code style by running: +If your model accepts multiple modalities, like text and images, you need to add a processor. The processor centralizes the preprocessing of different modalities before passing them to the model. -```bash -make style +The processor should call the appropriate modality-specific processors within its `__call__` function to handle each type of input correctly. Be sure to check existing processors in the library to understand their expected structure. Transformers uses the following convention in the `__call__` function signature. + +```python +def __call__( + self, + images: ImageInput = None, + text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, + audio=None, + videos=None, + **kwargs: Unpack[YourModelProcessorKwargs], +) -> BatchFeature: + ... ``` -and verify that your coding style passes the quality check: +`YourModelProcessorKwargs` is a `TypedDict` that includes all the typical processing arguments and any extra arguments a specific processor may require. -```bash -make quality -``` +Add tests for the processor in `tests/models/your_model_name/test_processor_your_model_name.py`. These tests should be similar to those for other processors and should verify that the processor correctly handles the different modalities. -There are a couple of other very strict design tests in 🤗 Transformers that might still be failing, which shows up in -the tests of your pull request. This is often because of some missing information in the docstring or some incorrect -naming. The Hugging Face team will surely help you if you're stuck here. +## Integration tests -Lastly, it is always a good idea to refactor one's code after having ensured that the code works correctly. With all -tests passing, now it's a good time to go over the added code again and do some refactoring. +Now that you have a model and tokenizer, add end-to-end integration tests for the model and tokenizer to `tests/models/brand_new_llama/test_modeling_brand_new_llama.py`. -You have now finished the coding part, congratulation! 🎉 You are Awesome! 😎 +The test should provide a meaningful text-to-text example to show the model works as expected. 
For example, you can include a source-to-target translation pair, an article-to-summary pair, or a question-to-answer pair. -**12. Upload the models to the model hub** +If the checkpoint hasn't been fine-tuned on a downstream task, then the model tests are sufficient. -In this final part, you should convert and upload all checkpoints to the model hub and add a model card for each -uploaded model checkpoint. You can get familiar with the hub functionalities by reading our [Model sharing and uploading Page](model_sharing). You should work alongside the Hugging Face team here to decide on a fitting name for each -checkpoint and to get the required access rights to be able to upload the model under the author's organization of -*brand_new_bert*. The `push_to_hub` method, present in all models in `transformers`, is a quick and efficient way to push your checkpoint to the hub. A little snippet is pasted below: +Finally, try to make sure your tests can run on a GPU by adding `.to(self.device)` statements to the model's internal tensors. If you don't have access to a GPU, we can take care of that for you. -```python -brand_new_bert.push_to_hub("brand_new_bert") -# Uncomment the following line to push to an organization. -# brand_new_bert.push_to_hub("/brand_new_bert") -``` +## Add documentation + +Your model is only useful if users know how to use it. This is why it's important to add documentation and docstrings. Cookiecutter added a template file, `docs/source/model_doc/brand_new_llama.md`, that you can fill out with information about your model. -It is worth spending some time to create fitting model cards for each checkpoint. The model cards should highlight the -specific characteristics of this particular checkpoint, *e.g.* On which dataset was the checkpoint -pretrained/fine-tuned on? On what down-stream task should the model be used? And also include some code on how to -correctly use the model. +This is generally a user's first interaction with a model, so the documentation should be clear and concise. It is often very useful to add examples of how the model should be used. -**13. (Optional) Add notebook** +Make sure docstrings are added to `src/transformers/models/brand_new_llama/modeling_brand_new_llama.py` and include all necessary inputs and outputs. Review our [guide](https://github.com/huggingface/transformers/tree/main/docs#writing-documentation---specification) for writing documentation and docstrings. -It is very helpful to add a notebook that showcases in-detail how *brand_new_bert* can be used for inference and/or -fine-tuned on a downstream task. This is not mandatory to merge your PR, but very useful for the community. +## Refactor -**14. Submit your finished PR** +Time to tidy things up and make sure the code style is consistent with the rest of the library. Run the following command to automatically fix incorrect styles. -You're done programming now and can move to the last step, which is getting your PR merged into main. Usually, the -Hugging Face team should have helped you already at this point, but it is worth taking some time to give your finished -PR a nice description and eventually add comments to your code, if you want to point out certain design choices to your -reviewer. +```bash +make style ``` -### Share your work!! +To verify the code style passes quality checks, run the command below. -Now, it's time to get some credit from the community for your work!
Having completed a model addition is a major -contribution to Transformers and the whole NLP community. Your code and the ported pre-trained models will certainly be -used by hundreds and possibly even thousands of developers and researchers. You should be proud of your work and share -your achievements with the community. +```bash +make quality +``` -**You have made another model that is super easy to access for everyone in the community! 🤯** +There may be other failing tests or checks (missing docstring or incorrect naming) on your pull request due to Transformers' strict design tests. We can help you with these issues if you're stuck. -## Model additions and their timeline: when is a model added to transformers? +After ensuring the code runs correctly, you may want to refactor it to make it more readable or cleaner. -We aim for `transformers` to have support for new model architectures and checkpoints as early as possible: -availability can range from day-0 (and hour-0) releases for some models, to a few days/weeks for others. +## Upload to the Hub -The availability of this is usually up to the model contributors, as well as how excited the community is for the -architecture. +Convert and upload all checkpoints to the [Hub](https://hf.co/models). Add a model card to provide more transparency and context about the model. The model card should highlight specific characteristics of a checkpoint, how the model was trained, and code examples of how to use it. -We can split the model architecture possibilities in four sections: -- Day-0 integration -- Same-week integration -- Post-release integration -- Hub-first release +> [!TIP] +> In many cases, adding an interactive notebook users can run is a great way to showcase how to use the model for inference or fine-tune it on a downstream task. While not required, including a notebook can drive greater adoption of your model. -Let's dive into each of these and see how we (the transformers team) can help you contribute your architecture and get -your architecture to be very easily used by all members of the community. +You should also consult with the Transformers team to decide on an appropriate name for the model, and to get the required access rights to upload the model. -### Day-0 integration +Use the [`~PreTrainedModel.push_to_hub`] method to upload the model. -For a day-0 integration to work, we'll usually want to work hand-in-hand with you directly. In order to keep your -architecture private until your checkpoints and release are ready, we'll work together in a private fork of -transformers. +```py +brand_new_llama.push_to_hub("brand_new_llama") +``` -If you plan on having a transformers-first release, this is a great option: we run CI ahead of time, ensure the -documentation is clear, and we aim to optimize your model as much as possible (providing quantization, optimizing it -with Flash-Attention/SDPA, optimizing the KV cache, etc). +Refer to the [Sharing](./model_sharing) guide for more information about uploading models to the Hub. -We can also lend you a hand in adding the model, reviewing it early, and help you make sure the `transformers` -API works as expected! +## Merge your model -If this is the path you wish to go with, we ask for you to reach out in advance, especially if the architecture is -particularly novel (at least a few days, but a few weeks will enable the absolute best integration). In order to reach -out, please contact transformers@huggingface.co 🤗.
+You're finally ready to merge your pull request and officially add the model to Transformers! Make sure all the tests are passing and all comments and feedback have been addressed. -### Same-week integration +Congratulations on adding a new model to Transformers! 🥳 -A same-week integration usually happens when model authors do not reach out; but we see significant community -requests. +This is a very significant contribution. Your work makes Transformers more accessible to developers and researchers around the world. You should be proud of your contribution and share your accomplishment with the community! -In order to specify you'd like for us to integrate a specific model, we'll redirect you to our -[issue tracker](https://github.com/huggingface/transformers/issues/new?assignees=&labels=New+model&projects=&template=new-model-addition.yml) -where you can request a specific model. +## Model addition timeline -The more activity on the issue, the faster/more likely we are to integrate the model! +There are four timelines for model additions depending on the model contributor and community demand for an architecture. -### Post-release integration +- **day-0 integration**: If you plan on having a Transformers-first release, this is a great option because we can ensure the documentation is clear and optimize your model as much as possible (quantization, FlashAttention, KV-cache, etc.). We can also help you add the model, provide early reviews and make sure it works as expected. -A post-release integration usually happens when there has not been sufficient activity/requests to warrant a same-week -integration, or that we lack the sufficient bandwidth to integrate it. + Reach out to transformers@huggingface.co a few days (preferably weeks) in advance, especially if an architecture is particularly novel, to ensure model integration. We'll work together on a private fork of Transformers until your checkpoint and release is ready. -We very gladly welcome community contributions in those instances; more than half of the library was contributed -by contributors external to Hugging Face. If this is something that is interesting to you, we recommend that you look -at our [open issues tagged with "New model"](https://github.com/huggingface/transformers/issues?q=is%3Aopen+is%3Aissue+label%3A%22New+model%22). +- **same week integration**: Models with significant requests/demand are usually added the same week if the model author doesn't reach out. -We recommend you try your hand at a heavily requested model as this will multiply the impact of your contribution. -We'll be there to help you in case that's your first contribution 🤗. + Use the [issue tracker](https://github.com/huggingface/transformers/issues/new?assignees=&labels=New+model&projects=&template=new-model-addition.yml) to request a specific model to add. The more activity on the issue, the faster and more likely we'll integrate it. -### Code-on-Hub release +- **post-release integration**: Models without popular requests/demand or if we don't have the bandwidth to integrate it are added post-release. -Finally, transformers has a "remote-code" possibility, in which contributions are not made within the toolkit, but on -the Hub. This can be particularly interesting for groups that are using `transformers` as a backbone for their project, -but don't have the bandwidth to contribute the model to transformers directly. + This is a good opportunity if you're interested in contributing a model to Transformers. 
Take a look at open issues tagged with ["New model"](https://github.com/huggingface/transformers/issues?q=is%3Aopen+is%3Aissue+label%3A%22New+model%22). Feel free to give the most requested models a try first to multiply the impact of your contribution. We'll be there to help you each step of the way! -In case the model is very successful, then we'll very likely end up integrating it in `transformers` at the end - as this -provides better documentation, CI, maintenance, and optimizations - but this remains a great way to make your model -accessible day-0 with minimal friction. +- **Hub-first release**: Transformers [remote-code](./models#custom-models) feature allows Transformers-based projects to be shared directly on the Hub. This is a good option if you don't have the bandwidth to add a model directly to Transformers. -This guide is a great starting point for a Hub-first release: [Custom models](./custom_models) \ No newline at end of file + If a model ends up being very popular, then it's very likely that we'll integrate it in Transformers ourselves to enable better support (documentation, maintenance, optimization, etc.) for it. A Hub-first release is the most frictionless way to add a model. diff --git a/docs/source/en/add_new_pipeline.md b/docs/source/en/add_new_pipeline.md index e8234c565b26..60ef43dab585 100644 --- a/docs/source/en/add_new_pipeline.md +++ b/docs/source/en/add_new_pipeline.md @@ -1,4 +1,4 @@ - -# How to create a custom pipeline? +# Adding a new pipeline -In this guide, we will see how to create a custom pipeline and share it on the [Hub](https://hf.co/models) or add it to the -🤗 Transformers library. +Make [`Pipeline`] your own by subclassing it and implementing a few methods. Share the code with the community on the [Hub](https://hf.co) and register the pipeline with Transformers so that everyone can quickly and easily use it. -First and foremost, you need to decide the raw entries the pipeline will be able to take. It can be strings, raw bytes, -dictionaries or whatever seems to be the most likely desired input. Try to keep these inputs as pure Python as possible -as it makes compatibility easier (even through other languages via JSON). Those will be the `inputs` of the -pipeline (`preprocess`). +This guide will walk you through the process of adding a new pipeline to Transformers. -Then define the `outputs`. Same policy as the `inputs`. The simpler, the better. Those will be the outputs of -`postprocess` method. +## Design choices -Start by inheriting the base class `Pipeline` with the 4 methods needed to implement `preprocess`, -`_forward`, `postprocess`, and `_sanitize_parameters`. +At a minimum, you only need to provide [`Pipeline`] with an appropriate input for a task. This is also where you should begin when designing your pipeline. +Decide what input types [`Pipeline`] can accept. It can be strings, raw bytes, dictionaries, and so on. Try to keep the inputs in pure Python where possible because it's more compatible. Next, decide on the output [`Pipeline`] should return. Again, keeping the output in Python is the simplest and best option because it's easier to work with. -```python -from transformers import Pipeline +Keeping the inputs and outputs simple, and ideally JSON-serializable, makes it easier for users to run your [`Pipeline`] without needing to learn new object types. It's also common to support many different input types for even greater ease of use. 
For example, making an audio file acceptable from a filename, URL, or raw bytes gives the user more flexibility in how they provide the audio data. + +## Create a pipeline + +With an input and output decided, you can start implementing [`Pipeline`]. Your pipeline should inherit from the base [`Pipeline`] class and include 4 methods. +```py +from transformers import Pipeline class MyPipeline(Pipeline): def _sanitize_parameters(self, **kwargs): - preprocess_kwargs = {} - if "maybe_arg" in kwargs: - preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"] - return preprocess_kwargs, {}, {} - def preprocess(self, inputs, maybe_arg=2): - model_input = Tensor(inputs["input_ids"]) - return {"model_input": model_input} + def preprocess(self, inputs, args=2): def _forward(self, model_inputs): - # model_inputs == {"model_input": model_input} - outputs = self.model(**model_inputs) - # Maybe {"logits": Tensor(...)} - return outputs def postprocess(self, model_outputs): - best_class = model_outputs["logits"].softmax(-1) - return best_class ``` -The structure of this breakdown is to support relatively seamless support for CPU/GPU, while supporting doing -pre/postprocessing on the CPU on different threads - -`preprocess` will take the originally defined inputs, and turn them into something feedable to the model. It might -contain more information and is usually a `Dict`. - -`_forward` is the implementation detail and is not meant to be called directly. `forward` is the preferred -called method as it contains safeguards to make sure everything is working on the expected device. If anything is -linked to a real model it belongs in the `_forward` method, anything else is in the preprocess/postprocess. - -`postprocess` methods will take the output of `_forward` and turn it into the final output that was decided -earlier. - -`_sanitize_parameters` exists to allow users to pass any parameters whenever they wish, be it at initialization -time `pipeline(...., maybe_arg=4)` or at call time `pipe = pipeline(...); output = pipe(...., maybe_arg=4)`. +1. `preprocess` takes the inputs and transforms them into the appropriate input format for the model. -The returns of `_sanitize_parameters` are the 3 dicts of kwargs that will be passed directly to `preprocess`, -`_forward`, and `postprocess`. Don't fill anything if the caller didn't call with any extra parameter. That -allows to keep the default arguments in the function definition which is always more "natural". - -A classic example would be a `top_k` argument in the post processing in classification tasks. +```py +def preprocess(self, inputs, maybe_arg=2): + model_input = Tensor(inputs["input_ids"]) + return {"model_input": model_input} +``` -```python ->>> pipe = pipeline("my-new-task") ->>> pipe("This is a test") -[{"label": "1-star", "score": 0.8}, {"label": "2-star", "score": 0.1}, {"label": "3-star", "score": 0.05} -{"label": "4-star", "score": 0.025}, {"label": "5-star", "score": 0.025}] +2. `_forward` shouldn't be called directly. `forward` is the preferred method because it includes safeguards to make sure everything works correctly on the expected device. Anything linked to the model belongs in `_forward` and everything else belongs in either `preprocess` or `postprocess`. 
->>> pipe("This is a test", top_k=2) -[{"label": "1-star", "score": 0.8}, {"label": "2-star", "score": 0.1}] +```py +def _forward(self, model_inputs): + outputs = self.model(**model_inputs) + return outputs ``` -In order to achieve that, we'll update our `postprocess` method with a default parameter to `5`. and edit -`_sanitize_parameters` to allow this new parameter. - +3. `postprocess` generates the final output from the models output in `_forward`. -```python +```py def postprocess(self, model_outputs, top_k=5): best_class = model_outputs["logits"].softmax(-1) - # Add logic to handle top_k return best_class +``` + +4. `_sanitize_parameters` lets users pass additional parameters to [`Pipeline`]. This could be during initialization or when [`Pipeline`] is called. `_sanitize_parameters` returns 3 dicts of additional keyword arguments that are passed directly to `preprocess`, `_forward`, and `postprocess`. Don't add anything if a user didn't call the pipeline with extra parameters. This keeps the default arguments in the function definition which is always more natural. +For example, add a `top_k` parameter in `postprocess` to return the top 5 most likely classes. Then in `_sanitize_parameters`, check if the user passed in `top_k` and add it to `postprocess_kwargs`. +```py def _sanitize_parameters(self, **kwargs): preprocess_kwargs = {} if "maybe_arg" in kwargs: @@ -110,55 +84,61 @@ def _sanitize_parameters(self, **kwargs): return preprocess_kwargs, {}, postprocess_kwargs ``` -Try to keep the inputs/outputs very simple and ideally JSON-serializable as it makes the pipeline usage very easy -without requiring users to understand new kinds of objects. It's also relatively common to support many different types -of arguments for ease of use (audio files, which can be filenames, URLs or pure bytes) +Now the pipeline can return the top most likely labels if a user chooses to. +```py +from transformers import pipeline +pipeline = pipeline("my-task") +# returns 3 most likely labels +pipeline("This is the best meal I've ever had", top_k=3) +# returns 5 most likely labels by default +pipeline("This is the best meal I've ever had") +``` + +## Register a pipeline -## Adding it to the list of supported tasks +Register the new task your pipeline supports in the `PIPELINE_REGISTRY`. 
The registry defines: -To register your `new-task` to the list of supported tasks, you have to add it to the `PIPELINE_REGISTRY`: +- the machine learning framework the pipeline supports with either `pt_model` or `tf_model` (add both to ensure it works with either framework) +- a default model which should come from a specific revision (branch, or commit hash) where the model works as expected with `default` +- the expected input with `type` -```python +```py from transformers.pipelines import PIPELINE_REGISTRY +from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification PIPELINE_REGISTRY.register_pipeline( "new-task", pipeline_class=MyPipeline, pt_model=AutoModelForSequenceClassification, + tf_model=TFAutoModelForSequenceClassification, + default={"pt": ("user/awesome-model", "branch-name")}, + type="text", ) ``` -You can specify a default model if you want, in which case it should come with a specific revision (which can be the name of a branch or a commit hash, here we took `"abcdef"`) as well as the type: +## Share your pipeline -```python -PIPELINE_REGISTRY.register_pipeline( - "new-task", - pipeline_class=MyPipeline, - pt_model=AutoModelForSequenceClassification, - default={"pt": ("user/awesome_model", "abcdef")}, - type="text", # current support type: text, audio, image, multimodal -) -``` +Share your pipeline with the community on the [Hub](https://hf.co) or you can add it directly to Transformers. -## Share your pipeline on the Hub +It's faster to upload your pipeline code to the Hub because it doesn't require a review from the Transformers team. Adding the pipeline to Transformers may be slower because it requires a review and you need to add tests to ensure your [`Pipeline`] works. -To share your custom pipeline on the Hub, you just have to save the custom code of your `Pipeline` subclass in a -python file. For instance, let's say we want to use a custom pipeline for sentence pair classification like this: +### Upload to the Hub + +Add your pipeline code to the Hub in a Python file. + +For example, a custom pipeline for sentence pair classification might look like the code below. The implementation works for PyTorch and TensorFlow models. ```py import numpy as np - from transformers import Pipeline - def softmax(outputs): maxes = np.max(outputs, axis=-1, keepdims=True) shifted_exp = np.exp(outputs - maxes) return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) - class PairClassificationPipeline(Pipeline): def _sanitize_parameters(self, **kwargs): preprocess_kwargs = {} @@ -183,8 +163,7 @@ class PairClassificationPipeline(Pipeline): return {"label": label, "score": score, "logits": logits} ``` -The implementation is framework agnostic, and will work for PyTorch and TensorFlow models. If we have saved this in -a file named `pair_classification.py`, we can then import it and register it like this. +Save the code in a file named `pair_classification.py`, and import and register it as shown below. ```py from pair_classification import PairClassificationPipeline @@ -215,56 +194,36 @@ The [register_pipeline](https://github.com/huggingface/transformers/blob/9feae5f }, ``` -Once this is done, we can use it with a pretrained model. For instance `sgugger/finetuned-bert-mrpc` has been -fine-tuned on the MRPC dataset, which classifies pairs of sentences as paraphrases or not. +Call [`~Pipeline.push_to_hub`] to push the pipeline to the Hub.
The Python file containing the code is copied to the Hub, and the pipeline's model and tokenizer are also saved and pushed to the Hub. Your pipeline should now be available on the Hub under your namespace. ```py from transformers import pipeline -classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") +pipeline = pipeline(task="pair-classification", model="sgugger/finetuned-bert-mrpc") +pipeline.push_to_hub("pair-classification-pipeline") ``` -Then we can share it on the Hub by using the `push_to_hub` method: - -```py -classifier.push_to_hub("test-dynamic-pipeline") -``` - -This will copy the file where you defined `PairClassificationPipeline` inside the folder `"test-dynamic-pipeline"`, -along with saving the model and tokenizer of the pipeline, before pushing everything into the repository -`{your_username}/test-dynamic-pipeline`. After that, anyone can use it as long as they provide the option -`trust_remote_code=True`: +To use the pipeline, point `model` to your pushed repository and add `trust_remote_code=True` when loading the pipeline. ```py from transformers import pipeline -classifier = pipeline(model="{your_username}/test-dynamic-pipeline", trust_remote_code=True) +pipeline = pipeline(model="{your_username}/pair-classification-pipeline", trust_remote_code=True) ``` -## Add the pipeline to 🤗 Transformers +### Add to Transformers + +Adding a custom pipeline to Transformers requires adding tests to make sure everything works as expected, and requesting a review from the Transformers team. -If you want to contribute your pipeline to 🤗 Transformers, you will need to add a new module in the `pipelines` submodule -with the code of your pipeline, then add it to the list of tasks defined in `pipelines/__init__.py`. +Add your pipeline code as a new module to the [pipelines](https://github.com/huggingface/transformers/tree/main/src/transformers/pipelines) submodule, and add it to the list of tasks defined in [pipelines/__init__.py](https://github.com/huggingface/transformers/blob/main/src/transformers/pipelines/__init__.py). -Then you will need to add tests. Create a new file `tests/test_pipelines_MY_PIPELINE.py` with examples of the other tests. +Next, add a new test for the pipeline in [transformers/tests/pipelines](https://github.com/huggingface/transformers/tree/main/tests/pipelines). You can look at the other tests for examples of how to test your pipeline. -The `run_pipeline_test` function will be very generic and run on small random models on every possible -architecture as defined by `model_mapping` and `tf_model_mapping`. +The [run_pipeline_test](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L186) function should be very generic and run on the models defined in [model_mapping](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L48) and [tf_model_mapping](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L49). This is important for testing future compatibility with new models. -This is very important to test future compatibility, meaning if someone adds a new model for -`XXXForQuestionAnswering` then the pipeline test will attempt to run on it.
Because the models are random it's -impossible to check for actual values, that's why there is a helper `ANY` that will simply attempt to match the -output of the pipeline TYPE. +You'll also notice `ANY` is used throughout the [run_pipeline_test](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L186) function. The models are random, so you can't check the actual values. Using `ANY` allows the test to simply match the type of the pipeline output instead. -You also *need* to implement 2 (ideally 4) tests. +Finally, you should also implement the following 4 tests. -- `test_small_model_pt` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense) - and test the pipeline outputs. The results should be the same as `test_small_model_tf`. -- `test_small_model_tf` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense) - and test the pipeline outputs. The results should be the same as `test_small_model_pt`. -- `test_large_model_pt` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to - make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make - sure there is no drift in future releases. -- `test_large_model_tf` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to - make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make - sure there is no drift in future releases. +1. [test_small_model_pt](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L59) and [test_small_model_tf](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_text_classification.py#L150), use a small model for these pipelines to make sure they return the correct outputs. The results don't have to make sense. The PyTorch and TensorFlow tests should return the same result. +1. [test_large_model_pt](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_zero_shot_image_classification.py#L187) and [test_large_model_tf](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/tests/pipelines/test_pipelines_zero_shot_image_classification.py#L220), use a realistic model for these pipelines to make sure they return meaningful results. These tests are slow and should be marked as slow. diff --git a/docs/source/en/agents.md b/docs/source/en/agents.md index 56c9184980f4..bd24d8ce30cc 100644 --- a/docs/source/en/agents.md +++ b/docs/source/en/agents.md @@ -13,211 +13,135 @@ specific language governing permissions and limitations under the License. rendered properly in your Markdown viewer. --> -# Agents and tools -[[open-in-colab]] - -### What is an agent? - -Large Language Models (LLMs) trained to perform [causal language modeling](./tasks/language_modeling) can tackle a wide range of tasks, but they often struggle with basic tasks like logic, calculation, and search. When prompted in domains in which they do not perform well, they often fail to generate the answer we expect them to. - -One approach to overcome this weakness is to create an *agent*.
- -An agent is a system that uses an LLM as its engine, and it has access to functions called *tools*. - -These *tools* are functions for performing a task, and they contain all necessary description for the agent to properly use them. +> [!WARNING] +> Agents and tools are being spun out into the standalone [smolagents](https://huggingface.co/docs/smolagents/index) library. These docs will be deprecated in the future! -The agent can be programmed to: -- devise a series of actions/tools and run them all at once, like the [`CodeAgent`] -- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one, like the [`ReactJsonAgent`] +# Agents -### Types of agents +[[open-in-colab]] -#### Code agent +An agent is a system where a large language model (LLM) can execute more complex tasks through *planning* and using *tools*. -This agent has a planning step, then generates python code to execute all its actions at once. It natively handles different input and output types for its tools, thus it is the recommended choice for multimodal tasks. +- Planning helps an LLM reason its way through a task by breaking it down into smaller subtasks. For example, [`CodeAgent`] plans a series of actions to take and then generates Python code to execute all the actions at once. -#### React agents + Another planning method is by self-reflection and refinement of its previous actions to improve its performance. The [`ReactJsonAgent`] is an example of this type of planning, and it's based on the [ReAct](https://hf.co/papers/2210.03629) framework. This agent plans and executes actions one at a time based on the feedback it receives from each action. -This is the go-to agent to solve reasoning tasks, since the ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) makes it really efficient to think on the basis of its previous observations. +- Tools give an LLM access to external functions or APIs that it can use to help it complete a task. For example, [gradio-tools](https://github.com/freddyaboulton/gradio-tools) gives an LLM access to any of the [Gradio](https://www.gradio.app/) apps available on Hugging Face [Spaces](https://hf.co/spaces). These apps can be used for a wide range of tasks such as image generation, video generation, audio transcription, and more. -We implement two versions of ReactJsonAgent: -- [`ReactJsonAgent`] generates tool calls as a JSON in its output. -- [`ReactCodeAgent`] is a new type of ReactJsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance. +To use agents in Transformers, make sure you have the extra `agents` dependencies installed. -> [!TIP] -> Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about ReAct agents. +```bash +!pip install transformers[agents] +```
- - -
+Create an agent instance (refer to the [Agents](./main_classes/agent#agents) API for supported agents in Transformers) and a list of tools available for it to use, then [`~ReactAgent.run`] the agent on your task. The example below demonstrates how a ReAct agent reasons through a task. -![Framework of a React Agent](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) +```py +from transformers import ReactCodeAgent -For example, here is how a ReAct Code agent would work its way through the following question. +agent = ReactCodeAgent(tools=[]) +agent.run( + "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?", +) +``` -```py3 ->>> agent.run( -... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?", -... ) -=====New task===== +```bash +======== New task ======== How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? -====Agent is executing the code below: -bert_blocks = search(query="number of blocks in BERT base encoder") -print("BERT blocks:", bert_blocks) +==== Agent is executing the code below: +bert_layers = 12 # BERT base encoder has 12 layers +attention_layers = 6 # Encoder in Attention is All You Need has 6 layers +layer_diff = bert_layers - attention_layers +print("The difference in layers between BERT base encoder and Attention is All You Need is", layer_diff) ==== Print outputs: -BERT blocks: twelve encoder blocks +The difference in layers between BERT base encoder and Attention is All You Need is 6 -====Agent is executing the code below: -attention_layer = search(query="number of layers in Attention is All You Need") -print("Attention layers:", attention_layer) -==== -Print outputs: -Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture. - -====Agent is executing the code below: -bert_blocks = 12 -attention_layers = 6 -diff = bert_blocks - attention_layers -print("Difference in blocks:", diff) -final_answer(diff) +==== Agent is executing the code below: +final_answer("BERT base encoder has {} more layers than the encoder from Attention is All You Need.".format(layer_diff)) ==== - Print outputs: -Difference in blocks: 6 - -Final answer: 6 -``` - -### How can I build an agent? - -To initialize an agent, you need these arguments: - -- an LLM to power your agent - the agent is not exactly the LLM, it’s more like the agent is a program that uses an LLM as its engine. -- a system prompt: what the LLM engine will be prompted with to generate its output -- a toolbox from which the agent pick tools to execute -- a parser to extract from the LLM output which tools are to call and with which arguments - -Upon initialization of the agent system, the tool attributes are used to generate a tool description, then baked into the agent’s `system_prompt` to let it know which tools it can use and why. -To start with, please install the `agents` extras in order to install all default dependencies. - -```bash -pip install transformers[agents] +>>> Final answer: +BERT base encoder has 6 more layers than the encoder from Attention is All You Need. 
``` -Build your LLM engine by defining a `llm_engine` method which accepts a list of [messages](./chat_templating) and returns text. This callable also needs to accept a `stop` argument that indicates when to stop generating. +This guide will walk you through how to initialize an agent in more detail. -```python -from huggingface_hub import login, InferenceClient +## LLM -login("") +An agent uses an LLM to plan and execute a task; it is the engine that powers the agent. To choose and build your own LLM engine, you need a method that: -client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct") +1. accepts input in the [chat template](./chat_templating) format, `List[Dict[str, str]]`, and returns a string +2. stops generating outputs when it encounters the sequences in `stop_sequences` +```py +from huggingface_hub import InferenceClient + +client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct") + def llm_engine(messages, stop_sequences=["Task"]) -> str: response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) answer = response.choices[0].message.content return answer ``` -You could use any `llm_engine` method as long as: -1. it follows the [messages format](./chat_templating) (`List[Dict[str, str]]`) for its input `messages`, and it returns a `str`. -2. it stops generating outputs at the sequences passed in the argument `stop_sequences` +Next, initialize an engine to load a model. To run an agent locally, create a [`TransformersEngine`] to load a preinitialized [`Pipeline`]. -Additionally, `llm_engine` can also take a `grammar` argument. In the case where you specify a `grammar` upon agent initialization, this argument will be passed to the calls to llm_engine, with the `grammar` that you defined upon initialization, to allow [constrained generation](https://huggingface.co/docs/text-generation-inference/conceptual/guidance) in order to force properly-formatted agent outputs. +However, you could also leverage Hugging Face's powerful inference infrastructure, [Inference API](https://hf.co/docs/api-inference/index) or [Inference Endpoints](https://hf.co/docs/inference-endpoints/index), to run your model. This is useful for loading larger models that are typically required for agentic behavior. In this case, load the [`HfApiEngine`] to run the agent. -You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`. +The agent requires a list of tools it can use to complete a task. If you aren't using any additional tools, pass an empty list. The default tools provided by Transformers are not loaded automatically; set `add_base_tools=True` to add them on top of your `tools` list. -Now you can create an agent, like [`CodeAgent`], and run it. You can also create a [`TransformersEngine`] with a pre-initialized pipeline to run inference on your local machine using `transformers`. -For convenience, since agentic behaviours generally require stronger models such as `Llama-3.1-70B-Instruct` that are harder to run locally for now, we also provide the [`HfApiEngine`] class that initializes a `huggingface_hub.InferenceClient` under the hood.
+ + -```python -from transformers import CodeAgent, HfApiEngine - -llm_engine = HfApiEngine(model="meta-llama/Meta-Llama-3-70B-Instruct") -agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +```py +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TransformersEngine, CodeAgent +tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct") +model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct").to("cuda") +pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer) +llm_engine = TransformersEngine(pipeline) +agent = CodeAgent(tools=[], llm_engine=llm_engine) agent.run( - "Could you translate this sentence from French, say it out loud and return the audio.", - sentence="Où est la boulangerie la plus proche?", + "What causes bread to rise?", ) ``` -This will be handy in case of emergency baguette need! -You can even leave the argument `llm_engine` undefined, and an [`HfApiEngine`] will be created by default. + + -```python -from transformers import CodeAgent - -agent = CodeAgent(tools=[], add_base_tools=True) +```py +from transformers import CodeAgent, HfApiEngine +llm_engine = HfApiEngine(model="meta-llama/Meta-Llama-3-70B-Instruct") +agent = CodeAgent(tools=[], llm_engine=llm_engine) agent.run( - "Could you translate this sentence from French, say it out loud and give me the audio.", + "Could you translate this sentence from French, say it out loud and return the audio.", sentence="Où est la boulangerie la plus proche?", ) ``` -Note that we used an additional `sentence` argument: you can pass text as additional arguments to the model. + + -You can also use this to indicate the path to local or remote files for the model to use: +The agent supports [constrained generation](https://hf.co/docs/text-generation-inference/conceptual/guidance) for generating outputs according to a specific structure with the `grammar` parameter. The `grammar` parameter should be specified in the `llm_engine` method or you can set it when initializing an agent. + +Lastly, an agent accepts additional inputs such as text and audio. In the [`HfApiEngine`] example above, the agent accepted a sentence to translate. But you could also pass a path to a local or remote file for the agent to access. The example below demonstrates how to pass a path to an audio file. ```py from transformers import ReactCodeAgent -agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) - -agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3") -``` - - -The prompt and output parser were automatically defined, but you can easily inspect them by calling the `system_prompt_template` on your agent. - -```python -print(agent.system_prompt_template) +agent = ReactCodeAgent(tools=[], llm_engine=llm_engine) +agent.run("Why doesn't he know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3") ``` -It's important to explain as clearly as possible the task you want to perform. -Every [`~Agent.run`] operation is independent, and since an agent is powered by an LLM, minor variations in your prompt might yield completely different results. -You can also run an agent consecutively for different tasks: each time the attributes `agent.task` and `agent.logs` will be re-initialized. 
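+To inspect the prompt template an agent uses, including the tool descriptions baked into it at initialization, print its `system_prompt_template` attribute (a small sketch; `agent` is assumed to be one of the agents created in the examples above).
+
+```py
+# inspect the system prompt template the agent was initialized with
+print(agent.system_prompt_template)
+```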
+## System prompt
+A system prompt describes how an agent should behave, the tools that are available to it, and the expected output format.
-#### Code execution
+Tools are defined by the `<>` token, which is dynamically replaced at runtime with the actual tool descriptions. The tool description is derived from the tool name, description, inputs, output type, and a Jinja2 template. Refer to the [Tools](./tools) guide for more information about how to describe tools.
-A Python interpreter executes the code on a set of inputs passed along with your tools.
-This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and the print function, so you're already limited in what can be executed.
-
-The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue.
-You can still authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`ReactCodeAgent`] or [`CodeAgent`]:
+The example below is the system prompt for [`ReactCodeAgent`].
```py
->>> from transformers import ReactCodeAgent
-
->>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
->>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
-
-(...)
-'Hugging Face – Blog'
-```
-
-The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent.
-
-> [!WARNING]
-> The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports!
-
-### The system prompt
-
-An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [`ReactCodeAgent`] (below version is slightly simplified).
-
-```text
You will be given a task to solve as best you can. You have access to the following tools:
<>
@@ -235,7 +159,7 @@ Here are a few examples using notional tools:
---
{examples}
-Above example were using notional tools that might not exist for you. You only have acces to those tools:
+Above example were using notional tools that might not exist for you. You only have access to those tools:
<>
You also can perform computations in the python code you generate.
@@ -249,183 +173,125 @@ Remember to make sure that variables you use are all defined.
Now Begin!
```
-The system prompt includes:
-- An *introduction* that explains how the agent should behave and what tools are.
-- A description of all the tools that is defined by a `<>` token that is dynamically replaced at runtime with the tools defined/chosen by the user.
-  - The tool description comes from the tool attributes, `name`, `description`, `inputs` and `output_type`, and a simple `jinja2` template that you can refine.
-- The expected output format.
-
-You could improve the system prompt, for example, by adding an explanation of the output format.
+The system prompt can be tailored to the intended task. For example, you can add a better explanation of the output format, or overwrite the template entirely by passing your own custom system prompt to the `system_prompt` parameter as shown below.
-For maximum flexibility, you can overwrite the whole system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter.
+> [!WARNING]
+> If you're writing a custom system prompt, make sure to include `<>` in the template so the agent is aware of the available tools.
-```python
+```py
from transformers import ReactJsonAgent
from transformers.agents import PythonInterpreterTool
agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
```
-> [!WARNING]
-> Please make sure to define the `<>` string somewhere in the `template` so the agent is aware
-of the available tools.
-
-
-### Inspecting an agent run
-
-Here are a few useful attributes to inspect what happened after a run:
-- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that then is appended to `agent.logs`.
-Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages. This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task in separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what has happened - but not every log will be transcripted by this method.
-
-## Tools
-
-A tool is an atomic function to be used by an agent.
-
-You can for instance check the [`PythonInterpreterTool`]: it has a name, a description, input descriptions, an output type, and a `__call__` method to perform the action.
-
-When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why.
+## Code execution
-### Default toolbox
+For safety, the only functions that can be called are the tools you provide (and the default Transformers tools) and the `print` function. The interpreter also doesn't allow importing modules that aren't on a safe list.
-Transformers comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools = True`:
+To import modules that aren't on the list, pass them as a list of strings to the `additional_authorized_imports` parameter when initializing an agent.
-- **Document question answering**: given a document (such as a PDF) in image format, answer a question on this document ([Donut](./model_doc/donut))
-- **Image question answering**: given an image, answer a question on this image ([VILT](./model_doc/vilt))
-- **Speech to text**: given an audio recording of a person talking, transcribe the speech into text ([Whisper](./model_doc/whisper))
-- **Text to speech**: convert text to speech ([SpeechT5](./model_doc/speecht5))
-- **Translation**: translates a given sentence from source language to target language.
-- **DuckDuckGo search***: performs a web search using DuckDuckGo browser.
-- **Python code interpreter**: runs your the LLM generated Python code in a secure environment. This tool will only be added to [`ReactJsonAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code
-
-
-You can manually use a tool by calling the [`load_tool`] function and a task to perform.
-
-
-```python
-from transformers import load_tool
+```py
+from transformers import ReactCodeAgent
-tool = load_tool("text-to-speech")
-audio = tool("This is a text to speech tool")
+agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
+agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
```
+Code execution stops if the code tries to perform an illegal operation, such as importing a module that isn't authorized, or if the code generated by the agent raises a regular Python error.
-### Create a new tool
-
-You can create your own tool for use cases not covered by the default tools from Hugging Face.
-For example, let's create a tool that returns the most downloaded model for a given task from the Hub.
-
-You'll start with the code below.
-
-```python
-from huggingface_hub import list_models
+> [!WARNING]
+> An LLM can generate arbitrary code that will then be executed, so don't add any unsafe imports!
-task = "text-classification"
+## Multi-agent
-model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
-print(model.id)
-```
+[Multi-agent](https://hf.co/papers/2308.08155) refers to multiple agents working together to solve a task. Performance is typically better because each agent is specialized for a particular subtask.
-This code can quickly be converted into a tool, just by wrapping it in a function and adding the `tool` decorator:
+Multi-agent systems are created with the [`ManagedAgent`] class, where a *manager agent* coordinates how other agents work together. The manager agent requires an agent along with its name and a description. These are added to the manager agent's system prompt, which lets it know how to call and use them.
+The multi-agent example below creates a web search agent that is managed by another [`ReactCodeAgent`].
```py
-from transformers import tool
-
-@tool
-def model_download_tool(task: str) -> str:
-    """
-    This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub.
-    It returns the name of the checkpoint.
-
-    Args:
-        task: The task for which
-    """
-    model = next(iter(list_models(filter="text-classification", sort="downloads", direction=-1)))
-    return model.id
-```
-
-The function needs:
-- A clear name. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's put `model_download_tool`.
-- Type hints on both inputs and output
-- A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint).
-All these will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible!
-
-> [!TIP]
-> This definition format is the same as tool schemas used in `apply_chat_template`, the only difference is the added `tool` decorator: read more on our tool use API [here](https://huggingface.co/blog/unified-tool-use#passing-tools-to-a-chat-template).
-
-Then you can directly initialize your agent:
-```py
-from transformers import CodeAgent
-agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
-agent.run(
-    "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
+from transformers.agents import ReactCodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent + +llm_engine = HfApiEngine() +web_agent = ReactCodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine) +managed_web_agent = ManagedAgent( + agent=web_agent, + name="web_search", + description="Runs web searches for you. Give it your query as an argument." ) -``` - -You get the following: -```text -======== New task ======== -Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub? -==== Agent is executing the code below: -most_downloaded_model = model_download_tool(task="text-to-video") -print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.") -==== -``` - -And the output: -`"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning."` - -### Manage your agent's toolbox - -If you have already initialized an agent, it is inconvenient to reinitialize it from scratch with a tool you want to use. With Transformers, you can manage an agent's toolbox by adding or replacing a tool. - -Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox. - -```python -from transformers import CodeAgent - -agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) -agent.toolbox.add_tool(model_download_tool) -``` -Now we can leverage both the new tool and the previous text-to-speech tool: - -```python -agent.run( - "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?" +manager_agent = ReactCodeAgent( + tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent] ) +manager_agent.run("Who is the CEO of Hugging Face?") ``` +## Gradio integration -| **Audio** | -|------------------------------------------------------------------------------------------------------------------------------------------------------| -|