Commits: 56 (changes shown from 1 commit)
2ce2399
docs(pypi): Improve README display and badge reliability
aksg87 Jul 22, 2025
4fe7580
feat: add trusted publishing workflow and prepare v1.0.0 release
aksg87 Jul 22, 2025
e696a48
Fix: Resolve libmagic ImportError (#6)
aksg87 Aug 1, 2025
5447637
docs: clarify output_dir behavior in medication_examples.md
kleeena Aug 1, 2025
9c47b34
Merge pull request #11 from google/fix/libmagic-dependency-issue
aksg87 Aug 1, 2025
175e075
Removed inline comment in medication example
kleeena Aug 2, 2025
9472099
Merge pull request #15 from kleeena/docs/update-medication_examples.md
aksg87 Aug 2, 2025
e6c3dcd
docs: add output_dir="." to all save_annotated_documents examples
aksg87 Aug 2, 2025
1fb1f1d
Merge pull request #17 from google/fix/output-dir-consistency
aksg87 Aug 2, 2025
7905f93
Fix typo in Ollama API parameter name
Mirza-Samad-Ahmed-Baig Aug 2, 2025
06afc9c
Fix security vulnerability and bugs in Ollama API integration
Mirza-Samad-Ahmed-Baig Aug 2, 2025
13fbd2c
build: add formatting & linting pipeline with pre-commit integration
aksg87 Aug 3, 2025
c8d2027
style: apply pyink, isort, and pre-commit formatting
aksg87 Aug 3, 2025
146a095
ci: enable format and lint checks in tox
aksg87 Aug 3, 2025
aa6da18
Merge pull request #24 from google/feat/code-formatting-pipeline
aksg87 Aug 3, 2025
ed65bca
Add LangExtractError base exception for centralized error handling
aksg87 Aug 3, 2025
6c4508b
Merge pull request #26 from google/feat/exception-hierarchy
aksg87 Aug 3, 2025
8b85225
fix: Remove LangFun and pylibmagic dependencies (v1.0.2)
aksg87 Aug 3, 2025
88520cc
Merge pull request #28 from google/fix/remove-breaking-dep-langfun
aksg87 Aug 3, 2025
75a6f12
Fix save_annotated_documents to handle string paths
aksg87 Aug 3, 2025
a415b94
Merge pull request #29 from google/fix-save-annotated-documents-mkdir
aksg87 Aug 3, 2025
8289b3a
feat: Add OpenAI language model support
aksg87 Aug 3, 2025
c8ef723
Merge pull request #31 from google/feature/add-oai-inference
aksg87 Aug 3, 2025
dfe8188
fix(ui): prevent current highlight border from being obscured. Chan…
tonebeta Aug 4, 2025
0d76530
Merge branch 'google:main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 4, 2025
87c511e
feat: Add live API integration tests (#39)
aksg87 Aug 4, 2025
dc61372
Add PR template validation workflow (#45)
aksg87 Aug 4, 2025
7fc809f
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 5, 2025
da771e6
fix: Change OllamaLanguageModel parameter from 'model' to 'model_id' …
aksg87 Aug 5, 2025
e83d5cf
feat: Add CITATION.cff file for proper software citation
aksg87 Aug 5, 2025
337beee
feat: Add Ollama integration with Docker examples and CI tests (#62)
aksg87 Aug 5, 2025
a7ef0bd
chore: Bump version to 1.0.4 for release
aksg87 Aug 5, 2025
87beb4f
build(deps): bump tj-actions/changed-files (#66)
dependabot[bot] Aug 5, 2025
db140d1
Add PR validation workflows and update contribution guidelines (#74)
aksg87 Aug 5, 2025
ed97f73
Fix custom comment in linked issue check (#77)
aksg87 Aug 5, 2025
ad1f27b
Add infrastructure file protection workflow (#76)
aksg87 Aug 5, 2025
41bc9ed
Allow maintainers to bypass community support requirement
aksg87 Aug 5, 2025
54e57db
Add manual trigger capability to validation workflows (#75)
aksg87 Aug 5, 2025
25ebc17
Fix fork PR labeling by using pull_request_target
aksg87 Aug 5, 2025
1290d63
Add workflow_dispatch trigger to CI workflow
aksg87 Aug 6, 2025
42687fc
Add secure label-based testing for fork PRs
aksg87 Aug 6, 2025
234081e
Add base_url to OpenAILanguageModel (#51)
mariano Aug 6, 2025
46b4f0d
Fix validation workflows that were skipping all checks
aksg87 Aug 6, 2025
6fb66cf
Add commit status to revalidation workflow
aksg87 Aug 6, 2025
47a251e
Fix boolean comparison in revalidation workflow
aksg87 Aug 7, 2025
b28e673
Add maintenance scripts for PR management
aksg87 Aug 7, 2025
6b02efb
Fix IPython import warnings and notebook detection (#86)
aksg87 Aug 7, 2025
e6dcc8e
Fix CI to validate PR branch formatting directly
aksg87 Aug 7, 2025
1c3c1a2
Add PR update automation workflows
aksg87 Aug 7, 2025
b60f0b2
Fix workflow formatting
aksg87 Aug 7, 2025
f888bd8
Minor changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
8659ef3
Merge branch 'fix-ollama-num-threads-typo'
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ea71754
Fix chunking bug and improve test documentation (#88)
aksg87 Aug 7, 2025
82c6644
Fix: Resolve merge conflict and update docstrings in inference.py
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ce0caa5
Changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
792fd3e
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 7, 2025
style: apply pyink, isort, and pre-commit formatting
aksg87 committed Aug 3, 2025
commit c8d2027adabb8eab8cf823e0b3b780de3085870b
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -24,4 +24,4 @@ contact_links:
url: https://g.co/vulnz
about: >
To report a security issue, please use https://g.co/vulnz. The Google Security Team will
respond within 5 working days of your report on https://g.co/vulnz.
respond within 5 working days of your report on https://g.co/vulnz.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
@@ -44,4 +44,4 @@ jobs:

- name: Run tox (lint + tests)
run: |
tox
tox
12 changes: 6 additions & 6 deletions .github/workflows/publish.yml
@@ -31,25 +31,25 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install build

- name: Build package
run: python -m build

- name: Verify build artifacts
run: |
ls -la dist/
pip install twine
twine check dist/*

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
uses: pypa/gh-action-pypi-publish@release/v1
2 changes: 1 addition & 1 deletion .gitignore
@@ -51,4 +51,4 @@ docs/_build/
*.swp

# OS-specific
.DS_Store
.DS_Store
2 changes: 1 addition & 1 deletion .hgignore
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

gdm/codeai/codemind/cli/GEMINI.md
gdm/codeai/codemind/cli/GEMINI.md
2 changes: 1 addition & 1 deletion Dockerfile
@@ -13,4 +13,4 @@ WORKDIR /app
RUN pip install --no-cache-dir langextract

# Set default command
CMD ["python"]
CMD ["python"]
2 changes: 1 addition & 1 deletion README.md
@@ -352,4 +352,4 @@ For health-related applications, use of LangExtract is also subject to the

---

**Happy Extracting!**
**Happy Extracting!**
2 changes: 1 addition & 1 deletion docs/examples/longer_text_example.md
@@ -171,4 +171,4 @@ LangExtract combines precise text positioning with world knowledge enrichment, e

---

¹ Models like Gemini 1.5 Pro show strong performance on many benchmarks, but [needle-in-a-haystack tests](https://cloud.google.com/blog/products/ai-machine-learning/the-needle-in-the-haystack-test-and-how-gemini-pro-solves-it) across million-token contexts indicate that performance can vary in multi-fact retrieval scenarios. This demonstrates how LangExtract's smaller context windows approach ensures consistently high quality across entire documents by avoiding the complexity and potential degradation of massive single-context processing.
¹ Models like Gemini 1.5 Pro show strong performance on many benchmarks, but [needle-in-a-haystack tests](https://cloud.google.com/blog/products/ai-machine-learning/the-needle-in-the-haystack-test-and-how-gemini-pro-solves-it) across million-token contexts indicate that performance can vary in multi-fact retrieval scenarios. This demonstrates how LangExtract's smaller context windows approach ensures consistently high quality across entire documents by avoiding the complexity and potential degradation of massive single-context processing.
6 changes: 3 additions & 3 deletions docs/examples/medication_examples.md
@@ -196,8 +196,8 @@ for med_name, extractions in medication_groups.items():
lx.io.save_annotated_documents(
[result],
output_name="medical_ner_extraction.jsonl",
output_dir="."
)
output_dir="."
)

# Generate the interactive visualization
html_content = lx.visualize("medical_relationship_extraction.jsonl")
@@ -243,4 +243,4 @@ This example demonstrates how attributes enable efficient relationship extractio
- **Relationship Extraction**: Groups related entities using attributes
- **Position Tracking**: Records exact positions of extracted entities in the source text
- **Structured Output**: Organizes information in a format suitable for healthcare applications
- **Interactive Visualization**: Generates HTML visualizations for exploring complex medical extractions with entity groupings and relationships clearly displayed
- **Interactive Visualization**: Generates HTML visualizations for exploring complex medical extractions with entity groupings and relationships clearly displayed
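
For context on the medication_examples.md changes above, here is a minimal sketch of the save-and-visualize flow they document. It assumes `result` is an annotated document already returned by `lx.extract` (the prompt, examples, and model setup are omitted), and the output file names are illustrative only.

import langextract as lx

# `result` is assumed to be an annotated document produced by lx.extract(...)
# for the medication text; the extraction call itself is not shown here.
lx.io.save_annotated_documents(
    [result],
    output_name="medical_ner_extraction.jsonl",
    output_dir=".",  # write the JSONL into the current working directory
)

# Build the interactive HTML visualization from the saved JSONL file.
html_content = lx.visualize("medical_ner_extraction.jsonl")
with open("medical_ner_visualization.html", "w") as f:
    # In notebook environments visualize may return a display object,
    # so fall back to its .data attribute when present.
    f.write(html_content.data if hasattr(html_content, "data") else html_content)

Passing output_dir explicitly keeps the write location unambiguous, which is the convention the documentation changes above standardize.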
2 changes: 1 addition & 1 deletion kokoro/presubmit.cfg
@@ -28,4 +28,4 @@ container_properties {
xunit_test_results {
target_name: "pytest_results"
result_xml_path: "git/repo/pytest_results/test.xml"
}
}
2 changes: 1 addition & 1 deletion kokoro/test.sh
@@ -103,4 +103,4 @@ deactivate

echo "========================================="
echo "Kokoro test script for langextract finished successfully."
echo "========================================="
echo "========================================="
7 changes: 3 additions & 4 deletions langextract/__init__.py
@@ -19,13 +19,13 @@
# Ensure libmagic is available before langfun imports python-magic.
# pylibmagic provides pre-built binaries that python-magic needs.
try:
import pylibmagic # noqa: F401 (side-effect import)
import pylibmagic # noqa: F401 (side-effect import)
except ImportError:
pass
pass

from collections.abc import Iterable, Sequence
import os
from typing import Any, Type, TypeVar, cast
from typing import Any, cast, Type, TypeVar
import warnings

import dotenv
@@ -39,7 +39,6 @@
from langextract import schema
from langextract import visualization


LanguageModelT = TypeVar("LanguageModelT", bound=inference.BaseLanguageModel)

# Set up visualization helper at the top level (lx.visualize).
3 changes: 0 additions & 3 deletions langextract/inference.py
@@ -29,12 +29,9 @@
from typing_extensions import override
import yaml



from langextract import data
from langextract import schema


_OLLAMA_DEFAULT_MODEL_URL = 'http://localhost:11434'


5 changes: 1 addition & 4 deletions langextract/io.py
@@ -18,15 +18,12 @@
import dataclasses
import json
import os
import pathlib
from typing import Any, Iterator

import pandas as pd
import requests

import os
import pathlib
import os
import pathlib
from langextract import data
from langextract import data_lib
from langextract import progress
1 change: 1 addition & 0 deletions langextract/progress.py
@@ -16,6 +16,7 @@

from typing import Any
import urllib.parse

import tqdm

# ANSI color codes for terminal output
4 changes: 2 additions & 2 deletions langextract/prompting.py
@@ -16,12 +16,12 @@

import dataclasses
import json
import os
import pathlib

import pydantic
import yaml

import os
import pathlib
from langextract import data
from langextract import schema

1 change: 0 additions & 1 deletion langextract/schema.py
@@ -22,7 +22,6 @@
import enum
from typing import Any


from langextract import data


16 changes: 8 additions & 8 deletions langextract/visualization.py
@@ -28,10 +28,10 @@
import html
import itertools
import json
import textwrap

import os
import pathlib
import textwrap

from langextract import data as _data
from langextract import io as _io

@@ -130,9 +130,9 @@
50% { text-decoration-color: #ff0000; }
100% { text-decoration-color: #ff4444; }
}
.lx-legend {
font-size: 12px; margin-bottom: 8px;
padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;
.lx-legend {
font-size: 12px; margin-bottom: 8px;
padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;
}
.lx-label {
display: inline-block;
@@ -456,12 +456,12 @@ def _extraction_sort_key(extraction):
<button class="lx-control-btn" onclick="nextExtraction()">⏭ Next</button>
</div>
<div class="lx-progress-container">
<input type="range" id="progressSlider" class="lx-progress-slider"
min="0" max="{len(extractions)-1}" value="0"
<input type="range" id="progressSlider" class="lx-progress-slider"
min="0" max="{len(extractions)-1}" value="0"
onchange="jumpToExtraction(this.value)">
</div>
<div class="lx-status-text">
Entity <span id="entityInfo">1/{len(extractions)}</span> |
Entity <span id="entityInfo">1/{len(extractions)}</span> |
Pos <span id="posInfo">{pos_info_str}</span>
</div>
</div>
2 changes: 1 addition & 1 deletion tests/.pylintrc
@@ -49,4 +49,4 @@ max-branches = 15 # Multiple test conditions
good-names=i,j,k,ex,Run,_,id,ok,fd,fp,maxDiff,setUp,tearDown

# Include test-specific naming patterns
method-rgx=[a-z_][a-z0-9_]{2,50}$|test[A-Z_][a-zA-Z0-9]*$|assert[A-Z][a-zA-Z0-9]*$
method-rgx=[a-z_][a-z0-9_]{2,50}$|test[A-Z_][a-zA-Z0-9]*$|assert[A-Z][a-zA-Z0-9]*$
1 change: 1 addition & 0 deletions tests/annotation_test.py
@@ -20,6 +20,7 @@

from absl.testing import absltest
from absl.testing import parameterized

from langextract import annotation
from langextract import data
from langextract import inference
9 changes: 6 additions & 3 deletions tests/chunking_test.py
@@ -14,11 +14,12 @@

import textwrap

from absl.testing import absltest
from absl.testing import parameterized

from langextract import chunking
from langextract import data
from langextract import tokenizer
from absl.testing import absltest
from absl.testing import parameterized


class SentenceIterTest(absltest.TestCase):
@@ -368,7 +369,9 @@ def test_string_output(self):
)""")
document = data.Document(text=text, document_id="test_doc_123")
tokenized_text = tokenizer.tokenize(text)
chunk_iter = chunking.ChunkIterator(tokenized_text, max_char_buffer=7, document=document)
chunk_iter = chunking.ChunkIterator(
tokenized_text, max_char_buffer=7, document=document
)
text_chunk = next(chunk_iter)
self.assertEqual(str(text_chunk), expected)

4 changes: 2 additions & 2 deletions tests/data_lib_test.py
@@ -14,13 +14,13 @@

import json

from absl.testing import absltest
from absl.testing import parameterized
import numpy as np

from langextract import data
from langextract import data_lib
from langextract import tokenizer
from absl.testing import absltest
from absl.testing import parameterized


class DataLibToDictParameterizedTest(parameterized.TestCase):
5 changes: 4 additions & 1 deletion tests/inference_test.py
@@ -13,12 +13,15 @@
# limitations under the License.

from unittest import mock
import langfun as lf

from absl.testing import absltest
import langfun as lf

from langextract import inference


class TestLangFunLanguageModel(absltest.TestCase):

@mock.patch.object(
inference.lf.core.language_model, "LanguageModel", autospec=True
)
4 changes: 3 additions & 1 deletion tests/init_test.py
@@ -18,11 +18,12 @@
from unittest import mock

from absl.testing import absltest
import langextract as lx

from langextract import data
from langextract import inference
from langextract import prompting
from langextract import schema
import langextract as lx


class InitTest(absltest.TestCase):
@@ -142,5 +143,6 @@ def test_lang_extract_as_lx_extract(

self.assertDataclassEqual(expected_result, actual_result)


if __name__ == "__main__":
absltest.main()
1 change: 1 addition & 0 deletions tests/prompting_test.py
@@ -16,6 +16,7 @@

from absl.testing import absltest
from absl.testing import parameterized

from langextract import data
from langextract import prompting
from langextract import schema
1 change: 1 addition & 0 deletions tests/resolver_test.py
@@ -17,6 +17,7 @@

from absl.testing import absltest
from absl.testing import parameterized

from langextract import chunking
from langextract import data
from langextract import resolver as resolver_lib
4 changes: 1 addition & 3 deletions tests/schema_test.py
@@ -16,11 +16,9 @@
import textwrap
from unittest import mock




from absl.testing import absltest
from absl.testing import parameterized

from langextract import data
from langextract import schema

3 changes: 2 additions & 1 deletion tests/tokenizer_test.py
@@ -14,10 +14,11 @@

import textwrap

from langextract import tokenizer
from absl.testing import absltest
from absl.testing import parameterized

from langextract import tokenizer


class TokenizerTest(parameterized.TestCase):

1 change: 1 addition & 0 deletions tests/visualization_test.py
@@ -17,6 +17,7 @@
from unittest import mock

from absl.testing import absltest

from langextract import data as lx_data
from langextract import visualization
