Add LangExtractError base exception for centralized error handling

Introduces a common base exception class, `LangExtractError`, from which all library-specific exceptions inherit, so users can catch every LangExtract error with a single `except` clause.
google · NewcomerAI · Jul 22, 2025 · Jul 22, 2025 · Aug 1, 2025 · Aug 1, 2025
commit ed65bcaa4bd123f84e61b927f2e74407c4e600b3
diff --git a/exceptions.py b/exceptions.py
@@ -0,0 +1,30 @@
+# Copyright 2025 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Base exceptions for LangExtract.
+
+This module defines the base exception class that all LangExtract exceptions
+inherit from. Individual modules define their own specific exceptions.
+"""
+
+__all__ = ["LangExtractError"]
+
+
+class LangExtractError(Exception):
+  """Base exception for all LangExtract errors.
+
+  All exceptions raised by LangExtract should inherit from this class.
+  This allows users to catch all LangExtract-specific errors with a single
+  except clause.
+  """
diff --git a/langextract/__init__.py b/langextract/__init__.py
@@ -32,13 +32,28 @@
 
 from langextract import annotation
 from langextract import data
+from langextract import exceptions
 from langextract import inference
 from langextract import io
 from langextract import prompting
 from langextract import resolver
 from langextract import schema
 from langextract import visualization
 
+__all__ = [
+    "extract",
+    "visualize",
+    "annotation",
+    "data",
+    "exceptions",
+    "inference",
+    "io",
+    "prompting",
+    "resolver",
+    "schema",
+    "visualization",
+]
+
 LanguageModelT = TypeVar("LanguageModelT", bound=inference.BaseLanguageModel)
 
 # Set up visualization helper at the top level (lx.visualize).

diff --git a/langextract/annotation.py b/langextract/annotation.py
@@ -31,6 +31,7 @@
 
 from langextract import chunking
 from langextract import data
+from langextract import exceptions
 from langextract import inference
 from langextract import progress
 from langextract import prompting
@@ -39,7 +40,7 @@
 ATTRIBUTE_SUFFIX = "_attributes"
 
 
-class DocumentRepeatError(Exception):
+class DocumentRepeatError(exceptions.LangExtractError):
   """Exception raised when identical document ids are present."""
 
 

diff --git a/langextract/chunking.py b/langextract/chunking.py
@@ -28,10 +28,11 @@
 import more_itertools
 
 from langextract import data
+from langextract import exceptions
 from langextract import tokenizer
 
 
-class TokenUtilError(Exception):
+class TokenUtilError(exceptions.LangExtractError):
   """Error raised when token_util returns unexpected values."""
 
 

diff --git a/langextract/exceptions.py b/langextract/exceptions.py
@@ -0,0 +1,26 @@
+# Copyright 2025 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Base exceptions for LangExtract."""
+
+__all__ = ["LangExtractError"]
+
+
+class LangExtractError(Exception):
+  """Base exception for all LangExtract errors.
+
+  All exceptions raised by LangExtract should inherit from this class.
+  This allows users to catch all LangExtract-specific errors with a single
+  except clause.
+  """
diff --git a/langextract/inference.py b/langextract/inference.py
@@ -30,6 +30,7 @@
 import yaml
 
 from langextract import data
+from langextract import exceptions
 from langextract import schema
 
 _OLLAMA_DEFAULT_MODEL_URL = 'http://localhost:11434'
@@ -49,7 +50,7 @@ def __str__(self) -> str:
     return f'Score: {self.score:.2f}\nOutput:\n{formatted_lines}'
 
 
-class InferenceOutputError(Exception):
+class InferenceOutputError(exceptions.LangExtractError):
   """Exception raised when no scored outputs are available from the language model."""
 
   def __init__(self, message: str):

diff --git a/langextract/io.py b/langextract/io.py
@@ -26,12 +26,13 @@
 
 from langextract import data
 from langextract import data_lib
+from langextract import exceptions
 from langextract import progress
 
 DEFAULT_TIMEOUT_SECONDS = 30
 
 
-class InvalidDatasetError(Exception):
+class InvalidDatasetError(exceptions.LangExtractError):
   """Error raised when Dataset is empty or invalid."""
 
 

diff --git a/langextract/prompting.py b/langextract/prompting.py
@@ -23,10 +23,11 @@
 import yaml
 
 from langextract import data
+from langextract import exceptions
 from langextract import schema
 
 
-class PromptBuilderError(Exception):
+class PromptBuilderError(exceptions.LangExtractError):
   """Failure to build prompt."""
 
 

diff --git a/langextract/resolver.py b/langextract/resolver.py
@@ -31,6 +31,7 @@
 import yaml
 
 from langextract import data
+from langextract import exceptions
 from langextract import schema
 from langextract import tokenizer
 
@@ -151,7 +152,7 @@ def align(
 ExtractionValueType = str | int | float | dict | list | None
 
 
-class ResolverParsingError(Exception):
+class ResolverParsingError(exceptions.LangExtractError):
   """Error raised when content cannot be parsed as the given format."""
 
 

diff --git a/langextract/tokenizer.py b/langextract/tokenizer.py
@@ -30,8 +30,10 @@
 
 from absl import logging
 
+from langextract import exceptions
 
-class BaseTokenizerError(Exception):
+
+class BaseTokenizerError(exceptions.LangExtractError):
   """Base class for all tokenizer-related errors."""