Skip to content

Commit 8b54f7f

Browse files
authored
Add __all__ exports to core modules for wildcard import compatibility (google#181)
1 parent 0623290 commit 8b54f7f

File tree

3 files changed

+66
-0
lines changed

3 files changed

+66
-0
lines changed

langextract/core/data.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@
2424

2525
FormatType = types.FormatType # Backward compat
2626

27+
__all__ = [
28+
"AlignmentStatus",
29+
"CharInterval",
30+
"Extraction",
31+
"Document",
32+
"AnnotatedDocument",
33+
"ExampleData",
34+
"FormatType",
35+
]
36+
2737

2838
class AlignmentStatus(enum.Enum):
2939
MATCH_EXACT = "match_exact"

langextract/core/tokenizer.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@
3131
from langextract.core import debug_utils
3232
from langextract.core import exceptions
3333

34+
__all__ = [
35+
"BaseTokenizerError",
36+
"InvalidTokenIntervalError",
37+
"SentenceRangeError",
38+
"CharInterval",
39+
"TokenInterval",
40+
"TokenType",
41+
"Token",
42+
"TokenizedText",
43+
"tokenize",
44+
"tokens_text",
45+
"find_sentence_range",
46+
]
47+
3448

3549
class BaseTokenizerError(exceptions.LangExtractError):
3650
"""Base class for all tokenizer-related errors."""

tests/init_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,48 @@ def test_extract_custom_params_reach_inference(
191191
_, kwargs = mock_model.infer.call_args
192192
self.assertEqual(kwargs.get("max_workers"), 5)
193193

194+
def test_data_module_exports_via_compatibility_shim(self):
195+
"""Verify data module exports are accessible via lx.data."""
196+
expected_exports = [
197+
"AlignmentStatus",
198+
"CharInterval",
199+
"Extraction",
200+
"Document",
201+
"AnnotatedDocument",
202+
"ExampleData",
203+
"FormatType",
204+
]
205+
206+
for name in expected_exports:
207+
with self.subTest(export=name):
208+
self.assertTrue(
209+
hasattr(lx.data, name),
210+
f"lx.data.{name} not accessible via compatibility shim",
211+
)
212+
213+
def test_tokenizer_module_exports_via_compatibility_shim(self):
214+
"""Verify tokenizer module exports are accessible via lx.tokenizer."""
215+
expected_exports = [
216+
"BaseTokenizerError",
217+
"InvalidTokenIntervalError",
218+
"SentenceRangeError",
219+
"CharInterval",
220+
"TokenInterval",
221+
"TokenType",
222+
"Token",
223+
"TokenizedText",
224+
"tokenize",
225+
"tokens_text",
226+
"find_sentence_range",
227+
]
228+
229+
for name in expected_exports:
230+
with self.subTest(export=name):
231+
self.assertTrue(
232+
hasattr(lx.tokenizer, name),
233+
f"lx.tokenizer.{name} not accessible via compatibility shim",
234+
)
235+
194236

195237
if __name__ == "__main__":
196238
absltest.main()

0 commit comments

Comments (0)