Changes from 2 commits
Commits
56 commits
2ce2399
docs(pypi): Improve README display and badge reliability
aksg87 Jul 22, 2025
4fe7580
feat: add trusted publishing workflow and prepare v1.0.0 release
aksg87 Jul 22, 2025
e696a48
Fix: Resolve libmagic ImportError (#6)
aksg87 Aug 1, 2025
5447637
docs: clarify output_dir behavior in medication_examples.md
kleeena Aug 1, 2025
9c47b34
Merge pull request #11 from google/fix/libmagic-dependency-issue
aksg87 Aug 1, 2025
175e075
Removed inline comment in medication example
kleeena Aug 2, 2025
9472099
Merge pull request #15 from kleeena/docs/update-medication_examples.md
aksg87 Aug 2, 2025
e6c3dcd
docs: add output_dir="." to all save_annotated_documents examples
aksg87 Aug 2, 2025
1fb1f1d
Merge pull request #17 from google/fix/output-dir-consistency
aksg87 Aug 2, 2025
7905f93
Fix typo in Ollama API parameter name
Mirza-Samad-Ahmed-Baig Aug 2, 2025
06afc9c
Fix security vulnerability and bugs in Ollama API integration
Mirza-Samad-Ahmed-Baig Aug 2, 2025
13fbd2c
build: add formatting & linting pipeline with pre-commit integration
aksg87 Aug 3, 2025
c8d2027
style: apply pyink, isort, and pre-commit formatting
aksg87 Aug 3, 2025
146a095
ci: enable format and lint checks in tox
aksg87 Aug 3, 2025
aa6da18
Merge pull request #24 from google/feat/code-formatting-pipeline
aksg87 Aug 3, 2025
ed65bca
Add LangExtractError base exception for centralized error handling
aksg87 Aug 3, 2025
6c4508b
Merge pull request #26 from google/feat/exception-hierarchy
aksg87 Aug 3, 2025
8b85225
fix: Remove LangFun and pylibmagic dependencies (v1.0.2)
aksg87 Aug 3, 2025
88520cc
Merge pull request #28 from google/fix/remove-breaking-dep-langfun
aksg87 Aug 3, 2025
75a6f12
Fix save_annotated_documents to handle string paths
aksg87 Aug 3, 2025
a415b94
Merge pull request #29 from google/fix-save-annotated-documents-mkdir
aksg87 Aug 3, 2025
8289b3a
feat: Add OpenAI language model support
aksg87 Aug 3, 2025
c8ef723
Merge pull request #31 from google/feature/add-oai-inference
aksg87 Aug 3, 2025
dfe8188
fix(ui): prevent current highlight border from being obscured. Chan…
tonebeta Aug 4, 2025
0d76530
Merge branch 'google:main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 4, 2025
87c511e
feat: Add live API integration tests (#39)
aksg87 Aug 4, 2025
dc61372
Add PR template validation workflow (#45)
aksg87 Aug 4, 2025
7fc809f
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 5, 2025
da771e6
fix: Change OllamaLanguageModel parameter from 'model' to 'model_id' …
aksg87 Aug 5, 2025
e83d5cf
feat: Add CITATION.cff file for proper software citation
aksg87 Aug 5, 2025
337beee
feat: Add Ollama integration with Docker examples and CI tests (#62)
aksg87 Aug 5, 2025
a7ef0bd
chore: Bump version to 1.0.4 for release
aksg87 Aug 5, 2025
87beb4f
build(deps): bump tj-actions/changed-files (#66)
dependabot[bot] Aug 5, 2025
db140d1
Add PR validation workflows and update contribution guidelines (#74)
aksg87 Aug 5, 2025
ed97f73
Fix custom comment in linked issue check (#77)
aksg87 Aug 5, 2025
ad1f27b
Add infrastructure file protection workflow (#76)
aksg87 Aug 5, 2025
41bc9ed
Allow maintainers to bypass community support requirement
aksg87 Aug 5, 2025
54e57db
Add manual trigger capability to validation workflows (#75)
aksg87 Aug 5, 2025
25ebc17
Fix fork PR labeling by using pull_request_target
aksg87 Aug 5, 2025
1290d63
Add workflow_dispatch trigger to CI workflow
aksg87 Aug 6, 2025
42687fc
Add secure label-based testing for fork PRs
aksg87 Aug 6, 2025
234081e
Add base_url to OpenAILanguageModel (#51)
mariano Aug 6, 2025
46b4f0d
Fix validation workflows that were skipping all checks
aksg87 Aug 6, 2025
6fb66cf
Add commit status to revalidation workflow
aksg87 Aug 6, 2025
47a251e
Fix boolean comparison in revalidation workflow
aksg87 Aug 7, 2025
b28e673
Add maintenance scripts for PR management
aksg87 Aug 7, 2025
6b02efb
Fix IPython import warnings and notebook detection (#86)
aksg87 Aug 7, 2025
e6dcc8e
Fix CI to validate PR branch formatting directly
aksg87 Aug 7, 2025
1c3c1a2
Add PR update automation workflows
aksg87 Aug 7, 2025
b60f0b2
Fix workflow formatting
aksg87 Aug 7, 2025
f888bd8
Minor changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
8659ef3
Merge branch 'fix-ollama-num-threads-typo'
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ea71754
Fix chunking bug and improve test documentation (#88)
aksg87 Aug 7, 2025
82c6644
Fix: Resolve merge conflict and update docstrings in inference.py
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ce0caa5
Changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
792fd3e
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 7, 2025
7 changes: 1 addition & 6 deletions Dockerfile
@@ -1,11 +1,6 @@
-# Production Dockerfile for LangExtract with libmagic support
+# Production Dockerfile for LangExtract
 FROM python:3.10-slim
 
-# Install system dependencies including libmagic
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libmagic1 \
-    && rm -rf /var/lib/apt/lists/*
-
 # Set working directory
 WORKDIR /app
6 changes: 0 additions & 6 deletions README.md
@@ -336,12 +336,6 @@ pylint --rcfile=.pylintrc langextract tests
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
 
-## Troubleshooting
-
-**libmagic error**: If you see "failed to find libmagic", install with `pip install langextract[full]` or install system dependencies:
-- Ubuntu/Debian: `sudo apt-get install libmagic1`
-- macOS: `brew install libmagic`
-
 ## Disclaimer
 
 This is not an officially supported Google product. If you use
2 changes: 1 addition & 1 deletion exceptions.py
@@ -27,4 +27,4 @@ class LangExtractError(Exception):
   All exceptions raised by LangExtract should inherit from this class.
   This allows users to catch all LangExtract-specific errors with a single
   except clause.
-  """
+  """
7 changes: 0 additions & 7 deletions langextract/__init__.py
@@ -16,13 +16,6 @@
 
 from __future__ import annotations
 
-# Ensure libmagic is available before langfun imports python-magic.
-# pylibmagic provides pre-built binaries that python-magic needs.
-try:
-  import pylibmagic  # noqa: F401 (side-effect import)
-except ImportError:
-  pass
-
 from collections.abc import Iterable, Sequence
 import os
 from typing import Any, cast, Type, TypeVar
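
The deleted block is a standard optional-dependency shim: import a module purely for its side effects and tolerate its absence. For reference, the general shape of the pattern being retired here (optional_package is an illustrative name, not a real dependency):

try:
  import optional_package  # noqa: F401 (side-effect import, may be absent)
except ImportError:
  pass  # optional binaries not installed; proceed without them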
44 changes: 0 additions & 44 deletions langextract/inference.py
@@ -24,7 +24,6 @@
 from typing import Any
 
 from google import genai
-import langfun as lf
 import requests
 from typing_extensions import override
 import yaml
@@ -97,49 +96,6 @@ class InferenceType(enum.Enum):
   MULTIPROCESS = 'multiprocess'
 
 
-# TODO: Add support for llm options.
-@dataclasses.dataclass(init=False)
-class LangFunLanguageModel(BaseLanguageModel):
-  """Language model inference class using LangFun language class.
-
-  See https://github.com/google/langfun for more details on LangFun.
-  """
-
-  _lm: lf.core.language_model.LanguageModel  # underlying LangFun model
-  _constraint: schema.Constraint = dataclasses.field(
-      default_factory=schema.Constraint, repr=False, compare=False
-  )
-  _extra_kwargs: dict[str, Any] = dataclasses.field(
-      default_factory=dict, repr=False, compare=False
-  )
-
-  def __init__(
-      self,
-      language_model: lf.core.language_model.LanguageModel,
-      constraint: schema.Constraint = schema.Constraint(),
-      **kwargs,
-  ) -> None:
-    self._lm = language_model
-    self._constraint = constraint
-
-    # Preserve any unused kwargs for debugging / future use
-    self._extra_kwargs = kwargs or {}
-    super().__init__(constraint=constraint)
-
-  @override
-  def infer(
-      self, batch_prompts: Sequence[str], **kwargs
-  ) -> Iterator[Sequence[ScoredOutput]]:
-    responses = self._lm.sample(prompts=batch_prompts)
-    for a_response in responses:
-      for sample in a_response.samples:
-        yield [
-            ScoredOutput(
-                score=sample.response.score, output=sample.response.text
-            )
-        ]
-
-
 @dataclasses.dataclass(init=False)
 class OllamaLanguageModel(BaseLanguageModel):
   """Language model inference class using Ollama based host."""
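
The deleted class above also documents the inference contract that the surviving backends (Gemini, Ollama, OpenAI) implement: infer consumes a sequence of prompts and yields one Sequence[ScoredOutput] per prompt. A minimal sketch of a custom backend against that assumed interface (EchoLanguageModel and its echo behavior are invented for illustration, and BaseLanguageModel is assumed to be constructible without arguments):

from collections.abc import Iterator, Sequence

from langextract import inference


class EchoLanguageModel(inference.BaseLanguageModel):
  """Hypothetical backend that echoes each prompt back as its output."""

  def infer(
      self, batch_prompts: Sequence[str], **kwargs
  ) -> Iterator[Sequence[inference.ScoredOutput]]:
    for prompt in batch_prompts:
      # Mirror the removed LangFun wrapper: one candidate list per prompt.
      yield [inference.ScoredOutput(score=1.0, output=prompt)]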
9 changes: 1 addition & 8 deletions pyproject.toml
@@ -18,7 +18,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "langextract"
-version = "1.0.1"
+version = "1.0.2"
 description = "LangExtract: A library for extracting structured data from language models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -32,16 +32,13 @@ dependencies = [
     "async_timeout>=4.0.0",
     "exceptiongroup>=1.1.0",
     "google-genai>=0.1.0",
-    "langfun>=0.1.0",
     "ml-collections>=0.1.0",
     "more-itertools>=8.0.0",
     "numpy>=1.20.0",
     "openai>=0.27.0",
     "pandas>=1.3.0",
     "pydantic>=1.8.0",
     "python-dotenv>=0.19.0",
-    "python-magic>=0.4.27",
-    "pylibmagic>=0.5.0",
     "requests>=2.25.0",
     "tqdm>=4.64.0",
     "typing-extensions>=4.0.0"
@@ -66,10 +63,6 @@ test = [
     "pytest>=7.4.0",
     "tomli>=2.0.0"
 ]
-full = [
-    "python-magic>=0.4.27",
-    "pylibmagic>=0.5.0",
-]
 
 [tool.setuptools]
 packages = ["langextract"]
8 changes: 4 additions & 4 deletions tests/annotation_test.py
@@ -35,7 +35,7 @@ class AnnotatorTest(absltest.TestCase):
   def setUp(self):
     super().setUp()
     self.mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     self.annotator = annotation.Annotator(
         language_model=self.mock_language_model,
@@ -688,7 +688,7 @@ def test_annotate_documents(
       batch_length: int = 1,
   ):
     mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
 
     # Define a side effect function so return length based on batch length.
@@ -761,7 +761,7 @@ def test_annotate_documents_exceptions(
       batch_length: int = 1,
   ):
     mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     mock_language_model.infer.return_value = [
         [
@@ -798,7 +798,7 @@ class AnnotatorMultiPassTest(absltest.TestCase):
   def setUp(self):
     super().setUp()
     self.mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     self.annotator = annotation.Annotator(
         language_model=self.mock_language_model,
49 changes: 0 additions & 49 deletions tests/inference_test.py
@@ -15,59 +15,10 @@
 from unittest import mock
 
 from absl.testing import absltest
-import langfun as lf
 
 from langextract import inference
 
 
-class TestLangFunLanguageModel(absltest.TestCase):
-
-  @mock.patch.object(
-      inference.lf.core.language_model, "LanguageModel", autospec=True
-  )
-  def test_langfun_infer(self, mock_lf_model):
-    mock_client_instance = mock_lf_model.return_value
-    metadata = {
-        "score": -0.004259720362824737,
-        "logprobs": None,
-        "is_cached": False,
-    }
-    source = lf.UserMessage(
-        text="What's heart in Italian?.",
-        sender="User",
-        metadata={"formatted_text": "What's heart in Italian?."},
-        tags=["lm-input"],
-    )
-    sample = lf.LMSample(
-        response=lf.AIMessage(
-            text="Cuore",
-            sender="AI",
-            metadata=metadata,
-            source=source,
-            tags=["lm-response"],
-        ),
-        score=-0.004259720362824737,
-    )
-    actual_response = lf.LMSamplingResult(
-        samples=[sample],
-    )
-
-    # Mock the sample response.
-    mock_client_instance.sample.return_value = [actual_response]
-    model = inference.LangFunLanguageModel(language_model=mock_client_instance)
-
-    batch_prompts = ["What's heart in Italian?"]
-
-    expected_results = [
-        [inference.ScoredOutput(score=-0.004259720362824737, output="Cuore")]
-    ]
-
-    results = list(model.infer(batch_prompts))
-
-    mock_client_instance.sample.assert_called_once_with(prompts=batch_prompts)
-    self.assertEqual(results, expected_results)
-
-
 class TestOllamaLanguageModel(absltest.TestCase):
 
   @mock.patch.object(inference.OllamaLanguageModel, "_ollama_query")