Changes from 2 commits
Commits
56 commits
2ce2399
docs(pypi): Improve README display and badge reliability
aksg87 Jul 22, 2025
4fe7580
feat: add trusted publishing workflow and prepare v1.0.0 release
aksg87 Jul 22, 2025
e696a48
Fix: Resolve libmagic ImportError (#6)
aksg87 Aug 1, 2025
5447637
docs: clarify output_dir behavior in medication_examples.md
kleeena Aug 1, 2025
9c47b34
Merge pull request #11 from google/fix/libmagic-dependency-issue
aksg87 Aug 1, 2025
175e075
Removed inline comment in medication example
kleeena Aug 2, 2025
9472099
Merge pull request #15 from kleeena/docs/update-medication_examples.md
aksg87 Aug 2, 2025
e6c3dcd
docs: add output_dir="." to all save_annotated_documents examples
aksg87 Aug 2, 2025
1fb1f1d
Merge pull request #17 from google/fix/output-dir-consistency
aksg87 Aug 2, 2025
7905f93
Fix typo in Ollama API parameter name
Mirza-Samad-Ahmed-Baig Aug 2, 2025
06afc9c
Fix security vulnerability and bugs in Ollama API integration
Mirza-Samad-Ahmed-Baig Aug 2, 2025
13fbd2c
build: add formatting & linting pipeline with pre-commit integration
aksg87 Aug 3, 2025
c8d2027
style: apply pyink, isort, and pre-commit formatting
aksg87 Aug 3, 2025
146a095
ci: enable format and lint checks in tox
aksg87 Aug 3, 2025
aa6da18
Merge pull request #24 from google/feat/code-formatting-pipeline
aksg87 Aug 3, 2025
ed65bca
Add LangExtractError base exception for centralized error handling
aksg87 Aug 3, 2025
6c4508b
Merge pull request #26 from google/feat/exception-hierarchy
aksg87 Aug 3, 2025
8b85225
fix: Remove LangFun and pylibmagic dependencies (v1.0.2)
aksg87 Aug 3, 2025
88520cc
Merge pull request #28 from google/fix/remove-breaking-dep-langfun
aksg87 Aug 3, 2025
75a6f12
Fix save_annotated_documents to handle string paths
aksg87 Aug 3, 2025
a415b94
Merge pull request #29 from google/fix-save-annotated-documents-mkdir
aksg87 Aug 3, 2025
8289b3a
feat: Add OpenAI language model support
aksg87 Aug 3, 2025
c8ef723
Merge pull request #31 from google/feature/add-oai-inference
aksg87 Aug 3, 2025
dfe8188
fix(ui): prevent current highlight border from being obscured. Chan…
tonebeta Aug 4, 2025
0d76530
Merge branch 'google:main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 4, 2025
87c511e
feat: Add live API integration tests (#39)
aksg87 Aug 4, 2025
dc61372
Add PR template validation workflow (#45)
aksg87 Aug 4, 2025
7fc809f
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 5, 2025
da771e6
fix: Change OllamaLanguageModel parameter from 'model' to 'model_id' …
aksg87 Aug 5, 2025
e83d5cf
feat: Add CITATION.cff file for proper software citation
aksg87 Aug 5, 2025
337beee
feat: Add Ollama integration with Docker examples and CI tests (#62)
aksg87 Aug 5, 2025
a7ef0bd
chore: Bump version to 1.0.4 for release
aksg87 Aug 5, 2025
87beb4f
build(deps): bump tj-actions/changed-files (#66)
dependabot[bot] Aug 5, 2025
db140d1
Add PR validation workflows and update contribution guidelines (#74)
aksg87 Aug 5, 2025
ed97f73
Fix custom comment in linked issue check (#77)
aksg87 Aug 5, 2025
ad1f27b
Add infrastructure file protection workflow (#76)
aksg87 Aug 5, 2025
41bc9ed
Allow maintainers to bypass community support requirement
aksg87 Aug 5, 2025
54e57db
Add manual trigger capability to validation workflows (#75)
aksg87 Aug 5, 2025
25ebc17
Fix fork PR labeling by using pull_request_target
aksg87 Aug 5, 2025
1290d63
Add workflow_dispatch trigger to CI workflow
aksg87 Aug 6, 2025
42687fc
Add secure label-based testing for fork PRs
aksg87 Aug 6, 2025
234081e
Add base_url to OpenAILanguageModel (#51)
mariano Aug 6, 2025
46b4f0d
Fix validation workflows that were skipping all checks
aksg87 Aug 6, 2025
6fb66cf
Add commit status to revalidation workflow
aksg87 Aug 6, 2025
47a251e
Fix boolean comparison in revalidation workflow
aksg87 Aug 7, 2025
b28e673
Add maintenance scripts for PR management
aksg87 Aug 7, 2025
6b02efb
Fix IPython import warnings and notebook detection (#86)
aksg87 Aug 7, 2025
e6dcc8e
Fix CI to validate PR branch formatting directly
aksg87 Aug 7, 2025
1c3c1a2
Add PR update automation workflows
aksg87 Aug 7, 2025
b60f0b2
Fix workflow formatting
aksg87 Aug 7, 2025
f888bd8
Minor changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
8659ef3
Merge branch 'fix-ollama-num-threads-typo'
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ea71754
Fix chunking bug and improve test documentation (#88)
aksg87 Aug 7, 2025
82c6644
Fix: Resolve merge conflict and update docstrings in inference.py
Mirza-Samad-Ahmed-Baig Aug 7, 2025
ce0caa5
Changes
Mirza-Samad-Ahmed-Baig Aug 7, 2025
792fd3e
Merge branch 'main' into fix-ollama-num-threads-typo
Mirza-Samad-Ahmed-Baig Aug 7, 2025
7 changes: 1 addition & 6 deletions Dockerfile
@@ -1,11 +1,6 @@
-# Production Dockerfile for LangExtract with libmagic support
+# Production Dockerfile for LangExtract
 FROM python:3.10-slim
 
-# Install system dependencies including libmagic
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    libmagic1 \
-    && rm -rf /var/lib/apt/lists/*
-
 # Set working directory
 WORKDIR /app
6 changes: 0 additions & 6 deletions README.md
@@ -336,12 +336,6 @@ pylint --rcfile=.pylintrc langextract tests
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for full development guidelines.
 
-## Troubleshooting
-
-**libmagic error**: If you see "failed to find libmagic", install with `pip install langextract[full]` or install system dependencies:
-- Ubuntu/Debian: `sudo apt-get install libmagic1`
-- macOS: `brew install libmagic`
-
 ## Disclaimer
 
 This is not an officially supported Google product. If you use
2 changes: 1 addition & 1 deletion exceptions.py
@@ -27,4 +27,4 @@ class LangExtractError(Exception):
   All exceptions raised by LangExtract should inherit from this class.
   This allows users to catch all LangExtract-specific errors with a single
   except clause.
-  """
+  """
7 changes: 0 additions & 7 deletions langextract/__init__.py
@@ -16,13 +16,6 @@
 
 from __future__ import annotations
 
-# Ensure libmagic is available before langfun imports python-magic.
-# pylibmagic provides pre-built binaries that python-magic needs.
-try:
-  import pylibmagic  # noqa: F401 (side-effect import)
-except ImportError:
-  pass
-
 from collections.abc import Iterable, Sequence
 import os
 from typing import Any, cast, Type, TypeVar
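
The deleted block is a standard optional-dependency shim: import a module purely for its side effects and tolerate its absence. For reference, the general shape of the pattern being retired here (optional_package is an illustrative name, not a real dependency):

try:
  import optional_package  # noqa: F401 (side-effect import, may be absent)
except ImportError:
  pass  # optional binaries not installed; proceed without them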
44 changes: 0 additions & 44 deletions langextract/inference.py
@@ -24,7 +24,6 @@
 from typing import Any
 
 from google import genai
-import langfun as lf
 import requests
 from typing_extensions import override
 import yaml
@@ -97,49 +96,6 @@ class InferenceType(enum.Enum):
   MULTIPROCESS = 'multiprocess'
 
 
-# TODO: Add support for llm options.
-@dataclasses.dataclass(init=False)
-class LangFunLanguageModel(BaseLanguageModel):
-  """Language model inference class using LangFun language class.
-
-  See https://github.com/google/langfun for more details on LangFun.
-  """
-
-  _lm: lf.core.language_model.LanguageModel  # underlying LangFun model
-  _constraint: schema.Constraint = dataclasses.field(
-      default_factory=schema.Constraint, repr=False, compare=False
-  )
-  _extra_kwargs: dict[str, Any] = dataclasses.field(
-      default_factory=dict, repr=False, compare=False
-  )
-
-  def __init__(
-      self,
-      language_model: lf.core.language_model.LanguageModel,
-      constraint: schema.Constraint = schema.Constraint(),
-      **kwargs,
-  ) -> None:
-    self._lm = language_model
-    self._constraint = constraint
-
-    # Preserve any unused kwargs for debugging / future use
-    self._extra_kwargs = kwargs or {}
-    super().__init__(constraint=constraint)
-
-  @override
-  def infer(
-      self, batch_prompts: Sequence[str], **kwargs
-  ) -> Iterator[Sequence[ScoredOutput]]:
-    responses = self._lm.sample(prompts=batch_prompts)
-    for a_response in responses:
-      for sample in a_response.samples:
-        yield [
-            ScoredOutput(
-                score=sample.response.score, output=sample.response.text
-            )
-        ]
-
-
 @dataclasses.dataclass(init=False)
 class OllamaLanguageModel(BaseLanguageModel):
   """Language model inference class using Ollama based host."""
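
The deleted class above also documents the inference contract that the surviving backends (Gemini, Ollama, OpenAI) implement: infer consumes a sequence of prompts and yields one Sequence[ScoredOutput] per prompt. A minimal sketch of a custom backend against that assumed interface (EchoLanguageModel and its echo behavior are invented for illustration, and BaseLanguageModel is assumed to be constructible without arguments):

from collections.abc import Iterator, Sequence

from langextract import inference


class EchoLanguageModel(inference.BaseLanguageModel):
  """Hypothetical backend that echoes each prompt back as its output."""

  def infer(
      self, batch_prompts: Sequence[str], **kwargs
  ) -> Iterator[Sequence[inference.ScoredOutput]]:
    for prompt in batch_prompts:
      # Mirror the removed LangFun wrapper: one candidate list per prompt.
      yield [inference.ScoredOutput(score=1.0, output=prompt)]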
9 changes: 1 addition & 8 deletions pyproject.toml
@@ -18,7 +18,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "langextract"
-version = "1.0.1"
+version = "1.0.2"
 description = "LangExtract: A library for extracting structured data from language models"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -32,16 +32,13 @@ dependencies = [
     "async_timeout>=4.0.0",
     "exceptiongroup>=1.1.0",
     "google-genai>=0.1.0",
-    "langfun>=0.1.0",
     "ml-collections>=0.1.0",
     "more-itertools>=8.0.0",
     "numpy>=1.20.0",
     "openai>=0.27.0",
     "pandas>=1.3.0",
     "pydantic>=1.8.0",
     "python-dotenv>=0.19.0",
-    "python-magic>=0.4.27",
-    "pylibmagic>=0.5.0",
     "requests>=2.25.0",
     "tqdm>=4.64.0",
     "typing-extensions>=4.0.0"
@@ -66,10 +63,6 @@ test = [
     "pytest>=7.4.0",
     "tomli>=2.0.0"
 ]
-full = [
-    "python-magic>=0.4.27",
-    "pylibmagic>=0.5.0",
-]
 
 [tool.setuptools]
 packages = ["langextract"]
8 changes: 4 additions & 4 deletions tests/annotation_test.py
@@ -35,7 +35,7 @@ class AnnotatorTest(absltest.TestCase):
   def setUp(self):
     super().setUp()
     self.mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     self.annotator = annotation.Annotator(
         language_model=self.mock_language_model,
@@ -688,7 +688,7 @@ def test_annotate_documents(
       batch_length: int = 1,
   ):
     mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
 
     # Define a side effect function so return length based on batch length.
@@ -761,7 +761,7 @@ def test_annotate_documents_exceptions(
       batch_length: int = 1,
   ):
     mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     mock_language_model.infer.return_value = [
         [
@@ -798,7 +798,7 @@ class AnnotatorMultiPassTest(absltest.TestCase):
   def setUp(self):
     super().setUp()
     self.mock_language_model = self.enter_context(
-        mock.patch.object(inference, "LangFunLanguageModel", autospec=True)
+        mock.patch.object(inference, "GeminiLanguageModel", autospec=True)
     )
     self.annotator = annotation.Annotator(
         language_model=self.mock_language_model,
49 changes: 0 additions & 49 deletions tests/inference_test.py
@@ -15,59 +15,10 @@
 from unittest import mock
 
 from absl.testing import absltest
-import langfun as lf
 
 from langextract import inference
 
 
-class TestLangFunLanguageModel(absltest.TestCase):
-
-  @mock.patch.object(
-      inference.lf.core.language_model, "LanguageModel", autospec=True
-  )
-  def test_langfun_infer(self, mock_lf_model):
-    mock_client_instance = mock_lf_model.return_value
-    metadata = {
-        "score": -0.004259720362824737,
-        "logprobs": None,
-        "is_cached": False,
-    }
-    source = lf.UserMessage(
-        text="What's heart in Italian?.",
-        sender="User",
-        metadata={"formatted_text": "What's heart in Italian?."},
-        tags=["lm-input"],
-    )
-    sample = lf.LMSample(
-        response=lf.AIMessage(
-            text="Cuore",
-            sender="AI",
-            metadata=metadata,
-            source=source,
-            tags=["lm-response"],
-        ),
-        score=-0.004259720362824737,
-    )
-    actual_response = lf.LMSamplingResult(
-        samples=[sample],
-    )
-
-    # Mock the sample response.
-    mock_client_instance.sample.return_value = [actual_response]
-    model = inference.LangFunLanguageModel(language_model=mock_client_instance)
-
-    batch_prompts = ["What's heart in Italian?"]
-
-    expected_results = [
-        [inference.ScoredOutput(score=-0.004259720362824737, output="Cuore")]
-    ]
-
-    results = list(model.infer(batch_prompts))
-
-    mock_client_instance.sample.assert_called_once_with(prompts=batch_prompts)
-    self.assertEqual(results, expected_results)
-
-
 class TestOllamaLanguageModel(absltest.TestCase):
 
   @mock.patch.object(inference.OllamaLanguageModel, "_ollama_query")