feat: Add show_progress parameter for independent progress bar control (google#227)

aksg87 · web-flow · commit 51bded6bff3d · 2025-09-04T19:21:56.000-04:00
Progress bar visibility is now controlled independently of debug logging.
Users can show/hide progress without affecting debug output.
Note: debug=False no longer hides the progress bar; pass show_progress=False
to hide it.

- Add show_progress parameter (defaults to True) to extract(),
  annotate_documents() and annotate_text()
- Add parameterized tests for all flag combinations
diff --git a/langextract/annotation.py b/langextract/annotation.py
@@ -201,6 +201,7 @@ def annotate_documents(
       batch_length: int = 1,
       debug: bool = True,
       extraction_passes: int = 1,
+      show_progress: bool = True,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Annotates a sequence of documents with NLP extractions.
@@ -223,6 +224,7 @@ def annotate_documents(
         standard single extraction.
         Values > 1 reprocess tokens multiple times, potentially increasing
         costs with the potential for a more thorough extraction.
+      show_progress: Whether to show a progress bar. Defaults to True.
       **kwargs: Additional arguments passed to LanguageModel.infer and Resolver.
 
     Yields:
@@ -234,7 +236,13 @@ def annotate_documents(
 
     if extraction_passes == 1:
       yield from self._annotate_documents_single_pass(
-          documents, resolver, max_char_buffer, batch_length, debug, **kwargs
+          documents,
+          resolver,
+          max_char_buffer,
+          batch_length,
+          debug,
+          show_progress,
+          **kwargs,
       )
     else:
       yield from self._annotate_documents_sequential_passes(
@@ -244,6 +252,7 @@ def annotate_documents(
           batch_length,
           debug,
           extraction_passes,
+          show_progress,
           **kwargs,
       )
 
@@ -254,6 +263,7 @@ def _annotate_documents_single_pass(
       max_char_buffer: int,
       batch_length: int,
       debug: bool,
+      show_progress: bool = True,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Single-pass annotation logic (original implementation)."""
@@ -273,7 +283,7 @@ def _annotate_documents_single_pass(
     model_info = progress.get_model_info(self._language_model)
 
     progress_bar = progress.create_extraction_progress_bar(
-        batches, model_info=model_info, disable=not debug
+        batches, model_info=model_info, disable=not show_progress
     )
 
     chars_processed = 0
@@ -397,6 +407,7 @@ def _annotate_documents_sequential_passes(
       batch_length: int,
       debug: bool,
       extraction_passes: int,
+      show_progress: bool = True,
       **kwargs,
   ) -> Iterator[data.AnnotatedDocument]:
     """Sequential extraction passes logic for improved recall."""
@@ -423,7 +434,8 @@ def _annotate_documents_sequential_passes(
           max_char_buffer,
           batch_length,
           debug=(debug and pass_num == 0),
-          **kwargs,  # Only show progress on first pass
+          show_progress=show_progress if pass_num == 0 else False,
+          **kwargs,
       ):
         doc_id = annotated_doc.document_id
 
@@ -472,6 +484,7 @@ def annotate_text(
       additional_context: str | None = None,
       debug: bool = True,
       extraction_passes: int = 1,
+      show_progress: bool = True,
       **kwargs,
   ) -> data.AnnotatedDocument:
     """Annotates text with NLP extractions for text input.
@@ -488,6 +501,7 @@ def annotate_text(
         recall by finding additional entities. Defaults to 1, which performs
         standard single extraction. Values > 1 reprocess tokens multiple times,
         potentially increasing costs.
+      show_progress: Whether to show a progress bar. Defaults to True.
       **kwargs: Additional arguments for inference and resolver_lib.
 
     Returns:
@@ -511,6 +525,7 @@ def annotate_text(
             batch_length,
             debug,
             extraction_passes,
+            show_progress,
             **kwargs,
         )
     )
diff --git a/langextract/extraction.py b/langextract/extraction.py
@@ -56,6 +56,7 @@ def extract(
     fetch_urls: bool = True,
     prompt_validation_level: pv.PromptValidationLevel = pv.PromptValidationLevel.WARNING,
     prompt_validation_strict: bool = False,
+    show_progress: bool = True,
 ) -> typing.Any:
   """Extracts structured information from text.
 
@@ -149,6 +150,7 @@ def extract(
         raises on failures. Defaults to WARNING.
       prompt_validation_strict: When True and prompt_validation_level is ERROR,
         raises on non-exact matches (MATCH_FUZZY, MATCH_LESSER). Defaults to False.
+      show_progress: Whether to show a progress bar during extraction. Defaults to True.
 
   Returns:
       An AnnotatedDocument with the extracted information when input is a
@@ -326,6 +328,7 @@ def extract(
         additional_context=additional_context,
         debug=debug,
         extraction_passes=extraction_passes,
+        show_progress=show_progress,
         max_workers=max_workers,
         **alignment_kwargs,
     )
@@ -338,6 +341,7 @@ def extract(
         batch_length=batch_length,
         debug=debug,
         extraction_passes=extraction_passes,
+        show_progress=show_progress,
         max_workers=max_workers,
         **alignment_kwargs,
     )
diff --git a/tests/init_test.py b/tests/init_test.py
@@ -18,6 +18,7 @@
 from unittest import mock
 
 from absl.testing import absltest
+from absl.testing import parameterized
 
 from langextract import prompting
 import langextract as lx
@@ -26,7 +27,7 @@
 from langextract.providers import schemas
 
 
-class InitTest(absltest.TestCase):
+class InitTest(parameterized.TestCase):
   """Test cases for the main package functions."""
 
   @mock.patch.object(
@@ -454,6 +455,88 @@ def test_tokenizer_module_exports_via_compatibility_shim(self):
             f"lx.tokenizer.{name} not accessible via compatibility shim",
         )
 
+  @parameterized.named_parameters(
+      dict(
+          testcase_name="show_progress_true_debug_false",
+          show_progress=True,
+          debug=False,
+          expected_progress_disabled=False,
+      ),
+      dict(
+          testcase_name="show_progress_false_debug_false",
+          show_progress=False,
+          debug=False,
+          expected_progress_disabled=True,
+      ),
+      dict(
+          testcase_name="show_progress_true_debug_true",
+          show_progress=True,
+          debug=True,
+          expected_progress_disabled=False,
+      ),
+      dict(
+          testcase_name="show_progress_false_debug_true",
+          show_progress=False,
+          debug=True,
+          expected_progress_disabled=True,
+      ),
+  )
+  @mock.patch("langextract.progress.create_extraction_progress_bar")
+  @mock.patch("langextract.extraction.factory.create_model")
+  def test_show_progress_controls_progress_bar(
+      self,
+      mock_create_model,
+      mock_progress,
+      show_progress,
+      debug,
+      expected_progress_disabled,
+  ):
+    """Test that show_progress parameter controls progress bar visibility."""
+    mock_model = mock.MagicMock()
+    mock_model.infer.return_value = [
+        [
+            types.ScoredOutput(
+                output='{"extractions": []}',
+                score=0.9,
+            )
+        ]
+    ]
+    mock_model.requires_fence_output = False
+    mock_create_model.return_value = mock_model
+
+    mock_progress_bar = mock.MagicMock()
+    mock_progress_bar.__iter__ = mock.MagicMock(
+        return_value=iter([mock.MagicMock()])
+    )
+    mock_progress.return_value = mock_progress_bar
+
+    mock_examples = [
+        lx.data.ExampleData(
+            text="Example text",
+            extractions=[
+                lx.data.Extraction(
+                    extraction_class="entity",
+                    extraction_text="example",
+                ),
+            ],
+        )
+    ]
+
+    lx.extract(
+        text_or_documents="test text",
+        prompt_description="extract entities",
+        examples=mock_examples,
+        api_key="test_key",
+        show_progress=show_progress,
+        debug=debug,
+    )
+
+    mock_progress.assert_called()
+    call_args = mock_progress.call_args
+    self.assertEqual(
+        call_args.kwargs.get("disable", False), expected_progress_disabled
+    )
+
 
 if __name__ == "__main__":
   absltest.main()