File tree Expand file tree Collapse file tree
packages/paper-qa-nemotron/src/paperqa_nemotron Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1111"""
1212
1313import contextlib
14+ import http
1415import json
1516import logging
1617import os
4041from tenacity import (
4142 before_sleep_log ,
4243 retry ,
44+ retry_if_exception ,
4345 retry_if_exception_type ,
4446 stop_after_attempt ,
4547 wait_exponential ,
@@ -316,6 +318,13 @@ def merge_with_detection(
316318MatrixNemotronParseMarkdownBBox = TypeAdapter (list [list [NemotronParseMarkdownBBox ]])
317319
318320
def _is_litellm_timeout_408(exc: BaseException) -> bool:
    """Report whether *exc* is a LiteLLM ``Timeout`` with HTTP status 408.

    Written as a one-argument predicate so it can be handed to tenacity's
    ``retry_if_exception``.
    """
    # Guard first: only LiteLLM Timeout instances are inspected for a status.
    if not isinstance(exc, litellm.exceptions.Timeout):
        return False
    return exc.status_code == http.HTTPStatus.REQUEST_TIMEOUT
326+
327+
319328@overload
320329async def _call_nvidia_api (
321330 image : "np.ndarray" ,
@@ -355,7 +364,10 @@ async def _call_nvidia_api(
355364 before_sleep = before_sleep_log (logger , logging .WARNING ),
356365)
357366@retry (
358- retry = retry_if_exception_type (TimeoutError ), # Hitting rate limits
367+ retry = (
368+ retry_if_exception_type (TimeoutError ) # Hitting rate limits
369+ | retry_if_exception (_is_litellm_timeout_408 ) # Actual timeout
370+ ),
359371 stop = stop_after_attempt (3 ),
360372 wait = wait_exponential (multiplier = 2 , min = GLOBAL_RATE_LIMITER_TIMEOUT ),
361373 before_sleep = before_sleep_log (logger , logging .WARNING ),
You can’t perform that action at this time.
0 commit comments