-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
fix: make stream inactivity timeout configurable, increase for Ollama #1993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…fixes #1984) Local providers like Ollama on slower hardware can take more than 60 seconds to generate the first response token. The hardcoded 60s inactivity timeout was causing tasks to silently fail and transfer to Human Review without any visible error message in the frontend logs. - Add streamInactivityTimeoutMs option to RunnerOptions so callers can override the inactivity timeout per-session - Use the configurable value in executeStream() (defaulting to 60s as before) - Set a 5-minute (300s) timeout for Ollama sessions in all three worker session runners (runSingleSession, runDefaultSession, runAgenticSpecOrchestrator) to accommodate slow inference on local hardware Co-Authored-By: Octopus <liyuan851277048@icloud.com>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -341,6 +341,9 @@ async function runSingleSession( | |
| modelId: phaseModelId, | ||
| }) | ||
| : undefined, | ||
| // Local providers like Ollama can take much longer than 60s to generate a | ||
| // response on slower hardware. Use a 5-minute timeout instead of the default. | ||
| streamInactivityTimeoutMs: baseSession.provider === 'ollama' ? 300_000 : undefined, | ||
|
Comment on lines +344 to +346
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Centralize the Ollama timeout policy to avoid divergence across callsites. The same provider check and ♻️ Proposed refactor+const OLLAMA_STREAM_INACTIVITY_TIMEOUT_MS = 300_000;
+
+function getStreamInactivityTimeoutMs(provider: string): number | undefined {
+ return provider === 'ollama' ? OLLAMA_STREAM_INACTIVITY_TIMEOUT_MS : undefined;
+}
@@
- streamInactivityTimeoutMs: baseSession.provider === 'ollama' ? 300_000 : undefined,
+ streamInactivityTimeoutMs: getStreamInactivityTimeoutMs(baseSession.provider),
@@
- streamInactivityTimeoutMs: session.provider === 'ollama' ? 300_000 : undefined,
+ streamInactivityTimeoutMs: getStreamInactivityTimeoutMs(session.provider),
@@
- streamInactivityTimeoutMs: session.provider === 'ollama' ? 300_000 : undefined,
+ streamInactivityTimeoutMs: getStreamInactivityTimeoutMs(session.provider),Also applies to: 523-523, 1120-1120 🤖 Prompt for AI Agents |
||
| }; | ||
|
|
||
| let sessionResult: SessionResult; | ||
|
|
@@ -517,6 +520,7 @@ async function runDefaultSession( | |
| modelId: session.modelId, | ||
| }) | ||
| : undefined, | ||
| streamInactivityTimeoutMs: session.provider === 'ollama' ? 300_000 : undefined, | ||
| }, { | ||
| contextWindowLimit, | ||
| apiKey: session.apiKey, | ||
|
|
@@ -1113,6 +1117,7 @@ async function runAgenticSpecOrchestrator( | |
| modelId: session.modelId, | ||
| }) | ||
| : undefined, | ||
| streamInactivityTimeoutMs: session.provider === 'ollama' ? 300_000 : undefined, | ||
| }, { | ||
| contextWindowLimit, | ||
| apiKey: session.apiKey, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -125,6 +125,13 @@ export interface RunnerOptions { | |
| onAccountSwitch?: (failedAccountId: string, error: SessionError) => Promise<QueueResolvedAuth | null>; | ||
| /** Current account ID from the priority queue (needed for account-switch retry) */ | ||
| currentAccountId?: string; | ||
| /** | ||
| * Stream inactivity timeout in milliseconds. If no stream parts arrive within | ||
| * this period, the stream is aborted. Defaults to 60 seconds. Local providers | ||
| * (e.g., Ollama) may need a higher value since model inference can take much | ||
| * longer than cloud providers. | ||
| */ | ||
| streamInactivityTimeoutMs?: number; | ||
| } | ||
|
|
||
| // ============================================================================= | ||
|
|
@@ -149,7 +156,7 @@ export async function runAgentSession( | |
| config: SessionConfig, | ||
| options: RunnerOptions = {}, | ||
| ): Promise<SessionResult> { | ||
| const { onEvent, onAuthRefresh, onModelRefresh, tools, memoryContext, onAccountSwitch, currentAccountId } = options; | ||
| const { onEvent, onAuthRefresh, onModelRefresh, tools, memoryContext, onAccountSwitch, currentAccountId, streamInactivityTimeoutMs } = options; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line has become excessively long and difficult to read due to the addition of `streamInactivityTimeoutMs`. Consider reformatting the destructuring across multiple lines:
const {
onEvent,
onAuthRefresh,
onModelRefresh,
tools,
memoryContext,
onAccountSwitch,
currentAccountId,
streamInactivityTimeoutMs,
} = options; |
||
| const startTime = Date.now(); | ||
|
|
||
| let authRetries = 0; | ||
|
|
@@ -159,7 +166,7 @@ export async function runAgentSession( | |
| // Retry loop for auth refresh and account switching | ||
| while (authRetries <= MAX_AUTH_RETRIES) { | ||
| try { | ||
| const result = await executeStream(activeConfig, tools, onEvent, memoryContext); | ||
| const result = await executeStream(activeConfig, tools, onEvent, memoryContext, streamInactivityTimeoutMs); | ||
| return { | ||
| ...result, | ||
| durationMs: Date.now() - startTime, | ||
|
|
@@ -265,6 +272,7 @@ async function executeStream( | |
| tools: Record<string, AITool> | undefined, | ||
| onEvent: SessionEventCallback | undefined, | ||
| memoryContext: MemorySessionContext | undefined, | ||
| inactivityTimeoutMs: number = STREAM_INACTIVITY_TIMEOUT_MS, | ||
| ): Promise<Omit<SessionResult, 'durationMs'>> { | ||
| const baseMaxSteps = config.maxSteps ?? DEFAULT_MAX_STEPS; | ||
|
|
||
|
|
@@ -472,14 +480,14 @@ async function executeStream( | |
| }); | ||
|
|
||
| // Consume the full stream with inactivity timeout protection. | ||
| // The timer fires if no stream parts arrive within STREAM_INACTIVITY_TIMEOUT_MS, | ||
| // The timer fires if no stream parts arrive within inactivityTimeoutMs, | ||
| // aborting the stream and preventing indefinite worker hangs. | ||
| let streamInactivityTimer: ReturnType<typeof setTimeout> | null = null; | ||
| const resetStreamInactivityTimer = () => { | ||
| if (streamInactivityTimer) clearTimeout(streamInactivityTimer); | ||
| streamInactivityTimer = setTimeout(() => { | ||
| streamInactivityController.abort(STREAM_INACTIVITY_REASON); | ||
| }, STREAM_INACTIVITY_TIMEOUT_MS); | ||
| }, inactivityTimeoutMs); | ||
| }; | ||
|
|
||
| resetStreamInactivityTimer(); // Arm for initial response | ||
|
|
@@ -503,7 +511,7 @@ async function executeStream( | |
| usage: summary.usage, | ||
| error: { | ||
| code: 'stream_timeout', | ||
| message: `Stream inactivity timeout — no data received from provider for ${STREAM_INACTIVITY_TIMEOUT_MS / 1000}s`, | ||
| message: `Stream inactivity timeout — no data received from provider for ${inactivityTimeoutMs / 1000}s`, | ||
| retryable: true, | ||
| }, | ||
| messages, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The magic number
`300_000` (representing a 5-minute timeout) and the hardcoded string `'ollama'` are repeated in three different locations within this file (lines 346, 523, and 1120). This duplication makes the code harder to maintain and prone to inconsistency if the timeout needs to be adjusted in the future. Consider defining a constant for the Ollama inactivity timeout and using the `SupportedProvider.Ollama` enum instead of a string literal.