diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 5b7f74345a2..c4013597691 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -6,6 +6,7 @@ import { generateObject, type ModelMessage } from "ai" import PROMPT_GENERATE from "./generate.txt" import { SystemPrompt } from "../session/system" import { mergeDeep } from "remeda" +import { RateLimiter } from "../util/rate-limiter" export namespace Agent { export const Info = z @@ -141,6 +142,11 @@ export namespace Agent { export async function generate(input: { description: string }) { const defaultModel = await Provider.defaultModel() const model = await Provider.getModel(defaultModel.providerID, defaultModel.modelID) + + // Apply rate limiting before the request + const config = await Config.get() + await RateLimiter.checkRateLimit(defaultModel.providerID, defaultModel.modelID, config) + const system = SystemPrompt.header(defaultModel.providerID) system.push(PROMPT_GENERATE) const existing = await list() diff --git a/packages/opencode/src/app/app.ts b/packages/opencode/src/app/app.ts index fc7f49cb951..e4d3bf8b28b 100644 --- a/packages/opencode/src/app/app.ts +++ b/packages/opencode/src/app/app.ts @@ -3,6 +3,7 @@ import { Log } from "../util/log" import { Context } from "../util/context" import { Filesystem } from "../util/filesystem" import { Global } from "../global" +import { RateLimiter } from "../util/rate-limiter" import path from "path" import os from "os" import { z } from "zod" @@ -24,6 +25,9 @@ export namespace App { time: z.object({ initialized: z.number().optional(), }), + rateLimited: z.boolean().optional(), + rateLimitedProvider: z.string().optional(), + rateLimitWaitSeconds: z.number().optional(), }) .openapi({ ref: "App", @@ -121,7 +125,13 @@ export namespace App { } export function info() { - return ctx.use().info + const appInfo = ctx.use().info + return { + ...appInfo, + rateLimited: 
RateLimiter.isCurrentlyRateLimited(), + rateLimitedProvider: RateLimiter.getRateLimitedProvider(), + rateLimitWaitSeconds: RateLimiter.getCurrentWaitTime(), + } } export async function initialize() { diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 752014c53b0..1b935a8ed98 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -349,7 +349,18 @@ export namespace Config { .record( ModelsDev.Provider.partial() .extend({ - models: z.record(ModelsDev.Model.partial()).optional(), + models: z + .record( + ModelsDev.Model.partial().extend({ + limit: ModelsDev.Model.shape.limit + .partial() + .extend({ + rpm: z.number().int().positive().optional(), + }) + .optional(), + }), + ) + .optional(), options: z .object({ apiKey: z.string().optional(), diff --git a/packages/opencode/src/provider/models.ts b/packages/opencode/src/provider/models.ts index 56350ff25ca..919500fbd51 100644 --- a/packages/opencode/src/provider/models.ts +++ b/packages/opencode/src/provider/models.ts @@ -27,6 +27,7 @@ export namespace ModelsDev { limit: z.object({ context: z.number(), output: z.number(), + rpm: z.number().int().positive().optional(), }), options: z.record(z.any()), }) diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index fe889e0b30d..07b0e444815 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -217,11 +217,11 @@ export namespace Provider { ...existing?.options, ...model.options, }, - limit: model.limit ?? - existing?.limit ?? { - context: 0, - output: 0, - }, + limit: { + context: model.limit?.context ?? existing?.limit?.context ?? 0, + output: model.limit?.output ?? existing?.limit?.output ?? 
0, + ...(model.limit?.rpm !== undefined && { rpm: model.limit.rpm }), + }, } parsed.models[modelID] = parsedModel } diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 2455962d8d7..53b85428c38 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -17,6 +17,7 @@ import { import PROMPT_INITIALIZE from "../session/prompt/initialize.txt" import PROMPT_PLAN from "../session/prompt/plan.txt" +import { RateLimiter } from "../util/rate-limiter" import { App } from "../app/app" import { Bus } from "../bus" @@ -408,6 +409,7 @@ export namespace Session { l.info("chatting") const inputAgent = input.agent ?? "build" + const config = await Config.get() // Process revert cleanup first, before creating new messages const session = await get(input.sessionID) @@ -673,6 +675,11 @@ export namespace Session { if (msgs.filter((m) => m.info.role === "user").length === 1 && !session.parentID && isDefaultTitle(session.title)) { const small = (await Provider.getSmallModel(input.providerID)) ?? model + + // Apply rate limiting for title generation + const config = await Config.get() + await RateLimiter.checkRateLimit(input.providerID, input.modelID, config) + generateText({ maxOutputTokens: small.info.reasoning ? 
1024 : 20, providerOptions: { @@ -981,6 +988,9 @@ export namespace Session { middleware: [ { async transformParams(args) { + // Apply rate limiting before each request + await RateLimiter.checkRateLimit(input.providerID, input.modelID, config) + if (args.type === "stream") { // @ts-expect-error args.params.prompt = ProviderTransform.message(args.params.prompt, input.providerID, input.modelID) diff --git a/packages/opencode/src/util/rate-limiter.ts new file mode 100644 index 00000000000..ba46111471b --- /dev/null +++ b/packages/opencode/src/util/rate-limiter.ts @@ -0,0 +1,111 @@ +import { Log } from "./log" + +export namespace RateLimiter { + const log = Log.create({ service: "rate-limiter" }) + + // Rate limiting: Map of providerID -> timestamps array (sliding window) + const rateLimitWindows = new Map<string, number[]>() + + // Track if any provider is currently rate limited + let isRateLimited = false + let rateLimitedProvider = "" + let rateLimitWaitUntil = 0 // Timestamp when rate limit ends + + async function sleep(ms: number) { + return new Promise((resolve) => setTimeout(resolve, ms)) + } + + async function waitForRateLimit(key: string, rpmLimit: number) { + const now = Date.now() + const windowMs = 60000 // 1 minute in milliseconds + const windowStart = now - windowMs + + // Get or create window for this key (providerID/modelID) + let window = rateLimitWindows.get(key) + if (!window) { + window = [] + rateLimitWindows.set(key, window) + } + + // Remove old requests outside the sliding window + while (window.length > 0 && window[0] < windowStart) { + window.shift() + } + + // Check if we're at the limit (>= so at most rpmLimit requests enter the window) + if (window.length >= rpmLimit) { + // Calculate how long to wait until the oldest request expires + const oldestRequest = window[0] + const waitTime = oldestRequest + windowMs - now + + if (waitTime > 0) { + // Set global rate limit status + isRateLimited = true + rateLimitedProvider = key + rateLimitWaitUntil = now + waitTime + + 
log.info("rate limit hit, sleeping", { + key, + rpmLimit, + currentRequests: window.length, + waitTimeMs: waitTime, + }) + + await sleep(waitTime) + + // Clear rate limit status + isRateLimited = false + rateLimitedProvider = "" + rateLimitWaitUntil = 0 + + // Recursively check again in case multiple requests are waiting + return waitForRateLimit(key, rpmLimit) + } + } + + // Record this request + window.push(now) + } + + export async function checkRateLimit(providerID: string, modelID: string, config?: any) { + try { + if (!config) return + const providerConfig = config.provider?.[providerID] + let rpmLimit: number | undefined = undefined + // Check for model-level rpm in the limit object + if (providerConfig?.models && modelID && providerConfig.models[modelID]?.limit?.rpm) { + rpmLimit = providerConfig.models[modelID].limit.rpm + } + if (rpmLimit) { + await waitForRateLimit(`${providerID}/${modelID}`, rpmLimit) + } + } catch (error) { + log.warn("failed to apply rate limiting", { error, providerID, modelID }) + } + } + + export function isCurrentlyRateLimited(): boolean { + return isRateLimited + } + + export function getRateLimitedProvider(): string { + return rateLimitedProvider + } + + export function getCurrentWaitTime(): number { + if (!isRateLimited || !rateLimitedProvider || rateLimitWaitUntil === 0) return 0 + + const now = Date.now() + const remainingMs = rateLimitWaitUntil - now + + if (remainingMs <= 0) { + // Rate limit has expired, clear it + isRateLimited = false + rateLimitedProvider = "" + rateLimitWaitUntil = 0 + return 0 + } + + return Math.ceil(remainingMs / 1000) // Convert to seconds and round up + } +} diff --git a/packages/sdk/go/app.go b/packages/sdk/go/app.go index 36d5be77fbf..b61dc99af6b 100644 --- a/packages/sdk/go/app.go +++ b/packages/sdk/go/app.go @@ -222,21 +222,27 @@ func (r agentModelJSON) RawJSON() string { } type App struct { - Git bool `json:"git,required"` - Hostname string `json:"hostname,required"` - Path AppPath 
`json:"path,required"` - Time AppTime `json:"time,required"` - JSON appJSON `json:"-"` + Git bool `json:"git,required"` + Hostname string `json:"hostname,required"` + Path AppPath `json:"path,required"` + Time AppTime `json:"time,required"` + RateLimited bool `json:"rateLimited,omitempty"` + RateLimitedProvider string `json:"rateLimitedProvider,omitempty"` + RateLimitWaitSeconds int `json:"rateLimitWaitSeconds,omitempty"` + JSON appJSON `json:"-"` } // appJSON contains the JSON metadata for the struct [App] type appJSON struct { - Git apijson.Field - Hostname apijson.Field - Path apijson.Field - Time apijson.Field - raw string - ExtraFields map[string]apijson.Field + Git apijson.Field + Hostname apijson.Field + Path apijson.Field + Time apijson.Field + RateLimited apijson.Field + RateLimitedProvider apijson.Field + RateLimitWaitSeconds apijson.Field + raw string + ExtraFields map[string]apijson.Field } func (r *App) UnmarshalJSON(data []byte) (err error) { diff --git a/packages/tui/internal/app/app.go b/packages/tui/internal/app/app.go index af8157adcde..9fa7e8ca860 100644 --- a/packages/tui/internal/app/app.go +++ b/packages/tui/internal/app/app.go @@ -27,36 +27,53 @@ type Message struct { } type App struct { - Info opencode.App - Agents []opencode.Agent - Providers []opencode.Provider - Version string - StatePath string - Config *opencode.Config - Client *opencode.Client - State *State - AgentIndex int - Provider *opencode.Provider - Model *opencode.Model - Session *opencode.Session - Messages []Message - Permissions []opencode.Permission - CurrentPermission opencode.Permission - Commands commands.CommandRegistry - InitialModel *string - InitialPrompt *string - InitialAgent *string - InitialSession *string - compactCancel context.CancelFunc - IsLeaderSequence bool - IsBashMode bool - ScrollSpeed int + Info opencode.App + Agents []opencode.Agent + Providers []opencode.Provider + Version string + StatePath string + Config *opencode.Config + Client *opencode.Client 
+ State *State + AgentIndex int + Provider *opencode.Provider + Model *opencode.Model + Session *opencode.Session + Messages []Message + Permissions []opencode.Permission + CurrentPermission opencode.Permission + Commands commands.CommandRegistry + InitialModel *string + InitialPrompt *string + InitialAgent *string + InitialSession *string + compactCancel context.CancelFunc + IsLeaderSequence bool + IsBashMode bool + ScrollSpeed int + RateLimited bool + RateLimitedProvider string + RateLimitWaitSeconds int } func (a *App) Agent() *opencode.Agent { return &a.Agents[a.AgentIndex] } +func (a *App) RefreshAppInfo() { + appInfo, err := a.Client.App.Get(context.Background()) + if err != nil { + // Don't log this error as it might be too frequent during normal operation + return + } + + a.Info = *appInfo + // Sync the backend fields to our local fields + a.RateLimited = appInfo.RateLimited + a.RateLimitedProvider = appInfo.RateLimitedProvider + a.RateLimitWaitSeconds = appInfo.RateLimitWaitSeconds +} + type SessionCreatedMsg = struct { Session *opencode.Session } diff --git a/packages/tui/internal/components/chat/editor.go b/packages/tui/internal/components/chat/editor.go index 72daf2886eb..d4ed474becb 100644 --- a/packages/tui/internal/components/chat/editor.go +++ b/packages/tui/internal/components/chat/editor.go @@ -76,6 +76,10 @@ func (m *editorComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.width = msg.Width - 4 return m, nil case spinner.TickMsg: + // Refresh app info when busy to get latest rate limit status + if m.app.IsBusy() { + m.app.RefreshAppInfo() + } m.spinner, cmd = m.spinner.Update(msg) return m, cmd case tea.KeyPressMsg: @@ -380,6 +384,12 @@ func (m *editorComponent) Content() string { status := "working" if m.app.CurrentPermission.ID != "" { status = "waiting for permission" + } else if m.app.RateLimited { + if m.app.RateLimitWaitSeconds > 0 { + status = fmt.Sprintf("waiting %ds", m.app.RateLimitWaitSeconds) + } else { + status = "rate limited" 
+ } } if m.interruptKeyInDebounce && m.app.CurrentPermission.ID == "" { bright := t.Accent() diff --git a/packages/web/src/content/docs/docs/config.mdx index 06eb6ee7d89..f44fb87ad2b 100644 --- a/packages/web/src/content/docs/docs/config.mdx +++ b/packages/web/src/content/docs/docs/config.mdx @@ -75,6 +75,33 @@ Your editor should be able to validate and autocomplete based on the schema. You can configure the providers and models you want to use in your opencode config through the `provider`, `model` and `small_model` options. +--- + +### Rate Limiting Configuration + +You can limit the number of requests per minute (RPM) for any provider/model pair by setting the `limit.rpm` option on that model. This helps prevent hitting API rate limits and allows you to control usage for cost or quota management. + +```json title="opencode.json" +{ + "provider": { + "google": { + "models": { + "gemini-2.5-pro": { + "limit": { + "rpm": 10 + } + } + } + } + } +} +``` + +- `limit.rpm`: Maximum requests per minute for this model. If the limit is reached, opencode will automatically pause and resume requests as needed. + +> Note: rate limit detection uses a sliding one-minute window. +--- + ```json title="opencode.json" { "$schema": "https://opencode.ai/config.json", @@ -265,7 +292,6 @@ The `disabled_providers` option accepts an array of provider IDs. When a provide - It won't be loaded even if API keys are configured through `opencode auth login` - The provider's models won't appear in the model selection list - ```json title="opencode.json" { "$schema": "https://opencode.ai/config.json",