Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/opencode/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { generateObject, type ModelMessage } from "ai"
import PROMPT_GENERATE from "./generate.txt"
import { SystemPrompt } from "../session/system"
import { mergeDeep } from "remeda"
import { RateLimiter } from "../util/rate-limiter"

export namespace Agent {
export const Info = z
Expand Down Expand Up @@ -141,6 +142,11 @@ export namespace Agent {
export async function generate(input: { description: string }) {
const defaultModel = await Provider.defaultModel()
const model = await Provider.getModel(defaultModel.providerID, defaultModel.modelID)

// Apply rate limiting before the request
const config = await Config.get()
await RateLimiter.checkRateLimit(defaultModel.providerID, defaultModel.modelID, config)

const system = SystemPrompt.header(defaultModel.providerID)
system.push(PROMPT_GENERATE)
const existing = await list()
Expand Down
12 changes: 11 additions & 1 deletion packages/opencode/src/app/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Log } from "../util/log"
import { Context } from "../util/context"
import { Filesystem } from "../util/filesystem"
import { Global } from "../global"
import { RateLimiter } from "../util/rate-limiter"
import path from "path"
import os from "os"
import { z } from "zod"
Expand All @@ -24,6 +25,9 @@ export namespace App {
time: z.object({
initialized: z.number().optional(),
}),
rateLimited: z.boolean().optional(),
rateLimitedProvider: z.string().optional(),
rateLimitWaitSeconds: z.number().optional(),
})
.openapi({
ref: "App",
Expand Down Expand Up @@ -121,7 +125,13 @@ export namespace App {
}

export function info() {
return ctx.use().info
const appInfo = ctx.use().info
return {
...appInfo,
rateLimited: RateLimiter.isCurrentlyRateLimited(),
rateLimitedProvider: RateLimiter.getRateLimitedProvider(),
rateLimitWaitSeconds: RateLimiter.getCurrentWaitTime(),
}
}

export async function initialize() {
Expand Down
13 changes: 12 additions & 1 deletion packages/opencode/src/config/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,18 @@ export namespace Config {
.record(
ModelsDev.Provider.partial()
.extend({
models: z.record(ModelsDev.Model.partial()).optional(),
models: z
.record(
ModelsDev.Model.partial().extend({
limit: ModelsDev.Model.shape.limit
.partial()
.extend({
rpm: z.number().int().positive().optional(),
})
.optional(),
}),
)
.optional(),
options: z
.object({
apiKey: z.string().optional(),
Expand Down
1 change: 1 addition & 0 deletions packages/opencode/src/provider/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export namespace ModelsDev {
limit: z.object({
context: z.number(),
output: z.number(),
rpm: z.number().int().positive().optional(),
}),
options: z.record(z.any()),
})
Expand Down
10 changes: 5 additions & 5 deletions packages/opencode/src/provider/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ export namespace Provider {
...existing?.options,
...model.options,
},
limit: model.limit ??
existing?.limit ?? {
context: 0,
output: 0,
},
limit: {
context: model.limit?.context ?? existing?.limit?.context ?? 0,
output: model.limit?.output ?? existing?.limit?.output ?? 0,
...(model.limit?.rpm !== undefined && { rpm: model.limit.rpm }),
},
}
parsed.models[modelID] = parsedModel
}
Expand Down
10 changes: 10 additions & 0 deletions packages/opencode/src/session/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {

import PROMPT_INITIALIZE from "../session/prompt/initialize.txt"
import PROMPT_PLAN from "../session/prompt/plan.txt"
import { RateLimiter } from "../util/rate-limiter"

import { App } from "../app/app"
import { Bus } from "../bus"
Expand Down Expand Up @@ -408,6 +409,7 @@ export namespace Session {
l.info("chatting")

const inputAgent = input.agent ?? "build"
const config = await Config.get()

// Process revert cleanup first, before creating new messages
const session = await get(input.sessionID)
Expand Down Expand Up @@ -673,6 +675,11 @@ export namespace Session {

if (msgs.filter((m) => m.info.role === "user").length === 1 && !session.parentID && isDefaultTitle(session.title)) {
const small = (await Provider.getSmallModel(input.providerID)) ?? model

// Apply rate limiting for title generation
const config = await Config.get()
await RateLimiter.checkRateLimit(input.providerID, input.modelID, config)

generateText({
maxOutputTokens: small.info.reasoning ? 1024 : 20,
providerOptions: {
Expand Down Expand Up @@ -981,6 +988,9 @@ export namespace Session {
middleware: [
{
async transformParams(args) {
// Apply rate limiting before each request
await RateLimiter.checkRateLimit(input.providerID, input.modelID, config)

if (args.type === "stream") {
// @ts-expect-error
args.params.prompt = ProviderTransform.message(args.params.prompt, input.providerID, input.modelID)
Expand Down
111 changes: 111 additions & 0 deletions packages/opencode/src/util/rate-limiter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import { Log } from "./log"

/**
 * In-process sliding-window limiter for model requests.
 *
 * Each `providerID/modelID` pair gets its own list of request timestamps.
 * A request is admitted when fewer than `rpm` requests were recorded in the
 * last 60 seconds; otherwise the caller sleeps until the oldest recorded
 * request leaves the window and then tries again.
 *
 * The namespace also exposes a small "currently rate limited" status
 * (flag, key, remaining wait) that reflects whether any caller is sleeping.
 */
export namespace RateLimiter {
  const log = Log.create({ service: "rate-limiter" })

  /** Length of the sliding window in milliseconds (one minute). */
  const WINDOW_MS = 60000

  // Map of `providerID/modelID` -> ascending request timestamps inside the window.
  const rateLimitWindows = new Map<string, number[]>()

  // Status shared by all callers. It stays set for as long as at least one
  // caller is sleeping, which is what `activeWaiters` tracks.
  let isRateLimited = false
  let rateLimitedProvider = ""
  let rateLimitWaitUntil = 0 // Timestamp (ms since epoch) when the longest known wait ends
  let activeWaiters = 0

  /** Resolves after `ms` milliseconds. */
  function sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }

  /** Resets the shared status to "not rate limited". */
  function clearStatus(): void {
    isRateLimited = false
    rateLimitedProvider = ""
    rateLimitWaitUntil = 0
  }

  /**
   * Waits until a request for `key` fits into the sliding window, then records it.
   *
   * @param key - Window identifier (`providerID/modelID`).
   * @param rpmLimit - Maximum number of requests admitted per 60 second window.
   */
  async function waitForRateLimit(key: string, rpmLimit: number): Promise<void> {
    // A non-positive (or NaN) limit could never admit a request; treat it as "no limit"
    // instead of waiting forever.
    if (!(rpmLimit > 0)) return

    let window = rateLimitWindows.get(key)
    if (!window) {
      window = []
      rateLimitWindows.set(key, window)
    }

    // Loop instead of recursing: after every sleep the window is re-evaluated,
    // because other callers may have taken the freed slot in the meantime.
    for (;;) {
      const now = Date.now()
      const windowStart = now - WINDOW_MS

      // Drop timestamps that are a full window old or older. Timestamps are
      // pushed in ascending order, so everything before the first live one is expired.
      const firstLive = window.findIndex((timestamp) => timestamp > windowStart)
      if (firstLive === -1) window.length = 0
      else if (firstLive > 0) window.splice(0, firstLive)

      const oldestRequest = window[0]

      // Admit while strictly below the limit. (`>` here would let rpmLimit + 1
      // requests through per window.)
      if (window.length < rpmLimit || oldestRequest === undefined) {
        window.push(now)
        return
      }

      // Time until the oldest recorded request leaves the window; always > 0
      // because expired timestamps were removed above.
      const waitTime = oldestRequest + WINDOW_MS - now

      activeWaiters++
      isRateLimited = true
      rateLimitedProvider = key
      // Keep the latest deadline so a short wait does not hide a longer one.
      rateLimitWaitUntil = Math.max(rateLimitWaitUntil, now + waitTime)

      log.info("rate limit hit, sleeping", {
        key,
        rpmLimit,
        currentRequests: window.length,
        waitTimeMs: waitTime,
      })

      try {
        await sleep(waitTime)
      } finally {
        // Only the last sleeper to wake clears the status; clearing earlier would
        // report "not rate limited" while other requests are still blocked.
        activeWaiters--
        if (activeWaiters === 0) clearStatus()
      }
    }
  }

  /**
   * Applies the configured requests-per-minute limit for a model, sleeping if needed.
   *
   * The limit is read from `config.provider[providerID].models[modelID].limit.rpm`.
   * When no config, no model ID, or no positive numeric `rpm` is present the call
   * returns immediately. Failures are logged and swallowed so that rate limiting
   * never blocks a request from being made.
   *
   * @param providerID - Provider key inside `config.provider`.
   * @param modelID - Model key inside the provider's `models` record.
   * @param config - Loaded configuration object; optional.
   */
  export async function checkRateLimit(providerID: string, modelID: string, config?: any) {
    try {
      if (!config || !modelID) return
      // Read through `unknown` so the value is validated before use.
      const rpmLimit: unknown = config.provider?.[providerID]?.models?.[modelID]?.limit?.rpm
      if (typeof rpmLimit !== "number" || !(rpmLimit > 0)) return
      await waitForRateLimit(`${providerID}/${modelID}`, rpmLimit)
    } catch (error) {
      log.warn("failed to apply rate limiting", { error, providerID, modelID })
    }
  }

  /** Returns true while the shared status says a caller is waiting on a limit. */
  export function isCurrentlyRateLimited(): boolean {
    return isRateLimited
  }

  /** Returns the `providerID/modelID` key that most recently hit its limit, or "" if none. */
  export function getRateLimitedProvider(): string {
    return rateLimitedProvider
  }

  /**
   * Returns the remaining wait in whole seconds (rounded up), or 0 when not rate limited.
   *
   * If the recorded deadline has already passed, the shared status is cleared
   * as a side effect and 0 is returned.
   */
  export function getCurrentWaitTime(): number {
    if (!isRateLimited || !rateLimitedProvider || rateLimitWaitUntil === 0) return 0

    const remainingMs = rateLimitWaitUntil - Date.now()
    if (remainingMs <= 0) {
      clearStatus()
      return 0
    }

    return Math.ceil(remainingMs / 1000)
  }
}
28 changes: 17 additions & 11 deletions packages/sdk/go/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,21 +222,27 @@ func (r agentModelJSON) RawJSON() string {
}

type App struct {
Git bool `json:"git,required"`
Hostname string `json:"hostname,required"`
Path AppPath `json:"path,required"`
Time AppTime `json:"time,required"`
JSON appJSON `json:"-"`
Git bool `json:"git,required"`
Hostname string `json:"hostname,required"`
Path AppPath `json:"path,required"`
Time AppTime `json:"time,required"`
RateLimited bool `json:"rateLimited,omitempty"`
RateLimitedProvider string `json:"rateLimitedProvider,omitempty"`
RateLimitWaitSeconds int `json:"rateLimitWaitSeconds,omitempty"`
JSON appJSON `json:"-"`
}

// appJSON contains the JSON metadata for the struct [App]
type appJSON struct {
Git apijson.Field
Hostname apijson.Field
Path apijson.Field
Time apijson.Field
raw string
ExtraFields map[string]apijson.Field
Git apijson.Field
Hostname apijson.Field
Path apijson.Field
Time apijson.Field
RateLimited apijson.Field
RateLimitedProvider apijson.Field
RateLimitWaitSeconds apijson.Field
raw string
ExtraFields map[string]apijson.Field
}

func (r *App) UnmarshalJSON(data []byte) (err error) {
Expand Down
65 changes: 41 additions & 24 deletions packages/tui/internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,36 +27,53 @@ type Message struct {
}

type App struct {
Info opencode.App
Agents []opencode.Agent
Providers []opencode.Provider
Version string
StatePath string
Config *opencode.Config
Client *opencode.Client
State *State
AgentIndex int
Provider *opencode.Provider
Model *opencode.Model
Session *opencode.Session
Messages []Message
Permissions []opencode.Permission
CurrentPermission opencode.Permission
Commands commands.CommandRegistry
InitialModel *string
InitialPrompt *string
InitialAgent *string
InitialSession *string
compactCancel context.CancelFunc
IsLeaderSequence bool
IsBashMode bool
ScrollSpeed int
Info opencode.App
Agents []opencode.Agent
Providers []opencode.Provider
Version string
StatePath string
Config *opencode.Config
Client *opencode.Client
State *State
AgentIndex int
Provider *opencode.Provider
Model *opencode.Model
Session *opencode.Session
Messages []Message
Permissions []opencode.Permission
CurrentPermission opencode.Permission
Commands commands.CommandRegistry
InitialModel *string
InitialPrompt *string
InitialAgent *string
InitialSession *string
compactCancel context.CancelFunc
IsLeaderSequence bool
IsBashMode bool
ScrollSpeed int
RateLimited bool
RateLimitedProvider string
RateLimitWaitSeconds int
}

// Agent returns a pointer to the element of a.Agents selected by a.AgentIndex.
func (a *App) Agent() *opencode.Agent {
	selected := a.AgentIndex
	return &a.Agents[selected]
}

// RefreshAppInfo fetches the app info through a.Client.App.Get using a
// background context, stores it in a.Info, and mirrors its rate-limit fields
// onto the App. When the fetch fails the App is left unchanged.
func (a *App) RefreshAppInfo() {
	latest, err := a.Client.App.Get(context.Background())
	if err == nil {
		a.Info = *latest
		// Mirror the backend's rate-limit fields onto the local copies.
		a.RateLimited = a.Info.RateLimited
		a.RateLimitedProvider = a.Info.RateLimitedProvider
		a.RateLimitWaitSeconds = a.Info.RateLimitWaitSeconds
	}
	// Errors are intentionally not logged: they might be too frequent during
	// normal operation.
}

type SessionCreatedMsg = struct {
Session *opencode.Session
}
Expand Down
10 changes: 10 additions & 0 deletions packages/tui/internal/components/chat/editor.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ func (m *editorComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.width = msg.Width - 4
return m, nil
case spinner.TickMsg:
// Refresh app info when busy to get latest rate limit status
if m.app.IsBusy() {
m.app.RefreshAppInfo()
}
m.spinner, cmd = m.spinner.Update(msg)
return m, cmd
case tea.KeyPressMsg:
Expand Down Expand Up @@ -380,6 +384,12 @@ func (m *editorComponent) Content() string {
status := "working"
if m.app.CurrentPermission.ID != "" {
status = "waiting for permission"
} else if m.app.RateLimited {
if m.app.RateLimitWaitSeconds > 0 {
status = fmt.Sprintf("waiting %ds", m.app.RateLimitWaitSeconds)
} else {
status = "rate limited"
}
}
if m.interruptKeyInDebounce && m.app.CurrentPermission.ID == "" {
bright := t.Accent()
Expand Down
Loading
Loading