diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e536b6..ab1a85a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ The following emojis are used to highlight certain changes: ### Added +- `--max-range-request-file-size` / `RAINBOW_MAX_RANGE_REQUEST_FILE_SIZE`: Configurable limit for HTTP Range requests on large files (default: 5GiB). Range requests for files larger than this limit return HTTP 501 Not Implemented to protect against CDN issues. Specifically addresses Cloudflare's bug where range requests for files over 5GiB are silently ignored, causing the entire file to be returned instead of the requested range, leading to excess bandwidth consumption and billing. + ### Changed - Update to Boxo [v0.35.0](https://github.com/ipfs/boxo/releases/tag/v0.35.0) @@ -22,6 +24,8 @@ The following emojis are used to highlight certain changes: ### Fixed +- Fixed bitswap client initialization to use `time.Duration` instead of `delay.Fixed()` for rebroadcast delay, matching the updated bitswap client API + ### Removed ### Security diff --git a/docs/environment-variables.md b/docs/environment-variables.md index bba9986..a705b6b 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -28,6 +28,7 @@ - [`RAINBOW_HTTP_RETRIEVAL_METRICS_LABELS_FOR_ENDPOINTS`](#rainbow_http_retrieval_metrics_labels_for_endpoints) - [`RAINBOW_MAX_CONCURRENT_REQUESTS`](#rainbow_max_concurrent_requests) - [`RAINBOW_RETRIEVAL_TIMEOUT`](#rainbow_retrieval_timeout) + - [`RAINBOW_MAX_RANGE_REQUEST_FILE_SIZE`](#rainbow_max_range_request_file_size) - [Experiments](#experiments) - [`RAINBOW_SEED_PEERING`](#rainbow_seed_peering) - [`RAINBOW_SEED_PEERING_MAX_INDEX`](#rainbow_seed_peering_max_index) @@ -298,6 +299,20 @@ If content cannot be retrieved within this period, the gateway returns a `504 Ga Default: `30s` +### `RAINBOW_MAX_RANGE_REQUEST_FILE_SIZE` + +Maximum file size in bytes for which HTTP Range requests are supported. 
Range requests for files larger than this limit will return a `501 Not Implemented` error with a message suggesting a switch to verifiable block requests (`application/vnd.ipld.raw`). + +This setting provides protection against issues with CDN and reverse proxy implementations that have bugs or limitations when handling byte range requests for large files. Cloudflare, in particular, has a [known issue](https://github.com/ipfs/boxo/issues/856#issuecomment-2786431369) where range requests for files over 5 GiB are silently ignored - instead of returning the requested byte range, Cloudflare returns the entire file. This causes serious problems: +- **Excess bandwidth consumption and billing**: Clients expecting a small range (e.g., web browsers requesting parts of a large SQLite database) will receive and be billed for the entire multi-gigabyte file +- **Client failures**: Naive clients like JavaScript applications may crash or hang when they receive gigabytes of data instead of the requested range + +When a range request targets a file larger than the configured limit, the gateway will return an HTTP 501 error suggesting that the client use verifiable block requests instead, which are more suitable for large file transfers and can be independently verified. + +Set to `0` to disable this limit and allow range requests for files of any size (use with caution if your gateway is behind a CDN or reverse proxy). 
+ +Default: `5368709120` (5 GiB - matches Cloudflare's threshold to prevent excess billing) + ## Experiments ### `RAINBOW_SEED_PEERING` diff --git a/handlers.go b/handlers.go index 57decd0..fe0c577 100644 --- a/handlers.go +++ b/handlers.go @@ -314,11 +314,12 @@ func setupGatewayHandler(cfg Config, nd *Node) (http.Handler, error) { } gwConf := gateway.Config{ - DeserializedResponses: true, - PublicGateways: publicGateways, - NoDNSLink: noDNSLink, - MaxConcurrentRequests: cfg.MaxConcurrentRequests, // When exceeded, returns 429 with Retry-After: 60 (hardcoded in boxo) - RetrievalTimeout: cfg.RetrievalTimeout, + DeserializedResponses: true, + PublicGateways: publicGateways, + NoDNSLink: noDNSLink, + MaxConcurrentRequests: cfg.MaxConcurrentRequests, // When exceeded, returns 429 with Retry-After: 60 (hardcoded in boxo) + RetrievalTimeout: cfg.RetrievalTimeout, + MaxRangeRequestFileSize: cfg.MaxRangeRequestFileSize, } gwHandler := gateway.NewHandler(gwConf, backend) diff --git a/main.go b/main.go index c5214c9..37490e5 100644 --- a/main.go +++ b/main.go @@ -480,6 +480,12 @@ Generate an identity seed and launch a gateway: EnvVars: []string{"RAINBOW_RETRIEVAL_TIMEOUT"}, Usage: "Maximum duration for initial content retrieval and time between writes", }, + &cli.Int64Flag{ + Name: "max-range-request-file-size", + Value: 5368709120, // 5 GiB + EnvVars: []string{"RAINBOW_MAX_RANGE_REQUEST_FILE_SIZE"}, + Usage: "Maximum file size in bytes for which range requests are supported. Range requests for larger files will return 501. Set to 0 to disable limit", + }, &cli.StringSliceFlag{ Name: "dnslink-resolvers", Value: cli.NewStringSlice(". : auto"), @@ -710,9 +716,10 @@ share the same seed as long as the indexes are different. 
HTTPRetrievalWorkers: httpRetrievalWorkers, HTTPRetrievalMaxDontHaveErrors: httpRetrievalMaxDontHaveErrors, HTTPRetrievalMetricsLabelsForEndpoints: httpRetrievalMetricsLabelsForEndpoints, - // Gateway rate limiting and timeout configuration - MaxConcurrentRequests: cctx.Int("max-concurrent-requests"), - RetrievalTimeout: cctx.Duration("retrieval-timeout"), + // Gateway limits + MaxConcurrentRequests: cctx.Int("max-concurrent-requests"), + RetrievalTimeout: cctx.Duration("retrieval-timeout"), + MaxRangeRequestFileSize: cctx.Int64("max-range-request-file-size"), } // Store original values for display diff --git a/setup.go b/setup.go index e8c3e7a..f47053a 100644 --- a/setup.go +++ b/setup.go @@ -174,9 +174,10 @@ type Config struct { // Bootstrap peers configuration (with "auto" support) Bootstrap []string - // Gateway rate limiting and timeout configuration - MaxConcurrentRequests int - RetrievalTimeout time.Duration + // Gateway limits + MaxConcurrentRequests int + RetrievalTimeout time.Duration + MaxRangeRequestFileSize int64 } func SetupNoLibp2p(ctx context.Context, cfg Config, dnsCache *cachedDNS) (*Node, error) {