Skip to content

Commit 34dcf07

Browse files
squatbrancz
authored and committed
pkg/receive: enable forwarding metrics (#1243)
* pkg/receive: rename host->node This commit renames `host` to `node` in the context of the receive hashring. This is because more often than not, the hashring will deal with endpoints rather than simply hosts and node is a more generic term for the operand of a hashring. * pkg/receive: forward metrics This commit enables metrics forwarding from one receive node to another. The receive nodes construct hashrings from the given sd-files and use these hashrings to select a node to which to forward a given time series. Time series are batched together to ensure that for any incoming write-request to a node, at most one outgoing write-request will be made to every other node in the hashring. * test/e2e: add receiver hashring test
1 parent 66fd721 commit 34dcf07

File tree

10 files changed

+769
-227
lines changed

10 files changed

+769
-227
lines changed

cmd/thanos/receive.go

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"context"
55
"fmt"
66
"net"
7+
"os"
8+
"strings"
79
"time"
810

911
"github.com/go-kit/kit/log"
@@ -45,12 +47,42 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri
4547

4648
retention := modelDuration(cmd.Flag("tsdb.retention", "How long to retain raw samples on local storage. 0d - disables this retention").Default("15d"))
4749

50+
hashringsFile := cmd.Flag("receive.hashrings-file", "Path to file that contains the hashring configuration.").
51+
PlaceHolder("<path>").String()
52+
53+
refreshInterval := modelDuration(cmd.Flag("receive.hashrings-file-refresh-interval", "Refresh interval to re-read the hashring configuration file. (used as a fallback)").
54+
Default("5m"))
55+
56+
local := cmd.Flag("receive.local-endpoint", "Endpoint of local receive node. Used to identify the local node in the hashring configuration.").String()
57+
58+
tenantHeader := cmd.Flag("receive.tenant-header", "HTTP header to determine tenant for write requests.").Default("THANOS-TENANT").String()
59+
4860
m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
4961
lset, err := parseFlagLabels(*labelStrs)
5062
if err != nil {
5163
return errors.Wrap(err, "parse labels")
5264
}
5365

66+
var cw *receive.ConfigWatcher
67+
if *hashringsFile != "" {
68+
cw, err = receive.NewConfigWatcher(log.With(logger, "component", "config-watcher"), reg, *hashringsFile, *refreshInterval)
69+
if err != nil {
70+
return err
71+
}
72+
}
73+
74+
// Local is empty, so try to generate a local endpoint
75+
// based on the hostname and the listening port.
76+
if *local == "" {
77+
hostname, err := os.Hostname()
78+
if hostname == "" || err != nil {
79+
return errors.New("--receive.local-endpoint is empty and host could not be determined.")
80+
}
81+
parts := strings.Split(*remoteWriteAddress, ":")
82+
port := parts[len(parts)-1]
83+
*local = fmt.Sprintf("http://%s:%s/api/v1/receive", hostname, port)
84+
}
85+
5486
return runReceive(
5587
g,
5688
logger,
@@ -66,6 +98,9 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri
6698
objStoreConfig,
6799
lset,
68100
*retention,
101+
cw,
102+
*local,
103+
*tenantHeader,
69104
)
70105
}
71106
}
@@ -85,6 +120,9 @@ func runReceive(
85120
objStoreConfig *pathOrContent,
86121
lset labels.Labels,
87122
retention model.Duration,
123+
cw *receive.ConfigWatcher,
124+
endpoint string,
125+
tenantHeader string,
88126
) error {
89127
logger = log.With(logger, "component", "receive")
90128
level.Warn(logger).Log("msg", "setting up receive; the Thanos receive component is EXPERIMENTAL, it may break significantly without notice")
@@ -103,6 +141,8 @@ func runReceive(
103141
ListenAddress: remoteWriteAddress,
104142
Registry: reg,
105143
ReadyStorage: localStorage,
144+
Endpoint: endpoint,
145+
TenantHeader: tenantHeader,
106146
})
107147

108148
// Start all components while we wait for TSDB to open but only load
@@ -129,7 +169,7 @@ func runReceive(
129169

130170
startTimeMargin := int64(2 * time.Duration(tsdbCfg.MinBlockDuration).Seconds() * 1000)
131171
localStorage.Set(db, startTimeMargin)
132-
webHandler.Ready()
172+
webHandler.StorageReady()
133173
level.Info(logger).Log("msg", "server is ready to receive web requests.")
134174
close(dbOpen)
135175
<-cancel
@@ -144,6 +184,56 @@ func runReceive(
144184
)
145185
}
146186

187+
level.Debug(logger).Log("msg", "setting up hashring")
188+
{
189+
updates := make(chan receive.Hashring)
190+
if cw != nil {
191+
ctx, cancel := context.WithCancel(context.Background())
192+
g.Add(func() error {
193+
receive.HashringFromConfig(ctx, updates, cw)
194+
return nil
195+
}, func(error) {
196+
cancel()
197+
close(updates)
198+
})
199+
} else {
200+
cancel := make(chan struct{})
201+
g.Add(func() error {
202+
updates <- receive.SingleNodeHashring(endpoint)
203+
<-cancel
204+
return nil
205+
}, func(error) {
206+
close(cancel)
207+
close(updates)
208+
})
209+
}
210+
211+
cancel := make(chan struct{})
212+
g.Add(
213+
func() error {
214+
select {
215+
case h := <-updates:
216+
webHandler.Hashring(h)
217+
case <-cancel:
218+
return nil
219+
}
220+
select {
221+
// If any new hashring is received, then mark the handler as unready, but keep it alive.
222+
case <-updates:
223+
webHandler.Hashring(nil)
224+
level.Info(logger).Log("msg", "hashring has changed; server is not ready to receive web requests.")
225+
case <-cancel:
226+
return nil
227+
}
228+
<-cancel
229+
return nil
230+
},
231+
func(err error) {
232+
close(cancel)
233+
},
234+
)
235+
}
236+
147237
level.Debug(logger).Log("msg", "setting up metric http listen-group")
148238
if err := metricHTTPListenGroup(g, logger, reg, httpMetricsBindAddr); err != nil {
149239
return err

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ require (
4747
google.golang.org/api v0.3.2
4848
google.golang.org/grpc v1.19.1
4949
gopkg.in/alecthomas/kingpin.v2 v2.2.6
50+
gopkg.in/fsnotify.v1 v1.4.7
5051
gopkg.in/fsnotify/fsnotify.v1 v1.4.7 // indirect
5152
gopkg.in/yaml.v2 v2.2.2
5253
)

0 commit comments

Comments
 (0)