diff --git a/.circleci/config.yml b/.circleci/config.yml index cea065f4d65..6470b538ded 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -33,6 +33,10 @@ jobs: fi export THANOS_SKIP_S3_AWS_TESTS="true" echo "Skipping AWS tests." + export THANOS_SKIP_AZURE_TESTS="true" + echo "Skipping Azure tests." + export THANOS_SKIP_SWIFT_TESTS="true" + echo "Skipping SWIFT tests." make test diff --git a/.errcheck_excludes.txt b/.errcheck_excludes.txt index 5fad7c252ea..9e2e3a71e1c 100644 --- a/.errcheck_excludes.txt +++ b/.errcheck_excludes.txt @@ -1,3 +1,3 @@ (github.com/improbable-eng/thanos/vendor/github.com/go-kit/kit/log.Logger).Log fmt.Fprintln -fmt.Fprint \ No newline at end of file +fmt.Fprint diff --git a/CHANGELOG.md b/CHANGELOG.md index bccdcf874e0..ec2dcc71aaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ NOTE: As semantic versioning states all 0.y.z releases can contain breaking chan ## Unreleased +### Deprecated - Remove support of those flags for bucket - --gcs-bucket=\ - --s3.bucket=\ @@ -25,7 +26,19 @@ NOTE: As semantic versioning states all 0.y.z releases can contain breaking chan * S3_INSECURE * S3_SIGNATURE_VERSION2 * S3_SECRET_KEY + +### Added - Add flag `--objstore.config-file` to reference to the bucket configuration file in yaml format. Note that detailed information in document [storage](docs/storage.md). +- Add `thanos_` to member list metrics. Some metrics have been renamed, make sure to update your dashboards and rules. +- `thanos rule` now supports static configuration of query nodes via `--query` +- `thanos rule` now supports file based discovery of query nodes using `--query.file-sd-config.files` +- `thanos query` now supports file based discovery of store nodes using `--store.file-sd-config.files` +- Add `/-/healthy` endpoint to Querier. +- Add DNS service discovery to static and file based configurations using the `dns+` and `dnssrv+` prefixes for the respective lookup. 
+ +### Fixed +- [#566](https://github.com/improbable-eng/thanos/issues/566) - Fixed issue whereby the Proxy Store could end up in a deadlock if there were more than 9 stores being queried and all returned an error. + ## [v0.1.0](https://github.com/improbable-eng/thanos/releases/tag/v0.1.0) - 2018.09.14 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b2ae7533fd7..226ebbbd524 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -35,9 +35,11 @@ $ git push origin 5. If you don't have a live object store ready add these envvars to skip tests for these: - THANOS_SKIP_GCS_TESTS to skip GCS tests. - THANOS_SKIP_S3_AWS_TESTS to skip AWS tests. +- THANOS_SKIP_AZURE_TESTS to skip Azure tests. +- THANOS_SKIP_SWIFT_TESTS to skip SWIFT tests. -If you skip both of these, the store specific tests will be run against memory object storage only. -CI runs GCS and inmem tests only for now. Not having these variables will produce auth errors against GCS or AWS tests. +If you skip all of these, the store specific tests will be run against memory object storage only. +CI runs GCS and inmem tests only for now. Not having these variables will produce auth errors against GCS, AWS or Azure tests. 6. If your change affects users (adds or removes feature) consider adding the item to [CHANGELOG](CHANGELOG.md) 7. 
You may merge the Pull Request in once you have the sign-off of at least one developers with write access, or if you diff --git a/Gopkg.lock b/Gopkg.lock index ea1a54da777..a2dad23ee41 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -25,6 +25,22 @@ revision = "2b93072101d466aa4120b3c23c2e1b08af01541c" version = "v0.6.0" +[[projects]] + digest = "1:6f302284bb48712a01cdcd3216e8bbb293d1edb618f55b5fe7f92521cce930c7" + name = "github.com/Azure/azure-pipeline-go" + packages = ["pipeline"] + pruneopts = "" + revision = "7571e8eb0876932ab505918ff7ed5107773e5ee2" + version = "0.1.7" + +[[projects]] + digest = "1:5da4d3b3b9949b9043d2fd36c4ff9b208f72ad5260a3dcb6f94267a769ee1899" + name = "github.com/Azure/azure-storage-blob-go" + packages = ["azblob"] + pruneopts = "" + revision = "5152f14ace1c6db66bd9cb57840703a8358fa7bc" + version = "0.3.0" + [[projects]] digest = "1:b0fe84bcee1d0c3579d855029ccd3a76deea187412da2976985e4946289dbb2c" name = "github.com/NYTimes/gziphandler" @@ -182,6 +198,24 @@ revision = "317e0006254c44a0ac427cc52a0e083ff0b9622f" version = "v2.0.0" +[[projects]] + digest = "1:e9aa4d37933cdd1978d83938e8af418c02b4c183e1d6c936efd00ce1628fadb7" + name = "github.com/gophercloud/gophercloud" + packages = [ + ".", + "openstack", + "openstack/identity/v2/tenants", + "openstack/identity/v2/tokens", + "openstack/identity/v3/tokens", + "openstack/objectstorage/v1/accounts", + "openstack/objectstorage/v1/containers", + "openstack/objectstorage/v1/objects", + "openstack/utils", + "pagination", + ] + pruneopts = "" + revision = "0719c6b22f30132b0ae6c90b038e0d50992107b0" + [[projects]] digest = "1:0bf81a189b23434fc792317c9276abfe7aee4eb3f85d3c3659a2e0f21acafe97" name = "github.com/grpc-ecosystem/go-grpc-middleware" @@ -428,6 +462,8 @@ digest = "1:b5ff9852eabe841003da4b0a4b742a2878c722dda6481003432344f633a814fc" name = "github.com/prometheus/prometheus" packages = [ + "discovery/file", + 
"discovery/targetgroup", "pkg/labels", "pkg/rulefmt", "pkg/textparse", @@ -500,7 +536,7 @@ [[projects]] branch = "master" - digest = "1:3a2cd3e4815469d0a8fad881966023406563b791d9807709de28d04f9d5ed40f" + digest = "1:16db3d6f4f8bbe4b7b42cb8808e68457fea4bd7aea410b77c8c9a6dc26253a60" name = "golang.org/x/crypto" packages = [ "argon2", @@ -510,7 +546,7 @@ "ssh/terminal", ] pruneopts = "" - revision = "182538f80094b6a8efaade63a8fd8e0d9d5843dd" + revision = "0709b304e793a5edb4a2c0145f281ecdc20838a4" [[projects]] branch = "master" @@ -561,7 +597,7 @@ [[projects]] branch = "master" - digest = "1:649f2e24b22ef65ea110a3ce82f327019aec48f625586ea9716e53152e013a88" + digest = "1:54bad0d07b2d834122adaf5ecc35e4df006a4e0bb6114f2466b73e2cc6f03583" name = "golang.org/x/sys" packages = [ "cpu", @@ -569,7 +605,7 @@ "windows", ] pruneopts = "" - revision = "fa5fdf94c78965f1aa8423f0cc50b8b8d728b05a" + revision = "ebe1bf3edb3325c393447059974de898d5133eb8" [[projects]] digest = "1:af9bfca4298ef7502c52b1459df274eed401a4f5498b900e9a92d28d3d87ac5a" @@ -596,7 +632,7 @@ [[projects]] branch = "master" - digest = "1:4c11fda7ef44f31a6cb30fc84d186dcf6a3a7c320f61980bb90ccefa92f02216" + digest = "1:7e248912e9ce5218a30a9fa1b4ed575e474430e178a24af34ecb2373dbb93752" name = "google.golang.org/api" packages = [ "gensupport", @@ -613,7 +649,7 @@ "transport/http", ] pruneopts = "" - revision = "b810576d88a056b90ef18a0b5328544c9c074c68" + revision = "0ad5a633fea1d4b64bf5e6a01e30d1fc466038e5" [[projects]] digest = "1:eede11c81b63c8f6fd06ef24ba0a640dc077196ec9b7a58ecde03c82eee2f151" @@ -638,7 +674,7 @@ [[projects]] branch = "master" - digest = "1:c8aa249fb74a455a901ef97b28dd8225a3f65a5af0b2127d7ac3f54924866086" + digest = "1:7040eaf95eb09f6f69e1415074049a9a66236d59d8767f2d17b759b916f79fb1" name = "google.golang.org/genproto" packages = [ "googleapis/api/annotations", @@ -647,7 +683,7 @@ "googleapis/rpc/status", ] pruneopts = "" - revision = "c66870c02cf823ceb633bcd05be3c7cda29976f4" + revision = 
"11092d34479b07829b72e10713b159248caf5dad" [[projects]] digest = "1:cb1330030248de97a11d9f9664f3944fce0df947e5ed94dbbd9cb6e77068bd46" @@ -693,6 +729,14 @@ revision = "947dcec5ba9c011838740e680966fd7087a71d0d" version = "v2.2.6" +[[projects]] + digest = "1:b2106f1668ea5efc1ecc480f7e922a093adb9563fd9ce58585292871f0d0f229" + name = "gopkg.in/fsnotify/fsnotify.v1" + packages = ["."] + pruneopts = "" + revision = "c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9" + version = "v1.4.7" + [[projects]] branch = "v2" digest = "1:f0620375dd1f6251d9973b5f2596228cc8042e887cd7f827e4220bc1ce8c30e2" @@ -707,6 +751,7 @@ input-imports = [ "cloud.google.com/go/storage", "cloud.google.com/go/trace/apiv1", + "github.com/Azure/azure-storage-blob-go/azblob", "github.com/NYTimes/gziphandler", "github.com/armon/go-metrics", "github.com/armon/go-metrics/prometheus", @@ -717,6 +762,11 @@ "github.com/gogo/protobuf/gogoproto", "github.com/gogo/protobuf/proto", "github.com/golang/snappy", + "github.com/gophercloud/gophercloud", + "github.com/gophercloud/gophercloud/openstack", + "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers", + "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects", + "github.com/gophercloud/gophercloud/pagination", "github.com/grpc-ecosystem/go-grpc-middleware", "github.com/grpc-ecosystem/go-grpc-middleware/recovery", "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing", @@ -739,6 +789,8 @@ "github.com/prometheus/common/model", "github.com/prometheus/common/route", "github.com/prometheus/common/version", + "github.com/prometheus/prometheus/discovery/file", + "github.com/prometheus/prometheus/discovery/targetgroup", 
"github.com/prometheus/prometheus/pkg/labels", "github.com/prometheus/prometheus/pkg/timestamp", "github.com/prometheus/prometheus/pkg/value", @@ -759,6 +811,7 @@ "google.golang.org/api/option", "google.golang.org/grpc", "google.golang.org/grpc/codes", + "google.golang.org/grpc/credentials", "google.golang.org/grpc/status", "gopkg.in/alecthomas/kingpin.v2", "gopkg.in/yaml.v2", diff --git a/Gopkg.toml b/Gopkg.toml index 093e82c619f..3280df82df1 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -76,3 +76,10 @@ ignored = ["github.com/improbable-eng/thanos/benchmark/*"] name = "github.com/minio/minio-go" version = "6.0.1" +[[constraint]] + version = "0.3.0" + name = "github.com/Azure/azure-storage-blob-go" + +[[constraint]] + name = "github.com/gophercloud/gophercloud" + revision = "0719c6b22f30132b0ae6c90b038e0d50992107b0" diff --git a/Makefile b/Makefile index fe98a7b6d6b..c9dd48e3106 100644 --- a/Makefile +++ b/Makefile @@ -25,13 +25,14 @@ DEP_VERSION ?=45be32ba4708aad5e2aa8c86f9432c4c4c1f8da2 # TODO(bplotka): Add more recent version after https://github.com/prometheus/prometheus/issues/4551 is fixed. SUPPORTED_PROM_VERSIONS ?=v2.0.0 v2.2.1 ALERTMANAGER_VERSION ?=v0.15.2 +MINIO_SERVER_VERSION ?=RELEASE.2018-10-06T00-15-16Z # fetch_go_bin_version downloads (go gets) the binary from specific version and installs it in $(BIN_DIR)/- # arguments: # $(1): Install path. (e.g github.com/golang/dep/cmd/dep) # $(2): Tag or revision for checkout. define fetch_go_bin_version - mkdir -p $(BIN_DIR) + @mkdir -p $(BIN_DIR) @echo ">> fetching $(1)@$(2) revision/version" @if [ ! -d "$(TMP_GOPATH)/src/$(1)" ]; then \ @@ -48,7 +49,7 @@ define fetch_go_bin_version endef .PHONY: all -all: deps format errcheck build +all: deps format build # assets repacks all statis assets into go file for easier deploy. 
.PHONY: assets @@ -139,7 +140,7 @@ tarballs-release: $(PROMU) # test runs all Thanos golang tests against each supported version of Prometheus. .PHONY: test test: test-deps - @echo ">> running all tests. Do export THANOS_SKIP_GCS_TESTS='true' or/and export THANOS_SKIP_S3_AWS_TESTS='true' if you want to skip e2e tests against real store buckets" + @echo ">> running all tests. Do export THANOS_SKIP_GCS_TESTS='true' or/and export THANOS_SKIP_S3_AWS_TESTS='true' or/and THANOS_SKIP_AZURE_TESTS='true' if you want to skip e2e tests against real store buckets" @for ver in $(SUPPORTED_PROM_VERSIONS); do \ THANOS_TEST_PROMETHEUS_PATH="prometheus-$$ver" THANOS_TEST_ALERTMANAGER_PATH="alertmanager-$(ALERTMANAGER_VERSION)" go test $(shell go list ./... | grep -v /vendor/ | grep -v /benchmark/); \ done @@ -151,6 +152,7 @@ test-deps: deps @go install github.com/improbable-eng/thanos/cmd/thanos $(foreach ver,$(SUPPORTED_PROM_VERSIONS),$(call fetch_go_bin_version,github.com/prometheus/prometheus/cmd/prometheus,$(ver))) $(call fetch_go_bin_version,github.com/prometheus/alertmanager/cmd/alertmanager,$(ALERTMANAGER_VERSION)) + $(call fetch_go_bin_version,github.com/minio/minio,$(MINIO_SERVER_VERSION)) # vet vets the code. 
.PHONY: vet diff --git a/README.md b/README.md index 4af82deef00..5e910245013 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CircleCI](https://circleci.com/gh/improbable-eng/thanos.svg?style=svg)](https://circleci.com/gh/improbable-eng/thanos) [![Go Report Card](https://goreportcard.com/badge/github.com/improbable-eng/thanos)](https://goreportcard.com/report/github.com/improbable-eng/thanos) -[![Slack](docs/img/slack.png)](https://join.slack.com/t/improbable-eng/shared_invite/enQtMzQ1ODcyMzQ5MjM4LWY5ZWZmNGM2ODc5MmViNmQ3ZTA3ZTY3NzQwOTBlMTkzZmIxZTIxODk0OWU3YjZhNWVlNDU3MDlkZGViZjhkMjc) +[![Slack](https://img.shields.io/badge/join%20slack-%23thanos-brightgreen.svg)](https://join.slack.com/t/improbable-eng/shared_invite/enQtMzQ1ODcyMzQ5MjM4LWY5ZWZmNGM2ODc5MmViNmQ3ZTA3ZTY3NzQwOTBlMTkzZmIxZTIxODk0OWU3YjZhNWVlNDU3MDlkZGViZjhkMjc) ## Overview diff --git a/benchmark/cmd/thanosbench/resources.go b/benchmark/cmd/thanosbench/resources.go index 2e650547029..52edeffd1ae 100644 --- a/benchmark/cmd/thanosbench/resources.go +++ b/benchmark/cmd/thanosbench/resources.go @@ -12,6 +12,7 @@ import ( prom "github.com/prometheus/prometheus/config" "gopkg.in/yaml.v2" appsv1 "k8s.io/api/apps/v1" + "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -207,7 +208,7 @@ func createPrometheus(opts *opts, name string, bucket string) *appsv1.StatefulSe Name: name, Namespace: promNamespace, Labels: map[string]string{ - "app": name, + "app": name, "thanos-gossip-member": "true", }, } @@ -370,7 +371,7 @@ func createThanosQuery(opts *opts) (*v1.Service, *v1.Pod) { Name: "thanos-query", Namespace: thanosNamespace, Labels: map[string]string{ - "app": "thanos-query", + "app": "thanos-query", "thanos-gossip-member": "true", }, } diff --git a/cmd/thanos/bucket.go b/cmd/thanos/bucket.go index a51a4d14dc7..15a0191b62b 100644 --- a/cmd/thanos/bucket.go +++ 
b/cmd/thanos/bucket.go @@ -41,27 +41,35 @@ var ( func registerBucket(m map[string]setupFunc, app *kingpin.Application, name string) { cmd := app.Command(name, "inspect metric data in an object storage bucket") - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). - PlaceHolder("").Required().String() + objStoreConfig := regCommonObjStoreFlags(cmd, "") + objStoreBackupConfig := regCommonObjStoreFlags(cmd, "-backup") // Verify command. verify := cmd.Command("verify", "verify all blocks in the bucket against specified issues") verifyRepair := verify.Flag("repair", "attempt to repair blocks for which issues were detected"). Short('r').Default("false").Bool() - backupBucketConfFile := verify.Flag("objstore-backup.config-file", "The backup object store configuration file path."). - PlaceHolder("").String() verifyIssues := verify.Flag("issues", fmt.Sprintf("Issues to verify (and optionally repair). Possible values: %v", allIssues())). Short('i').Default(verifier.IndexIssueID, verifier.OverlappedBlocksIssueID).Strings() verifyIDWhitelist := verify.Flag("id-whitelist", "Block IDs to verify (and optionally repair) only. "+ "If none is specified, all blocks will be verified. 
Repeated field").Strings() m[name+" verify"] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ bool) error { - bkt, err := client.NewBucket(logger, *bucketConfFile, reg, name) + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } + + bkt, err := client.NewBucket(logger, bucketConfig, reg, name) if err != nil { return err } defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") - backupBkt, err := client.NewBucket(logger, *backupBucketConfFile, reg, name) + backupBucketConfig, err := objStoreBackupConfig.Content() + if err != nil { + return err + } + + backupBkt, err := client.NewBucket(logger, backupBucketConfig, reg, name) if err == client.ErrNotFound { if *verifyRepair { return errors.Wrap(err, "repair is specified, so backup client is required") @@ -121,7 +129,12 @@ func registerBucket(m map[string]setupFunc, app *kingpin.Application, name strin lsOutput := ls.Flag("output", "Format in which to print each block's information. May be 'json' or custom template."). 
Short('o').Default("").String() m[name+" ls"] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ bool) error { - bkt, err := client.NewBucket(logger, *bucketConfFile, reg, name) + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } + + bkt, err := client.NewBucket(logger, bucketConfig, reg, name) if err != nil { return err } diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 42f5a1a344c..36dc32f7256 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -3,7 +3,10 @@ package main import ( "context" "fmt" + "os" "path" + "strconv" + "strings" "time" "github.com/go-kit/kit/log" @@ -20,6 +23,44 @@ import ( "gopkg.in/alecthomas/kingpin.v2" ) +var ( + compactions = compactionSet{ + 1 * time.Hour, + 2 * time.Hour, + 8 * time.Hour, + 2 * 24 * time.Hour, + 14 * 24 * time.Hour, + } +) + +type compactionSet []time.Duration + +func (cs compactionSet) String() string { + result := make([]string, len(cs)) + for i, c := range cs { + result[i] = fmt.Sprintf("%d=%dh", i, int(c.Hours())) + } + return strings.Join(result, ", ") +} + +// levels returns set of compaction levels not higher than specified max compaction level +func (cs compactionSet) levels(maxLevel int) ([]int64, error) { + if maxLevel >= len(cs) { + return nil, errors.Errorf("level is bigger then default set of %d", len(cs)) + } + + levels := make([]int64, maxLevel+1) + for i, c := range cs[:maxLevel+1] { + levels[i] = int64(c / time.Millisecond) + } + return levels, nil +} + +// maxLevel returns max available compaction level +func (cs compactionSet) maxLevel() int { + return len(cs) - 1 +} + func registerCompact(m map[string]setupFunc, app *kingpin.Application, name string) { cmd := app.Command(name, "continuously compacts blocks in an object store bucket") @@ -31,8 +72,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri dataDir := cmd.Flag("data-dir", "Data directory in which 
to cache blocks and process compactions."). Default("./data").String() - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). - PlaceHolder("").Required().String() + objStoreConfig := regCommonObjStoreFlags(cmd, "") syncDelay := modelDuration(cmd.Flag("sync-delay", "Minimum age of fresh (non-compacted) blocks before they are being processed."). Default("30m")) @@ -49,11 +89,14 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri "as querying long time ranges without non-downsampled data is not efficient and not useful (is not possible to render all for human eye)."). Hidden().Default("false").Bool() + maxCompactionLevel := cmd.Flag("debug.max-compaction-level", fmt.Sprintf("Maximum compaction level, default is %d: %s", compactions.maxLevel(), compactions.String())). + Hidden().Default(strconv.Itoa(compactions.maxLevel())).Int() + m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { return runCompact(g, logger, reg, *httpAddr, *dataDir, - *bucketConfFile, + objStoreConfig, time.Duration(*syncDelay), *haltOnError, *wait, @@ -64,6 +107,7 @@ func registerCompact(m map[string]setupFunc, app *kingpin.Application, name stri }, name, *disableDownsampling, + *maxCompactionLevel, ) } } @@ -74,13 +118,14 @@ func runCompact( reg *prometheus.Registry, httpBindAddr string, dataDir string, - bucketConfFile string, + objStoreConfig *pathOrContent, syncDelay time.Duration, haltOnError bool, wait bool, retentionByResolution map[compact.ResolutionLevel]time.Duration, component string, disableDownsampling bool, + maxCompactionLevel int, ) error { halted := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "thanos_compactor_halted", @@ -94,7 +139,12 @@ func runCompact( reg.MustRegister(halted) - bkt, err := client.NewBucket(logger, bucketConfFile, reg, component) + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } + + 
bkt, err := client.NewBucket(logger, bucketConfig, reg, component) if err != nil { return err } @@ -111,15 +161,18 @@ func runCompact( return errors.Wrap(err, "create syncer") } + levels, err := compactions.levels(maxCompactionLevel) + if err != nil { + return errors.Wrap(err, "get compaction levels") + } + + if maxCompactionLevel < compactions.maxLevel() { + level.Warn(logger).Log("msg", "Max compaction level is lower than should be", "current", maxCompactionLevel, "default", compactions.maxLevel()) + } + // Instantiate the compactor with different time slices. Timestamps in TSDB // are in milliseconds. - comp, err := tsdb.NewLeveledCompactor(reg, logger, []int64{ - int64(1 * time.Hour / time.Millisecond), - int64(2 * time.Hour / time.Millisecond), - int64(8 * time.Hour / time.Millisecond), - int64(2 * 24 * time.Hour / time.Millisecond), // 2 days - int64(14 * 24 * time.Hour / time.Millisecond), // 2 weeks - }, downsample.NewPool()) + comp, err := tsdb.NewLeveledCompactor(reg, logger, levels, downsample.NewPool()) if err != nil { return errors.Wrap(err, "create compactor") } @@ -129,6 +182,10 @@ func runCompact( downsamplingDir = path.Join(dataDir, "downsample") ) + if err := os.RemoveAll(downsamplingDir); err != nil { + return errors.Wrap(err, "clean working downsample directory") + } + compactor := compact.NewBucketCompactor(logger, sy, comp, compactDir, bkt) if retentionByResolution[compact.ResolutionLevelRaw].Seconds() != 0 { diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index a6e92a696f0..c00aadd43db 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -32,11 +32,10 @@ func registerDownsample(m map[string]setupFunc, app *kingpin.Application, name s dataDir := cmd.Flag("data-dir", "Data directory in which to cache blocks and process downsamplings."). Default("./data").String() - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). 
- PlaceHolder("").Required().String() + objStoreConfig := regCommonObjStoreFlags(cmd, "") m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { - return runDownsample(g, logger, reg, *dataDir, *bucketConfFile, name) + return runDownsample(g, logger, reg, *dataDir, objStoreConfig, name) } } @@ -45,11 +44,15 @@ func runDownsample( logger log.Logger, reg *prometheus.Registry, dataDir string, - bucketConfFile string, + objStoreConfig *pathOrContent, component string, ) error { + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } - bkt, err := client.NewBucket(logger, bucketConfFile, reg, component) + bkt, err := client.NewBucket(logger, bucketConfig, reg, component) if err != nil { return err } diff --git a/cmd/thanos/flags.go b/cmd/thanos/flags.go index 30d21875511..4029428305f 100644 --- a/cmd/thanos/flags.go +++ b/cmd/thanos/flags.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "io/ioutil" "net" "strconv" "strings" @@ -16,14 +17,25 @@ import ( "gopkg.in/alecthomas/kingpin.v2" ) -func regCommonServerFlags(cmd *kingpin.CmdClause) (*string, *string, func(log.Logger, *prometheus.Registry, bool, string, bool) (*cluster.Peer, error)) { - grpcBindAddr := cmd.Flag("grpc-address", "Listen ip:port address for gRPC endpoints (StoreAPI). Make sure this address is routable from other components if you use gossip, 'grpc-advertise-address' is empty and you require cross-node connection."). +func regCommonServerFlags(cmd *kingpin.CmdClause) ( + grpcBindAddr *string, + httpBindAddr *string, + grpcTLSSrvCert *string, + grpcTLSSrvKey *string, + grpcTLSSrvClientCA *string, + peerFunc func(log.Logger, *prometheus.Registry, bool, string, bool) (*cluster.Peer, error)) { + + grpcBindAddr = cmd.Flag("grpc-address", "Listen ip:port address for gRPC endpoints (StoreAPI). 
Make sure this address is routable from other components if you use gossip, 'grpc-advertise-address' is empty and you require cross-node connection."). Default("0.0.0.0:10901").String() grpcAdvertiseAddr := cmd.Flag("grpc-advertise-address", "Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used."). String() - httpBindAddr := regHTTPAddrFlag(cmd) + grpcTLSSrvCert = cmd.Flag("grpc-server-tls-cert", "TLS Certificate for gRPC server, leave blank to disable TLS").Default("").String() + grpcTLSSrvKey = cmd.Flag("grpc-server-tls-key", "TLS Key for the gRPC server, leave blank to disable TLS").Default("").String() + grpcTLSSrvClientCA = cmd.Flag("grpc-server-tls-client-ca", "TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert)").Default("").String() + + httpBindAddr = regHTTPAddrFlag(cmd) clusterBindAddr := cmd.Flag("cluster.address", "Listen ip:port address for gossip cluster."). 
Default("0.0.0.0:10900").String() @@ -53,6 +65,9 @@ func regCommonServerFlags(cmd *kingpin.CmdClause) (*string, *string, func(log.Lo return grpcBindAddr, httpBindAddr, + grpcTLSSrvCert, + grpcTLSSrvKey, + grpcTLSSrvClientCA, func(logger log.Logger, reg *prometheus.Registry, waitIfEmpty bool, httpAdvertiseAddr string, queryAPIEnabled bool) (*cluster.Peer, error) { host, port, err := cluster.CalculateAdvertiseAddress(*grpcBindAddr, *grpcAdvertiseAddr) if err != nil { @@ -110,3 +125,46 @@ func modelDuration(flags *kingpin.FlagClause) *model.Duration { return value } + +type pathOrContent struct { + name string + + path *string + content *string +} + +func (p *pathOrContent) Content() ([]byte, error) { + if len(*p.path) > 0 && len(*p.content) > 0 { + return nil, errors.Errorf("Both file and content are set for %s", p.name) + } + + if len(*p.path) > 0 { + c, err := ioutil.ReadFile(*p.path) + if err != nil { + return nil, errors.Wrap(err, fmt.Sprintf("loading YAML file %s for %s", *p.path, p.name)) + } + return c, nil + } + + if len(*p.content) > 0 { + return []byte(*p.content), nil + } + + return nil, nil +} + +func regCommonObjStoreFlags(cmd *kingpin.CmdClause, suffix string) *pathOrContent { + fileFlagName := fmt.Sprintf("objstore%s.config-file", suffix) + bucketConfFile := cmd.Flag(fileFlagName, fmt.Sprintf("Path to YAML file that contains object store%s configuration.", suffix)). + PlaceHolder("").String() + + bucketConf := cmd.Flag(fmt.Sprintf("objstore%s.config", suffix), fmt.Sprintf("Alternative to '%s' flag. Object store%s configuration in YAML.", fileFlagName, suffix)). 
+ PlaceHolder("").String() + + return &pathOrContent{ + name: fmt.Sprintf("objstore%s.config", suffix), + + path: bucketConfFile, + content: bucketConf, + } +} diff --git a/cmd/thanos/main.go b/cmd/thanos/main.go index cadbe53e590..a3f7df6caa9 100644 --- a/cmd/thanos/main.go +++ b/cmd/thanos/main.go @@ -2,7 +2,10 @@ package main import ( "context" + "crypto/tls" + "crypto/x509" "fmt" + "io/ioutil" "math" "net" "net/http" @@ -32,6 +35,7 @@ import ( "github.com/prometheus/common/version" "google.golang.org/grpc" "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials" "google.golang.org/grpc/status" kingpin "gopkg.in/alecthomas/kingpin.v2" ) @@ -114,8 +118,9 @@ func main() { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "%s command failed", cmd)) os.Exit(1) } - _, err = gmetrics.NewGlobal(gmetrics.DefaultConfig(cmd), sink) - if err != nil { + gmetricsConfig := gmetrics.DefaultConfig("thanos_" + cmd) + gmetricsConfig.EnableRuntimeMetrics = false + if _, err = gmetrics.NewGlobal(gmetricsConfig, sink); err != nil { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "%s command failed", cmd)) os.Exit(1) } @@ -130,6 +135,11 @@ func main() { var closeFn func() error tracer, closeFn = tracing.NewOptionalGCloudTracer(ctx, logger, *gcloudTraceProject, *gcloudTraceSampleFactor, *debugName) + // This is bad, but Prometheus does not support any other tracer injections than just global one. + // TODO(bplotka): Work with basictracer to handle gracefully tracker mismatches, and also with Prometheus to allow + // tracer injection. 
+ opentracing.SetGlobalTracer(tracer) + ctx, cancel := context.WithCancel(ctx) g.Add(func() error { <-ctx.Done() @@ -196,7 +206,7 @@ func registerMetrics(mux *http.ServeMux, g prometheus.Gatherer) { // - request histogram // - tracing // - panic recovery with panic counter -func defaultGRPCServerOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer) []grpc.ServerOption { +func defaultGRPCServerOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, cert, key, clientCA string) ([]grpc.ServerOption, error) { met := grpc_prometheus.NewServerMetrics() met.EnableHandlingTimeHistogram( grpc_prometheus.WithHistogramBuckets([]float64{ @@ -214,7 +224,7 @@ func defaultGRPCServerOpts(logger log.Logger, reg *prometheus.Registry, tracer o return status.Errorf(codes.Internal, "%s", p) } reg.MustRegister(met, panicsTotal) - return []grpc.ServerOption{ + opts := []grpc.ServerOption{ grpc.MaxSendMsgSize(math.MaxInt32), grpc_middleware.WithUnaryServerChain( met.UnaryServerInterceptor(), @@ -227,6 +237,50 @@ func defaultGRPCServerOpts(logger log.Logger, reg *prometheus.Registry, tracer o grpc_recovery.StreamServerInterceptor(grpc_recovery.WithRecoveryHandler(grpcPanicRecoveryHandler)), ), } + + if key == "" && cert == "" { + if clientCA != "" { + return nil, errors.New("when a client CA is used a server key and certificate must also be provided") + } + + level.Info(logger).Log("msg", "disabled TLS, key and cert must be set to enable") + return opts, nil + } + + if key == "" || cert == "" { + return nil, errors.New("both server key and certificate must be provided") + } + + tlsCfg := &tls.Config{ + MinVersion: tls.VersionTLS12, + } + + tlsCert, err := tls.LoadX509KeyPair(cert, key) + if err != nil { + return nil, errors.Wrap(err, "server credentials") + } + + level.Info(logger).Log("msg", "enabled gRPC server side TLS") + + tlsCfg.Certificates = []tls.Certificate{tlsCert} + + if clientCA != "" { + caPEM, err := ioutil.ReadFile(clientCA) + if 
err != nil { + return nil, errors.Wrap(err, "reading client CA") + } + + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caPEM) { + return nil, errors.Wrap(err, "building client CA") + } + tlsCfg.ClientCAs = certPool + tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + + level.Info(logger).Log("msg", "gRPC server TLS client verification enabled") + } + + return append(opts, grpc.Creds(credentials.NewTLS(tlsCfg))), nil } // metricHTTPListenGroup is a run.Group that servers HTTP endpoint with only Prometheus metrics. diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index ec887f92af8..6a61ddc2559 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -2,7 +2,10 @@ package main import ( "context" + "crypto/tls" + "crypto/x509" "fmt" + "io/ioutil" "math" "net" "net/http" @@ -13,6 +16,7 @@ import ( "github.com/grpc-ecosystem/go-grpc-middleware" "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/improbable-eng/thanos/pkg/cluster" + "github.com/improbable-eng/thanos/pkg/discovery/cache" "github.com/improbable-eng/thanos/pkg/query" "github.com/improbable-eng/thanos/pkg/query/api" "github.com/improbable-eng/thanos/pkg/runutil" @@ -25,9 +29,12 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/route" + "github.com/prometheus/prometheus/discovery/file" + "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/promql" "github.com/prometheus/tsdb/labels" "google.golang.org/grpc" + "google.golang.org/grpc/credentials" "gopkg.in/alecthomas/kingpin.v2" ) @@ -35,11 +42,17 @@ import ( func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string) { cmd := app.Command(name, "query node exposing PromQL enabled Query API with data 
retrieved from multiple store nodes") - grpcBindAddr, httpBindAddr, newPeerFn := regCommonServerFlags(cmd) + grpcBindAddr, httpBindAddr, srvCert, srvKey, srvClientCA, newPeerFn := regCommonServerFlags(cmd) httpAdvertiseAddr := cmd.Flag("http-advertise-address", "Explicit (external) host:port address to advertise for HTTP QueryAPI in gossip cluster. If empty, 'http-address' will be used."). String() + secure := cmd.Flag("grpc-client-tls-secure", "Use TLS when talking to the gRPC server").Default("false").Bool() + cert := cmd.Flag("grpc-client-tls-cert", "TLS Certificates to use to identify this client to the server").Default("").String() + key := cmd.Flag("grpc-client-tls-key", "TLS Key for the client's certificate").Default("").String() + caCert := cmd.Flag("grpc-client-tls-ca", "TLS CA Certificates to use to verify gRPC servers").Default("").String() + serverName := cmd.Flag("grpc-client-server-name", "Server name to verify the hostname on the returned gRPC certificates. See https://tools.ietf.org/html/rfc4366#section-3.1").Default("").String() + queryTimeout := modelDuration(cmd.Flag("query.timeout", "Maximum time to process query by query node."). Default("2m")) @@ -55,6 +68,12 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string stores := cmd.Flag("store", "Addresses of statically configured store API servers (repeatable)."). PlaceHolder("").Strings() + fileSDFiles := cmd.Flag("store.sd-files", "Path to files that contain addresses of store API servers. The path can be a glob pattern (repeatable)."). + PlaceHolder("").Strings() + + fileSDInterval := modelDuration(cmd.Flag("store.sd-interval", "Refresh interval to re-read file SD files. It is used as a resync fallback."). + Default("5m")) + enableAutodownsampling := cmd.Flag("query.auto-downsampling", "Enable automatic adjustment (step / 5) to what source of data should be used in store gateways if no max_source_resolution param is specified. "). 
Default("false").Bool() @@ -77,12 +96,29 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string lookupStores[s] = struct{}{} } + var fileSD *file.Discovery + if len(*fileSDFiles) > 0 { + conf := &file.SDConfig{ + Files: *fileSDFiles, + RefreshInterval: *fileSDInterval, + } + fileSD = file.NewDiscovery(conf, logger) + } + return runQuery( g, logger, reg, tracer, *grpcBindAddr, + *srvCert, + *srvKey, + *srvClientCA, + *secure, + *cert, + *key, + *caCert, + *serverName, *httpBindAddr, *maxConcurrentQueries, time.Duration(*queryTimeout), @@ -91,11 +127,12 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application, name string selectorLset, *stores, *enableAutodownsampling, + fileSD, ) } } -func storeClientGRPCOpts(reg *prometheus.Registry, tracer opentracing.Tracer) []grpc.DialOption { +func storeClientGRPCOpts(logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, secure bool, cert, key, caCert string, serverName string) ([]grpc.DialOption, error) { grpcMets := grpc_prometheus.NewClientMetrics() grpcMets.EnableClientHandlingTimeHistogram( grpc_prometheus.WithHistogramBuckets([]float64{ @@ -108,7 +145,6 @@ func storeClientGRPCOpts(reg *prometheus.Registry, tracer opentracing.Tracer) [] // Current limit is ~2GB. // TODO(bplotka): Split sent chunks on store node per max 4MB chunks if needed. 
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), - grpc.WithInsecure(), grpc.WithUnaryInterceptor( grpc_middleware.ChainUnaryClient( grpcMets.UnaryClientInterceptor(), @@ -127,7 +163,54 @@ func storeClientGRPCOpts(reg *prometheus.Registry, tracer opentracing.Tracer) [] reg.MustRegister(grpcMets) } - return dialOpts + if !secure { + return append(dialOpts, grpc.WithInsecure()), nil + } + + level.Info(logger).Log("msg", "Enabling client to server TLS") + + var certPool *x509.CertPool + + if caCert != "" { + caPEM, err := ioutil.ReadFile(caCert) + if err != nil { + return nil, errors.Wrap(err, "reading client CA") + } + + certPool = x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caPEM) { + return nil, errors.Wrap(err, "building client CA") + } + level.Info(logger).Log("msg", "TLS Client using provided certificate pool") + } else { + var err error + certPool, err = x509.SystemCertPool() + if err != nil { + return nil, errors.Wrap(err, "reading system certificate pool") + } + level.Info(logger).Log("msg", "TLS Client using system certificate pool") + } + + tlsCfg := &tls.Config{ + RootCAs: certPool, + } + + if serverName != "" { + tlsCfg.ServerName = serverName + } + + if cert != "" { + cert, err := tls.LoadX509KeyPair(cert, key) + if err != nil { + return nil, errors.Wrap(err, "client credentials") + } + tlsCfg.Certificates = []tls.Certificate{cert} + level.Info(logger).Log("msg", "TLS Client authentication enabled") + } + + creds := credentials.NewTLS(tlsCfg) + + return append(dialOpts, grpc.WithTransportCredentials(creds)), nil } // runQuery starts a server that exposes PromQL Query API. 
It is responsible for querying configured @@ -138,6 +221,14 @@ func runQuery( reg *prometheus.Registry, tracer opentracing.Tracer, grpcBindAddr string, + srvCert string, + srvKey string, + srvClientCA string, + secure bool, + cert string, + key string, + caCert string, + serverName string, httpBindAddr string, maxConcurrentQueries int, queryTimeout time.Duration, @@ -146,7 +237,14 @@ func runQuery( selectorLset labels.Labels, storeAddrs []string, enableAutodownsampling bool, + fileSD *file.Discovery, ) error { + duplicatedStores := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "thanos_query_duplicated_store_address", + Help: "The number of times a duplicated store addresses is detected from the different configs in query", + }) + reg.MustRegister(duplicatedStores) + var staticSpecs []query.StoreSpec for _, addr := range storeAddrs { if addr == "" { @@ -155,13 +253,23 @@ func runQuery( staticSpecs = append(staticSpecs, query.NewGRPCStoreSpec(addr)) } + + dialOpts, err := storeClientGRPCOpts(logger, reg, tracer, secure, cert, key, caCert, serverName) + if err != nil { + return errors.Wrap(err, "building gRPC client") + } + + fileSDCache := cache.New() + var ( stores = query.NewStoreSet( logger, reg, func() (specs []query.StoreSpec) { + // Add store specs from static flags. specs = append(staticSpecs) + // Add store specs from gossip. for id, ps := range peer.PeerStates(cluster.PeerTypesStoreAPIs()...) { if ps.StoreAPIAddr == "" { level.Error(logger).Log("msg", "Gossip found peer that propagates empty address, ignoring.", "lset", fmt.Sprintf("%v", ps.Metadata.Labels)) @@ -170,9 +278,17 @@ func runQuery( specs = append(specs, &gossipSpec{id: id, addr: ps.StoreAPIAddr, peer: peer}) } + + // Add store specs from file SD. 
+ for _, addr := range fileSDCache.Addresses() { + specs = append(specs, query.NewGRPCStoreSpec(addr)) + } + + specs = removeDuplicateStoreSpecs(logger, duplicatedStores, specs) + return specs }, - storeClientGRPCOpts(reg, tracer), + dialOpts, ) proxy = store.NewProxyStore(logger, func(context.Context) ([]store.Client, error) { return stores.Get(), nil @@ -193,6 +309,40 @@ func runQuery( stores.Close() }) } + // Run File Service Discovery and update the store set when the files are modified. + if fileSD != nil { + var fileSDUpdates chan []*targetgroup.Group + ctxRun, cancelRun := context.WithCancel(context.Background()) + + fileSDUpdates = make(chan []*targetgroup.Group) + + g.Add(func() error { + fileSD.Run(ctxRun, fileSDUpdates) + return nil + }, func(error) { + cancelRun() + }) + + ctxUpdate, cancelUpdate := context.WithCancel(context.Background()) + g.Add(func() error { + for { + select { + case update := <-fileSDUpdates: + // Discoverers sometimes send nil updates so need to check for it to avoid panics. + if update == nil { + continue + } + fileSDCache.Update(update) + stores.Update(ctxUpdate) + case <-ctxUpdate.Done(): + return nil + } + } + }, func(error) { + cancelUpdate() + close(fileSDUpdates) + }) + } { ctx, cancel := context.WithCancel(context.Background()) g.Add(func() error { @@ -216,6 +366,13 @@ func runQuery( api := v1.NewAPI(logger, reg, engine, queryableCreator, enableAutodownsampling) api.Register(router.WithPrefix("/api/v1"), tracer, logger) + router.Get("/-/healthy", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + if _, err := fmt.Fprintf(w, "Thanos Querier is Healthy.\n"); err != nil { + level.Error(logger).Log("msg", "Could not write health check response.") + } + }) + mux := http.NewServeMux() registerMetrics(mux, reg) registerProfile(mux) @@ -241,7 +398,12 @@ func runQuery( } logger := log.With(logger, "component", "query") - s := grpc.NewServer(defaultGRPCServerOpts(logger, reg, tracer)...) 
+ opts, err := defaultGRPCServerOpts(logger, reg, tracer, srvCert, srvKey, srvClientCA) + if err != nil { + return errors.Wrapf(err, "build gRPC server") + } + + s := grpc.NewServer(opts...) storepb.RegisterStoreServer(s, proxy) g.Add(func() error { @@ -257,6 +419,23 @@ func runQuery( return nil } +func removeDuplicateStoreSpecs(logger log.Logger, duplicatedStores prometheus.Counter, specs []query.StoreSpec) []query.StoreSpec { + set := make(map[string]query.StoreSpec) + for _, spec := range specs { + addr := spec.Addr() + if _, ok := set[addr]; ok { + level.Warn(logger).Log("msg", "Duplicate store address is provided - %v", addr) + duplicatedStores.Inc() + } + set[addr] = spec + } + deduplicated := make([]query.StoreSpec, 0, len(set)) + for _, value := range set { + deduplicated = append(deduplicated, value) + } + return deduplicated +} + type gossipSpec struct { id string addr string diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index dd85b0630cd..40c1f15c7d4 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -24,6 +24,7 @@ import ( "github.com/improbable-eng/thanos/pkg/alert" "github.com/improbable-eng/thanos/pkg/block" "github.com/improbable-eng/thanos/pkg/cluster" + "github.com/improbable-eng/thanos/pkg/discovery/cache" "github.com/improbable-eng/thanos/pkg/objstore/client" "github.com/improbable-eng/thanos/pkg/runutil" "github.com/improbable-eng/thanos/pkg/shipper" @@ -37,6 +38,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/common/route" + "github.com/prometheus/prometheus/discovery/file" + "github.com/prometheus/prometheus/discovery/targetgroup" promlabels "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" 
"github.com/prometheus/prometheus/rules" @@ -51,7 +54,7 @@ import ( func registerRule(m map[string]setupFunc, app *kingpin.Application, name string) { cmd := app.Command(name, "ruler evaluating Prometheus rules against given Query nodes, exposing Store API and storing old blocks in bucket") - grpcBindAddr, httpBindAddr, newPeerFn := regCommonServerFlags(cmd) + grpcBindAddr, httpBindAddr, cert, key, clientCA, newPeerFn := regCommonServerFlags(cmd) labelStrs := cmd.Flag("label", "Labels to be applied to all generated metrics (repeated)."). PlaceHolder("=\"\"").Strings() @@ -73,8 +76,16 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string) alertQueryURL := cmd.Flag("alert.query-url", "The external Thanos Query URL that would be set in all alerts 'Source' field").String() - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). - PlaceHolder("").String() + objStoreConfig := regCommonObjStoreFlags(cmd, "") + + queries := cmd.Flag("query", "Addresses of statically configured query API servers (repeatable)."). + PlaceHolder("").Strings() + + fileSDFiles := cmd.Flag("query.sd-files", "Path to file that contain addresses of query peers. The path can be a glob pattern (repeatable)."). + PlaceHolder("").Strings() + + fileSDInterval := modelDuration(cmd.Flag("query.sd-interval", "Refresh interval to re-read file SD files. (used as a fallback)"). 
+ Default("5m")) m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { lset, err := parseFlagLabels(*labelStrs) @@ -97,6 +108,25 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string) NoLockfile: true, WALFlushInterval: 30 * time.Second, } + + lookupQueries := map[string]struct{}{} + for _, q := range *queries { + if _, ok := lookupQueries[q]; ok { + return errors.Errorf("Address %s is duplicated for --query flag.", q) + } + + lookupQueries[q] = struct{}{} + } + + var fileSD *file.Discovery + if len(*fileSDFiles) > 0 { + conf := &file.SDConfig{ + Files: *fileSDFiles, + RefreshInterval: *fileSDInterval, + } + fileSD = file.NewDiscovery(conf, logger) + } + return runRule(g, logger, reg, @@ -104,15 +134,20 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application, name string) lset, *alertmgrs, *grpcBindAddr, + *cert, + *key, + *clientCA, *httpBindAddr, time.Duration(*evalInterval), *dataDir, *ruleFiles, peer, - *bucketConfFile, + objStoreConfig, tsdbOpts, name, alertQueryURL, + *queries, + fileSD, ) } } @@ -127,15 +162,20 @@ func runRule( lset labels.Labels, alertmgrURLs []string, grpcBindAddr string, + cert string, + key string, + clientCA string, httpBindAddr string, evalInterval time.Duration, dataDir string, ruleFiles []string, peer *cluster.Peer, - bucketConfFile string, + objStoreConfig *pathOrContent, tsdbOpts *tsdb.Options, component string, alertQueryURL *url.URL, + queryAddrs []string, + fileSD *file.Discovery, ) error { configSuccess := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "thanos_config_last_reload_successful", @@ -145,9 +185,19 @@ func runRule( Name: "thanos_config_last_reload_success_timestamp_seconds", Help: "Timestamp of the last successful configuration reload.", }) - + duplicatedQuery := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "thanos_rule_duplicated_query_address", + Help: "The number of times a duplicated query addresses 
is detected from the different configs in rule", + }) reg.MustRegister(configSuccess) reg.MustRegister(configSuccessTime) + reg.MustRegister(duplicatedQuery) + + for _, addr := range queryAddrs { + if addr == "" { + return errors.New("static querier address cannot be empty") + } + } db, err := tsdb.Open(dataDir, log.With(logger, "component", "tsdb"), reg, tsdbOpts) if err != nil { @@ -163,9 +213,17 @@ func runRule( }) } + // FileSD query addresses + fileSDCache := cache.New() + // Hit the HTTP query API of query peers in randomized order until we get a result // back or the context get canceled. queryFn := func(ctx context.Context, q string, t time.Time) (promql.Vector, error) { + var addrs []string + // Add addresses from static flag + addrs = append(addrs, queryAddrs...) + + // Add addresses from gossip peers := peer.PeerStates(cluster.PeerTypeQuery) var ids []string for id := range peers { @@ -174,9 +232,19 @@ func runRule( sort.Slice(ids, func(i int, j int) bool { return strings.Compare(ids[i], ids[j]) < 0 }) + for _, id := range ids { + addrs = append(addrs, peers[id].QueryAPIAddr) + } - for _, i := range rand.Perm(len(ids)) { - vec, err := queryPrometheusInstant(ctx, logger, peers[ids[i]].QueryAPIAddr, q, t) + // Add addresses from file sd + for _, addr := range fileSDCache.Addresses() { + addrs = append(addrs, addr) + } + + removeDuplicateQueryAddrs(logger, duplicatedQuery, addrs) + + for _, i := range rand.Perm(len(addrs)) { + vec, err := queryPrometheusInstant(ctx, logger, addrs[i], q, t) if err != nil { return nil, err } @@ -296,6 +364,39 @@ func runRule( cancel() }) } + // Run File Service Discovery and update the query addresses when the files are modified + if fileSD != nil { + var fileSDUpdates chan []*targetgroup.Group + ctxRun, cancelRun := context.WithCancel(context.Background()) + + fileSDUpdates = make(chan []*targetgroup.Group) + + g.Add(func() error { + fileSD.Run(ctxRun, fileSDUpdates) + return nil + }, func(error) { + cancelRun() + }) + + 
ctxUpdate, cancelUpdate := context.WithCancel(context.Background()) + g.Add(func() error { + for { + select { + case update := <-fileSDUpdates: + // Discoverers sometimes send nil updates so need to check for it to avoid panics + if update == nil { + continue + } + fileSDCache.Update(update) + case <-ctxUpdate.Done(): + return nil + } + } + }, func(error) { + cancelUpdate() + close(fileSDUpdates) + }) + } // Handle reload and termination interrupts. reload := make(chan struct{}, 1) @@ -369,7 +470,11 @@ func runRule( store := store.NewTSDBStore(logger, reg, db, lset) - s := grpc.NewServer(defaultGRPCServerOpts(logger, reg, tracer)...) + opts, err := defaultGRPCServerOpts(logger, reg, tracer, cert, key, clientCA) + if err != nil { + return errors.Wrap(err, "setup gRPC options") + } + s := grpc.NewServer(opts...) storepb.RegisterStoreServer(s, store) g.Add(func() error { @@ -408,9 +513,13 @@ func runRule( var uploads = true + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } // The background shipper continuously scans the data directory and uploads // new blocks to Google Cloud Storage or an S3-compatible storage service. 
- bkt, err := client.NewBucket(logger, bucketConfFile, reg, component) + bkt, err := client.NewBucket(logger, bucketConfig, reg, component) if err != nil && err != client.ErrNotFound { return err } @@ -636,3 +745,20 @@ func labelsTSDBToProm(lset labels.Labels) (res promlabels.Labels) { } return res } + +func removeDuplicateQueryAddrs(logger log.Logger, duplicatedQueriers prometheus.Counter, addrs []string) []string { + set := make(map[string]struct{}) + for _, addr := range addrs { + if _, ok := set[addr]; ok { + level.Warn(logger).Log("msg", "Duplicate query address is provided - %v", addr) + duplicatedQueriers.Inc() + } + set[addr] = struct{}{} + } + + deduplicated := make([]string, 0, len(set)) + for key := range set { + deduplicated = append(deduplicated, key) + } + return deduplicated +} diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index 3c78f03597d..cefe17b51f7 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -34,7 +34,7 @@ import ( func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name string) { cmd := app.Command(name, "sidecar for Prometheus server") - grpcBindAddr, httpBindAddr, newPeerFn := regCommonServerFlags(cmd) + grpcBindAddr, httpBindAddr, cert, key, clientCA, newPeerFn := regCommonServerFlags(cmd) promURL := cmd.Flag("prometheus.url", "URL at which to reach Prometheus's API. For better performance use local network."). Default("http://localhost:9090").URL() @@ -50,8 +50,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name stri reloaderRuleDirs := cmd.Flag("reloader.rule-dir", "Rule directories for the reloader to refresh (repeated field).").Strings() - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). 
- PlaceHolder("").String() + objStoreConfig := regCommonObjStoreFlags(cmd, "") m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error { rl := reloader.New( @@ -71,10 +70,13 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application, name stri reg, tracer, *grpcBindAddr, + *cert, + *key, + *clientCA, *httpBindAddr, *promURL, *dataDir, - *bucketConfFile, + objStoreConfig, peer, rl, name, @@ -88,10 +90,13 @@ func runSidecar( reg *prometheus.Registry, tracer opentracing.Tracer, grpcBindAddr string, + cert string, + key string, + clientCA string, httpBindAddr string, promURL *url.URL, dataDir string, - bucketConfFile string, + objStoreConfig *pathOrContent, peer *cluster.Peer, reloader *reloader.Reloader, component string, @@ -203,7 +208,11 @@ func runSidecar( return errors.Wrap(err, "create Prometheus store") } - s := grpc.NewServer(defaultGRPCServerOpts(logger, reg, tracer)...) + opts, err := defaultGRPCServerOpts(logger, reg, tracer, cert, key, clientCA) + if err != nil { + return errors.Wrap(err, "setup gRPC server") + } + s := grpc.NewServer(opts...) storepb.RegisterStoreServer(s, promStore) g.Add(func() error { @@ -217,9 +226,14 @@ func runSidecar( var uploads = true + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } + // The background shipper continuously scans the data directory and uploads // new blocks to Google Cloud Storage or an S3-compatible storage service. - bkt, err := client.NewBucket(logger, bucketConfFile, reg, component) + bkt, err := client.NewBucket(logger, bucketConfig, reg, component) if err != nil && err != client.ErrNotFound { return err } diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 4110736e787..3d509bb5188 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -23,22 +23,24 @@ import ( // registerStore registers a store command. 
func registerStore(m map[string]setupFunc, app *kingpin.Application, name string) { - cmd := app.Command(name, "store node giving access to blocks in a bucket provider. Now supported GCS / S3.") + cmd := app.Command(name, "store node giving access to blocks in a bucket provider. Now supported GCS, S3, Azure and Swift.") - grpcBindAddr, httpBindAddr, newPeerFn := regCommonServerFlags(cmd) + grpcBindAddr, httpBindAddr, cert, key, clientCA, newPeerFn := regCommonServerFlags(cmd) dataDir := cmd.Flag("data-dir", "Data directory in which to cache remote blocks."). Default("./data").String() - bucketConfFile := cmd.Flag("objstore.config-file", "The object store configuration file path."). - PlaceHolder("").Required().String() - indexCacheSize := cmd.Flag("index-cache-size", "Maximum size of items held in the index cache."). Default("250MB").Bytes() chunkPoolSize := cmd.Flag("chunk-pool-size", "Maximum size of concurrently allocatable bytes for chunks."). Default("2GB").Bytes() + objStoreConfig := regCommonObjStoreFlags(cmd, "") + + syncInterval := cmd.Flag("sync-block-duration", "Repeat interval for syncing the blocks between local and remote view."). 
+ Default("3m").Duration() + m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, debugLogging bool) error { peer, err := newPeerFn(logger, reg, false, "", false) if err != nil { @@ -48,15 +50,19 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application, name string logger, reg, tracer, - *bucketConfFile, + objStoreConfig, *dataDir, *grpcBindAddr, + *cert, + *key, + *clientCA, *httpBindAddr, peer, uint64(*indexCacheSize), uint64(*chunkPoolSize), name, debugLogging, + *syncInterval, ) } } @@ -67,18 +73,27 @@ func runStore( logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, - bucketConfFile string, + objStoreConfig *pathOrContent, dataDir string, grpcBindAddr string, + cert string, + key string, + clientCA string, httpBindAddr string, peer *cluster.Peer, indexCacheSizeBytes uint64, chunkPoolSizeBytes uint64, component string, verbose bool, + syncInterval time.Duration, ) error { { - bkt, err := client.NewBucket(logger, bucketConfFile, reg, component) + bucketConfig, err := objStoreConfig.Content() + if err != nil { + return err + } + + bkt, err := client.NewBucket(logger, bucketConfig, reg, component) if err != nil { return errors.Wrap(err, "create bucket client") } @@ -114,7 +129,7 @@ func runStore( g.Add(func() error { defer runutil.CloseWithLogOnErr(logger, bkt, "bucket client") - err := runutil.Repeat(3*time.Minute, ctx.Done(), func() error { + err := runutil.Repeat(syncInterval, ctx.Done(), func() error { if err := bs.SyncBlocks(ctx); err != nil { level.Warn(logger).Log("msg", "syncing blocks failed", "err", err) } @@ -133,7 +148,12 @@ func runStore( return errors.Wrap(err, "listen API address") } - s := grpc.NewServer(defaultGRPCServerOpts(logger, reg, tracer)...) + opts, err := defaultGRPCServerOpts(logger, reg, tracer, cert, key, clientCA) + if err != nil { + return errors.Wrap(err, "grpc server options") + } + + s := grpc.NewServer(opts...) 
storepb.RegisterStoreServer(s, bs) g.Add(func() error { diff --git a/docs/components/bucket.md b/docs/components/bucket.md index 5a7c87be787..ef112a7c652 100644 --- a/docs/components/bucket.md +++ b/docs/components/bucket.md @@ -1,7 +1,7 @@ # Bucket The bucket component of Thanos is a set of commands to inspect data in object storage buckets. -It is normally run as a stand alone command to aid with troubleshooting. +It is normally run as a stand alone command to aid with troubleshooting. Example: @@ -26,7 +26,7 @@ by adding a new command within `/cmd/thanos/bucket.go` [embedmd]:# (flags/bucket.txt $) ```$ -usage: thanos bucket --objstore.config-file= [ ...] +usage: thanos bucket [] [ ...] inspect metric data in an object storage bucket @@ -42,8 +42,18 @@ Flags: How often we send traces (1/). If 0 no trace will be sent periodically, unless forced by baggage item. See `pkg/tracing/tracing.go` for details. - --objstore.config-file= - The object store configuration file path. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. Object store + configuration in YAML. + --objstore-backup.config-file= + Path to YAML file that contains object store-backup + configuration. + --objstore-backup.config= + Alternative to 'objstore-backup.config-file' flag. + Object store-backup configuration in YAML. Subcommands: bucket verify [] @@ -63,7 +73,7 @@ Example: ``` $ thanos bucket verify --gcs.bucket example-bucket -``` +``` [embedmd]:# (flags/bucket_verify.txt) ```txt @@ -83,11 +93,19 @@ Flags: How often we send traces (1/). If 0 no trace will be sent periodically, unless forced by baggage item. See `pkg/tracing/tracing.go` for details. - --objstore.config-file= - The object store configuration file path. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. 
Object store + configuration in YAML. + --objstore-backup.config-file= + Path to YAML file that contains object store-backup + configuration. + --objstore-backup.config= + Alternative to 'objstore-backup.config-file' flag. + Object store-backup configuration in YAML. -r, --repair attempt to repair blocks for which issues were detected - --objstore-backup.config-file= - The backup object store configuration file path. -i, --issues=index_issue... ... Issues to verify (and optionally repair). Possible values: [duplicated_compaction index_issue @@ -107,7 +125,7 @@ Example: ``` $ thanos bucket ls -o json --gcs.bucket example-bucket -``` +``` [embedmd]:# (flags/bucket_ls.txt) ```txt @@ -127,8 +145,18 @@ Flags: How often we send traces (1/). If 0 no trace will be sent periodically, unless forced by baggage item. See `pkg/tracing/tracing.go` for details. - --objstore.config-file= - The object store configuration file path. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. Object store + configuration in YAML. + --objstore-backup.config-file= + Path to YAML file that contains object store-backup + configuration. + --objstore-backup.config= + Alternative to 'objstore-backup.config-file' flag. + Object store-backup configuration in YAML. -o, --output="" Format in which to print each block's information. May be 'json' or custom template. diff --git a/docs/components/compact.md b/docs/components/compact.md index df738f747f2..2ebc75cced2 100644 --- a/docs/components/compact.md +++ b/docs/components/compact.md @@ -26,7 +26,7 @@ On-disk data is safe to delete between restarts and should be the first attempt [embedmd]:# (flags/compact.txt $) ```$ -usage: thanos compact --objstore.config-file= [] +usage: thanos compact [] continuously compacts blocks in an object store bucket @@ -47,8 +47,12 @@ Flags: Listen host:port for HTTP endpoints. 
--data-dir="./data" Data directory in which to cache blocks and process compactions. - --objstore.config-file= - The object store configuration file path. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. Object + store configuration in YAML. --sync-delay=30m Minimum age of fresh (non-compacted) blocks before they are being processed. --retention.resolution-raw=0d diff --git a/docs/components/query.md b/docs/components/query.md index 5d9fa25ea15..e2f011618f6 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -58,6 +58,14 @@ Flags: Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used. + --grpc-server-tls-cert="" TLS Certificate for gRPC server, leave blank to + disable TLS + --grpc-server-tls-key="" TLS Key for the gRPC server, leave blank to + disable TLS + --grpc-server-tls-client-ca="" + TLS CA to verify clients against. If no client + CA is specified, there is no client + verification on server side. (tls.NoClientCert) --http-address="0.0.0.0:10902" Listen host:port for HTTP endpoints. --cluster.address="0.0.0.0:10900" @@ -98,6 +106,16 @@ Flags: Explicit (external) host:port address to advertise for HTTP QueryAPI in gossip cluster. If empty, 'http-address' will be used. + --grpc-client-tls-secure Use TLS when talking to the gRPC server + --grpc-client-tls-cert="" TLS Certificates to use to identify this client + to the server + --grpc-client-tls-key="" TLS Key for the client's certificate + --grpc-client-tls-ca="" TLS CA Certificates to use to verify gRPC + servers + --grpc-client-server-name="" + Server name to verify the hostname on the + returned gRPC certificates. See + https://tools.ietf.org/html/rfc4366#section-3.1 --query.timeout=2m Maximum time to process query by query node. 
--query.max-concurrent=20 Maximum number of queries processed concurrently by query node. @@ -111,6 +129,12 @@ Flags: info endpoint (repeated). --store= ... Addresses of statically configured store API servers (repeatable). + --store.sd-files= ... + Path to files that contain addresses of store + API servers. The path can be a glob pattern + (repeatable). + --store.sd-interval=5m Refresh interval to re-read file SD files. It + is used as a resync fallback. --query.auto-downsampling Enable automatic adjustment (step / 5) to what source of data should be used in store gateways if no max_source_resolution param is specified. diff --git a/docs/components/rule.md b/docs/components/rule.md index 02378123974..6bc108ac685 100644 --- a/docs/components/rule.md +++ b/docs/components/rule.md @@ -42,84 +42,104 @@ ruler evaluating Prometheus rules against given Query nodes, exposing Store API and storing old blocks in bucket Flags: - -h, --help Show context-sensitive help (also try - --help-long and --help-man). - --version Show application version. - --log.level=info Log filtering level. + -h, --help Show context-sensitive help (also try + --help-long and --help-man). + --version Show application version. + --log.level=info Log filtering level. --gcloudtrace.project=GCLOUDTRACE.PROJECT - GCP project to send Google Cloud Trace tracings - to. If empty, tracing will be disabled. + GCP project to send Google Cloud Trace tracings + to. If empty, tracing will be disabled. --gcloudtrace.sample-factor=1 - How often we send traces (1/). If - 0 no trace will be sent periodically, unless - forced by baggage item. See - `pkg/tracing/tracing.go` for details. + How often we send traces (1/). + If 0 no trace will be sent periodically, unless + forced by baggage item. See + `pkg/tracing/tracing.go` for details. --grpc-address="0.0.0.0:10901" - Listen ip:port address for gRPC endpoints - (StoreAPI). 
Make sure this address is routable - from other components if you use gossip, - 'grpc-advertise-address' is empty and you - require cross-node connection. + Listen ip:port address for gRPC endpoints + (StoreAPI). Make sure this address is routable + from other components if you use gossip, + 'grpc-advertise-address' is empty and you + require cross-node connection. --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS - Explicit (external) host:port address to - advertise for gRPC StoreAPI in gossip cluster. - If empty, 'grpc-address' will be used. + Explicit (external) host:port address to + advertise for gRPC StoreAPI in gossip cluster. + If empty, 'grpc-address' will be used. + --grpc-server-tls-cert="" TLS Certificate for gRPC server, leave blank to + disable TLS + --grpc-server-tls-key="" TLS Key for the gRPC server, leave blank to + disable TLS + --grpc-server-tls-client-ca="" + TLS CA to verify clients against. If no client + CA is specified, there is no client + verification on server side. (tls.NoClientCert) --http-address="0.0.0.0:10902" - Listen host:port for HTTP endpoints. + Listen host:port for HTTP endpoints. --cluster.address="0.0.0.0:10900" - Listen ip:port address for gossip cluster. + Listen ip:port address for gossip cluster. --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS - Explicit (external) ip:port address to advertise - for gossip in gossip cluster. Used internally - for membership only. + Explicit (external) ip:port address to + advertise for gossip in gossip cluster. Used + internally for membership only. --cluster.peers=CLUSTER.PEERS ... - Initial peers to join the cluster. It can be - either , or . A lookup - resolution is done only at the startup. + Initial peers to join the cluster. It can be + either , or . A lookup + resolution is done only at the startup. --cluster.gossip-interval= - Interval between sending gossip messages. 
By - lowering this value (more frequent) gossip - messages are propagated across the cluster more - quickly at the expense of increased bandwidth. - Default is used from a specified network-type. + Interval between sending gossip messages. By + lowering this value (more frequent) gossip + messages are propagated across the cluster more + quickly at the expense of increased bandwidth. + Default is used from a specified network-type. --cluster.pushpull-interval= - Interval for gossip state syncs. Setting this - interval lower (more frequent) will increase - convergence speeds across larger clusters at the - expense of increased bandwidth usage. Default is - used from a specified network-type. + Interval for gossip state syncs. Setting this + interval lower (more frequent) will increase + convergence speeds across larger clusters at + the expense of increased bandwidth usage. + Default is used from a specified network-type. --cluster.refresh-interval=1m - Interval for membership to refresh cluster.peers - state, 0 disables refresh. + Interval for membership to refresh + cluster.peers state, 0 disables refresh. --cluster.secret-key=CLUSTER.SECRET-KEY - Initial secret key to encrypt cluster gossip. - Can be one of AES-128, AES-192, or AES-256 in - hexadecimal format. + Initial secret key to encrypt cluster gossip. + Can be one of AES-128, AES-192, or AES-256 in + hexadecimal format. --cluster.network-type=lan - Network type with predefined peers - configurations. Sets of configurations - accounting the latency differences between - network types: local, lan, wan. + Network type with predefined peers + configurations. Sets of configurations + accounting the latency differences between + network types: local, lan, wan. --label=="" ... - Labels to be applied to all generated metrics - (repeated). - --data-dir="data/" data directory - --rule-file=rules/ ... Rule files that should be used by rule manager. - Can be in glob format (repeated). 
- --eval-interval=30s The default evaluation interval to use. - --tsdb.block-duration=2h Block duration for TSDB block. - --tsdb.retention=48h Block retention time on local disk. + Labels to be applied to all generated metrics + (repeated). + --data-dir="data/" data directory + --rule-file=rules/ ... Rule files that should be used by rule manager. + Can be in glob format (repeated). + --eval-interval=30s The default evaluation interval to use. + --tsdb.block-duration=2h Block duration for TSDB block. + --tsdb.retention=48h Block retention time on local disk. --alertmanagers.url=ALERTMANAGERS.URL ... - Alertmanager URLs to push firing alerts to. The - scheme may be prefixed with 'dns+' or 'dnssrv+' - to detect Alertmanager IPs through respective - DNS lookups. The port defaults to 9093 or the - SRV record's value. The URL path is used as a - prefix for the regular Alertmanager API path. + Alertmanager URLs to push firing alerts to. The + scheme may be prefixed with 'dns+' or 'dnssrv+' + to detect Alertmanager IPs through respective + DNS lookups. The port defaults to 9093 or the + SRV record's value. The URL path is used as a + prefix for the regular Alertmanager API path. --alert.query-url=ALERT.QUERY-URL - The external Thanos Query URL that would be set - in all alerts 'Source' field - --objstore.config-file= - The object store configuration file path. + The external Thanos Query URL that would be set + in all alerts 'Source' field + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. + Object store configuration in YAML. + --query= ... Addresses of statically configured query API + servers (repeatable). + --query.sd-files= ... + Path to file that contain addresses of query + peers. The path can be a glob pattern + (repeatable). + --query.sd-interval=5m Refresh interval to re-read file SD files. 
+ (used as a fallback) ``` diff --git a/docs/components/sidecar.md b/docs/components/sidecar.md index dbd92bc0880..7aaf9b68d92 100644 --- a/docs/components/sidecar.md +++ b/docs/components/sidecar.md @@ -60,6 +60,14 @@ Flags: Explicit (external) host:port address to advertise for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' will be used. + --grpc-server-tls-cert="" TLS Certificate for gRPC server, leave blank to + disable TLS + --grpc-server-tls-key="" TLS Key for the gRPC server, leave blank to + disable TLS + --grpc-server-tls-client-ca="" + TLS CA to verify clients against. If no client + CA is specified, there is no client + verification on server side. (tls.NoClientCert) --http-address="0.0.0.0:10902" Listen host:port for HTTP endpoints. --cluster.address="0.0.0.0:10900" @@ -107,8 +115,12 @@ Flags: --reloader.rule-dir=RELOADER.RULE-DIR ... Rule directories for the reloader to refresh (repeated field). - --objstore.config-file= - The object store configuration file path. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. + Object store configuration in YAML. ``` diff --git a/docs/components/store.md b/docs/components/store.md index eae9af5437c..e9f2115c8fd 100644 --- a/docs/components/store.md +++ b/docs/components/store.md @@ -25,74 +25,89 @@ In general about 1MB of local disk space is required per TSDB block stored in th [embedmd]:# (flags/store.txt $) ```$ -usage: thanos store --objstore.config-file= [] +usage: thanos store [] -store node giving access to blocks in a bucket provider. Now supported GCS / S3. +store node giving access to blocks in a bucket provider. Now supported GCS, S3, +Azure and Swift. Flags: - -h, --help Show context-sensitive help (also try - --help-long and --help-man). - --version Show application version. - --log.level=info Log filtering level. 
+ -h, --help Show context-sensitive help (also try + --help-long and --help-man). + --version Show application version. + --log.level=info Log filtering level. --gcloudtrace.project=GCLOUDTRACE.PROJECT - GCP project to send Google Cloud Trace tracings - to. If empty, tracing will be disabled. + GCP project to send Google Cloud Trace tracings + to. If empty, tracing will be disabled. --gcloudtrace.sample-factor=1 - How often we send traces (1/). If - 0 no trace will be sent periodically, unless - forced by baggage item. See - `pkg/tracing/tracing.go` for details. + How often we send traces (1/). + If 0 no trace will be sent periodically, unless + forced by baggage item. See + `pkg/tracing/tracing.go` for details. --grpc-address="0.0.0.0:10901" - Listen ip:port address for gRPC endpoints - (StoreAPI). Make sure this address is routable - from other components if you use gossip, - 'grpc-advertise-address' is empty and you - require cross-node connection. + Listen ip:port address for gRPC endpoints + (StoreAPI). Make sure this address is routable + from other components if you use gossip, + 'grpc-advertise-address' is empty and you + require cross-node connection. --grpc-advertise-address=GRPC-ADVERTISE-ADDRESS - Explicit (external) host:port address to - advertise for gRPC StoreAPI in gossip cluster. - If empty, 'grpc-address' will be used. + Explicit (external) host:port address to + advertise for gRPC StoreAPI in gossip cluster. + If empty, 'grpc-address' will be used. + --grpc-server-tls-cert="" TLS Certificate for gRPC server, leave blank to + disable TLS + --grpc-server-tls-key="" TLS Key for the gRPC server, leave blank to + disable TLS + --grpc-server-tls-client-ca="" + TLS CA to verify clients against. If no client + CA is specified, there is no client + verification on server side. (tls.NoClientCert) --http-address="0.0.0.0:10902" - Listen host:port for HTTP endpoints. + Listen host:port for HTTP endpoints. 
--cluster.address="0.0.0.0:10900" - Listen ip:port address for gossip cluster. + Listen ip:port address for gossip cluster. --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS - Explicit (external) ip:port address to advertise - for gossip in gossip cluster. Used internally - for membership only. + Explicit (external) ip:port address to + advertise for gossip in gossip cluster. Used + internally for membership only. --cluster.peers=CLUSTER.PEERS ... - Initial peers to join the cluster. It can be - either , or . A lookup - resolution is done only at the startup. + Initial peers to join the cluster. It can be + either , or . A lookup + resolution is done only at the startup. --cluster.gossip-interval= - Interval between sending gossip messages. By - lowering this value (more frequent) gossip - messages are propagated across the cluster more - quickly at the expense of increased bandwidth. - Default is used from a specified network-type. + Interval between sending gossip messages. By + lowering this value (more frequent) gossip + messages are propagated across the cluster more + quickly at the expense of increased bandwidth. + Default is used from a specified network-type. --cluster.pushpull-interval= - Interval for gossip state syncs. Setting this - interval lower (more frequent) will increase - convergence speeds across larger clusters at the - expense of increased bandwidth usage. Default is - used from a specified network-type. + Interval for gossip state syncs. Setting this + interval lower (more frequent) will increase + convergence speeds across larger clusters at + the expense of increased bandwidth usage. + Default is used from a specified network-type. --cluster.refresh-interval=1m - Interval for membership to refresh cluster.peers - state, 0 disables refresh. + Interval for membership to refresh + cluster.peers state, 0 disables refresh. --cluster.secret-key=CLUSTER.SECRET-KEY - Initial secret key to encrypt cluster gossip. 
- Can be one of AES-128, AES-192, or AES-256 in - hexadecimal format. + Initial secret key to encrypt cluster gossip. + Can be one of AES-128, AES-192, or AES-256 in + hexadecimal format. --cluster.network-type=lan - Network type with predefined peers - configurations. Sets of configurations - accounting the latency differences between - network types: local, lan, wan. - --data-dir="./data" Data directory in which to cache remote blocks. - --objstore.config-file= - The object store configuration file path. - --index-cache-size=250MB Maximum size of items held in the index cache. - --chunk-pool-size=2GB Maximum size of concurrently allocatable bytes - for chunks. + Network type with predefined peers + configurations. Sets of configurations + accounting the latency differences between + network types: local, lan, wan. + --data-dir="./data" Data directory in which to cache remote blocks. + --index-cache-size=250MB Maximum size of items held in the index cache. + --chunk-pool-size=2GB Maximum size of concurrently allocatable bytes + for chunks. + --objstore.config-file= + Path to YAML file that contains object store + configuration. + --objstore.config= + Alternative to 'objstore.config-file' flag. + Object store configuration in YAML. + --sync-block-duration=3m Repeat interval for syncing the blocks between + local and remote view. ``` diff --git a/docs/design.md b/docs/design.md index 07ff088e359..3e21e43604a 100644 --- a/docs/design.md +++ b/docs/design.md @@ -175,7 +175,7 @@ The cost for this amount of metric data would cost approximately $2400/month on In return, being able to reduce the retention time of Prometheus instances from weeks to hours will provide cost savings for local SSD or network block storage (typically $0.17/GB) and reduce memory consumption. This calculation does not yet account for shorter retention spans of low-priority data and downsampling. 
-[tsdb-format]: https://github.com/prometheus/tsdb/tree/master/Documentation/format +[tsdb-format]: https://github.com/prometheus/tsdb/tree/master/docs/format [tsdb-talk]: https://www.slideshare.net/FabianReinartz/storing-16-bytes-at-scale-81282712 [tsdb-lib]: https://godoc.org/github.com/prometheus/tsdb [promql-lib]: https://godoc.org/github.com/prometheus/prometheus/promql diff --git a/docs/getting_started.md b/docs/getting_started.md index 5a119a7031a..04c191f048e 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -3,7 +3,7 @@ Thanos provides a global query view, data backup, and historical data access as its core features in a single binary. All three features can be run independently of each other. This allows you to have a subset of Thanos features ready for immediate benefit or testing, while also making it flexible for gradual roll outs in more complex environments. In this quick-start guide, we will configure Thanos and all components mentioned to work against a Google Cloud Storage bucket. -At the moment, Thanos is able to use [GCS and S3 as storage providers](storage.md), with the ability to add more providers as necessary. You can substitute Google Cloud specific flags in this guide with those of your object store detailed in the [Storage document](storage.md). +At the moment, Thanos is able to use [GCS, S3 and SWIFT as storage providers](storage.md), with the ability to add more providers as necessary. You can substitute Google Cloud specific flags in this guide with those of your object store detailed in the [Storage document](storage.md). ## Requirements @@ -13,7 +13,10 @@ At the moment, Thanos is able to use [GCS and S3 as storage providers](storage.m ## Get Thanos! -Thanos has no official releases yet. 
With a working installation of the Go [toolchain](https://github.com/golang/tools) (`GOPATH`, `PATH=${GOPATH}/bin:${PATH}`), Thanos can be downloaded and built by running: +You can find the latest Thanos release [here](https://github.com/improbable-eng/thanos/releases). + +If you want to build Thanos from source - +with a working installation of the Go [toolchain](https://github.com/golang/tools) (`GOPATH`, `PATH=${GOPATH}/bin:${PATH}`), Thanos can be downloaded and built by running: ``` go get -d github.com/improbable-eng/thanos/... diff --git a/docs/img/slack.png b/docs/img/slack.png deleted file mode 100644 index 8299809219b..00000000000 Binary files a/docs/img/slack.png and /dev/null differ diff --git a/docs/proposals/gossip-removal.md b/docs/proposals/approved/201809_gossip-removal.md similarity index 100% rename from docs/proposals/gossip-removal.md rename to docs/proposals/approved/201809_gossip-removal.md diff --git a/docs/proposals/rejected/201807_store_instance_high_availability.md b/docs/proposals/rejected/201807_store_instance_high_availability.md new file mode 100644 index 00000000000..03d8971ad06 --- /dev/null +++ b/docs/proposals/rejected/201807_store_instance_high_availability.md @@ -0,0 +1,164 @@ +# High-availability for store instances + +Status: draft | in-review | **rejected** | accepted | complete + +Proposal author: [@mattbostock](https://github.com/mattbostock) +Implementation owner: [@mattbostock](https://github.com/mattbostock) + +## Status: Rejected + +This proposal makes total sense and solves our goals when using gossip. However there exists a very easy solution +to this problem in form of using just static entry with any loadbalancer like Kubernetes Service to load balance +through different Store Gateways. Those are technically stateless, so request can fetch the data independently. 
+ +## Motivation + +Thanos store instances currently have no explicit support for +high-availability; query instances treat all store instances equally. If +multiple store instances are used as gateways to a single bucket in an object +store, Thanos query instances will wait for all instances to respond (subject +to timeouts) before returning a response. + +## Goals + +- Explicitly support and document high availability for store instances. + +- Reduce the query latency incurred by failing store instances when other store + instances could return the same response faster. + +## Proposal + +Thanos supports deduplication of metrics retrieved from multiple Prometheus +servers to avoid gaps in query responses where a single Prometheus server +failed but similar data was recorded by another Prometheus server in the same +failure domain. To support deduplication, Thanos must wait for all Thanos +sidecar servers to return their data (subject to timeouts) before returning a +response to a client. + +When retrieving data from Thanos bucket store instances, however, the desired +behaviour is different; we want Thanos use the first successful response it +receives, on the assumption that all bucket store instances that communicate +with the same bucket have access to the same data. 
+ +To support the desired behaviour for bucket store instances while still +allowing for deduplication, we propose to expand the [InfoResponse +Protobuf](https://github.com/improbable-eng/thanos/blob/b67aa3a709062be97215045f7488df67a9af2c66/pkg/store/storepb/rpc.proto#L28-L32) +used by the Store API by adding two fields: + +- a string identifier that can be used to group store instances + +- an enum representing the [peer type as defined in the cluster + package](https://github.com/improbable-eng/thanos/blob/673614d9310f3f90fdb4585ca6201496ff92c697/pkg/cluster/cluster.go#L51-L64) + +For example; + +```diff +--- before 2018-07-02 15:49:09.000000000 +0100 ++++ after 2018-07-02 15:49:13.000000000 +0100 +@@ -1,5 +1,6 @@ + message InfoResponse { + repeated Label labels = 1 [(gogoproto.nullable) = false]; + int64 min_time = 2; + int64 max_time = 3; ++ string store_group_id = 4; ++ enum PeerType { ++ STORE = 0; ++ SOURCE = 1; ++ QUERY = 2; ++ } ++ PeerType peer_type = 5; + } +``` + +For the purpose of querying data from store instances, stores instance will be +grouped by: + +- labels, as returned as part of `InfoResponse` +- the new `store_group_id` string identifier + +Therefore, stores having identical sets of labels and identical values for +`store_group_id` will belong in the same group for the purpose of querying +data. Stores having an empty `store_group_id` field and matching labels will be +considered to be part of the same group. Stores having an empty +`store_group_id` field and empty label sets will also be considered part of the +same group. + +If a service implementing the store API (a 'store instance') has a `STORE` or +`QUERY` peer type, query instances will treat each store instance in the same +group as having access to the same data. Query instances will randomly pick any +two store instances[1][] from the same group and use the first response +returned. 
+ +[1]: https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf + +Otherwise, for the `SOURCE` peer type, query instances will wait for all +instances within the same group to respond (subject to existing timeouts) +before returning a response, consistent with the current behaviour. This is +necessary to collect all data available for the purposes of deduplication and +to fill gaps in data where an individual Prometheus server failed to ingest +data for a period of time. + +Each service implementing the store API must determine what value the +`store_group_id` should return. For bucket stores, `store_group_id` should +contain the concatenation of the object store URL and bucket name. For all +other existing services implementing the store API, we will use an empty string +for `store_group_id` until a reason exists to use it. + +Multiple buckets or object stores will be supported by setting the +`store_group_id`. + +Existing instances running older versions of Thanos will be assumed to have +an empty string for `store_group_id` and a `SOURCE` peer type, which will +retain existing behaviour when awaiting responses. + +### Scope + +Horizontal scaling should be handled separately and is out of scope for this +proposal. + +## User experience + +From a user's point of view, query responses should be faster and more reliable: + +- Running multiple bucket store instances will allow the query to be served even + if a single store instance fails. + +- Query latency should be lower since the response will be served from the + first bucket store instance to reply. + +The user experience for query responses involving only Thanos sidecars will be +unaffected. + +## Alternatives considered + +### Implicitly relying on store labels + +Rather than expanding the `InfoResponse` Protobuf, we had originally considered +relying on an empty set of store labels to determine that a store instance was +acting as a gateway. 
+ +We decided against this approach as it would make debugging harder due to its +implicit nature, and is likely to cause bugs in future. + +### Using boolean fields to determine query behaviour + +We rejected the idea of adding a `gateway` or `deduplicated` boolean field to +`InfoResponse` in the store RPC API. The value of these fields would have had +the same effect on query behaviour as returning the peer type field as proposed +above and would be more explicit, but were specific to this use case. + +The peer type field in `InfoResponse` proposed above could be used for other +use cases aside from determining query behaviour. + +## Related future work + +### Sharing data between store instances + +Thanos bucket stores download index and metadata from the object store on +start-up. If multiple instances of a bucket store are used to provide high +availability, each instance will download the same files for its own use. These +file sizes can be in the order of gigabytes. + +Ideally, the overhead of each store instance downloading its own data would be +avoided. We decided that it would be more appropriate to tackle sharing data as +part of future work to support the horizontal scaling of store instances. diff --git a/docs/proposals/config.md b/docs/proposals/rejected/config.md similarity index 100% rename from docs/proposals/config.md rename to docs/proposals/rejected/config.md diff --git a/docs/storage.md b/docs/storage.md index 2d8b4ac2099..c7aa915327b 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -8,6 +8,8 @@ Current object storage client implementations: |----------------------|-------------------|-----------|---------------| | Google Cloud Storage | Stable (production usage) | yes | @bplotka | | AWS S3 | Beta (working PoCs, testing usage) | no | ? 
| +| Azure Storage Account | Alpha | yes | @vglafirov | +| OpenStack Swift | Beta (working PoCs, testing usage) | no | @sudhi-vm | NOTE: Currently Thanos requires strong consistency (write-read) for object store implementation. @@ -24,19 +26,20 @@ At that point, anyone can use your provider! ## AWS S3 configuration -Thanos uses minio client to upload Prometheus data into AWS s3. +Thanos uses minio client to upload Prometheus data into AWS S3. To configure S3 bucket as an object store you need to set these mandatory S3 variables in yaml format stored in a file: -``` + +```yaml type: S3 config: bucket: endpoint: - access-key: + access_key: insecure: - signature-version2: - encrypt-sse: - secret-key: + signature_version2: + encrypt_sse: + secret_key: ``` Set the flags `--objstore.config-file` to reference to the configuration file. @@ -48,7 +51,7 @@ Make sure you use a correct signature version to set `signature-version2: true`, For debug purposes you can set `insecure: true` to switch to plain insecure HTTP instead of HTTPS ### Credentials -Credentials will by default try to retrieve from the following sources: +By default Thanos will try to retrieve credentials from the following sources: 1. IAM credentials retrieved from an instance profile 1. From `~/.aws/credentials` @@ -83,6 +86,7 @@ Example working AWS IAM policy for user: ] } ``` + (No bucket policy) To test the policy, set env vars for S3 access for *empty, not used* bucket as well as: @@ -123,12 +127,13 @@ With this policy you should be able to run set `THANOS_SKIP_GCS_TESTS=true` and Details about AWS policies: https://docs.aws.amazon.com/AmazonS3/latest/dev/using-with-s3-actions.html -## GCP Configuration +## GCP Configuration To configure Google Cloud Storage bucket as an object store you need to set `bucket` with GCS bucket name and configure Google Application credentials. 
For example: -``` + +```yaml type: GCS config: bucket: @@ -160,9 +165,42 @@ For testing: `Storage Object Admin` for ability to create and delete temporary buckets. - ## Other minio supported S3 object storages Minio client used for AWS S3 can be potentially configured against other S3-compatible object storages. + +## Azure Configuration + +To use Azure Storage as Thanos object store, you need to precreate storage account from Azure portal or using Azure CLI. Follow the instructions from Azure Storage Documentation: [https://docs.microsoft.com/en-us/azure/storage/common/storage-quickstart-create-account](https://docs.microsoft.com/en-us/azure/storage/common/storage-quickstart-create-account?tabs=portal) + +To configure Azure Storage account as an object store you need to provide a path to Azure storage config file in flag `--objstore.config-file`. + +Config file format is the following: + +```yaml +type: AZURE +config: + storage_account: + storage_account_key: + container: +``` + +### OpenStack Swift Configuration +Thanos uses [gophercloud](http://gophercloud.io/) client to upload Prometheus data into [OpenStack Swift](https://docs.openstack.org/swift/latest/). + +Below is an example configuration file for thanos to use OpenStack swift container as an object store. + +```yaml +type: SWIFT +config: + auth_url: + username: + password: + tenant_name: + region_name: + container_name: +``` + +Set the flags `--objstore.config-file` to reference to the configuration file. diff --git a/docs/thanos_service_discovery.md b/docs/thanos_service_discovery.md new file mode 100644 index 00000000000..f657985a9b9 --- /dev/null +++ b/docs/thanos_service_discovery.md @@ -0,0 +1,86 @@ +# Thanos Service Discovery + +Service discovery has a vital place in Thanos components that allows them to perform some logic against given set of APIs. 
+Currently there are 2 places like this: +* `Thanos Query` needs to know about [StoreAPI](https://github.com/improbable-eng/thanos/blob/d3fb337da94d11c78151504b1fccb1d7e036f394/pkg/store/storepb/rpc.proto#L14) servers in order to query metrics from them. +* `Thanos Rule` needs to know about `QueryAPI` servers in order to evaluate recording and alerting rules. + +Currently there are several ways to configure this and they are described below. + +## Static Flags + +The simplest way to tell a component about a peer is to use a static flag. + +### Thanos Query +The repeatable flag `--store=` can be used to specify a `StoreAPI` that `Thanos Query` should use. + +### Thanos Rule + +The repeatable flag `--query=` can be used to specify a `QueryAPI` that `Thanos Rule` should use. + +## File Service Discovery + +File Service Discovery is another mechanism for configuring components. With File SD, a +list of files can be watched for updates, and the new configuration will be dynamically loaded when a change occurs. +The list of files to watch is passed to a component via a flag shown in the component specific sections below. + +The format of the configuration file is the same as the one used in [Prometheus' File SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config). +Both YAML and JSON files can be used. The format of the files is this: + +* JSON: +```json +[ + { + "targets": ["localhost:9090", "example.org:443"] + } +] +``` + +* YAML: +```yaml +- targets: ['localhost:9090', 'example.org:443'] +``` + +As a fallback, the file contents are periodically re-read at an interval that can be set using a flag specific for the component and shown below. +The default value for all File SD re-read intervals is 5 minutes. + +### Thanos Query + +The repeatable flag `--store.sd-files=` can be used to specify the path to files that contain addresses of `StoreAPI` servers. 
+The `` can be a glob pattern so you can specify several files using a single flag. + +The flag `--store.sd-interval=<5m>` can be used to change the fallback re-read interval from the default 5 minutes. + +### Thanos Rule + +The repeatable flag `--query.sd-files=` can be used to specify the path to files that contain addresses of `QueryAPI` servers. +Again, the `` can be a glob pattern. + +The flag `--query.sd-interval=<5m>` can be used to change the fallback re-read interval. + +## DNS Service Discovery + +DNS Service Discovery is another mechanism for finding components that can be used in conjunction with Static Flags or File SD. +With DNS SD, a domain name can be specified and it will be periodically queried to discover a list of IPs. + +To use DNS SD, just add one of the following prefixes to the domain name in your configuration: + +* `dns+` - the domain name after this prefix will be looked up as an A/AAAA query. *A port is required for this query type*. +An example using this lookup with a static flag: +``` +--store=dns+stores.thanos.mycompany.org:9090 +``` + +* `dnssrv+` - the domain name after this prefix will be looked up as a SRV query. You do not need to specify a port as the +one from the query results will be used. An example: +``` +--store=dnssrv+_thanosstores._tcp.mycompany.org +``` + +The default interval between DNS lookups is 30s. You can change it using the `store.sd-dns-interval` flag for `StoreAPI` +configuration in `Thanos Query`, or `query.sd-dns-interval` for `QueryAPI` configuration in `Thanos Rule`. + +## Other + +Currently, there are no plans of adding other Service Discovery mechanisms like Consul SD, kube SD, etc. However, we welcome +people implementing their preferred Service Discovery by writing the results to File SD which will propagate them to the different Thanos components. 
diff --git a/kube/manifests/prometheus-gcs.yaml b/kube/manifests/prometheus-gcs.yaml index 915a83f7551..c764fc853a1 100644 --- a/kube/manifests/prometheus-gcs.yaml +++ b/kube/manifests/prometheus-gcs.yaml @@ -162,8 +162,8 @@ apiVersion: v1 kind: Service metadata: labels: - app: prometheus - name: prometheus + app: prometheus-gcs + name: prometheus-gcs spec: externalTrafficPolicy: Cluster ports: @@ -176,7 +176,7 @@ spec: targetPort: sidecar-http name: http-sidecar-metrics selector: - app: prometheus + app: prometheus-gcs sessionAffinity: None type: NodePort status: diff --git a/kube/manifests/thanos-query.yaml b/kube/manifests/thanos-query.yaml index c41520141ee..9eadc85c8f5 100644 --- a/kube/manifests/thanos-query.yaml +++ b/kube/manifests/thanos-query.yaml @@ -36,6 +36,10 @@ spec: containerPort: 10901 - name: cluster containerPort: 10900 + livenessProbe: + httpGet: + path: /-/healthy + port: http --- apiVersion: v1 kind: Service diff --git a/pkg/block/index.go b/pkg/block/index.go index 396a600e1db..2249863b2d2 100644 --- a/pkg/block/index.go +++ b/pkg/block/index.go @@ -171,7 +171,7 @@ type Stats struct { // OutOfOrderChunks represents number of chunks that are out of order (older time range is after younger one) OutOfOrderChunks int - // DuplicatedChunks represents number of exactly the same chunks within same series. + // DuplicatedChunks represents number of chunks with same time ranges within same series, potential duplicates. DuplicatedChunks int // OutsideChunks represents number of all chunks that are before or after time range specified in block meta. OutsideChunks int @@ -294,6 +294,7 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime ooo := 0 // Per chunk in series. for i, c := range chks { + // Chunk vs the block ranges. 
if c.MinTime < minTime || c.MaxTime > maxTime { stats.OutsideChunks++ if c.MinTime > maxTime || c.MaxTime < minTime { @@ -308,20 +309,21 @@ func GatherIndexIssueStats(logger log.Logger, fn string, minTime int64, maxTime } c0 := chks[i-1] + + // Chunk order within block. if c.MinTime > c0.MaxTime { continue } - // Chunks overlaps or duplicates. if c.MinTime == c0.MinTime && c.MaxTime == c0.MaxTime { - ca := crc32.Checksum(c0.Chunk.Bytes(), castagnoli) - cb := crc32.Checksum(c.Chunk.Bytes(), castagnoli) - if ca == cb { - // Duplicate. - stats.DuplicatedChunks++ - } - ooo++ + // TODO(bplotka): Calc and check checksum from chunks itself. + // The chunks can overlap 1:1 in time, but does not have same data. + // We assume same data for simplicity, but it can be a symptom of error. + stats.DuplicatedChunks++ + continue } + // Chunks partly overlaps or out of order. + ooo++ } if ooo > 0 { stats.OutOfOrderSeries++ diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 09810d59435..544de920eac 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -706,7 +706,7 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) ( } if err := stats.CriticalErr(); err != nil { - return compID, halt(errors.Wrapf(err, "invalid plan id %s", pdir)) + return compID, halt(errors.Wrapf(err, "block with not healthy index found %s; Compaction level %v; Labels: %v", pdir, meta.Compaction.Level, meta.Thanos.Labels)) } if err := stats.Issue347OutsideChunksErr(); err != nil { @@ -808,6 +808,11 @@ func NewBucketCompactor(logger log.Logger, sy *Syncer, comp tsdb.Compactor, comp func (c *BucketCompactor) Compact(ctx context.Context) error { // Loop over bucket and compact until there's no work left. for { + // Clean up the compaction temporary directory at the beginning of every compaction loop. 
+ if err := os.RemoveAll(c.compactDir); err != nil { + return errors.Wrap(err, "clean up the compaction temporary directory") + } + level.Info(c.logger).Log("msg", "start sync of metas") if err := c.sy.SyncMetas(ctx); err != nil { diff --git a/pkg/compact/downsample/downsample.go b/pkg/compact/downsample/downsample.go index 305f72021c0..c2cff7e09bd 100644 --- a/pkg/compact/downsample/downsample.go +++ b/pkg/compact/downsample/downsample.go @@ -2,15 +2,11 @@ package downsample import ( "math" - "path/filepath" - "sort" "github.com/improbable-eng/thanos/pkg/block" "github.com/prometheus/prometheus/pkg/value" "github.com/prometheus/tsdb/chunkenc" - "os" - "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/runutil" "github.com/oklog/ulid" @@ -52,13 +48,13 @@ func Downsample( } defer runutil.CloseWithErrCapture(logger, &err, chunkr, "downsample chunk reader") - rng := origMeta.MaxTime - origMeta.MinTime - - // Write downsampled data in a custom memory block where we have fine-grained control - // over created chunks. - // This is necessary since we need to inject special values at the end of chunks for - // some aggregations. - newb := newMemBlock() + // Writes downsampled chunks right into the files, avoiding excess memory allocation. + // Flushes index and meta data afterwards aggregations. 
+ streamedBlockWriter, err := NewWriter(dir, logger, *origMeta, resolution) + if err != nil { + return id, errors.Wrap(err, "get streamed block writer") + } + defer runutil.CloseWithErrCapture(logger, &err, streamedBlockWriter, "close stream block writer") pall, err := indexr.Postings(index.AllPostingsKey()) if err != nil { @@ -85,7 +81,7 @@ func Downsample( for i, c := range chks { chk, err := chunkr.Chunk(c.Ref) if err != nil { - return id, errors.Wrapf(err, "get chunk %d", c.Ref) + return id, errors.Wrapf(err, "get chunk %d, series %d", c.Ref, pall.At()) } chks[i].Chunk = chk } @@ -94,10 +90,12 @@ func Downsample( if origMeta.Thanos.Downsample.Resolution == 0 { for _, c := range chks { if err := expandChunkIterator(c.Chunk.Iterator(), &all); err != nil { - return id, errors.Wrapf(err, "expand chunk %d", c.Ref) + return id, errors.Wrapf(err, "expand chunk %d, series %d", c.Ref, pall.At()) } } - newb.addSeries(&series{lset: lset, chunks: downsampleRaw(all, resolution)}) + if err := streamedBlockWriter.AddSeries(&series{lset: lset, chunks: downsampleRaw(all, resolution)}); err != nil { + return id, errors.Wrapf(err, "downsample raw data, series: %d", pall.At()) + } continue } @@ -114,127 +112,24 @@ func Downsample( resolution, ) if err != nil { - return id, errors.Wrap(err, "downsample aggregate block") + return id, errors.Wrapf(err, "downsample aggregate block, series: %d", pall.At()) + } + if err := streamedBlockWriter.AddSeries(&series{lset: lset, chunks: res}); err != nil { + return id, errors.Wrapf(err, "downsample aggregated block, series: %d", pall.At()) } - newb.addSeries(&series{lset: lset, chunks: res}) } if pall.Err() != nil { return id, errors.Wrap(pall.Err(), "iterate series set") } - comp, err := tsdb.NewLeveledCompactor(nil, log.NewNopLogger(), []int64{rng}, NewPool()) - if err != nil { - return id, errors.Wrap(err, "create compactor") - } - id, err = comp.Write(dir, newb, origMeta.MinTime, origMeta.MaxTime) - if err != nil { - return id, 
errors.Wrap(err, "compact head") - } - bdir := filepath.Join(dir, id.String()) - - var tmeta block.ThanosMeta - tmeta = origMeta.Thanos - tmeta.Source = block.CompactorSource - tmeta.Downsample.Resolution = resolution - _, err = block.InjectThanosMeta(logger, bdir, tmeta, &origMeta.BlockMeta) + id, err = streamedBlockWriter.Flush() if err != nil { - return id, errors.Wrapf(err, "failed to finalize the block %s", bdir) + return id, errors.Wrap(err, "compact head") } - if err = os.Remove(filepath.Join(bdir, "tombstones")); err != nil { - return id, errors.Wrap(err, "remove tombstones") - } return id, nil } -// memBlock is an in-memory block that implements a subset of the tsdb.BlockReader interface -// to allow tsdb.LeveledCompactor to persist the data as a block. -type memBlock struct { - // Dummies to implement unused methods. - tsdb.IndexReader - - symbols map[string]struct{} - postings []uint64 - series []*series - chunks []chunkenc.Chunk -} - -func newMemBlock() *memBlock { - return &memBlock{symbols: map[string]struct{}{}} -} - -func (b *memBlock) addSeries(s *series) { - sid := uint64(len(b.series)) - b.postings = append(b.postings, sid) - b.series = append(b.series, s) - - for _, l := range s.lset { - b.symbols[l.Name] = struct{}{} - b.symbols[l.Value] = struct{}{} - } - - for i, cm := range s.chunks { - cid := uint64(len(b.chunks)) - s.chunks[i].Ref = cid - b.chunks = append(b.chunks, cm.Chunk) - } -} - -func (b *memBlock) Postings(name, val string) (index.Postings, error) { - allName, allVal := index.AllPostingsKey() - - if name != allName || val != allVal { - return nil, errors.New("unsupported call to Postings()") - } - sort.Slice(b.postings, func(i, j int) bool { - return labels.Compare(b.series[b.postings[i]].lset, b.series[b.postings[j]].lset) < 0 - }) - return index.NewListPostings(b.postings), nil -} - -func (b *memBlock) Series(id uint64, lset *labels.Labels, chks *[]chunks.Meta) error { - if id >= uint64(len(b.series)) { - return 
errors.Wrapf(tsdb.ErrNotFound, "series with ID %d does not exist", id) - } - s := b.series[id] - - *lset = append((*lset)[:0], s.lset...) - *chks = append((*chks)[:0], s.chunks...) - - return nil -} - -func (b *memBlock) Chunk(id uint64) (chunkenc.Chunk, error) { - if id >= uint64(len(b.chunks)) { - return nil, errors.Wrapf(tsdb.ErrNotFound, "chunk with ID %d does not exist", id) - } - return b.chunks[id], nil -} - -func (b *memBlock) Symbols() (map[string]struct{}, error) { - return b.symbols, nil -} - -func (b *memBlock) SortedPostings(p index.Postings) index.Postings { - return p -} - -func (b *memBlock) Index() (tsdb.IndexReader, error) { - return b, nil -} - -func (b *memBlock) Chunks() (tsdb.ChunkReader, error) { - return b, nil -} - -func (b *memBlock) Tombstones() (tsdb.TombstoneReader, error) { - return tsdb.EmptyTombstoneReader(), nil -} - -func (b *memBlock) Close() error { - return nil -} - // currentWindow returns the end timestamp of the window that t falls into. func currentWindow(t, r int64) int64 { // The next timestamp is the next number after s.t that's aligned with window. @@ -482,7 +377,7 @@ func downsampleAggr(chks []*AggrChunk, buf *[]sample, mint, maxt, inRes, outRes return res, nil } -// expandChunkIterator reads all samples from the iterater and appends them to buf. +// expandChunkIterator reads all samples from the iterator and appends them to buf. // Stale markers and out of order samples are skipped. func expandChunkIterator(it chunkenc.Iterator, buf *[]sample) error { // For safety reasons, we check for each sample that it does not go back in time. 
diff --git a/pkg/compact/downsample/downsample_test.go b/pkg/compact/downsample/downsample_test.go index d3844784162..3df038d5a43 100644 --- a/pkg/compact/downsample/downsample_test.go +++ b/pkg/compact/downsample/downsample_test.go @@ -5,19 +5,19 @@ import ( "math" "os" "path/filepath" + "sort" "testing" - - "github.com/prometheus/prometheus/pkg/value" - - "github.com/prometheus/tsdb/chunks" - "time" "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/block" "github.com/improbable-eng/thanos/pkg/testutil" + "github.com/pkg/errors" + "github.com/prometheus/prometheus/pkg/value" + "github.com/prometheus/tsdb" "github.com/prometheus/tsdb/chunkenc" + "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" ) @@ -69,30 +69,30 @@ func TestDownsampleAggr(t *testing.T) { { lset: labels.FromStrings("__name__", "a"), inAggr: map[AggrType][]sample{ - AggrCount: []sample{ + AggrCount: { {199, 5}, {299, 1}, {399, 10}, {400, 3}, {499, 10}, {699, 0}, {999, 100}, }, - AggrSum: []sample{ + AggrSum: { {199, 5}, {299, 1}, {399, 10}, {400, 3}, {499, 10}, {699, 0}, {999, 100}, }, - AggrMin: []sample{ + AggrMin: { {199, 5}, {299, 1}, {399, 10}, {400, -3}, {499, 10}, {699, 0}, {999, 100}, }, - AggrMax: []sample{ + AggrMax: { {199, 5}, {299, 1}, {399, 10}, {400, -3}, {499, 10}, {699, 0}, {999, 100}, }, - AggrCounter: []sample{ + AggrCounter: { {99, 100}, {299, 150}, {499, 210}, {499, 10}, // chunk 1 {599, 20}, {799, 50}, {999, 120}, {999, 50}, // chunk 2, no reset {1099, 40}, {1199, 80}, {1299, 110}, // chunk 3, reset }, }, output: map[AggrType][]sample{ - AggrCount: []sample{{499, 29}, {999, 100}}, - AggrSum: []sample{{499, 29}, {999, 100}}, - AggrMin: []sample{{499, -3}, {999, 0}}, - AggrMax: []sample{{499, 10}, 
{999, 100}}, - AggrCounter: []sample{{499, 210}, {999, 320}, {1299, 430}, {1299, 110}}, + AggrCount: {{499, 29}, {999, 100}}, + AggrSum: {{499, 29}, {999, 100}}, + AggrMin: {{499, -3}, {999, 0}}, + AggrMax: {{499, 10}, {999, 100}}, + AggrCounter: {{499, 210}, {999, 320}, {1299, 430}, {1299, 110}}, }, }, } @@ -157,7 +157,6 @@ func testDownsample(t *testing.T, data []*downsampleTestSet, meta *block.Meta, r } mb.addSeries(ser) } - id, err := Downsample(log.NewNopLogger(), meta, mb, dir, resolution) testutil.Ok(t, err) @@ -375,3 +374,94 @@ func (it *sampleIterator) Seek(int64) bool { func (it *sampleIterator) At() (t int64, v float64) { return it.l[it.i].t, it.l[it.i].v } + +// memBlock is an in-memory block that implements a subset of the tsdb.BlockReader interface +// to allow tsdb.StreamedBlockWriter to persist the data as a block. +type memBlock struct { + // Dummies to implement unused methods. + tsdb.IndexReader + + symbols map[string]struct{} + postings []uint64 + series []*series + chunks []chunkenc.Chunk + + numberOfChunks uint64 +} + +func newMemBlock() *memBlock { + return &memBlock{symbols: map[string]struct{}{}} +} + +func (b *memBlock) addSeries(s *series) { + sid := uint64(len(b.series)) + b.postings = append(b.postings, sid) + b.series = append(b.series, s) + + for _, l := range s.lset { + b.symbols[l.Name] = struct{}{} + b.symbols[l.Value] = struct{}{} + } + + for i, cm := range s.chunks { + s.chunks[i].Ref = b.numberOfChunks + b.chunks = append(b.chunks, cm.Chunk) + b.numberOfChunks++ + } +} + +func (b *memBlock) Postings(name, val string) (index.Postings, error) { + allName, allVal := index.AllPostingsKey() + + if name != allName || val != allVal { + return nil, errors.New("unsupported call to Postings()") + } + sort.Slice(b.postings, func(i, j int) bool { + return labels.Compare(b.series[b.postings[i]].lset, b.series[b.postings[j]].lset) < 0 + }) + return index.NewListPostings(b.postings), nil +} + +func (b *memBlock) Series(id uint64, lset 
*labels.Labels, chks *[]chunks.Meta) error { + if id >= uint64(len(b.series)) { + return errors.Wrapf(tsdb.ErrNotFound, "series with ID %d does not exist", id) + } + s := b.series[id] + + *lset = append((*lset)[:0], s.lset...) + *chks = append((*chks)[:0], s.chunks...) + + return nil +} + +func (b *memBlock) Chunk(id uint64) (chunkenc.Chunk, error) { + if id >= uint64(b.numberOfChunks) { + return nil, errors.Wrapf(tsdb.ErrNotFound, "chunk with ID %d does not exist", id) + } + + return b.chunks[id], nil +} + +func (b *memBlock) Symbols() (map[string]struct{}, error) { + return b.symbols, nil +} + +func (b *memBlock) SortedPostings(p index.Postings) index.Postings { + return p +} + +func (b *memBlock) Index() (tsdb.IndexReader, error) { + return b, nil +} + +func (b *memBlock) Chunks() (tsdb.ChunkReader, error) { + return b, nil +} + +func (b *memBlock) Tombstones() (tsdb.TombstoneReader, error) { + return tsdb.EmptyTombstoneReader(), nil +} + +func (b *memBlock) Close() error { + return nil +} diff --git a/pkg/compact/downsample/streamed_block_writer.go b/pkg/compact/downsample/streamed_block_writer.go new file mode 100644 index 00000000000..7ec60ec9327 --- /dev/null +++ b/pkg/compact/downsample/streamed_block_writer.go @@ -0,0 +1,354 @@ +package downsample + +import ( + "encoding/json" + "math/rand" + "os" + "path/filepath" + "sort" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/improbable-eng/thanos/pkg/block" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/tsdb" + "github.com/prometheus/tsdb/chunks" + "github.com/prometheus/tsdb/fileutil" + "github.com/prometheus/tsdb/index" + "github.com/prometheus/tsdb/labels" +) + +type symbols map[string]struct{} + +type labelValues map[string]struct{} + +func (lv labelValues) add(value string) { + lv[value] = 
struct{}{} +} + +func (lv labelValues) get(set *[]string) { + for value := range lv { + *set = append(*set, value) + } +} + +type labelsValues map[string]labelValues + +func (lv labelsValues) add(labelSet labels.Labels) { + for _, label := range labelSet { + values, ok := lv[label.Name] + if !ok { + // Add new label. + values = labelValues{} + lv[label.Name] = values + } + values.add(label.Value) + } +} + +// StreamedBlockWriter writes downsampled blocks to a new data block. Implemented to save memory consumption +// by means writing chunks data right into the files, omitting keeping them in-memory. Index and meta data should be +// flushed afterwards, when there aren't more series to process. +type StreamedBlockWriter struct { + dir string + tmpDir string + logger log.Logger + uid ulid.ULID + + symbols symbols + postings []uint64 + series []*series + + chunkWriter tsdb.ChunkWriter + meta block.Meta + totalChunks uint64 + totalSamples uint64 +} + +// NewWriter returns StreamedBlockWriter instance. Caller is responsible to finalize the writing with Flush method to write +// the meta and index file and Close all io.Closers +func NewWriter(dir string, l log.Logger, originMeta block.Meta, resolution int64) (*StreamedBlockWriter, error) { + var err error + var chunkWriter tsdb.ChunkWriter + + // Generate new block id. + entropy := rand.New(rand.NewSource(time.Now().UnixNano())) + uid := ulid.MustNew(ulid.Now(), entropy) + + // Populate chunk, meta and index files into temporary directory with + // data of all blocks. 
+ dir = filepath.Join(dir, uid.String()) + tmpDir, err := createTmpDir(dir) + if err != nil { + return nil, err + } + + chunkWriter, err = chunks.NewWriter(filepath.Join(tmpDir, block.ChunksDirname)) + if err != nil { + return nil, errors.Wrap(err, "create tmp chunk StreamedBlockWriter") + } + + originMeta.Thanos.Downsample.Resolution = resolution + + return &StreamedBlockWriter{ + logger: l, + dir: dir, + tmpDir: tmpDir, + symbols: symbols{}, + chunkWriter: chunkWriter, + uid: uid, + meta: originMeta, + }, nil +} + +func (w *StreamedBlockWriter) AddSeries(s *series) error { + if w.chunkWriter == nil { + panic("Series can't be added, ChunkWriter has been closed") + } + if len(s.chunks) == 0 { + level.Warn(w.logger).Log("empty chunks happened", s.lset) + } + + if err := w.chunkWriter.WriteChunks(s.chunks...); err != nil { + return errors.Wrap(err, "add series") + } + + w.postings = append(w.postings, uint64(len(w.series))) + w.series = append(w.series, s) + + for _, l := range s.lset { + w.symbols[l.Name] = struct{}{} + w.symbols[l.Value] = struct{}{} + } + + w.totalChunks += uint64(len(s.chunks)) + for i := range s.chunks { + chk := &s.chunks[i] + w.totalSamples += uint64(chk.Chunk.NumSamples()) + chk.Chunk = nil + } + + return nil +} + +// Flush saves prepared index and meta data to corresponding files. 
+// Be sure to call this, if all series have to be handled by this moment, as +func (w *StreamedBlockWriter) Flush() (ulid.ULID, error) { + var err error + + if err := w.chunkWriter.Close(); err != nil { + return w.uid, errors.Wrap(err, "close chunk writer") + } + w.chunkWriter = nil + + indexw, err := index.NewWriter(filepath.Join(w.tmpDir, block.IndexFilename)) + if err != nil { + return w.uid, errors.Wrap(err, "open index StreamedBlockWriter") + } + + defer func() { + if indexw != nil { + if err := indexw.Close(); err != nil { + level.Error(w.logger).Log(err, "close index StreamedBlockWriter") + } + } + }() + + if err := w.populateBlock(indexw); err != nil { + return w.uid, errors.Wrap(err, "write compaction") + } + + if err = w.writeMetaFile(w.tmpDir); err != nil { + return w.uid, errors.Wrap(err, "write merged meta") + } + + if err = indexw.Close(); err != nil { + return w.uid, errors.Wrap(err, "close index StreamedBlockWriter") + } + indexw = nil + + df, err := fileutil.OpenDir(w.tmpDir) + if err != nil { + return w.uid, errors.Wrap(err, "open temporary block dir") + } + defer func() { + if df != nil { + if err := df.Close(); err != nil { + log.Logger(w.logger).Log(err, "close temporary block dir") + } + } + }() + + if err := fileutil.Fsync(df); err != nil { + return w.uid, errors.Wrap(err, "sync temporary dir") + } + + // Close temp dir before rename block dir (for windows platform). + if err = df.Close(); err != nil { + return w.uid, errors.Wrap(err, "close temporary dir") + } + df = nil + + // Block successfully written, make visible and remove old ones. 
+ err = renameFile(w.tmpDir, w.dir) + // Assume we cleaned tmp dir up + w.tmpDir = "" + if err != nil { + return w.uid, errors.Wrap(err, "rename block dir") + } + + level.Info(w.logger).Log( + "msg", "write downsampled block", + "mint", w.meta.MinTime, + "maxt", w.meta.MaxTime, + "ulid", w.meta.ULID, + "resolution", w.meta.Thanos.Downsample.Resolution, + ) + return w.uid, nil +} + +// populateBlock fills the index and chunk writers with new data gathered as the union +// of the provided blocks. It returns meta information for the new block. +func (w *StreamedBlockWriter) populateBlock(indexWriter tsdb.IndexWriter) error { + var ( + i = uint64(0) + labelsValues = labelsValues{} + memPostings = index.NewUnorderedMemPostings() + ) + + if err := indexWriter.AddSymbols(w.symbols); err != nil { + return errors.Wrap(err, "add symbols") + } + + sort.Slice(w.postings, func(i, j int) bool { + return labels.Compare(w.series[w.postings[i]].lset, w.series[w.postings[j]].lset) < 0 + }) + + all := index.NewListPostings(w.postings) + for all.Next() { + s := w.series[all.At()] + // Skip the series with all deleted chunks. + if len(s.chunks) == 0 { + level.Info(w.logger).Log("empty chunks", i, s.lset) + continue + } + + if err := indexWriter.AddSeries(uint64(i), s.lset, s.chunks...); err != nil { + return errors.Wrap(err, "add series") + } + + labelsValues.add(s.lset) + memPostings.Add(i, s.lset) + i++ + } + + s := make([]string, 0, 256) + for n, v := range labelsValues { + s = s[:0] + v.get(&s) + if err := indexWriter.WriteLabelIndex([]string{n}, s); err != nil { + return errors.Wrap(err, "write label index") + } + } + + memPostings.EnsureOrder() + + for _, l := range memPostings.SortedKeys() { + if err := indexWriter.WritePostings(l.Name, l.Value, memPostings.Get(l.Name, l.Value)); err != nil { + return errors.Wrap(err, "write postings") + } + } + return nil +} + +// TODO probably tsdb.BlockMeta should expose method writeToFile /w encode. 
+func (w *StreamedBlockWriter) writeMetaFile(dest string) error { + w.meta.ULID = w.uid + w.meta.Version = 1 + w.meta.Thanos.Source = block.CompactorSource + w.meta.Stats.NumChunks = w.totalChunks + w.meta.Stats.NumSamples = w.totalSamples + w.meta.Stats.NumSeries = uint64(len(w.series)) + + // Make any changes to the file appear atomic. + path := filepath.Join(dest, block.MetaFilename) + tmp := path + ".tmp" + + f, err := os.Create(tmp) + if err != nil { + return errors.Wrapf(err, "create tmp meta file %s", tmp) + } + + enc := json.NewEncoder(f) + enc.SetIndent("", "\t") + + var merr tsdb.MultiError + + if merr.Add(enc.Encode(w.meta)); merr.Err() != nil { + merr.Add(f.Close()) + return errors.Wrapf(merr.Err(), "encoding meta file to json %s", tmp) + } + if err := f.Close(); err != nil { + return errors.Wrapf(err, "close tmp meta file %s", tmp) + } + + if err := renameFile(tmp, path); err != nil { + return errors.Wrapf(err, "rename tmp meta file %s", tmp) + } + + return nil +} + +func (w *StreamedBlockWriter) Close() error { + var merr tsdb.MultiError + + if w.tmpDir != "" { + merr.Add(os.RemoveAll(w.tmpDir)) + } + + if w.chunkWriter != nil { + merr.Add(w.chunkWriter.Close()) + w.chunkWriter = nil + } + + if merr.Err() != nil { + return errors.Wrap(merr.Err(), "close chunk writer") + } + return nil +} + +func renameFile(from, to string) error { + if err := os.RemoveAll(to); err != nil { + return err + } + if err := os.Rename(from, to); err != nil { + return err + } + + // Directory was renamed; sync parent dir to persist rename. 
+ pdir, err := fileutil.OpenDir(filepath.Dir(to)) + if err != nil { + return err + } + + var merr tsdb.MultiError + merr.Add(fileutil.Fsync(pdir)) + merr.Add(pdir.Close()) + return merr.Err() +} + +func createTmpDir(parent string) (string, error) { + tmp := parent + ".tmp" + + if err := os.RemoveAll(tmp); err != nil { + return "", errors.Wrap(err, "removing tmp dir") + } + + if err := os.MkdirAll(tmp, 0777); err != nil { + return "", errors.Wrap(err, "mkdir tmp dir") + } + + return tmp, nil +} diff --git a/pkg/discovery/cache/cache.go b/pkg/discovery/cache/cache.go new file mode 100644 index 00000000000..22b5feb5ea4 --- /dev/null +++ b/pkg/discovery/cache/cache.go @@ -0,0 +1,50 @@ +package cache + +import ( + "sync" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Cache is a store for target groups. It provides thread safe updates and a way for obtaining all addresses from +// the stored target groups. +type Cache struct { + tgs map[string]*targetgroup.Group + sync.Mutex +} + +// New returns a new empty Cache. +func New() *Cache { + return &Cache{ + tgs: make(map[string]*targetgroup.Group), + } +} + +// Update stores the targets for the given groups. +// Note: targets for a group are replaced entirely on update. If a group with no target is given this is equivalent to +// deleting all the targets for this group. +func (c *Cache) Update(tgs []*targetgroup.Group) { + c.Lock() + defer c.Unlock() + for _, tg := range tgs { + // Some Discoverers send nil target group so need to check for it to avoid panics. + if tg == nil { + continue + } + c.tgs[tg.Source] = tg + } +} + +// Addresses returns all the addresses from all target groups present in the Cache. 
+func (c *Cache) Addresses() []string { + c.Lock() + defer c.Unlock() + var addresses []string + for _, group := range c.tgs { + for _, target := range group.Targets { + addresses = append(addresses, string(target[model.AddressLabel])) + } + } + return addresses +} diff --git a/pkg/objstore/azure/azure.go b/pkg/objstore/azure/azure.go new file mode 100644 index 00000000000..6e24588e635 --- /dev/null +++ b/pkg/objstore/azure/azure.go @@ -0,0 +1,286 @@ +package azure + +import ( + "bytes" + "context" + "io" + "io/ioutil" + "os" + "strings" + "testing" + + blob "github.com/Azure/azure-storage-blob-go/azblob" + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/improbable-eng/thanos/pkg/objstore" + "github.com/pkg/errors" + yaml "gopkg.in/yaml.v2" +) + +const ( + opObjectsList = "ListBucket" + opObjectInsert = "PutObject" + opObjectGet = "GetObject" + opObjectHead = "HeadObject" + opObjectDelete = "DeleteObject" +) + +// Config Azure storage configuration. +type Config struct { + StorageAccountName string `yaml:"storage_account"` + StorageAccountKey string `yaml:"storage_account_key"` + ContainerName string `yaml:"container"` +} + +// Bucket implements the store.Bucket interface against Azure APIs. +type Bucket struct { + logger log.Logger + containerURL blob.ContainerURL + config *Config +} + +// Validate checks to see if any of the config options are set. +func (conf *Config) validate() error { + if conf.StorageAccountName == "" || + conf.StorageAccountKey == "" { + return errors.New("invalid Azure storage configuration") + } + return nil +} + +// NewBucket returns a new Bucket using the provided Azure config. 
+func NewBucket(logger log.Logger, azureConfig []byte, component string) (*Bucket, error) { + level.Debug(logger).Log("msg", "creating new Azure bucket connection", "component", component) + + var conf Config + if err := yaml.Unmarshal(azureConfig, &conf); err != nil { + return nil, err + } + + if err := conf.validate(); err != nil { + return nil, err + } + + ctx := context.Background() + container, err := createContainer(ctx, conf.StorageAccountName, conf.StorageAccountKey, conf.ContainerName) + if err != nil { + ret, ok := err.(blob.StorageError) + if !ok { + return nil, errors.Wrapf(err, "Azure API return unexpected error: %T\n", err) + } + if ret.ServiceCode() == "ContainerAlreadyExists" { + level.Debug(logger).Log("msg", "Getting connection to existing Azure blob container", "container", conf.ContainerName) + container, err = getContainer(ctx, conf.StorageAccountName, conf.StorageAccountKey, conf.ContainerName) + if err != nil { + return nil, errors.Wrapf(err, "cannot get existing Azure blob container: %s", container) + } + } else { + return nil, errors.Wrapf(err, "error creating Azure blob container: %s", container) + } + } else { + level.Info(logger).Log("msg", "Azure blob container successfully created", "address", container) + } + + bkt := &Bucket{ + logger: logger, + containerURL: container, + config: &conf, + } + return bkt, nil +} + +// Iter calls f for each entry in the given directory. The argument to f is the full +// object name including the prefix of the inspected directory. 
+func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error) error { + + prefix := dir + if prefix != "" && !strings.HasSuffix(prefix, DirDelim) { + prefix += DirDelim + } + + list, err := b.containerURL.ListBlobsHierarchySegment(ctx, blob.Marker{}, DirDelim, blob.ListBlobsSegmentOptions{ + Prefix: prefix, + }) + if err != nil { + return errors.Wrapf(err, "cannot list blobs in directory %s", dir) + } + var listNames []string + + for _, blob := range list.Segment.BlobItems { + listNames = append(listNames, blob.Name) + } + + for _, blobPrefix := range list.Segment.BlobPrefixes { + listNames = append(listNames, blobPrefix.Name) + } + + for _, name := range listNames { + if err := f(name); err != nil { + return err + } + } + return nil +} + +// IsObjNotFoundErr returns true if error means that object is not found. Relevant to Get operations. +func (b *Bucket) IsObjNotFoundErr(err error) bool { + if err == nil { + return false + } + + errorCode := parseError(err.Error()) + if errorCode == "InvalidUri" || errorCode == "BlobNotFound" { + return true + } + + return false +} + +func (b *Bucket) getBlobReader(ctx context.Context, name string, offset, length int64) (io.ReadCloser, error) { + level.Debug(b.logger).Log("msg", "getting blob", "blob", name, "offset", offset, "length", length) + if len(name) == 0 { + return nil, errors.New("X-Ms-Error-Code: [EmptyContainerName]") + } + exists, err := b.Exists(ctx, name) + if err != nil { + return nil, errors.Wrapf(err, "cannot get blob reader: %s", name) + } + + if !exists { + return nil, errors.New("X-Ms-Error-Code: [BlobNotFound]") + } + + blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + if err != nil { + return nil, errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) + } + var props *blob.BlobGetPropertiesResponse + props, err = blobURL.GetProperties(ctx, blob.BlobAccessConditions{}) + if err != nil { + return nil, 
errors.Wrapf(err, "cannot get properties for container: %s", name) + } + + var size int64 + if length > 0 { + size = length + } else { + size = props.ContentLength() - offset + } + + destBuffer := make([]byte, size) + + if err := blob.DownloadBlobToBuffer(context.Background(), blobURL.BlobURL, offset, length, + destBuffer, blob.DownloadFromBlobOptions{ + BlockSize: blob.BlobDefaultDownloadBlockSize, + Parallelism: uint16(3), + Progress: nil, + }, + ); err != nil { + return nil, errors.Wrapf(err, "cannot download blob, address: %s", blobURL.BlobURL) + } + + return ioutil.NopCloser(bytes.NewReader(destBuffer)), nil +} + +// Get returns a reader for the given object name. +func (b *Bucket) Get(ctx context.Context, name string) (io.ReadCloser, error) { + return b.getBlobReader(ctx, name, 0, blob.CountToEnd) +} + +// GetRange returns a new range reader for the given object name and range. +func (b *Bucket) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { + return b.getBlobReader(ctx, name, off, length) +} + +// Exists checks if the given object exists. +func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { + level.Debug(b.logger).Log("msg", "check if blob exists", "blob", name) + blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + if err != nil { + return false, errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) + } + + if _, err = blobURL.GetProperties(ctx, blob.BlobAccessConditions{}); err != nil { + if b.IsObjNotFoundErr(err) { + return false, nil + } + return false, errors.Wrapf(err, "cannot get properties for Azure blob, address: %s", name) + } + + return true, nil +} + +// Upload the contents of the reader as an object into the bucket. 
+func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { + level.Debug(b.logger).Log("msg", "Uploading blob", "blob", name) + blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + if err != nil { + return errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) + } + if _, err = blob.UploadStreamToBlockBlob(ctx, r, blobURL, + blob.UploadStreamToBlockBlobOptions{ + BufferSize: 3 * 1024 * 1024, + MaxBuffers: 4, + }, + ); err != nil { + return errors.Wrapf(err, "cannot upload Azure blob, address: %s", name) + } + return nil +} + +// Delete removes the object with the given name. +func (b *Bucket) Delete(ctx context.Context, name string) error { + level.Debug(b.logger).Log("msg", "Deleting blob", "blob", name) + blobURL, err := getBlobURL(ctx, b.config.StorageAccountName, b.config.StorageAccountKey, b.config.ContainerName, name) + if err != nil { + return errors.Wrapf(err, "cannot get Azure blob URL, address: %s", name) + } + + if _, err = blobURL.Delete(ctx, blob.DeleteSnapshotsOptionInclude, blob.BlobAccessConditions{}); err != nil { + return errors.Wrapf(err, "error deleting blob, address: %s", name) + } + return nil +} + +// Name returns Azure container name. +func (b *Bucket) Name() string { + return b.config.ContainerName +} + +// NewTestBucket creates test bkt client that before returning creates temporary bucket. +// In a close function it empties and deletes the bucket. 
+func NewTestBucket(t testing.TB, component string) (objstore.Bucket, func(), error) { + t.Log("Using test Azure bucket.") + + conf := &Config{ + StorageAccountName: os.Getenv("AZURE_STORAGE_ACCOUNT"), + StorageAccountKey: os.Getenv("AZURE_STORAGE_ACCESS_KEY"), + ContainerName: "thanos-e2e-test", + } + + bc, err := yaml.Marshal(conf) + if err != nil { + return nil, nil, err + } + + ctx := context.Background() + + bkt, err := NewBucket(log.NewNopLogger(), bc, component) + if err != nil { + t.Errorf("Cannot create Azure storage container:") + return nil, nil, err + } + + return bkt, func() { + objstore.EmptyBucket(t, ctx, bkt) + err = bkt.Delete(ctx, conf.ContainerName) + if err != nil { + t.Logf("deleting bucket failed: %s", err) + } + }, nil +} + +// Close bucket. +func (b *Bucket) Close() error { + return nil +} diff --git a/pkg/objstore/azure/helpers.go b/pkg/objstore/azure/helpers.go new file mode 100644 index 00000000000..61b1b900f34 --- /dev/null +++ b/pkg/objstore/azure/helpers.go @@ -0,0 +1,69 @@ +package azure + +import ( + "context" + "fmt" + "net/url" + "regexp" + + blob "github.com/Azure/azure-storage-blob-go/azblob" +) + +var ( + blobFormatString = `https://%s.blob.core.windows.net` +) + +// DirDelim is the delimiter used to model a directory structure in an object store bucket. 
+const DirDelim = "/" + +func getContainerURL(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { + c, err := blob.NewSharedKeyCredential(accountName, accountKey) + if err != nil { + return blob.ContainerURL{}, err + } + p := blob.NewPipeline(c, blob.PipelineOptions{ + Telemetry: blob.TelemetryOptions{Value: "Thanos"}, + }) + u, err := url.Parse(fmt.Sprintf(blobFormatString, accountName)) + if err != nil { + return blob.ContainerURL{}, err + } + service := blob.NewServiceURL(*u, p) + + return service.NewContainerURL(containerName), nil +} + +func getContainer(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { + c, err := getContainerURL(ctx, accountName, accountKey, containerName) + if err != nil { + return blob.ContainerURL{}, err + } + // Getting container properties to check if it exists or not. Returns error which will be parsed further + _, err = c.GetProperties(ctx, blob.LeaseAccessConditions{}) + return c, err +} + +func createContainer(ctx context.Context, accountName, accountKey, containerName string) (blob.ContainerURL, error) { + c, err := getContainerURL(ctx, accountName, accountKey, containerName) + if err != nil { + return blob.ContainerURL{}, err + } + _, err = c.Create( + context.Background(), + blob.Metadata{}, + blob.PublicAccessNone) + return c, err +} + +func getBlobURL(ctx context.Context, accountName, accountKey, containerName, blobName string) (blob.BlockBlobURL, error) { + c, err := getContainerURL(ctx, accountName, accountKey, containerName) + if err != nil { + return blob.BlockBlobURL{}, err + } + return c.NewBlockBlobURL(blobName), nil +} + +func parseError(errorCode string) string { + re, _ := regexp.Compile(`X-Ms-Error-Code:\D*\[(\w+)\]`) + return re.FindStringSubmatch(errorCode)[1] +} diff --git a/pkg/objstore/client/factory.go b/pkg/objstore/client/factory.go index 3a50a38d92f..960d7ff7153 100644 --- a/pkg/objstore/client/factory.go +++ 
b/pkg/objstore/client/factory.go @@ -3,23 +3,27 @@ package client import ( "context" "fmt" - "io/ioutil" + "strings" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/improbable-eng/thanos/pkg/objstore" + "github.com/improbable-eng/thanos/pkg/objstore/azure" "github.com/improbable-eng/thanos/pkg/objstore/gcs" "github.com/improbable-eng/thanos/pkg/objstore/s3" + "github.com/improbable-eng/thanos/pkg/objstore/swift" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - yaml "gopkg.in/yaml.v2" + "gopkg.in/yaml.v2" ) type objProvider string const ( - GCS objProvider = "GCS" - S3 objProvider = "S3" + GCS objProvider = "GCS" + S3 objProvider = "S3" + AZURE objProvider = "AZURE" + SWIFT objProvider = "SWIFT" ) type BucketConfig struct { @@ -29,31 +33,17 @@ type BucketConfig struct { var ErrNotFound = errors.New("not found bucket") -func loadFile(confFile string) (*BucketConfig, error) { - content, err := ioutil.ReadFile(confFile) - if err != nil { - return nil, errors.Wrap(err, fmt.Sprintf("loading YAML file %s", confFile)) - } - - bucketConf := &BucketConfig{} - if err := yaml.UnmarshalStrict(content, bucketConf); err != nil { - return nil, errors.Wrap(err, fmt.Sprintf("parsing YAML file %s", confFile)) - } - return bucketConf, nil -} - // NewBucket initializes and returns new object storage clients. -func NewBucket(logger log.Logger, confFile string, reg *prometheus.Registry, component string) (objstore.Bucket, error) { - level.Info(logger).Log("msg", "loading bucket configuration file", "filename", confFile) - - var err error - if confFile == "" { +// NOTE: confContentYaml can contain secrets. 
+func NewBucket(logger log.Logger, confContentYaml []byte, reg *prometheus.Registry, component string) (objstore.Bucket, error) { + level.Info(logger).Log("msg", "loading bucket configuration") + if len(confContentYaml) == 0 { return nil, ErrNotFound } - bucketConf, err := loadFile(confFile) - if err != nil { - return nil, errors.Wrap(err, "parsing objstore.config-file") + bucketConf := &BucketConfig{} + if err := yaml.UnmarshalStrict(confContentYaml, bucketConf); err != nil { + return nil, errors.Wrap(err, "parsing config YAML file") } config, err := yaml.Marshal(bucketConf.Config) @@ -62,11 +52,15 @@ func NewBucket(logger log.Logger, confFile string, reg *prometheus.Registry, com } var bucket objstore.Bucket - switch bucketConf.Type { - case GCS: - bucket, err = gcs.NewBucket(context.Background(), logger, config, reg, component) - case S3: - bucket, err = s3.NewBucket(logger, config, reg, component) + switch strings.ToUpper(string(bucketConf.Type)) { + case string(GCS): + bucket, err = gcs.NewBucket(context.Background(), logger, config, component) + case string(S3): + bucket, err = s3.NewBucket(logger, config, component) + case string(AZURE): + bucket, err = azure.NewBucket(logger, config, component) + case string(SWIFT): + bucket, err = swift.NewContainer(logger, config) default: return nil, errors.Errorf("bucket with type %s is not supported", bucketConf.Type) } diff --git a/pkg/objstore/client/factory_test.go b/pkg/objstore/client/factory_test.go index 319c1e20ac2..a0b81b454ef 100644 --- a/pkg/objstore/client/factory_test.go +++ b/pkg/objstore/client/factory_test.go @@ -8,16 +8,26 @@ import ( "github.com/go-kit/kit/log" ) -func TestErrorBucketConfig(t *testing.T) { - conf := "testconf/fake-gcs.conf.yml" - _, err := NewBucket(log.NewNopLogger(), conf, nil, "bkt-client-test") +const unknownTypeConfig = `type: UNKNOWN +config: + bucket: test-bucket` + +func TestNewBucketUnknownType(t *testing.T) { + _, err := NewBucket(log.NewNopLogger(), 
[]byte(unknownTypeConfig), nil, "bkt-client-test") testutil.NotOk(t, err) testutil.Assert(t, err != ErrNotFound, "it should not error with not found") } -func TestBlankBucketConfigContent(t *testing.T) { - conf := "testconf/blank-gcs.conf.yml" - _, err := NewBucket(log.NewNopLogger(), conf, nil, "bkt-client-test") +const blankGCSConfig = `type: GCS` + +func TestNewBucketBlankConfig(t *testing.T) { + _, err := NewBucket(log.NewNopLogger(), []byte(blankGCSConfig), nil, "bkt-client-test") testutil.NotOk(t, err) testutil.Assert(t, err != ErrNotFound, "it should not error with not found") } + +func TestNewBucketNoConfig(t *testing.T) { + _, err := NewBucket(log.NewNopLogger(), []byte{}, nil, "bkt-client-test") + testutil.NotOk(t, err) + testutil.Assert(t, err == ErrNotFound, "it should error with not found") +} diff --git a/pkg/objstore/gcs/gcs.go b/pkg/objstore/gcs/gcs.go index e06be4f3749..468044b98ae 100644 --- a/pkg/objstore/gcs/gcs.go +++ b/pkg/objstore/gcs/gcs.go @@ -15,25 +15,12 @@ import ( "github.com/go-kit/kit/log" "github.com/improbable-eng/thanos/pkg/objstore" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/version" "google.golang.org/api/iterator" "google.golang.org/api/option" yaml "gopkg.in/yaml.v2" ) -const ( - // Class A operations. - opObjectsList = "objects.list" - opObjectInsert = "object.insert" - - // Class B operation. - opObjectGet = "object.get" - - // Free operations. - opObjectDelete = "object.delete" -) - // DirDelim is the delimiter used to model a directory structure in an object store bucket. const DirDelim = "/" @@ -44,16 +31,15 @@ type gcsConfig struct { // Bucket implements the store.Bucket and shipper.Bucket interfaces against GCS. 
type Bucket struct { - logger log.Logger - bkt *storage.BucketHandle - opsTotal *prometheus.CounterVec - name string + logger log.Logger + bkt *storage.BucketHandle + name string closer io.Closer } // NewBucket returns a new Bucket against the given bucket handle. -func NewBucket(ctx context.Context, logger log.Logger, conf []byte, reg prometheus.Registerer, component string) (*Bucket, error) { +func NewBucket(ctx context.Context, logger log.Logger, conf []byte, component string) (*Bucket, error) { var gc gcsConfig if err := yaml.Unmarshal(conf, &gc); err != nil { return nil, err @@ -69,17 +55,9 @@ func NewBucket(ctx context.Context, logger log.Logger, conf []byte, reg promethe bkt := &Bucket{ logger: logger, bkt: gcsClient.Bucket(gc.Bucket), - opsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_objstore_gcs_bucket_operations_total", - Help: "Total number of operations that were executed against a Google Compute Storage bucket.", - ConstLabels: prometheus.Labels{"bucket": gc.Bucket}, - }, []string{"operation"}), closer: gcsClient, name: gc.Bucket, } - if reg != nil { - reg.MustRegister() - } return bkt, nil } @@ -91,7 +69,6 @@ func (b *Bucket) Name() string { // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error) error { - b.opsTotal.WithLabelValues(opObjectsList).Inc() // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the // object itself as one prefix item. if dir != "" { @@ -122,13 +99,11 @@ func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error) err // Get returns a reader for the given object name. 
func (b *Bucket) Get(ctx context.Context, name string) (io.ReadCloser, error) { - b.opsTotal.WithLabelValues(opObjectGet).Inc() return b.bkt.Object(name).NewReader(ctx) } // GetRange returns a new range reader for the given object name and range. func (b *Bucket) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { - b.opsTotal.WithLabelValues(opObjectGet).Inc() return b.bkt.Object(name).NewRangeReader(ctx, off, length) } @@ -140,8 +115,6 @@ func (b *Bucket) Handle() *storage.BucketHandle { // Exists checks if the given object exists. func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { - b.opsTotal.WithLabelValues(opObjectGet).Inc() - if _, err := b.bkt.Object(name).Attrs(ctx); err == nil { return true, nil } else if err != storage.ErrObjectNotExist { @@ -152,8 +125,6 @@ func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { // Upload writes the file specified in src to remote GCS location specified as target. func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { - b.opsTotal.WithLabelValues(opObjectInsert).Inc() - w := b.bkt.Object(name).NewWriter(ctx) if _, err := io.Copy(w, r); err != nil { @@ -164,8 +135,6 @@ func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { // Delete removes the object with the given name. 
func (b *Bucket) Delete(ctx context.Context, name string) error { - b.opsTotal.WithLabelValues(opObjectDelete).Inc() - return b.bkt.Object(name).Delete(ctx) } @@ -192,7 +161,7 @@ func NewTestBucket(t testing.TB, project string) (objstore.Bucket, func(), error return nil, nil, err } - b, err := NewBucket(ctx, log.NewNopLogger(), bc, nil, "thanos-e2e-test") + b, err := NewBucket(ctx, log.NewNopLogger(), bc, "thanos-e2e-test") if err != nil { cancel() return nil, nil, err diff --git a/pkg/objstore/objstore.go b/pkg/objstore/objstore.go index 81ae6fbb8fb..2790b29c04a 100644 --- a/pkg/objstore/objstore.go +++ b/pkg/objstore/objstore.go @@ -52,7 +52,7 @@ type BucketReader interface { } // UploadDir uploads all files in srcdir to the bucket with into a top-level directory -// named dstdir. +// named dstdir. It is a caller responsibility to clean partial upload in case of failure. func UploadDir(ctx context.Context, logger log.Logger, bkt Bucket, srcdir, dstdir string) error { df, err := os.Stat(srcdir) if err != nil { @@ -75,6 +75,7 @@ func UploadDir(ctx context.Context, logger log.Logger, bkt Bucket, srcdir, dstdi } // UploadFile uploads the file with the given name to the bucket. 
+// It is the caller's responsibility to clean up a partial upload in case of failure. func UploadFile(ctx context.Context, logger log.Logger, bkt Bucket, src, dst string) error { r, err := os.Open(src) if err != nil { return errors.Wrap(err, fmt.Sprintf("open file %s", src)) } diff --git a/pkg/objstore/objtesting/acceptance_e2e_test.go b/pkg/objstore/objtesting/acceptance_e2e_test.go index f071451e1cb..c8902d2dde3 100644 --- a/pkg/objstore/objtesting/acceptance_e2e_test.go +++ b/pkg/objstore/objtesting/acceptance_e2e_test.go @@ -3,6 +3,7 @@ package objtesting import ( "context" "io/ioutil" + "sort" "strings" "testing" @@ -62,7 +63,10 @@ func TestObjStore_AcceptanceTest_e2e(t *testing.T) { seen = append(seen, fn) return nil })) - testutil.Equals(t, []string{"obj_5.some", "id1/", "id2/"}, seen) + expected := []string{"obj_5.some", "id1/", "id2/"} + sort.Strings(expected) + sort.Strings(seen) + testutil.Equals(t, expected, seen) // Can we iter over items from id1/ dir? seen = []string{} diff --git a/pkg/objstore/objtesting/foreach.go b/pkg/objstore/objtesting/foreach.go index 347a5bace46..9dda049acff 100644 --- a/pkg/objstore/objtesting/foreach.go +++ b/pkg/objstore/objtesting/foreach.go @@ -7,9 +7,11 @@ import ( "github.com/fortytw2/leaktest" "github.com/improbable-eng/thanos/pkg/objstore" + "github.com/improbable-eng/thanos/pkg/objstore/azure" "github.com/improbable-eng/thanos/pkg/objstore/gcs" "github.com/improbable-eng/thanos/pkg/objstore/inmem" "github.com/improbable-eng/thanos/pkg/objstore/s3" + "github.com/improbable-eng/thanos/pkg/objstore/swift" "github.com/improbable-eng/thanos/pkg/testutil" ) @@ -46,15 +48,14 @@ func ForeachStore(t *testing.T, testFn func(t testing.TB, bkt objstore.Bucket)) } // Optional S3 AWS. - // TODO(bplotka): Prepare environment & CI to run it automatically. - // TODO(bplotka): Find a user with S3 AWS project ready to run this test. 
+ // TODO(bwplotka): Prepare environment & CI to run it automatically. if _, ok := os.LookupEnv("THANOS_SKIP_S3_AWS_TESTS"); !ok { - // TODO(bplotka): Allow taking location from envvar. + // TODO(bwplotka): Allow taking location from envvar. bkt, closeFn, err := s3.NewTestBucket(t, "eu-west-1") testutil.Ok(t, err) ok := t.Run("aws s3", func(t *testing.T) { - // TODO(bplotka): Add leaktest when we fix potential leak in minio library. + // TODO(bwplotka): Add leaktest when we fix potential leak in minio library. // We cannot use leaktest for detecting our own potential leaks, when leaktest detects leaks in minio itself. // This needs to be investigated more. @@ -67,4 +68,36 @@ func ForeachStore(t *testing.T, testFn func(t testing.TB, bkt objstore.Bucket)) } else { t.Log("THANOS_SKIP_S3_AWS_TESTS envvar present. Skipping test against S3 AWS.") } + + // Optional Azure. + if _, ok := os.LookupEnv("THANOS_SKIP_AZURE_TESTS"); !ok { + bkt, closeFn, err := azure.NewTestBucket(t, "e2e-tests") + testutil.Ok(t, err) + + ok := t.Run("azure", func(t *testing.T) { + testFn(t, bkt) + }) + closeFn() + if !ok { + return + } + } else { + t.Log("THANOS_SKIP_AZURE_TESTS envvar present. Skipping test against Azure.") + } + + // Optional SWIFT. + if _, ok := os.LookupEnv("THANOS_SKIP_SWIFT_TESTS"); !ok { + container, closeFn, err := swift.NewTestContainer(t) + testutil.Ok(t, err) + + ok := t.Run("swift", func(t *testing.T) { + testFn(t, container) + }) + closeFn() + if !ok { + return + } + } else { + t.Log("THANOS_SKIP_SWIFT_TESTS envvar present. 
Skipping test against swift.") + } } diff --git a/pkg/objstore/s3/s3.go b/pkg/objstore/s3/s3.go index 3794f23607f..7198936be9b 100644 --- a/pkg/objstore/s3/s3.go +++ b/pkg/objstore/s3/s3.go @@ -22,49 +22,46 @@ import ( "github.com/minio/minio-go/pkg/credentials" "github.com/minio/minio-go/pkg/encrypt" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/version" yaml "gopkg.in/yaml.v2" ) -const ( - opObjectsList = "ListBucket" - opObjectInsert = "PutObject" - opObjectGet = "GetObject" - opObjectHead = "HEADObject" - opObjectDelete = "DeleteObject" -) - // DirDelim is the delimiter used to model a directory structure in an object store bucket. const DirDelim = "/" -// s3Config stores the configuration for s3 bucket. -type s3Config struct { +// Config stores the configuration for s3 bucket. +type Config struct { Bucket string `yaml:"bucket"` Endpoint string `yaml:"endpoint"` - AccessKey string `yaml:"access-key"` + AccessKey string `yaml:"access_key"` Insecure bool `yaml:"insecure"` - SignatureV2 bool `yaml:"signature-version2"` - SSEEncryption bool `yaml:"encrypt-sse"` - SecretKey string `yaml:"secret-key"` + SignatureV2 bool `yaml:"signature_version2"` + SSEEncryption bool `yaml:"encrypt_sse"` + SecretKey string `yaml:"secret_key"` } // Bucket implements the store.Bucket interface against s3-compatible APIs. type Bucket struct { - logger log.Logger - name string - client *minio.Client - sse encrypt.ServerSide - opsTotal *prometheus.CounterVec + logger log.Logger + name string + client *minio.Client + sse encrypt.ServerSide } // NewBucket returns a new Bucket using the provided s3 config values. 
-func NewBucket(logger log.Logger, conf []byte, reg prometheus.Registerer, component string) (*Bucket, error) { - var chain []credentials.Provider - var config s3Config +func NewBucket(logger log.Logger, conf []byte, component string) (*Bucket, error) { + var config Config if err := yaml.Unmarshal(conf, &config); err != nil { return nil, err } + + return NewBucketWithConfig(logger, config, component) +} + +// NewBucketWithConfig returns a new Bucket using the provided s3 config values. +func NewBucketWithConfig(logger log.Logger, config Config, component string) (*Bucket, error) { + var chain []credentials.Provider + if err := Validate(config); err != nil { return nil, err } @@ -132,14 +129,6 @@ func NewBucket(logger log.Logger, conf []byte, reg prometheus.Registerer, compon name: config.Bucket, client: client, sse: sse, - opsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "thanos_objstore_s3_bucket_operations_total", - Help: "Total number of operations that were executed against an s3 bucket.", - ConstLabels: prometheus.Labels{"bucket": config.Bucket}, - }, []string{"operation"}), - } - if reg != nil { - reg.MustRegister(bkt.opsTotal) } return bkt, nil } @@ -150,7 +139,7 @@ func (b *Bucket) Name() string { } // Validate checks to see the config options are set. -func Validate(conf s3Config) error { +func Validate(conf Config) error { if conf.Endpoint == "" || (conf.AccessKey == "" && conf.SecretKey != "") || (conf.AccessKey != "" && conf.SecretKey == "") { @@ -160,7 +149,7 @@ func Validate(conf s3Config) error { } // ValidateForTests checks to see the config options for tests are set. -func ValidateForTests(conf s3Config) error { +func ValidateForTests(conf Config) error { if conf.Endpoint == "" || conf.AccessKey == "" || conf.SecretKey == "" { @@ -172,7 +161,6 @@ func ValidateForTests(conf s3Config) error { // Iter calls f for each entry in the given directory. The argument to f is the full // object name including the prefix of the inspected directory. 
func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error) error { - b.opsTotal.WithLabelValues(opObjectsList).Inc() // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the // object itself as one prefix item. if dir != "" { @@ -197,7 +185,6 @@ func (b *Bucket) Iter(ctx context.Context, dir string, f func(string) error) err } func (b *Bucket) getRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { - b.opsTotal.WithLabelValues(opObjectGet).Inc() opts := &minio.GetObjectOptions{ServerSideEncryption: b.sse} if length != -1 { if err := opts.SetRange(off, off+length-1); err != nil { @@ -233,7 +220,6 @@ func (b *Bucket) GetRange(ctx context.Context, name string, off, length int64) ( // Exists checks if the given object exists. func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { - b.opsTotal.WithLabelValues(opObjectHead).Inc() _, err := b.client.StatObject(b.name, name, minio.StatObjectOptions{}) if err != nil { if b.IsObjNotFoundErr(err) { @@ -247,8 +233,6 @@ func (b *Bucket) Exists(ctx context.Context, name string) (bool, error) { // Upload the contents of the reader as an object into the bucket. func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { - b.opsTotal.WithLabelValues(opObjectInsert).Inc() - _, err := b.client.PutObjectWithContext(ctx, b.name, name, r, -1, minio.PutObjectOptions{ServerSideEncryption: b.sse}, ) @@ -258,7 +242,6 @@ func (b *Bucket) Upload(ctx context.Context, name string, r io.Reader) error { // Delete removes the object with the given name. 
func (b *Bucket) Delete(ctx context.Context, name string) error { - b.opsTotal.WithLabelValues(opObjectDelete).Inc() return b.client.RemoveObject(b.name, name) } @@ -269,22 +252,16 @@ func (b *Bucket) IsObjNotFoundErr(err error) bool { func (b *Bucket) Close() error { return nil } -func configFromEnv() s3Config { - c := s3Config{ +func configFromEnv() Config { + c := Config{ Bucket: os.Getenv("S3_BUCKET"), Endpoint: os.Getenv("S3_ENDPOINT"), AccessKey: os.Getenv("S3_ACCESS_KEY"), SecretKey: os.Getenv("S3_SECRET_KEY"), } - insecure, err := strconv.ParseBool(os.Getenv("S3_INSECURE")) - if err != nil { - c.Insecure = insecure - } - signV2, err := strconv.ParseBool(os.Getenv("S3_SIGNATURE_VERSION2")) - if err != nil { - c.SignatureV2 = signV2 - } + c.Insecure, _ = strconv.ParseBool(os.Getenv("S3_INSECURE")) + c.SignatureV2, _ = strconv.ParseBool(os.Getenv("S3_SIGNATURE_VERSION2")) return c } @@ -295,24 +272,30 @@ func NewTestBucket(t testing.TB, location string) (objstore.Bucket, func(), erro if err := ValidateForTests(c); err != nil { return nil, nil, err } + + if c.Bucket != "" && os.Getenv("THANOS_ALLOW_EXISTING_BUCKET_USE") == "" { + return nil, nil, errors.New("S3_BUCKET is defined. Normally this tests will create temporary bucket " + + "and delete it after test. Unset S3_BUCKET env variable to use default logic. If you really want to run " + + "tests against provided (NOT USED!) bucket, set THANOS_ALLOW_EXISTING_BUCKET_USE=true. WARNING: That bucket " + + "needs to be manually cleared. This means that it is only useful to run one test in a time. 
This is due " + + "to safety (accidentally pointing prod bucket for test) as well as aws s3 not being fully strong consistent.") + } + + return NewTestBucketFromConfig(t, location, c, true) +} + +func NewTestBucketFromConfig(t testing.TB, location string, c Config, reuseBucket bool) (objstore.Bucket, func(), error) { bc, err := yaml.Marshal(c) if err != nil { return nil, nil, err } - b, err := NewBucket(log.NewNopLogger(), bc, nil, "thanos-e2e-test") + b, err := NewBucket(log.NewNopLogger(), bc, "thanos-e2e-test") if err != nil { return nil, nil, err } - if c.Bucket != "" { - if os.Getenv("THANOS_ALLOW_EXISTING_BUCKET_USE") == "" { - return nil, nil, errors.New("S3_BUCKET is defined. Normally this tests will create temporary bucket " + - "and delete it after test. Unset S3_BUCKET env variable to use default logic. If you really want to run " + - "tests against provided (NOT USED!) bucket, set THANOS_ALLOW_EXISTING_BUCKET_USE=true. WARNING: That bucket " + - "needs to be manually cleared. This means that it is only useful to run one test in a time. 
This is due " + - "to safety (accidentally pointing prod bucket for test) as well as aws s3 not being fully strong consistent.") - } - + bktToCreate := c.Bucket + if c.Bucket != "" && reuseBucket { if err := b.Iter(context.Background(), "", func(f string) error { return errors.Errorf("bucket %s is not empty", c.Bucket) }); err != nil { @@ -323,23 +306,26 @@ func NewTestBucket(t testing.TB, location string) (objstore.Bucket, func(), erro return b, func() {}, nil } - src := rand.NewSource(time.Now().UnixNano()) + if c.Bucket == "" { + src := rand.NewSource(time.Now().UnixNano()) - // Bucket name need to conform: https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html - tmpBucketName := strings.Replace(fmt.Sprintf("test_%s_%x", strings.ToLower(t.Name()), src.Int63()), "_", "-", -1) - if len(tmpBucketName) >= 63 { - tmpBucketName = tmpBucketName[:63] + // Bucket name need to conform: https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html + bktToCreate = strings.Replace(fmt.Sprintf("test_%s_%x", strings.ToLower(t.Name()), src.Int63()), "_", "-", -1) + if len(bktToCreate) >= 63 { + bktToCreate = bktToCreate[:63] + } } - if err := b.client.MakeBucket(tmpBucketName, location); err != nil { + + if err := b.client.MakeBucket(bktToCreate, location); err != nil { return nil, nil, err } - b.name = tmpBucketName - t.Log("created temporary AWS bucket for AWS tests with name", tmpBucketName, "in", location) + b.name = bktToCreate + t.Log("created temporary AWS bucket for AWS tests with name", bktToCreate, "in", location) return b, func() { objstore.EmptyBucket(t, context.Background(), b) - if err := b.client.RemoveBucket(tmpBucketName); err != nil { - t.Logf("deleting bucket %s failed: %s", tmpBucketName, err) + if err := b.client.RemoveBucket(bktToCreate); err != nil { + t.Logf("deleting bucket %s failed: %s", bktToCreate, err) } }, nil } diff --git a/pkg/objstore/swift/swift.go 
b/pkg/objstore/swift/swift.go new file mode 100644 index 00000000000..1943e59c18e --- /dev/null +++ b/pkg/objstore/swift/swift.go @@ -0,0 +1,263 @@ +// Package swift implements common object storage abstractions against OpenStack swift APIs. +package swift + +import ( + "context" + "fmt" + "io" + "math/rand" + "os" + "strings" + "testing" + "time" + + "github.com/improbable-eng/thanos/pkg/objstore" + + "github.com/go-kit/kit/log" + "github.com/gophercloud/gophercloud" + "github.com/gophercloud/gophercloud/openstack" + "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/containers" + "github.com/gophercloud/gophercloud/openstack/objectstorage/v1/objects" + "github.com/gophercloud/gophercloud/pagination" + "github.com/pkg/errors" + "gopkg.in/yaml.v2" +) + +// DirDelim is the delimiter used to model a directory structure in an object store bucket. +const DirDelim = "/" + +type swiftConfig struct { + AuthUrl string `yaml:"auth_url"` + Username string `yaml:"username,omitempty"` + UserId string `yaml:"user_id,omitempty"` + Password string `yaml:"password"` + DomainId string `yaml:"domain_id,omitempty"` + DomainName string `yaml:"domain_name,omitempty"` + TenantID string `yaml:"tenant_id,omitempty"` + TenantName string `yaml:"tenant_name,omitempty"` + RegionName string `yaml:"region_name,omitempty"` + ContainerName string `yaml:"container_name"` +} + +type Container struct { + logger log.Logger + client *gophercloud.ServiceClient + name string +} + +func NewContainer(logger log.Logger, conf []byte) (*Container, error) { + var sc swiftConfig + if err := yaml.Unmarshal(conf, &sc); err != nil { + return nil, err + } + + authOpts := gophercloud.AuthOptions{ + IdentityEndpoint: sc.AuthUrl, + Username: sc.Username, + UserID: sc.UserId, + Password: sc.Password, + DomainID: sc.DomainId, + DomainName: sc.DomainName, + TenantID: sc.TenantID, + TenantName: 
sc.TenantName, + + // Allow Gophercloud to re-authenticate automatically. + AllowReauth: true, + } + + provider, err := openstack.AuthenticatedClient(authOpts) + if err != nil { + return nil, err + } + + client, err := openstack.NewObjectStorageV1(provider, gophercloud.EndpointOpts{ + Region: sc.RegionName, + }) + if err != nil { + return nil, err + } + + return &Container{ + logger: logger, + client: client, + name: sc.ContainerName, + }, nil +} + +// Name returns the container name for swift. +func (c *Container) Name() string { + return c.name +} + +// Iter calls f for each entry in the given directory. The argument to f is the full +// object name including the prefix of the inspected directory. +func (c *Container) Iter(ctx context.Context, dir string, f func(string) error) error { + // Ensure the object name actually ends with a dir suffix. Otherwise we'll just iterate the + // object itself as one prefix item. + if dir != "" { + dir = strings.TrimSuffix(dir, DirDelim) + DirDelim + } + + options := &objects.ListOpts{Full: false, Prefix: dir, Delimiter: DirDelim} + return objects.List(c.client, c.name, options).EachPage(func(page pagination.Page) (bool, error) { + objectNames, err := objects.ExtractNames(page) + if err != nil { + return false, err + } + for _, objectName := range objectNames { + if err := f(objectName); err != nil { + return false, err + } + } + + return true, nil + }) +} + +// Get returns a reader for the given object name. +func (c *Container) Get(ctx context.Context, name string) (io.ReadCloser, error) { + if name == "" { + return nil, errors.New("error, empty container name passed") + } + response := objects.Download(c.client, c.name, name, nil) + return response.Body, response.Err +} + +// GetRange returns a new range reader for the given object name and range. 
+func (c *Container) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { + options := objects.DownloadOpts{ + Newest: true, + Range: fmt.Sprintf("bytes=%d-%d", off, off+length-1), + } + response := objects.Download(c.client, c.name, name, options) + return response.Body, response.Err +} + +// Exists checks if the given object exists. +func (c *Container) Exists(ctx context.Context, name string) (bool, error) { + err := objects.Get(c.client, c.name, name, nil).Err + if err == nil { + return true, nil + } + + if _, ok := err.(gophercloud.ErrDefault404); ok { + return false, nil + } + + return false, err +} + +// IsObjNotFoundErr returns true if error means that object is not found. Relevant to Get operations. +func (c *Container) IsObjNotFoundErr(err error) bool { + _, ok := err.(gophercloud.ErrDefault404) + return ok +} + +// Upload writes the contents of the reader as an object into the container. +func (c *Container) Upload(ctx context.Context, name string, r io.Reader) error { + options := &objects.CreateOpts{Content: r} + res := objects.Create(c.client, c.name, name, options) + return res.Err +} + +// Delete removes the object with the given name. 
+func (c *Container) Delete(ctx context.Context, name string) error { + return objects.Delete(c.client, c.name, name, nil).Err +} + +func (*Container) Close() error { + // nothing to close + return nil +} + +func (c *Container) createContainer(name string) error { + return containers.Create(c.client, name, nil).Err +} + +func (c *Container) deleteContainer(name string) error { + return containers.Delete(c.client, name).Err +} + +func configFromEnv() swiftConfig { + c := swiftConfig{ + AuthUrl: os.Getenv("OS_AUTH_URL"), + Username: os.Getenv("OS_USERNAME"), + Password: os.Getenv("OS_PASSWORD"), + TenantID: os.Getenv("OS_TENANT_ID"), + TenantName: os.Getenv("OS_TENANT_NAME"), + RegionName: os.Getenv("OS_REGION_NAME"), + ContainerName: os.Getenv("OS_CONTAINER_NAME"), + } + + return c +} + +// validateForTests checks that the config options required for tests are set. +func validateForTests(conf swiftConfig) error { + if conf.AuthUrl == "" || + conf.Username == "" || + conf.Password == "" || + (conf.TenantName == "" && conf.TenantID == "") || + conf.RegionName == "" { + return errors.New("insufficient swift test configuration information") + } + return nil +} + +// NewTestContainer creates a test objStore client that, before returning, creates a temporary container. +// In a close function it empties and deletes the container. +func NewTestContainer(t testing.TB) (objstore.Bucket, func(), error) { + config := configFromEnv() + if err := validateForTests(config); err != nil { + return nil, nil, err + } + containerConfig, err := yaml.Marshal(config) + if err != nil { + return nil, nil, err + } + + c, err := NewContainer(log.NewNopLogger(), containerConfig) + if err != nil { + return nil, nil, err + } + + if config.ContainerName != "" { + if os.Getenv("THANOS_ALLOW_EXISTING_BUCKET_USE") == "" { + return nil, nil, errors.New("OS_CONTAINER_NAME is defined. Normally these tests will create a temporary container " + + "and delete it after the test. 
Unset OS_CONTAINER_NAME env variable to use default logic. If you really want to run " + "tests against provided (NOT USED!) container, set THANOS_ALLOW_EXISTING_BUCKET_USE=true. WARNING: That container " + "needs to be manually cleared. This means that it is only useful to run one test at a time. This is due " + "to safety (accidentally pointing to a prod container for tests) as well as swift not being fully strongly consistent.") + } + + if err := c.Iter(context.Background(), "", func(f string) error { + return errors.Errorf("container %s is not empty", config.ContainerName) + }); err != nil { + return nil, nil, errors.Wrapf(err, "swift check container %s", config.ContainerName) + } + + t.Log("WARNING. Reusing", config.ContainerName, "container for Swift tests. Manual cleanup afterwards is required") + return c, func() {}, nil + } + + src := rand.NewSource(time.Now().UnixNano()) + + tmpContainerName := fmt.Sprintf("test_%s_%x", strings.ToLower(t.Name()), src.Int63()) + if len(tmpContainerName) >= 63 { + tmpContainerName = tmpContainerName[:63] + } + + if err := c.createContainer(tmpContainerName); err != nil { + return nil, nil, err + } + + c.name = tmpContainerName + t.Log("created temporary container for swift tests with name", tmpContainerName) + + return c, func() { + objstore.EmptyBucket(t, context.Background(), c) + if err := c.deleteContainer(tmpContainerName); err != nil { + t.Logf("deleting container %s failed: %s", tmpContainerName, err) + } + }, nil +} diff --git a/pkg/query/api/v1.go b/pkg/query/api/v1.go index 8cca71e088d..aa7eee21c64 100644 --- a/pkg/query/api/v1.go +++ b/pkg/query/api/v1.go @@ -141,7 +141,7 @@ func NewAPI( instantQueryDuration: instantQueryDuration, rangeQueryDuration: rangeQueryDuration, enableAutodownsampling: enableAutodownsampling, - now: time.Now, + now: time.Now, } } diff --git a/pkg/query/storeset.go b/pkg/query/storeset.go index 470bffd87ce..09adee3d0df 100644 --- a/pkg/query/storeset.go +++ b/pkg/query/storeset.go @@ -172,7 
+172,8 @@ func (s *storeRef) TimeRange() (int64, int64) { } func (s *storeRef) String() string { - return fmt.Sprintf("%s", s.addr) + mint, maxt := s.TimeRange() + return fmt.Sprintf("Addr: %s Labels: %v Mint: %d Maxt: %d", s.addr, s.Labels(), mint, maxt) } func (s *storeRef) close() { diff --git a/pkg/reloader/reloader.go b/pkg/reloader/reloader.go index 34e266e6f38..2849854e913 100644 --- a/pkg/reloader/reloader.go +++ b/pkg/reloader/reloader.go @@ -170,7 +170,9 @@ func (r *Reloader) apply(ctx context.Context) error { return errors.Wrap(err, "build hash") } } - ruleHash = h.Sum(nil) + if len(r.ruleDirs) > 0 { + ruleHash = h.Sum(nil) + } if bytes.Equal(r.lastCfgHash, cfgHash) && bytes.Equal(r.lastRuleHash, ruleHash) { // Nothing to do. diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index b8516b0e4e9..cf6301acb62 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -58,7 +58,7 @@ type bucketStoreMetrics struct { chunkSizeBytes prometheus.Histogram } -func newBucketStoreMetrics(reg prometheus.Registerer, s *BucketStore) *bucketStoreMetrics { +func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics { var m bucketStoreMetrics m.blockLoads = prometheus.NewCounter(prometheus.CounterOpts{ @@ -203,7 +203,7 @@ func NewBucketStore( blockSets: map[uint64]*bucketBlockSet{}, debugLogging: debugLogging, } - s.metrics = newBucketStoreMetrics(reg, s) + s.metrics = newBucketStoreMetrics(reg) if err := os.MkdirAll(dir, 0777); err != nil { return nil, errors.Wrap(err, "create dir") diff --git a/pkg/store/proxy.go b/pkg/store/proxy.go index 203c5f1d372..b1ebc7c820c 100644 --- a/pkg/store/proxy.go +++ b/pkg/store/proxy.go @@ -4,6 +4,7 @@ import ( "context" "io" "math" + "strings" "sync" "fmt" @@ -29,6 +30,8 @@ type Client interface { // Minimum and maximum time range of data in the store. TimeRange() (mint int64, maxt int64) + + String() string } // ProxyStore implements the store API that proxies request to all given underlying stores. 
@@ -83,24 +86,31 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe return nil } - var ( - respCh = make(chan *storepb.SeriesResponse, 10) - seriesSet []storepb.SeriesSet - g errgroup.Group - ) - stores, err := s.stores(srv.Context()) if err != nil { + err = errors.Wrap(err, "failed to get store APIs") level.Error(s.logger).Log("err", err) return status.Errorf(codes.Unknown, err.Error()) } + + var ( + seriesSet []storepb.SeriesSet + respCh = make(chan *storepb.SeriesResponse, len(stores)+1) + g errgroup.Group + ) + + var storeDebugMsgs []string + for _, st := range stores { // We might be able to skip the store if its meta information indicates // it cannot have series matching our query. // NOTE: all matchers are validated in labelsMatches method so we explicitly ignore error. if ok, _ := storeMatches(st, r.MinTime, r.MaxTime, newMatchers...); !ok { + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s filtered out", st)) continue } + storeDebugMsgs = append(storeDebugMsgs, fmt.Sprintf("store %s queried", st)) + sc, err := st.Series(srv.Context(), &storepb.SeriesRequest{ MinTime: r.MinTime, MaxTime: r.MaxTime, @@ -123,11 +133,13 @@ func (s *ProxyStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSe } if len(seriesSet) == 0 { err := errors.New("No store matched for this query") - level.Warn(s.logger).Log("err", err) + level.Warn(s.logger).Log("err", err, "stores", strings.Join(storeDebugMsgs, ";")) respCh <- storepb.NewWarnSeriesResponse(err) return nil } + level.Debug(s.logger).Log("msg", strings.Join(storeDebugMsgs, ";")) + g.Go(func() error { defer close(respCh) diff --git a/pkg/store/proxy_test.go b/pkg/store/proxy_test.go index f7ab62ea664..7b2a24515a5 100644 --- a/pkg/store/proxy_test.go +++ b/pkg/store/proxy_test.go @@ -200,6 +200,44 @@ func TestQueryStore_Series_SameExtSet(t *testing.T) { testutil.Equals(t, 0, len(s1.Warnings)) } +func TestQueryStore_Series_FillResponseChannel(t *testing.T) { + defer 
leaktest.CheckTimeout(t, 10*time.Second)() + + var cls []Client + for i := 0; i < 10; i++ { + cls = append(cls, &testClient{ + StoreClient: &storeClient{ + RespSet: []*storepb.SeriesResponse{ + storeSeriesResponse(t, labels.FromStrings("a", "b"), []sample{{1, 1}, {2, 2}, {3, 3}}), + }, + RespError: errors.New("test error"), + }, + minTime: 1, + maxTime: 300, + }) + } + + q := NewProxyStore(nil, + func(context.Context) ([]Client, error) { return cls, nil }, + tlabels.FromStrings("fed", "a"), + ) + + ctx := context.Background() + s1 := newStoreSeriesServer(ctx) + + // This should return empty response, since there is external label mismatch. + err := q.Series( + &storepb.SeriesRequest{ + MinTime: 1, + MaxTime: 300, + Matchers: []storepb.LabelMatcher{{Name: "fed", Value: "a", Type: storepb.LabelMatcher_EQ}}, + }, s1, + ) + testutil.Ok(t, err) + testutil.Equals(t, 0, len(s1.SeriesSet)) + testutil.Equals(t, 0, len(s1.Warnings)) +} + type rawSeries struct { lset []storepb.Label samples []sample @@ -342,7 +380,8 @@ func (s *storeSeriesServer) Context() context.Context { type storeClient struct { Values map[string][]string - RespSet []*storepb.SeriesResponse + RespSet []*storepb.SeriesResponse + RespError error } func (s *storeClient) Info(ctx context.Context, req *storepb.InfoRequest, _ ...grpc.CallOption) (*storepb.InfoResponse, error) { @@ -350,7 +389,7 @@ func (s *storeClient) Info(ctx context.Context, req *storepb.InfoRequest, _ ...g } func (s *storeClient) Series(ctx context.Context, req *storepb.SeriesRequest, _ ...grpc.CallOption) (storepb.Store_SeriesClient, error) { - return &StoreSeriesClient{ctx: ctx, respSet: s.RespSet}, nil + return &StoreSeriesClient{ctx: ctx, respSet: s.RespSet}, s.RespError } func (s *storeClient) LabelNames(ctx context.Context, req *storepb.LabelNamesRequest, _ ...grpc.CallOption) (*storepb.LabelNamesResponse, error) { diff --git a/pkg/testutil/prometheus.go b/pkg/testutil/prometheus.go index a6ec243b8fa..8d35ecfe91d 100644 --- 
a/pkg/testutil/prometheus.go +++ b/pkg/testutil/prometheus.go @@ -23,12 +23,14 @@ import ( ) const ( - // TODO(bplotka): Change default version to something more recent after https://github.com/prometheus/prometheus/issues/4551 is fixed. + // TODO(bwplotka): Change default version to something more recent after https://github.com/prometheus/prometheus/issues/4551 is fixed. defaultPrometheusVersion = "v2.2.1" defaultAlertmanagerVersion = "v0.15.2" + defaultMinioVersion = "RELEASE.2018-10-06T00-15-16Z" promBinEnvVar = "THANOS_TEST_PROMETHEUS_PATH" alertmanagerBinEnvVar = "THANOS_TEST_ALERTMANAGER_PATH" + minioBinEnvVar = "THANOS_TEST_MINIO_PATH" ) func PrometheusBinary() string { @@ -42,7 +44,15 @@ func PrometheusBinary() string { func AlertmanagerBinary() string { b := os.Getenv(alertmanagerBinEnvVar) if b == "" { - return fmt.Sprintf("prometheus-%s", defaultAlertmanagerVersion) + return fmt.Sprintf("alertmanager-%s", defaultAlertmanagerVersion) + } + return b +} + +func MinioBinary() string { + b := os.Getenv(minioBinEnvVar) + if b == "" { + return fmt.Sprintf("minio-%s", defaultMinioVersion) } return b } diff --git a/pkg/tracing/gct.go b/pkg/tracing/gct.go index 4722b798e22..afb1a03f0d6 100644 --- a/pkg/tracing/gct.go +++ b/pkg/tracing/gct.go @@ -38,6 +38,7 @@ func NewOptionalGCloudTracer(ctx context.Context, logger log.Logger, gcloudTrace return &opentracing.NoopTracer{}, func() error { return nil } } + level.Info(logger).Log("msg", "initiated Google Cloud Tracer. 
Tracing will be enabled", "err", err) return tracer, closeFn } diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index 00b5b3d23b2..22b394bfbbb 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -34,8 +34,8 @@ func tracerFromContext(ctx context.Context) opentracing.Tracer { func StartSpan(ctx context.Context, operationName string, opts ...opentracing.StartSpanOption) (opentracing.Span, context.Context) { tracer := tracerFromContext(ctx) if tracer == nil { - // No tracing found, use noop one. - tracer = &opentracing.NoopTracer{} + // No tracing found, return noop span. + return opentracing.NoopTracer{}.StartSpan(operationName), ctx } var span opentracing.Span diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index c43e1b74ac5..c76466be38d 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -4,10 +4,8 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" "net/http" "net/url" - "os" "testing" "time" @@ -17,62 +15,63 @@ import ( "github.com/prometheus/common/model" ) -// TestQuerySimple runs a setup of Prometheus servers, sidecars, and query nodes and verifies that -// queries return data merged from all Prometheus servers. Additionally it verifies if deduplication works for query. -func TestQuerySimple(t *testing.T) { - dir, err := ioutil.TempDir("", "test_query_simple") - testutil.Ok(t, err) - defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() +type testConfig struct { + name string + suite *spinupSuite +} - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) +var ( + firstPromPort = promHTTPPort(1) + + queryGossipSuite = newSpinupSuite(). + Add(scraper(1, defaultPromConfig("prom-"+firstPromPort, 0), true)). + Add(scraper(2, defaultPromConfig("prom-ha", 0), true)). + Add(scraper(3, defaultPromConfig("prom-ha", 1), true)). + Add(querier(1, "replica"), queryCluster(1)). + Add(querier(2, "replica"), queryCluster(2)) + + queryStaticFlagsSuite = newSpinupSuite(). 
+ Add(scraper(1, defaultPromConfig("prom-"+firstPromPort, 0), false)). + Add(scraper(2, defaultPromConfig("prom-ha", 0), false)). + Add(scraper(3, defaultPromConfig("prom-ha", 1), false)). + Add(querierWithStoreFlags(1, "replica", sidecarGRPC(1), sidecarGRPC(2), sidecarGRPC(3)), ""). + Add(querierWithStoreFlags(2, "replica", sidecarGRPC(1), sidecarGRPC(2), sidecarGRPC(3)), "") + + queryFileSDSuite = newSpinupSuite(). + Add(scraper(1, defaultPromConfig("prom-"+firstPromPort, 0), false)). + Add(scraper(2, defaultPromConfig("prom-ha", 0), false)). + Add(scraper(3, defaultPromConfig("prom-ha", 1), false)). + Add(querierWithFileSD(1, "replica", sidecarGRPC(1), sidecarGRPC(2), sidecarGRPC(3)), ""). + Add(querierWithFileSD(2, "replica", sidecarGRPC(1), sidecarGRPC(2), sidecarGRPC(3)), "") +) - firstPromPort := promHTTPPort(1) - exit, err := spinup(t, ctx, config{ - promConfigs: []string{ - // Self scraping config with unique external label. - fmt.Sprintf(` -global: - external_labels: - prometheus: prom-%s - replica: 0 -scrape_configs: -- job_name: prometheus - scrape_interval: 1s - static_configs: - - targets: - - "localhost:%s" -`, firstPromPort, firstPromPort), - // Config for first of two HA replica Prometheus. - fmt.Sprintf(` -global: - external_labels: - prometheus: prom-ha - replica: 0 -scrape_configs: -- job_name: prometheus - scrape_interval: 1s - static_configs: - - targets: - - "localhost:%s" -`, firstPromPort), - // Config for second of two HA replica Prometheus. 
- fmt.Sprintf(` -global: - external_labels: - prometheus: prom-ha - replica: 1 -scrape_configs: -- job_name: prometheus - scrape_interval: 1s - static_configs: - - targets: - - "localhost:%s" -`, firstPromPort), +func TestQuery(t *testing.T) { + for _, tt := range []testConfig{ + { + "gossip", + queryGossipSuite, + }, + { + "staticFlag", + queryStaticFlagsSuite, }, - workDir: dir, - numQueries: 2, - queriesReplicaLabel: "replica", - }) + { + "fileSD", + queryFileSDSuite, + }, + } { + t.Run(tt.name, func(t *testing.T) { + testQuerySimple(t, tt) + }) + } +} + +// testQuerySimple runs a setup of Prometheus servers, sidecars, and query nodes and verifies that +// queries return data merged from all Prometheus servers. Additionally it verifies if deduplication works for query. +func testQuerySimple(t *testing.T, conf testConfig) { + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) + + exit, err := conf.suite.Exec(t, ctx, conf.name) if err != nil { t.Errorf("spinup failed: %v", err) cancel() @@ -84,16 +83,13 @@ scrape_configs: <-exit }() - var ( - res model.Vector - criticalErr error - ) + var res model.Vector // Try query without deduplication. - err = runutil.Retry(time.Second, ctx.Done(), func() error { + testutil.Ok(t, runutil.Retry(time.Second, ctx.Done(), func() error { select { - case criticalErr = <-exit: - t.Errorf("Some process exited unexpectedly: %v", err) + case <-exit: + cancel() return nil default: } @@ -107,9 +103,7 @@ scrape_configs: return errors.Errorf("unexpected result size %d", len(res)) } return nil - }) - testutil.Ok(t, err) - testutil.Ok(t, criticalErr) + })) // In our model result are always sorted. testutil.Equals(t, model.Metric{ @@ -135,10 +129,10 @@ scrape_configs: }, res[2].Metric) // Try query with deduplication. 
- err = runutil.Retry(time.Second, ctx.Done(), func() error { + testutil.Ok(t, runutil.Retry(time.Second, ctx.Done(), func() error { select { - case criticalErr = <-exit: - t.Errorf("Some process exited unexpectedly: %v", err) + case <-exit: + cancel() return nil default: } @@ -153,9 +147,7 @@ scrape_configs: } return nil - }) - testutil.Ok(t, err) - testutil.Ok(t, criticalErr) + })) testutil.Equals(t, model.Metric{ "__name__": "up", @@ -205,3 +197,18 @@ func queryPrometheus(ctx context.Context, ustr string, ts time.Time, q string, d } return m.Data.Result, nil } + +func defaultPromConfig(name string, replicas int) string { + return fmt.Sprintf(` +global: + external_labels: + prometheus: %s + replica: %v +scrape_configs: +- job_name: prometheus + scrape_interval: 1s + static_configs: + - targets: + - "localhost:%s" +`, name, replicas, firstPromPort) +} diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index c4e3fb8ca47..c6e34871eac 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -3,9 +3,7 @@ package e2e_test import ( "context" "encoding/json" - "io/ioutil" "net/http" - "os" "sort" "testing" "time" @@ -17,16 +15,7 @@ import ( "github.com/prometheus/prometheus/pkg/timestamp" ) -// TestRuleComponent tests the basic interaction between the rule component -// and the querying layer. -// Rules are evaluated against the query layer and the query layer in return -// can access data written by the rules. -func TestRuleComponent(t *testing.T) { - dir, err := ioutil.TempDir("", "test_rule") - testutil.Ok(t, err) - defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() - - const alwaysFireRule = ` +const alwaysFireRule = ` groups: - name: example rules: @@ -38,15 +27,55 @@ groups: summary: "I always complain" ` +var ( + ruleGossipSuite = newSpinupSuite(). + Add(querier(1, ""), queryCluster(1)). + Add(ruler(1, alwaysFireRule)). + Add(ruler(2, alwaysFireRule)). + Add(alertManager(1), "") + + ruleStaticFlagsSuite = newSpinupSuite(). 
+ Add(querierWithStoreFlags(1, "", rulerGRPC(1), rulerGRPC(2)), ""). + Add(rulerWithQueryFlags(1, alwaysFireRule, queryHTTP(1))). + Add(rulerWithQueryFlags(2, alwaysFireRule, queryHTTP(1))). + Add(alertManager(1), "") + + ruleFileSDSuite = newSpinupSuite(). + Add(querierWithFileSD(1, "", rulerGRPC(1), rulerGRPC(2)), ""). + Add(rulerWithFileSD(1, alwaysFireRule, queryHTTP(1))). + Add(rulerWithFileSD(2, alwaysFireRule, queryHTTP(1))). + Add(alertManager(1), "") +) + +func TestRule(t *testing.T) { + for _, tt := range []testConfig{ + { + "gossip", + ruleGossipSuite, + }, + { + "staticFlag", + ruleStaticFlagsSuite, + }, + { + "fileSD", + ruleFileSDSuite, + }, + } { + t.Run(tt.name, func(t *testing.T) { + testRuleComponent(t, tt) + }) + } +} + +// testRuleComponent tests the basic interaction between the rule component +// and the querying layer. +// Rules are evaluated against the query layer and the query layer in return +// can access data written by the rules. +func testRuleComponent(t *testing.T, conf testConfig) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) - exit, err := spinup(t, ctx, config{ - workDir: dir, - numQueries: 1, - numRules: 2, - numAlertmanagers: 1, - rules: alwaysFireRule, - }) + exit, err := conf.suite.Exec(t, ctx, "test_rule_component") if err != nil { t.Errorf("spinup failed: %v", err) cancel() @@ -86,10 +115,11 @@ groups: "replica": "2", }, } - err = runutil.Retry(5*time.Second, ctx.Done(), func() error { + + testutil.Ok(t, runutil.Retry(5*time.Second, ctx.Done(), func() error { select { - case err := <-exit: - t.Errorf("Some process exited unexpectedly: %v", err) + case <-exit: + cancel() return nil default: } @@ -129,8 +159,7 @@ groups: } } return nil - }) - testutil.Ok(t, err) + })) } func queryAlertmanagerAlerts(ctx context.Context, url string) ([]*model.Alert, error) { diff --git a/test/e2e/spinup_test.go b/test/e2e/spinup_test.go index 2c8e263f6ab..d9d1b10b456 100644 --- a/test/e2e/spinup_test.go +++ 
b/test/e2e/spinup_test.go @@ -12,6 +12,9 @@ import ( "testing" "time" + "github.com/improbable-eng/thanos/pkg/objstore/s3" + "github.com/improbable-eng/thanos/pkg/runutil" + "github.com/improbable-eng/thanos/pkg/testutil" "github.com/oklog/run" @@ -20,6 +23,7 @@ import ( var ( promHTTPPort = func(i int) string { return fmt.Sprintf("%d", 9090+i) } + // We keep this one with localhost, to have perfect match with what Prometheus will expose in up metric. promHTTP = func(i int) string { return fmt.Sprintf("localhost:%s", promHTTPPort(i)) } @@ -34,131 +38,152 @@ var ( rulerGRPC = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 19790+i) } rulerHTTP = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 19890+i) } rulerCluster = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 19990+i) } + + storeGatewayGRPC = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 20090+i) } + storeGatewayHTTP = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 20190+i) } + + minioHTTP = func(i int) string { return fmt.Sprintf("127.0.0.1:%d", 20290+i) } ) -type config struct { - // Each config is for each Prometheus. 
- promConfigs []string - rules string - workDir string +type cmdScheduleFunc func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) + +type spinupSuite struct { + cmdScheduleFuncs []cmdScheduleFunc + clusterPeerFlags []string - numQueries int - queriesReplicaLabel string - numRules int - numAlertmanagers int + minioConfig s3.Config + withPreStartedMinio bool } -func evalClusterPeersFlags(cfg config) []string { - var flags []string - for i := 1; i <= len(cfg.promConfigs); i++ { - flags = append(flags, "--cluster.peers", sidecarCluster(i)) - } - for i := 1; i <= cfg.numQueries; i++ { - flags = append(flags, "--cluster.peers", queryCluster(i)) - } - for i := 1; i <= cfg.numRules; i++ { - flags = append(flags, "--cluster.peers", rulerCluster(i)) +func newSpinupSuite() *spinupSuite { return &spinupSuite{} } + +func (s *spinupSuite) Add(cmdSchedule cmdScheduleFunc, gossipAddress string) *spinupSuite { + s.cmdScheduleFuncs = append(s.cmdScheduleFuncs, cmdSchedule) + if gossipAddress != "" { + s.clusterPeerFlags = append(s.clusterPeerFlags, fmt.Sprintf("--cluster.peers"), gossipAddress) } - return flags + return s } -// NOTE: It is important to install Thanos before using this function to compile latest changes. 
-func spinup(t testing.TB, ctx context.Context, cfg config) (chan error, error) { - var ( - commands []*exec.Cmd - clusterPeers = evalClusterPeersFlags(cfg) - ) - - for k, promConfig := range cfg.promConfigs { - i := k + 1 - promDir := fmt.Sprintf("%s/data/prom%d", cfg.workDir, i) +func scraper(i int, config string, gossip bool) (cmdScheduleFunc, string) { + gossipAddress := "" + if gossip { + gossipAddress = sidecarCluster(i) + } + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + promDir := fmt.Sprintf("%s/data/prom%d", workDir, i) if err := os.MkdirAll(promDir, 0777); err != nil { return nil, errors.Wrap(err, "create prom dir failed") } - err := ioutil.WriteFile(promDir+"/prometheus.yml", []byte(promConfig), 0666) - if err != nil { + + if err := ioutil.WriteFile(promDir+"/prometheus.yml", []byte(config), 0666); err != nil { return nil, errors.Wrap(err, "creating prom config failed") } - commands = append(commands, exec.Command(testutil.PrometheusBinary(), + var cmds []*exec.Cmd + cmds = append(cmds, exec.Command(testutil.PrometheusBinary(), "--config.file", promDir+"/prometheus.yml", "--storage.tsdb.path", promDir, "--log.level", "info", "--web.listen-address", promHTTP(i), )) - commands = append(commands, exec.Command("thanos", - append([]string{ - "sidecar", - "--debug.name", fmt.Sprintf("sidecar-%d", i), - "--grpc-address", sidecarGRPC(i), - "--http-address", sidecarHTTP(i), - "--prometheus.url", fmt.Sprintf("http://%s", promHTTP(i)), - "--tsdb.path", promDir, - "--cluster.address", sidecarCluster(i), + args := []string{ + "sidecar", + "--debug.name", fmt.Sprintf("sidecar-%d", i), + "--grpc-address", sidecarGRPC(i), + "--http-address", sidecarHTTP(i), + "--prometheus.url", fmt.Sprintf("http://%s", promHTTP(i)), + "--tsdb.path", promDir, + "--cluster.address", sidecarCluster(i), + + "--log.level", "debug", + } + + if gossip { + args = append(args, []string{ "--cluster.advertise-address", sidecarCluster(i), 
"--cluster.gossip-interval", "200ms", "--cluster.pushpull-interval", "200ms", - "--log.level", "debug", - }, - clusterPeers...)..., - )) + }...) + args = append(args, clusterPeerFlags...) + } + cmds = append(cmds, exec.Command("thanos", args...)) + + return cmds, nil + }, gossipAddress +} - time.Sleep(200 * time.Millisecond) +func querier(i int, replicaLabel string, staticStores ...string) cmdScheduleFunc { + return func(_ string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + args := append(defaultQuerierFlags(i, replicaLabel), + "--cluster.advertise-address", queryCluster(i), + "--cluster.gossip-interval", "200ms", + "--cluster.pushpull-interval", "200ms") + args = append(args, clusterPeerFlags...) + for _, s := range staticStores { + args = append(args, "--store", s) + } + return []*exec.Cmd{exec.Command("thanos", args...)}, nil } +} - for i := 1; i <= cfg.numQueries; i++ { - commands = append(commands, exec.Command("thanos", - append([]string{"query", - "--debug.name", fmt.Sprintf("query-%d", i), - "--grpc-address", queryGRPC(i), - "--http-address", queryHTTP(i), - "--cluster.address", queryCluster(i), - "--cluster.advertise-address", queryCluster(i), - "--cluster.gossip-interval", "200ms", - "--cluster.pushpull-interval", "200ms", - "--log.level", "debug", - "--query.replica-label", cfg.queriesReplicaLabel, - }, - clusterPeers...)..., - )) - time.Sleep(200 * time.Millisecond) +func querierWithStoreFlags(i int, replicaLabel string, storesAddresses ...string) cmdScheduleFunc { + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + args := defaultQuerierFlags(i, replicaLabel) + + for _, addr := range storesAddresses { + args = append(args, "--store", addr) + } + + return []*exec.Cmd{exec.Command("thanos", args...)}, nil } +} - for i := 1; i <= cfg.numRules; i++ { - dbDir := fmt.Sprintf("%s/data/rule%d", cfg.workDir, i) +func querierWithFileSD(i int, replicaLabel string, storesAddresses ...string) cmdScheduleFunc { + return 
func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + queryFileSDDir := fmt.Sprintf("%s/data/queryFileSd%d", workDir, i) + if err := os.MkdirAll(queryFileSDDir, 0777); err != nil { + return nil, errors.Wrap(err, "create prom dir failed") + } - if err := os.MkdirAll(dbDir, 0777); err != nil { - return nil, errors.Wrap(err, "creating ruler dir failed") + if err := ioutil.WriteFile(queryFileSDDir+"/filesd.json", []byte(generateFileSD(storesAddresses)), 0666); err != nil { + return nil, errors.Wrap(err, "creating prom config failed") } - err := ioutil.WriteFile(dbDir+"/rules.yaml", []byte(cfg.rules), 0666) - if err != nil { - return nil, errors.Wrap(err, "creating ruler file failed") + + args := append(defaultQuerierFlags(i, replicaLabel), + "--store.sd-files", path.Join(queryFileSDDir, "filesd.json"), + "--store.sd-interval", "5s") + + return []*exec.Cmd{exec.Command("thanos", args...)}, nil + } +} + +func storeGateway(i int, bucketConfig []byte) cmdScheduleFunc { + return func(workDir string, _ []string) ([]*exec.Cmd, error) { + dbDir := fmt.Sprintf("%s/data/store-gateway%d", workDir, i) + + if err := os.MkdirAll(dbDir, 0777); err != nil { + return nil, errors.Wrap(err, "creating store gateway dir failed") } - commands = append(commands, exec.Command("thanos", - append([]string{"rule", - "--debug.name", fmt.Sprintf("rule-%d", i), - "--label", fmt.Sprintf(`replica="%d"`, i), - "--data-dir", dbDir, - "--rule-file", path.Join(dbDir, "*.yaml"), - "--eval-interval", "1s", - "--alertmanagers.url", "http://127.0.0.1:29093", - "--grpc-address", rulerGRPC(i), - "--http-address", rulerHTTP(i), - "--cluster.address", rulerCluster(i), - "--cluster.advertise-address", rulerCluster(i), - "--cluster.gossip-interval", "200ms", - "--cluster.pushpull-interval", "200ms", - "--log.level", "debug", - }, - clusterPeers...)..., - )) - time.Sleep(200 * time.Millisecond) + return []*exec.Cmd{exec.Command("thanos", + "store", + "--debug.name", fmt.Sprintf("store-%d", i), + 
"--data-dir", dbDir, + "--grpc-address", storeGatewayGRPC(i), + "--http-address", storeGatewayHTTP(i), + "--log.level", "debug", + "--objstore.config", string(bucketConfig), + // Accelerated sync time for quicker test (3m by default) + "--sync-block-duration", "5s", + )}, nil } +} - for i := 1; i <= cfg.numAlertmanagers; i++ { - dir := fmt.Sprintf("%s/data/alertmanager%d", cfg.workDir, i) +func alertManager(i int) cmdScheduleFunc { + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + dir := fmt.Sprintf("%s/data/alertmanager%d", workDir, i) if err := os.MkdirAll(dir, 0777); err != nil { return nil, errors.Wrap(err, "creating alertmanager dir failed") @@ -172,15 +197,162 @@ route: receivers: - name: 'null' ` - err := ioutil.WriteFile(dir+"/config.yaml", []byte(config), 0666) - if err != nil { + if err := ioutil.WriteFile(dir+"/config.yaml", []byte(config), 0666); err != nil { return nil, errors.Wrap(err, "creating alertmanager config file failed") } - commands = append(commands, exec.Command(testutil.AlertmanagerBinary(), + return []*exec.Cmd{exec.Command(testutil.AlertmanagerBinary(), "--config.file", dir+"/config.yaml", "--web.listen-address", "127.0.0.1:29093", "--log.level", "debug", - )) + )}, nil + } +} + +func ruler(i int, rules string) (cmdScheduleFunc, string) { + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + dbDir := fmt.Sprintf("%s/data/rule%d", workDir, i) + + if err := os.MkdirAll(dbDir, 0777); err != nil { + return nil, errors.Wrap(err, "creating ruler dir failed") + } + err := ioutil.WriteFile(dbDir+"/rules.yaml", []byte(rules), 0666) + if err != nil { + return nil, errors.Wrap(err, "creating ruler file failed") + } + + args := append(defaultRulerFlags(i, dbDir), + "--cluster.advertise-address", rulerCluster(i), + "--cluster.gossip-interval", "200ms", + "--cluster.pushpull-interval", "200ms") + args = append(args, clusterPeerFlags...) 
+ + return []*exec.Cmd{exec.Command("thanos", args...)}, nil + }, rulerCluster(i) +} + +func rulerWithQueryFlags(i int, rules string, queryAddresses ...string) (cmdScheduleFunc, string) { + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + dbDir := fmt.Sprintf("%s/data/rule%d", workDir, i) + + if err := os.MkdirAll(dbDir, 0777); err != nil { + return nil, errors.Wrap(err, "creating ruler dir failed") + } + err := ioutil.WriteFile(dbDir+"/rules.yaml", []byte(rules), 0666) + if err != nil { + return nil, errors.Wrap(err, "creating ruler file failed") + } + + args := defaultRulerFlags(i, dbDir) + + for _, addr := range queryAddresses { + args = append(args, "--query", addr) + } + + return []*exec.Cmd{exec.Command("thanos", args...)}, nil + }, "" +} + +func rulerWithFileSD(i int, rules string, queryAddresses ...string) (cmdScheduleFunc, string) { + return func(workDir string, clusterPeerFlags []string) ([]*exec.Cmd, error) { + dbDir := fmt.Sprintf("%s/data/rule%d", workDir, i) + + if err := os.MkdirAll(dbDir, 0777); err != nil { + return nil, errors.Wrap(err, "creating ruler dir failed") + } + err := ioutil.WriteFile(dbDir+"/rules.yaml", []byte(rules), 0666) + if err != nil { + return nil, errors.Wrap(err, "creating ruler file failed") + } + + ruleFileSDDir := fmt.Sprintf("%s/data/ruleFileSd%d", workDir, i) + if err := os.MkdirAll(ruleFileSDDir, 0777); err != nil { + return nil, errors.Wrap(err, "create ruler filesd dir failed") + } + + if err := ioutil.WriteFile(ruleFileSDDir+"/filesd.json", []byte(generateFileSD(queryAddresses)), 0666); err != nil { + return nil, errors.Wrap(err, "creating ruler filesd config failed") + } + + args := append(defaultRulerFlags(i, dbDir), + "--query.sd-files", path.Join(ruleFileSDDir, "filesd.json"), + "--query.sd-interval", "5s") + + return []*exec.Cmd{exec.Command("thanos", args...)}, nil + }, "" +} + +func minio(accessKey string, secretKey string) cmdScheduleFunc { + return func(workDir string, 
clusterPeerFlags []string) ([]*exec.Cmd, error) { + dbDir := fmt.Sprintf("%s/data/minio", workDir) + + if err := os.MkdirAll(dbDir, 0777); err != nil { + return nil, errors.Wrap(err, "creating minio dir failed") + } + + cmd := exec.Command(testutil.MinioBinary(), + "server", + "--address", minioHTTP(1), + dbDir, + ) + cmd.Env = append(os.Environ(), + fmt.Sprintf("MINIO_ACCESS_KEY=%s", accessKey), + fmt.Sprintf("MINIO_SECRET_KEY=%s", secretKey)) + + return []*exec.Cmd{cmd}, nil + } +} + +func (s *spinupSuite) WithPreStartedMinio(config s3.Config) *spinupSuite { + s.minioConfig = config + s.withPreStartedMinio = true + return s +} + +// NOTE: It is important to install Thanos before using this function to compile latest changes. +// This means that export GOCACHE=/unique/path is must have to avoid having this test cached. +func (s *spinupSuite) Exec(t testing.TB, ctx context.Context, testName string) (exit chan struct{}, err error) { + dir, err := ioutil.TempDir("", testName) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + if rerr := os.RemoveAll(dir); rerr != nil { + t.Log(rerr) + } + } + }() + + var minioExit chan struct{} + if s.withPreStartedMinio { + // Start minio before anything else. + // NewTestBucketFromConfig is responsible for healthchecking by creating a requested bucket in retry loop. + minioExit, err = newSpinupSuite(). + Add(minio(s.minioConfig.AccessKey, s.minioConfig.SecretKey), ""). 
+ Exec(t, ctx, testName+"_minio") + if err != nil { + return nil, errors.Wrap(err, "start minio") + } + + ctx, cancel := context.WithCancel(ctx) + if err := runutil.Retry(time.Second, ctx.Done(), func() error { + select { + case <-minioExit: + cancel() + return nil + default: + } + + bkt, _, err := s3.NewTestBucketFromConfig(t, "eu-west1", s.minioConfig, false) + if err != nil { + return errors.Wrap(err, "create bkt client for minio healthcheck") + } + + return bkt.Close() + }); err != nil { + return nil, errors.Wrap(err, "minio not ready in time") + } } var g run.Group @@ -189,17 +361,39 @@ receivers: { ctx, cancel := context.WithCancel(ctx) g.Add(func() error { - <-ctx.Done() + if s.withPreStartedMinio { + select { + case <-ctx.Done(): + case <-minioExit: + } + } else { + <-ctx.Done() + } // This go routine will return only when: // 1) Any other process from group exited unexpectedly // 2) Global context will be cancelled. + // 3) Minio (if started) exited unexpectedly. return nil }, func(error) { cancel() + if err := os.RemoveAll(dir); err != nil { + t.Log(err) + } }) } + var commands []*exec.Cmd + + for _, cmdFunc := range s.cmdScheduleFuncs { + cmds, err := cmdFunc(dir, s.clusterPeerFlags) + if err != nil { + return nil, err + } + + commands = append(commands, cmds...) + } + // Run go routine for each command. for _, c := range commands { var stderr, stdout bytes.Buffer @@ -215,26 +409,73 @@ receivers: cmd := c g.Add(func() error { + id := fmt.Sprintf("%s %s", cmd.Path, cmd.Args[1]) + err := cmd.Wait() if stderr.Len() > 0 { - t.Logf("%s STDERR\n %s", cmd.Path, stderr.String()) + t.Logf("%s STDERR\n %s", id, stderr.String()) } if stdout.Len() > 0 { - t.Logf("%s STDOUT\n %s", cmd.Path, stdout.String()) + t.Logf("%s STDOUT\n %s", id, stdout.String()) } - return err + return errors.Wrap(err, id) }, func(error) { - _ = cmd.Process.Signal(syscall.SIGTERM) + // This's accepted scenario to kill a process immediately for sure and run tests as fast as possible. 
+ _ = cmd.Process.Signal(syscall.SIGKILL) }) } - var exit = make(chan error, 1) + exit = make(chan struct{}) go func(g run.Group) { - exit <- g.Run() + if err := g.Run(); err != nil && ctx.Err() == nil { + t.Errorf("Some process exited unexpectedly: %v", err) + } + if minioExit != nil { + <-minioExit + } close(exit) }(g) return exit, nil } + +func generateFileSD(addresses []string) string { + conf := "[ { \"targets\": [" + for index, addr := range addresses { + conf += fmt.Sprintf("\"%s\"", addr) + if index+1 < len(addresses) { + conf += "," + } + } + conf += "] } ]" + return conf +} + +func defaultQuerierFlags(i int, replicaLabel string) []string { + return []string{ + "query", + "--debug.name", fmt.Sprintf("querier-%d", i), + "--grpc-address", queryGRPC(i), + "--http-address", queryHTTP(i), + "--log.level", "debug", + "--query.replica-label", replicaLabel, + "--cluster.address", queryCluster(i), + } +} + +func defaultRulerFlags(i int, dbDir string) []string { + return []string{"rule", + "--debug.name", fmt.Sprintf("rule-%d", i), + "--label", fmt.Sprintf(`replica="%d"`, i), + "--data-dir", dbDir, + "--rule-file", path.Join(dbDir, "*.yaml"), + "--eval-interval", "1s", + "--alertmanagers.url", "http://127.0.0.1:29093", + "--grpc-address", rulerGRPC(i), + "--http-address", rulerHTTP(i), + "--cluster.address", rulerCluster(i), + "--log.level", "debug", + } +} diff --git a/test/e2e/store_gateway_test.go b/test/e2e/store_gateway_test.go new file mode 100644 index 00000000000..01afc5d94a4 --- /dev/null +++ b/test/e2e/store_gateway_test.go @@ -0,0 +1,148 @@ +package e2e_test + +import ( + "context" + "io/ioutil" + "os" + "path" + "testing" + "time" + + "github.com/improbable-eng/thanos/pkg/runutil" + "github.com/pkg/errors" + "github.com/prometheus/common/model" + + "github.com/go-kit/kit/log" + "github.com/improbable-eng/thanos/pkg/objstore" + 
"github.com/prometheus/prometheus/pkg/timestamp" + "github.com/prometheus/tsdb/labels" + + "github.com/improbable-eng/thanos/pkg/objstore/client" + "github.com/improbable-eng/thanos/pkg/objstore/s3" + "github.com/improbable-eng/thanos/pkg/testutil" + "gopkg.in/yaml.v2" +) + +func TestStoreGatewayQuery(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + + s3Config := s3.Config{ + Bucket: "test-storegateway-query", + AccessKey: "abc", + SecretKey: "mightysecret", + Endpoint: minioHTTP(1), + Insecure: true, + } + + bucketConfig := client.BucketConfig{ + Type: client.S3, + Config: s3Config, + } + + config, err := yaml.Marshal(bucketConfig) + testutil.Ok(t, err) + + exit, err := newSpinupSuite(). + WithPreStartedMinio(s3Config). + Add(storeGateway(1, config), ""). + Add(querier(1, "replica", storeGatewayGRPC(1)), ""). + Exec(t, ctx, "test_store_gateway_query") + if err != nil { + t.Errorf("spinup failed: %v", err) + cancel() + return + } + + defer func() { + cancel() + <-exit + }() + + dir, err := ioutil.TempDir("", "test_store_gateway_query_local") + testutil.Ok(t, err) + defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() + + series := []labels.Labels{ + labels.FromStrings("a", "1", "b", "2"), + } + extLset := labels.FromStrings("ext1", "value1", "replica", "1") + extLset2 := labels.FromStrings("ext1", "value1", "replica", "2") + + now := time.Now() + id1, err := testutil.CreateBlock(dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset, 0) + testutil.Ok(t, err) + + id2, err := testutil.CreateBlock(dir, series, 10, timestamp.FromTime(now), timestamp.FromTime(now.Add(2*time.Hour)), extLset2, 0) + testutil.Ok(t, err) + + l := log.NewLogfmtLogger(os.Stdout) + + bkt, err := s3.NewBucketWithConfig(l, s3Config, "test-feed") + testutil.Ok(t, err) + + testutil.Ok(t, objstore.UploadDir(ctx, l, bkt, path.Join(dir, id1.String()), 
id1.String())) + testutil.Ok(t, objstore.UploadDir(ctx, l, bkt, path.Join(dir, id2.String()), id2.String())) + + var res model.Vector + + // Try query without deduplication. + testutil.Ok(t, runutil.Retry(time.Second, ctx.Done(), func() error { + select { + case <-exit: + cancel() + return nil + default: + } + + var err error + res, err = queryPrometheus(ctx, "http://"+queryHTTP(1), time.Now(), "{a=\"1\"}", false) + if err != nil { + return err + } + if len(res) != 2 { + return errors.Errorf("unexpected result size %d", len(res)) + } + return nil + })) + + // In our model result are always sorted. + testutil.Equals(t, model.Metric{ + "a": "1", + "b": "2", + "ext1": "value1", + "replica": "1", + }, res[0].Metric) + testutil.Equals(t, model.Metric{ + "a": "1", + "b": "2", + "ext1": "value1", + "replica": "2", + }, res[1].Metric) + + // Try query with deduplication. + testutil.Ok(t, runutil.Retry(time.Second, ctx.Done(), func() error { + select { + case <-exit: + cancel() + return nil + default: + } + + var err error + res, err = queryPrometheus(ctx, "http://"+queryHTTP(1), time.Now(), "{a=\"1\"}", true) + if err != nil { + return err + } + if len(res) != 1 { + return errors.Errorf("unexpected result size %d", len(res)) + } + return nil + })) + + // In our model result are always sorted. + testutil.Equals(t, model.Metric{ + "a": "1", + "b": "2", + "ext1": "value1", + }, res[0].Metric) +}