Skip to content

Commit ce72bfe

Browse files
authored
compact: Added index size limiting planner detecting output index size over 64GB. (#3410)
* compact: Added index size limiting planner detecting output index size over 64GB. Fixes: #1424 Signed-off-by: Bartlomiej Plotka <[email protected]> * Addressed comments; added changelog. Signed-off-by: Bartlomiej Plotka <[email protected]> * Skipped flaky test. Signed-off-by: Bartlomiej Plotka <[email protected]>
1 parent 5ea9812 commit ce72bfe

File tree

11 files changed

+522
-40
lines changed

11 files changed

+522
-40
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,20 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
2626
- [#2979](https://github.com/thanos-io/thanos/pull/2979) Replicator: Add the ability to replicate blocks within a time frame by passing --min-time and --max-time
2727
- [#3398](https://github.com/thanos-io/thanos/pull/3398) Query Frontend: Add default config for query frontend memcached config.
2828
- [#3277](https://github.com/thanos-io/thanos/pull/3277) Thanos Query: Introduce dynamic lookback interval. This allows queries with large step to make use of downsampled data.
29+
- [#3409](https://github.com/thanos-io/thanos/pull/3409) Compactor: Added support for no-compact-mark.json which excludes the block from compaction.
2930

3031
### Fixed
32+
3133
- [#3257](https://github.com/thanos-io/thanos/pull/3257) Ruler: Prevent Ruler from crashing when using default DNS to lookup hosts that results in "No such hosts" errors.
3234
- [#3331](https://github.com/thanos-io/thanos/pull/3331) Disable Azure blob exception logging
3335
- [#3341](https://github.com/thanos-io/thanos/pull/3341) Disable Azure blob syslog exception logging
3436
- [#3414](https://github.com/thanos-io/thanos/pull/3414) Set CORS for Query Frontend
3537

3638
### Changed
3739

40+
- [#3410](https://github.com/thanos-io/thanos/pull/3410) Compactor: Changed metric `thanos_compactor_blocks_marked_for_deletion_total` to `thanos_compactor_blocks_marked_total` with `marker` label.
41+
Compactor will now automatically disable compaction for blocks with a large index that would produce compacted blocks larger than the specified value (by default: 64GB). This automatically
42+
handles the Prometheus [format limit](https://github.com/thanos-io/thanos/issues/1424).
3843
- [#2906](https://github.com/thanos-io/thanos/pull/2906) Tools: Refactor Bucket replicate execution. Removed all `thanos_replicate_origin_.*` metrics.
3944
- `thanos_replicate_origin_meta_loads_total` can be replaced by `blocks_meta_synced{state="loaded"}`.
4045
- `thanos_replicate_origin_partial_meta_reads_total` can be replaced by `blocks_meta_synced{state="failed"}`.

cmd/thanos/compact.go

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"sync"
1414
"time"
1515

16+
"github.com/alecthomas/units"
1617
"github.com/go-kit/kit/log"
1718
"github.com/go-kit/kit/log/level"
1819
"github.com/oklog/run"
@@ -124,10 +125,13 @@ func runCompact(
124125
Name: "thanos_compactor_block_cleanup_failures_total",
125126
Help: "Failures encountered while deleting blocks in compactor.",
126127
})
127-
blocksMarkedForDeletion := promauto.With(reg).NewCounter(prometheus.CounterOpts{
128-
Name: "thanos_compactor_blocks_marked_for_deletion_total",
129-
Help: "Total number of blocks marked for deletion in compactor.",
130-
})
128+
blocksMarked := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
129+
Name: "thanos_compactor_blocks_marked_total",
130+
Help: "Total number of blocks marked in compactor.",
131+
}, []string{"marker"})
132+
blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename)
133+
blocksMarked.WithLabelValues(metadata.DeletionMarkFilename)
134+
131135
garbageCollectedBlocks := promauto.With(reg).NewCounter(prometheus.CounterOpts{
132136
Name: "thanos_compact_garbage_collected_blocks_total",
133137
Help: "Total number of blocks marked for deletion by compactor.",
@@ -244,7 +248,7 @@ func runCompact(
244248
cf,
245249
duplicateBlocksFilter,
246250
ignoreDeletionMarkFilter,
247-
blocksMarkedForDeletion,
251+
blocksMarked.WithLabelValues(metadata.DeletionMarkFilename),
248252
garbageCollectedBlocks,
249253
conf.blockSyncConcurrency)
250254
if err != nil {
@@ -280,9 +284,31 @@ func runCompact(
280284
return errors.Wrap(err, "clean working downsample directory")
281285
}
282286

283-
grouper := compact.NewDefaultGrouper(logger, bkt, conf.acceptMalformedIndex, enableVerticalCompaction, reg, blocksMarkedForDeletion, garbageCollectedBlocks)
287+
grouper := compact.NewDefaultGrouper(
288+
logger,
289+
bkt,
290+
conf.acceptMalformedIndex,
291+
enableVerticalCompaction,
292+
reg,
293+
blocksMarked.WithLabelValues(metadata.DeletionMarkFilename),
294+
garbageCollectedBlocks,
295+
)
284296
blocksCleaner := compact.NewBlocksCleaner(logger, bkt, ignoreDeletionMarkFilter, deleteDelay, blocksCleaned, blockCleanupFailures)
285-
compactor, err := compact.NewBucketCompactor(logger, sy, grouper, compact.NewPlanner(logger, levels, noCompactMarkerFilter), comp, compactDir, bkt, conf.compactionConcurrency)
297+
compactor, err := compact.NewBucketCompactor(
298+
logger,
299+
sy,
300+
grouper,
301+
compact.WithLargeTotalIndexSizeFilter(
302+
compact.NewPlanner(logger, levels, noCompactMarkerFilter),
303+
bkt,
304+
int64(conf.maxBlockIndexSize),
305+
blocksMarked.WithLabelValues(metadata.NoCompactMarkFilename),
306+
),
307+
comp,
308+
compactDir,
309+
bkt,
310+
conf.compactionConcurrency,
311+
)
286312
if err != nil {
287313
cancel()
288314
return errors.Wrap(err, "create bucket compactor")
@@ -373,7 +399,7 @@ func runCompact(
373399
return errors.Wrap(err, "sync before first pass of downsampling")
374400
}
375401

376-
if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, bkt, sy.Metas(), retentionByResolution, blocksMarkedForDeletion); err != nil {
402+
if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, bkt, sy.Metas(), retentionByResolution, blocksMarked.WithLabelValues(metadata.DeletionMarkFilename)); err != nil {
377403
return errors.Wrap(err, "retention failed")
378404
}
379405

@@ -512,6 +538,7 @@ type compactConfig struct {
512538
selectorRelabelConf extflag.PathOrContent
513539
webConf webConfig
514540
label string
541+
maxBlockIndexSize units.Base2Bytes
515542
}
516543

517544
func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
@@ -574,6 +601,13 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
574601
"This works well for deduplication of blocks with **precisely the same samples** like produced by Receiver replication.").
575602
Hidden().StringsVar(&cc.dedupReplicaLabels)
576603

604+
// TODO(bwplotka): This is short term fix for https://github.com/thanos-io/thanos/issues/1424, replace with vertical block sharding https://github.com/thanos-io/thanos/pull/3390.
605+
cmd.Flag("compact.block-max-index-size", "Maximum index size for the resulted block during any compaction. Note that"+
606+
"total size is approximated in worst case. If the block that would be resulted from compaction is estimated to exceed this number, biggest source"+
607+
"block is marked for no compaction (no-compact-mark.json is uploaded) which causes this block to be excluded from any compaction. "+
608+
"Default is due to https://github.com/thanos-io/thanos/issues/1424, but it's overall recommended to keeps block size to some reasonable size.").
609+
Hidden().Default("64GB").BytesVar(&cc.maxBlockIndexSize)
610+
577611
cc.selectorRelabelConf = *extkingpin.RegisterSelectorRelabelFlags(cmd)
578612

579613
cc.webConf.registerFlag(cmd)

pkg/block/block.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,3 +284,35 @@ func gatherFileStats(blockDir string) (res []metadata.File, _ error) {
284284
// TODO(bwplotka): Add optional files like tombstones?
285285
return res, err
286286
}
287+
288+
// MarkForNoCompact creates a file which marks block to be not compacted.
289+
func MarkForNoCompact(ctx context.Context, logger log.Logger, bkt objstore.Bucket, id ulid.ULID, reason metadata.NoCompactReason, noCompactDetails string, markedForNoCompact prometheus.Counter) error {
290+
m := path.Join(id.String(), metadata.NoCompactMarkFilename)
291+
noCompactMarkExists, err := bkt.Exists(ctx, m)
292+
if err != nil {
293+
return errors.Wrapf(err, "check exists %s in bucket", m)
294+
}
295+
if noCompactMarkExists {
296+
level.Warn(logger).Log("msg", "requested to mark for no compaction, but file already exists; this should not happen; investigate", "err", errors.Errorf("file %s already exists in bucket", m))
297+
return nil
298+
}
299+
300+
noCompactMark, err := json.Marshal(metadata.NoCompactMark{
301+
ID: id,
302+
Version: metadata.NoCompactMarkVersion1,
303+
304+
Time: time.Now().Unix(),
305+
Reason: reason,
306+
Details: noCompactDetails,
307+
})
308+
if err != nil {
309+
return errors.Wrap(err, "json encode no compact mark")
310+
}
311+
312+
if err := bkt.Upload(ctx, m, bytes.NewBuffer(noCompactMark)); err != nil {
313+
return errors.Wrapf(err, "upload file %s to bucket", m)
314+
}
315+
markedForNoCompact.Inc()
316+
level.Info(logger).Log("msg", "block has been marked for no compaction", "block", id)
317+
return nil
318+
}

pkg/block/block_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,3 +311,59 @@ func TestMarkForDeletion(t *testing.T) {
311311
})
312312
}
313313
}
314+
315+
func TestMarkForNoCompact(t *testing.T) {
316+
defer testutil.TolerantVerifyLeak(t)
317+
ctx := context.Background()
318+
319+
tmpDir, err := ioutil.TempDir("", "test-block-mark-for-no-compact")
320+
testutil.Ok(t, err)
321+
defer func() { testutil.Ok(t, os.RemoveAll(tmpDir)) }()
322+
323+
for _, tcase := range []struct {
324+
name string
325+
preUpload func(t testing.TB, id ulid.ULID, bkt objstore.Bucket)
326+
327+
blocksMarked int
328+
}{
329+
{
330+
name: "block marked",
331+
preUpload: func(t testing.TB, id ulid.ULID, bkt objstore.Bucket) {},
332+
blocksMarked: 1,
333+
},
334+
{
335+
name: "block with no-compact mark already, expected log and no metric increment",
336+
preUpload: func(t testing.TB, id ulid.ULID, bkt objstore.Bucket) {
337+
m, err := json.Marshal(metadata.NoCompactMark{
338+
ID: id,
339+
Time: time.Now().Unix(),
340+
Version: metadata.NoCompactMarkVersion1,
341+
})
342+
testutil.Ok(t, err)
343+
testutil.Ok(t, bkt.Upload(ctx, path.Join(id.String(), metadata.NoCompactMarkFilename), bytes.NewReader(m)))
344+
},
345+
blocksMarked: 0,
346+
},
347+
} {
348+
t.Run(tcase.name, func(t *testing.T) {
349+
bkt := objstore.NewInMemBucket()
350+
id, err := e2eutil.CreateBlock(ctx, tmpDir, []labels.Labels{
351+
{{Name: "a", Value: "1"}},
352+
{{Name: "a", Value: "2"}},
353+
{{Name: "a", Value: "3"}},
354+
{{Name: "a", Value: "4"}},
355+
{{Name: "b", Value: "1"}},
356+
}, 100, 0, 1000, labels.Labels{{Name: "ext1", Value: "val1"}}, 124)
357+
testutil.Ok(t, err)
358+
359+
tcase.preUpload(t, id, bkt)
360+
361+
testutil.Ok(t, Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, id.String())))
362+
363+
c := promauto.With(nil).NewCounter(prometheus.CounterOpts{})
364+
err = MarkForNoCompact(ctx, log.NewNopLogger(), bkt, id, metadata.ManualNoCompactReason, "", c)
365+
testutil.Ok(t, err)
366+
testutil.Equals(t, float64(tcase.blocksMarked), promtest.ToFloat64(c))
367+
})
368+
}
369+
}

pkg/block/metadata/markers.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ type NoCompactMark struct {
7474
// Version of the file.
7575
Version int `json:"version"`
7676

77+
// Time is a unix timestamp of when the block was marked for no compact.
78+
Time int64 `json:"time"`
7779
Reason NoCompactReason `json:"reason"`
7880
// Details is a human readable string giving details of reason.
7981
Details string `json:"details"`

pkg/compact/planner.go

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,17 @@ package compact
55

66
import (
77
"context"
8+
"fmt"
9+
"math"
10+
"path/filepath"
811

912
"github.com/go-kit/kit/log"
1013
"github.com/oklog/ulid"
14+
"github.com/pkg/errors"
15+
"github.com/prometheus/client_golang/prometheus"
16+
"github.com/thanos-io/thanos/pkg/block"
1117
"github.com/thanos-io/thanos/pkg/block/metadata"
18+
"github.com/thanos-io/thanos/pkg/objstore"
1219
)
1320

1421
type tsdbBasedPlanner struct {
@@ -42,7 +49,10 @@ func NewPlanner(logger log.Logger, ranges []int64, noCompBlocks *GatherNoCompact
4249

4350
// TODO(bwplotka): Consider smarter algorithm, this prefers smaller iterative compactions vs big single one: https://github.com/thanos-io/thanos/issues/3405
4451
func (p *tsdbBasedPlanner) Plan(_ context.Context, metasByMinTime []*metadata.Meta) ([]*metadata.Meta, error) {
45-
noCompactMarked := p.noCompBlocksFunc()
52+
return p.plan(p.noCompBlocksFunc(), metasByMinTime)
53+
}
54+
55+
func (p *tsdbBasedPlanner) plan(noCompactMarked map[ulid.ULID]*metadata.NoCompactMark, metasByMinTime []*metadata.Meta) ([]*metadata.Meta, error) {
4656
notExcludedMetasByMinTime := make([]*metadata.Meta, 0, len(metasByMinTime))
4757
for _, meta := range metasByMinTime {
4858
if _, excluded := noCompactMarked[meta.ULID]; excluded {
@@ -212,3 +222,82 @@ func splitByRange(metasByMinTime []*metadata.Meta, tr int64) [][]*metadata.Meta
212222

213223
return splitDirs
214224
}
225+
226+
type largeTotalIndexSizeFilter struct {
227+
*tsdbBasedPlanner
228+
229+
bkt objstore.Bucket
230+
markedForNoCompact prometheus.Counter
231+
totalMaxIndexSizeBytes int64
232+
}
233+
234+
var _ Planner = &largeTotalIndexSizeFilter{}
235+
236+
// WithLargeTotalIndexSizeFilter wraps Planner with largeTotalIndexSizeFilter that checks the given plans and estimates total index size.
237+
// When found, it marks block for no compaction by placing no-compact.json and updating cache.
238+
// NOTE: The estimation is very rough as it assumes extreme cases of indexes sharing no bytes, thus summing all source index sizes.
239+
// Adjust limit accordingly reducing to some % of actual limit you want to give.
240+
// TODO(bwplotka): This is short term fix for https://github.com/thanos-io/thanos/issues/1424, replace with vertical block sharding https://github.com/thanos-io/thanos/pull/3390.
241+
func WithLargeTotalIndexSizeFilter(with *tsdbBasedPlanner, bkt objstore.Bucket, totalMaxIndexSizeBytes int64, markedForNoCompact prometheus.Counter) *largeTotalIndexSizeFilter {
242+
return &largeTotalIndexSizeFilter{tsdbBasedPlanner: with, bkt: bkt, totalMaxIndexSizeBytes: totalMaxIndexSizeBytes, markedForNoCompact: markedForNoCompact}
243+
}
244+
245+
func (t *largeTotalIndexSizeFilter) Plan(ctx context.Context, metasByMinTime []*metadata.Meta) ([]*metadata.Meta, error) {
246+
noCompactMarked := t.noCompBlocksFunc()
247+
copiedNoCompactMarked := make(map[ulid.ULID]*metadata.NoCompactMark, len(noCompactMarked))
248+
for k, v := range noCompactMarked {
249+
copiedNoCompactMarked[k] = v
250+
}
251+
252+
PlanLoop:
253+
for {
254+
plan, err := t.plan(copiedNoCompactMarked, metasByMinTime)
255+
if err != nil {
256+
return nil, err
257+
}
258+
var totalIndexBytes, maxIndexSize int64 = 0, math.MinInt64
259+
var biggestIndex int
260+
for i, p := range plan {
261+
indexSize := int64(-1)
262+
for _, f := range p.Thanos.Files {
263+
if f.RelPath == block.IndexFilename {
264+
indexSize = f.SizeBytes
265+
}
266+
}
267+
if indexSize <= 0 {
268+
// Get size from bkt instead.
269+
attr, err := t.bkt.Attributes(ctx, filepath.Join(p.ULID.String(), block.IndexFilename))
270+
if err != nil {
271+
return nil, errors.Wrapf(err, "get attr of %v", filepath.Join(p.ULID.String(), block.IndexFilename))
272+
}
273+
indexSize = attr.Size
274+
}
275+
276+
if maxIndexSize < indexSize {
277+
maxIndexSize = indexSize
278+
biggestIndex = i
279+
}
280+
totalIndexBytes += indexSize
281+
if totalIndexBytes >= t.totalMaxIndexSizeBytes {
282+
// Marking blocks for no compact to limit size.
283+
// TODO(bwplotka): Make sure to reset cache once this is done: https://github.com/thanos-io/thanos/issues/3408
284+
if err := block.MarkForNoCompact(
285+
ctx,
286+
t.logger,
287+
t.bkt,
288+
plan[biggestIndex].ULID,
289+
metadata.IndexSizeExceedingNoCompactReason,
290+
fmt.Sprintf("largeTotalIndexSizeFilter: Total compacted block's index size could exceed: %v with this block. See https://github.com/thanos-io/thanos/issues/1424", t.totalMaxIndexSizeBytes),
291+
t.markedForNoCompact,
292+
); err != nil {
293+
return nil, errors.Wrapf(err, "mark %v for no compaction", plan[biggestIndex].ULID.String())
294+
}
295+
// Make sure wrapped planner exclude this block.
296+
copiedNoCompactMarked[plan[biggestIndex].ULID] = &metadata.NoCompactMark{ID: plan[biggestIndex].ULID, Version: metadata.NoCompactMarkVersion1}
297+
continue PlanLoop
298+
}
299+
}
300+
// Planned blocks should not exceed limit.
301+
return plan, nil
302+
}
303+
}

0 commit comments

Comments
 (0)