Skip to content

Commit bc3aaab

Browse files
mjd95 authored and bwplotka committed
Compact: skip compaction for blocks with no samples (#904)
* skip compaction for blocks with no samples
* update to actually delete the empty input blocks, and to correctly handle from bucket compactor
* warn on error deleting empty block
* use ULID instead of error for emptyBlockSentinel
* don't use a global variable
* full stop at end of comment
* use boolean to indicate whether there is more compaction work
* rename variables
* fix test
1 parent b74717a commit bc3aaab

File tree

2 files changed

+78
-54
lines changed

2 files changed

+78
-54
lines changed

pkg/compact/compact.go

Lines changed: 74 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -513,23 +513,23 @@ func (cg *Group) Resolution() int64 {
513513

514514
// Compact plans and runs a single compaction against the group. The compacted result
515515
// is uploaded into the bucket the blocks were retrieved from.
516-
func (cg *Group) Compact(ctx context.Context, dir string, comp tsdb.Compactor) (ulid.ULID, error) {
516+
func (cg *Group) Compact(ctx context.Context, dir string, comp tsdb.Compactor) (bool, ulid.ULID, error) {
517517
subDir := filepath.Join(dir, cg.Key())
518518

519519
if err := os.RemoveAll(subDir); err != nil {
520-
return ulid.ULID{}, errors.Wrap(err, "clean compaction group dir")
520+
return false, ulid.ULID{}, errors.Wrap(err, "clean compaction group dir")
521521
}
522522
if err := os.MkdirAll(subDir, 0777); err != nil {
523-
return ulid.ULID{}, errors.Wrap(err, "create compaction group dir")
523+
return false, ulid.ULID{}, errors.Wrap(err, "create compaction group dir")
524524
}
525525

526-
compID, err := cg.compact(ctx, subDir, comp)
526+
shouldRerun, compID, err := cg.compact(ctx, subDir, comp)
527527
if err != nil {
528528
cg.compactionFailures.Inc()
529529
}
530530
cg.compactions.Inc()
531531

532-
return compID, err
532+
return shouldRerun, compID, err
533533
}
534534

535535
// Issue347Error is a type wrapper for errors that should invoke repair process for broken block.
@@ -688,35 +688,35 @@ func RepairIssue347(ctx context.Context, logger log.Logger, bkt objstore.Bucket,
688688
return nil
689689
}
690690

691-
func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) (compID ulid.ULID, err error) {
691+
func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) (shouldRerun bool, compID ulid.ULID, err error) {
692692
cg.mtx.Lock()
693693
defer cg.mtx.Unlock()
694694

695695
// Check for overlapped blocks.
696696
if err := cg.areBlocksOverlapping(nil); err != nil {
697-
return compID, halt(errors.Wrap(err, "pre compaction overlap check"))
697+
return false, ulid.ULID{}, halt(errors.Wrap(err, "pre compaction overlap check"))
698698
}
699699

700700
// Planning a compaction works purely based on the meta.json files in our future group's dir.
701701
// So we first dump all our memory block metas into the directory.
702702
for _, meta := range cg.blocks {
703703
bdir := filepath.Join(dir, meta.ULID.String())
704704
if err := os.MkdirAll(bdir, 0777); err != nil {
705-
return compID, errors.Wrap(err, "create planning block dir")
705+
return false, ulid.ULID{}, errors.Wrap(err, "create planning block dir")
706706
}
707707
if err := metadata.Write(cg.logger, bdir, meta); err != nil {
708-
return compID, errors.Wrap(err, "write planning meta file")
708+
return false, ulid.ULID{}, errors.Wrap(err, "write planning meta file")
709709
}
710710
}
711711

712712
// Plan against the written meta.json files.
713713
plan, err := comp.Plan(dir)
714714
if err != nil {
715-
return compID, errors.Wrap(err, "plan compaction")
715+
return false, ulid.ULID{}, errors.Wrap(err, "plan compaction")
716716
}
717717
if len(plan) == 0 {
718718
// Nothing to do.
719-
return compID, nil
719+
return false, ulid.ULID{}, nil
720720
}
721721

722722
// Due to #183 we verify that none of the blocks in the plan have overlapping sources.
@@ -729,45 +729,45 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) (
729729
for _, pdir := range plan {
730730
meta, err := metadata.Read(pdir)
731731
if err != nil {
732-
return compID, errors.Wrapf(err, "read meta from %s", pdir)
732+
return false, ulid.ULID{}, errors.Wrapf(err, "read meta from %s", pdir)
733733
}
734734

735735
if cg.Key() != GroupKey(*meta) {
736-
return compID, halt(errors.Wrapf(err, "compact planned compaction for mixed groups. group: %s, planned block's group: %s", cg.Key(), GroupKey(*meta)))
736+
return false, ulid.ULID{}, halt(errors.Wrapf(err, "compact planned compaction for mixed groups. group: %s, planned block's group: %s", cg.Key(), GroupKey(*meta)))
737737
}
738738

739739
for _, s := range meta.Compaction.Sources {
740740
if _, ok := uniqueSources[s]; ok {
741-
return compID, halt(errors.Errorf("overlapping sources detected for plan %v", plan))
741+
return false, ulid.ULID{}, halt(errors.Errorf("overlapping sources detected for plan %v", plan))
742742
}
743743
uniqueSources[s] = struct{}{}
744744
}
745745

746746
id, err := ulid.Parse(filepath.Base(pdir))
747747
if err != nil {
748-
return compID, errors.Wrapf(err, "plan dir %s", pdir)
748+
return false, ulid.ULID{}, errors.Wrapf(err, "plan dir %s", pdir)
749749
}
750750

751751
if meta.ULID.Compare(id) != 0 {
752-
return compID, errors.Errorf("mismatch between meta %s and dir %s", meta.ULID, id)
752+
return false, ulid.ULID{}, errors.Errorf("mismatch between meta %s and dir %s", meta.ULID, id)
753753
}
754754

755755
if err := block.Download(ctx, cg.logger, cg.bkt, id, pdir); err != nil {
756-
return compID, retry(errors.Wrapf(err, "download block %s", id))
756+
return false, ulid.ULID{}, retry(errors.Wrapf(err, "download block %s", id))
757757
}
758758

759759
// Ensure all input blocks are valid.
760760
stats, err := block.GatherIndexIssueStats(cg.logger, filepath.Join(pdir, block.IndexFilename), meta.MinTime, meta.MaxTime)
761761
if err != nil {
762-
return compID, errors.Wrapf(err, "gather index issues for block %s", pdir)
762+
return false, ulid.ULID{}, errors.Wrapf(err, "gather index issues for block %s", pdir)
763763
}
764764

765765
if err := stats.CriticalErr(); err != nil {
766-
return compID, halt(errors.Wrapf(err, "block with not healthy index found %s; Compaction level %v; Labels: %v", pdir, meta.Compaction.Level, meta.Thanos.Labels))
766+
return false, ulid.ULID{}, halt(errors.Wrapf(err, "block with not healthy index found %s; Compaction level %v; Labels: %v", pdir, meta.Compaction.Level, meta.Thanos.Labels))
767767
}
768768

769769
if err := stats.Issue347OutsideChunksErr(); err != nil {
770-
return compID, issue347Error(errors.Wrapf(err, "invalid, but reparable block %s", pdir), meta.ULID)
770+
return false, ulid.ULID{}, issue347Error(errors.Wrapf(err, "invalid, but reparable block %s", pdir), meta.ULID)
771771
}
772772
}
773773
level.Debug(cg.logger).Log("msg", "downloaded and verified blocks",
@@ -777,7 +777,25 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) (
777777

778778
compID, err = comp.Compact(dir, plan, nil)
779779
if err != nil {
780-
return compID, halt(errors.Wrapf(err, "compact blocks %v", plan))
780+
return false, ulid.ULID{}, halt(errors.Wrapf(err, "compact blocks %v", plan))
781+
}
782+
if compID == (ulid.ULID{}) {
783+
// Prometheus compactor found that the compacted block would have no samples.
784+
level.Info(cg.logger).Log("msg", "compacted block would have no samples, deleting source blocks", "blocks", fmt.Sprintf("%v", plan))
785+
for _, block := range plan {
786+
meta, err := metadata.Read(block)
787+
if err != nil {
788+
level.Warn(cg.logger).Log("msg", "failed to read meta for block", "block", block)
789+
continue
790+
}
791+
if meta.Stats.NumSamples == 0 {
792+
if err := cg.deleteBlock(block); err != nil {
793+
level.Warn(cg.logger).Log("msg", "failed to delete empty block found during compaction", "block", block)
794+
}
795+
}
796+
}
797+
// Even though this block was empty, there may be more work to do
798+
return true, ulid.ULID{}, nil
781799
}
782800
level.Debug(cg.logger).Log("msg", "compacted blocks",
783801
"blocks", fmt.Sprintf("%v", plan), "duration", time.Since(begin))
@@ -790,55 +808,61 @@ func (cg *Group) compact(ctx context.Context, dir string, comp tsdb.Compactor) (
790808
Source: metadata.CompactorSource,
791809
}, nil)
792810
if err != nil {
793-
return compID, errors.Wrapf(err, "failed to finalize the block %s", bdir)
811+
return false, ulid.ULID{}, errors.Wrapf(err, "failed to finalize the block %s", bdir)
794812
}
795813

796814
if err = os.Remove(filepath.Join(bdir, "tombstones")); err != nil {
797-
return compID, errors.Wrap(err, "remove tombstones")
815+
return false, ulid.ULID{}, errors.Wrap(err, "remove tombstones")
798816
}
799817

800818
// Ensure the output block is valid.
801819
if err := block.VerifyIndex(cg.logger, filepath.Join(bdir, block.IndexFilename), newMeta.MinTime, newMeta.MaxTime); err != nil {
802-
return compID, halt(errors.Wrapf(err, "invalid result block %s", bdir))
820+
return false, ulid.ULID{}, halt(errors.Wrapf(err, "invalid result block %s", bdir))
803821
}
804822

805823
// Ensure the output block is not overlapping with anything else.
806824
if err := cg.areBlocksOverlapping(newMeta, plan...); err != nil {
807-
return compID, halt(errors.Wrapf(err, "resulted compacted block %s overlaps with something", bdir))
825+
return false, ulid.ULID{}, halt(errors.Wrapf(err, "resulted compacted block %s overlaps with something", bdir))
808826
}
809827

810828
begin = time.Now()
811829

812830
if err := block.Upload(ctx, cg.logger, cg.bkt, bdir); err != nil {
813-
return compID, retry(errors.Wrapf(err, "upload of %s failed", compID))
831+
return false, ulid.ULID{}, retry(errors.Wrapf(err, "upload of %s failed", compID))
814832
}
815833
level.Debug(cg.logger).Log("msg", "uploaded block", "result_block", compID, "duration", time.Since(begin))
816834

817835
// Delete the blocks we just compacted from the group and bucket so they do not get included
818836
// into the next planning cycle.
819837
// Eventually the block we just uploaded should get synced into the group again (including sync-delay).
820838
for _, b := range plan {
821-
id, err := ulid.Parse(filepath.Base(b))
822-
if err != nil {
823-
return compID, errors.Wrapf(err, "plan dir %s", b)
839+
if err := cg.deleteBlock(b); err != nil {
840+
return false, ulid.ULID{}, retry(errors.Wrapf(err, "delete old block from bucket"))
824841
}
842+
cg.groupGarbageCollectedBlocks.Inc()
843+
}
825844

826-
if err := os.RemoveAll(b); err != nil {
827-
return compID, errors.Wrapf(err, "remove old block dir %s", id)
828-
}
845+
return true, compID, nil
846+
}
829847

830-
// Spawn a new context so we always delete a block in full on shutdown.
831-
delCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
832-
level.Info(cg.logger).Log("msg", "deleting compacted block", "old_block", id, "result_block", compID)
833-
err = block.Delete(delCtx, cg.bkt, id)
834-
cancel()
835-
if err != nil {
836-
return compID, retry(errors.Wrapf(err, "delete old block %s from bucket ", id))
837-
}
838-
cg.groupGarbageCollectedBlocks.Inc()
848+
func (cg *Group) deleteBlock(b string) error {
849+
id, err := ulid.Parse(filepath.Base(b))
850+
if err != nil {
851+
return errors.Wrapf(err, "plan dir %s", b)
852+
}
853+
854+
if err := os.RemoveAll(b); err != nil {
855+
return errors.Wrapf(err, "remove old block dir %s", id)
839856
}
840857

841-
return compID, nil
858+
// Spawn a new context so we always delete a block in full on shutdown.
859+
delCtx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
860+
defer cancel()
861+
level.Info(cg.logger).Log("msg", "deleting compacted block", "old_block", id)
862+
if err := block.Delete(delCtx, cg.bkt, id); err != nil {
863+
return errors.Wrapf(err, "delete block %s from bucket", id)
864+
}
865+
return nil
842866
}
843867

844868
// BucketCompactor compacts blocks in a bucket.
@@ -882,31 +906,31 @@ func (c *BucketCompactor) Compact(ctx context.Context) error {
882906
return errors.Wrap(err, "garbage")
883907
}
884908

909+
level.Info(c.logger).Log("msg", "start of compaction")
910+
885911
groups, err := c.sy.Groups()
886912
if err != nil {
887913
return errors.Wrap(err, "build compaction groups")
888914
}
889-
done := true
915+
finishedAllGroups := true
890916
for _, g := range groups {
891-
id, err := g.Compact(ctx, c.compactDir, c.comp)
917+
shouldRerunGroup, _, err := g.Compact(ctx, c.compactDir, c.comp)
892918
if err == nil {
893-
// If the returned ID has a zero value, the group had no blocks to be compacted.
894-
// We keep going through the outer loop until no group has any work left.
895-
if id != (ulid.ULID{}) {
896-
done = false
919+
if shouldRerunGroup {
920+
finishedAllGroups = false
897921
}
898922
continue
899923
}
900924

901925
if IsIssue347Error(err) {
902926
if err := RepairIssue347(ctx, c.logger, c.bkt, err); err == nil {
903-
done = false
927+
finishedAllGroups = false
904928
continue
905929
}
906930
}
907931
return errors.Wrap(err, "compaction")
908932
}
909-
if done {
933+
if finishedAllGroups {
910934
break
911935
}
912936
}

pkg/compact/compact_e2e_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -253,18 +253,18 @@ func TestGroup_Compact_e2e(t *testing.T) {
253253
comp, err := tsdb.NewLeveledCompactor(nil, log.NewLogfmtLogger(os.Stderr), []int64{1000, 3000}, nil)
254254
testutil.Ok(t, err)
255255

256-
id, err := g.Compact(ctx, dir, comp)
256+
shouldRerun, id, err := g.Compact(ctx, dir, comp)
257257
testutil.Ok(t, err)
258-
testutil.Assert(t, id == ulid.ULID{}, "group should be empty, but somehow compaction took place")
258+
testutil.Assert(t, !shouldRerun, "group should be empty, but compactor did a compaction and told us to rerun")
259259

260260
// Add all metas that would be gathered by syncMetas.
261261
for _, m := range metas {
262262
testutil.Ok(t, g.Add(m))
263263
}
264264

265-
id, err = g.Compact(ctx, dir, comp)
265+
shouldRerun, id, err = g.Compact(ctx, dir, comp)
266266
testutil.Ok(t, err)
267-
testutil.Assert(t, id != ulid.ULID{}, "no compaction took place")
267+
testutil.Assert(t, shouldRerun, "there should be compactible data, but the compactor reported there was not")
268268

269269
resDir := filepath.Join(dir, id.String())
270270
testutil.Ok(t, block.Download(ctx, log.NewNopLogger(), bkt, id, resDir))

0 commit comments

Comments (0)