Skip to content

Commit b8c9dcf

Browse files
jjneely authored and bwplotka committed
bucket verify: repair out of order labels (#964)
* bucket verify: repair out of order labels * verify repair: correctly order series in the index on rewrite When we have label sets that are not in the correct order, fixing that changes the order of the series in the index. So the index must be rewritten in that new order. This makes this repair tool take up a bunch more memory, but produces blocks that verify correctly. * Fix the TSDB block safe-delete function The directory name must be the block ID name exactly to verify. A temp directory or random name will not work here. * verify repair: fix duplicate chunk detection Pointer/reference logic error was eliminating all chunks for a series in a given TSDB block that wasn't the first chunk. Chunks are now referenced correctly via pointers. * PR feedback: use errors.Errorf() instead of fmt.Errorf() * Use errors.New() Some linters catch errors.Errorf() as its not really part of the errors package. * Liberally comment this for loop We're comparing items by pointers, using Go's range variables is misleading here and we need not fall into the same trap. * Take advantage of sort.Interface This prevents us from having to re-implement label sorting. * PR Feedback: Comments are full sentences.
1 parent d436a04 commit b8c9dcf

File tree

3 files changed

+57
-23
lines changed

3 files changed

+57
-23
lines changed

cmd/thanos/bucket.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,11 @@ func registerBucketVerify(m map[string]setupFunc, root *kingpin.CmdClause, name
8585
var backupBkt objstore.Bucket
8686
if len(backupconfContentYaml) == 0 {
8787
if *repair {
88-
return errors.Wrap(err, "repair is specified, so backup client is required")
88+
return errors.New("repair is specified, so backup client is required")
8989
}
9090
} else {
91-
backupBkt, err = client.NewBucket(logger, backupconfContentYaml, reg, name)
91+
// nil Prometheus registerer: don't create conflicting metrics
92+
backupBkt, err = client.NewBucket(logger, backupconfContentYaml, nil, name)
9293
if err != nil {
9394
return err
9495
}

pkg/block/index.go

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ func IgnoreDuplicateOutsideChunk(_ int64, _ int64, last *chunks.Meta, curr *chun
535535
// the current one.
536536
if curr.MinTime != last.MinTime || curr.MaxTime != last.MaxTime {
537537
return false, errors.Errorf("non-sequential chunks not equal: [%d, %d] and [%d, %d]",
538-
last.MaxTime, last.MaxTime, curr.MinTime, curr.MaxTime)
538+
last.MinTime, last.MaxTime, curr.MinTime, curr.MaxTime)
539539
}
540540
ca := crc32.Checksum(last.Chunk.Bytes(), castagnoli)
541541
cb := crc32.Checksum(curr.Chunk.Bytes(), castagnoli)
@@ -563,9 +563,14 @@ func sanitizeChunkSequence(chks []chunks.Meta, mint int64, maxt int64, ignoreChk
563563
var last *chunks.Meta
564564

565565
OUTER:
566-
for _, c := range chks {
566+
// This compares the current chunk to the chunk from the last iteration
567+
// by pointers. If we use "i, c := range chks" the variable c is a new
568+
// variable who's address doesn't change through the entire loop.
569+
// The current element of the chks slice is copied into it. We must take
570+
// the address of the indexed slice instead.
571+
for i := range chks {
567572
for _, ignoreChkFn := range ignoreChkFns {
568-
ignore, err := ignoreChkFn(mint, maxt, last, &c)
573+
ignore, err := ignoreChkFn(mint, maxt, last, &chks[i])
569574
if err != nil {
570575
return nil, errors.Wrap(err, "ignore function")
571576
}
@@ -575,13 +580,18 @@ OUTER:
575580
}
576581
}
577582

578-
last = &c
579-
repl = append(repl, c)
583+
last = &chks[i]
584+
repl = append(repl, chks[i])
580585
}
581586

582587
return repl, nil
583588
}
584589

590+
type seriesRepair struct {
591+
lset labels.Labels
592+
chks []chunks.Meta
593+
}
594+
585595
// rewrite writes all data from the readers back into the writers while cleaning
586596
// up mis-ordered and duplicated chunks.
587597
func rewrite(
@@ -609,17 +619,20 @@ func rewrite(
609619
postings = index.NewMemPostings()
610620
values = map[string]stringset{}
611621
i = uint64(0)
622+
series = []seriesRepair{}
612623
)
613624

614-
var lset labels.Labels
615-
var chks []chunks.Meta
616-
617625
for all.Next() {
626+
var lset labels.Labels
627+
var chks []chunks.Meta
618628
id := all.At()
619629

620630
if err := indexr.Series(id, &lset, &chks); err != nil {
621631
return err
622632
}
633+
// Make sure labels are in sorted order.
634+
sort.Sort(lset)
635+
623636
for i, c := range chks {
624637
chks[i].Chunk, err = chunkr.Chunk(c.Ref)
625638
if err != nil {
@@ -636,34 +649,49 @@ func rewrite(
636649
continue
637650
}
638651

639-
if err := chunkw.WriteChunks(chks...); err != nil {
652+
series = append(series, seriesRepair{
653+
lset: lset,
654+
chks: chks,
655+
})
656+
}
657+
658+
if all.Err() != nil {
659+
return errors.Wrap(all.Err(), "iterate series")
660+
}
661+
662+
// Sort the series, if labels are re-ordered then the ordering of series
663+
// will be different.
664+
sort.Slice(series, func(i, j int) bool {
665+
return labels.Compare(series[i].lset, series[j].lset) < 0
666+
})
667+
668+
// Build a new TSDB block.
669+
for _, s := range series {
670+
if err := chunkw.WriteChunks(s.chks...); err != nil {
640671
return errors.Wrap(err, "write chunks")
641672
}
642-
if err := indexw.AddSeries(i, lset, chks...); err != nil {
673+
if err := indexw.AddSeries(i, s.lset, s.chks...); err != nil {
643674
return errors.Wrap(err, "add series")
644675
}
645676

646-
meta.Stats.NumChunks += uint64(len(chks))
677+
meta.Stats.NumChunks += uint64(len(s.chks))
647678
meta.Stats.NumSeries++
648679

649-
for _, chk := range chks {
680+
for _, chk := range s.chks {
650681
meta.Stats.NumSamples += uint64(chk.Chunk.NumSamples())
651682
}
652683

653-
for _, l := range lset {
684+
for _, l := range s.lset {
654685
valset, ok := values[l.Name]
655686
if !ok {
656687
valset = stringset{}
657688
values[l.Name] = valset
658689
}
659690
valset.set(l.Value)
660691
}
661-
postings.Add(i, lset)
692+
postings.Add(i, s.lset)
662693
i++
663694
}
664-
if all.Err() != nil {
665-
return errors.Wrap(all.Err(), "iterate series")
666-
}
667695

668696
s := make([]string, 0, 256)
669697
for n, v := range values {

pkg/verifier/safe_delete.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ package verifier
22

33
import (
44
"context"
5-
"fmt"
65
"io/ioutil"
76
"os"
7+
"path/filepath"
88

99
"github.com/go-kit/kit/log"
1010
"github.com/go-kit/kit/log/level"
@@ -31,13 +31,18 @@ func SafeDelete(ctx context.Context, logger log.Logger, bkt objstore.Bucket, bac
3131
return errors.Errorf("%s dir seems to exists in backup bucket. Remove this block manually if you are sure it is safe to do", id)
3232
}
3333

34-
dir, err := ioutil.TempDir("", fmt.Sprintf("safe-delete-%s", id))
34+
tempdir, err := ioutil.TempDir("", "safe-delete")
35+
if err != nil {
36+
return err
37+
}
38+
dir := filepath.Join(tempdir, id.String())
39+
err = os.Mkdir(dir, 0755)
3540
if err != nil {
3641
return err
3742
}
3843
defer func() {
39-
if err := os.RemoveAll(dir); err != nil {
40-
level.Warn(logger).Log("msg", "failed to delete dir", "dir", dir, "err", err)
44+
if err := os.RemoveAll(tempdir); err != nil {
45+
level.Warn(logger).Log("msg", "failed to delete dir", "dir", tempdir, "err", err)
4146
}
4247
}()
4348

0 commit comments

Comments
 (0)