1 change: 0 additions & 1 deletion pkg/store/prometheus.go
@@ -480,7 +480,6 @@ func (p *PrometheusStore) chunkSamples(series *prompb.TimeSeries, maxSamplesPerC
 
 		samples = samples[chunkSize:]
 	}
-
 	return chks, nil
 }

32 changes: 20 additions & 12 deletions pkg/store/proxy_heap.go
@@ -6,7 +6,9 @@ package store
 import (
 	"container/heap"
 	"context"
+	"encoding/binary"
 	"fmt"
+	"hash"
 	"io"
 	"sort"
 	"sync"
@@ -111,24 +113,30 @@ func (d *dedupResponseHeap) Next() bool {
 func chainSeriesAndRemIdenticalChunks(series []*storepb.SeriesResponse) *storepb.SeriesResponse {
 	chunkDedupMap := map[uint64]*storepb.AggrChunk{}
 
+	buf := make([]byte, 8)
+	hasher := hashPool.Get().(hash.Hash64)
+	defer hashPool.Put(hasher)
+
 	for _, s := range series {
 		for _, chk := range s.GetSeries().Chunks {
+			hasher.Reset()
 			for _, field := range []*storepb.Chunk{
 				chk.Raw, chk.Count, chk.Max, chk.Min, chk.Sum, chk.Counter,
 			} {
-				if field == nil {
-					continue
-				}
-				hash := field.Hash
-				if hash == 0 {
-					hash = xxhash.Sum64(field.Data)
-				}
-
-				if _, ok := chunkDedupMap[hash]; !ok {
-					chk := chk
-					chunkDedupMap[hash] = &chk
-					break
+				switch {
+				case field == nil:
+					binary.BigEndian.PutUint64(buf, 0)
+				case field.Hash != 0:
+					binary.BigEndian.PutUint64(buf, field.Hash)
+				default:
+					binary.BigEndian.PutUint64(buf, xxhash.Sum64(field.Data))
 				}
+				_, _ = hasher.Write(buf)
+			}
+			aggrChunkHash := hasher.Sum64()
Contributor:
Do I get it right that this actually calculates a hash of the hashes of all the fields? I wonder how it differs from calculating the hash of all the fields' data in terms of possible hash collisions...

Contributor Author:
It's the hash of the field hashes (mainly to reuse field.Hash).

+			if _, ok := chunkDedupMap[aggrChunkHash]; !ok {
+				chk := chk
+				chunkDedupMap[aggrChunkHash] = &chk
 			}
 		}
 	}
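For context on what the new hashing does, below is a minimal, self-contained sketch of the hash-of-hashes scheme, assuming github.com/cespare/xxhash/v2 for both the per-field hashes and the pooled hash.Hash64. The pool and aggrHash names here are illustrative stand-ins for the hashPool and the inline loop above, not Thanos APIs. As the author notes in the thread, composing per-field hashes mainly allows an already-computed field.Hash to be reused instead of re-hashing field.Data.

```go
// Illustrative sketch only: "pool" and "aggrHash" are hypothetical names
// standing in for hashPool and the inline loop in proxy_heap.go.
package main

import (
	"encoding/binary"
	"fmt"
	"hash"
	"sync"

	"github.com/cespare/xxhash/v2"
)

var pool = sync.Pool{
	New: func() interface{} { return xxhash.New() },
}

// aggrHash folds one 8-byte big-endian word per field into a single
// 64-bit hash. A nil field contributes the word 0, so the aggregate is
// sensitive to which fields are set as well as to their contents.
func aggrHash(fields [][]byte) uint64 {
	hasher := pool.Get().(hash.Hash64)
	defer pool.Put(hasher)
	hasher.Reset()

	buf := make([]byte, 8)
	for _, field := range fields {
		var sub uint64
		if field != nil {
			sub = xxhash.Sum64(field) // the real code prefers a precomputed field.Hash
		}
		binary.BigEndian.PutUint64(buf, sub)
		_, _ = hasher.Write(buf)
	}
	return hasher.Sum64()
}

func main() {
	a := aggrHash([][]byte{[]byte("count"), []byte("sum")})
	b := aggrHash([][]byte{[]byte("count"), []byte("sum")})
	c := aggrHash([][]byte{[]byte("count"), nil})
	fmt.Println(a == b, a == c) // true false: identical chunks agree, differing ones diverge
}
```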
49 changes: 49 additions & 0 deletions pkg/store/proxy_test.go
@@ -2197,6 +2197,55 @@ func TestDedupRespHeap_Deduplication(t *testing.T) {
 		testFn func(responses []*storepb.SeriesResponse, h *dedupResponseHeap)
 		tname  string
 	}{
+		{
+			tname: "dedups identical series with multiple chunks",
+			responses: []*storepb.SeriesResponse{
+				{
+					Result: &storepb.SeriesResponse_Series{
+						Series: &storepb.Series{
+							Labels: labelpb.ZLabelsFromPromLabels(labels.FromStrings("foo", "bar")),
+							Chunks: []storepb.AggrChunk{
+								{
+									Count: &storepb.Chunk{
+										Type: storepb.Chunk_XOR,
+										Data: []byte(`count`),
+									},
+									Sum: &storepb.Chunk{
+										Type: storepb.Chunk_XOR,
+										Data: []byte(`sum`),
+									},
+								},
+							},
+						},
+					},
+				},
+				{
+					Result: &storepb.SeriesResponse_Series{
+						Series: &storepb.Series{
+							Labels: labelpb.ZLabelsFromPromLabels(labels.FromStrings("foo", "bar")),
+							Chunks: []storepb.AggrChunk{
+								{
+									Count: &storepb.Chunk{
+										Type: storepb.Chunk_XOR,
+										Data: []byte(`count`),
+									},
+									Sum: &storepb.Chunk{
+										Type: storepb.Chunk_XOR,
+										Data: []byte(`sum`),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			testFn: func(responses []*storepb.SeriesResponse, h *dedupResponseHeap) {
+				testutil.Equals(t, true, h.Next())
+				resp := h.At()
+				testutil.Equals(t, responses[0], resp)
+				testutil.Equals(t, false, h.Next())
+			},
+		},
 		{
 			tname: "edge case with zero responses",
 			responses: []*storepb.SeriesResponse{},
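A miniature of what the new test case asserts, under the same xxhash-over-field-hashes construction as the sketch after the proxy_heap.go hunk; key is a hypothetical helper, not part of the test. Two responses carrying byte-identical Count/Sum chunk data produce the same aggregate key, so the dedup map keeps only the first:

```go
// Hypothetical miniature of the test's dedup expectation; "key" is an
// illustrative helper, not a Thanos function.
package main

import (
	"encoding/binary"
	"fmt"

	"github.com/cespare/xxhash/v2"
)

// key hashes the big-endian-encoded xxhash of each field, mirroring the
// aggregate-chunk hashing in proxy_heap.go.
func key(fields ...string) uint64 {
	h := xxhash.New()
	var buf [8]byte
	for _, f := range fields {
		binary.BigEndian.PutUint64(buf[:], xxhash.Sum64String(f))
		_, _ = h.Write(buf[:])
	}
	return h.Sum64()
}

func main() {
	dedup := map[uint64]struct{}{}
	kept := 0
	// Two responses with byte-identical Count/Sum chunk data, as in the test.
	for _, chunk := range [][]string{
		{"count", "sum"},
		{"count", "sum"},
	} {
		if _, ok := dedup[key(chunk...)]; !ok {
			dedup[key(chunk...)] = struct{}{}
			kept++
		}
	}
	fmt.Println(kept) // 1: the heap yields one merged response, then Next() is false
}
```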