Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions v2/block_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"io"
"math"

blocks "github.com/ipfs/go-block-format"
"github.com/ipfs/go-cid"
Expand Down Expand Up @@ -141,6 +142,23 @@ func (br *BlockReader) Next() (blocks.Block, error) {
return blocks.NewBlockWithCid(data, c)
}

// NextReader returns the CID, an io.Reader and the length of what should be the next block.
// The CID itself is not verified, and the reader is limited to the size of the block.
// The user of this function HAS TO consume all of the bytes in the returned reader before using any other function
// on the BlockReader.
// The returned length might be larger than MaxAllowedSectionSize; it is up to the user to check before loading the data into memory.
func (br *BlockReader) NextReader() (cid.Cid, io.Reader, uint64, error) {
c, length, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, math.MaxUint64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
c, length, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, math.MaxUint64)
c, length, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, math.MaxInt64)

Or is there a reason you're using MaxUint64 but then capping it at MaxInt64 inside ReadNodeHeader? Why not just use MaxInt64 all the way through?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the maxsize param is uint64 everywhere, from an external API viewpoint I'm passing "unlimited" (the maximum allowed value). The function then internally caps it to what it is able to handle.

Both perspectives make sense though

if err != nil {
return cid.Undef, nil, 0, err
}
limitReader := io.LimitReader(br.r, int64(length))

ss := uint64(c.ByteLen()) + length
br.offset += uint64(varint.UvarintSize(ss)) + ss
return c, limitReader, length, nil
}

// BlockMetadata contains metadata about a block's section in a CAR file/stream.
//
// There are two offsets for the block section which will be the same if the
Expand Down Expand Up @@ -171,26 +189,14 @@ type BlockMetadata struct {
// If the underlying reader used by the BlockReader is actually a ReadSeeker, this method will attempt to
// seek over the underlying data rather than reading it into memory.
func (br *BlockReader) SkipNext() (*BlockMetadata, error) {
sectionSize, err := util.LdReadSize(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize)
c, blockSize, err := util.ReadNodeHeader(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize)
if err != nil {
return nil, err
}
if sectionSize == 0 {
_, _, err := cid.CidFromBytes([]byte{}) // generate zero-byte CID error
if err == nil {
panic("expected zero-byte CID error")
}
return nil, err
}

cidSize := uint64(c.ByteLen())
sectionSize := blockSize + cidSize
lenSize := uint64(varint.UvarintSize(sectionSize))

cidSize, c, err := cid.CidFromReader(io.LimitReader(br.r, int64(sectionSize)))
if err != nil {
return nil, err
}

blockSize := sectionSize - uint64(cidSize)
blockOffset := br.offset

// move our reader forward; either by seeking or slurping
Expand Down
17 changes: 17 additions & 0 deletions v2/block_reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,23 @@ func TestBlockReader(t *testing.T) {
}
_, err = car.Next()
req.ErrorIs(err, io.EOF)

car, err = carv2.NewBlockReader(testCase.reader())
req.NoError(err)
req.ElementsMatch(roots, car.Roots)

for i := 0; i < 100; i++ {
cid, r, length, err := car.NextReader()
req.NoError(err)
req.Equal(blks[i].block.Cid(), cid)
req.Equal(uint64(len(blks[i].block.RawData())), length)
data := make([]byte, length)
_, err = io.ReadFull(r, data)
req.NoError(err)
req.Equal(blks[i].block.RawData(), data)
}
_, err = car.Next()
req.ErrorIs(err, io.EOF)
})
}
}
Expand Down
28 changes: 28 additions & 0 deletions v2/internal/carv1/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package util
import (
"errors"
"io"
"math"

internalio "github.com/ipld/go-car/v2/internal/io"

Expand Down Expand Up @@ -33,6 +34,33 @@ func ReadNode(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) (cid.Cid, []b
return c, data[n:], nil
}

// ReadNodeHeader returns the specified CID of the node and the length of data to be read.
func ReadNodeHeader(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) (cid.Cid, uint64, error) {
size, err := LdReadSize(r, zeroLenAsEOF, maxReadBytes)
if err != nil {
return cid.Cid{}, 0, err
}

if size == 0 {
_, _, err := cid.CidFromBytes([]byte{}) // generate zero-byte CID error
if err == nil {
panic("expected zero-byte CID error")
}
return cid.Undef, 0, err
}

if size > math.MaxInt64 {
return cid.Cid{}, 0, ErrHeaderTooLarge
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if size > math.MaxInt64 {
return cid.Cid{}, 0, ErrHeaderTooLarge
}

This is unnecessary; it's what LdReadSize does for you with its third arg, to generate a uniform error pattern.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, if this really is necessary, use ErrSectionTooLarge, because this isn't actually a header.

Copy link
Contributor Author

@Kubuxu Kubuxu Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this because of the int64 cast for LimitReader (we don't want the cast to produce a negative value).

limitReader := io.LimitReader(r, int64(size))
n, c, err := cid.CidFromReader(limitReader)
if err != nil {
return cid.Cid{}, 0, err
}

return c, size - uint64(n), nil
}

func LdWrite(w io.Writer, d ...[]byte) error {
var sum uint64
for _, s := range d {
Expand Down
Loading