Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ The following emojis are used to highlight certain changes:
- `DagModifier` now correctly preserves raw node codec when modifying data under the chunker threshold, instead of incorrectly forcing everything to dag-pb
- `DagModifier` prevents creation of identity CIDs exceeding `verifcid.DefaultMaxIdentityDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs
- `DagModifier` now supports appending data to a `RawNode` by automatically converting it into a UnixFS file structure where the original `RawNode` becomes the first leaf block, fixing previously impossible append operations that would fail with "expected protobuf dag node" errors
- `mfs`: Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type
- `mfs`:
- Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type ([#1018](https://github.com/ipfs/boxo/pull/1018))
- Fixed unbounded memory growth when using deferred flushing and user forgets to flush manually. Added `SetMaxCacheSize()` to limit directory cache growth. Default 256 entries, set to 0 to disable. ([#1035](https://github.com/ipfs/boxo/pull/1035))

### Security

Expand Down
64 changes: 64 additions & 0 deletions mfs/dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ var (
ErrDirExists = errors.New("directory already has entry by that name")
)

const (
// DefaultMaxCacheSize is the default maximum number of entries
// that can be cached in a directory before auto-flush is triggered.
// This prevents unbounded memory growth when using --flush=false.
// The value matches HAMT shard size (256).
// TODO: make this configurable
// See https://github.com/ipfs/kubo/issues/10842
DefaultMaxCacheSize = 256
)

// TODO: There's too much functionality associated with this structure,
// let's organize it (and if possible extract part of it elsewhere)
// and document the main features of `Directory` here.
Expand All @@ -43,6 +53,10 @@ type Directory struct {
unixfsDir uio.Directory

prov routing.ContentProviding

// Maximum number of entries to cache before triggering auto-flush.
// Set to 0 to disable cache size limiting.
maxCacheSize int
}

// NewDirectory constructs a new MFS directory.
Expand Down Expand Up @@ -82,6 +96,7 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren
unixfsDir: db,
prov: prov,
entriesCache: make(map[string]FSNode),
maxCacheSize: DefaultMaxCacheSize,
}, nil
}

Expand Down Expand Up @@ -121,6 +136,7 @@ func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ip
unixfsDir: db,
prov: prov,
entriesCache: make(map[string]FSNode),
maxCacheSize: DefaultMaxCacheSize,
}, nil
}

Expand Down Expand Up @@ -216,15 +232,25 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) {
// inherited from the parent.
ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks())
ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout())
// Inherit cache size limit from parent
ndir.maxCacheSize = d.maxCacheSize

d.entriesCache[name] = ndir
// Check cache size after adding entry
if err := d.checkCacheSize(); err != nil {
return nil, err
}
return ndir, nil
case ft.TFile, ft.TRaw, ft.TSymlink:
nfi, err := NewFile(name, nd, d, d.dagService, d.prov)
if err != nil {
return nil, err
}
d.entriesCache[name] = nfi
// Check cache size after adding entry
if err := d.checkCacheSize(); err != nil {
return nil, err
}
return nfi, nil
case ft.TMetadata:
return nil, ErrNotYetImplemented
Expand All @@ -237,6 +263,10 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) {
return nil, err
}
d.entriesCache[name] = nfi
// Check cache size after adding entry
if err := d.checkCacheSize(); err != nil {
return nil, err
}
return nfi, nil
default:
return nil, errors.New("unrecognized node type in cache node")
Expand Down Expand Up @@ -374,6 +404,9 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro
return nil, err
}

// Inherit cache size limit from parent
dirobj.maxCacheSize = d.maxCacheSize

ndir, err := dirobj.GetNode()
if err != nil {
return nil, err
Expand All @@ -385,9 +418,40 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro
}

d.entriesCache[name] = dirobj

// Check cache size after adding new directory
if err := d.checkCacheSize(); err != nil {
return nil, err
}

return dirobj, nil
}

// checkCacheSize checks if the cache has exceeded the maximum size
// and triggers an auto-flush if needed to prevent unbounded growth.
// Must be called with d.lock held.
func (d *Directory) checkCacheSize() error {
// Skip check if cache limiting is disabled (maxCacheSize == 0)
if d.maxCacheSize > 0 && len(d.entriesCache) >= d.maxCacheSize {
// Auto-flush to prevent unbounded cache growth
log.Debugf("mfs: auto-flushing directory cache (size: %d >= limit: %d)", len(d.entriesCache), d.maxCacheSize)
err := d.cacheSync(true)
if err != nil {
return err
}
}
return nil
}

// SetMaxCacheSize sets the maximum number of entries to cache before
// triggering an auto-flush. Set to 0 to disable cache size limiting.
// This method is thread-safe.
func (d *Directory) SetMaxCacheSize(size int) {
d.lock.Lock()
defer d.lock.Unlock()
d.maxCacheSize = size
}

func (d *Directory) Unlink(name string) error {
d.lock.Lock()
defer d.lock.Unlock()
Expand Down
7 changes: 7 additions & 0 deletions mfs/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ func (kr *Root) GetDirectory() *Directory {
return kr.dir
}

// SetMaxCacheSize sets the maximum number of entries to cache in each
// directory before triggering an auto-flush. Set to 0 to disable cache
// size limiting. This setting is propagated to all directories in the tree.
func (kr *Root) SetMaxCacheSize(size int) {
kr.dir.SetMaxCacheSize(size)
}

// Flush signals that an update has occurred since the last publish,
// and updates the Root republisher.
// TODO: We are definitely abusing the "flush" terminology here.
Expand Down
Loading