Changes from 1 commit (of the 28 commits in this pull request):
c0c90d2  Run centos and debian workflows on push and PR (igchor, Nov 2, 2021)
dbe3fda  Adds createPutToken and switches findEviction (byrnedj, Feb 4, 2023)
9afcd64  Add memory usage statistics for allocation classes (igchor, Jul 6, 2022)
eca7d8c  Initial multi-tier support implementation (igchor, Sep 28, 2021)
664da8d  AC stats multi-tier (byrnedj, Jan 17, 2023)
3b7bb0c  Tests and fix tier sizing (byrnedj, Feb 8, 2023)
58e825b  This is the additional multi-tier support needed (guptask, Nov 14, 2022)
9fc705f  Rolling average alloc latency (guptask, Jul 21, 2022)
ce0e38a  Rolling average class latency (guptask, Jul 21, 2022)
e0a8006  MM2Q promotion iterator (byrnedj, Aug 9, 2022)
bcb2ae2  Multi-tier allocator patch (byrnedj, Feb 7, 2023)
d4cf1d4  basic multi-tier test based on numa bindings (igchor, Dec 30, 2021)
6d2fbef  Aadding new configs to hit_ratio/graph_cache_leader_fobj (vinser52, Jan 27, 2022)
5bfa1ff  Background data movement for the tiers (byrnedj, Oct 21, 2022)
1593291  dummy change to trigger container image rebuild (guptask, Mar 28, 2023)
a171f38  Updated the docker gcc version to 12 (#83) (guptask, May 9, 2023)
35a17e4  NUMA bindigs support for private memory (#82) (vinser52, May 17, 2023)
46d168c  Do not run cachelib-centos-8-5 on PRs (#85) (igchor, Jun 6, 2023)
7d06531  Add option to insert items to first free tier (#87) (igchor, Jun 8, 2023)
1521efe  Chained item movement between tiers - sync on the parent item (#84) (byrnedj, Jun 28, 2023)
3328e4e  edit dockerfile (byrnedj, Jul 24, 2023)
3c87c49  Track latency of per item eviction/promotion between memory tiers (guptask, Jul 28, 2023)
795f85b  Update dependencies (#95) (igchor, Aug 23, 2023)
96d948f  enable DTO build without memcpy changes to cachebench (byrnedj, Feb 28, 2024)
47d5034  Bckground eviction for multi-tier (byrnedj, Feb 28, 2024)
efea480  no online eviction option patch (byrnedj, Feb 28, 2024)
ebfca17  fixes cmake in latest test removal (upstream test build fails - need … (byrnedj, May 20, 2024)
52618b5  fixes commit for now (should drop once https://github.com/facebook/Ca… (byrnedj, May 28, 2024)

Rolling average alloc latency
Part 1 (single tier)
-----------------------------
Adds a per-pool, per-class rolling average allocation latency (upstream PR version).

Also fixes rolling stats on multi-tier; the multi-tier rolling stats
implementation follows in the next commit.

Note: an attempt was made to use the average alloc latency to guide the
background mover batch size. While the average alloc latency decreased,
throughput dropped as well, because the batch size became too large and
caused lock contention.

guptask authored and byrnedj committed May 20, 2024
commit 9fc705f990abd2d98864523903b9038c8092bd96
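
The rolling average added here is an incremental arithmetic mean: for the (n+1)-th sample x, trackValue (see the new RollingStats.h below) updates avg = avg * n/(n+1) + x/(n+1), so no per-sample history has to be stored. For example, with latency samples of 100 ns, 300 ns and 200 ns: after the first sample avg = 100; after the second, avg = 100 * 1/2 + 300/2 = 200; after the third, avg = 200 * 2/3 + 200/3 = 200, which is exactly the mean of the three samples.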
9 changes: 8 additions & 1 deletion cachelib/allocator/CacheAllocator.h
@@ -2782,6 +2782,8 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,

// the allocation class in our memory allocator.
const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
util::RollingLatencyTracker rollTracker{
(*stats_.classAllocLatency)[pid][cid]};

// TODO: per-tier
(*stats_.allocAttempts)[pid][cid].inc();
@@ -2892,6 +2894,9 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(const Item& parent,

// TODO: per-tier? Right now stats_ are not used in any public periodic
// worker
util::RollingLatencyTracker rollTracker{
(*stats_.classAllocLatency)[pid][cid]};

(*stats_.allocAttempts)[pid][cid].inc();

void* memory = allocator_[tid]->allocate(pid, requiredSize);
@@ -4912,7 +4917,9 @@ ACStats CacheAllocator<CacheTrait>::getACStats(TierId tid,
ClassId classId) const {
const auto& pool = allocator_[tid]->getPool(poolId);
const auto& ac = pool.getAllocationClass(classId);
return ac.getStats();
auto stats = ac.getStats();
stats.allocLatencyNs = (*stats_.classAllocLatency)[poolId][classId];
return stats;
}

template <typename CacheTrait>
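
The latency measurement is scope-based: the util::RollingLatencyTracker is constructed right after the (pid, cid) lookup and records the elapsed nanoseconds into the per-class rolling stats when it goes out of scope, so everything from that point until the function returns is included in the sample. A minimal stand-alone sketch of the same RAII pattern, with hypothetical names and a placeholder for the real allocation work (not CacheLib code):

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <new>

// Simplified rolling mean (stand-in for util::RollingStats).
struct RollingAvg {
  double avg{0};
  uint64_t n{0};
  void add(double v) { avg += (v - avg) / static_cast<double>(++n); }
};

// RAII scope timer: starts timing at construction, records on destruction
// (stand-in for util::RollingLatencyTracker).
struct ScopeTimer {
  explicit ScopeTimer(RollingAvg& s)
      : stats(&s), begin(std::chrono::steady_clock::now()) {}
  ~ScopeTimer() {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                  std::chrono::steady_clock::now() - begin)
                  .count();
    stats->add(static_cast<double>(ns));
  }
  RollingAvg* stats;
  std::chrono::time_point<std::chrono::steady_clock> begin;
};

RollingAvg gAllocLatency; // kept per (pool, class) in the real code

void* allocateLike(std::size_t bytes) {
  ScopeTimer timer{gAllocLatency}; // everything below is part of the sample
  return ::operator new(bytes);    // stand-in for allocator_[tid]->allocate(...); caller frees
}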
2 changes: 2 additions & 0 deletions cachelib/allocator/CacheStats.cpp
@@ -43,6 +43,8 @@ void Stats::init() {
initToZero(*fragmentationSize);
initToZero(*chainedItemEvictions);
initToZero(*regularItemEvictions);

classAllocLatency = std::make_unique<PerPoolClassRollingStats>();
}

template <int>
8 changes: 8 additions & 0 deletions cachelib/allocator/CacheStatsInternal.h
@@ -21,6 +21,7 @@
#include "cachelib/allocator/Cache.h"
#include "cachelib/allocator/memory/MemoryAllocator.h"
#include "cachelib/common/AtomicCounter.h"
#include "cachelib/common/RollingStats.h"

namespace facebook {
namespace cachelib {
@@ -229,6 +230,13 @@ struct Stats {
std::unique_ptr<PerPoolClassAtomicCounters> chainedItemEvictions{};
std::unique_ptr<PerPoolClassAtomicCounters> regularItemEvictions{};

using PerPoolClassRollingStats =
std::array<std::array<util::RollingStats, MemoryAllocator::kMaxClasses>,
MemoryPoolManager::kMaxPools>;

// rolling latency tracking for every alloc class in every pool
std::unique_ptr<PerPoolClassRollingStats> classAllocLatency{};

// Eviction failures due to parent cannot be removed from access container
AtomicCounter evictFailParentAC{0};

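
classAllocLatency follows the same layout as the existing per-pool, per-class counters: a fixed-size 2-D std::array indexed first by pool id and then by class id, heap-allocated behind a unique_ptr. A small sketch of that indexing pattern, using a plain double in place of util::RollingStats and illustrative (not CacheLib's) size constants:

#include <array>
#include <cstddef>
#include <memory>

constexpr std::size_t kMaxPools = 64;    // illustrative only
constexpr std::size_t kMaxClasses = 128; // illustrative only

using PerPoolClassDouble =
    std::array<std::array<double, kMaxClasses>, kMaxPools>;

int main() {
  // Heap-allocate the fixed-size table, as Stats::init() does for classAllocLatency.
  auto table = std::make_unique<PerPoolClassDouble>();
  const std::size_t pid = 3, cid = 7;  // hypothetical pool / class ids
  (*table)[pid][cid] = 1250.0;         // analogous to (*classAllocLatency)[pid][cid]
  return (*table)[pid][cid] > 0 ? 0 : 1;
}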
4 changes: 4 additions & 0 deletions cachelib/allocator/memory/MemoryAllocatorStats.h
@@ -22,6 +22,7 @@
#include <unordered_map>

#include "cachelib/allocator/memory/Slab.h"
#include "cachelib/common/RollingStats.h"

namespace facebook {
namespace cachelib {
@@ -49,6 +50,9 @@ struct ACStats {
// true if the allocation class is full.
bool full;

// Rolling allocation latency (in ns)
util::RollingStats allocLatencyNs;

constexpr unsigned long long totalSlabs() const noexcept {
return freeSlabs + usedSlabs;
}
6 changes: 4 additions & 2 deletions cachelib/cachebench/cache/CacheStats.h
@@ -223,8 +223,10 @@ struct Stats {
: stats.usageFraction();

out << folly::sformat(
"tid{:2} pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f}",
tid, pid, cid, allocSize, allocSizeSuffix, acUsageFraction)
"tid{:2} pid{:2} cid{:4} {:8.2f}{} usageFraction: {:4.2f} "
"rollingAvgAllocLatency: {:8.2f}ns",
tid, pid, cid, allocSize, allocSizeSuffix, acUsageFraction,
stats.allocLatencyNs.estimate())
<< std::endl;
});
}
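
With this change each per-class line in the cachebench stats output also reports the rolling average allocation latency for that class. An illustrative line with hypothetical values (spacing and size suffix approximate):

tid 0 pid 0 cid    5    72.00KB usageFraction: 0.83 rollingAvgAllocLatency:  1523.40ns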
90 changes: 90 additions & 0 deletions cachelib/common/RollingStats.h
@@ -0,0 +1,90 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/Range.h>
#include <folly/logging/xlog.h>

#include <chrono>
#include <limits>

#include "cachelib/common/Utils.h"

namespace facebook {
namespace cachelib {
namespace util {

class RollingStats {
public:
// track latency by taking the value of duration directly.
void trackValue(double value) {
// In the highly unlikely case that cnt_ reaches its numerical limit,
// skip this update and reset the counter.
if (cnt_ == std::numeric_limits<uint64_t>::max()) {
cnt_ = 0;
return;
}
auto ratio = static_cast<double>(cnt_) / (cnt_ + 1);
avg_ *= ratio;
++cnt_;
avg_ += value / cnt_;
}

// Return the rolling average.
double estimate() { return avg_; }

private:
double avg_{0};
uint64_t cnt_{0};
};

class RollingLatencyTracker {
public:
explicit RollingLatencyTracker(RollingStats& stats)
: stats_(&stats), begin_(std::chrono::steady_clock::now()) {}
RollingLatencyTracker() {}
~RollingLatencyTracker() {
if (stats_) {
auto tp = std::chrono::steady_clock::now();
auto diffNanos =
std::chrono::duration_cast<std::chrono::nanoseconds>(tp - begin_)
.count();
stats_->trackValue(static_cast<double>(diffNanos));
}
}

RollingLatencyTracker(const RollingLatencyTracker&) = delete;
RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete;

RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept
: stats_(rhs.stats_), begin_(rhs.begin_) {
rhs.stats_ = nullptr;
}

RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept {
if (this != &rhs) {
this->~RollingLatencyTracker();
new (this) RollingLatencyTracker(std::move(rhs));
}
return *this;
}

private:
RollingStats* stats_{nullptr};
std::chrono::time_point<std::chrono::steady_clock> begin_;
};
} // namespace util
} // namespace cachelib
} // namespace facebook
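
A short usage sketch of the two classes above, outside the allocator: the tracker wraps a timed scope (here simulated with a sleep) and feeds the elapsed nanoseconds into a RollingStats instance, just as the allocation paths in CacheAllocator.h do with stats_.classAllocLatency:

#include <chrono>
#include <iostream>
#include <thread>

#include "cachelib/common/RollingStats.h"

int main() {
  facebook::cachelib::util::RollingStats stats;
  for (int i = 0; i < 5; ++i) {
    // Starts timing at construction...
    facebook::cachelib::util::RollingLatencyTracker tracker{stats};
    // ...stand-in for the allocation work being measured...
    std::this_thread::sleep_for(std::chrono::microseconds(100));
    // ...and records the elapsed nanoseconds when it leaves scope.
  }
  std::cout << "rolling avg latency: " << stats.estimate() << " ns\n";
  return 0;
}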