Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a93f01f
Run centos and debian workflows on push and PR
igchor Nov 2, 2021
2a8fa60
Adds createPutToken and switches findEviction
byrnedj Feb 4, 2023
c3a4db9
Add memory usage statistics for allocation classes
igchor Jul 6, 2022
2529f0a
Initial multi-tier support implementation (rebased with NUMA and cs p…
igchor Sep 28, 2021
3cc41bd
AC stats multi-tier
byrnedj Jan 17, 2023
bf4c244
This commit contains the additional memory tiers tests
byrnedj Feb 8, 2023
c432df6
This is the additional multi-tier support needed
guptask Nov 14, 2022
4cefc44
added per pool class rolling average latency (upstream PR version)
guptask Jul 21, 2022
1f62a63
added per tier pool class rolling average latency (based on upstream PR)
guptask Jul 21, 2022
489ef20
MM2Q promotion iterators (#1)
byrnedj Aug 9, 2022
048c809
CS Patch Part 2 for multi-tier cachelib:
byrnedj Feb 7, 2023
ed7b70f
basic multi-tier test based on numa bindings
igchor Dec 30, 2021
94c4974
Adding new configs to hit_ratio/graph_cache_leader_fobj
vinser52 Jan 27, 2022
afd1456
Do not block reader if a child item is moving
igchor Dec 19, 2022
4f8f425
Background data movement (#20)
byrnedj Oct 21, 2022
6203a95
fix race in moveRegularItemWith sync where insertOrReplace can cause …
byrnedj Feb 16, 2023
6abb498
Fix race in acquire (#68)
igchor Mar 16, 2023
add2e5f
Per tier pool stats (#70)
byrnedj Mar 23, 2023
aedaf97
dummy change to trigger container image rebuild
guptask Mar 28, 2023
1f21fce
Fix token creation and stats (#79)
igchor Apr 27, 2023
9e27d35
Updated the docker gcc version to 12 (#83)
guptask May 9, 2023
da7a6bb
NUMA bindings support for private memory (#82)
vinser52 May 17, 2023
b5ac462
Do not run cachelib-centos-8-5 on PRs (#85)
igchor Jun 6, 2023
50d3ae5
correct handling for expired items in eviction (#86)
byrnedj Jun 6, 2023
5632d18
Add option to insert items to first free tier (#87)
igchor Jun 8, 2023
09d7bab
Chained item movement between tiers - sync on the parent item (#84)
byrnedj Jun 28, 2023
08d8f33
edit dockerfile
byrnedj Jul 24, 2023
316133c
these submodules work
byrnedj Jul 25, 2023
8d2c390
Track latency of per item eviction/promotion between memory tiers
guptask Jul 28, 2023
b99f2b3
Merge pull request #91 from guptask/tier_eviction_latency
guptask Jul 31, 2023
a14f058
modified the cachebench output to make it friendly for parsing
guptask Aug 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix race in moveRegularItemWithSync where insertOrReplace can cause …
…move to fail

 - updated slab release logic for move failure, but there is still an issue
   with slab movement. currently investigating.
  • Loading branch information
byrnedj committed Jul 23, 2023
commit 6203a959632620fd42c82147cebe220e21e92f67
98 changes: 77 additions & 21 deletions cachelib/allocator/CacheAllocator-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1293,8 +1293,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
}

template <typename CacheTrait>
void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
Item& oldItem, WriteHandle& newItemHdl) {
//on function exit - the new item handle is no longer moving
//and other threads may access it - but in case where
//we failed to replace in access container we can give the
//new item back to the allocator
auto guard = folly::makeGuard([&]() {
auto ref = newItemHdl->unmarkMoving();
if (UNLIKELY(ref == 0)) {
const auto res =
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
XDCHECK(res == ReleaseRes::kReleased);
}
});

XDCHECK(oldItem.isMoving());
XDCHECK(!oldItem.isExpired());
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
Expand Down Expand Up @@ -1325,6 +1338,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(

auto replaced = accessContainer_->replaceIf(oldItem, *newItemHdl,
predicate);
// another thread may have called insertOrReplace which could have
// marked this item as unaccessible causing the replaceIf
// in the access container to fail - in this case we want
// to abort the move since the item is no longer valid
if (!replaced) {
return false;
}
// what if another thread calls insertOrReplace now when
// the item is moving and already replaced in the hash table?
// 1. it succeeds in updating the hash table - so there is
// no guarantee that isAccessible() is true
// 2. it will then try to remove from MM container
// - this operation will wait for newItemHdl to
// be unmarkedMoving via the waitContext
// 3. replaced handle is returned and eventually drops
// ref to 0 and the item is recycled back to allocator.

if (config_.moveCb) {
// Execute the move callback. We cannot make any guarantees about the
Expand Down Expand Up @@ -1366,14 +1395,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
XDCHECK(newItemHdl->hasChainedItem());
}
newItemHdl.unmarkNascent();
auto ref = newItemHdl->unmarkMoving();
//remove because there is a chance the new item was not
//added to the access container
if (UNLIKELY(ref == 0)) {
const auto res =
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
XDCHECK(res == ReleaseRes::kReleased);
}
return true;
}

template <typename CacheTrait>
Expand Down Expand Up @@ -1626,28 +1648,43 @@ CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
auto evictedToNext = lastTier ? nullptr
: tryEvictToNextMemoryTier(*candidate, false);
if (!evictedToNext) {
if (!token.isValid()) {
//if insertOrReplace was called during move
//then candidate will not be accessible (failed replace during tryEvict)
// - therefore this was why we failed to
// evict to the next tier and insertOrReplace
// will remove from NVM cache
//however, if candidate is accessible
//that means the allocation in the next
//tier failed - so we will continue to
//evict the item to NVM cache
bool failedToReplace = !candidate->isAccessible();
if (!token.isValid() && !failedToReplace) {
token = createPutToken(*candidate);
}
// tryEvictToNextMemoryTier should only fail if allocation of the new item fails
// in that case, it should be still possible to mark item as exclusive.
// tryEvictToNextMemoryTier can fail if:
// a) allocation of the new item fails in that case,
// it should be still possible to mark item for eviction.
// b) another thread calls insertOrReplace and the item
// is no longer accessible
//
// in case that we are on the last tier, we would have already marked
// as exclusive since we will not be moving the item to the next tier
// but rather just evicting all together, no need to
// markExclusiveWhenMoving
// markForEvictionWhenMoving
auto ret = lastTier ? true : candidate->markForEvictionWhenMoving();
XDCHECK(ret);

unlinkItemForEviction(*candidate);

if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)
&& !failedToReplace) {
nvmCache_->put(*candidate, std::move(token));
}
// wake up any readers that wait for the move to complete
// it's safe to do now, as we have the item marked exclusive and
// no other reader can be added to the waiters list
wakeUpWaiters(*candidate, {});

if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
nvmCache_->put(*candidate, std::move(token));
}
} else {
XDCHECK(!evictedToNext->isMarkedForEviction() && !evictedToNext->isMoving());
XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
Expand Down Expand Up @@ -1776,7 +1813,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(

if (newItemHdl) {
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
moveRegularItemWithSync(item, newItemHdl);
if (!moveRegularItemWithSync(item, newItemHdl)) {
return WriteHandle{};
}
XDCHECK_EQ(newItemHdl->getKey(),item.getKey());
item.unmarkMoving();
return newItemHdl;
} else {
Expand Down Expand Up @@ -1815,7 +1855,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(

if (newItemHdl) {
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
moveRegularItemWithSync(item, newItemHdl);
if (!moveRegularItemWithSync(item, newItemHdl)) {
return WriteHandle{};
}
item.unmarkMoving();
return newItemHdl;
} else {
Expand Down Expand Up @@ -3175,9 +3217,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
// TODO: add support for chained items
return false;
} else {
moveRegularItemWithSync(oldItem, newItemHdl);
removeFromMMContainer(oldItem);
return true;
//move can fail if another thread calls insertOrReplace
//in this case oldItem is no longer valid (not accessible,
//it gets removed from MMContainer and evictForSlabRelease
//will send it back to the allocator
bool ret = moveRegularItemWithSync(oldItem, newItemHdl);
if (!ret) {
//we failed to move - newItemHdl was released back to allocator
//by the moveRegularItemWithSync but oldItem is not accessible
//and no longer valid - we need to clean it up here
XDCHECK(!oldItem.isAccessible());
oldItem.markForEvictionWhenMoving();
unlinkItemForEviction(oldItem);
wakeUpWaiters(oldItem, {});
} else {
removeFromMMContainer(oldItem);
}
return ret;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion cachelib/allocator/CacheAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,7 @@ class CacheAllocator : public CacheBase {
//
// @return true If the move was completed, and the containers were updated
// successfully.
void moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);
bool moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);

// Moves a regular item to a different slab. This should only be used during
// slab release after the item's exclusive bit has been set. The user supplied
Expand Down
5 changes: 5 additions & 0 deletions cachelib/allocator/CacheItem.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ class BaseAllocatorTest;
template <typename AllocatorT>
class AllocatorHitStatsTest;

template <typename AllocatorT>
class AllocatorMemoryTiersTest;

template <typename AllocatorT>
class MapTest;

Expand Down Expand Up @@ -473,6 +476,8 @@ class CACHELIB_PACKED_ATTR CacheItem {
FRIEND_TEST(ItemTest, NonStringKey);
template <typename AllocatorT>
friend class facebook::cachelib::tests::AllocatorHitStatsTest;
template <typename AllocatorT>
friend class facebook::cachelib::tests::AllocatorMemoryTiersTest;
};

// A chained item has a hook pointing to the next chained item. The hook is
Expand Down
3 changes: 2 additions & 1 deletion cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ namespace cachelib {
namespace tests {

using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruAllocator>;

//using LruTestAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruTestAllocator>;
// TODO(MEMORY_TIER): add more tests with different eviction policies
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersInvalid) { this->testMultiTiersInvalid(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid) { this->testMultiTiersValid(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersBackgroundMovers ) { this->testMultiTiersBackgroundMovers(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersRemoveDuringEviction) { this->testMultiTiersRemoveDuringEviction(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEviction) { this->testMultiTiersReplaceDuringEviction(); }
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEvictionWithReader) { this->testMultiTiersReplaceDuringEvictionWithReader(); }

} // end of namespace tests
} // end of namespace cachelib
Expand Down
128 changes: 127 additions & 1 deletion cachelib/allocator/tests/AllocatorMemoryTiersTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include "cachelib/allocator/FreeThresholdStrategy.h"
#include "cachelib/allocator/PromotionStrategy.h"

#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>
#include <semaphore.h>
#include <folly/synchronization/Latch.h>

namespace facebook {
Expand Down Expand Up @@ -58,6 +62,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
}
}

public:
void testMultiTiersInvalid() {
typename AllocatorT::Config config;
Expand Down Expand Up @@ -201,7 +206,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {

t->join();
}

void testMultiTiersReplaceDuringEviction() {
std::unique_ptr<AllocatorT> alloc;
PoolId pool;
Expand Down Expand Up @@ -234,6 +239,127 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
testMultiTiersAsyncOpDuringMove(alloc, pool, quit, moveCb);

t->join();

}


// Deliberately empty no-op functions that exist only as named symbols for
// gdb breakpoints: the scripted gdb session launched by
// testMultiTiersReplaceDuringEvictionWithReader sets breakpoints on these
// functions ("break gdb_sync1", etc.) to pause and resume specific test
// threads at precise points in the race being reproduced. Do not rename or
// inline them — the gdb command script references them by name.
void gdb_sync1() {}
void gdb_sync2() {}
void gdb_sync3() {}
// Convenience alias for the allocator's read handle type.
using ReadHandle = typename AllocatorT::ReadHandle;
// Reproduces the race where insertOrReplace() is called while an item is
// being moved between memory tiers (moveRegularItemWithSync) and a reader
// concurrently calls find() on the same key. The move must fail (the item is
// no longer accessible after the replace), and the reader must observe a
// null handle rather than the stale item.
//
// Thread scheduling cannot be controlled portably from within the test, so
// the test forks a child that attaches gdb to this process with a scripted
// command file (/tmp/gdb1.gdb). The gdb script uses scheduler-locking and
// breakpoints on the gdb_sync* marker functions, moveRegularItemWithSync,
// releaseBackToAllocator, and nativeFutexWaitImpl to step the threads
// through the exact interleaving that triggers the race.
// NOTE(review): the script hard-codes gdb thread numbers (1, 4, 5) —
// presumably stable for this binary/folly version; verify if the test hangs.
void testMultiTiersReplaceDuringEvictionWithReader() {
// Create a named semaphore used to hand-shake with the forked gdb child;
// unlink first in case a previous run left it behind.
sem_unlink ("/gdb1_sem");
sem_t *sem = sem_open ("/gdb1_sem", O_CREAT | O_EXCL, S_IRUSR | S_IWUSR, 0);
// Write the gdb command script to a temp file that the child passes to gdb.
int gdbfd = open("/tmp/gdb1.gdb",O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
char gdbcmds[] =
// Sets the parent's local `attached` flag (see the spin loop below) so the
// parent knows gdb is in control before it starts allocating.
"set attached=1\n"
"break gdb_sync1\n"
"break gdb_sync2\n"
"break moveRegularItemWithSync\n"
"c\n"
// From here on only the explicitly selected thread runs.
"set scheduler-locking on\n"
"thread 1\n"
"c\n"
"thread 4\n"
"c\n"
"thread 5\n"
"break nativeFutexWaitImpl thread 5\n"
"c\n"
"thread 4\n"
"break nativeFutexWaitImpl thread 4\n"
"c\n"
"thread 1\n"
"break releaseBackToAllocator\n"
"c\n"
"c\n"
"thread 5\n"
"c\n"
"thread 4\n"
"c\n"
"thread 1\n"
"break gdb_sync3\n"
"c\n"
"quit\n";
int ret = write(gdbfd,gdbcmds,strlen(gdbcmds));
int ppid = getpid(); //parent pid — the process gdb will attach to
//int pid = 0;
int pid = fork();
if (pid == 0) {
// Child: wait until the parent signals it is ready, then replace
// ourselves with gdb attached to the parent, driven by the script above.
sem_wait(sem);
sem_close(sem);
sem_unlink("/gdb1_sem");
char cmdpid[256];
sprintf(cmdpid,"%d",ppid);
int f = execlp("gdb","gdb","--pid",cmdpid,"--batch-silent","--command=/tmp/gdb1.gdb",(char*) 0);
// execlp only returns on failure.
ASSERT(f != -1);
}
// Parent: let the child proceed to attach gdb.
sem_post(sem);
//wait for gdb to run
// Busy-wait until the attached gdb executes "set attached=1" in this
// frame. Intentionally not volatile/atomic: the write comes from the
// debugger, not another thread, and gdb halts the process to perform it.
int attached = 0;
while (attached == 0);

std::unique_ptr<AllocatorT> alloc;
PoolId pool;
bool quit = false;

// Two identical shared-memory tiers; small cache (4 slabs) so eviction —
// and hence a cross-tier move — is triggered quickly by the fill loop.
typename AllocatorT::Config config;
config.setCacheSize(4 * Slab::kSize);
config.enableCachePersistence("/tmp");
config.configureMemoryTiers({
MemoryTierCacheConfig::fromShm()
.setRatio(1).setMemBind(std::string("0")),
MemoryTierCacheConfig::fromShm()
.setRatio(1).setMemBind(std::string("0"))
});

alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
ASSERT(alloc != nullptr);
pool = alloc->addPool("default", alloc->getCacheMemoryStats().ramCacheSize);

int i = 0;
// Raw pointer to the first allocated item — the one that will be chosen
// for eviction/move once the cache fills. Held raw (not via handle) so the
// test can observe its moving/accessible flags without pinning it.
typename AllocatorT::Item* evicted;
std::unique_ptr<std::thread> t;
std::unique_ptr<std::thread> r;
// Main thread fills the cache until the gdb-orchestrated race completes
// (the writer thread sets quit = true).
while(!quit) {
auto handle = alloc->allocate(pool, std::to_string(++i), std::string("value").size());
ASSERT(handle != nullptr);
if (i == 1) {
evicted = static_cast<typename AllocatorT::Item*>(handle.get());
folly::Latch latch_t(1);
// Writer thread: pre-allocates a replacement for key "1", then — under
// gdb control — calls insertOrReplace while key "1" is mid-move.
t = std::make_unique<std::thread>([&](){
auto handleNew = alloc->allocate(pool, std::to_string(1), std::string("new value").size());
ASSERT(handleNew != nullptr);
latch_t.count_down();
//first breakpoint will be this one because
//thread 1 still has more items to fill up the
//cache before an evict is evicted
gdb_sync1();
ASSERT(evicted->isMoving());
//need to suspend thread 1 - who is doing the eviction
//gdb will do this for us
folly::Latch latch(1);
// Reader thread: looks up the key while the item is still moving;
// find() must wait for the move and then return null (move failed).
r = std::make_unique<std::thread>([&](){
ASSERT(evicted->isMoving());
latch.count_down();
auto handleEvict = alloc->find(std::to_string(1));
//does find block until done moving?? yes
while (evicted->isMarkedForEviction()); //move will fail
XDCHECK(handleEvict == nullptr) << handleEvict->toString();
ASSERT(handleEvict == nullptr);
});
latch.wait();
gdb_sync2();
// The racing replace: detaches the old item from the access
// container, which forces the in-flight tier move to abort.
alloc->insertOrReplace(handleNew);
ASSERT(!evicted->isAccessible()); //move failed
quit = true;
});
latch_t.wait();
}
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
}
t->join();
r->join();
// Final gdb breakpoint: lets the scripted session detach and quit cleanly.
gdb_sync3();
}
};
} // namespace tests
Expand Down