Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lib/Service/NegativeSampleGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ private function getUniqueIPsPerUser(Dataset $positives): array {
private function generateFromRealData(array $uidVec, array $uniqueIps): array {
return array_merge(
$uidVec,
empty($uniqueIps) ? [] : $uniqueIps[random_int(0, count($uniqueIps) - 1)]
$uniqueIps[random_int(0, count($uniqueIps) - 1)]
);
}

Expand Down Expand Up @@ -96,6 +96,10 @@ public function generateShuffledFromPositiveSamples(DataSet $positives, int $num
$max = count($positives);
$uniqueIps = $this->getUniqueIPsPerUser($positives);

if ($uniqueIps === []) {
return new Labeled();
}

return new Labeled(
array_map(function (int $id) use ($uniqueIps, $positives, $max) {
$sample = $positives->sample($id % $max);
Expand Down
34 changes: 21 additions & 13 deletions tests/Unit/Service/NegativeSampleGeneratorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,20 +112,9 @@ public function testGenerateMultipleShuffledFromLimitedUnique(): void {
self::assertTrue(
$ipVec == self::decToBitArray(3, 32) ||
$ipVec === self::decToBitArray(4, 32),
'sample has a unique IP'
'Sample must have an unique IP'
);
}

$positives = new Unlabeled([
array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)),
array_merge(self::decToBitArray(2, 16), self::decToBitArray(1, 32)),
array_merge(self::decToBitArray(3, 16), self::decToBitArray(1, 32)),
array_merge(self::decToBitArray(4, 16), self::decToBitArray(1, 32)),
]);

$result = $this->generator->generateShuffledFromPositiveSamples($positives, 5);

self::assertCount(5, $result);
}

/**
Expand Down Expand Up @@ -154,11 +143,30 @@ public function testGenerateMultipleShuffledFromUniquesOnly(): void {
self::assertTrue(
$ipVec === self::decToBitArray(1, 32) ||
$ipVec === self::decToBitArray(2, 32),
'Sample has an unique IP'
'Sample must have an unique IP'
);
}
}

/**
 * Shuffled negative samples cannot be produced when no user has a unique IP:
 * there is nothing to swap in that would differ from the positive sample.
 * The generator is expected to hand back an empty result in that situation
 * instead of failing while merging vectors. See GitHub issue #860 for more.
 */
public function testGenerateShuffledFromDuplicatesOnly(): void {
    // Four different users (IDs 1 to 4) that all share the very same IP vector.
    $rows = [];
    foreach ([1, 2, 3, 4] as $uid) {
        $rows[] = array_merge(
            self::decToBitArray($uid, 16),
            self::decToBitArray(1, 32)
        );
    }
    $duplicatesOnly = new Unlabeled($rows);

    $generated = $this->generator->generateShuffledFromPositiveSamples($duplicatesOnly, 4);

    self::assertCount(0, $generated, 'Returned sample must be empty');
}

/**
* @return int[]
*/
Expand Down
Loading