diff --git a/lib/Service/NegativeSampleGenerator.php b/lib/Service/NegativeSampleGenerator.php index 6081bd13..6d1ee89d 100644 --- a/lib/Service/NegativeSampleGenerator.php +++ b/lib/Service/NegativeSampleGenerator.php @@ -59,7 +59,7 @@ private function getUniqueIPsPerUser(Dataset $positives): array { private function generateFromRealData(array $uidVec, array $uniqueIps): array { return array_merge( $uidVec, - empty($uniqueIps) ? [] : $uniqueIps[random_int(0, count($uniqueIps) - 1)] + $uniqueIps[random_int(0, count($uniqueIps) - 1)] ); } @@ -96,6 +96,10 @@ public function generateShuffledFromPositiveSamples(DataSet $positives, int $num $max = count($positives); $uniqueIps = $this->getUniqueIPsPerUser($positives); + if ($uniqueIps === []) { + return new Labeled(); + } + return new Labeled( array_map(function (int $id) use ($uniqueIps, $positives, $max) { $sample = $positives->sample($id % $max); diff --git a/tests/Unit/Service/NegativeSampleGeneratorTest.php b/tests/Unit/Service/NegativeSampleGeneratorTest.php index 2b1cd611..f65f8f00 100644 --- a/tests/Unit/Service/NegativeSampleGeneratorTest.php +++ b/tests/Unit/Service/NegativeSampleGeneratorTest.php @@ -112,20 +112,9 @@ public function testGenerateMultipleShuffledFromLimitedUnique(): void { self::assertTrue( $ipVec == self::decToBitArray(3, 32) || $ipVec === self::decToBitArray(4, 32), - 'sample has a unique IP' + 'Sample must have an unique IP' ); } - - $positives = new Unlabeled([ - array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)), - array_merge(self::decToBitArray(2, 16), self::decToBitArray(1, 32)), - array_merge(self::decToBitArray(3, 16), self::decToBitArray(1, 32)), - array_merge(self::decToBitArray(4, 16), self::decToBitArray(1, 32)), - ]); - - $result = $this->generator->generateShuffledFromPositiveSamples($positives, 5); - - self::assertCount(5, $result); } /** @@ -154,11 +143,30 @@ public function testGenerateMultipleShuffledFromUniquesOnly(): void { self::assertTrue( $ipVec === self::decToBitArray(1, 32) || $ipVec === self::decToBitArray(2, 32), - 'Sample has an unique IP' + 'Sample must have an unique IP' ); } } + /** + * Generating shuffled samples isn't possible when no user has an unique IP. + * In that case, we have to return an empty Labeled() object as merging will + * fail otherwise. See GitHub issue #860 for more. + * @return void + */ + public function testGenerateShuffledFromDuplicatesOnly(): void { + $positives = new Unlabeled([ + array_merge(self::decToBitArray(1, 16), self::decToBitArray(1, 32)), + array_merge(self::decToBitArray(2, 16), self::decToBitArray(1, 32)), + array_merge(self::decToBitArray(3, 16), self::decToBitArray(1, 32)), + array_merge(self::decToBitArray(4, 16), self::decToBitArray(1, 32)), + ]); + + $result = $this->generator->generateShuffledFromPositiveSamples($positives, 4); + + self::assertCount(0, $result, 'Returned sample must be empty'); + } + /** * @return int[] */