Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor: change allocator only for array_of_strings
  • Loading branch information
proost committed Feb 6, 2026
commit 25ce65cfd2c20995e99b2e8dd0cea99b308925f3
42 changes: 15 additions & 27 deletions tuple/include/array_of_strings_sketch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,24 @@

namespace datasketches {

using array_of_strings = array<std::string>;

// default update policy for an array of strings
template<typename Allocator = std::allocator<std::string>>
class default_array_of_strings_update_policy {
public:
using array_of_strings = array<std::string, Allocator>;

explicit default_array_of_strings_update_policy(const Allocator& allocator = Allocator());
default_array_of_strings_update_policy() = default;

array_of_strings create() const;

void update(array_of_strings& array, const array_of_strings& input) const;

void update(array_of_strings& array, const array_of_strings* input) const;

private:
Allocator allocator_;
};

// serializer/deserializer for an array of strings
// Requirements: all strings must be valid UTF-8 and array size must be <= 127.
template<typename Allocator = std::allocator<std::string>>
template<typename Allocator = std::allocator<array_of_strings>>
struct default_array_of_strings_serde {
using array_of_strings = array<std::string, Allocator>;
using summary_allocator = typename std::allocator_traits<Allocator>::template rebind_alloc<array_of_strings>;

explicit default_array_of_strings_serde(const Allocator& allocator = Allocator());
Expand All @@ -62,7 +57,6 @@ struct default_array_of_strings_serde {
size_t size_of_item(const array_of_strings& item) const;

private:
Allocator allocator_;
summary_allocator summary_allocator_;
static void check_num_nodes(uint8_t num_nodes);
static uint32_t compute_total_bytes(const array_of_strings& item);
Expand All @@ -72,23 +66,17 @@ struct default_array_of_strings_serde {
/**
* Hashes an array of strings using ArrayOfStrings-compatible hashing.
*/
template<typename Allocator = std::allocator<std::string>>
uint64_t hash_array_of_strings_key(const array<std::string, Allocator>& key);
uint64_t hash_array_of_strings_key(const array_of_strings& key);

/**
* Extended class of compact_tuple_sketch for array of strings
* Requirements: all strings must be valid UTF-8 and array size must be <= 127.
*/
template<typename Allocator = std::allocator<std::string>>
template<typename Allocator = std::allocator<array_of_strings>>
class compact_array_of_strings_tuple_sketch:
public compact_tuple_sketch<
array<std::string, Allocator>,
typename std::allocator_traits<Allocator>::template rebind_alloc<array<std::string, Allocator>>
> {
public compact_tuple_sketch<array_of_strings, Allocator> {
public:
using array_of_strings = array<std::string, Allocator>;
using summary_allocator = typename std::allocator_traits<Allocator>::template rebind_alloc<array_of_strings>;
using Base = compact_tuple_sketch<array_of_strings, summary_allocator>;
using Base = compact_tuple_sketch<array_of_strings, Allocator>;
using vector_bytes = typename Base::vector_bytes;
using Base::serialize;

Expand Down Expand Up @@ -133,13 +121,13 @@ class compact_array_of_strings_tuple_sketch:
/**
* Convenience alias for update_tuple_sketch for array of strings
*/
template<typename Allocator = std::allocator<std::string>,
typename Policy = default_array_of_strings_update_policy<Allocator>>
template<typename Allocator = std::allocator<array_of_strings>,
typename Policy = default_array_of_strings_update_policy>
using update_array_of_strings_tuple_sketch = update_tuple_sketch<
array<std::string, Allocator>,
array<std::string, Allocator>,
array_of_strings,
array_of_strings,
Policy,
typename std::allocator_traits<Allocator>::template rebind_alloc<array<std::string, Allocator>>
Allocator
>;

/**
Expand All @@ -148,12 +136,12 @@ using update_array_of_strings_tuple_sketch = update_tuple_sketch<
* @param ordered optional flag to specify if an ordered sketch should be produced
* @return compact array of strings sketch
*/
template<typename Allocator = std::allocator<std::string>, typename Policy = default_array_of_strings_update_policy<Allocator>>
template<typename Allocator = std::allocator<array_of_strings>, typename Policy = default_array_of_strings_update_policy>
compact_array_of_strings_tuple_sketch<Allocator> compact_array_of_strings_sketch(
const update_array_of_strings_tuple_sketch<Allocator, Policy>& sketch, bool ordered = true);

} /* namespace datasketches */

#include "array_of_strings_sketch_impl.hpp"

#endif
#endif
38 changes: 14 additions & 24 deletions tuple/include/array_of_strings_sketch_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,44 +22,37 @@

#include <stdexcept>

#include "array_of_strings_sketch.hpp"
#include "common_defs.hpp"
#include "third_party/utf8cpp/utf8.h"

namespace datasketches {

template<typename Allocator>
default_array_of_strings_update_policy<Allocator>::default_array_of_strings_update_policy(const Allocator& allocator):
allocator_(allocator) {}

template<typename Allocator>
auto default_array_of_strings_update_policy<Allocator>::create() const -> array_of_strings {
return array_of_strings(0, "", allocator_);
inline array_of_strings default_array_of_strings_update_policy::create() const {
return array_of_strings(0, "");
}

template<typename Allocator>
void default_array_of_strings_update_policy<Allocator>::update(
inline void default_array_of_strings_update_policy::update(
array_of_strings& array, const array_of_strings& input
) const {
const auto length = static_cast<size_t>(input.size());
array = array_of_strings(static_cast<uint8_t>(length), "", allocator_);
array = array_of_strings(static_cast<uint8_t>(length), "");
for (size_t i = 0; i < length; ++i) array[i] = input[i];
}

template<typename Allocator>
void default_array_of_strings_update_policy<Allocator>::update(
inline void default_array_of_strings_update_policy::update(
array_of_strings& array, const array_of_strings* input
) const {
if (input == nullptr) {
array = array_of_strings(0, "", allocator_);
array = array_of_strings(0, "");
return;
}
const auto length = static_cast<size_t>(input->size());
array = array_of_strings(static_cast<uint8_t>(length), "", allocator_);
array = array_of_strings(static_cast<uint8_t>(length), "");
for (size_t i = 0; i < length; ++i) array[i] = (*input)[i];
}

template<typename Allocator>
uint64_t hash_array_of_strings_key(const array<std::string, Allocator>& key) {
inline uint64_t hash_array_of_strings_key(const array_of_strings& key) {
// Matches Java Util.PRIME for ArrayOfStrings key hashing.
static constexpr uint64_t STRING_ARR_HASH_SEED = 0x7A3CCA71ULL;
XXHash64 hasher(STRING_ARR_HASH_SEED);
Expand Down Expand Up @@ -95,8 +88,7 @@ template<typename SerDe>
auto compact_array_of_strings_tuple_sketch<Allocator>::deserialize(
std::istream& is, uint64_t seed, const SerDe& sd, const Allocator& allocator
) -> compact_array_of_strings_tuple_sketch {
summary_allocator alloc(allocator);
auto base = Base::deserialize(is, seed, sd, alloc);
auto base = Base::deserialize(is, seed, sd, allocator);
return compact_array_of_strings_tuple_sketch(std::move(base));
}

Expand All @@ -105,14 +97,12 @@ template<typename SerDe>
auto compact_array_of_strings_tuple_sketch<Allocator>::deserialize(
const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const Allocator& allocator
) -> compact_array_of_strings_tuple_sketch {
summary_allocator alloc(allocator);
auto base = Base::deserialize(bytes, size, seed, sd, alloc);
auto base = Base::deserialize(bytes, size, seed, sd, allocator);
return compact_array_of_strings_tuple_sketch(std::move(base));
}

template<typename Allocator>
default_array_of_strings_serde<Allocator>::default_array_of_strings_serde(const Allocator& allocator):
allocator_(allocator),
summary_allocator_(allocator) {}

template<typename Allocator>
Expand Down Expand Up @@ -144,7 +134,7 @@ void default_array_of_strings_serde<Allocator>::deserialize(
const uint8_t num_nodes = read<uint8_t>(is);
if (!is) throw std::runtime_error("array_of_strings stream read failed");
check_num_nodes(num_nodes);
array_of_strings array(num_nodes, "", allocator_);
array_of_strings array(num_nodes, "");
for (uint8_t j = 0; j < num_nodes; ++j) {
const uint32_t length = read<uint32_t>(is);
if (!is) throw std::runtime_error("array_of_strings stream read failed");
Expand Down Expand Up @@ -202,7 +192,7 @@ size_t default_array_of_strings_serde<Allocator>::deserialize(
uint8_t num_nodes;
bytes_read += copy_from_mem(ptr8 + bytes_read, num_nodes);
check_num_nodes(num_nodes);
array_of_strings array(num_nodes, "", allocator_);
array_of_strings array(num_nodes, "");
for (uint8_t j = 0; j < num_nodes; ++j) {
uint32_t length;
bytes_read += copy_from_mem(ptr8 + bytes_read, length);
Expand Down Expand Up @@ -252,4 +242,4 @@ void default_array_of_strings_serde<Allocator>::check_utf8(const std::string& va

} /* namespace datasketches */

#endif
#endif
4 changes: 1 addition & 3 deletions tuple/test/array_of_strings_sketch_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,8 @@

namespace datasketches {

using array_of_strings = array<std::string>;

TEST_CASE("aos update policy", "[tuple_sketch]") {
default_array_of_strings_update_policy<> policy;
default_array_of_strings_update_policy policy;

SECTION("create empty") {
auto values = policy.create();
Expand Down