Skip to content

Commit

Permalink
Merge pull request #32877 from vespa-engine/toregge/trim-small-postin…
Browse files Browse the repository at this point in the history
…g-lists-to-limit-cache-bloat

Trim small posting lists to limit cache bloat.
  • Loading branch information
geirst authored Nov 18, 2024
2 parents dec87ae + 8097749 commit 543892b
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 9 deletions.
3 changes: 3 additions & 0 deletions searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,9 @@ DiskIndexTest::requireThatWeCanReadPostingList()
auto r = _index->lookup(0, "w1");
auto& field_index = _index->get_field_index(0);
auto h = field_index.read_posting_list(r);
if (field_index.is_posting_list_cache_enabled()) {
EXPECT_GT(64, h._allocSize);
}
auto sb = field_index.create_iterator(r, h, mda);
EXPECT_EQ(SimpleResult({1,3}), SimpleResult().search(*sb));
}
Expand Down
9 changes: 6 additions & 3 deletions searchlib/src/vespa/searchlib/diskindex/field_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,15 @@ FieldIndex::reuse_files(const FieldIndex& rhs)
}

// Read a posting list straight from the posting file, without going through
// the posting list cache.
//
// lookup_result: dictionary lookup result identifying the posting list.
// trim:          when true, ask the posting file to shrink the returned buffer
//                if it is more than 20% larger than needed.
//
// A non-zero handle._read_bytes is reported as one uncached read operation to
// the disk io stats before any trimming happens.
//
// NOTE(review): the two signature lines below look like the before/after pair
// from the diff view (old one-argument form, new form with 'trim') - confirm.
PostingListHandle
FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result) const
FieldIndex::read_uncached_posting_list(const DictionaryLookupResult& lookup_result, bool trim) const
{
auto handle = _posting_file->read_posting_list(lookup_result);
if (handle._read_bytes != 0) {
// Only account for reads that actually transferred bytes.
_cache_disk_io_stats->add_uncached_read_operation(handle._read_bytes);
}
if (trim) {
_posting_file->consider_trim_posting_list(lookup_result, handle, 0.2); // Trim posting list if more than 20% bloat
}
return handle;
}

Expand All @@ -188,7 +191,7 @@ FieldIndex::read(const IPostingListCache::Key& key, IPostingListCache::Context&
DictionaryLookupResult lookup_result;
lookup_result.bitOffset = key.bit_offset;
lookup_result.counts._bitLength = key.bit_length;
return read_uncached_posting_list(lookup_result);
return read_uncached_posting_list(lookup_result, true);
}

PostingListHandle
Expand All @@ -199,7 +202,7 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const
return {};
}
if (file->getMemoryMapped() || !_posting_list_cache_enabled) {
return read_uncached_posting_list(lookup_result);
return read_uncached_posting_list(lookup_result, false);
}
IPostingListCache::Key key;
key.file_id = _file_id;
Expand Down
4 changes: 3 additions & 1 deletion searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
bool open_dictionary(const std::string& field_dir, const TuneFileSearch& tune_file_search);
bool open(const std::string& field_dir, const TuneFileSearch &tune_file_search);
void reuse_files(const FieldIndex& rhs);
index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
index::PostingListHandle read_uncached_posting_list(const search::index::DictionaryLookupResult &lookup_result,
bool trim) const;
index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override;
index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const;
Expand All @@ -87,6 +88,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); }
FieldIndexStats get_stats() const;
uint32_t get_field_id() const noexcept { return _field_id; }
bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; }
};

}
39 changes: 36 additions & 3 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ ZcPosOccRandRead::ZcPosOccRandRead()
_fileBitSize(0),
_headerBitSize(0),
_fieldsParams()
{ }
{
}


ZcPosOccRandRead::~ZcPosOccRandRead()
Expand Down Expand Up @@ -108,8 +109,8 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
size_t padExtraAfter; // Decode prefetch space
_file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter);
padExtraAfter = 0;
if (padAfter < 16) {
padExtraAfter = 16 - padAfter;
if (padAfter < decode_prefetch_size) {
padExtraAfter = decode_prefetch_size - padAfter;
}

size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter;
Expand Down Expand Up @@ -137,6 +138,37 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
return handle;
}

/**
 * Shrink the buffer owned by a posting list handle when it is noticeably
 * larger than the posting list needs (e.g. due to direct I/O padding).
 *
 * The needed size is the byte range covering the posting list bits, widened
 * to 8-byte boundaries at both ends, plus decode_prefetch_size bytes after it.
 * Nothing happens when the posting list is empty, when the file is memory
 * mapped, when the buffer already has exactly the needed size, or when the
 * buffer is at most (1 + bloat_factor) times the needed size.
 *
 * When trimming, the needed number of bytes is copied from handle._mem into a
 * fresh malloc'ed buffer, which replaces handle._allocMem / handle._mem;
 * handle._allocSize becomes the new buffer size and handle._read_bytes becomes
 * the aligned byte range (without the prefetch slack).
 *
 * Trimming is an optimization only: if the smaller buffer cannot be allocated
 * the handle is left untouched instead of dereferencing a null pointer.
 *
 * @param lookup_result bit offset and bit length of the posting list.
 * @param handle        handle to trim in place.
 * @param bloat_factor  tolerated relative overhead (0.2 means 20%).
 */
void
ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
                                             double bloat_factor) const
{
    if (lookup_result.counts._bitLength == 0 || _memoryMapped) {
        return;
    }
    uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3;
    // Align start at 64-bit boundary
    start_offset -= (start_offset & 7);
    uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize +
                           lookup_result.counts._bitLength + 7) >> 3;
    // Align end at 64-bit boundary
    end_offset += (-end_offset & 7);
    size_t malloc_len = end_offset - start_offset + decode_prefetch_size;
    if (handle._allocSize == malloc_len) {
        // Already minimal; such a buffer is expected to start at the data.
        assert(handle._allocMem.get() == handle._mem);
        return;
    }
    assert(handle._allocSize >= malloc_len);
    if (handle._allocSize <= malloc_len * (1.0 + bloat_factor)) {
        return; // Overhead is within the tolerated bloat.
    }
    auto *mem = malloc(malloc_len);
    if (mem == nullptr) {
        // Keep the larger (still valid) buffer rather than copying into null.
        return;
    }
    // NOTE(review): assumes at least malloc_len bytes are readable from
    // handle._mem (data plus prefetch slack) - confirm against read_posting_list.
    memcpy(mem, handle._mem, malloc_len);
    handle._allocMem = std::shared_ptr<void>(mem, free);
    handle._mem = mem;
    handle._allocSize = malloc_len;
    handle._read_bytes = end_offset - start_offset;
}

bool
ZcPosOccRandRead::
Expand All @@ -156,6 +188,7 @@ open(const std::string &name, const TuneFileRandRead &tuneFileRead)
_fileSize = _file->getSize();

readHeader();
afterOpen(*_file);
return true;
}

Expand Down
4 changes: 4 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
uint64_t _headerBitSize;
bitcompression::PosOccFieldsParams _fieldsParams;

// Number of bytes kept available after the posting list data as decode
// prefetch space; used both when padding reads and when sizing trimmed buffers.
static constexpr size_t decode_prefetch_size = 16;

public:
ZcPosOccRandRead();
~ZcPosOccRandRead();
Expand All @@ -42,6 +44,8 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
* Read (possibly partial) posting list into handle.
*/
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down
7 changes: 7 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ PostingListFileRandReadPassThrough::read_posting_list(const DictionaryLookupResu
return _lower->read_posting_list(lookup_result);
}

// Pass-through: hand the trim request, with all arguments unchanged, to the
// wrapped lower-level reader.
void
PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryLookupResult &lookup_result,
                                                               PostingListHandle &handle, double bloat_factor) const
{
    _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor);
}

bool
PostingListFileRandReadPassThrough::open(const std::string &name,
const TuneFileRandRead &tuneFileRead)
Expand Down
11 changes: 9 additions & 2 deletions searchlib/src/vespa/searchlib/index/postinglistfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,13 @@ class PostingListFileRandRead {
/**
* Read posting list into handle.
*/
virtual PostingListHandle
read_posting_list(const DictionaryLookupResult& lookup_result) = 0;
virtual PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) = 0;

/**
 * Remove directio padding from posting list if bloat is excessive.
 *
 * @param lookup_result dictionary lookup result for the posting list that
 *                      was read into handle.
 * @param handle        posting list handle that may be modified in place.
 * @param bloat_factor  tolerated relative overhead before trimming; the
 *                      visible caller passes 0.2, i.e. trim when the buffer
 *                      is more than 20% larger than needed.
 */
virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const = 0;

/**
* Open posting list file for random read.
Expand Down Expand Up @@ -199,6 +204,8 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead {
const search::fef::TermFieldMatchDataArray &matchData) const override;

PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down

0 comments on commit 543892b

Please sign in to comment.