Skip to content

Commit

Permalink
Merge pull request #32585 from vespa-engine/toregge/add-field-index-s…
Browse files Browse the repository at this point in the history
…tats

Add field index stats.
  • Loading branch information
geirst authored Oct 14, 2024
2 parents c2a6790 + 5657c44 commit 6f42420
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,21 @@ TEST(SearchableStatsTest, stats_can_be_merged)
EXPECT_EQ(1300u, stats.fusion_size_on_disk());
}

// Verifies that merging two SearchableStats combines per-field stats:
// fields present on only one side are carried over as-is, while a field
// present on both sides ("f2") has its memory usage and disk size summed.
TEST(SearchableStatsTest, field_stats_can_be_merged)
{
    // Per-field fixtures, named after the field and the side they belong to.
    const auto f1_base = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000);
    const auto f2_base = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500);
    const auto f2_added = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500);
    const auto f3_added = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500);
    // Component-wise sum of f2_base and f2_added.
    const auto f2_merged = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000);

    SearchableStats base_stats;
    base_stats.add_field_stats("f1", f1_base);
    base_stats.add_field_stats("f2", f2_base);

    SearchableStats added_stats;
    added_stats.add_field_stats("f2", f2_added);
    added_stats.add_field_stats("f3", f3_added);

    SearchableStats act_stats(base_stats);
    act_stats.merge(added_stats);

    SearchableStats exp_stats;
    exp_stats.add_field_stats("f1", f1_base);
    exp_stats.add_field_stats("f2", f2_merged);
    exp_stats.add_field_stats("f3", f3_added);

    EXPECT_EQ(exp_stats, act_stats);
}

GTEST_MAIN_RUN_ALL_TESTS()
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ vespa_add_library(searchlib_util OBJECT
data_buffer_writer.cpp
dirtraverse.cpp
drainingbufferwriter.cpp
field_index_stats.cpp
file_with_header.cpp
filealign.cpp
fileheadertk.cpp
Expand All @@ -17,6 +18,7 @@ vespa_add_library(searchlib_util OBJECT
linguisticsannotation.cpp
logutil.cpp
rawbuf.cpp
searchable_stats.cpp
slime_output_raw_buf_adapter.cpp
state_explorer_utils.cpp
token_extractor.cpp
Expand Down
13 changes: 13 additions & 0 deletions searchlib/src/vespa/searchlib/util/field_index_stats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "field_index_stats.h"
#include <ostream>

namespace search {

// Streams the stats as "{memory: <memory usage>, disk: <size on disk>}".
std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) {
    return os << "{memory: " << stats.memory_usage()
              << ", disk: " << stats.size_on_disk()
              << "}";
}

}
46 changes: 46 additions & 0 deletions searchlib/src/vespa/searchlib/util/field_index_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/vespalib/util/memoryusage.h>

namespace search {

/**
 * Statistics for a single field index: its memory usage and its size on disk.
 *
 * The setters return a reference to the object so calls can be chained.
 **/
class FieldIndexStats
{
private:
    vespalib::MemoryUsage _memory_usage;
    size_t                _size_on_disk; // in bytes

public:
    /** Creates stats with default memory usage and zero bytes on disk. */
    FieldIndexStats() noexcept : _memory_usage{}, _size_on_disk{0} {}

    /** Replaces the memory usage; returns *this for chaining. */
    FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept {
        _memory_usage = usage;
        return *this;
    }
    /** Returns the current memory usage. */
    const vespalib::MemoryUsage &memory_usage() const noexcept {
        return _memory_usage;
    }

    /** Replaces the size on disk (bytes); returns *this for chaining. */
    FieldIndexStats &size_on_disk(size_t value) noexcept {
        _size_on_disk = value;
        return *this;
    }
    /** Returns the size on disk in bytes. */
    size_t size_on_disk() const noexcept {
        return _size_on_disk;
    }

    /** Adds the memory usage and disk size of rhs into this object. */
    void merge(const FieldIndexStats &rhs) noexcept {
        _size_on_disk += rhs._size_on_disk;
        _memory_usage.merge(rhs._memory_usage);
    }

    /** Two stats objects are equal when both memory usage and disk size match. */
    bool operator==(const FieldIndexStats& rhs) const noexcept {
        if (!(_memory_usage == rhs._memory_usage)) {
            return false;
        }
        return _size_on_disk == rhs._size_on_disk;
    }
};

std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats);

}
64 changes: 64 additions & 0 deletions searchlib/src/vespa/searchlib/util/searchable_stats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "searchable_stats.h"
#include <ostream>

namespace search {

SearchableStats::SearchableStats()
: _memoryUsage(),
_docsInMemory(0),
_sizeOnDisk(0),
_fusion_size_on_disk(0),
_field_stats()
{
}

// Compiler-generated destructor, defined out of line in this translation unit.
SearchableStats::~SearchableStats() = default;

// Accumulates rhs into this object: the aggregate memory usage and counters
// are summed, and each per-field entry of rhs is merged into the entry with
// the same name here (the entry is created first if this object lacks it).
// Returns *this so calls can be chained.
SearchableStats&
SearchableStats::merge(const SearchableStats &rhs) {
    _memoryUsage.merge(rhs._memoryUsage);
    _docsInMemory        += rhs._docsInMemory;
    _sizeOnDisk          += rhs._sizeOnDisk;
    _fusion_size_on_disk += rhs._fusion_size_on_disk;
    for (auto& entry : rhs._field_stats) {
        FieldIndexStats& target = _field_stats[entry.first];
        target.merge(entry.second);
    }
    return *this;
}

// Equality requires the aggregate memory usage, every counter and the full
// per-field stats map to match.
bool
SearchableStats::operator==(const SearchableStats& rhs) const noexcept
{
    if (!(_memoryUsage == rhs._memoryUsage)) {
        return false;
    }
    if (_docsInMemory != rhs._docsInMemory) {
        return false;
    }
    if (_sizeOnDisk != rhs._sizeOnDisk) {
        return false;
    }
    if (_fusion_size_on_disk != rhs._fusion_size_on_disk) {
        return false;
    }
    return _field_stats == rhs._field_stats;
}

// Merges stats into the entry for the named field, creating the entry first
// if it does not exist yet. Adding the same name twice therefore accumulates
// rather than overwrites. Returns *this so calls can be chained.
SearchableStats&
SearchableStats::add_field_stats(const std::string& name, const FieldIndexStats& stats)
{
    FieldIndexStats& entry = _field_stats[name];
    entry.merge(stats);
    return *this;
}

// Streams a human-readable rendering of the stats:
//   {memory: <usage>, docsInMemory: <n>, disk: <n>, fusion_size_on_disk: <n>,
//    fields: {"<name>": <field stats>, ...}}
// Field entries are written in the iteration order of get_field_stats() and
// separated by ", ".
std::ostream& operator<<(std::ostream& os, const SearchableStats& stats) {
    os << "{memory: " << stats.memoryUsage() << ", docsInMemory: " << stats.docsInMemory() <<
        ", disk: " << stats.sizeOnDisk() << ", fusion_size_on_disk: " << stats.fusion_size_on_disk() << ", ";
    os << "fields: {";
    bool first = true;
    for (const auto& field : stats.get_field_stats()) {
        if (!first) {
            os << ", ";
        }
        first = false;
        os << "\"" << field.first << "\": " << field.second;
    }
    // Two braces were opened above: the first one here closes "fields", the
    // second closes the outer object. Emitting only one left the output unbalanced.
    os << "}}";
    return os;
}

}
21 changes: 11 additions & 10 deletions searchlib/src/vespa/searchlib/util/searchable_stats.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/vespalib/util/memoryusage.h>
#include "field_index_stats.h"
#include <map>

namespace search {

Expand All @@ -17,9 +18,11 @@ class SearchableStats
size_t _docsInMemory;
size_t _sizeOnDisk; // in bytes
size_t _fusion_size_on_disk; // in bytes
std::map<std::string, FieldIndexStats> _field_stats;

public:
SearchableStats() : _memoryUsage(), _docsInMemory(0), _sizeOnDisk(0), _fusion_size_on_disk(0) {}
SearchableStats();
~SearchableStats();
SearchableStats &memoryUsage(const vespalib::MemoryUsage &usage) {
_memoryUsage = usage;
return *this;
Expand All @@ -41,14 +44,12 @@ class SearchableStats
}
size_t fusion_size_on_disk() const { return _fusion_size_on_disk; }

SearchableStats &merge(const SearchableStats &rhs) {
_memoryUsage.merge(rhs._memoryUsage);
_docsInMemory += rhs._docsInMemory;
_sizeOnDisk += rhs._sizeOnDisk;
_fusion_size_on_disk += rhs._fusion_size_on_disk;
return *this;
}
SearchableStats& merge(const SearchableStats& rhs);
bool operator==(const SearchableStats& rhs) const noexcept;
SearchableStats& add_field_stats(const std::string& name, const FieldIndexStats& stats);
const std::map<std::string, FieldIndexStats>& get_field_stats() const noexcept { return _field_stats; }
};

} // namespace search
std::ostream& operator<<(std::ostream& os, const SearchableStats& stats);

}
10 changes: 8 additions & 2 deletions vespalib/src/vespa/vespalib/util/memoryusage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "memoryusage.h"
#include <vespa/vespalib/stllike/asciistream.h>
#include <ostream>

namespace vespalib {

Expand All @@ -14,10 +15,15 @@ MemoryUsage::toString() const {

asciistream &
operator << (asciistream & os, const MemoryUsage & usage) {
os << "allocated: " << usage.allocatedBytes();
os << "{allocated: " << usage.allocatedBytes();
os << ", used: " << usage.usedBytes();
os << ", dead: " << usage.deadBytes();
os << ", onhold: " << usage.allocatedBytesOnHold();
os << ", onhold: " << usage.allocatedBytesOnHold() << "}";
return os;
}

// Streams the same text that MemoryUsage::toString() produces.
std::ostream& operator<<(std::ostream& os, const MemoryUsage& usage) {
    return os << usage.toString();
}

Expand Down
11 changes: 10 additions & 1 deletion vespalib/src/vespa/vespalib/util/memoryusage.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#pragma once

#include <string>
#include <iosfwd>

namespace vespalib {

Expand Down Expand Up @@ -50,17 +51,25 @@ class MemoryUsage {
_allocatedBytesOnHold += inc;
}

void merge(const MemoryUsage & rhs) {
void merge(const MemoryUsage & rhs) noexcept {
_allocatedBytes += rhs._allocatedBytes;
_usedBytes += rhs._usedBytes;
_deadBytes += rhs._deadBytes;
_allocatedBytesOnHold += rhs._allocatedBytesOnHold;
}

// Equal when all four byte counters (allocated, used, dead, on hold) match.
bool operator==(const MemoryUsage& rhs) const noexcept {
    if (_allocatedBytes != rhs._allocatedBytes) {
        return false;
    }
    if (_usedBytes != rhs._usedBytes) {
        return false;
    }
    if (_deadBytes != rhs._deadBytes) {
        return false;
    }
    return _allocatedBytesOnHold == rhs._allocatedBytesOnHold;
}
std::string toString() const;
};

class asciistream;

asciistream & operator << (asciistream & os, const MemoryUsage & usage);
std::ostream& operator<<(std::ostream& os, const MemoryUsage& usage);

} // namespace vespalib

0 comments on commit 6f42420

Please sign in to comment.