Skip to content

Commit

Permalink
Add support for fast memory monitoring using smaps_rollup (#234)
Browse files Browse the repository at this point in the history
* Add a prototype for fast memory monitoring using smaps_rollup

* Delete unavailable metric

* Delete extra semicolon

* Demote the message to info and add some information to the README

---------

Co-authored-by: Alaettin Serhan Mete <alaettin.serhan.mete@cern.ch>
  • Loading branch information
amete and Alaettin Serhan Mete authored Jun 3, 2024
1 parent 440361d commit 86b4b13
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 7 deletions.
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ The `prmon` binary is invoked with the following arguments:
prmon [--pid PPP] [--filename prmon.txt] [--json-summary prmon.json] \
[--log-filename prmon.log] [--interval 30] \
[--suppress-hw-info] [--units] [--netdev DEV] \
[--disable MON1] [--level LEV] [--level MON:LEV]\
[--disable MON1] [--level LEV] [--level MON:LEV] \
[--fast-memmon] \
[-- prog arg arg ...]
```

Expand All @@ -130,6 +131,7 @@ prmon [--pid PPP] [--filename prmon.txt] [--json-summary prmon.json] \
* `--level LEV` sets the level for all monitors to LEV
* `--level MON:LEV` sets the level for monitor MON to LEV
* The valid levels are `trace`, `debug`, `info`, `warn`, `error`, `critical`
* `--fast-memmon` toggles on fast memory monitoring using `smaps_rollup`
* `--` after this argument the following arguments are treated as a program to invoke
and remaining arguments are passed to it; `prmon` will then monitor this process
instead of being given a PID via `--pid`
Expand All @@ -141,6 +143,21 @@ incomplete arguments. If `prmon` starts a program itself (using `--`) then
When invoked with `-h` or `--help` usage information is printed, as well as a
list of all available monitoring components.

### Fast Memory Monitoring

When invoked with `--fast-memmon`, `prmon` reads the `smaps_rollup` files,
which contain pre-summed memory information for each monitored process.
This is faster than the default behavior,
where `prmon` aggregates the results itself by going over each of the monitored
processes' mappings one by one.

If the current kernel doesn't support `smaps_rollup`, then the default
approach is used. Users should also note that fast memory monitoring
might not provide all metrics that the default approach supports, e.g.,
`vmem`. In that case, the missing metric is omitted from the output.
If either of these issues is encountered, a relevant message is printed
to notify the user.

### Environment Variables

The `PRMON_DISABLE_MONITOR` environment variable can be used to specify a comma
Expand Down
47 changes: 45 additions & 2 deletions package/src/memmon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <iostream>
#include <limits>
#include <regex>
#include <set>
#include <sstream>

#include "utils.h"
Expand All @@ -18,7 +19,7 @@

// Constructor; uses RAII pattern to be valid
// after construction
memmon::memmon() {
memmon::memmon() : input_filename{"smaps"} {
log_init(MONITOR_NAME);
#undef MONITOR_NAME
for (const auto& param : params) {
Expand All @@ -35,7 +36,8 @@ void memmon::update_stats(const std::vector<pid_t>& pids,
std::string key_str{}, value_str{};
for (const auto pid : pids) {
std::stringstream smaps_fname{};
smaps_fname << read_path << "/proc/" << pid << "/smaps" << std::ends;
smaps_fname << read_path << "/proc/" << pid << "/" << input_filename.c_str()
<< std::ends;
std::ifstream smap_stat{smaps_fname.str()};
while (smap_stat) {
// Read off the potentially interesting "key: value", then discard
Expand Down Expand Up @@ -135,3 +137,44 @@ void const memmon::get_unit_info(nlohmann::json& unit_json) {
prmon::fill_units(unit_json, params);
return;
}

// Toggle on fast memory monitoring.
//
// Fast monitoring reads the per-process data from smaps_rollup instead of
// smaps. The metrics offered by smaps_rollup are discovered by peeking into
// this process' own /proc/self/smaps_rollup; every monitored stat that the
// file does not provide is removed from mem_stats and reported at info level.
//
// If the file cannot be opened, or none of the known keys is found in it,
// nothing is changed: the monitor keeps reading smaps and keeps all of its
// stats, and a warning is logged instead.
void const memmon::do_fastmon() {
  // First discover all available metrics by peeking into self smaps_rollup
  std::ifstream rollup_stat{"/proc/self/smaps_rollup"};
  if (!rollup_stat) {
    spdlog::warn(
        "Cannot read /proc/self/smaps_rollup, using the standard memory "
        "monitoring mode");
    return;
  }

  std::set<std::string> available_metrics;
  std::string key_str{}, value_str{};
  while (rollup_stat) {
    // Read off the "key: value" pair, then discard the rest of the line
    rollup_stat >> key_str >> value_str;
    rollup_stat.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
    if (rollup_stat) {
      if (key_str == "Size:") {
        available_metrics.insert("vmem");
      } else if (key_str == "Pss:") {
        available_metrics.insert("pss");
      } else if (key_str == "Rss:") {
        available_metrics.insert("rss");
      } else if (key_str == "Swap:") {
        available_metrics.insert("swap");
      }
    }
  }

  // Without a single recognised metric, switching over would erase every
  // monitored stat, so stay in the standard mode instead
  if (available_metrics.empty()) {
    spdlog::warn(
        "No known metric found in /proc/self/smaps_rollup, using the standard "
        "memory monitoring mode");
    return;
  }

  // Only now is it safe to switch the input file from smaps to smaps_rollup
  input_filename = "smaps_rollup";

  // Delete unavailable metrics from the monitored stats
  // In C++17/20 there are more elegant ways to do this
  for (auto it = mem_stats.cbegin(); it != mem_stats.cend();) {
    if (available_metrics.count(it->first) == 0) {
      spdlog::info("Metric " + it->first +
                   " is not available in fast monitoring");
      // erase() hands back the iterator following the removed element
      it = mem_stats.erase(it);
    } else {
      ++it;
    }
  }
}
6 changes: 6 additions & 0 deletions package/src/memmon.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ class memmon final : public Imonitor, public MessageBase {
{"rss", "kB", "kB"},
{"swap", "kB", "kB"}};

// The input smaps file to be used
std::string input_filename;

// Dynamic monitoring container for value measurements
// This will be filled at initialisation, taking the names
// from the above params
Expand All @@ -48,6 +51,9 @@ class memmon final : public Imonitor, public MessageBase {
void const get_hardware_info(nlohmann::json& hw_json);
void const get_unit_info(nlohmann::json& unit_json);
bool const is_valid() { return true; }

// Toggle on fast memory monitoring
void const do_fastmon();
};
REGISTER_MONITOR(Imonitor, memmon, "Monitor memory usage")

Expand Down
29 changes: 25 additions & 4 deletions package/src/prmon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <unordered_map>
#include <vector>

#include "memmon.h"
#include "prmonVersion.h"
#include "prmonutils.h"
#include "registry.h"
Expand All @@ -40,7 +41,8 @@ int ProcessMonitor(const pid_t mpid, const std::string filename,
const std::string json_summary_file, const time_t interval,
const bool store_hw_info, const bool store_unit_info,
const std::vector<std::string> netdevs,
const std::vector<std::string> disabled_monitors) {
const std::vector<std::string> disabled_monitors,
const bool do_fast_memmon) {
signal(SIGUSR1, prmon::SignalCallbackHandler);

// This is the vector of all monitoring components
Expand Down Expand Up @@ -81,6 +83,18 @@ int ProcessMonitor(const pid_t mpid, const std::string filename,
exit(EXIT_FAILURE);
}

// Configure the memory monitor for fast monitoring if possible/asked for
if (do_fast_memmon && monitors.count("memmon")) {
if (!prmon::smaps_rollup_exists()) {
spdlog::warn(
"Fast memory monitoring is requested but the kernel doesn't support "
"smaps_rollup, using the standard mode.");
} else {
auto mem_monitor_p = static_cast<memmon*>(monitors["memmon"].get());
mem_monitor_p->do_fastmon();
}
}

int iteration = 0;
time_t lastIteration = time(0) - interval;
time_t currentTime;
Expand Down Expand Up @@ -248,6 +262,7 @@ int main(int argc, char* argv[]) {
bool store_hw_info{default_store_hw_info};
bool store_unit_info{default_store_unit_info};
int do_help{0};
bool do_fast_memmon{false};

static struct option long_options[] = {
{"pid", required_argument, NULL, 'p'},
Expand All @@ -261,10 +276,11 @@ int main(int argc, char* argv[]) {
{"netdev", required_argument, NULL, 'n'},
{"help", no_argument, NULL, 'h'},
{"level", required_argument, NULL, 'l'},
{"fast-memmon", no_argument, NULL, 'm'},
{0, 0, 0, 0}};

int c;
while ((c = getopt_long(argc, argv, "-p:f:j:o:i:d:sun:h:l:", long_options,
while ((c = getopt_long(argc, argv, "-p:f:j:o:i:d:sun:h:l:m", long_options,
NULL)) != -1) {
switch (char(c)) {
case 'p':
Expand Down Expand Up @@ -303,6 +319,9 @@ int main(int argc, char* argv[]) {
case 'l':
processLevel(std::string(optarg));
break;
case 'm':
do_fast_memmon = true;
break;
default:
std::cerr << "Use '--help' for usage " << std::endl;
return 1;
Expand Down Expand Up @@ -349,6 +368,8 @@ int main(int argc, char* argv[]) {
<< " Valid level names are trace, debug, "
"info,\n"
<< " warn, error and critical\n"
<< "[--fast-memmon, -m] Do fast memory monitoring using "
"smaps_rollup\n"
<< "[--] prog [arg] ... Instead of monitoring a PID prmon will\n"
<< " execute the given program + args and\n"
<< " monitor this (must come after other \n"
Expand Down Expand Up @@ -413,7 +434,7 @@ int main(int argc, char* argv[]) {
return 1;
}
ProcessMonitor(pid, filename, jsonSummary, interval, store_hw_info,
store_unit_info, netdevs, disabled_monitors);
store_unit_info, netdevs, disabled_monitors, do_fast_memmon);
} else {
if (child_args == argc) {
spdlog::error(
Expand All @@ -427,7 +448,7 @@ int main(int argc, char* argv[]) {
} else if (child > 0) {
return ProcessMonitor(child, filename, jsonSummary, interval,
store_hw_info, store_unit_info, netdevs,
disabled_monitors);
disabled_monitors, do_fast_memmon);
}
}

Expand Down
6 changes: 6 additions & 0 deletions package/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>

Expand Down Expand Up @@ -104,3 +105,8 @@ const void prmon::fill_units(nlohmann::json& unit_json,
}
return;
}

// Report whether /proc/self/smaps_rollup can be stat()-ed, i.e. whether the
// smaps_rollup file is present for the current process.
const bool prmon::smaps_rollup_exists() {
  struct stat rollup_info;
  const int status = stat("/proc/self/smaps_rollup", &rollup_info);
  return status == 0;
}
3 changes: 3 additions & 0 deletions package/src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ const std::pair<int, std::vector<std::string>> cmd_pipe_output(
// monitor)
const void fill_units(nlohmann::json& unit_json, const parameter_list& params);

// Utility function to check if smaps_rollup is available on this machine.
// Returns true when stat() succeeds on /proc/self/smaps_rollup.
const bool smaps_rollup_exists();

} // namespace prmon

#endif // PRMON_UTILS_H

0 comments on commit 86b4b13

Please sign in to comment.