Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add k8s mem stat #253

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions deviate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,17 @@ deviatsyst(struct sstat *cur, struct sstat *pre, struct sstat *dev,

dev->llc.nrllcs = cur->llc.nrllcs;

dev->k8smem.file = cur->k8smem.file;
dev->k8smem.anon = cur->k8smem.anon;
dev->k8smem.shmem = cur->k8smem.shmem;
dev->k8smem.filemapped = cur->k8smem.filemapped;
dev->k8smem.inactiveanon = cur->k8smem.inactiveanon;
dev->k8smem.activeanon = cur->k8smem.activeanon;
dev->k8smem.inactivefile = cur->k8smem.inactivefile;
dev->k8smem.activefile = cur->k8smem.activefile;
dev->k8smem.usagefile = cur->k8smem.usagefile;
dev->k8smem.workingset = cur->k8smem.workingset;

#if HTTPSTATS
/*
** application-specific counters
Expand Down
17 changes: 17 additions & 0 deletions man/atop.1
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,23 @@ the number of memory pages the system wrote to swap space (`swout'), and
the number of out-of-memory kills (`oomkill').
.PP
.TP 5
.B K8S
Memory statistics of the global Kubernetes cgroup, taken from /sys/fs/cgroup/[memory/]kubepods[.slice]/memory.stat.
.br
This line shows the following counters for the global Kubernetes memory
cgroup: the number of file (page cache) pages (`file'),
the number of pages used in anonymous mappings (`anon'),
the number of shared memory pages, including tmpfs/GEM pages (`shmem'),
the number of page cache pages mapped into page tables (`fmap'),
the number of inactive anonymous pages on the LRU list (`inan'),
the number of active anonymous pages on the LRU list (`actan'),
the number of inactive file pages on the LRU list (`infl'),
the number of active file pages on the LRU list (`actfl'),
the current memory usage in pages, including user memory and
kernel memory (`usage'), and the working set size in pages as
defined by Kubernetes, i.e. the current usage minus the inactive
file pages (`wkset').
.PP
.TP 5
.B PSI
Pressure Stall Information.
.br
Expand Down
16 changes: 0 additions & 16 deletions photoproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,6 @@ photoproc(struct tstat *tasklist, int maxtask)
fclose(fp);
}

/*
** check if this kernel offers cgroups version 2
*/
if ( (fp = fopen("/proc/1/cgroup", "r")) )
{
char line[128];

if (fgets(line, sizeof line, fp))
{
if (memcmp(line, "0::", 3) == 0) // equal?
supportflags |= CGROUPV2;
}

fclose(fp);
}

if (! droprootprivs())
mcleanstop(42, "failed to drop root privs\n");

Expand Down
181 changes: 181 additions & 0 deletions photosyst.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <sys/ioctl.h>
#include <sys/sysmacros.h>
#include <limits.h>
#include <sys/vfs.h>

#define SCALINGMAXCPU 8 // threshold for scaling info per CPU

Expand Down Expand Up @@ -75,9 +76,21 @@
#define MDDTYPE 2
#define LVMTYPE 3

/* According to https://man7.org/linux/man-pages/man2/statfs.2.html */
#define CGROUP_FTYPE_V1 16914836 /* TMPFS_MAGIC 0x01021994 */
#define CGROUP_FTYPE_V2 1667723888 /* CGROUP2_SUPER_MAGIC 0x63677270 */

/* recognize numa node */
#define NUMADIR "/sys/devices/system/node"

/* recognize k8s global memory.stat and memory current usage */
#define K8S_MEMDIR_CGV1 "/sys/fs/cgroup/memory/kubepods"
#define K8S_MEMDIR_CGV2 "/sys/fs/cgroup/kubepods"
#define K8S_SYSTEMD_CM ".slice"
#define K8S_MEM_STAT "/memory.stat"
#define K8S_MEM_CGV1_USAGE "/memory.usage_in_bytes"
#define K8S_MEM_CGV2_USAGE "/memory.current"

/* recognize LLC monitor data */
#define LLCDIR "/sys/fs/resctrl/mon_data"
#define L3SIZE "/sys/devices/system/cpu/cpu0/cache/index3/size"
Expand Down Expand Up @@ -291,6 +304,8 @@ photosyst(struct sstat *si)
#if HTTPSTATS
static int wwwvalid = 1;
#endif
static int cgroupVersion = 0;
struct statfs statfscgrp;

memset(si, 0, sizeof(struct sstat));

Expand Down Expand Up @@ -846,6 +861,172 @@ photosyst(struct sstat *si)
fclose(fp);
}

/*
** Determine the cgroup version once by checking the filesystem type of
** /sys/fs/cgroup/ (equivalent to `stat -fc %T /sys/fs/cgroup/`):
** cgroup2fs indicates cgroup v2, tmpfs indicates cgroup v1.
*/
if ( !cgroupVersion )
{
if ( statfs("/sys/fs/cgroup/", &statfscgrp) == 0 )
{
if ( statfscgrp.f_type == CGROUP_FTYPE_V2 )
{
cgroupVersion = 2;
supportflags |= CGROUPV2;
}
else if ( statfscgrp.f_type == CGROUP_FTYPE_V1 )
cgroupVersion = 1;
else
cgroupVersion = 0;
}
}

if ( supportflags & CGROUPV2 )
{
if ( (fp = fopen(K8S_MEMDIR_CGV2 K8S_MEM_STAT, "r")) != NULL ||
(fp = fopen(K8S_MEMDIR_CGV2 K8S_SYSTEMD_CM K8S_MEM_STAT, "r")) != NULL )
{
/* for cgroup v2 */
while ( fgets(linebuf, sizeof(linebuf), fp) != NULL )
{
nr = sscanf(linebuf, "%s %lld\n", nam, &cnts[0]);

if ( strcmp("file", nam) == EQ )
{
si->k8smem.file = cnts[0]/pagesize;
continue;
}
if ( strcmp("anon", nam) == EQ )
{
si->k8smem.anon = cnts[0]/pagesize;
continue;
}
if ( strcmp("shmem", nam) == EQ )
{
si->k8smem.shmem = cnts[0]/pagesize;
continue;
}
if ( strcmp("file_mapped", nam) == EQ )
{
si->k8smem.filemapped = cnts[0]/pagesize;
continue;
}
if ( strcmp("inactive_anon", nam) == EQ )
{
si->k8smem.inactiveanon = cnts[0]/pagesize;
continue;
}
if ( strcmp("active_anon", nam) == EQ )
{
si->k8smem.activeanon = cnts[0]/pagesize;
continue;
}
if ( strcmp("inactive_file", nam) == EQ )
{
si->k8smem.inactivefile = cnts[0]/pagesize;
continue;
}
if ( strcmp("active_file", nam) == EQ )
{
si->k8smem.activefile = cnts[0]/pagesize;
continue;
}
}

fclose(fp);
}

if ( (fp = fopen(K8S_MEMDIR_CGV2 K8S_MEM_CGV2_USAGE, "r")) != NULL ||
(fp = fopen(K8S_MEMDIR_CGV2 K8S_SYSTEMD_CM K8S_MEM_CGV2_USAGE, "r")) != NULL )
{
if ( fscanf(fp, "%lld", &cnts[0]) == 1 )
{
/*
** Refer to https://github.com/kubernetes/kubernetes/issues/43916,
** memory.available := node.status.capacity[memory] - node.stats.memory.workingSet
** && workingSet := $cgroupfs/memory.current - inactive_file
*/
si->k8smem.usagefile = cnts[0]/pagesize;
si->k8smem.workingset = si->k8smem.usagefile - si->k8smem.inactivefile;
}

fclose(fp);
}
}
else
{
if ( (fp = fopen(K8S_MEMDIR_CGV1 K8S_MEM_STAT, "r")) != NULL ||
(fp = fopen(K8S_MEMDIR_CGV1 K8S_SYSTEMD_CM K8S_MEM_STAT, "r")) != NULL )
{
/* for cgroup v1 */
while ( fgets(linebuf, sizeof(linebuf), fp) != NULL )
{
nr = sscanf(linebuf, "%s %lld\n", nam, &cnts[0]);

if ( strcmp("total_cache", nam) == EQ )
{
si->k8smem.file = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_rss", nam) == EQ)
{
si->k8smem.anon = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_shmem", nam) == EQ)
{
si->k8smem.shmem = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_mapped_file", nam) == EQ)
{
si->k8smem.filemapped = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_inactive_anon", nam) == EQ)
{
si->k8smem.inactiveanon = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_active_anon", nam) == EQ)
{
si->k8smem.activeanon = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_inactive_file", nam) == EQ)
{
si->k8smem.inactivefile = cnts[0]/pagesize;
continue;
}
if ( strcmp("total_active_file", nam) == EQ)
{
si->k8smem.activefile = cnts[0]/pagesize;
continue;
}
}

fclose(fp);
}

if ( (fp = fopen(K8S_MEMDIR_CGV1 K8S_MEM_CGV1_USAGE, "r")) != NULL ||
(fp = fopen(K8S_MEMDIR_CGV1 K8S_SYSTEMD_CM K8S_MEM_CGV1_USAGE, "r")) != NULL )
{
if ( fscanf(fp, "%lld", &cnts[0]) == 1 )
{
/*
** Refer to https://github.com/kubernetes/kubernetes/issues/43916,
** memory.available := node.status.capacity[memory] - node.stats.memory.workingSet
** && workingSet := $cgroupfs/memory.usage_in_bytes - total_inactive_file
*/
si->k8smem.usagefile = cnts[0]/pagesize;
si->k8smem.workingset = si->k8smem.usagefile - si->k8smem.inactivefile;
}

fclose(fp);
}
}

/*
** gather per numa memory-related statistics from the file
** /sys/devices/system/node/node0/meminfo, and store them in binary form.
Expand Down
14 changes: 14 additions & 0 deletions photosyst.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,19 @@ struct llcstat {
struct perllc perllc[MAXLLC];
};

/*
** memory counters of the global Kubernetes memory cgroup ("kubepods" or
** "kubepods.slice"); photosyst() reads the values in bytes from the
** cgroup's memory.stat and usage file and divides them by the page size,
** so every field is expressed in pages
**
** for cgroup v1 the hierarchical "total_*" keys of memory.stat are used
*/
struct k8smem {
count_t file; /* file (page cache) pages: v2 'file', v1 'total_cache' */
count_t anon; /* anonymous pages: v2 'anon', v1 'total_rss' */
count_t shmem; /* shared memory pages (including tmpfs/GEM pages): v2 'shmem', v1 'total_shmem' */
count_t filemapped; /* page cache pages mapped into page tables: v2 'file_mapped', v1 'total_mapped_file' */
count_t inactiveanon; /* inactive anonymous pages on the LRU list */
count_t activeanon; /* active anonymous pages on the LRU list */
count_t inactivefile; /* inactive file pages on the LRU list */
count_t activefile; /* active file pages on the LRU list */
count_t usagefile; /* current memory usage: v2 'memory.current', v1 'memory.usage_in_bytes' */
count_t workingset; /* working set as defined by Kubernetes: usagefile minus inactivefile */
};

/************************************************************************/

struct sstat {
Expand All @@ -439,6 +452,7 @@ struct sstat {
struct gpustat gpu;
struct ifbstat ifb;
struct llcstat llc;
struct k8smem k8smem;

struct wwwstat www;
};
Expand Down
60 changes: 60 additions & 0 deletions showlinux.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,19 @@ sys_printdef *llcsyspdefs[] = {
&syspdef_BLANKBOX,
0
};
/*
** print definitions that can be selected for the K8S line; this table is
** handed to make_sys_prints() together with an item specification string
** to build k8smemline
**
** the list is terminated by a null pointer
*/
sys_printdef *k8smemsyspdefs[] = {
&syspdef_K8SFILE,
&syspdef_K8SANON,
&syspdef_K8SSHMEM,
&syspdef_K8SFILEMAPPED,
&syspdef_K8SINACTIVEANON,
&syspdef_K8SACTIVEANON,
&syspdef_K8SINACTIVEFILE,
&syspdef_K8SACTIVEFILE,
&syspdef_K8SUSAGEFILE,
&syspdef_K8SWORKINGSET,
0
};
sys_printdef *psisyspdefs[] = {
&syspdef_PSICPUSTOT,
&syspdef_PSIMEMSTOT,
Expand Down Expand Up @@ -520,6 +533,7 @@ sys_printpair swpline[MAXITEMS];
sys_printpair memnumaline[MAXITEMS];
sys_printpair cpunumaline[MAXITEMS];
sys_printpair llcline[MAXITEMS];
sys_printpair k8smemline[MAXITEMS];
sys_printpair pagline[MAXITEMS];
sys_printpair psiline[MAXITEMS];
sys_printpair contline[MAXITEMS];
Expand Down Expand Up @@ -1027,6 +1041,23 @@ pricumproc(struct sstat *sstat, struct devtstat *devtstat,
sstat, &extra);
}

if (k8smemline[0].f == 0)
{
make_sys_prints(k8smemline, MAXITEMS,
"K8SFILE:1 "
"K8SANON:1 "
"K8SSHMEM:1 "
"K8SFILEMAPPED:2 "
"K8SACTIVEANON:2 "
"K8SINACTIVEANON:2 "
"K8SACTIVEFILE:2 "
"K8SINACTIVEFILE:1 "
"K8SUSAGEFILE:1 "
"K8SWORKINGSET:1 ",
k8smemsyspdefs, "builtin k8smemline",
sstat, &extra);
}

if (pagline[0].f == 0)
{
make_sys_prints(pagline, MAXITEMS,
Expand Down Expand Up @@ -2010,6 +2041,28 @@ prisyst(struct sstat *sstat, int curline, int nsecs, int avgval,
}
}

/*
** k8s global memory.stat statistics
*/
if (fixedhead ||
sstat->k8smem.file ||
sstat->k8smem.anon ||
sstat->k8smem.shmem ||
sstat->k8smem.filemapped ||
sstat->k8smem.inactiveanon ||
sstat->k8smem.activeanon ||
sstat->k8smem.inactivefile ||
sstat->k8smem.activefile ||
sstat->k8smem.usagefile ||
sstat->k8smem.workingset )
{
if (screen)
move(curline, 0);

showsysline(k8smemline, sstat, &extra, "K8S", 0);
curline++;
}

/*
** PAGING statistics
*/
Expand Down Expand Up @@ -2978,6 +3031,13 @@ do_ownllcline(char *name, char *val)
NULL, NULL);
}

/*
** build the layout of the K8S line (k8smemline) from a caller-supplied
** item specification instead of the builtin one
**
** name		passed through to make_sys_prints() as the name of this
**		specification (the builtin layout passes "builtin k8smemline"
**		in the same position)
** val		item specification string, e.g. "K8SFILE:1 K8SANON:1 ..."
**
** NOTE(review): presumably called while parsing the configuration file,
** like the other do_own...line functions -- confirm against the caller
*/
void
do_ownk8smemline(char *name, char *val)
{
	make_sys_prints(k8smemline,
	                MAXITEMS,
	                val,
	                k8smemsyspdefs,
	                name,
	                NULL,
	                NULL);
}

void
do_owndskline(char *name, char *val)
{
Expand Down
Loading