diff --git a/deviate.c b/deviate.c
index c9ae3c0c..8657211b 100644
--- a/deviate.c
+++ b/deviate.c
@@ -1455,6 +1455,12 @@ deviatsyst(struct sstat *cur, struct sstat *pre, struct sstat *dev,
 
 	dev->llc.nrllcs = cur->llc.nrllcs;
 
+	/*
+	** k8s memory values are taken over from the current sample
+	** as a whole (no difference with the previous sample)
+	*/
+	dev->k8smem = cur->k8smem;
+
 #if	HTTPSTATS
 	/*
 	** application-specific counters
diff --git a/man/atop.1 b/man/atop.1
index c5a8ca2d..930e6eb9 100644
--- a/man/atop.1
+++ b/man/atop.1
@@ -1343,6 +1343,23 @@ the number of memory pages the system wrote to swap space (`swout'), and the
 number of out-of-memory kills (`oomkill').
 .PP
 .TP 5
+.B K8S
+Memory statistics of the Kubernetes top-level memory cgroup, read from
+/sys/fs/cgroup/[memory/]kubepods[.slice]/memory.stat.
+.br
+This line shows, for that cgroup, the amount of memory used for
+page cache (`file'), for anonymous pages (`anon') and for
+shared memory including tmpfs/GEM pages (`shmem'),
+the amount of page cache that is mapped into page tables (`fmap'),
+the amount of anonymous memory on the active LRU list (`actan')
+and on the inactive LRU list (`inan'),
+the amount of file-backed memory on the active LRU list (`actfl')
+and on the inactive LRU list (`infl'),
+the current total memory usage of the cgroup, user and kernel memory
+included (`usage'), and the working set as Kubernetes computes it:
+current usage minus inactive file-backed memory (`wkset').
+.PP
+.TP 5
 .B PSI
 Pressure Stall Information.
.br diff --git a/photoproc.c b/photoproc.c index 5219f80d..7a70aea0 100644 --- a/photoproc.c +++ b/photoproc.c @@ -103,22 +103,6 @@ photoproc(struct tstat *tasklist, int maxtask) fclose(fp); } - /* - ** check if this kernel offers cgroups version 2 - */ - if ( (fp = fopen("/proc/1/cgroup", "r")) ) - { - char line[128]; - - if (fgets(line, sizeof line, fp)) - { - if (memcmp(line, "0::", 3) == 0) // equal? - supportflags |= CGROUPV2; - } - - fclose(fp); - } - if (! droprootprivs()) mcleanstop(42, "failed to drop root privs\n"); diff --git a/photosyst.c b/photosyst.c index ef508356..5a593629 100644 --- a/photosyst.c +++ b/photosyst.c @@ -46,6 +46,7 @@ #include #include #include +#include #define SCALINGMAXCPU 8 // threshold for scaling info per CPU @@ -75,9 +76,21 @@ #define MDDTYPE 2 #define LVMTYPE 3 +/* According to https://man7.org/linux/man-pages/man2/statfs.2.html */ +#define CGROUP_FTYPE_V1 16914836 /* TMPFS_MAGIC 0x01021994 */ +#define CGROUP_FTYPE_V2 1667723888 /* CGROUP2_SUPER_MAGIC 0x63677270 */ + /* recognize numa node */ #define NUMADIR "/sys/devices/system/node" +/* recognize k8s global memory.stat and memory current usage */ +#define K8S_MEMDIR_CGV1 "/sys/fs/cgroup/memory/kubepods" +#define K8S_MEMDIR_CGV2 "/sys/fs/cgroup/kubepods" +#define K8S_SYSTEMD_CM ".slice" +#define K8S_MEM_STAT "/memory.stat" +#define K8S_MEM_CGV1_USAGE "/memory.usage_in_bytes" +#define K8S_MEM_CGV2_USAGE "/memory.current" + /* recognize LLC monitor data */ #define LLCDIR "/sys/fs/resctrl/mon_data" #define L3SIZE "/sys/devices/system/cpu/cpu0/cache/index3/size" @@ -291,6 +304,8 @@ photosyst(struct sstat *si) #if HTTPSTATS static int wwwvalid = 1; #endif + static int cgroupVersion = 0; + struct statfs statfscgrp; memset(si, 0, sizeof(struct sstat)); @@ -846,6 +861,172 @@ photosyst(struct sstat *si) fclose(fp); } + /* + ** Identify the cgroup version on Linux Nodes: `stat -fc %T /sys/fs/cgroup/`. + ** For cgroup v2, the output is cgroup2fs. + ** For cgroup v1, the output is tmpfs. 
+ */ + if ( !cgroupVersion ) + { + if ( statfs("/sys/fs/cgroup/", &statfscgrp) == 0 ) + { + if ( statfscgrp.f_type == CGROUP_FTYPE_V2 ) + { + cgroupVersion = 2; + supportflags |= CGROUPV2; + } + else if ( statfscgrp.f_type == CGROUP_FTYPE_V1 ) + cgroupVersion = 1; + else + cgroupVersion = 0; + } + } + + if ( supportflags & CGROUPV2 ) + { + if ( (fp = fopen(K8S_MEMDIR_CGV2 K8S_MEM_STAT, "r")) != NULL || + (fp = fopen(K8S_MEMDIR_CGV2 K8S_SYSTEMD_CM K8S_MEM_STAT, "r")) != NULL ) + { + /* for cgroup v2 */ + while ( fgets(linebuf, sizeof(linebuf), fp) != NULL ) + { + nr = sscanf(linebuf, "%s %lld\n", nam, &cnts[0]); + + if ( strcmp("file", nam) == EQ ) + { + si->k8smem.file = cnts[0]/pagesize; + continue; + } + if ( strcmp("anon", nam) == EQ ) + { + si->k8smem.anon = cnts[0]/pagesize; + continue; + } + if ( strcmp("shmem", nam) == EQ ) + { + si->k8smem.shmem = cnts[0]/pagesize; + continue; + } + if ( strcmp("file_mapped", nam) == EQ ) + { + si->k8smem.filemapped = cnts[0]/pagesize; + continue; + } + if ( strcmp("inactive_anon", nam) == EQ ) + { + si->k8smem.inactiveanon = cnts[0]/pagesize; + continue; + } + if ( strcmp("active_anon", nam) == EQ ) + { + si->k8smem.activeanon = cnts[0]/pagesize; + continue; + } + if ( strcmp("inactive_file", nam) == EQ ) + { + si->k8smem.inactivefile = cnts[0]/pagesize; + continue; + } + if ( strcmp("active_file", nam) == EQ ) + { + si->k8smem.activefile = cnts[0]/pagesize; + continue; + } + } + + fclose(fp); + } + + if ( (fp = fopen(K8S_MEMDIR_CGV2 K8S_MEM_CGV2_USAGE, "r")) != NULL || + (fp = fopen(K8S_MEMDIR_CGV2 K8S_SYSTEMD_CM K8S_MEM_CGV2_USAGE, "r")) != NULL ) + { + if ( fscanf(fp, "%lld", &cnts[0]) == 1 ) + { + /* + ** Refer to https://github.com/kubernetes/kubernetes/issues/43916, + ** memory.available := node.status.capacity[memory] - node.stats.memory.workingSet + ** && workingSet := $cgroupfs/memory.current - inactive_file + */ + si->k8smem.usagefile = cnts[0]/pagesize; + si->k8smem.workingset = si->k8smem.usagefile - 
si->k8smem.inactivefile; + } + + fclose(fp); + } + } + else + { + if ( (fp = fopen(K8S_MEMDIR_CGV1 K8S_MEM_STAT, "r")) != NULL || + (fp = fopen(K8S_MEMDIR_CGV1 K8S_SYSTEMD_CM K8S_MEM_STAT, "r")) != NULL ) + { + /* for cgroup v1 */ + while ( fgets(linebuf, sizeof(linebuf), fp) != NULL ) + { + nr = sscanf(linebuf, "%s %lld\n", nam, &cnts[0]); + + if ( strcmp("total_cache", nam) == EQ ) + { + si->k8smem.file = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_rss", nam) == EQ) + { + si->k8smem.anon = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_shmem", nam) == EQ) + { + si->k8smem.shmem = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_mapped_file", nam) == EQ) + { + si->k8smem.filemapped = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_inactive_anon", nam) == EQ) + { + si->k8smem.inactiveanon = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_active_anon", nam) == EQ) + { + si->k8smem.activeanon = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_inactive_file", nam) == EQ) + { + si->k8smem.inactivefile = cnts[0]/pagesize; + continue; + } + if ( strcmp("total_active_file", nam) == EQ) + { + si->k8smem.activefile = cnts[0]/pagesize; + continue; + } + } + + fclose(fp); + } + + if ( (fp = fopen(K8S_MEMDIR_CGV1 K8S_MEM_CGV1_USAGE, "r")) != NULL || + (fp = fopen(K8S_MEMDIR_CGV1 K8S_SYSTEMD_CM K8S_MEM_CGV1_USAGE, "r")) != NULL ) + { + if ( fscanf(fp, "%lld", &cnts[0]) == 1 ) + { + /* + ** Refer to https://github.com/kubernetes/kubernetes/issues/43916, + ** memory.available := node.status.capacity[memory] - node.stats.memory.workingSet + ** && workingSet := $cgroupfs/memory.usage_in_bytes - total_inactive_file + */ + si->k8smem.usagefile = cnts[0]/pagesize; + si->k8smem.workingset = si->k8smem.usagefile - si->k8smem.inactivefile; + } + + fclose(fp); + } + } + /* ** gather per numa memory-related statistics from the file ** /sys/devices/system/node/node0/meminfo, and store them in binary form. 
diff --git a/photosyst.h b/photosyst.h
index 2ed884a2..8a298075 100644
--- a/photosyst.h
+++ b/photosyst.h
@@ -423,6 +423,19 @@ struct llcstat {
 	struct perllc	perllc[MAXLLC];
 };
 
+struct k8smem {	/* memory counters of the k8s global memcg, filled by photosyst() */
+	count_t	file;		/* number of file pages for k8s global memcg */
+	count_t	anon;		/* number of mapped anonymous pages for k8s global memcg */
+	count_t	shmem;		/* number of shmem pages (including tmpfs/GEM pages) for k8s global memcg */
+	count_t	filemapped;	/* number of pagecache pages mapped into pagetables for k8s global memcg */
+	count_t	inactiveanon;	/* number of lru inactive anon pages for k8s global memcg */
+	count_t	activeanon;	/* number of lru active anon pages for k8s global memcg */
+	count_t	inactivefile;	/* number of lru inactive file pages for k8s global memcg */
+	count_t	activefile;	/* number of lru active file pages for k8s global memcg */
+	count_t	usagefile;	/* number of current usage pages for k8s global memcg */
+	count_t	workingset;	/* k8s definition: current usage pages minus inactive file pages */
+};
+
 /************************************************************************/
 
 struct	sstat {
@@ -439,6 +452,7 @@ struct sstat {
 	struct gpustat	gpu;
 	struct ifbstat	ifb;
 	struct llcstat	llc;
+	struct k8smem	k8smem;
 
 	struct wwwstat	www;
 };
diff --git a/showlinux.c b/showlinux.c
index 3d0b0947..83a7cb87 100644
--- a/showlinux.c
+++ b/showlinux.c
@@ -244,6 +244,19 @@ sys_printdef *llcsyspdefs[] = {
 	&syspdef_BLANKBOX,
         0
 };
+sys_printdef *k8smemsyspdefs[] = {	/* fields that can be used in the K8S line, 0-terminated */
+	&syspdef_K8SFILE,
+	&syspdef_K8SANON,
+	&syspdef_K8SSHMEM,
+	&syspdef_K8SFILEMAPPED,
+	&syspdef_K8SINACTIVEANON,
+	&syspdef_K8SACTIVEANON,
+	&syspdef_K8SINACTIVEFILE,
+	&syspdef_K8SACTIVEFILE,
+	&syspdef_K8SUSAGEFILE,
+	&syspdef_K8SWORKINGSET,
+        0
+};
 sys_printdef *psisyspdefs[] = {
 	&syspdef_PSICPUSTOT,
 	&syspdef_PSIMEMSTOT,
@@ -520,6 +533,7 @@ sys_printpair swpline[MAXITEMS];
 sys_printpair memnumaline[MAXITEMS];
 sys_printpair cpunumaline[MAXITEMS];
 sys_printpair llcline[MAXITEMS];
+sys_printpair k8smemline[MAXITEMS];	/* layout of the K8S line */
 sys_printpair pagline[MAXITEMS];
 sys_printpair psiline[MAXITEMS];
 sys_printpair contline[MAXITEMS];
@@ -1027,6 +1041,23 @@ pricumproc(struct sstat *sstat, struct devtstat *devtstat,
 	                sstat, &extra);
 	        }
 
+	        if (k8smemline[0].f == 0)	/* first entry still empty: fill with builtin layout */
+	        {
+	            make_sys_prints(k8smemline, MAXITEMS,
+	                "K8SFILE:1 "
+	                "K8SANON:1 "
+	                "K8SSHMEM:1 "
+	                "K8SFILEMAPPED:2 "
+	                "K8SACTIVEANON:2 "
+	                "K8SINACTIVEANON:2 "
+	                "K8SACTIVEFILE:2 "
+	                "K8SINACTIVEFILE:1 "
+	                "K8SUSAGEFILE:1 "
+	                "K8SWORKINGSET:1 ",
+	                k8smemsyspdefs, "builtin k8smemline",
+	                sstat, &extra);
+	        }
+
 	        if (pagline[0].f == 0)
 	        {
 	            make_sys_prints(pagline, MAXITEMS,
@@ -2010,6 +2041,28 @@ prisyst(struct sstat *sstat, int curline, int nsecs, int avgval,
 		}
 	}
 
+	/*
+	** k8s global memory.stat statistics (shown when fixedhead is set or any value is nonzero)
+	*/
+	if (fixedhead                   ||
+	    sstat->k8smem.file          ||
+	    sstat->k8smem.anon          ||
+	    sstat->k8smem.shmem         ||
+	    sstat->k8smem.filemapped    ||
+	    sstat->k8smem.inactiveanon  ||
+	    sstat->k8smem.activeanon    ||
+	    sstat->k8smem.inactivefile  ||
+	    sstat->k8smem.activefile    ||
+	    sstat->k8smem.usagefile     ||
+	    sstat->k8smem.workingset      )
+	{
+		if (screen)
+			move(curline, 0);
+
+		showsysline(k8smemline, sstat, &extra, "K8S", 0);
+		curline++;
+	}
+
 	/*
 	** PAGING statistics
 	*/
@@ -2978,6 +3031,13 @@ do_ownllcline(char *name, char *val)
 	                NULL, NULL);
 }
 
+void
+do_ownk8smemline(char *name, char *val)
+{	/* fill k8smemline from the field list in 'val'; 'name' is handed to make_sys_prints() as the list's name */
+	make_sys_prints(k8smemline, MAXITEMS, val, k8smemsyspdefs, name,
+	                NULL, NULL);
+}
+
 void
 do_owndskline(char *name, char *val)
 {
diff --git a/showlinux.h b/showlinux.h
index f275a82a..7b9171a0 100644
--- a/showlinux.h
+++ b/showlinux.h
@@ -144,6 +144,7 @@ void do_ownpagline(char *, char *);
 void do_ownmemnumaline(char *, char *);
 void do_owncpunumaline(char *, char *);
 void do_ownllcline(char *, char *);
+void do_ownk8smemline(char *, char *);
 void do_owndskline(char *, char *);
 void do_ownnettransportline(char *, char *);
 void do_ownnetnetline(char *, char *);
@@ -266,6 +267,16 @@ extern sys_printdef syspdef_NUMACPUGUEST;
 extern sys_printdef syspdef_LLCMBMTOTAL;
extern sys_printdef syspdef_LLCMBMLOCAL; extern sys_printdef syspdef_NUMLLC; +extern sys_printdef syspdef_K8SFILE; +extern sys_printdef syspdef_K8SANON; +extern sys_printdef syspdef_K8SSHMEM; +extern sys_printdef syspdef_K8SFILEMAPPED; +extern sys_printdef syspdef_K8SACTIVEANON; +extern sys_printdef syspdef_K8SINACTIVEANON; +extern sys_printdef syspdef_K8SACTIVEFILE; +extern sys_printdef syspdef_K8SINACTIVEFILE; +extern sys_printdef syspdef_K8SUSAGEFILE; +extern sys_printdef syspdef_K8SWORKINGSET; extern sys_printdef syspdef_PAGSCAN; extern sys_printdef syspdef_PAGSTEAL; extern sys_printdef syspdef_PAGSTALL; diff --git a/showsys.c b/showsys.c index 4948db7a..cbdf6631 100644 --- a/showsys.c +++ b/showsys.c @@ -2048,6 +2048,126 @@ sysprt_NUMLLC(struct sstat *sstat, extraparam *as, int badness, int *color) sys_printdef syspdef_NUMLLC = {"NUMLLC", sysprt_NUMLLC, NULL}; /*******************************************************************/ +static char * +sysprt_K8SFILE(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "file "; + + *color = -1; + val2memstr(sstat->k8smem.file * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SFILE = {"K8SFILE", sysprt_K8SFILE, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SANON(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "anon "; + + *color = -1; + val2memstr(sstat->k8smem.anon * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SANON = {"K8SANON", sysprt_K8SANON, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SSHMEM(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "shmem "; + + *color = -1; + val2memstr(sstat->k8smem.shmem * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SSHMEM = {"K8SSHMEM", sysprt_K8SSHMEM, NULL}; 
+/*******************************************************************/ +static char * +sysprt_K8SFILEMAPPED(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "fmap "; + + *color = -1; + val2memstr(sstat->k8smem.filemapped * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SFILEMAPPED = {"K8SFILEMAPPED", sysprt_K8SFILEMAPPED, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SACTIVEANON(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "actan "; + + *color = -1; + val2memstr(sstat->k8smem.activeanon * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SACTIVEANON = {"K8SACTIVEANON", sysprt_K8SACTIVEANON, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SINACTIVEANON(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "inan "; + + *color = -1; + val2memstr(sstat->k8smem.inactiveanon * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SINACTIVEANON = {"K8SINACTIVEANON", sysprt_K8SINACTIVEANON, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SACTIVEFILE(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "actfl "; + + *color = -1; + val2memstr(sstat->k8smem.activefile * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SACTIVEFILE = {"K8SACTIVEFILE", sysprt_K8SACTIVEFILE, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SINACTIVEFILE(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "infl "; + + *color = -1; + val2memstr(sstat->k8smem.inactivefile * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SINACTIVEFILE = {"K8SINACTIVEFILE", 
sysprt_K8SINACTIVEFILE, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SUSAGEFILE(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "usage "; + + *color = -1; + val2memstr(sstat->k8smem.usagefile * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SUSAGEFILE = {"K8SUSAGEFILE", sysprt_K8SUSAGEFILE, NULL}; +/*******************************************************************/ +static char * +sysprt_K8SWORKINGSET(struct sstat *sstat, extraparam *as, int badness, int *color) +{ + static char buf[16] = "wkset "; + + *color = -1; + val2memstr(sstat->k8smem.workingset * pagesize, buf+6, MBFORMAT, 0, 0); + return buf; +} + +sys_printdef syspdef_K8SWORKINGSET = {"K8SWORKINGSET", sysprt_K8SWORKINGSET, NULL}; +/*******************************************************************/ // general formatting of PSI field in avg10/avg60/avg300 static void psiformatavg(struct psi *p, char *head, char *buf, int bufsize)