Skip to content

Commit

Permalink
allow lcs reader to load n_req directly;
Browse files Browse the repository at this point in the history
fix a bug in version;
clean up;
add lcs_reader.py
  • Loading branch information
1a1a11a committed Dec 14, 2024
1 parent 2c29e13 commit d1889e6
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 24 deletions.
21 changes: 4 additions & 17 deletions libCacheSim/bin/traceUtils/traceConvLCS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ void convert_to_lcs(reader_t *reader, std::string ofilepath, bool output_txt, bo
std::unordered_map<int32_t, int32_t> ttl_cnt;

lcs_trace_stat_t stat;
stat.version = 1;
memset(&stat, 0, sizeof(stat));
stat.version = CURR_STAT_VERSION;
int64_t n_req_total = get_num_of_req(reader);
obj_map.reserve(n_req_total / 100 + 1e4);

Expand Down Expand Up @@ -98,18 +98,6 @@ void convert_to_lcs(reader_t *reader, std::string ofilepath, bool output_txt, bo
}
}

// if (lcs_ver == 1) {
// // lcs_req_v1_t *lcs_req_v1 = reinterpret_cast<lcs_req_v1_t *>(lcs_req);
// lcs_req_v1_t lcs_req;
// lcs_req.clock_time = req->clock_time;
// lcs_req.obj_id = req->obj_id;
// lcs_req.obj_size = req->obj_size;
// lcs_req.next_access_vtime = req->next_access_vtime;

// ofile_temp.write(reinterpret_cast<char *>(&lcs_req), sizeof(lcs_req_v1));
// } else if (lcs_ver == 2) {

// lcs_req_v2_t *lcs_req_v2 = reinterpret_cast<lcs_req_v2_t *>(lcs_req);
lcs_req_full_t lcs_req;
lcs_req.clock_time = req->clock_time;
lcs_req.obj_id = req->obj_id;
Expand Down Expand Up @@ -141,7 +129,6 @@ void convert_to_lcs(reader_t *reader, std::string ofilepath, bool output_txt, bo
}

ofile_temp.write(reinterpret_cast<char *>(&lcs_req), sizeof(lcs_req_full_t));
// }

stat.n_req_byte += req->obj_size;
stat.n_req += 1;
Expand Down Expand Up @@ -258,7 +245,7 @@ static void _analyze_trace(lcs_trace_stat_t &stat, const std::unordered_map<uint
for (int i = 0; i < freq_cnt_vec.size(); i++) {
for (int j = 0; j < freq_cnt_vec[i].second; j++) {
log_freq[n] = log(static_cast<double>(freq_cnt_vec[i].first));
log_rank[n] = log(static_cast<double>(n+1));
log_rank[n] = log(static_cast<double>(n + 1));
n++;
}
}
Expand All @@ -276,8 +263,8 @@ static void _analyze_trace(lcs_trace_stat_t &stat, const std::unordered_map<uint
stat.most_common_freq_ratio[i] = (float)freq_cnt_vec[i].second / stat.n_obj;
}

INFO("highest freq: %ld %ld %ld %ld skewness %.4lf\n", stat.highest_freq[0], stat.highest_freq[1], stat.highest_freq[2],
stat.highest_freq[3], stat.skewness);
INFO("highest freq: %ld %ld %ld %ld skewness %.4lf\n", stat.highest_freq[0], stat.highest_freq[1],
stat.highest_freq[2], stat.highest_freq[3], stat.skewness);
INFO("most common freq (req fraction): %d(%.4lf) %d(%.4lf) %d(%.4lf) %d(%.4lf)...\n", stat.most_common_freq[0],
stat.most_common_freq_ratio[0], stat.most_common_freq[1], stat.most_common_freq_ratio[1],
stat.most_common_freq[2], stat.most_common_freq_ratio[2], stat.most_common_freq[3],
Expand Down
6 changes: 1 addition & 5 deletions libCacheSim/traceReader/customizedReader/lcs.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ static bool verify(lcs_trace_header_t *header) {
return false;
}

if (header->version > MAX_LCS_VERSION) {
ERROR("invalid trace file, lcs version %ld is not supported\n", (unsigned long)header->version);
return false;
}

lcs_trace_stat_t *stat = &(header->stat);
if (stat->n_req < 0 || stat->n_obj < 0) {
ERROR("invalid trace file, n_req %ld, n_obj %ld\n", (unsigned long)stat->n_req, (unsigned long)stat->n_obj);
Expand All @@ -50,6 +45,7 @@ int lcsReader_setup(reader_t *reader) {
reader->trace_format = BINARY_TRACE_FORMAT;
reader->trace_start_offset = sizeof(lcs_trace_header_t);
reader->obj_id_is_num = true;
reader->n_total_req = header->stat.n_req;

if (reader->lcs_ver == 1) {
reader->item_size = sizeof(lcs_req_v1_t);
Expand Down
3 changes: 1 addition & 2 deletions libCacheSim/traceReader/customizedReader/lcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ extern "C" {

#define LCS_TRACE_START_MAGIC 0x123456789abcdef0
#define LCS_TRACE_END_MAGIC 0x123456789abcdef0

#define MAX_LCS_VERSION 2
#define CURR_STAT_VERSION 1
#define N_MOST_COMMON 16

/******************************************************************************/
Expand Down
207 changes: 207 additions & 0 deletions scripts/lcs_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# see [lcs.h](https://github.com/1a1a11a/libCacheSim/blob/develop/libCacheSim/traceReader/customizedReader/lcs.h) for the definition of the trace format
# typedef struct lcs_trace_stat {
# int64_t version; // version of the stat
# int64_t n_req; // number of requests
# int64_t n_obj; // number of objects
# int64_t n_req_byte; // number of bytes requested
# int64_t n_obj_byte; // number of unique bytes

# int64_t start_timestamp; // in seconds
# int64_t end_timestamp; // in seconds

# int64_t n_read; // number of read requests
# int64_t n_write; // number of write requests
# int64_t n_delete; // number of delete requests

# // object size
# int64_t smallest_obj_size;
# int64_t largest_obj_size;
# int64_t most_common_obj_sizes[N_MOST_COMMON];
# float most_common_obj_size_ratio[N_MOST_COMMON];

# // popularity
# // the request count of the most popular objects
# int64_t highest_freq[N_MOST_COMMON];
# // unpopular objects:
# int32_t most_common_freq[N_MOST_COMMON];
# float most_common_freq_ratio[N_MOST_COMMON];
# // zipf alpha
# double skewness;

# // tenant info
# int32_t n_tenant;
# int32_t most_common_tenants[N_MOST_COMMON];
# float most_common_tenant_ratio[N_MOST_COMMON];

# // key-value cache and object cache specific
# int32_t n_ttl;
# int32_t smallest_ttl;
# int32_t largest_ttl;
# int32_t most_common_ttls[N_MOST_COMMON];
# float most_common_ttl_ratio[N_MOST_COMMON];

# int64_t unused[897];
# } __attribute__((packed)) lcs_trace_stat_t;

# typedef struct lcs_trace_header {
# uint64_t start_magic;
# // the version of lcs trace, see lcs_v1, lcs_v2, etc.
# uint64_t version;
# struct lcs_trace_stat stat;

# uint64_t unused[21];
# uint64_t end_magic;
# } __attribute__((packed)) lcs_trace_header_t;
#

# typedef struct __attribute__((packed)) lcs_req_v1 {
# uint32_t clock_time;
# // this is the hash of key in key-value cache
# // or the logical block address in block cache
# uint64_t obj_id;
# uint32_t obj_size;
# int64_t next_access_vtime;
# } lcs_req_v1_t;

# typedef struct __attribute__((packed)) lcs_req_v2 {
# uint32_t clock_time;
# uint64_t obj_id;
# uint32_t obj_size;
# uint32_t op : 8;
# uint32_t tenant : 24;
# int64_t next_access_vtime;
# } lcs_req_v2_t;


# typedef struct __attribute__((packed)) lcs_req_v3 {
# int64_t clock_time;
# uint64_t obj_id;
# int64_t obj_size;
# uint32_t op : 8;
# uint32_t tenant : 24;
# int64_t next_access_vtime;
# } lcs_req_v3_t;


import struct


# Sizes, in bytes, of the on-disk trace header and of the stat struct
# embedded inside it.
LCS_HEADER_SIZE = 1024 * 8
LCS_TRACE_STAT_SIZE = 1000 * 8
# The same magic value brackets the header at both ends.
LCS_START_MAGIC = 0x123456789ABCDEF0
# Misspelled original name, kept as an alias so existing references still work.
LCS_STRAT_MAGIC = LCS_START_MAGIC
LCS_END_MAGIC = 0x123456789ABCDEF0
# Length of every "most common ..." array in the stat struct.
N_MOST_COMMON = 16


def parse_stat(b, print_stat=True):
    """Decode the leading fields of a packed ``lcs_trace_stat_t`` buffer.

    The buffer is read as little-endian with no padding, in declaration
    order: twelve int64 scalars (version through largest_obj_size), the
    object-size and popularity arrays (each ``N_MOST_COMMON`` entries long),
    and the float64 skewness. Anything after skewness (tenant and TTL
    fields, padding) is ignored.

    Args:
        b: bytes-like object holding at least the parsed prefix of the stat
            struct (552 bytes when ``N_MOST_COMMON`` is 16).
        print_stat: when true, print a human-readable summary to stdout.

    Returns:
        dict mapping field names to the parsed values; array fields are
        tuples of length ``N_MOST_COMMON``.

    Raises:
        struct.error: if ``b`` is shorter than the parsed prefix.
    """
    n = N_MOST_COMMON
    # The C struct declares these fields as int64_t / int32_t, i.e. signed,
    # so decode with "q" / "i" instead of the unsigned "Q" / "I".
    layout = struct.Struct(f"<12q{n}q{n}f{n}q{n}i{n}fd")
    values = layout.unpack_from(b)

    (
        ver,
        n_req,
        n_obj,
        n_req_byte,
        n_obj_byte,
        start_ts,
        end_ts,
        n_read,
        n_write,
        n_delete,
        smallest_obj_size,
        largest_obj_size,
    ) = values[:12]

    # Five arrays of n entries each follow the scalars back to back.
    (
        most_common_obj_sizes,
        most_common_obj_size_ratio,
        highest_freq,
        most_common_freq,
        most_common_freq_ratio,
    ) = [values[12 + i * n : 12 + (i + 1) * n] for i in range(5)]
    skewness = values[-1]

    if print_stat:
        print(
            f"version: {ver}, n_req: {n_req}, n_obj: {n_obj}, n_req_byte: {n_req_byte}, n_obj_byte: {n_obj_byte}"
        )
        print(
            f"start_ts: {start_ts}, end_ts: {end_ts}, duration: {(end_ts-start_ts)/86400:.2f} days, n_read: {n_read}, n_write: {n_write}, n_delete: {n_delete}"
        )
        print(
            f"smallest_obj_size: {smallest_obj_size}, largest_obj_size: {largest_obj_size}"
        )

        # A zero ratio marks the end of the populated entries in each array.
        print("most_common_obj_sizes: ", end="")
        for size, ratio in zip(most_common_obj_sizes, most_common_obj_size_ratio):
            if ratio == 0:
                break
            print(f"{size}({ratio:.4f}), ", end="")
        print()

        print(f"highest_freq: {highest_freq}, skewness: {skewness:.4f}")
        print("most_common_freq: ", end="")
        for freq, ratio in zip(most_common_freq, most_common_freq_ratio):
            if ratio == 0:
                break
            print(f"{freq}({ratio:.4f}), ", end="")
        print()

    return {
        "version": ver,
        "n_req": n_req,
        "n_obj": n_obj,
        "n_req_byte": n_req_byte,
        "n_obj_byte": n_obj_byte,
        "start_ts": start_ts,
        "end_ts": end_ts,
        "n_read": n_read,
        "n_write": n_write,
        "n_delete": n_delete,
        "smallest_obj_size": smallest_obj_size,
        "largest_obj_size": largest_obj_size,
        "most_common_obj_sizes": most_common_obj_sizes,
        "most_common_obj_size_ratio": most_common_obj_size_ratio,
        "highest_freq": highest_freq,
        "most_common_freq": most_common_freq,
        "most_common_freq_ratio": most_common_freq_ratio,
        "skewness": skewness,
    }


def read_header(ifile):
    """Read and validate the fixed-size lcs trace header from ``ifile``.

    Consumes ``LCS_HEADER_SIZE`` bytes from the current file position,
    checks the start and end magic values, prints the embedded trace
    statistics via ``parse_stat`` and returns the trace format version.

    Args:
        ifile: binary file object positioned at the start of the header.

    Returns:
        The lcs trace version stored in the header (an int).

    Raises:
        RuntimeError: if the header is truncated or either magic value does
            not match.
    """
    header = ifile.read(LCS_HEADER_SIZE)
    # Fail early with a clear message instead of an opaque struct.error or a
    # misleading magic mismatch when the file is too short to hold a header.
    if len(header) < LCS_HEADER_SIZE:
        raise RuntimeError(
            f"Invalid trace file: header has {len(header)} bytes, expected {LCS_HEADER_SIZE}"
        )

    start_magic, version = struct.unpack_from("<QQ", header, 0)
    (end_magic,) = struct.unpack_from("<Q", header, LCS_HEADER_SIZE - 8)
    if start_magic != LCS_STRAT_MAGIC:
        raise RuntimeError(f"Invalid trace file start magic {start_magic:016x}")
    if end_magic != LCS_END_MAGIC:
        raise RuntimeError(f"Invalid trace file end magic {end_magic:016x}")

    # The stat struct sits right after the two 8-byte leading fields.
    parse_stat(header[16 : 16 + LCS_TRACE_STAT_SIZE], print_stat=True)

    return version


def read_trace(ifilepath, n_max_req=-1):
    """Print the header statistics and the requests of an lcs trace file.

    Each request record is unpacked with the fixed layout for the trace
    version reported by the header and printed as a tuple.

    Args:
        ifilepath: path of the lcs trace file.
        n_max_req: stop after this many requests; a value <= 0 means read
            until end of file.

    Raises:
        RuntimeError: if the header is invalid or the trace version has no
            known record layout.
        struct.error: if the file ends with a partial record.
    """
    # Record layouts for lcs versions 1, 2 and 3 (little-endian, packed).
    # NOTE(review): for v2/v3 the 4th field is the raw 32-bit word holding the
    # op:8 / tenant:24 bitfields; presumably op is the low byte -- confirm
    # against the writer's compiler before decoding it.
    formats = (
        struct.Struct("<IQIq"),
        struct.Struct("<IQIIq"),
        struct.Struct("<qQqIq"),
    )

    # The context manager closes the file even if parsing raises.
    with open(ifilepath, "rb") as ifile:
        version = read_header(ifile)
        # Without this check version 0 would index formats[-1] and silently
        # decode the trace using the v3 layout.
        if not 1 <= version <= len(formats):
            raise RuntimeError(f"Unsupported lcs version {version}")
        s = formats[version - 1]

        n_req = 0
        # read() returns b"" at end of file, which ends the iteration.
        for b in iter(lambda: ifile.read(s.size), b""):
            print(s.unpack(b))
            n_req += 1
            if n_max_req > 0 and n_req >= n_max_req:
                break


# Command-line entry point: lcs_reader.py /path/trace [n_req]
if __name__ == "__main__":
    import sys

    argv = sys.argv
    if len(argv) < 2:
        print(f"Usage: {argv[0]} /path/trace [n_req]")
        sys.exit(1)

    # The optional second argument caps the number of requests printed;
    # -1 means no limit.
    limit = -1
    if len(argv) > 2:
        limit = int(argv[2])
    read_trace(argv[1], limit)

0 comments on commit d1889e6

Please sign in to comment.