From 1ee6144d3d612362cb9983eb74b67369f71bf82c Mon Sep 17 00:00:00 2001
From: Abhinav Anil Sharma <sharmaabhinav@google.com>
Date: Wed, 18 Dec 2024 19:37:48 -0500
Subject: [PATCH] Fix test and add more narration comments.

---
 clients/drcachesim/scheduler/scheduler.h      |  3 +
 .../scheduler/scheduler_dynamic.cpp           | 12 ++-
 .../drcachesim/tests/scheduler_unit_tests.cpp | 83 +++++++++++++------
 3 files changed, 71 insertions(+), 27 deletions(-)
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index 3c7403e309c..dda51faa35c 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -758,6 +758,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
          * The scheduling quantum duration for preemption, in instruction count,
          * for #QUANTUM_INSTRUCTIONS.  The time passed to next_record() is ignored
          * for purposes of quantum preempts.
+         *
+         * Instructions executed in a quantum may end up higher than the specified
+         * value to avoid interruption of the kernel system call sequence.
          */
         // We pick 10 million to match 2 instructions per nanosecond with a 5ms quantum.
         uint64_t quantum_duration_instrs = 10 * 1000 * 1000;
diff --git a/clients/drcachesim/scheduler/scheduler_dynamic.cpp b/clients/drcachesim/scheduler/scheduler_dynamic.cpp
index 4e9b3267bd1..8f8dad07eb8 100644
--- a/clients/drcachesim/scheduler/scheduler_dynamic.cpp
+++ b/clients/drcachesim/scheduler/scheduler_dynamic.cpp
@@ -512,7 +512,9 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::check_for_input_switch(
         ++input->instrs_in_quantum;
         if (input->instrs_in_quantum > options_.quantum_duration_instrs) {
             if (outputs_[output].in_syscall_code) {
-                VPRINT(this, 4,
+                // XXX: Maybe this should be printed only once per-syscall-instance to
+                // reduce log spam.
+                VPRINT(this, 5,
                        "next_record[%d]: input %d delaying context switch "
                        "after end of instr quantum due to syscall trace\n",
                        output, input->index);
@@ -546,7 +548,9 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::check_for_input_switch(
             // setting input->switching_pre_instruction.
             this->record_type_is_instr_boundary(record, outputs_[output].last_record)) {
             if (outputs_[output].in_syscall_code) {
-                VPRINT(this, 4,
+                // XXX: Maybe this should be printed only once per-syscall-instance to
+                // reduce log spam.
+                VPRINT(this, 5,
                        "next_record[%d]: input %d delaying context switch after end of "
                        "time quantum after %" PRIu64 " due to syscall trace\n",
                        output, input->index, input->time_spent_in_quantum);
@@ -602,6 +606,10 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::process_marker(
     case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
         outputs_[output].in_syscall_code = false;
         break;
+    case TRACE_MARKER_TYPE_TIMESTAMP:
+        // Syscall sequences are not expected to have a timestamp.
+        assert(!outputs_[output].in_syscall_code);
+        break;
     case TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH: {
         if (!options_.honor_direct_switches)
             break;
diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
index 3491bbac7e6..9efb6fc6f9c 100644
--- a/clients/drcachesim/tests/scheduler_unit_tests.cpp
+++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -190,7 +190,8 @@ run_lockstep_simulation(scheduler_t &scheduler, int num_outputs, memref_tid_t ti
     int64_t meta_records = 0;
     // Record the threads, one char each.
     std::vector<std::string> sched_as_string(num_outputs);
-    static constexpr char THREAD_LETTER_START = 'A';
+    static constexpr char THREAD_LETTER_START_USER = 'A';
+    static constexpr char THREAD_LETTER_START_KERNEL = 'a';
     static constexpr char WAIT_SYMBOL = '-';
     static constexpr char IDLE_SYMBOL = '_';
     static constexpr char NON_INSTR_SYMBOL = '.';
@@ -229,8 +230,10 @@ run_lockstep_simulation(scheduler_t &scheduler, int num_outputs, memref_tid_t ti
             }
             assert(status == scheduler_t::STATUS_OK);
             if (type_is_instr(memref.instr.type)) {
+                bool is_kernel = outputs[i]->is_record_kernel();
                 sched_as_string[i] +=
-                    THREAD_LETTER_START + static_cast<char>(memref.instr.tid - tid_base);
+                    (is_kernel ? THREAD_LETTER_START_KERNEL : THREAD_LETTER_START_USER) +
+                    static_cast<char>(memref.instr.tid - tid_base);
             } else {
                 // While this makes the string longer, it is just too confusing
                 // with the same letter seemingly on 2 cores at once without these
@@ -1367,6 +1370,7 @@ test_synthetic_with_syscall_seq()
     static constexpr uint64_t BLOCK_THRESHOLD = 100;
     static constexpr memref_tid_t TID_BASE = 100;
     static constexpr uint64_t KERNEL_CODE_OFFSET = 123456;
+    static constexpr uint64_t SYSTRACE_NUM = 84;
     std::vector<trace_entry_t> inputs[NUM_INPUTS];
     for (int i = 0; i < NUM_INPUTS; i++) {
         memref_tid_t tid = TID_BASE + i;
@@ -1376,49 +1380,78 @@ test_synthetic_with_syscall_seq()
         inputs[i].push_back(make_timestamp(10)); // All the same time priority.
         for (int j = 0; j < NUM_INSTRS; j++) {
             inputs[i].push_back(make_instr(42 + j * 4));
-            // Test accumulation of usage across voluntary switches.
-            if ((i == 0 || i == 1) && j == 1) {
-                inputs[i].push_back(make_timestamp(20));
-                inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL, 42));
-                inputs[i].push_back(
-                    make_marker(TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0));
-                inputs[i].push_back(make_timestamp(120));
-            }
             // Test a syscall sequence starting at each offset within a quantum
             // of instrs.
             if (i <= QUANTUM_DURATION && i == j) {
-                inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL, 84));
+                inputs[i].push_back(make_timestamp(20));
+                inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL, SYSTRACE_NUM));
+                if (i < 2) {
+                    // Thresholds for only blocking syscalls are low enough to
+                    // cause a context switch. So only A and B will try a voluntary
+                    // switch (which may be delayed due to the syscall trace) after
+                    // 1 or 2 instrs respectively.
+                    inputs[i].push_back(
+                        make_marker(TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0));
+                }
+                inputs[i].push_back(make_timestamp(120));
                 inputs[i].push_back(
-                    make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 84));
-                for (int k = 0; k <= QUANTUM_DURATION; ++k)
+                    make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSTRACE_NUM));
+                // A has just one syscall seq instr to show that it still does the
+                // voluntary switch after the syscall trace is done, even though there
+                // is still room for one more instr in its quantum.
+                // D has just one syscall seq instr to show that it will continue
+                // on without a switch after the syscall trace is done because more
+                // instrs were left in the same quantum.
+                // B and C have longer syscall seq to show that they will not be
+                // preempted by voluntary or quantum switches respectively.
+                int count_syscall_instrs = (i == 0 || i == 3) ? 1 : QUANTUM_DURATION;
+                for (int k = 1; k <= count_syscall_instrs; ++k)
                     inputs[i].push_back(make_instr(KERNEL_CODE_OFFSET + k));
-                inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 84));
+                inputs[i].push_back(
+                    make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSTRACE_NUM));
             }
         }
         inputs[i].push_back(make_exit(tid));
     }
-    // A has a syscall sequence at [2,5], B has it at [3,6], C has it at [4,7],
-    // D has it at [5,8].
-    // The syscall sequence consists of 4 instrs which is greater than the
-    // #instr quanta.
-    // Total instrs in A, B, C, and D are 9 + 4 == 13, others have just 9.
+    // A has a syscall sequence at [2,2], B has it at [3,5], C has it at [4,6],
+    // D has it at [5,5].
 
     // Hardcoding here for the 2 outputs and 7 inputs.
     // We make assumptions on the scheduler's initial runqueue assignment
     // being round-robin, resulting in 4 on core0 (odd parity letters) and 3 on
     // core1 (even parity letters).
     // The dots are markers and thread exits.
-    // A has a voluntary switch at its 6th instr (1st in that scheduling). Its
-    // CPU usage persists to its next scheduling which has only 2 letters.
-    // B has a voluntary switch at its 2nd instr, but it doesn't take because a
-    // syscall sequence starts just then.
+    //
+    // A has a voluntary switch after its first two letters, prompted by its
+    // first instr which is a blocking syscall with latency that exceeds switch
+    // threshold, but not before its 2nd instr which is from the syscall trace
+    // and must be shown before the switch happens. Despite there being room for
+    // 1 more instr left in the quantum, the voluntary switch still happens.
+    //
+    // B has a voluntary switch after its first 5 letters, prompted by its 2nd
+    // instr which is a blocking system call with latency that exceeds switch
+    // threshold, but not before its next three instrs which is from the
+    // syscall trace and must be shown before the switch happens. B ends up
+    // executing more instrs than its quantum because of the syscall trace.
+    //
+    // C has a syscall at its third letter (but it doesn't cause a switch
+    // because it doesn't have sufficiently high latency), followed by the
+    // syscall trace of three additional letters. C ends up
+    // executing more instrs than the quantum limit because of the syscall
+    // trace.
+    //
+    // D has a syscall at its 4th letter, followed by a 1-instr syscall
+    // trace. D continues with its regular instrs without a context switch
+    // at its 6th letter because there is still room for more instrs left in
+    // the quantum.
+    //
     // Since core0 has an extra input, core1 finishes
     // its runqueue first and then steals G from core0 (migration threshold is 0)
     // and finishes it off.
     static const char *const CORE0_SCHED_STRING =
-        "..A..AAAA...CCC..CCCC...EEE..GGGA....CCCEEEGGGAACCC.EEE.AAAAA.";
+        "..A.....a...CCC....ccc...EEE..GGGACCCEEEGGGAAACCC.EEE.AAAA.";
     static const char *const CORE1_SCHED_STRING =
-        "..BB......BBBB...DDD..FFFBBBD..DDDD.FFFBBBDDDFFF.B.DD.GGG.____";
+        "..BB.....bbb...DDD..FFFBBBD....d.DFFFBBBDDDFFF.B.D.GGG.____";
     {
         // Test instruction quanta.
         std::vector<scheduler_t::input_workload_t> sched_inputs;