Skip to content

Commit

Permalink
i#6466: Fix raw2trace missing encoding error for filtered traces (#6465)
Browse files Browse the repository at this point in the history
Fixes a corner case for filtered traces in the raw2trace delayed branch
logic that affects how encodings are written.

Adds an increment missing for zero sized pc entries for the ordinal into
the decode_pcs array. This caused the wrong decode_pc to be recorded as
emitted, which caused the "missing encoding" error when encoding for that
decode_pc was not really emitted later.

This affects CTI that read memory and may generate in some cases only a
zero-sized PC entry (if the instr is i-filtered out but its memref isn't).

Adds a raw2trace_unit_test for filtered traces that fails without this fix.

Fixes: #6466
  • Loading branch information
abhinav92003 authored Nov 20, 2023
1 parent 63c701b commit 40e2edb
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 16 deletions.
151 changes: 147 additions & 4 deletions clients/drcachesim/tests/raw2trace_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,13 @@ class archive_ostream_test_t : public archive_ostream_t {
};

offline_entry_t
make_header(int version = OFFLINE_FILE_VERSION)
make_header(int version = OFFLINE_FILE_VERSION, uint64_t additional_file_types = 0)
{
offline_entry_t entry;
entry.extended.type = OFFLINE_TYPE_EXTENDED;
entry.extended.ext = OFFLINE_EXT_TYPE_HEADER;
entry.extended.valueA = OFFLINE_FILE_TYPE_DEFAULT | OFFLINE_FILE_TYPE_ENCODINGS |
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS;
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | additional_file_types;
entry.extended.valueB = version;
return entry;
}
Expand Down Expand Up @@ -288,7 +288,7 @@ check_entry(std::vector<trace_entry_t> &entries, int &idx, unsigned short expect
int expected_size, addr_t expected_addr = 0)
{
if (expected_type != entries[idx].type ||
(expected_size > 0 &&
(expected_size >= 0 &&
static_cast<unsigned short>(expected_size) != entries[idx].size) ||
(expected_addr > 0 && expected_addr != entries[idx].addr)) {
std::cerr << "Entry " << idx << " has type " << entries[idx].type << " and size "
Expand Down Expand Up @@ -2738,6 +2738,149 @@ test_is_maybe_blocking_syscall(void *drcontext)
return true;
}

bool
test_ifiltered(void *drcontext)
{
#if defined(X86) || defined(ARM)
std::cerr << "\n===============\nTesting ifiltered trace\n";
// Our synthetic test first constructs a list of instructions to be encoded into
// a buffer for decoding by raw2trace.
instrlist_t *ilist = instrlist_create(drcontext);
// raw2trace doesn't like offsets of 0 so we shift with a nop.
instr_t *nop = XINST_CREATE_nop(drcontext);
instr_t *move1 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instr_t *move2 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instr_t *jcc =
XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(move1));
// Control flow in the test assumes that memaddr stores address to jcc.
instr_t *jmp =
XINST_CREATE_jump_mem(drcontext, opnd_create_mem_instr(jcc, 0, OPSZ_PTR));
instr_t *move3 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instrlist_append(ilist, nop);
instrlist_append(ilist, move1);
instrlist_append(ilist, jmp);
instrlist_append(ilist, jcc);
instrlist_append(ilist, move2);
instrlist_append(ilist, move3);
size_t offs_nop = 0;
size_t offs_move1 = offs_nop + instr_length(drcontext, nop);
size_t offs_jmp = offs_move1 + instr_length(drcontext, move1);
int jmp_length = instr_length(drcontext, jmp);
size_t offs_jcc = offs_jmp + jmp_length;
size_t offs_move2 = offs_jcc + instr_length(drcontext, jcc);
size_t offs_move3 = offs_move2 + instr_length(drcontext, move2);

// Now we synthesize our raw trace itself, including a valid header sequence.
std::vector<offline_entry_t> raw;
raw.push_back(make_header(OFFLINE_FILE_VERSION, OFFLINE_FILE_TYPE_IFILTERED));
raw.push_back(make_tid());
raw.push_back(make_pid());
raw.push_back(make_line_size());
// First instance of the jmp instr is filtered out but its memref is not filtered
// out (indicated by the zero sized block), so no encoding will be emitted and
// it will not count towards the chunk instr count. But this will still be
// accumulated as a delayed branch.
raw.push_back(make_block(offs_jmp, 0));
raw.push_back(make_memref(42));
// Second accumulated delayed branch.
raw.push_back(make_block(offs_jcc, 1));
// At this point, the jmp and jcc are accumulated as delayed branches.
// When writing the delayed branches, we want to make sure we correctly track
// the index into decode_pcs. If we don't increment the index at ifiltered
// instrs, the decode pc of jmp will be accidentally used when recording the
// encoding emitted for jcc. This will cause the jmp encoding to not be emitted
// in the next entry because raw2trace incorrectly tracked that it had
// already emitted it.
raw.push_back(make_block(offs_move1, 1));
// Second instance of the jmp instr is not filtered out. Its encoding must be
// emitted by raw2trace, or else the reader (in memref_counter_t) will
// complain about a missing encoding.
raw.push_back(make_block(offs_jmp, 1));
// The memref is also not filtered out. We have a separate pc entry with
// zero instr count just before the memref.
raw.push_back(make_block(offs_jmp, 0));
raw.push_back(make_memref(42));
raw.push_back(make_block(offs_jcc, 1));
raw.push_back(make_block(offs_move2, 1));
// End of first chunk.
raw.push_back(make_block(offs_move3, 1));
raw.push_back(make_exit());

static const int CHUNK_INSTR_COUNT = 5;
std::vector<trace_entry_t> entries;
if (!run_raw2trace(drcontext, raw, ilist, entries, nullptr, CHUNK_INSTR_COUNT))
return false;
int idx = 0;
return (
check_entry(entries, idx, TRACE_TYPE_HEADER, -1) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) &&
check_entry(entries, idx, TRACE_TYPE_THREAD, -1) &&
check_entry(entries, idx, TRACE_TYPE_PID, -1) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) &&
check_entry(entries, idx, TRACE_TYPE_MARKER,
TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) &&
// jmp
// No encoding for the i-filtered instr with 0-instr count.
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) &&
check_entry(entries, idx, TRACE_TYPE_READ, -1) &&
// jcc
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# ifdef X86_32
// An extra encoding entry is needed.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# endif
// Since we cannot infer branch targets accurately for i-filtered traces, this
// has the generic conditional jump type (instead of the more specific
// TRACE_TYPE_INSTR_TAKEN_JUMP type).
check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) &&
// move1
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
// jmp
// This has an encoding because the previous dynamic instance was actually
// i-filtered.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# ifdef X86_32
// An extra encoding entry is needed.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# endif
// In filtered traces, we have one pc entry for the instr itself (if the
// instruction is not i-filtered out) which has the instr length, and another
// zero-length pc entry before each of the instr's memrefs (if the memref
// is not d-filtered out).
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, jmp_length) &&
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) &&
check_entry(entries, idx, TRACE_TYPE_READ, -1) &&
// jcc. No encoding because it has already been emitted above.
// Since we cannot infer branch targets accurately for i-filtered traces, this
// has the generic conditional jump type (instead of the more specific
// TRACE_TYPE_INSTR_UNTAKEN_JUMP type).
check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) &&
// move2
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
// Chunk ends since we've seen exactly 5 instrs.
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) &&
// move3
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) &&
check_entry(entries, idx, TRACE_TYPE_FOOTER, -1));
#else
// This test requires a CTI (so that it gets accumulated as a delayed branch) that
// also reads from memory (so that it's possible to have a case with a zero-sized PC
// entry in the raw trace). AArch64 does not have such an instr.
return true;
#endif
}

int
test_main(int argc, const char *argv[])
{
Expand All @@ -2756,7 +2899,7 @@ test_main(int argc, const char *argv[])
!test_midrseq_end(drcontext) || !test_xfer_modoffs(drcontext) ||
!test_xfer_absolute(drcontext) || !test_branch_decoration(drcontext) ||
!test_stats_timestamp_instr_count(drcontext) ||
!test_is_maybe_blocking_syscall(drcontext))
!test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext))
return 1;
return 0;
}
Expand Down
27 changes: 15 additions & 12 deletions clients/drcachesim/tracer/raw2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3161,19 +3161,19 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start,
start = it;
DEBUG_ASSERT(tdata->cur_chunk_instr_count == 0);
}
if (type_is_instr(static_cast<trace_type_t>(it->type)) &&
// Do not count PC-only i-filtered instrs.
it->size > 0) {
accumulate_to_statistic(tdata,
RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1);
++tdata->cur_chunk_instr_count;
if (type_is_instr(static_cast<trace_type_t>(it->type))) {
++instr_ordinal;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) &&
// We don't want encodings for the PC-only i-filtered entries.
it->size > 0 && instr_ordinal >= static_cast<int>(decode_pcs_size)) {
tdata->error = "decode_pcs is missing entries for written "
"instructions";
return false;
// Do not count PC-only i-filtered instrs.
if (it->size > 0) {
accumulate_to_statistic(
tdata, RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1);
++tdata->cur_chunk_instr_count;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) &&
instr_ordinal >= static_cast<int>(decode_pcs_size)) {
tdata->error = "decode_pcs is missing entries for written "
"instructions";
return false;
}
}
}
// Check for missing encodings after possibly opening a new chunk.
Expand All @@ -3199,6 +3199,9 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start,
// Check whether this instr's encoding has already been emitted
// due to multiple instances of the same delayed branch (the encoding
// cache was cleared in open_new_chunk()).
// XXX: Do we need to delay PC-only (i-filtered) instrs (the ones
// with it->size == 0)? We're anyway skipping over those entries here
// so maybe we could avoid adding them to decode_pcs.
(record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal))) {
// Write any data we were waiting until post-loop to write.
if (it > start &&
Expand Down

0 comments on commit 40e2edb

Please sign in to comment.