From 3935e79e05bcc41a0a92ea4b1b88659a94ba406f Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Mon, 24 Jun 2024 14:04:05 -0500 Subject: [PATCH 01/37] rust PROBE frontend --- probe_src/probe_frontend/.envrc | 1 + probe_src/probe_frontend/Cargo.lock | 1202 ++++++++++++++++++++++ probe_src/probe_frontend/Cargo.toml | 30 + probe_src/probe_frontend/build.rs | 118 +++ probe_src/probe_frontend/build.sh | 9 + probe_src/probe_frontend/flake.lock | 82 ++ probe_src/probe_frontend/flake.nix | 43 + probe_src/probe_frontend/src/arena.rs | 327 ++++++ probe_src/probe_frontend/src/display.rs | 302 ++++++ probe_src/probe_frontend/src/ffi.rs | 9 + probe_src/probe_frontend/src/main.rs | 274 +++++ probe_src/probe_frontend/src/metadata.rs | 17 + probe_src/probe_frontend/src/ops.rs | 404 ++++++++ 13 files changed, 2818 insertions(+) create mode 100644 probe_src/probe_frontend/.envrc create mode 100644 probe_src/probe_frontend/Cargo.lock create mode 100644 probe_src/probe_frontend/Cargo.toml create mode 100644 probe_src/probe_frontend/build.rs create mode 100755 probe_src/probe_frontend/build.sh create mode 100644 probe_src/probe_frontend/flake.lock create mode 100644 probe_src/probe_frontend/flake.nix create mode 100644 probe_src/probe_frontend/src/arena.rs create mode 100644 probe_src/probe_frontend/src/display.rs create mode 100644 probe_src/probe_frontend/src/ffi.rs create mode 100644 probe_src/probe_frontend/src/main.rs create mode 100644 probe_src/probe_frontend/src/metadata.rs create mode 100644 probe_src/probe_frontend/src/ops.rs diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc new file mode 100644 index 00000000..c4b17d79 --- /dev/null +++ b/probe_src/probe_frontend/.envrc @@ -0,0 +1 @@ +use_flake diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock new file mode 100644 index 00000000..bd3d3fc3 --- /dev/null +++ b/probe_src/probe_frontend/Cargo.lock @@ -0,0 +1,1202 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.5.0", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] 
+name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.3", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "color-eyre" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55146f5e46f237f7423d74111267d4597b59b0dad0ffaf7303bce9945d843ad5" +dependencies = [ + "backtrace", + "color-spantrace", + "eyre", + "indenter", + "once_cell", + "owo-colors", + "tracing-error", +] + +[[package]] +name = "color-spantrace" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "darling" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "eyre" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "machine-info" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d0bcde250f7927612edb0807ada4ad1d92915d9632d917df9bf696e74095dce" +dependencies = [ + "anyhow", + "log", + "nvml-wrapper", + "serde", + "sysinfo", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "nvml-wrapper" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "288bd66a5a56d8c97b178412b328419b3fdec261c0cbc4628ddc49cc16db8fc6" +dependencies = [ + "bitflags 1.3.2", + "libloading 0.7.4", + "nvml-wrapper-sys", + "static_assertions", + "thiserror", + "wrapcenum-derive", +] + +[[package]] +name = "nvml-wrapper-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d606d4edf766969f16828ec047ca9aa96652a17bd353dc0613bfaca49b61d6" +dependencies = [ + "libloading 0.7.4", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "prettyplease" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "probe_frontend" +version = "0.1.0" +dependencies = [ + "bindgen", + "chrono", + "clap", + "color-eyre", + "flate2", + "libc", + "log", + "machine-info", + "rayon", + "serde", + "serde_json", + "subprocess", + "tar", + "tempfile", +] + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subprocess" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sysinfo" +version = "0.26.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c18a6156d1f27a9592ee18c1a846ca8dd5c258b7179fc193ae87c74ebb666f5" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "winapi", +] + +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + 
"syn", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "sharded-slab", + "thread_local", + "tracing-core", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + 
"once_cell", + "rustix", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "wrapcenum-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76ff259533532054cfbaefb115c613203c73707017459206380f03b3b3f266e" 
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "xattr"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
+dependencies = [
+ "libc",
+ "linux-raw-sys",
+ "rustix",
+]
diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml
new file mode 100644
index 00000000..4e640d84
--- /dev/null
+++ b/probe_src/probe_frontend/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "probe_frontend"
+version = "0.1.0"
+authors = ["Jenna Fligor "]
+publish = false
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[[bin]]
+name = "probe"
+path = "src/main.rs"
+
+[dependencies]
+chrono = "0.4.38"
+clap = { version = "4.5.7", features = ["derive"] }
+color-eyre = "0.6.3"
+flate2 = "1.0.30"
+libc = "0.2.155"
+log = "0.4.21"
+machine-info = "1.0.9"
+rayon = "1.10.0"
+serde = { version = "1.0.203", features = ["serde_derive"] }
+serde_json = "1.0.117"
+subprocess = "0.2.9"
+tar = "0.4.41"
+tempfile = "3.10.1"
+
+[build-dependencies]
+bindgen = "0.69.4"
diff --git a/probe_src/probe_frontend/build.rs b/probe_src/probe_frontend/build.rs
new file mode 100644
index 00000000..ac19b97c
--- /dev/null
+++ b/probe_src/probe_frontend/build.rs
@@ -0,0 +1,118 @@
+use std::collections::HashSet;
+use std::env;
+use std::path::PathBuf;
+use std::sync::OnceLock;
+
+use bindgen::callbacks::ParseCallbacks;
+
+#[derive(Debug)]
+struct LibprobeCallback;
+
+fn derive_list(name: &str) -> bool {
+    static DERIVE_LIST: OnceLock<HashSet<&'static str>> = OnceLock::new();
+    DERIVE_LIST
+        .get_or_init(|| {
+            HashSet::from([
+                "CloneOp",
+                "CloseOp",
+                "ExitOp",
+                "GetRUsageOp",
+                "InitProcessOp",
+                "InitThreadOp",
+                "MetadataValue__bindgen_ty_1",
+                "MetadataValue__bindgen_ty_2",
+                "WaitOp",
+                "rusage",
+                "statx",
+                "statx_timestamp",
+                "timespec",
+                "timeval",
+            ])
+        })
+        .contains(name)
+}
+
+impl ParseCallbacks for LibprobeCallback {
+    fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec<String> {
+        if derive_list(info.name) {
+            vec!["Serialize".to_owned(), "Deserialize".to_owned()]
+        } else {
+            vec![]
+        }
+    }
+}
+
+fn main() {
+    // Tell cargo to look for shared libraries in the specified directory
+    // println!("cargo:rustc-link-search=/path/to/lib");
+
+    // Tell cargo to tell rustc to link the system bzip2
+    // shared library.
+    // println!("cargo:rustc-link-lib=bz2");
+
+    // The bindgen::Builder is the main entry point
+    // to bindgen, and lets you build up options for
+    // the resulting bindings.
+    let bindings = bindgen::Builder::default()
+        .header_contents(
+            "wrapper",
+            "
+            #define _GNU_SOURCE
+            #include
+            #include
+            #include
+            #include
+            #include
+            #include
+
+            // defining this manually instead of using <sys/resource.h> is a
+            // hack, but it greatly reduces the generated code complexity since
+            // in glibc all the long ints are unions over two types that both
+            // alias to long int, this is done for kernel-userland compatibility
+            // reasons that don't matter here.
+ struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long int ru_maxrss; + long int ru_ixrss; + long int ru_idrss; + long int ru_isrss; + long int ru_minflt; + long int ru_majflt; + long int ru_nswap; + long int ru_inblock; + long int ru_oublock; + long int ru_msgsnd; + long int ru_msgrcv; + long int ru_nsignals; + long int ru_nvcsw; + long int ru_nivcsw; + }; + + #define BORROWED + #define OWNED + ", + ) + // The input header we would like to generate + // bindings for. + .header("./include/prov_ops.h") + // .header_contents("sizeof", " + // const size_t OP_SIZE = sizeof(struct Op); + // ") + // only parse the Op type (and any types contained within, recursively) + .allowlist_item("^(Op)$") + // Tell cargo to invalidate the built crate whenever any of the + // included header files changed. + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .parse_callbacks(Box::new(LibprobeCallback {})) + // Finish the builder and generate the bindings. + .generate() + // Unwrap the Result and panic on failure. + .expect("Unable to generate bindings"); + + // Write the bindings to the $OUT_DIR/bindings.rs file. + let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + .write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write bindings!"); +} diff --git a/probe_src/probe_frontend/build.sh b/probe_src/probe_frontend/build.sh new file mode 100755 index 00000000..440e9f8a --- /dev/null +++ b/probe_src/probe_frontend/build.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +set -e +cd "$(dirname "$(realpath "$0")")" +mkdir -p ./include +cp ../libprobe/include/prov_ops.h ./include/prov_ops.h +git add ./include +nix build +git restore --staged ./include diff --git a/probe_src/probe_frontend/flake.lock b/probe_src/probe_frontend/flake.lock new file mode 100644 index 00000000..7934bbe5 --- /dev/null +++ b/probe_src/probe_frontend/flake.lock @@ -0,0 +1,82 @@ +{ + "nodes": { + "crane": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1718078026, + "narHash": "sha256-LbQabH6h86ZzTvDnaZHmMwedRZNB2jYtUQzmoqWQoJ8=", + "owner": "ipetkov", + "repo": "crane", + "rev": "a3f0c63eed74a516298932b9b1627dd80b9c3892", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1718276985, + "narHash": "sha256-u1fA0DYQYdeG+5kDm1bOoGcHtX0rtC7qs2YA2N1X++I=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "3f84a279f1a6290ce154c5531378acc827836fbb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git 
a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix new file mode 100644 index 00000000..45a22d2b --- /dev/null +++ b/probe_src/probe_frontend/flake.nix @@ -0,0 +1,43 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + crane.url = "github:ipetkov/crane"; + crane.inputs.nixpkgs.follows = "nixpkgs"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { + self, + nixpkgs, + crane, + flake-utils, + ... + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = nixpkgs.legacyPackages.${system}; + craneLib = crane.mkLib pkgs; + + crate = craneLib.buildPackage { + src = ./.; + + # Add extra inputs here or any other derivation settings + doCheck = true; + # buildInputs = []; + nativeBuildInputs = [ + pkgs.rustPlatform.bindgenHook + ]; + }; + in { + packages.default = crate; + checks = { + inherit crate; + }; + devShells.default = craneLib.devShell { + checks = self.checks.${system}; + packages = with pkgs; [ + rust-analyzer + cargo-audit + ]; + }; + }); +} diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/src/arena.rs new file mode 100644 index 00000000..785e204b --- /dev/null +++ b/probe_src/probe_frontend/src/arena.rs @@ -0,0 +1,327 @@ +#![deny(unsafe_op_in_unsafe_fn)] + +use color_eyre::eyre::{eyre, ContextCompat, Report, Result, WrapErr}; +use rayon::iter::{ParallelBridge, ParallelIterator}; +use serde::{Deserialize, Serialize}; +use std::{ + collections::HashMap, + ffi::{OsStr, OsString}, + fs::{self, DirEntry, File}, + io::Write, + mem::size_of, + path::{Path, PathBuf}, +}; + +use crate::{ + ffi, + ops::{self, DecodeFfi}, +}; + +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ArenaHeader { + instantiation: libc::size_t, + base_address: libc::uintptr_t, + capacity: libc::uintptr_t, + used: libc::uintptr_t, +} + +pub struct OpsArena<'a> { + // raw is needed even though it's unused since ops is a reference to it; + // the compiler doesn't know this since it's constructed using unsafe code. 
+    #[allow(dead_code)]
+    raw: Vec<u8>,
+    ops: &'a [ffi::Op],
+}
+
+impl<'a> OpsArena<'a> {
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self> {
+        if bytes.len() < size_of::<ArenaHeader>() {
+            return Err(eyre!(
+                "Arena buffer too small, got {}, minimum size {}",
+                bytes.len(),
+                size_of::<ArenaHeader>()
+            ));
+        }
+
+        let header = unsafe { get_header_unchecked(&bytes) };
+        if header.capacity != bytes.len() {
+            return Err(eyre!(
+                "Invalid arena capacity, expected {}, got {}",
+                header.capacity,
+                bytes.len(),
+            ));
+        }
+        if header.used > header.capacity {
+            return Err(eyre!(
+                "Arena size {} is greater than capacity {}",
+                header.used,
+                header.capacity,
+            ));
+        }
+        if ((header.used - size_of::<ArenaHeader>()) % size_of::<ffi::Op>()) != 0 {
+            return Err(eyre!(
+                "Arena alignment error: used arena size minus header isn't a multiple of op size"
+            ));
+        }
+
+        let count = (header.used - size_of::<ArenaHeader>()) / size_of::<ffi::Op>();
+
+        let ops = unsafe {
+            let ptr = bytes.as_ptr().add(size_of::<ArenaHeader>()) as *const ffi::Op;
+            std::slice::from_raw_parts(ptr, count)
+        };
+
+        Ok(Self { raw: bytes, ops })
+    }
+
+    pub fn decode(self, ctx: &ArenaContext) -> Result<Vec<ops::Op>> {
+        self.ops
+            .iter()
+            .map(|x| ops::Op::decode(x, ctx))
+            .collect::<Result<Vec<_>>>()
+            .wrap_err("Failed to decode arena ops")
+    }
+}
+
+pub struct DataArena {
+    header: ArenaHeader,
+    raw: Vec<u8>,
+}
+
+impl DataArena {
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Self> {
+        if bytes.len() < size_of::<ArenaHeader>() {
+            return Err(eyre!(
+                "Arena buffer too small, got {}, minimum size {}",
+                bytes.len(),
+                size_of::<ArenaHeader>()
+            ));
+        }
+        let header = unsafe { get_header_unchecked(&bytes) };
+        if header.capacity != bytes.len() {
+            return Err(eyre!(
+                "Invalid arena capacity, expected {}, got {}",
+                header.capacity,
+                bytes.len(),
+            ));
+        }
+        if header.used > header.capacity {
+            return Err(eyre!(
+                "Arena size {} is greater than capacity {}",
+                header.used,
+                header.capacity,
+            ));
+        }
+        Ok(Self { header, raw: bytes })
+    }
+
+    pub fn try_deref(&self, ptr: usize) -> Option<*const u8> {
+        match ptr >= self.header.base_address
+            && ptr <= (self.header.base_address + self.header.used)
+        {
+            false => None,
+            true => Some(unsafe { self.raw.as_ptr().add(ptr - self.header.base_address) }),
+        }
+    }
+}
+
+pub struct ArenaContext(pub Vec<DataArena>);
+
+impl ArenaContext {
+    pub fn try_deref(&self, ptr: usize) -> Option<*const u8> {
+        for vec in self.0.iter() {
+            if let Some(x) = vec.try_deref(ptr) {
+                return Some(x);
+            }
+        }
+        None
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ArenaTree(HashMap>>);
+
+/// # Safety:
+/// invoking this function on any byte buffer that is not a valid libprobe
+/// arena is undefined behavior.
+unsafe fn get_header_unchecked(bytes: &[u8]) -> ArenaHeader {
+    let ptr = bytes as *const [u8] as *const ArenaHeader;
+    unsafe {
+        ArenaHeader {
+            instantiation: (*ptr).instantiation,
+            base_address: (*ptr).base_address,
+            capacity: (*ptr).capacity,
+            used: (*ptr).used,
+        }
+    }
+}
+
+fn filename_numeric<P: AsRef<Path>>(dir: P) -> Result<usize> {
+    let filename = dir
+        .as_ref()
+        .file_name()
+        .ok_or_else(|| eyre!("'{}' has no filename", dir.as_ref().to_string_lossy()))?;
+
+    filename
+        .to_str()
+        .ok_or_else(|| eyre!("filename '{}' not valid UTF-8", filename.to_string_lossy()))?
+        .parse::<usize>()
+        .wrap_err(format!(
+            "unable to convert filename '{}' to integer",
+            filename.to_string_lossy()
+        ))
+}
+
+fn parse_tid<P1: AsRef<Path>, P2: AsRef<Path>>(in_dir: P1, out_dir: P2) -> Result<()> {
+    fn try_files_from_arena_dir<P: AsRef<Path>>(dir: P) -> Result<Vec<PathBuf>> {
+        match fs::read_dir(&dir) {
+            Ok(x) => x
+                .map(|x| {
+                    x.map(|x| x.path())
+                        .wrap_err("Error reading DirEntry from arena directory")
+                })
+                .collect::<Result<Vec<_>, _>>(),
+            Err(e) => Err(Report::from(e).wrap_err("Error opening arena directory")),
+        }
+    }
+
+    let tid = filename_numeric(&in_dir)?;
+    let mut outfile = {
+        let mut path = out_dir.as_ref().to_owned();
+        path.push(tid.to_string());
+        File::create_new(path).wrap_err("Failed to create TID output file")?
+    };
+
+    let paths = fs::read_dir(&in_dir)
+        .wrap_err(format!(
+            "Error reading directory '{}'",
+            in_dir.as_ref().to_string_lossy()
+        ))?
+        .filter_map(|x| match x {
+            Ok(x) => Some((x.file_name(), x)),
+            Err(e) => {
+                log::warn!("Error reading DirEntry in TID directory: {}", e);
+                None
+            }
+        })
+        .collect::<HashMap<_, _>>();
+
+    let data = try_files_from_arena_dir(
+        paths
+            .get(OsStr::new("data"))
+            .wrap_err("Missing data directory from TID directory")?
+            .path(),
+    )?
+    .into_iter()
+    .map(|x| {
+        DataArena::from_bytes(std::fs::read(x).wrap_err("Failed to read file from data directory")?)
+    })
+    .collect::<Result<Vec<_>, _>>()?;
+
+    let ctx = ArenaContext(data);
+
+    try_files_from_arena_dir(
+        paths
+            .get(OsStr::new("ops"))
+            .wrap_err("Missing ops directory from TID directory")?
+            .path(),
+    )?
+    .into_iter()
+    .map(|x| {
+        std::fs::read(x)
+            .wrap_err("Failed to read file from ops directory")
+            .and_then(|x| {
+                OpsArena::from_bytes(x)
+                    .wrap_err("Error constructing OpsArena")?
+                    .decode(&ctx)
+                    .wrap_err("Error decoding OpsArena")
+            })
+    })
+    .try_for_each(|x| {
+        for op in x? {
+            outfile
+                .write_all(
+                    serde_json::to_string(&op)
+                        .wrap_err("Unable to serialize Op")?
+                        .as_bytes(),
+                )
+                .wrap_err("Failed to write serialized Op to tempfile")?;
+            outfile
+                .write_all("\n".as_bytes())
+                .wrap_err("Failed to write newline delimiter to tempfile")?;
+        }
+
+        Ok::<(), Report>(())
+    })?;
+
+    Ok(())
+}
+
+fn parse_exec_epoch<P1: AsRef<Path>, P2: AsRef<Path>>(in_dir: P1, out_dir: P2) -> Result<()> {
+    let epoch = filename_numeric(&in_dir)?;
+
+    let dir = {
+        let mut path = out_dir.as_ref().to_owned();
+        path.push(epoch.to_string());
+        path
+    };
+
+    fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?;
+
+    fs::read_dir(in_dir)
+        .wrap_err("Error opening ExecEpoch directory")?
+        // .par_bridge()
+        .try_for_each(|x| {
+            parse_tid(
+                x.wrap_err("Error reading DirEntry from ExecEpoch directory")?
+                    .path(),
+                &dir,
+            )
+        })?;
+
+    Ok(())
+}
+
+fn parse_pid<P1: AsRef<Path>, P2: AsRef<Path>>(in_dir: P1, out_dir: P2) -> Result<()> {
+    let pid = filename_numeric(&in_dir)?;
+
+    let dir = {
+        let mut path = out_dir.as_ref().to_owned();
+        path.push(pid.to_string());
+        path
+    };
+
+    fs::create_dir(&dir).wrap_err("Failed to create PID output directory")?;
+
+    fs::read_dir(in_dir)
+        .wrap_err("Error opening PID directory")?
+        // .par_bridge()
+        .try_for_each(|x| {
+            parse_exec_epoch(
+                x.wrap_err("Error reading DirEntry from PID directory")?
+                    .path(),
+                &dir,
+            )
+        })?;
+
+    Ok(())
+}
+
+pub fn parse_arena_dir<P1: AsRef<Path>, P2: AsRef<Path> + Sync>(
+    in_dir: P1,
+    out_dir: P2,
+) -> Result<()> {
+    fs::read_dir(in_dir)
+        .wrap_err("Error opening Arena directory")?
+        .par_bridge()
+        .try_for_each(|x| {
+            parse_pid(
+                x.wrap_err("Error reading DirEntry from Arena directory")?
+ .path(), + &out_dir, + ) + })?; + + Ok(()) +} diff --git a/probe_src/probe_frontend/src/display.rs b/probe_src/probe_frontend/src/display.rs new file mode 100644 index 00000000..8471fea9 --- /dev/null +++ b/probe_src/probe_frontend/src/display.rs @@ -0,0 +1,302 @@ +use std::fmt::Display; + +use crate::ops; +use chrono::{DateTime, SecondsFormat}; + +impl Display for ops::statx_timestamp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match DateTime::from_timestamp(self.tv_sec, self.tv_nsec) { + Some(x) => f.write_str(&x.to_rfc3339_opts(SecondsFormat::Secs, true)), + None => f.write_str("[INVALID TIMESTAMP]"), + } + } +} + +impl Display for ops::timeval { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match DateTime::from_timestamp(self.tv_sec, self.tv_usec as u32 * 1000) { + Some(x) => f.write_str(&x.to_rfc3339_opts(SecondsFormat::Secs, true)), + None => f.write_str("[INVALID TIMESTAMP]"), + } + } +} + +impl Display for ops::statx { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "[ mask={}, blksize={}, attributes={}, nlink={}, uid={}, gid={}, \ + mode={:#06o} ino={}, size={}, blocks={}, attributes_mask={}, \ + atime={}, btime={}, ctime={}, mtime={}, rdev_major={}, \ + rdev_minor={}, dev_major={}, dev_minor={}, mnt_id={}, \ + dio_mem_align={}, dio_offset_align={} ]", + self.stx_mask, + self.stx_blksize, + self.stx_attributes, + self.stx_nlink, + self.stx_uid, + self.stx_gid, + self.stx_mode, + self.stx_ino, + self.stx_size, + self.stx_blocks, + self.stx_attributes_mask, + self.stx_atime, + self.stx_btime, + self.stx_ctime, + self.stx_mtime, + self.stx_rdev_major, + self.stx_rdev_minor, + self.stx_dev_major, + self.stx_dev_minor, + self.stx_mnt_id, + self.stx_dio_mem_align, + self.stx_dio_offset_align, + )) + } +} + +impl Display for ops::rusage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "[ utime={}, stime={}, maxrss={}, ixrss={}, idrss={}, isrss={}, \ + minflt={}, majflt={}, nswap={}, inblock={}, oublock={}, msgsnd={}, \ + msgrcv={}, nsignals={}, nvcsw={}, nivcsw={} ]", + self.ru_utime, + self.ru_stime, + self.ru_maxrss, + self.ru_ixrss, + self.ru_idrss, + self.ru_isrss, + self.ru_minflt, + self.ru_majflt, + self.ru_nswap, + self.ru_inblock, + self.ru_oublock, + self.ru_msgsnd, + self.ru_msgrcv, + self.ru_nsignals, + self.ru_nvcsw, + self.ru_nivcsw, + )) + } +} + +impl Display for ops::Path { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "[ dirfd_minus_at_fdcwd={}, path='{}', device_major={}, \ + device_minor={}, inode={}, mtime={}, ctime={}, stat_valid={}, \ + dirfd_valid={} ]", + self.dirfd_minus_at_fdcwd, + self.path.to_string_lossy(), + self.device_major, + self.device_minor, + self.inode, + self.mtime, + self.ctime, + self.stat_valid, + self.dirfd_valid, + )) + } +} + +impl Display for ops::CloneOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "[ flags={}, run_pthread_atfork_handlers={}, child_process_id={}, \ + child_thread_id={}, ferrno={} ]", + self.flags, + self.run_pthread_atfork_handlers, + self.child_process_id, + self.child_thread_id, + self.ferrno, + )) + } +} + +impl Display for ops::CloseOp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "[ low_fd={}, high_fd={}, ferrno={} ]", + self.low_fd, self.high_fd, self.ferrno, + )) + } +} + +impl Display 
for ops::ExitOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ status={}, run_atexit_handlers={} ]",
+            self.status, self.run_atexit_handlers,
+        ))
+    }
+}
+
+impl Display for ops::GetRUsageOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ waitpid_arg={}, getrusage_arg={}, usage={}, ferrno={} ]",
+            self.waitpid_arg, self.getrusage_arg, self.usage, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::InitProcessOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!("[ pid={} ]", self.pid))
+    }
+}
+
+impl Display for ops::InitThreadOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!("[ tid={} ]", self.tid))
+    }
+}
+
+impl Display for ops::WaitOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ pid={}, options={}, status={}, ret={}, ferrno={} ]",
+            self.pid, self.options, self.status, self.ret, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::InitExecEpochOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ epoch={}, program_name={} ]",
+            self.epoch,
+            self.program_name.to_string_lossy(),
+        ))
+    }
+}
+
+impl Display for ops::OpenOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, flags={}, mode={:#06o} fd={}, ferrno={} ]",
+            self.path, self.flags, self.mode, self.fd, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::ChdirOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, ferrno={} ]",
+            self.path, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::ExecOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, ferrno={} ]",
+            self.path, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::AccessOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, mode={:#06o}, flags={}, ferrno={} ]",
+            self.path, self.mode, self.flags, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::StatOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, flags={}, statx_buf={}, ferrno={} ]",
+            self.path, self.flags, self.statx_buf, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::ReaddirOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ dir={}, child='{}', all_children={}, ferrno={} ]",
+            self.dir,
+            self.child.to_string_lossy(),
+            self.all_children,
+            self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::Metadata {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ops::Metadata::Mode(mode) => f.write_fmt(format_args!("Mode[ mode={:#06o} ]", mode)),
+            ops::Metadata::Ownership { uid, gid } => {
+                f.write_fmt(format_args!("Ownership[ uid={}, gid={} ]", uid, gid))
+            }
+            ops::Metadata::Times {
+                is_null,
+                atime,
+                mtime,
+            } => f.write_fmt(format_args!(
+                "Times[ is_null={}, atime={}, mtime={} ]",
+                is_null, atime, mtime
+            )),
+        }
+    }
+}
+
+impl Display for ops::UpdateMetadataOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, flags={}, metadata={}, ferrno={} ]",
+            self.path, self.flags, self.metadata, self.ferrno,
+        ))
+    }
+}
+
+impl Display for ops::ReadLinkOp {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "[ path={}, resolved='{}', ferrno={} ]",
+            self.path,
+            self.resolved.to_string_lossy(),
+            self.ferrno
+        ))
+    }
+}
+
+impl Display for ops::OpInternal {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        fn wfmt(f: &mut std::fmt::Formatter<'_>, x: &str, y: impl Display) -> std::fmt::Result {
+            f.write_fmt(format_args!("{}{}", x, y))
+        }
+
+        match self {
+            ops::OpInternal::InitProcess(x) => wfmt(f, "InitProcessOp", x),
+            ops::OpInternal::InitExecEpoch(x) => wfmt(f, "InitExecEpochOp", x),
+            ops::OpInternal::InitThread(x) => wfmt(f, "InitThreadOp", x),
+            ops::OpInternal::Open(x) => wfmt(f, "OpenOp", x),
+            ops::OpInternal::Close(x) => wfmt(f, "CloseOp", x),
+            ops::OpInternal::Chdir(x) => wfmt(f, "ChdirOp", x),
+            ops::OpInternal::Exec(x) => wfmt(f, "ExecOp", x),
+            ops::OpInternal::Clone(x) => wfmt(f, "CloneOp", x),
+            ops::OpInternal::Exit(x) => wfmt(f, "ExitOp", x),
+            ops::OpInternal::Access(x) => wfmt(f, "AccessOp", x),
+            ops::OpInternal::Stat(x) => wfmt(f, "StatOp", x),
+            ops::OpInternal::Readdir(x) => wfmt(f, "ReaddirOp", x),
+            ops::OpInternal::Wait(x) => wfmt(f, "WaitOp", x),
+            ops::OpInternal::GetRUsage(x) => wfmt(f, "GetRUsageOp", x),
+            ops::OpInternal::UpdateMetadata(x) => wfmt(f, "UpdateMetadataOp", x),
+            ops::OpInternal::ReadLink(x) => wfmt(f, "ReadLinkOp", x),
+        }
+    }
+}
+
+impl Display for ops::Op {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.data.fmt(f)
+    }
+}
diff --git a/probe_src/probe_frontend/src/ffi.rs b/probe_src/probe_frontend/src/ffi.rs
new file mode 100644
index 00000000..f1413947
--- /dev/null
+++ b/probe_src/probe_frontend/src/ffi.rs
@@ -0,0 +1,9 @@
+#![allow(non_upper_case_globals)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+/// raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with
+/// rust-bindgen
+use serde::{Deserialize, Serialize};
+
+include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
diff --git a/probe_src/probe_frontend/src/main.rs b/probe_src/probe_frontend/src/main.rs
new file mode 100644
index 00000000..c0cd64d9
--- /dev/null
+++ b/probe_src/probe_frontend/src/main.rs
@@ -0,0 +1,274 @@
+use std::{
+    ffi::{OsStr, OsString},
+    fs::{self, File},
+    io::{Read, Write},
+    path::{Path, PathBuf},
+};
+
+use clap::Parser;
+use color_eyre::eyre::{eyre, Context, Report, Result};
+use flate2::Compression;
+
+mod arena;
+mod display;
+mod ffi;
+mod metadata;
+mod ops;
+
+#[derive(clap::Parser, Debug, Clone)]
+#[command(author, version, about, long_about = None)]
+#[command(propagate_version = true)]
+struct Cli {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(clap::Subcommand, Debug, Clone)]
+enum Command {
+    /// Execute a command and record its provenance
+    Record {
+        /// Directory to output PROBE log to
+        #[arg(short, long, required = false, default_value = "probe_log")]
+        output: OsString,
+
+        /// Overwrite existing output directory if it exists
+        #[arg(short = 'f', long)]
+        overwrite: bool,
+
+        /// Run in gdb
+        #[arg(long)]
+        gdb: bool,
+
+        /// Override the path to libprobe.so (this path will be canonicalized)
+        #[arg(long)]
+        lib_path: Option<PathBuf>,
+
+        /// Run in verbose & debug build of libprobe
+        #[arg(long)]
+        debug: bool,
+
+        /// Command to execute under provenance
+        #[arg(required = true)]
+        cmd: Vec<OsString>,
+    },
+
+    /// Write the data from a probe log in a human-readable manner
+    Dump {
+        /// Directory to load PROBE log from
from + #[arg(short, long, required = false, default_value = "probe_log")] + input: OsString, + }, +} + +fn main() -> Result<()> { + color_eyre::install()?; + + match Cli::parse().command { + Command::Record { + output, + overwrite, + gdb, + lib_path, + debug, + cmd, + } => { + if PathBuf::from(output.clone()).exists() && overwrite { + fs::remove_file(&output).wrap_err("Error deleting old output file")?; + } + + let mut ld_preload = fs::canonicalize(match lib_path { + Some(x) => x, + None => match std::env::var_os("__PROBE_LIB") { + Some(x) => PathBuf::from(x), + None => match Path::new("/usr/share/probe").exists() { + true => PathBuf::from("/usr/share/probe"), + false => { + return Err(eyre!( + "Can't find libprobe lib path, ensure libprobe is installed in \ + /usr/share/probe or set --lib-path or __PROBE_LIB" + )) + } + }, + }, + }) + .wrap_err("unable to canonicalize lib path")?; + + if debug || gdb { + ld_preload.push("libprobe-dbg.so"); + } else { + ld_preload.push("libprobe.so"); + } + + if let Some(x) = std::env::var_os("LD_PRELOAD") { + ld_preload.push(":"); + ld_preload.push(&x); + } + + let dir = tempfile::tempdir().wrap_err("Failed to create arena directory")?; + + let mut popen = if gdb { + let mut dir_env = OsString::from("__PROBE_DIR="); + dir_env.push(dir.path()); + let mut preload_env = OsString::from("LD_PRELOAD="); + preload_env.push(ld_preload); + + subprocess::Exec::cmd("gdb") + .args(&[ + OsStr::new("--args"), + OsStr::new("env"), + &dir_env, + &preload_env, + ]) + .args(&cmd) + } else { + subprocess::Exec::cmd(&cmd[0]) + .args(&cmd[1..]) + .env("LD_PRELOAD", ld_preload) + .env("__PROBE_DIR", dir.path()) + } + .popen() + .wrap_err("Failed to launch process")?; + + let metadata = metadata::Metadata::new( + popen + .pid() + .expect("just popened process should always have PID") as i32, + ); + + popen.wait().wrap_err("Error awaiting child process")?; + + let file = match File::create_new(output) { + Ok(x) => x, + Err(e) => { + log::error!("Failed to create output file: {}", e); + + let path = format!( + "probe_log_{}_{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .wrap_err("current system time before unix epoch")? + .as_secs() + ); + + let tmp = + File::create_new(&path).wrap_err("Failed to create backup output file"); + + log::error!("backup output file '{}' will be used instead", &path); + + tmp + } + .wrap_err("Failed to create output dir")?, + }; + + let mut tar = + tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); + + let outdir = tempfile::tempdir()?; + + File::create_new(outdir.path().join("_metadata")) + .wrap_err("failed to create metadata file in output directory")? + .write_all( + serde_json::to_string(&metadata) + .wrap_err("Error serializng metadata")? 
+ .as_bytes(), + ) + .wrap_err("Error writing metadata")?; + + arena::parse_arena_dir(dir.path(), &outdir) + .wrap_err("Unable to decode arena directory")?; + + tar.append_dir_all(".", &outdir) + .wrap_err("Failed to copy output dir into archive")?; + tar.finish().wrap_err("Failed to finish writing tarball")?; + + if let Err(e) = outdir.close() { + log::warn!("Failed to close output directory: {}", e); + } + + if let Err(e) = dir.close() { + log::warn!("Failed to close arena directory: {}", e); + } + + Ok::<(), Report>(()) + } + .wrap_err("Record command failed"), + Command::Dump { input } => { + let file = flate2::read::GzDecoder::new(File::open(&input).wrap_err(format!( + "Failed to open input file '{}'", + input.to_string_lossy() + ))?); + + let mut tar = tar::Archive::new(file); + + tar.entries() + .wrap_err("Unable to get tarball entry iterator")? + .try_for_each(|x| { + let mut entry = x.wrap_err("Unable to extract tarball entry")?; + + let path = entry + .path() + .wrap_err("Error getting path of tarball entry")? + .as_ref() + .to_str() + .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? + .to_owned(); + + if path == "_metadata" { + return Ok(()); + } + + let mut buf = String::new(); + let size = entry + .read_to_string(&mut buf) + .wrap_err("unable to read contents of tarball entry")?; + + // this is the case where the entry is a directory + if size == 0 { + return Ok(()); + } + + let hierarchy = path + .split('/') + .map(|x| { + x.parse::().wrap_err(format!( + "Unable to convert path component '{x}' to integer" + )) + }) + .collect::, _>>() + .wrap_err("Unable to extract PID.EPOCH.TID hierarchy")?; + + if hierarchy.len() != 3 { + return Err(eyre!("malformed PID.EPOCH.TID hierarchy")); + } + + let ops = buf + .split('\n') + .filter_map(|x| { + if x.is_empty() { + return None; + } + Some( + serde_json::from_str::(x) + .wrap_err("Error deserializing Op"), + ) + }) + .collect::, _>>() + .wrap_err("Failed to deserialize TID file")?; + + let mut stdout = std::io::stdout().lock(); + for op in ops { + writeln!( + stdout, + "{}.{}.{} >>> {}", + hierarchy[0], hierarchy[1], hierarchy[2], op, + ) + .wrap_err("Error printing Op")?; + } + + Ok(()) + }) + } + .wrap_err("Dump command failed"), + } +} diff --git a/probe_src/probe_frontend/src/metadata.rs b/probe_src/probe_frontend/src/metadata.rs new file mode 100644 index 00000000..0ea74415 --- /dev/null +++ b/probe_src/probe_frontend/src/metadata.rs @@ -0,0 +1,17 @@ +use machine_info::{Machine, SystemInfo}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + entry_pid: libc::pid_t, + system: SystemInfo, +} + +impl Metadata { + pub fn new(pid: libc::pid_t) -> Self { + Self { + entry_pid: pid, + system: Machine::new().system_info(), + } + } +} diff --git a/probe_src/probe_frontend/src/ops.rs b/probe_src/probe_frontend/src/ops.rs new file mode 100644 index 00000000..8e43e2d2 --- /dev/null +++ b/probe_src/probe_frontend/src/ops.rs @@ -0,0 +1,404 @@ +#[allow(unused_imports)] +pub use crate::ffi::{ + dev_t, gid_t, ino_t, mode_t, rusage, statx, statx_timestamp, timespec, timeval, uid_t, CloneOp, + CloseOp, ExitOp, GetRUsageOp, InitProcessOp, InitThreadOp, WaitOp, +}; +use color_eyre::eyre::{eyre, Context}; +pub use std::ffi::{c_int, c_uint}; + +use color_eyre::eyre::Result; +use serde::{Deserialize, Serialize}; +use std::{ + ffi::{OsStr, OsString}, + os::unix::ffi::OsStrExt, + slice, +}; + +use crate::{arena::ArenaContext, ffi}; + +pub(crate) trait DecodeFfi { + fn decode(value: &T, ctx: 
&ArenaContext) -> Result + where + Self: Sized; +} + +pub(crate) trait ConvertFfi { + fn convert(&self, ctx: &ArenaContext) -> Result; +} + +impl ConvertFfi for T +where + U: DecodeFfi, +{ + #[inline] + fn convert(&self, ctx: &ArenaContext) -> Result { + U::decode(self, ctx) + } +} + +fn try_to_osstring(str: *const i8, ctx: &ArenaContext) -> Result { + Ok(if str.is_null() { + OsString::new() + } else { + match ctx.try_deref(str as usize) { + Some(x) => { + OsStr::from_bytes(unsafe { slice::from_raw_parts(x, libc::strlen(x as *const i8)) }) + .to_os_string() + } + None => return Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), + } + }) +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Path { + pub dirfd_minus_at_fdcwd: i32, + pub path: OsString, + pub device_major: dev_t, + pub device_minor: dev_t, + pub inode: ino_t, + pub mtime: statx_timestamp, + pub ctime: statx_timestamp, + pub stat_valid: bool, + pub dirfd_valid: bool, +} + +impl DecodeFfi for Path { + fn decode(value: &ffi::Path, ctx: &ArenaContext) -> Result { + Ok(Self { + dirfd_minus_at_fdcwd: value.dirfd_minus_at_fdcwd, + path: try_to_osstring(value.path, ctx) + .wrap_err("Unable to decode char* into path string")?, + device_major: value.device_major, + device_minor: value.device_minor, + inode: value.inode, + mtime: value.mtime, + ctime: value.ctime, + stat_valid: value.stat_valid, + dirfd_valid: value.dirfd_valid, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InitExecEpochOp { + pub epoch: c_uint, + pub program_name: OsString, +} + +impl DecodeFfi for InitExecEpochOp { + fn decode(value: &ffi::InitExecEpochOp, ctx: &ArenaContext) -> Result { + Ok(Self { + epoch: value.epoch, + program_name: try_to_osstring(value.program_name, ctx) + .wrap_err("Unable to decode program name char* into string")?, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OpenOp { + pub path: Path, + pub flags: c_int, + pub mode: mode_t, + pub fd: i32, + pub ferrno: c_int, +} + +impl DecodeFfi for OpenOp { + fn decode(value: &ffi::OpenOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + flags: value.flags, + mode: value.mode, + fd: value.fd, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChdirOp { + pub path: Path, + pub ferrno: c_int, +} + +impl DecodeFfi for ChdirOp { + fn decode(value: &ffi::ChdirOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExecOp { + pub path: Path, + pub ferrno: c_int, +} + +impl DecodeFfi for ExecOp { + fn decode(value: &ffi::ExecOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AccessOp { + pub path: Path, + pub mode: c_int, + pub flags: c_int, + pub ferrno: c_int, +} + +impl DecodeFfi for AccessOp { + fn decode(value: &ffi::AccessOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + mode: value.mode, + flags: value.flags, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StatOp { + pub path: Path, + pub flags: c_int, + pub statx_buf: statx, + pub ferrno: c_int, +} + +impl DecodeFfi for StatOp { + fn decode(value: &ffi::StatOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + flags: value.flags, + 
statx_buf: value.statx_buf, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReaddirOp { + pub dir: Path, + pub child: OsString, + pub all_children: bool, + pub ferrno: c_int, +} + +impl DecodeFfi for ReaddirOp { + fn decode(value: &ffi::ReaddirOp, ctx: &ArenaContext) -> Result { + Ok(Self { + dir: value.dir.convert(ctx)?, + child: try_to_osstring(value.child, ctx) + .wrap_err("Unable to decode child char* into string")?, + all_children: value.all_children, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Metadata { + Mode(mode_t), + Ownership { + uid: uid_t, + gid: gid_t, + }, + Times { + is_null: bool, + atime: timeval, + mtime: timeval, + }, +} + +/// # safety +/// the [[`ffi::MetadataKind`]] passed to this function must a valid variant of MetadataKind enum +/// and be accurate for the passed value because it directly effects the interpretation of the +/// [[`ffi::MetadataValue`]] union with no additional checks +impl Metadata { + pub unsafe fn from_kind_and_value( + kind: ffi::MetadataKind, + value: ffi::MetadataValue, + ) -> Result { + Ok(match kind { + ffi::MetadataKind_MetadataMode => Metadata::Mode(value.mode), + ffi::MetadataKind_MetadataOwnership => Metadata::Ownership { + uid: value.ownership.uid, + gid: value.ownership.gid, + }, + ffi::MetadataKind_MetadataTimes => Metadata::Times { + is_null: value.times.is_null, + atime: value.times.atime, + mtime: value.times.mtime, + }, + _ => return Err(eyre!("Invalid MetadataKind Variant")), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateMetadataOp { + pub path: Path, + pub flags: c_int, + pub metadata: Metadata, + pub ferrno: c_int, +} + +impl DecodeFfi for UpdateMetadataOp { + fn decode(value: &ffi::UpdateMetadataOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + flags: value.flags, + metadata: unsafe { Metadata::from_kind_and_value(value.kind, value.value) } + .wrap_err("Unable to decode Metadata tagged union")?, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReadLinkOp { + pub path: Path, + pub resolved: OsString, + pub ferrno: c_int, +} + +impl DecodeFfi for ReadLinkOp { + fn decode(value: &ffi::ReadLinkOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.convert(ctx)?, + resolved: try_to_osstring(value.resolved, ctx) + .wrap_err("Unable to decode symlink resolve char* to string")?, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum OpInternal { + InitProcess(InitProcessOp), + InitExecEpoch(InitExecEpochOp), + InitThread(InitThreadOp), + Open(OpenOp), + Close(CloseOp), + Chdir(ChdirOp), + Exec(ExecOp), + Clone(CloneOp), + Exit(ExitOp), + Access(AccessOp), + Stat(StatOp), + Readdir(ReaddirOp), + Wait(WaitOp), + GetRUsage(GetRUsageOp), + UpdateMetadata(UpdateMetadataOp), + ReadLink(ReadLinkOp), +} + +/// # safety +/// the [[`ffi::OpCode`]] passed to this function must a valid variant of OpCode enum +/// and be accurate for the passed value because it directly effects the interpretation of the +/// value union with no additional checks +impl OpInternal { + pub unsafe fn from_kind_and_value( + kind: ffi::OpCode, + value: &ffi::Op__bindgen_ty_1, + ctx: &ArenaContext, + ) -> Result { + Ok(match kind { + ffi::OpCode_init_process_op_code => Self::InitProcess(value.init_process_epoch), + ffi::OpCode_init_exec_epoch_op_code => Self::InitExecEpoch( + value + 
.init_exec_epoch + .convert(ctx) + .wrap_err("Unable to decode InitExecEpochOp")?, + ), + ffi::OpCode_init_thread_op_code => Self::InitThread(value.init_thread), + ffi::OpCode_open_op_code => Self::Open( + value + .open + .convert(ctx) + .wrap_err("Unable to decode OpenOp")?, + ), + ffi::OpCode_close_op_code => Self::Close(value.close), + ffi::OpCode_chdir_op_code => Self::Chdir( + value + .chdir + .convert(ctx) + .wrap_err("Unable to decode ChdirOp")?, + ), + ffi::OpCode_exec_op_code => Self::Exec( + value + .exec + .convert(ctx) + .wrap_err("Unable to decode ExecOp")?, + ), + ffi::OpCode_clone_op_code => Self::Clone(value.clone), + ffi::OpCode_exit_op_code => Self::Exit(value.exit), + ffi::OpCode_access_op_code => Self::Access( + value + .access + .convert(ctx) + .wrap_err("Unable to decode AccessOp")?, + ), + ffi::OpCode_stat_op_code => Self::Stat( + value + .stat + .convert(ctx) + .wrap_err("Unable to decode StatOp")?, + ), + ffi::OpCode_readdir_op_code => Self::Readdir( + value + .readdir + .convert(ctx) + .wrap_err("Unable to decode ReaddirOp")?, + ), + ffi::OpCode_wait_op_code => Self::Wait(value.wait), + ffi::OpCode_getrusage_op_code => Self::GetRUsage(value.getrusage), + ffi::OpCode_update_metadata_op_code => Self::UpdateMetadata( + value + .update_metadata + .convert(ctx) + .wrap_err("Unable to decode UpdateMetadataOp")?, + ), + ffi::OpCode_read_link_op_code => Self::ReadLink( + value + .read_link + .convert(ctx) + .wrap_err("Unable to decode ReadlinkOp")?, + ), + _ => { + if kind < ffi::OpCode_LAST_OP_CODE && kind > ffi::OpCode_FIRST_OP_CODE { + return Err(eyre!( + "Valid OpCode not handled (this is a bug, please report it)" + )); + } else { + return Err(eyre!("Invalid OpCode")); + } + } + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Op { + pub data: OpInternal, + pub time: timespec, +} + +impl DecodeFfi for Op { + fn decode(value: &ffi::Op, ctx: &ArenaContext) -> Result { + Ok(Self { + data: unsafe { OpInternal::from_kind_and_value(value.op_code, &value.data, ctx) } + .wrap_err("Unable to decode Op tagged union")?, + time: value.time, + }) + } +} From d9ad8256156791bf7b1ede96fdf6b77f61c7c251 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Mon, 24 Jun 2024 16:18:29 -0500 Subject: [PATCH 02/37] Added logging facility --- probe_src/probe_frontend/Cargo.lock | 30 ++++++++++++++++++++++++++++ probe_src/probe_frontend/Cargo.toml | 1 + probe_src/probe_frontend/src/main.rs | 18 +++++++++-------- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index bd3d3fc3..232f089a 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -358,6 +358,29 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + [[package]] name = "errno" version = "0.3.9" @@ -439,6 +462,12 @@ dependencies = [ "windows-sys", ] +[[package]] 
+name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -684,6 +713,7 @@ dependencies = [ "chrono", "clap", "color-eyre", + "env_logger", "flate2", "libc", "log", diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index 4e640d84..694c3a35 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -15,6 +15,7 @@ path = "src/main.rs" chrono = "0.4.38" clap = { version = "4.5.7", features = ["derive"] } color-eyre = "0.6.3" +env_logger = "0.11.3" flate2 = "1.0.30" libc = "0.2.155" log = "0.4.21" diff --git a/probe_src/probe_frontend/src/main.rs b/probe_src/probe_frontend/src/main.rs index c0cd64d9..efe326d1 100644 --- a/probe_src/probe_frontend/src/main.rs +++ b/probe_src/probe_frontend/src/main.rs @@ -62,6 +62,8 @@ enum Command { fn main() -> Result<()> { color_eyre::install()?; + env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); + log::info!("Logger Facility Initialized"); match Cli::parse().command { Command::Record { @@ -94,6 +96,7 @@ fn main() -> Result<()> { .wrap_err("unable to canonicalize lib path")?; if debug || gdb { + log::debug!("Using debug version of libprobe"); ld_preload.push("libprobe-dbg.so"); } else { ld_preload.push("libprobe.so"); @@ -143,7 +146,7 @@ fn main() -> Result<()> { log::error!("Failed to create output file: {}", e); let path = format!( - "probe_log_{}_{}", + "./probe_log_{}_{}", std::process::id(), std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -151,8 +154,8 @@ fn main() -> Result<()> { .as_secs() ); - let tmp = - File::create_new(&path).wrap_err("Failed to create backup output file"); + let tmp = File::create_new(&path) + .wrap_err(format!("Failed to create backup output file '{}'", path)); log::error!("backup output file '{}' will be used instead", &path); @@ -191,8 +194,7 @@ fn main() -> Result<()> { } Ok::<(), Report>(()) - } - .wrap_err("Record command failed"), + }, Command::Dump { input } => { let file = flate2::read::GzDecoder::new(File::open(&input).wrap_err(format!( "Failed to open input file '{}'", @@ -214,7 +216,8 @@ fn main() -> Result<()> { .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? 
.to_owned(); - if path == "_metadata" { + + if path == "0_metadata" { return Ok(()); } @@ -268,7 +271,6 @@ fn main() -> Result<()> { Ok(()) }) - } - .wrap_err("Dump command failed"), + }, } } From c457b68f37719ec68417abba10b7511592d7360c Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Mon, 24 Jun 2024 16:33:54 -0500 Subject: [PATCH 03/37] Update .envrc Update the .envrc for probe_frontend to automatically run make on libprobe, as well as export the __PROBE_LIB environment variable so that the frontend can find the libprobe.so --- probe_src/probe_frontend/.envrc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc index c4b17d79..56230f5f 100644 --- a/probe_src/probe_frontend/.envrc +++ b/probe_src/probe_frontend/.envrc @@ -1 +1,4 @@ use_flake + +(cd ../libprobe && make) +export __PROBE_LIB=$(expand_path ../libprobe/build) From 4556de43690e9b5d6e235b23c37d1c399d64f5ea Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Mon, 24 Jun 2024 22:14:27 -0500 Subject: [PATCH 04/37] Documentation and code cleanup --- probe_src/probe_frontend/Cargo.toml | 3 + probe_src/probe_frontend/build.rs | 18 +-- probe_src/probe_frontend/src/arena.rs | 89 +++++++++---- probe_src/probe_frontend/src/display.rs | 82 ++++++------ probe_src/probe_frontend/src/ffi.rs | 2 - probe_src/probe_frontend/src/main.rs | 63 ++++++++- probe_src/probe_frontend/src/metadata.rs | 2 + probe_src/probe_frontend/src/ops.rs | 162 ++++++++++++----------- 8 files changed, 261 insertions(+), 160 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index 694c3a35..0913cfae 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -11,6 +11,9 @@ edition = "2021" name = "probe" path = "src/main.rs" +[lints.rust] +unsafe_op_in_unsafe_fn = "forbid" + [dependencies] chrono = "0.4.38" clap = { version = "4.5.7", features = ["derive"] } diff --git a/probe_src/probe_frontend/build.rs b/probe_src/probe_frontend/build.rs index ac19b97c..33f14227 100644 --- a/probe_src/probe_frontend/build.rs +++ b/probe_src/probe_frontend/build.rs @@ -6,7 +6,7 @@ use std::sync::OnceLock; use bindgen::callbacks::ParseCallbacks; #[derive(Debug)] -struct LibprobeCallback; +struct SerdeDeriveCallback; fn derive_list(name: &str) -> bool { static DERIVE_LIST: OnceLock> = OnceLock::new(); @@ -32,7 +32,7 @@ fn derive_list(name: &str) -> bool { .contains(name) } -impl ParseCallbacks for LibprobeCallback { +impl ParseCallbacks for SerdeDeriveCallback { fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec { if derive_list(info.name) { vec!["Serialize".to_owned(), "Deserialize".to_owned()] @@ -64,12 +64,12 @@ fn main() { #include #include #include - - // defining this manually instead of using is a - // hack, but it greatly reduces the generated code complexity since - // in glibc all the long ints are unions over two types that both - // alias to long int, this is done for kernel-userland compatibility - // reasons that don't matter here. + + // HACK: defining this manually instead of using is + // a huge hack, but it greatly reduces the generated code complexity + // since in glibc all the long ints are unions over two types that + // both alias to long int, this is done for kernel-userland + // compatibilityreasons that don't matter here. 
struct rusage { struct timeval ru_utime; struct timeval ru_stime; @@ -104,7 +104,7 @@ fn main() { // Tell cargo to invalidate the built crate whenever any of the // included header files changed. .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) - .parse_callbacks(Box::new(LibprobeCallback {})) + .parse_callbacks(Box::new(SerdeDeriveCallback {})) // Finish the builder and generate the bindings. .generate() // Unwrap the Result and panic on failure. diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/src/arena.rs index 785e204b..f9577451 100644 --- a/probe_src/probe_frontend/src/arena.rs +++ b/probe_src/probe_frontend/src/arena.rs @@ -1,8 +1,5 @@ -#![deny(unsafe_op_in_unsafe_fn)] - use color_eyre::eyre::{eyre, ContextCompat, Report, Result, WrapErr}; use rayon::iter::{ParallelBridge, ParallelIterator}; -use serde::{Deserialize, Serialize}; use std::{ collections::HashMap, ffi::{OsStr, OsString}, @@ -14,9 +11,10 @@ use std::{ use crate::{ ffi, - ops::{self, DecodeFfi}, + ops::{self, FfiFrom}, }; +/// Arena allocator metadata placed at the beginning of allocator files by libprobe. #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ArenaHeader { @@ -26,11 +24,14 @@ pub struct ArenaHeader { used: libc::uintptr_t, } +/// This struct represents a single `ops/*.dat` arena allocator file emitted by libprobe. pub struct OpsArena<'a> { // raw is needed even though it's unused since ops is a reference to it; // the compiler doesn't know this since it's constructed using unsafe code. #[allow(dead_code)] + /// raw byte buffer of Ops arena allocator. raw: Vec, + /// slice over Ops of the raw buffer. ops: &'a [ffi::Op], } @@ -67,6 +68,7 @@ impl<'a> OpsArena<'a> { let count = (header.used - size_of::()) / size_of::(); + log::debug!("[unsafe] converting Vec to &[ffi::Op]"); let ops = unsafe { let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; std::slice::from_raw_parts(ptr, count) @@ -78,12 +80,13 @@ impl<'a> OpsArena<'a> { pub fn decode(self, ctx: &ArenaContext) -> Result> { self.ops .iter() - .map(|x| ops::Op::decode(x, ctx)) + .map(|x| ops::Op::ffi_from(x, ctx)) .collect::>>() .wrap_err("Failed to decode arena ops") } } +/// This struct represents a single `data/*.dat` arena allocator file emitted by libprobe. pub struct DataArena { header: ArenaHeader, raw: Vec, @@ -126,6 +129,7 @@ impl DataArena { } } +/// this struct represents a `/data` directory from libprobe. pub struct ArenaContext(pub Vec); impl ArenaContext { @@ -139,14 +143,15 @@ impl ArenaContext { } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArenaTree(HashMap>>); - +/// Parse the front of a raw byte buffer into a libprobe arena header +/// /// # Safety: -/// invoking this function on any byte buffer that is not a valid libprobe -/// arena is undefined behavior. +/// Invoking this function on any byte buffer smaller than [`std::mem::size_of()`] +/// bytes is undefined behavior (best case a segfault). Invoking this method on a byte buffer +/// that's not a valid libprobe arena will produce garbage values that should not be used. unsafe fn get_header_unchecked(bytes: &[u8]) -> ArenaHeader { let ptr = bytes as *const [u8] as *const ArenaHeader; + log::debug!("[unsafe] converting byte buffer into ArenaHeader"); unsafe { ArenaHeader { instantiation: (*ptr).instantiation, @@ -157,6 +162,9 @@ unsafe fn get_header_unchecked(bytes: &[u8]) -> ArenaHeader { } } +/// Gets the filename from a path and returns it parsed as an integer. 
+/// +/// errors if the path has no filename or the filename can't be parsed as an integer. fn filename_numeric>(dir: P) -> Result { let filename = dir .as_ref() @@ -173,6 +181,21 @@ fn filename_numeric>(dir: P) -> Result { )) } +/// Recursively parse a TID libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function parses a TID directory in 6 steps: +/// +/// 1. Output file is created. +/// 2. Paths of sub-directory are parsed into a [`HashMap`]. +/// 3. `data` directory is is read and parsed into [`DataArena`]s which are then parsed into an +/// [`ArenaContext`]. +/// 4. `ops` directory is read and parsed into [`OpsArena`]s. +/// 5. [`OpsArena`]s are parsed into which are then parsed into [`ops::Op`]s using the +/// [`ArenaContext`]. +/// 6. [`ops::Op`]s are serialized into json and written line-by-line into the output directory. +/// +/// (steps 5 & 6 are done with iterators to reduce unnecessary memory allocations) fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { fn try_files_from_arena_dir>(dir: P) -> Result> { match fs::read_dir(&dir) { @@ -186,6 +209,7 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul } } + // STEP 1 let tid = filename_numeric(&in_dir)?; let mut outfile = { let mut path = out_dir.as_ref().to_owned(); @@ -193,6 +217,7 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul File::create_new(path).wrap_err("Failed to create TID output file")? }; + // STEP 2 let paths = fs::read_dir(&in_dir) .wrap_err(format!( "Error reading directory '{}'", @@ -207,26 +232,31 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul }) .collect::>(); - let data = try_files_from_arena_dir( - paths - .get(OsStr::new("data")) - .wrap_err("Missing data directory from TID directory")? - .path(), - )? - .into_iter() - .map(|x| { - DataArena::from_bytes(std::fs::read(x).wrap_err("Failed to read file from data directory")?) - }) - .collect::, _>>()?; - - let ctx = ArenaContext(data); + // STEP 3 + let ctx = ArenaContext( + try_files_from_arena_dir( + paths + .get(OsStr::new("data")) + .wrap_err("Missing data directory from TID directory")? + .path(), + )? + .into_iter() + .map(|x| { + DataArena::from_bytes( + std::fs::read(x).wrap_err("Failed to read file from data directory")?, + ) + }) + .collect::, _>>()?, + ); + // STEP 4 try_files_from_arena_dir( paths .get(OsStr::new("ops")) .wrap_err("Missing ops directory from TID directory")? .path(), )? + // STEP 5 .into_iter() .map(|x| { std::fs::read(x) @@ -238,6 +268,7 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul .wrap_err("Error decoding OpsArena") }) }) + // STEP 6 .try_for_each(|x| { for op in x? { outfile @@ -258,6 +289,10 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul Ok(()) } +/// Recursively parse a ExecEpoch libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { let epoch = filename_numeric(&in_dir)?; @@ -283,6 +318,10 @@ fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) - Ok(()) } +/// Recursively parse a PID libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. 
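+///
+/// For orientation, the overall arena tree walked by these functions looks like
+/// `<PID>/<EXEC_EPOCH>/<TID>/{ops,data}/*.dat` (an illustrative sketch of the
+/// layout; the angle-bracket names are placeholders for numeric directory names),
+/// and each `<TID>` directory ends up as a single jsonlines output file.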
fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { let pid = filename_numeric(&in_dir)?; @@ -308,6 +347,10 @@ fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul Ok(()) } +/// Recursively parse a top-level libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. pub fn parse_arena_dir, P2: AsRef + Sync>( in_dir: P1, out_dir: P2, diff --git a/probe_src/probe_frontend/src/display.rs b/probe_src/probe_frontend/src/display.rs index 8471fea9..907797f0 100644 --- a/probe_src/probe_frontend/src/display.rs +++ b/probe_src/probe_frontend/src/display.rs @@ -23,7 +23,7 @@ impl Display for ops::timeval { impl Display for ops::statx { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ mask={}, blksize={}, attributes={}, nlink={}, uid={}, gid={}, \ mode={:#06o} ino={}, size={}, blocks={}, attributes_mask={}, \ atime={}, btime={}, ctime={}, mtime={}, rdev_major={}, \ @@ -51,13 +51,13 @@ impl Display for ops::statx { self.stx_mnt_id, self.stx_dio_mem_align, self.stx_dio_offset_align, - )) + ) } } impl Display for ops::rusage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ utime={}, stime={}, maxrss={}, ixrss={}, idrss={}, isrss={}, \ minflt={}, majflt={}, nswap={}, inblock={}, oublock={}, msgsnd={}, \ msgrcv={}, nsignals={}, nvcsw={}, nivcsw={} ]", @@ -77,13 +77,13 @@ impl Display for ops::rusage { self.ru_nsignals, self.ru_nvcsw, self.ru_nivcsw, - )) + ) } } impl Display for ops::Path { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ dirfd_minus_at_fdcwd={}, path='{}', device_major={}, \ device_minor={}, inode={}, mtime={}, ctime={}, stat_valid={}, \ dirfd_valid={} ]", @@ -96,13 +96,13 @@ impl Display for ops::Path { self.ctime, self.stat_valid, self.dirfd_valid, - )) + ) } } impl Display for ops::CloneOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ flags={}, run_pthread_atfork_handlers={}, child_process_id={}, \ child_thread_id={}, ferrno={} ]", self.flags, @@ -110,168 +110,168 @@ impl Display for ops::CloneOp { self.child_process_id, self.child_thread_id, self.ferrno, - )) + ) } } impl Display for ops::CloseOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ low_fd={}, high_fd={}, ferrno={} ]", self.low_fd, self.high_fd, self.ferrno, - )) + ) } } impl Display for ops::ExitOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ satus={}, run_atexit_handlers={} ]", self.status, self.run_atexit_handlers, - )) + ) } } impl Display for ops::GetRUsageOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ waitpid_arg={}, getrusage_arg={}, usage={}, ferrno={} ]", self.waitpid_arg, self.getrusage_arg, self.usage, self.ferrno, - )) + ) } } impl Display for ops::InitProcessOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("[ pid={} ]", self.pid)) + write!(f,"[ pid={} ]", self.pid) } } impl Display for ops::InitThreadOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("[ tid={} ]", self.tid)) + write!(f,"[ tid={} 
]", self.tid) } } impl Display for ops::WaitOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ pid={}, options={}, status={}, ret={}, ferrno={} ]", self.pid, self.options, self.status, self.ret, self.ferrno, - )) + ) } } impl Display for ops::InitExecEpochOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ epoch={}, program_name={} ]", self.epoch, self.program_name.to_string_lossy(), - )) + ) } } impl Display for ops::OpenOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, flags={}, mode={:#06o} fd={}, ferrno={} ]", self.path, self.flags, self.mode, self.fd, self.ferrno, - )) + ) } } impl Display for ops::ChdirOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, ferrno={} ]", self.path, self.ferrno, - )) + ) } } impl Display for ops::ExecOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, ferrno={} ]", self.path, self.ferrno, - )) + ) } } impl Display for ops::AccessOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, mode={:#06o}, flags={}, ferrno={} ]", self.path, self.mode, self.flags, self.ferrno, - )) + ) } } impl Display for ops::StatOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, flags={}, statx_buf={}, ferrno={} ]", self.path, self.flags, self.statx_buf, self.ferrno, - )) + ) } } impl Display for ops::ReaddirOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ dir={}, child='{}', all_children={}, ferrno={} ]", self.dir, self.child.to_string_lossy(), self.all_children, self.ferrno, - )) + ) } } impl Display for ops::Metadata { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ops::Metadata::Mode(mode) => f.write_fmt(format_args!("Mode[ mode={:#06o} ]", mode)), + ops::Metadata::Mode(mode) => write!(f,"Mode[ mode={:#06o} ]", mode), ops::Metadata::Ownership { uid, gid } => { - f.write_fmt(format_args!("Ownership[ uid={}, gid={} ]", uid, gid)) + write!(f,"Ownership[ uid={}, gid={} ]", uid, gid) } ops::Metadata::Times { is_null, atime, mtime, - } => f.write_fmt(format_args!( + } => write!(f, "Times[ is_null={}, atime={}, mtime={} ]", is_null, atime, mtime - )), + ), } } } impl Display for ops::UpdateMetadataOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, flags={}, metadata={}, ferrno={} ]", self.path, self.flags, self.metadata, self.ferrno, - )) + ) } } impl Display for ops::ReadLinkOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( + write!(f, "[ path={}, resolved='{}', ferrno={} ]", self.path, self.resolved.to_string_lossy(), self.ferrno - )) + ) } } impl Display for ops::OpInternal { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn wfmt(f: &mut std::fmt::Formatter<'_>, x: &str, y: impl Display) -> std::fmt::Result { - f.write_fmt(format_args!("{}{}", x, y)) + write!(f,"{}{}", x, y) } match self { diff --git a/probe_src/probe_frontend/src/ffi.rs b/probe_src/probe_frontend/src/ffi.rs index f1413947..3a96f599 100644 --- 
a/probe_src/probe_frontend/src/ffi.rs +++ b/probe_src/probe_frontend/src/ffi.rs @@ -2,8 +2,6 @@ #![allow(non_camel_case_types)] #![allow(non_snake_case)] -/// raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with -/// rust-bindgen use serde::{Deserialize, Serialize}; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); diff --git a/probe_src/probe_frontend/src/main.rs b/probe_src/probe_frontend/src/main.rs index efe326d1..5655ec42 100644 --- a/probe_src/probe_frontend/src/main.rs +++ b/probe_src/probe_frontend/src/main.rs @@ -9,12 +9,47 @@ use clap::Parser; use color_eyre::eyre::{eyre, Context, Report, Result}; use flate2::Compression; -mod arena; -mod display; +/// Raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with +/// rust-bindgen. +/// +/// If you're trying to make sense of this it's going to be much easier if you have `prov_ops.h` +/// open as well. mod ffi; -mod metadata; + +/// Rust versions of Arena structs from [`ffi`]. +/// +/// While simple Ops containing only Integral values can be used directly from [`ffi`], more +/// complicated structs with paths or other strings need to be manually converted to more rusty +/// versions so they can be serialized. This module re-exports the trivial Ops and defines new ones +/// (as well as methods for converting) for the non-trivial structs. mod ops; +/// [`std::fmt::Display`] trait implementations for [`ops::Op`] and all the Op variants and other +/// structs. +/// +/// This is used by the `dump` command to print out the Ops in as close as possible to a +/// human-readable format, I hate to say this but for specific questions its probably better to +/// just look at the source code. +mod display; + +/// Parsing of arena directories created by libprobe into a cross-platform +/// serialized format. +/// +/// # Serialization format +/// +/// The serialization format output is very similar to the raw libprobe arena format. It's a +/// filesystem hierarchy of `//` but instead of `` being a directory containing +/// `ops` and `data` directories with the raw C-struct arenas, `` is a +/// [jsonlines](https://jsonlines.org/) file, where each line is a json representation of an +/// [`ops::Op`]. +mod arena; + +/// System metadata recorded into probe logs. +mod metadata; + + + +/// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. 
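+///
+/// Illustrative invocations, assuming the binary is installed as `probe` (a
+/// sketch of the subcommands defined below, with placeholder arguments):
+///
+/// - `probe record -o probe_log <CMD>...` runs `<CMD>` under libprobe and writes
+///   the provenance log to `probe_log`.
+/// - `probe dump -i probe_log` prints the recorded ops in a human-readable form.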
#[derive(clap::Parser, Debug, Clone)] #[command(author, version, about, long_about = None)] #[command(propagate_version = true)] @@ -60,6 +95,7 @@ enum Command { }, } +// TODO: break out each sub-command as a separate function fn main() -> Result<()> { color_eyre::install()?; env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); @@ -74,10 +110,22 @@ fn main() -> Result<()> { debug, cmd, } => { - if PathBuf::from(output.clone()).exists() && overwrite { - fs::remove_file(&output).wrap_err("Error deleting old output file")?; + // if -f is set, we should clear-out the old probe_log + if overwrite { + match fs::remove_file(&output) { + Ok(_) => (), + Err(e) => match e.kind() { + std::io::ErrorKind::NotFound => (), + _ => return Err(e).wrap_err("Error deleting old output file"), + }, + }; } + // the path to the libprobe.so directory is searched for as follows: + // - --lib-path argument if set + // - __PROBE_LIB env var if set + // - /usr/share/probe + // - error let mut ld_preload = fs::canonicalize(match lib_path { Some(x) => x, None => match std::env::var_os("__PROBE_LIB") { @@ -101,7 +149,8 @@ fn main() -> Result<()> { } else { ld_preload.push("libprobe.so"); } - + + // append any exiting LD_PRELOAD overrides if let Some(x) = std::env::var_os("LD_PRELOAD") { ld_preload.push(":"); ld_preload.push(&x); @@ -217,7 +266,7 @@ fn main() -> Result<()> { .to_owned(); - if path == "0_metadata" { + if path == "_metadata" { return Ok(()); } diff --git a/probe_src/probe_frontend/src/metadata.rs b/probe_src/probe_frontend/src/metadata.rs index 0ea74415..60dc20d7 100644 --- a/probe_src/probe_frontend/src/metadata.rs +++ b/probe_src/probe_frontend/src/metadata.rs @@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] pub struct Metadata { entry_pid: libc::pid_t, + arch: &'static str, system: SystemInfo, } @@ -11,6 +12,7 @@ impl Metadata { pub fn new(pid: libc::pid_t) -> Self { Self { entry_pid: pid, + arch: std::env::consts::ARCH, system: Machine::new().system_info(), } } diff --git a/probe_src/probe_frontend/src/ops.rs b/probe_src/probe_frontend/src/ops.rs index 8e43e2d2..d6b9a2ed 100644 --- a/probe_src/probe_frontend/src/ops.rs +++ b/probe_src/probe_frontend/src/ops.rs @@ -16,27 +16,38 @@ use std::{ use crate::{arena::ArenaContext, ffi}; -pub(crate) trait DecodeFfi { - fn decode(value: &T, ctx: &ArenaContext) -> Result +/// Specialized version of [`std::convert::From`] for working with libprobe arena structs. +/// +/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because +/// they came from a different virtual memory space). This trait and It's sibling [`FfiInto`] +/// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be +/// used to decode pointers. +pub(crate) trait FfiFrom { + fn ffi_from(value: &T, ctx: &ArenaContext) -> Result where Self: Sized; } -pub(crate) trait ConvertFfi { - fn convert(&self, ctx: &ArenaContext) -> Result; +/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. +/// +/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket +/// implementation as the reciprocal of [`FfiFrom`]. 
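+///
+/// Illustrative use, mirroring the decoders later in this module: inside an
+/// [`FfiFrom`] impl, writing `value.path.ffi_into(ctx)?` converts an `ffi::Path`
+/// into a [`Path`] by dispatching through this blanket impl to
+/// `Path::ffi_from(&value.path, ctx)`.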
+pub(crate) trait FfiInto { + fn ffi_into(&self, ctx: &ArenaContext) -> Result; } -impl ConvertFfi for T +impl FfiInto for T where - U: DecodeFfi, + U: FfiFrom, { #[inline] - fn convert(&self, ctx: &ArenaContext) -> Result { - U::decode(self, ctx) + fn ffi_into(&self, ctx: &ArenaContext) -> Result { + U::ffi_from(self, ctx) } } fn try_to_osstring(str: *const i8, ctx: &ArenaContext) -> Result { + log::debug!("[unsafe] Parsing arena pointer: {:#x}", str as usize); Ok(if str.is_null() { OsString::new() } else { @@ -63,8 +74,8 @@ pub struct Path { pub dirfd_valid: bool, } -impl DecodeFfi for Path { - fn decode(value: &ffi::Path, ctx: &ArenaContext) -> Result { +impl FfiFrom for Path { + fn ffi_from(value: &ffi::Path, ctx: &ArenaContext) -> Result { Ok(Self { dirfd_minus_at_fdcwd: value.dirfd_minus_at_fdcwd, path: try_to_osstring(value.path, ctx) @@ -86,8 +97,8 @@ pub struct InitExecEpochOp { pub program_name: OsString, } -impl DecodeFfi for InitExecEpochOp { - fn decode(value: &ffi::InitExecEpochOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for InitExecEpochOp { + fn ffi_from(value: &ffi::InitExecEpochOp, ctx: &ArenaContext) -> Result { Ok(Self { epoch: value.epoch, program_name: try_to_osstring(value.program_name, ctx) @@ -105,10 +116,10 @@ pub struct OpenOp { pub ferrno: c_int, } -impl DecodeFfi for OpenOp { - fn decode(value: &ffi::OpenOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for OpenOp { + fn ffi_from(value: &ffi::OpenOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, flags: value.flags, mode: value.mode, fd: value.fd, @@ -123,10 +134,10 @@ pub struct ChdirOp { pub ferrno: c_int, } -impl DecodeFfi for ChdirOp { - fn decode(value: &ffi::ChdirOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for ChdirOp { + fn ffi_from(value: &ffi::ChdirOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, ferrno: value.ferrno, }) } @@ -138,10 +149,10 @@ pub struct ExecOp { pub ferrno: c_int, } -impl DecodeFfi for ExecOp { - fn decode(value: &ffi::ExecOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for ExecOp { + fn ffi_from(value: &ffi::ExecOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, ferrno: value.ferrno, }) } @@ -155,10 +166,10 @@ pub struct AccessOp { pub ferrno: c_int, } -impl DecodeFfi for AccessOp { - fn decode(value: &ffi::AccessOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for AccessOp { + fn ffi_from(value: &ffi::AccessOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, mode: value.mode, flags: value.flags, ferrno: value.ferrno, @@ -174,10 +185,10 @@ pub struct StatOp { pub ferrno: c_int, } -impl DecodeFfi for StatOp { - fn decode(value: &ffi::StatOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for StatOp { + fn ffi_from(value: &ffi::StatOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, flags: value.flags, statx_buf: value.statx_buf, ferrno: value.ferrno, @@ -193,10 +204,10 @@ pub struct ReaddirOp { pub ferrno: c_int, } -impl DecodeFfi for ReaddirOp { - fn decode(value: &ffi::ReaddirOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for ReaddirOp { + fn ffi_from(value: &ffi::ReaddirOp, ctx: &ArenaContext) -> Result { Ok(Self { - dir: value.dir.convert(ctx)?, + dir: value.dir.ffi_into(ctx)?, child: try_to_osstring(value.child, ctx) .wrap_err("Unable to decode 
child char* into string")?, all_children: value.all_children, @@ -228,16 +239,17 @@ impl Metadata { kind: ffi::MetadataKind, value: ffi::MetadataValue, ) -> Result { + log::debug!("[unsafe] decoding Metadata tagged union"); Ok(match kind { - ffi::MetadataKind_MetadataMode => Metadata::Mode(value.mode), + ffi::MetadataKind_MetadataMode => Metadata::Mode(unsafe { value.mode }), ffi::MetadataKind_MetadataOwnership => Metadata::Ownership { - uid: value.ownership.uid, - gid: value.ownership.gid, + uid: unsafe { value.ownership }.uid, + gid: unsafe { value.ownership }.gid, }, ffi::MetadataKind_MetadataTimes => Metadata::Times { - is_null: value.times.is_null, - atime: value.times.atime, - mtime: value.times.mtime, + is_null: unsafe { value.times }.is_null, + atime: unsafe { value.times }.atime, + mtime: unsafe { value.times }.mtime, }, _ => return Err(eyre!("Invalid MetadataKind Variant")), }) @@ -252,10 +264,10 @@ pub struct UpdateMetadataOp { pub ferrno: c_int, } -impl DecodeFfi for UpdateMetadataOp { - fn decode(value: &ffi::UpdateMetadataOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for UpdateMetadataOp { + fn ffi_from(value: &ffi::UpdateMetadataOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, flags: value.flags, metadata: unsafe { Metadata::from_kind_and_value(value.kind, value.value) } .wrap_err("Unable to decode Metadata tagged union")?, @@ -271,10 +283,10 @@ pub struct ReadLinkOp { pub ferrno: c_int, } -impl DecodeFfi for ReadLinkOp { - fn decode(value: &ffi::ReadLinkOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for ReadLinkOp { + fn ffi_from(value: &ffi::ReadLinkOp, ctx: &ArenaContext) -> Result { Ok(Self { - path: value.path.convert(ctx)?, + path: value.path.ffi_into(ctx)?, resolved: try_to_osstring(value.resolved, ctx) .wrap_err("Unable to decode symlink resolve char* to string")?, ferrno: value.ferrno, @@ -312,66 +324,60 @@ impl OpInternal { value: &ffi::Op__bindgen_ty_1, ctx: &ArenaContext, ) -> Result { + log::debug!("[unsafe] decoding Op tagged union"); Ok(match kind { - ffi::OpCode_init_process_op_code => Self::InitProcess(value.init_process_epoch), + ffi::OpCode_init_process_op_code => { + Self::InitProcess(unsafe { value.init_process_epoch }) + } ffi::OpCode_init_exec_epoch_op_code => Self::InitExecEpoch( - value - .init_exec_epoch - .convert(ctx) + unsafe { value.init_exec_epoch } + .ffi_into(ctx) .wrap_err("Unable to decode InitExecEpochOp")?, ), - ffi::OpCode_init_thread_op_code => Self::InitThread(value.init_thread), + ffi::OpCode_init_thread_op_code => Self::InitThread(unsafe { value.init_thread }), ffi::OpCode_open_op_code => Self::Open( - value - .open - .convert(ctx) + unsafe { value.open } + .ffi_into(ctx) .wrap_err("Unable to decode OpenOp")?, ), - ffi::OpCode_close_op_code => Self::Close(value.close), + ffi::OpCode_close_op_code => Self::Close(unsafe { value.close }), ffi::OpCode_chdir_op_code => Self::Chdir( - value - .chdir - .convert(ctx) + unsafe { value.chdir } + .ffi_into(ctx) .wrap_err("Unable to decode ChdirOp")?, ), ffi::OpCode_exec_op_code => Self::Exec( - value - .exec - .convert(ctx) + unsafe { value.exec } + .ffi_into(ctx) .wrap_err("Unable to decode ExecOp")?, ), - ffi::OpCode_clone_op_code => Self::Clone(value.clone), - ffi::OpCode_exit_op_code => Self::Exit(value.exit), + ffi::OpCode_clone_op_code => Self::Clone(unsafe { value.clone }), + ffi::OpCode_exit_op_code => Self::Exit(unsafe { value.exit }), ffi::OpCode_access_op_code => Self::Access( - value - .access - .convert(ctx) + 
unsafe { value.access } + .ffi_into(ctx) .wrap_err("Unable to decode AccessOp")?, ), ffi::OpCode_stat_op_code => Self::Stat( - value - .stat - .convert(ctx) + unsafe { value.stat } + .ffi_into(ctx) .wrap_err("Unable to decode StatOp")?, ), ffi::OpCode_readdir_op_code => Self::Readdir( - value - .readdir - .convert(ctx) + unsafe { value.readdir } + .ffi_into(ctx) .wrap_err("Unable to decode ReaddirOp")?, ), - ffi::OpCode_wait_op_code => Self::Wait(value.wait), - ffi::OpCode_getrusage_op_code => Self::GetRUsage(value.getrusage), + ffi::OpCode_wait_op_code => Self::Wait(unsafe { value.wait }), + ffi::OpCode_getrusage_op_code => Self::GetRUsage(unsafe { value.getrusage }), ffi::OpCode_update_metadata_op_code => Self::UpdateMetadata( - value - .update_metadata - .convert(ctx) + unsafe { value.update_metadata } + .ffi_into(ctx) .wrap_err("Unable to decode UpdateMetadataOp")?, ), ffi::OpCode_read_link_op_code => Self::ReadLink( - value - .read_link - .convert(ctx) + unsafe { value.read_link } + .ffi_into(ctx) .wrap_err("Unable to decode ReadlinkOp")?, ), _ => { @@ -393,8 +399,8 @@ pub struct Op { pub time: timespec, } -impl DecodeFfi for Op { - fn decode(value: &ffi::Op, ctx: &ArenaContext) -> Result { +impl FfiFrom for Op { + fn ffi_from(value: &ffi::Op, ctx: &ArenaContext) -> Result { Ok(Self { data: unsafe { OpInternal::from_kind_and_value(value.op_code, &value.data, ctx) } .wrap_err("Unable to decode Op tagged union")?, From e1b085530f7ca6a4f1f554867a15da97a639ccc2 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 25 Jun 2024 14:47:36 -0500 Subject: [PATCH 05/37] Apply suggestions from code review Co-authored-by: Sam Grayson --- probe_src/probe_frontend/src/arena.rs | 2 +- probe_src/probe_frontend/src/ffi.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/src/arena.rs index f9577451..f8c6edc0 100644 --- a/probe_src/probe_frontend/src/arena.rs +++ b/probe_src/probe_frontend/src/arena.rs @@ -68,7 +68,7 @@ impl<'a> OpsArena<'a> { let count = (header.used - size_of::()) / size_of::(); - log::debug!("[unsafe] converting Vec to &[ffi::Op]"); + log::debug!("[unsafe] converting Vec to &[ffi::Op] of size {}", count); let ops = unsafe { let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; std::slice::from_raw_parts(ptr, count) diff --git a/probe_src/probe_frontend/src/ffi.rs b/probe_src/probe_frontend/src/ffi.rs index 3a96f599..3597c519 100644 --- a/probe_src/probe_frontend/src/ffi.rs +++ b/probe_src/probe_frontend/src/ffi.rs @@ -4,4 +4,5 @@ use serde::{Deserialize, Serialize}; +// Bindings are generated by `../build.sh` include!(concat!(env!("OUT_DIR"), "/bindings.rs")); From 1e3dc49b89556b629c5990642487d58e2d012166 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 25 Jun 2024 15:43:55 -0500 Subject: [PATCH 06/37] More concise Display impls Removes a lot of details that are usually abstracted by libraries when printing for humans. In contrast with some of the code review comments I left mtime and errno field because I feel those are useful for human readers; even if all a human is likley to do with errno is "was it zero?" 
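
For example, after this change ops::Path prints only the dirfd, path, inode, and
mtime fields, and ops::statx keeps just uid, gid, mode, ino, size, and mtime; the
dropped fields are still captured in the serialized log, they just no longer
clutter the dump output.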
--- probe_src/probe_frontend/src/display.rs | 141 +++++++++--------------- 1 file changed, 53 insertions(+), 88 deletions(-) diff --git a/probe_src/probe_frontend/src/display.rs b/probe_src/probe_frontend/src/display.rs index 907797f0..f6cf9c5e 100644 --- a/probe_src/probe_frontend/src/display.rs +++ b/probe_src/probe_frontend/src/display.rs @@ -23,90 +23,49 @@ impl Display for ops::timeval { impl Display for ops::statx { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ mask={}, blksize={}, attributes={}, nlink={}, uid={}, gid={}, \ - mode={:#06o} ino={}, size={}, blocks={}, attributes_mask={}, \ - atime={}, btime={}, ctime={}, mtime={}, rdev_major={}, \ - rdev_minor={}, dev_major={}, dev_minor={}, mnt_id={}, \ - dio_mem_align={}, dio_offset_align={} ]", - self.stx_mask, - self.stx_blksize, - self.stx_attributes, - self.stx_nlink, + write!( + f, + "[ uid={}, gid={}, mode={:#06o} ino={}, size={}, mtime={} ]", self.stx_uid, self.stx_gid, self.stx_mode, self.stx_ino, self.stx_size, - self.stx_blocks, - self.stx_attributes_mask, - self.stx_atime, - self.stx_btime, - self.stx_ctime, self.stx_mtime, - self.stx_rdev_major, - self.stx_rdev_minor, - self.stx_dev_major, - self.stx_dev_minor, - self.stx_mnt_id, - self.stx_dio_mem_align, - self.stx_dio_offset_align, ) } } impl Display for ops::rusage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ utime={}, stime={}, maxrss={}, ixrss={}, idrss={}, isrss={}, \ - minflt={}, majflt={}, nswap={}, inblock={}, oublock={}, msgsnd={}, \ - msgrcv={}, nsignals={}, nvcsw={}, nivcsw={} ]", + write!( + f, + "[ utime={}, stime={}, maxrss={} ]", self.ru_utime, self.ru_stime, self.ru_maxrss, - self.ru_ixrss, - self.ru_idrss, - self.ru_isrss, - self.ru_minflt, - self.ru_majflt, - self.ru_nswap, - self.ru_inblock, - self.ru_oublock, - self.ru_msgsnd, - self.ru_msgrcv, - self.ru_nsignals, - self.ru_nvcsw, - self.ru_nivcsw, ) } } impl Display for ops::Path { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ dirfd_minus_at_fdcwd={}, path='{}', device_major={}, \ - device_minor={}, inode={}, mtime={}, ctime={}, stat_valid={}, \ - dirfd_valid={} ]", - self.dirfd_minus_at_fdcwd, + write!( + f, + "[ dirfd={}, path='{}', inode={}, mtime={} ]", + self.dirfd_minus_at_fdcwd + libc::AT_FDCWD, self.path.to_string_lossy(), - self.device_major, - self.device_minor, self.inode, self.mtime, - self.ctime, - self.stat_valid, - self.dirfd_valid, ) } } impl Display for ops::CloneOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ flags={}, run_pthread_atfork_handlers={}, child_process_id={}, \ - child_thread_id={}, ferrno={} ]", - self.flags, - self.run_pthread_atfork_handlers, + write!( + f, + "[ child_process_id={}, child_thread_id={}, errno={} ]", self.child_process_id, self.child_thread_id, self.ferrno, @@ -116,8 +75,9 @@ impl Display for ops::CloneOp { impl Display for ops::CloseOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ low_fd={}, high_fd={}, ferrno={} ]", + write!( + f, + "[ low_fd={}, high_fd={}, errno={} ]", self.low_fd, self.high_fd, self.ferrno, ) } @@ -125,7 +85,8 @@ impl Display for ops::CloseOp { impl Display for ops::ExitOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, + write!( + f, "[ satus={}, run_atexit_handlers={} ]", self.status, self.run_atexit_handlers, ) @@ -134,8 +95,9 @@ impl Display for ops::ExitOp { impl Display for 
ops::GetRUsageOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ waitpid_arg={}, getrusage_arg={}, usage={}, ferrno={} ]", + write!( + f, + "[ waitpid_arg={}, getrusage_arg={}, usage={}, errno={} ]", self.waitpid_arg, self.getrusage_arg, self.usage, self.ferrno, ) } @@ -143,20 +105,21 @@ impl Display for ops::GetRUsageOp { impl Display for ops::InitProcessOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f,"[ pid={} ]", self.pid) + write!(f, "[ pid={} ]", self.pid) } } impl Display for ops::InitThreadOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f,"[ tid={} ]", self.tid) + write!(f, "[ tid={} ]", self.tid) } } impl Display for ops::WaitOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ pid={}, options={}, status={}, ret={}, ferrno={} ]", + write!( + f, + "[ pid={}, options={}, status={}, ret={}, errno={} ]", self.pid, self.options, self.status, self.ret, self.ferrno, ) } @@ -164,7 +127,8 @@ impl Display for ops::WaitOp { impl Display for ops::InitExecEpochOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, + write!( + f, "[ epoch={}, program_name={} ]", self.epoch, self.program_name.to_string_lossy(), @@ -174,8 +138,9 @@ impl Display for ops::InitExecEpochOp { impl Display for ops::OpenOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, flags={}, mode={:#06o} fd={}, ferrno={} ]", + write!( + f, + "[ path={}, flags={}, mode={:#06o} fd={}, errno={} ]", self.path, self.flags, self.mode, self.fd, self.ferrno, ) } @@ -183,26 +148,21 @@ impl Display for ops::OpenOp { impl Display for ops::ChdirOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, ferrno={} ]", - self.path, self.ferrno, - ) + write!(f, "[ path={}, errno={} ]", self.path, self.ferrno,) } } impl Display for ops::ExecOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, ferrno={} ]", - self.path, self.ferrno, - ) + write!(f, "[ path={}, errno={} ]", self.path, self.ferrno,) } } impl Display for ops::AccessOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, mode={:#06o}, flags={}, ferrno={} ]", + write!( + f, + "[ path={}, mode={:#06o}, flags={}, errno={} ]", self.path, self.mode, self.flags, self.ferrno, ) } @@ -210,8 +170,9 @@ impl Display for ops::AccessOp { impl Display for ops::StatOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, flags={}, statx_buf={}, ferrno={} ]", + write!( + f, + "[ path={}, flags={}, statx_buf={}, errno={} ]", self.path, self.flags, self.statx_buf, self.ferrno, ) } @@ -219,8 +180,9 @@ impl Display for ops::StatOp { impl Display for ops::ReaddirOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ dir={}, child='{}', all_children={}, ferrno={} ]", + write!( + f, + "[ dir={}, child='{}', all_children={}, errno={} ]", self.dir, self.child.to_string_lossy(), self.all_children, @@ -232,15 +194,16 @@ impl Display for ops::ReaddirOp { impl Display for ops::Metadata { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ops::Metadata::Mode(mode) => write!(f,"Mode[ mode={:#06o} ]", mode), + ops::Metadata::Mode(mode) => write!(f, "Mode[ mode={:#06o} ]", mode), ops::Metadata::Ownership { uid, gid } => { - write!(f,"Ownership[ uid={}, 
gid={} ]", uid, gid) + write!(f, "Ownership[ uid={}, gid={} ]", uid, gid) } ops::Metadata::Times { is_null, atime, mtime, - } => write!(f, + } => write!( + f, "Times[ is_null={}, atime={}, mtime={} ]", is_null, atime, mtime ), @@ -250,8 +213,9 @@ impl Display for ops::Metadata { impl Display for ops::UpdateMetadataOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, flags={}, metadata={}, ferrno={} ]", + write!( + f, + "[ path={}, flags={}, metadata={}, errno={} ]", self.path, self.flags, self.metadata, self.ferrno, ) } @@ -259,8 +223,9 @@ impl Display for ops::UpdateMetadataOp { impl Display for ops::ReadLinkOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, - "[ path={}, resolved='{}', ferrno={} ]", + write!( + f, + "[ path={}, resolved='{}', errno={} ]", self.path, self.resolved.to_string_lossy(), self.ferrno @@ -271,7 +236,7 @@ impl Display for ops::ReadLinkOp { impl Display for ops::OpInternal { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn wfmt(f: &mut std::fmt::Formatter<'_>, x: &str, y: impl Display) -> std::fmt::Result { - write!(f,"{}{}", x, y) + write!(f, "{}{}", x, y) } match self { From 1f64189b8153d70eca13b055695d8d4c4cb55d03 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 25 Jun 2024 15:48:53 -0500 Subject: [PATCH 07/37] Refactor Ops to use CString instead of OsString --- probe_src/probe_frontend/src/arena.rs | 23 +++++++----- probe_src/probe_frontend/src/ops.rs | 50 +++++++++++++-------------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/src/arena.rs index f8c6edc0..30b03ae1 100644 --- a/probe_src/probe_frontend/src/arena.rs +++ b/probe_src/probe_frontend/src/arena.rs @@ -68,7 +68,10 @@ impl<'a> OpsArena<'a> { let count = (header.used - size_of::()) / size_of::(); - log::debug!("[unsafe] converting Vec to &[ffi::Op] of size {}", count); + log::debug!( + "[unsafe] converting Vec to &[ffi::Op] of size {}", + count + ); let ops = unsafe { let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; std::slice::from_raw_parts(ptr, count) @@ -119,12 +122,16 @@ impl DataArena { Ok(Self { header, raw: bytes }) } - pub fn try_deref(&self, ptr: usize) -> Option<*const u8> { - match ptr >= self.header.base_address - && ptr <= (self.header.base_address + self.header.used) - { + pub fn try_get_slice<'a>(&'a self, ptr: usize) -> Option<&'a [u8]> { + let end = self.header.base_address + self.header.used; + match ptr >= self.header.base_address && ptr <= end { false => None, - true => Some(unsafe { self.raw.as_ptr().add(ptr - self.header.base_address) }), + true => Some(unsafe { + let new_ptr = self.raw.as_ptr().add(ptr - self.header.base_address); + let len = end - ptr; + + core::slice::from_raw_parts(new_ptr, len) + }), } } } @@ -133,9 +140,9 @@ impl DataArena { pub struct ArenaContext(pub Vec); impl ArenaContext { - pub fn try_deref(&self, ptr: usize) -> Option<*const u8> { + pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> { for vec in self.0.iter() { - if let Some(x) = vec.try_deref(ptr) { + if let Some(x) = vec.try_get_slice(ptr) { return Some(x); } } diff --git a/probe_src/probe_frontend/src/ops.rs b/probe_src/probe_frontend/src/ops.rs index d6b9a2ed..99dec8ff 100644 --- a/probe_src/probe_frontend/src/ops.rs +++ b/probe_src/probe_frontend/src/ops.rs @@ -3,16 +3,11 @@ pub use crate::ffi::{ dev_t, gid_t, ino_t, mode_t, rusage, statx, statx_timestamp, timespec, timeval, uid_t, 
CloneOp, CloseOp, ExitOp, GetRUsageOp, InitProcessOp, InitThreadOp, WaitOp, }; -use color_eyre::eyre::{eyre, Context}; pub use std::ffi::{c_int, c_uint}; -use color_eyre::eyre::Result; +use color_eyre::eyre::{eyre, Context, Result}; use serde::{Deserialize, Serialize}; -use std::{ - ffi::{OsStr, OsString}, - os::unix::ffi::OsStrExt, - slice, -}; +use std::ffi::{CStr, CString}; use crate::{arena::ArenaContext, ffi}; @@ -46,25 +41,30 @@ where } } -fn try_to_osstring(str: *const i8, ctx: &ArenaContext) -> Result { - log::debug!("[unsafe] Parsing arena pointer: {:#x}", str as usize); - Ok(if str.is_null() { - OsString::new() +/// Try to convert an invalid pointer from and ffi libprobe struct into a string type. +/// +/// The strings emitted by libprobe are from C code, so they're pointers to an arbitrary sequence +/// of non-null bytes terminated by a null byte. This means we can't use the [`String`] type since +/// rust requires that all [`String`]s are valid UTF-8. +/// +/// Instead we use [`CString`] which is provided by the standard library for ffi code like this. +fn try_to_cstring(str: *const i8, ctx: &ArenaContext) -> Result { + if str.is_null() { + CString::new("").wrap_err("Failed to create empty CString") } else { - match ctx.try_deref(str as usize) { - Some(x) => { - OsStr::from_bytes(unsafe { slice::from_raw_parts(x, libc::strlen(x as *const i8)) }) - .to_os_string() - } + match ctx.try_get_slice(str as usize) { + Some(x) => Ok(CStr::from_bytes_until_nul(x) + .wrap_err("Failed to create CString")? + .to_owned()), None => return Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), } - }) + } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Path { pub dirfd_minus_at_fdcwd: i32, - pub path: OsString, + pub path: CString, pub device_major: dev_t, pub device_minor: dev_t, pub inode: ino_t, @@ -78,7 +78,7 @@ impl FfiFrom for Path { fn ffi_from(value: &ffi::Path, ctx: &ArenaContext) -> Result { Ok(Self { dirfd_minus_at_fdcwd: value.dirfd_minus_at_fdcwd, - path: try_to_osstring(value.path, ctx) + path: try_to_cstring(value.path, ctx) .wrap_err("Unable to decode char* into path string")?, device_major: value.device_major, device_minor: value.device_minor, @@ -94,14 +94,14 @@ impl FfiFrom for Path { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InitExecEpochOp { pub epoch: c_uint, - pub program_name: OsString, + pub program_name: CString, } impl FfiFrom for InitExecEpochOp { fn ffi_from(value: &ffi::InitExecEpochOp, ctx: &ArenaContext) -> Result { Ok(Self { epoch: value.epoch, - program_name: try_to_osstring(value.program_name, ctx) + program_name: try_to_cstring(value.program_name, ctx) .wrap_err("Unable to decode program name char* into string")?, }) } @@ -199,7 +199,7 @@ impl FfiFrom for StatOp { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ReaddirOp { pub dir: Path, - pub child: OsString, + pub child: CString, pub all_children: bool, pub ferrno: c_int, } @@ -208,7 +208,7 @@ impl FfiFrom for ReaddirOp { fn ffi_from(value: &ffi::ReaddirOp, ctx: &ArenaContext) -> Result { Ok(Self { dir: value.dir.ffi_into(ctx)?, - child: try_to_osstring(value.child, ctx) + child: try_to_cstring(value.child, ctx) .wrap_err("Unable to decode child char* into string")?, all_children: value.all_children, ferrno: value.ferrno, @@ -279,7 +279,7 @@ impl FfiFrom for UpdateMetadataOp { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ReadLinkOp { pub path: Path, - pub resolved: OsString, + pub resolved: CString, pub ferrno: c_int, } @@ -287,7 +287,7 @@ impl 
FfiFrom for ReadLinkOp { fn ffi_from(value: &ffi::ReadLinkOp, ctx: &ArenaContext) -> Result { Ok(Self { path: value.path.ffi_into(ctx)?, - resolved: try_to_osstring(value.resolved, ctx) + resolved: try_to_cstring(value.resolved, ctx) .wrap_err("Unable to decode symlink resolve char* to string")?, ferrno: value.ferrno, }) From b9ebba313fc063d4ccd1e6f717fce9667e10546c Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 25 Jun 2024 16:22:20 -0500 Subject: [PATCH 08/37] Added additional cargo devtools to probe_frontend devShell --- probe_src/probe_frontend/flake.nix | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 45a22d2b..01088355 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -35,8 +35,10 @@ devShells.default = craneLib.devShell { checks = self.checks.${system}; packages = with pkgs; [ - rust-analyzer cargo-audit + cargo-flamegraph + cargo-watch + rust-analyzer ]; }; }); From 41f964c030b8e56ae0d71aab29db48ce91b378fe Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 25 Jun 2024 16:26:52 -0500 Subject: [PATCH 09/37] Major refactor This commit addresses serveral points raised durring the last code-review (and a few that weren't): - Restructured arena.rs to read top-down. - Refactored child process code to use std::process instead of subprocess crate. - Cleaned up arena pointer resolition to use less unsafe code. - [INFO] Better log messages. --- probe_src/probe_frontend/Cargo.lock | 11 - probe_src/probe_frontend/Cargo.toml | 1 - probe_src/probe_frontend/src/arena.rs | 497 ++++++++++++++------------ probe_src/probe_frontend/src/main.rs | 110 +++--- probe_src/probe_frontend/src/ops.rs | 2 +- 5 files changed, 313 insertions(+), 308 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index 232f089a..4c6142e1 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -721,7 +721,6 @@ dependencies = [ "rayon", "serde", "serde_json", - "subprocess", "tar", "tempfile", ] @@ -891,16 +890,6 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "subprocess" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "syn" version = "2.0.66" diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index 0913cfae..f8ee418f 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -26,7 +26,6 @@ machine-info = "1.0.9" rayon = "1.10.0" serde = { version = "1.0.203", features = ["serde_derive"] } serde_json = "1.0.117" -subprocess = "0.2.9" tar = "0.4.41" tempfile = "3.10.1" diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/src/arena.rs index 30b03ae1..6721ef32 100644 --- a/probe_src/probe_frontend/src/arena.rs +++ b/probe_src/probe_frontend/src/arena.rs @@ -7,6 +7,7 @@ use std::{ io::Write, mem::size_of, path::{Path, PathBuf}, + time::SystemTime, }; use crate::{ @@ -14,178 +15,105 @@ use crate::{ ops::{self, FfiFrom}, }; -/// Arena allocator metadata placed at the beginning of allocator files by libprobe. 
-#[repr(C)] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct ArenaHeader { - instantiation: libc::size_t, - base_address: libc::uintptr_t, - capacity: libc::uintptr_t, - used: libc::uintptr_t, -} - -/// This struct represents a single `ops/*.dat` arena allocator file emitted by libprobe. -pub struct OpsArena<'a> { - // raw is needed even though it's unused since ops is a reference to it; - // the compiler doesn't know this since it's constructed using unsafe code. - #[allow(dead_code)] - /// raw byte buffer of Ops arena allocator. - raw: Vec, - /// slice over Ops of the raw buffer. - ops: &'a [ffi::Op], -} - -impl<'a> OpsArena<'a> { - pub fn from_bytes(bytes: Vec) -> Result { - if bytes.len() < size_of::() { - return Err(eyre!( - "Arena buffer too small, got {}, minimum size {}", - bytes.len(), - size_of::() - )); - } - - let header = unsafe { get_header_unchecked(&bytes) }; - if header.capacity != bytes.len() { - return Err(eyre!( - "Invalid arena capacity, expected {}, got {}", - header.capacity, - bytes.len(), - )); - } - if header.used > header.capacity { - return Err(eyre!( - "Arena size {} is greater than capacity {}", - header.used, - header.capacity, - )); - } - if ((header.used - size_of::()) % size_of::()) != 0 { - return Err(eyre!( - "Arena alignment error: used arena size minus header isn't a multiple of op size" - )); - } - - let count = (header.used - size_of::()) / size_of::(); +/// Recursively parse a top-level libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. +/// +/// on success, returns the number of Ops processed in the top-level directory +pub fn parse_arena_dir, P2: AsRef + Sync>( + in_dir: P1, + out_dir: P2, +) -> Result { + log::info!( + "Processing arena dir {} into output dir {}", + in_dir.as_ref().to_string_lossy(), + out_dir.as_ref().to_string_lossy() + ); - log::debug!( - "[unsafe] converting Vec to &[ffi::Op] of size {}", - count - ); - let ops = unsafe { - let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; - std::slice::from_raw_parts(ptr, count) - }; + let start = SystemTime::now(); - Ok(Self { raw: bytes, ops }) - } + let count = fs::read_dir(in_dir) + .wrap_err("Error opening Arena directory")? + .par_bridge() + .map(|x| { + parse_pid( + x.wrap_err("Error reading DirEntry from Arena directory")? + .path(), + &out_dir, + ) + }) + .try_fold(|| 0usize, |acc, x| x.map(|x| acc + x)) + .try_reduce(|| 0usize, |id, x| Ok(id + x))?; - pub fn decode(self, ctx: &ArenaContext) -> Result> { - self.ops - .iter() - .map(|x| ops::Op::ffi_from(x, ctx)) - .collect::>>() - .wrap_err("Failed to decode arena ops") - } -} + match SystemTime::now().duration_since(start) { + Ok(x) => log::info!("Processed {} Ops in {:.3} seconds", count, x.as_secs_f32()), + Err(_) => log::error!("Processing arena dir took negative time"), + }; -/// This struct represents a single `data/*.dat` arena allocator file emitted by libprobe. 
-pub struct DataArena { - header: ArenaHeader, - raw: Vec, + Ok(count) } -impl DataArena { - pub fn from_bytes(bytes: Vec) -> Result { - if bytes.len() < size_of::() { - return Err(eyre!( - "Arena buffer too small, got {}, minimum size {}", - bytes.len(), - size_of::() - )); - } - let header = unsafe { get_header_unchecked(&bytes) }; - if header.capacity != bytes.len() { - return Err(eyre!( - "Invalid arena capacity, expected {}, got {}", - header.capacity, - bytes.len(), - )); - } - if header.used > header.capacity { - return Err(eyre!( - "Arena size {} is greater than capacity {}", - header.used, - header.capacity, - )); - } - Ok(Self { header, raw: bytes }) - } - - pub fn try_get_slice<'a>(&'a self, ptr: usize) -> Option<&'a [u8]> { - let end = self.header.base_address + self.header.used; - match ptr >= self.header.base_address && ptr <= end { - false => None, - true => Some(unsafe { - let new_ptr = self.raw.as_ptr().add(ptr - self.header.base_address); - let len = end - ptr; +/// Recursively parse a PID libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. +/// +/// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. +/// +/// On success, returns the number of Ops processed in the PID directory. +fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { + let pid = filename_numeric(&in_dir)?; - core::slice::from_raw_parts(new_ptr, len) - }), - } - } -} + let dir = { + let mut path = out_dir.as_ref().to_owned(); + path.push(pid.to_string()); + path + }; -/// this struct represents a `/data` directory from libprobe. -pub struct ArenaContext(pub Vec); + fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; -impl ArenaContext { - pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> { - for vec in self.0.iter() { - if let Some(x) = vec.try_get_slice(ptr) { - return Some(x); - } - } - None - } + fs::read_dir(in_dir) + .wrap_err("Error opening PID directory")? + // .par_bridge() + .map(|entry| { + parse_exec_epoch( + entry + .wrap_err("Error reading DirEntry from PID directory")? + .path(), + &dir, + ) + }) + .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } -/// Parse the front of a raw byte buffer into a libprobe arena header +/// Recursively parse a ExecEpoch libprobe arena allocator directory from `in_dir` and write it in +/// serialized format to `out_dir`. /// -/// # Safety: -/// Invoking this function on any byte buffer smaller than [`std::mem::size_of()`] -/// bytes is undefined behavior (best case a segfault). Invoking this method on a byte buffer -/// that's not a valid libprobe arena will produce garbage values that should not be used. -unsafe fn get_header_unchecked(bytes: &[u8]) -> ArenaHeader { - let ptr = bytes as *const [u8] as *const ArenaHeader; - log::debug!("[unsafe] converting byte buffer into ArenaHeader"); - unsafe { - ArenaHeader { - instantiation: (*ptr).instantiation, - base_address: (*ptr).base_address, - capacity: (*ptr).capacity, - used: (*ptr).used, - } - } -} - -/// Gets the filename from a path and returns it parsed as an integer. +/// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. /// -/// errors if the path has no filename or the filename can't be parsed as an integer. -fn filename_numeric>(dir: P) -> Result { - let filename = dir - .as_ref() - .file_name() - .ok_or_else(|| eyre!("'{}' has no filename", dir.as_ref().to_string_lossy()))?; +/// On success, returns the number of Ops processed in the ExecEpoch directory. 
+fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { + let epoch = filename_numeric(&in_dir)?; - filename - .to_str() - .ok_or_else(|| eyre!("filename '{}' not valid UTF-8", filename.to_string_lossy()))? - .parse::() - .wrap_err(format!( - "unable to convert filename '{}' to integer", - filename.to_string_lossy() - )) + let dir = { + let mut path = out_dir.as_ref().to_owned(); + path.push(epoch.to_string()); + path + }; + + fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; + + fs::read_dir(in_dir) + .wrap_err("Error opening ExecEpoch directory")? + // .par_bridge() + .map(|entry| { + parse_tid( + entry + .wrap_err("Error reading DirEntry from ExecEpoch directory")? + .path(), + &dir, + ) + }) + .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } /// Recursively parse a TID libprobe arena allocator directory from `in_dir` and write it in @@ -202,13 +130,16 @@ fn filename_numeric>(dir: P) -> Result { /// [`ArenaContext`]. /// 6. [`ops::Op`]s are serialized into json and written line-by-line into the output directory. /// -/// (steps 5 & 6 are done with iterators to reduce unnecessary memory allocations) -fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { +/// (steps 5 & 6 are done lazily with iterators to reduce unnecessary memory allocations) +/// +/// On success, returns the number of Ops processed in the TID directory. +fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { fn try_files_from_arena_dir>(dir: P) -> Result> { match fs::read_dir(&dir) { - Ok(x) => x - .map(|x| { - x.map(|x| x.path()) + Ok(entry_iter) => entry_iter + .map(|entry_result| { + entry_result + .map(|entry| entry.path()) .wrap_err("Error reading DirEntry from arena directory") }) .collect::, _>>(), @@ -230,8 +161,8 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul "Error reading directory '{}'", in_dir.as_ref().to_string_lossy() ))? - .filter_map(|x| match x { - Ok(x) => Some((x.file_name(), x)), + .filter_map(|entry_result| match entry_result { + Ok(entry) => Some((entry.file_name(), entry)), Err(e) => { log::warn!("Error reading DirEntry in TID directory: {}", e); None @@ -248,15 +179,16 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul .path(), )? .into_iter() - .map(|x| { + .map(|data_dat_file| { DataArena::from_bytes( - std::fs::read(x).wrap_err("Failed to read file from data directory")?, + std::fs::read(data_dat_file).wrap_err("Failed to read file from data directory")?, ) }) .collect::, _>>()?, ); // STEP 4 + let mut count: usize = 0; try_files_from_arena_dir( paths .get(OsStr::new("ops")) @@ -265,11 +197,11 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul )? // STEP 5 .into_iter() - .map(|x| { - std::fs::read(x) + .map(|ops_dat_file| { + std::fs::read(ops_dat_file) .wrap_err("Failed to read file from ops directory") - .and_then(|x| { - OpsArena::from_bytes(x) + .and_then(|file_contents| { + OpsArena::from_bytes(file_contents) .wrap_err("Error constructing OpsArena")? .decode(&ctx) .wrap_err("Error decoding OpsArena") @@ -288,90 +220,183 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul outfile .write_all("\n".as_bytes()) .wrap_err("Failed to write newline deliminator to tempfile")?; + count += 1; } Ok::<(), Report>(()) })?; - Ok(()) + Ok(count) } -/// Recursively parse a ExecEpoch libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. +/// Gets the filename from a path and returns it parsed as an integer. 
/// -/// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. -fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { - let epoch = filename_numeric(&in_dir)?; +/// errors if the path has no filename or the filename can't be parsed as an integer. +fn filename_numeric>(dir: P) -> Result { + let filename = dir + .as_ref() + .file_name() + .ok_or_else(|| eyre!("'{}' has no filename", dir.as_ref().to_string_lossy()))?; - let dir = { - let mut path = out_dir.as_ref().to_owned(); - path.push(epoch.to_string()); - path - }; + filename + .to_str() + .ok_or_else(|| eyre!("filename '{}' not valid UTF-8", filename.to_string_lossy()))? + .parse::() + .wrap_err(format!( + "unable to convert filename '{}' to integer", + filename.to_string_lossy() + )) +} - fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; +/// this struct represents a `/data` directory from libprobe. +pub struct ArenaContext(pub Vec); - fs::read_dir(in_dir) - .wrap_err("Error opening ExecEpoch directory")? - // .par_bridge() - .try_for_each(|x| { - parse_tid( - x.wrap_err("Error reading DirEntry from ExecEpoch directory")? - .path(), - &dir, - ) - })?; +impl ArenaContext { + pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> { + for vec in self.0.iter() { + if let Some(x) = vec.try_get_slice(ptr) { + return Some(x); + } + } + None + } +} - Ok(()) +/// This struct represents a single `data/*.dat` arena allocator file emitted by libprobe. +pub struct DataArena { + header: ArenaHeader, + raw: Vec, } -/// Recursively parse a PID libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. -/// -/// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. -fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result<()> { - let pid = filename_numeric(&in_dir)?; +impl DataArena { + pub fn from_bytes(bytes: Vec) -> Result { + if bytes.len() < size_of::() { + return Err(eyre!( + "Arena buffer too small, got {}, minimum size {}", + bytes.len(), + size_of::() + )); + } + let header = ArenaHeader::from_bytes(&bytes) + .wrap_err("Failed to create ArenaHeader for DataArena")?; - let dir = { - let mut path = out_dir.as_ref().to_owned(); - path.push(pid.to_string()); - path - }; + Ok(Self { header, raw: bytes }) + } - fs::create_dir(&dir).wrap_err("Failed to create ExecEpoch output directory")?; + pub fn try_get_slice<'a>(&'a self, ptr: usize) -> Option<&'a [u8]> { + let end = self.header.base_address + self.header.used; + match ptr >= self.header.base_address && ptr <= end { + false => None, + true => Some(unsafe { + let new_ptr = self.raw.as_ptr().add(ptr - self.header.base_address); + let len = end - ptr; - fs::read_dir(in_dir) - .wrap_err("Error opening PID directory")? - // .par_bridge() - .try_for_each(|x| { - parse_exec_epoch( - x.wrap_err("Error reading DirEntry from PID directory")? - .path(), - &dir, - ) - })?; + core::slice::from_raw_parts(new_ptr, len) + }), + } + } +} - Ok(()) +/// This struct represents a single `ops/*.dat` arena allocator file emitted by libprobe. +pub struct OpsArena<'a> { + // raw is needed even though it's unused since ops is a reference to it; + // the compiler doesn't know this since it's constructed using unsafe code. + #[allow(dead_code)] + /// raw byte buffer of Ops arena allocator. + raw: Vec, + /// slice over Ops of the raw buffer. 
+ ops: &'a [ffi::Op], } -/// Recursively parse a top-level libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. -/// -/// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. -pub fn parse_arena_dir, P2: AsRef + Sync>( - in_dir: P1, - out_dir: P2, -) -> Result<()> { - fs::read_dir(in_dir) - .wrap_err("Error opening Arena directory")? - .par_bridge() - .try_for_each(|x| { - parse_pid( - x.wrap_err("Error reading DirEntry from Arena directory")? - .path(), - &out_dir, - ) - })?; +impl<'a> OpsArena<'a> { + pub fn from_bytes(bytes: Vec) -> Result { + let header = ArenaHeader::from_bytes(&bytes) + .wrap_err("Failed to create ArenaHeader for OpsArena")?; + + if ((header.used - size_of::()) % size_of::()) != 0 { + return Err(eyre!( + "Arena alignment error: used arena size minus header isn't a multiple of op size" + )); + } + + let count = (header.used - size_of::()) / size_of::(); + + log::debug!( + "[unsafe] converting Vec to &[ffi::Op] of size {}", + count + ); + let ops = unsafe { + let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; + std::slice::from_raw_parts(ptr, count) + }; + + Ok(Self { raw: bytes, ops }) + } + + pub fn decode(self, ctx: &ArenaContext) -> Result> { + self.ops + .iter() + .map(|x| ops::Op::ffi_from(x, ctx)) + .collect::>>() + .wrap_err("Failed to decode arena ops") + } +} + +/// Arena allocator metadata placed at the beginning of allocator files by libprobe. +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ArenaHeader { + instantiation: libc::size_t, + base_address: libc::uintptr_t, + capacity: libc::uintptr_t, + used: libc::uintptr_t, +} + +impl ArenaHeader { + /// Parse the front of a raw byte buffer into a libprobe arena header + fn from_bytes(bytes: &[u8]) -> Result { + let ptr = bytes as *const [u8] as *const Self; + + if bytes.len() < size_of::() { + return Err(eyre!( + "Arena buffer too small, got {}, minimum size {}", + bytes.len(), + size_of::() + )); + } + + log::debug!("[unsafe] converting byte buffer into ArenaHeader"); + let header = unsafe { + Self { + instantiation: (*ptr).instantiation, + base_address: (*ptr).base_address, + capacity: (*ptr).capacity, + used: (*ptr).used, + } + }; + log::debug!( + "[unsafe] created ArenaHeader [ inst={}, base_addr={:#x}, capacity: {}, used={} ]", + header.instantiation, + header.base_address, + header.capacity, + header.used + ); + + if header.capacity != bytes.len() { + return Err(eyre!( + "Invalid arena capacity, expected {}, got {}", + header.capacity, + bytes.len(), + )); + } + if header.used > header.capacity { + return Err(eyre!( + "Arena size {} is greater than capacity {}", + header.used, + header.capacity, + )); + } - Ok(()) + Ok(header) + } } diff --git a/probe_src/probe_frontend/src/main.rs b/probe_src/probe_frontend/src/main.rs index 5655ec42..56fe28a1 100644 --- a/probe_src/probe_frontend/src/main.rs +++ b/probe_src/probe_frontend/src/main.rs @@ -1,5 +1,5 @@ use std::{ - ffi::{OsStr, OsString}, + ffi::OsString, fs::{self, File}, io::{Read, Write}, path::{Path, PathBuf}, @@ -47,8 +47,6 @@ mod arena; /// System metadata recorded into probe logs. mod metadata; - - /// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. 
#[derive(clap::Parser, Debug, Clone)] #[command(author, version, about, long_about = None)] @@ -120,9 +118,13 @@ fn main() -> Result<()> { }, }; } + let mut tar = tar::Builder::new(flate2::write::GzEncoder::new( + File::create_new(output).wrap_err("Failed to create output file")?, + Compression::default(), + )); // the path to the libprobe.so directory is searched for as follows: - // - --lib-path argument if set + // - --lib-path argument if set // - __PROBE_LIB env var if set // - /usr/share/probe // - error @@ -149,72 +151,43 @@ fn main() -> Result<()> { } else { ld_preload.push("libprobe.so"); } - - // append any exiting LD_PRELOAD overrides + + // append any existing LD_PRELOAD overrides if let Some(x) = std::env::var_os("LD_PRELOAD") { ld_preload.push(":"); ld_preload.push(&x); } - let dir = tempfile::tempdir().wrap_err("Failed to create arena directory")?; + let arena_dir = tempfile::tempdir().wrap_err("Failed to create arena directory")?; - let mut popen = if gdb { + let mut child = if gdb { let mut dir_env = OsString::from("__PROBE_DIR="); - dir_env.push(dir.path()); + dir_env.push(arena_dir.path()); let mut preload_env = OsString::from("LD_PRELOAD="); preload_env.push(ld_preload); - subprocess::Exec::cmd("gdb") - .args(&[ - OsStr::new("--args"), - OsStr::new("env"), - &dir_env, - &preload_env, - ]) + std::process::Command::new("gdb") + .arg("--args") + .arg("env") + .arg(dir_env) + .arg(preload_env) .args(&cmd) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .spawn() + .wrap_err("Failed to launch gdb")? } else { - subprocess::Exec::cmd(&cmd[0]) + std::process::Command::new(&cmd[0]) .args(&cmd[1..]) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .env("__PROBE_DIR", arena_dir.path()) .env("LD_PRELOAD", ld_preload) - .env("__PROBE_DIR", dir.path()) - } - .popen() - .wrap_err("Failed to launch process")?; - - let metadata = metadata::Metadata::new( - popen - .pid() - .expect("just popened process should always have PID") as i32, - ); - - popen.wait().wrap_err("Error awaiting child process")?; - - let file = match File::create_new(output) { - Ok(x) => x, - Err(e) => { - log::error!("Failed to create output file: {}", e); - - let path = format!( - "./probe_log_{}_{}", - std::process::id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .wrap_err("current system time before unix epoch")? - .as_secs() - ); - - let tmp = File::create_new(&path) - .wrap_err(format!("Failed to create backup output file '{}'", path)); - - log::error!("backup output file '{}' will be used instead", &path); - - tmp - } - .wrap_err("Failed to create output dir")?, + .spawn() + .wrap_err("Failed to launch child process")? }; - let mut tar = - tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); + let metadata = metadata::Metadata::new(child.id() as i32); let outdir = tempfile::tempdir()?; @@ -227,7 +200,27 @@ fn main() -> Result<()> { ) .wrap_err("Error writing metadata")?; - arena::parse_arena_dir(dir.path(), &outdir) + match Path::read_dir(arena_dir.path()) { + Ok(x) => { + if !(x + .into_iter() + .try_fold(false, |_, x| x.map(|x| x.path().exists()))?) + { + log::warn!( + "No arean files detected, something is \ + wrong, you should probably abort!" 
+ ); + } + } + Err(e) => { + return Err(e).wrap_err( + "Unable to read arena directory during post-startup sanity check", + ) + } + } + + child.wait().wrap_err("Failed to await child process")?; + arena::parse_arena_dir(arena_dir.path(), &outdir) .wrap_err("Unable to decode arena directory")?; tar.append_dir_all(".", &outdir) @@ -238,12 +231,12 @@ fn main() -> Result<()> { log::warn!("Failed to close output directory: {}", e); } - if let Err(e) = dir.close() { + if let Err(e) = arena_dir.close() { log::warn!("Failed to close arena directory: {}", e); } Ok::<(), Report>(()) - }, + } Command::Dump { input } => { let file = flate2::read::GzDecoder::new(File::open(&input).wrap_err(format!( "Failed to open input file '{}'", @@ -265,7 +258,6 @@ fn main() -> Result<()> { .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? .to_owned(); - if path == "_metadata" { return Ok(()); } @@ -320,6 +312,6 @@ fn main() -> Result<()> { Ok(()) }) - }, + } } } diff --git a/probe_src/probe_frontend/src/ops.rs b/probe_src/probe_frontend/src/ops.rs index 99dec8ff..1dd747da 100644 --- a/probe_src/probe_frontend/src/ops.rs +++ b/probe_src/probe_frontend/src/ops.rs @@ -324,7 +324,7 @@ impl OpInternal { value: &ffi::Op__bindgen_ty_1, ctx: &ArenaContext, ) -> Result { - log::debug!("[unsafe] decoding Op tagged union"); + log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); Ok(match kind { ffi::OpCode_init_process_op_code => { Self::InitProcess(unsafe { value.init_process_epoch }) From 5fa394f704514a708af9e24e9b08c68eb477c378 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 00:29:48 -0500 Subject: [PATCH 10/37] version 0.2.0 --- probe_src/probe_frontend/.envrc | 2 +- probe_src/probe_frontend/Cargo.lock | 141 ++++-- probe_src/probe_frontend/Cargo.toml | 40 +- probe_src/probe_frontend/build.sh | 9 - probe_src/probe_frontend/cli/Cargo.toml | 27 ++ probe_src/probe_frontend/cli/src/dump.rs | 385 ++++++++++++++++ probe_src/probe_frontend/cli/src/main.rs | 205 +++++++++ probe_src/probe_frontend/cli/src/record.rs | 114 +++++ .../probe_frontend/cli/src/transcribe.rs | 21 + probe_src/probe_frontend/cli/src/util.rs | 74 ++++ probe_src/probe_frontend/configure | 7 + probe_src/probe_frontend/deny.toml | 209 +++++++++ probe_src/probe_frontend/flake.lock | 29 +- probe_src/probe_frontend/flake.nix | 141 +++++- probe_src/probe_frontend/lib/Cargo.toml | 29 ++ probe_src/probe_frontend/{ => lib}/build.rs | 106 +++-- probe_src/probe_frontend/lib/src/error.rs | 107 +++++ probe_src/probe_frontend/lib/src/ffi.rs | 251 +++++++++++ probe_src/probe_frontend/lib/src/lib.rs | 32 ++ .../probe_frontend/{ => lib}/src/metadata.rs | 4 +- probe_src/probe_frontend/lib/src/ops.rs | 225 ++++++++++ .../{src/arena.rs => lib/src/transcribe.rs} | 152 +++---- probe_src/probe_frontend/macros/Cargo.toml | 19 + probe_src/probe_frontend/macros/src/lib.rs | 113 +++++ probe_src/probe_frontend/macros/src/pygen.rs | 371 ++++++++++++++++ .../probe_frontend/python/generated/ops.py | 361 +++++++++++++++ probe_src/probe_frontend/python/probe.py | 35 ++ probe_src/probe_frontend/src/display.rs | 267 ------------ probe_src/probe_frontend/src/ffi.rs | 8 - probe_src/probe_frontend/src/main.rs | 317 -------------- probe_src/probe_frontend/src/ops.rs | 410 ------------------ 31 files changed, 3005 insertions(+), 1206 deletions(-) delete mode 100755 probe_src/probe_frontend/build.sh create mode 100644 probe_src/probe_frontend/cli/Cargo.toml create mode 100644 probe_src/probe_frontend/cli/src/dump.rs create mode 100644 
probe_src/probe_frontend/cli/src/main.rs create mode 100644 probe_src/probe_frontend/cli/src/record.rs create mode 100644 probe_src/probe_frontend/cli/src/transcribe.rs create mode 100644 probe_src/probe_frontend/cli/src/util.rs create mode 100755 probe_src/probe_frontend/configure create mode 100644 probe_src/probe_frontend/deny.toml create mode 100644 probe_src/probe_frontend/lib/Cargo.toml rename probe_src/probe_frontend/{ => lib}/build.rs (57%) create mode 100644 probe_src/probe_frontend/lib/src/error.rs create mode 100644 probe_src/probe_frontend/lib/src/ffi.rs create mode 100644 probe_src/probe_frontend/lib/src/lib.rs rename probe_src/probe_frontend/{ => lib}/src/metadata.rs (88%) create mode 100644 probe_src/probe_frontend/lib/src/ops.rs rename probe_src/probe_frontend/{src/arena.rs => lib/src/transcribe.rs} (73%) create mode 100644 probe_src/probe_frontend/macros/Cargo.toml create mode 100644 probe_src/probe_frontend/macros/src/lib.rs create mode 100644 probe_src/probe_frontend/macros/src/pygen.rs create mode 100644 probe_src/probe_frontend/python/generated/ops.py create mode 100644 probe_src/probe_frontend/python/probe.py delete mode 100644 probe_src/probe_frontend/src/display.rs delete mode 100644 probe_src/probe_frontend/src/ffi.rs delete mode 100644 probe_src/probe_frontend/src/main.rs delete mode 100644 probe_src/probe_frontend/src/ops.rs diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc index 56230f5f..76f69446 100644 --- a/probe_src/probe_frontend/.envrc +++ b/probe_src/probe_frontend/.envrc @@ -1,4 +1,4 @@ use_flake (cd ../libprobe && make) -export __PROBE_LIB=$(expand_path ../libprobe/build) +export __PROBE_LOG=info diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index 4c6142e1..62e571e1 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -123,7 +123,7 @@ version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cexpr", "clang-sys", "itertools", @@ -148,9 +148,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bumpalo" @@ -160,9 +160,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "cc" -version = "1.0.99" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" [[package]] name = "cexpr" @@ -201,7 +201,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", - "libloading 0.8.3", + "libloading 0.8.4", ] [[package]] @@ -354,9 +354,9 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "env_filter" @@ 
-401,12 +401,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" - [[package]] name = "filetime" version = "0.2.23" @@ -435,6 +429,17 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.28.1" @@ -535,9 +540,9 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lazycell" @@ -563,9 +568,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", "windows-targets", @@ -695,6 +700,12 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "prettyplease" version = "0.2.20" @@ -706,10 +717,9 @@ dependencies = [ ] [[package]] -name = "probe_frontend" -version = "0.1.0" +name = "probe_cli" +version = "0.2.0" dependencies = [ - "bindgen", "chrono", "clap", "color-eyre", @@ -717,19 +727,42 @@ dependencies = [ "flate2", "libc", "log", + "probe_frontend", + "rand", + "serde", + "serde_json", + "tar", +] + +[[package]] +name = "probe_frontend" +version = "0.2.0" +dependencies = [ + "bindgen", + "libc", + "log", "machine-info", + "probe_macros", "rayon", "serde", "serde_json", - "tar", - "tempfile", + "thiserror", +] + +[[package]] +name = "probe_macros" +version = "0.2.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -743,6 +776,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = 
"0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "rayon" version = "1.10.0" @@ -819,7 +882,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -854,9 +917,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" dependencies = [ "itoa", "ryu", @@ -892,9 +955,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.66" +version = "2.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" dependencies = [ "proc-macro2", "quote", @@ -926,18 +989,6 @@ dependencies = [ "xattr", ] -[[package]] -name = "tempfile" -version = "3.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" -dependencies = [ - "cfg-if", - "fastrand", - "rustix", - "windows-sys", -] - [[package]] name = "thiserror" version = "1.0.61" @@ -1027,6 +1078,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wasm-bindgen" version = "0.2.92" diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index f8ee418f..a4e4dc97 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -1,33 +1,19 @@ -[package] -name = "probe_frontend" -version = "0.1.0" +[workspace] +resolver = "2" +members = [ + "cli", + "lib", + "macros", +] + +[workspace.package] +version = "0.2.0" authors = ["Jenna Fligor "] publish = false edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[workspace.lints.rust] +unsafe_op_in_unsafe_fn = "deny" -[[bin]] +[workspace.metadata.crane] name = "probe" -path = "src/main.rs" - -[lints.rust] -unsafe_op_in_unsafe_fn = "forbid" - -[dependencies] -chrono = "0.4.38" -clap = { version = "4.5.7", features = ["derive"] } -color-eyre = "0.6.3" -env_logger = "0.11.3" -flate2 = "1.0.30" -libc = "0.2.155" -log = "0.4.21" -machine-info = "1.0.9" -rayon = "1.10.0" -serde = { version = "1.0.203", features = ["serde_derive"] } -serde_json = "1.0.117" -tar = "0.4.41" -tempfile = "3.10.1" - -[build-dependencies] -bindgen = "0.69.4" diff --git a/probe_src/probe_frontend/build.sh b/probe_src/probe_frontend/build.sh deleted file mode 100755 index 440e9f8a..00000000 --- a/probe_src/probe_frontend/build.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -set -e -cd "$(dirname "$(realpath "$0")")" -mkdir -p ./include -cp 
../libprobe/include/prov_ops.h ./include/prov_ops.h -git add ./include -nix build -git restore --staged ./include diff --git a/probe_src/probe_frontend/cli/Cargo.toml b/probe_src/probe_frontend/cli/Cargo.toml new file mode 100644 index 00000000..5a3add18 --- /dev/null +++ b/probe_src/probe_frontend/cli/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "probe_cli" +version.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + +[[bin]] +name = "probe" +path = "src/main.rs" + +[dependencies] +chrono = "0.4.38" +clap = { version = "4.5.7", features = ["derive"] } +color-eyre = "0.6.3" +env_logger = "0.11.3" +flate2 = "1.0.30" +libc = "0.2.155" +log = "0.4.21" +probe_frontend = { path = "../lib" } +rand = "0.8.5" +serde = "1.0.203" +serde_json = "1.0.118" +tar = "0.4.41" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs new file mode 100644 index 00000000..76733d2f --- /dev/null +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -0,0 +1,385 @@ +use std::{ + fs::File, + io::{Read, Write}, + path::Path, +}; + +use chrono::{DateTime, SecondsFormat}; +use color_eyre::eyre::{eyre, Result, WrapErr}; +use probe_frontend::ops; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DumpOp { + pid: usize, + exec_epoch: usize, + tid: usize, + op: ops::Op, +} + +pub fn to_stdout>(tar_path: P) -> Result<()> { + dump_internal(tar_path, |(pid, epoch, tid), ops| { + let mut stdout = std::io::stdout().lock(); + for op in ops { + writeln!(stdout, "{}.{}.{} >>> {}", pid, epoch, tid, op.dump())?; + } + Ok(()) + }) +} + +pub fn to_stdout_json>(tar_path: P) -> Result<()> { + dump_internal(tar_path, |(pid, epoch, tid), ops| { + let mut stdout = std::io::stdout().lock(); + + for op in ops { + let json = serde_json::to_string(&DumpOp { + pid, + exec_epoch: epoch, + tid, + op, + })?; + writeln!(stdout, "{}", json)?; + } + Ok(()) + }) +} + +fn dump_internal, F: Fn((usize, usize, usize), Vec) -> Result<()>>( + tar_path: P, + printer: F, +) -> Result<()> { + let file = flate2::read::GzDecoder::new(File::open(&tar_path).wrap_err_with(|| { + eyre!(format!( + "Failed to open input file '{}'", + tar_path.as_ref().to_string_lossy() + )) + })?); + + let mut tar = tar::Archive::new(file); + + tar.entries() + .wrap_err("Unable to get tarball entry iterator")? + .try_for_each(|x| { + let mut entry = x.wrap_err("Unable to extract tarball entry")?; + + let path = entry + .path() + .wrap_err("Error getting path of tarball entry")? + .as_ref() + // this forced UTF-8 conversion is permitted because these paths are strictly + // within the tarball *we wrote*, so the paths should be all ASCII + .to_str() + .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? 
+ .to_owned(); + + // if path == "_metadata" { + // return Ok(()); + // } + + let mut buf = String::new(); + let size = entry + .read_to_string(&mut buf) + .wrap_err("unable to read contents of tarball entry")?; + + // this is the case where the entry is a directory + if size == 0 { + return Ok(()); + } + + let hierarchy = path + .split('/') + .map(|x| { + x.parse::() + .wrap_err(format!("Unable to convert path component '{x}' to integer")) + }) + .collect::, _>>() + .wrap_err("Unable to extract PID.EPOCH.TID hierarchy")?; + + if hierarchy.len() != 3 { + return Err(eyre!("malformed PID.EPOCH.TID hierarchy")); + } + let op_id_triple = (hierarchy[0], hierarchy[1], hierarchy[2]); + + let ops = buf + .split('\n') + .filter_map(|x| { + if x.is_empty() { + return None; + } + Some(serde_json::from_str::(x).wrap_err("Error deserializing Op")) + }) + .collect::, _>>() + .wrap_err("Failed to deserialize TID file")?; + + printer(op_id_triple, ops)?; + + Ok(()) + }) +} + +trait Dump { + fn dump(&self) -> String; +} + +impl Dump for ops::statx_timestamp { + fn dump(&self) -> String { + match DateTime::from_timestamp(self.tv_sec, self.tv_nsec) { + Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), + None => "[INVALID TIMESTAMP]".to_owned(), + } + } +} + +impl Dump for ops::timeval { + fn dump(&self) -> String { + match DateTime::from_timestamp(self.tv_sec, self.tv_usec as u32 * 1000) { + Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), + None => "[INVALID TIMESTAMP]".to_owned(), + } + } +} + +impl Dump for ops::statx { + fn dump(&self) -> String { + format!( + "[ uid={}, gid={}, mode={:#06o} ino={}, size={}, mtime={} ]", + self.stx_uid, + self.stx_gid, + self.stx_mode, + self.stx_ino, + self.stx_size, + self.stx_mtime.dump(), + ) + } +} + +impl Dump for ops::rusage { + fn dump(&self) -> String { + format!( + "[ utime={}, stime={}, maxrss={} ]", + self.ru_utime.dump(), + self.ru_stime.dump(), + self.ru_maxrss, + ) + } +} + +impl Dump for ops::Path { + fn dump(&self) -> String { + format!( + "[ dirfd={}, path='{}', inode={}, mtime={} ]", + self.dirfd_minus_at_fdcwd + libc::AT_FDCWD, + self.path.to_string_lossy(), + self.inode, + self.mtime.dump(), + ) + } +} + +impl Dump for ops::CloneOp { + fn dump(&self) -> String { + format!( + "[ child_process_id={}, child_thread_id={}, errno={} ]", + self.child_process_id, self.child_thread_id, self.ferrno, + ) + } +} + +impl Dump for ops::CloseOp { + fn dump(&self) -> String { + format!( + "[ low_fd={}, high_fd={}, errno={} ]", + self.low_fd, self.high_fd, self.ferrno, + ) + } +} + +impl Dump for ops::ExitOp { + fn dump(&self) -> String { + format!( + "[ satus={}, run_atexit_handlers={} ]", + self.status, self.run_atexit_handlers, + ) + } +} + +impl Dump for ops::GetRUsageOp { + fn dump(&self) -> String { + format!( + "[ waitpid_arg={}, getrusage_arg={}, usage={}, errno={} ]", + self.waitpid_arg, + self.getrusage_arg, + self.usage.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::InitProcessOp { + fn dump(&self) -> String { + format!("[ pid={} ]", self.pid) + } +} + +impl Dump for ops::InitThreadOp { + fn dump(&self) -> String { + format!("[ tid={} ]", self.tid) + } +} + +impl Dump for ops::WaitOp { + fn dump(&self) -> String { + format!( + "[ pid={}, options={}, status={}, ret={}, errno={} ]", + self.pid, self.options, self.status, self.ret, self.ferrno, + ) + } +} + +impl Dump for ops::InitExecEpochOp { + fn dump(&self) -> String { + format!( + "[ epoch={}, program_name={} ]", + self.epoch, + self.program_name.to_string_lossy(), + ) + } +} + 
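// --- Illustrative aside (not part of this patch series) ---
// Each `Dump` impl in this file renders one op as a bracketed key=value
// string, which `to_stdout` then prefixes with the PID.EPOCH.TID triple.
// A minimal sketch of exercising one of these impls directly, assuming
// `ffi::CloseOp` exposes exactly the three public fields used in the
// CloseOp impl above (field layout is an assumption, not confirmed here):
#[cfg(test)]
mod dump_sketch_tests {
    use super::*;

    #[test]
    fn close_op_renders_bracketed_fields() {
        // construct an op by hand and compare against the format string
        // used by `impl Dump for ops::CloseOp` above
        let op = ops::CloseOp {
            low_fd: 3,
            high_fd: 3,
            ferrno: 0,
        };
        assert_eq!(op.dump(), "[ low_fd=3, high_fd=3, errno=0 ]");
    }
}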
+impl Dump for ops::OpenOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, mode={:#06o} fd={}, errno={} ]", + self.path.dump(), + self.flags, + self.mode, + self.fd, + self.ferrno, + ) + } +} + +impl Dump for ops::ChdirOp { + fn dump(&self) -> String { + format!("[ path={}, errno={} ]", self.path.dump(), self.ferrno,) + } +} + +impl Dump for ops::ExecOp { + fn dump(&self) -> String { + format!("[ path={}, errno={} ]", self.path.dump(), self.ferrno,) + } +} + +impl Dump for ops::AccessOp { + fn dump(&self) -> String { + format!( + "[ path={}, mode={:#06o}, flags={}, errno={} ]", + self.path.dump(), + self.mode, + self.flags, + self.ferrno, + ) + } +} + +impl Dump for ops::StatOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, statx_buf={}, errno={} ]", + self.path.dump(), + self.flags, + self.statx_buf.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::ReaddirOp { + fn dump(&self) -> String { + format!( + "[ dir={}, child='{}', all_children={}, errno={} ]", + self.dir.dump(), + self.child.to_string_lossy(), + self.all_children, + self.ferrno, + ) + } +} + +impl Dump for ops::Metadata { + fn dump(&self) -> String { + match self { + ops::Metadata::Mode { mode } => format!("Mode[ mode={:#06o} ]", mode), + ops::Metadata::Ownership { uid, gid } => { + format!("Ownership[ uid={}, gid={} ]", uid, gid) + } + ops::Metadata::Times { + is_null, + atime, + mtime, + } => format!( + "Times[ is_null={}, atime={}, mtime={} ]", + is_null, + atime.dump(), + mtime.dump() + ), + } + } +} + +impl Dump for ops::UpdateMetadataOp { + fn dump(&self) -> String { + format!( + "[ path={}, flags={}, metadata={}, errno={} ]", + self.path.dump(), + self.flags, + self.metadata.dump(), + self.ferrno, + ) + } +} + +impl Dump for ops::ReadLinkOp { + fn dump(&self) -> String { + format!( + "[ path={}, resolved='{}', errno={} ]", + self.path.dump(), + self.resolved.to_string_lossy(), + self.ferrno + ) + } +} + +impl Dump for ops::OpInternal { + fn dump(&self) -> String { + fn wfmt(x: &str, y: &impl Dump) -> String { + format!("{}{}", x, y.dump()) + } + + match self { + ops::OpInternal::InitProcessOp(x) => wfmt("InitProcessOp", x), + ops::OpInternal::InitExecEpochOp(x) => wfmt("InitExecEpochOp", x), + ops::OpInternal::InitThreadOp(x) => wfmt("InitThreadOp", x), + ops::OpInternal::OpenOp(x) => wfmt("OpenOp", x), + ops::OpInternal::CloseOp(x) => wfmt("CloseOp", x), + ops::OpInternal::ChdirOp(x) => wfmt("ChdirOp", x), + ops::OpInternal::ExecOp(x) => wfmt("ExecOp", x), + ops::OpInternal::CloneOp(x) => wfmt("CloneOp", x), + ops::OpInternal::ExitOp(x) => wfmt("ExitOp", x), + ops::OpInternal::AccessOp(x) => wfmt("AccessOp", x), + ops::OpInternal::StatOp(x) => wfmt("StatOp", x), + ops::OpInternal::ReaddirOp(x) => wfmt("ReadirOp", x), + ops::OpInternal::WaitOp(x) => wfmt("WaitOp", x), + ops::OpInternal::GetRUsageOp(x) => wfmt("GetRUsageOp", x), + ops::OpInternal::UpdateMetadataOp(x) => wfmt("UpdateMetadataOp", x), + ops::OpInternal::ReadLinkOp(x) => wfmt("ReadLinkOp", x), + } + } +} + +impl Dump for ops::Op { + fn dump(&self) -> String { + self.data.dump() + } +} diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs new file mode 100644 index 00000000..87306a8b --- /dev/null +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -0,0 +1,205 @@ +use std::{ + ffi::OsString, + fs::{self, File}, +}; + +use clap::Parser; +use color_eyre::eyre::{Context, Result}; +use flate2::Compression; +use util::Dir; + +mod dump; +mod record; + +/// Wrapper over 
[`probe_frontend::transcribe`] which provides high-level commands +mod transcribe; + +/// Utility code for creating temporary directories +mod util; + +/// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. +#[derive(clap::Parser, Debug, Clone)] +#[command(author, version, about, long_about = None)] +#[command(propagate_version = true)] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(clap::Subcommand, Debug, Clone)] +enum Command { + /// Execute a command and record its provenance + Record { + /// Path to output to + #[arg(short, long)] + output: Option, + + /// Overwrite existing output directory if it exists + #[arg(short = 'f', long)] + overwrite: bool, + + /// emit PROBE record rather than PROBE log. + #[arg(short, long)] + no_transcribe: bool, + + /// Run in gdb + #[arg(long)] + gdb: bool, + + /// Run in verbose & debug build of libprobe + #[arg(long)] + debug: bool, + + /// Command to execute under provenance + #[arg(required = true)] + cmd: Vec, + }, + + /// Convert PROBE records to PROBE logs. + Transcribe { + /// Overwrite existing output directory if it exists + #[arg(short = 'f', long)] + overwrite: bool, + + /// Path to write the transcribed PROBE log. + #[arg(short, long, required = false, default_value = "probe_log")] + output: OsString, + + /// Path to read the PROBE record from. + #[arg(short, long, required = false, default_value = "probe_record")] + input: OsString, + }, + + /// Write the data from probe log data in a human-readable manner + Dump { + /// output json + #[arg(long)] + json: bool, + + /// Path to load PROBE log from + #[arg(short, long, required = false, default_value = "probe_log")] + input: OsString, + }, +} + +fn main() -> Result<()> { + color_eyre::install()?; + env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); + log::debug!("Logger initialized"); + + match Cli::parse().command { + Command::Record { + output, + overwrite, + no_transcribe, + gdb, + debug, + cmd, + } => if no_transcribe { + record_no_transcribe(output, overwrite, gdb, debug, cmd) + } else { + record_transcribe(output, overwrite, gdb, debug, cmd) + } + .wrap_err("Record command failed"), + + Command::Transcribe { + overwrite, + output, + input, + } => if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file") + .map(|file| tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default()))) + .and_then(|mut tar| transcribe::transcribe(input, &mut tar)) + .wrap_err("Transcribe command failed"), + + Command::Dump { json, input } => if json { + dump::to_stdout_json(input) + } else { + dump::to_stdout(input) + } + .wrap_err("Dump command failed"), + } +} + +fn record_no_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => fs::canonicalize(x).wrap_err("Failed to canonicalize record directory path")?, + None => { + let mut output = std::env::current_dir().wrap_err("Failed to get CWD")?; + output.push("probe_record"); + output + } + }; + + if overwrite { + if let Err(e) = fs::remove_dir_all(&output) { + match e.kind() { + std::io::ErrorKind::NotFound => (), + _ => return Err(e).wrap_err("Failed to remove exisitng record directory"), + } + } + } + + let record_dir = Dir::new(output).wrap_err("Failed to create record directory")?; + + record::Recorder::new(cmd, record_dir) + .gdb(gdb) + .debug(debug) + .record()?; + + Ok(()) +} + 
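// --- Illustrative aside (not part of this patch series) ---
// Both the Transcribe arm of main() above and record_transcribe() below wrap
// the output file in the same gzip-compressed tar writer. A minimal sketch of
// that shared pattern pulled into a helper; the helper name is hypothetical
// and not an API introduced by this patch.
#[allow(dead_code)] // sketch only; the real code inlines this at both call sites
fn open_probe_log_writer(
    output: &std::ffi::OsStr,
    overwrite: bool,
) -> Result<tar::Builder<flate2::write::GzEncoder<File>>> {
    // create_new() refuses to clobber an existing probe_log unless -f was given
    let file = if overwrite {
        File::create(output)
    } else {
        File::create_new(output)
    }
    .wrap_err("Failed to create output file")?;

    // gzip the tar stream, exactly as record_transcribe() does below
    Ok(tar::Builder::new(flate2::write::GzEncoder::new(
        file,
        Compression::default(),
    )))
}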
+fn record_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => x, + None => OsString::from("probe_log"), + }; + + let file = if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file")?; + + let mut tar = tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); + + let mut record_dir = record::Recorder::new( + cmd, + util::Dir::temp(true).wrap_err("Failed to create record directory")?, + ) + .gdb(gdb) + .debug(debug) + .record()?; + + match transcribe::transcribe(&record_dir, &mut tar) { + Ok(_) => (), + Err(e) => { + log::error!( + "Error transcribing record directory, saving directory '{}'", + record_dir.as_ref().to_string_lossy() + ); + record_dir.drop = false; + return Err(e).wrap_err("Failed to transcirbe record directory"); + } + }; + + Ok(()) +} diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs new file mode 100644 index 00000000..1ae6c4c5 --- /dev/null +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -0,0 +1,114 @@ +use std::{ + ffi::OsString, + fs, + path::{Path, PathBuf}, + thread, +}; + +use color_eyre::eyre::{eyre, Result, WrapErr}; + +use crate::util::Dir; + +#[derive(Debug)] +pub struct Recorder { + gdb: bool, + debug: bool, + + output: Dir, + cmd: Vec, +} + +impl Recorder { + /// runs the built recorder, on success returns the PID of launched process and the TempDir it + /// was recorded into + pub fn record(self) -> Result { + // reading and canonicalizing path to libprobe + let mut libprobe = fs::canonicalize(match std::env::var_os("__PROBE_LIB") { + Some(x) => PathBuf::from(x), + None => return Err(eyre!("couldn't find libprobe, are you using the wrapper?")), + }) + .wrap_err("unable to canonicalize libprobe path")?; + if self.debug || self.gdb { + log::debug!("Using debug version of libprobe"); + libprobe.push("libprobe-dbg.so"); + } else { + libprobe.push("libprobe.so"); + } + + // append any existing LD_PRELOAD overrides + let mut ld_preload = OsString::from(libprobe); + if let Some(x) = std::env::var_os("LD_PRELOAD") { + ld_preload.push(":"); + ld_preload.push(&x); + } + + let mut child = if self.gdb { + let mut dir_env = OsString::from("__PROBE_DIR="); + dir_env.push(self.output.path()); + let mut preload_env = OsString::from("LD_PRELOAD="); + preload_env.push(ld_preload); + + std::process::Command::new("gdb") + .arg("--args") + .arg("env") + .arg(dir_env) + .arg(preload_env) + .args(&self.cmd) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .spawn() + .wrap_err("Failed to launch gdb")? + } else { + std::process::Command::new(&self.cmd[0]) + .args(&self.cmd[1..]) + .env_remove("__PROBE_LIB") + .env_remove("__PROBE_LOG") + .env("__PROBE_DIR", self.output.path()) + .env("LD_PRELOAD", ld_preload) + .spawn() + .wrap_err("Failed to launch child process")? + }; + + thread::sleep(std::time::Duration::from_millis(50)); + + match Path::read_dir(self.output.path()) { + Ok(x) => { + let any_files = x + .into_iter() + .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; + if !any_files { + log::warn!( + "No arean files detected, something is wrong, you should probably abort!" 
+ ); + } + } + Err(e) => { + return Err(e) + .wrap_err("Unable to read record directory during post-startup sanity check") + } + } + + child.wait().wrap_err("Failed to await child process")?; + + Ok(self.output) + } + pub fn new(cmd: Vec, output: Dir) -> Self { + Self { + gdb: false, + debug: false, + + output, + cmd, + } + } + + pub fn gdb(mut self, gdb: bool) -> Self { + self.gdb = gdb; + self + } + + pub fn debug(mut self, debug: bool) -> Self { + self.debug = debug; + self + } +} diff --git a/probe_src/probe_frontend/cli/src/transcribe.rs b/probe_src/probe_frontend/cli/src/transcribe.rs new file mode 100644 index 00000000..799df9b1 --- /dev/null +++ b/probe_src/probe_frontend/cli/src/transcribe.rs @@ -0,0 +1,21 @@ +use std::{io::Write, path::Path}; + +use color_eyre::eyre::{Result, WrapErr}; + +use crate::util::Dir; + +pub fn transcribe, T: Write>( + record_dir: P, + tar: &mut tar::Builder, +) -> Result<()> { + let log_dir = Dir::temp(true).wrap_err("Failed to create temp directory for transcription")?; + + probe_frontend::transcribe::parse_top_level(record_dir, &log_dir) + .wrap_err("Failed to transcribe record directory")?; + + tar.append_dir_all(".", &log_dir) + .wrap_err("Failed to copy output dir into archive")?; + tar.finish().wrap_err("Failed to finish writing tarball")?; + + Ok(()) +} diff --git a/probe_src/probe_frontend/cli/src/util.rs b/probe_src/probe_frontend/cli/src/util.rs new file mode 100644 index 00000000..c8b09c57 --- /dev/null +++ b/probe_src/probe_frontend/cli/src/util.rs @@ -0,0 +1,74 @@ +use std::{ + fs, io, + path::{Path, PathBuf}, +}; + +use color_eyre::eyre::{Context, Result}; +use rand::Rng; + +#[derive(Debug)] +pub struct Dir { + path: PathBuf, + pub drop: bool, +} + +impl Dir { + #[inline] + pub fn new(path: PathBuf) -> Result { + fs::create_dir(&path).wrap_err("Failed to create named directory")?; + Ok(Self { path, drop: false }) + } + + pub fn temp(drop: bool) -> Result { + let mut path = std::env::temp_dir(); + path.push(format!("probe-{}", rand_alphanumeric(8))); + + match fs::create_dir(&path) { + Ok(_) => Ok(Self { path, drop }), + Err(e) => match e.kind() { + io::ErrorKind::AlreadyExists => Self::temp(drop), + _ => Err(e).wrap_err("Failed to create temp directory"), + }, + } + } + + #[inline] + pub fn path(&self) -> &Path { + self.path.as_path() + } +} + +impl AsRef for Dir { + fn as_ref(&self) -> &Path { + self.path.as_path() + } +} + +impl Drop for Dir { + fn drop(&mut self) { + if self.drop { + if let Err(e) = fs::remove_dir_all(&self.path) { + log::warn!( + "Failed to remove temporary directory '{}' because: {}", + self.path.to_string_lossy(), + e + ); + } + } + } +} + +fn rand_alphanumeric(len: usize) -> String { + const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + abcdefghijklmnopqrstuvwxyz\ + 0123456789"; + + let mut rng = rand::thread_rng(); + + (0..len) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect() +} diff --git a/probe_src/probe_frontend/configure b/probe_src/probe_frontend/configure new file mode 100755 index 00000000..699751ed --- /dev/null +++ b/probe_src/probe_frontend/configure @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e +cd "$(dirname "$(realpath "$0")")" +mkdir -p ./lib/include +cp ../libprobe/include/prov_ops.h ./lib/include/prov_ops.h +git add ./lib/include diff --git a/probe_src/probe_frontend/deny.toml b/probe_src/probe_frontend/deny.toml new file mode 100644 index 00000000..b074f444 --- /dev/null +++ b/probe_src/probe_frontend/deny.toml @@ -0,0 +1,209 @@ +# The graph table 
configures how the dependency graph is constructed and thus +# which crates the checks are performed against +[graph] +# When creating the dependency graph used as the source of truth when checks are +# executed, this field can be used to prune crates from the graph, removing them +# from the view of cargo-deny. This is an extremely heavy hammer, as if a crate +# is pruned from the graph, all of its dependencies will also be pruned unless +# they are connected to another crate in the graph that hasn't been pruned, +# so it should be used with care. The identifiers are [Package ID Specifications] +# (https://doc.rust-lang.org/cargo/reference/pkgid-spec.html) +#exclude = [] +# If true, metadata will be collected with `--all-features`. Note that this can't +# be toggled off if true, if you want to conditionally enable `--all-features` it +# is recommended to pass `--all-features` on the cmd line instead +all-features = false +# If true, metadata will be collected with `--no-default-features`. The same +# caveat with `all-features` applies +no-default-features = false +# If set, these feature will be enabled when collecting metadata. If `--features` +# is specified on the cmd line they will take precedence over this option. +#features = [] + +# The output table provides options for how/if diagnostics are outputted +[output] +# When outputting inclusion graphs in diagnostics that include features, this +# option can be used to specify the depth at which feature edges will be added. +# This option is included since the graphs can be quite large and the addition +# of features from the crate(s) to all of the graph roots can be far too verbose. +# This option can be overridden via `--feature-depth` on the cmd line +feature-depth = 1 + +# This section is considered when running `cargo deny check advisories` +# More documentation for the advisories section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html +[advisories] +# The path where the advisory databases are cloned/fetched into +#db-path = "$CARGO_HOME/advisory-dbs" +# The url(s) of the advisory databases to use +#db-urls = ["https://github.com/rustsec/advisory-db"] +# A list of advisory IDs to ignore. Note that ignored advisories will still +# output a note when they are encountered. +ignore = [ + #"RUSTSEC-0000-0000", + #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, + #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish + #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, +] +# If this is true, then cargo deny will use the git executable to fetch advisory database. +# If this is false, then it uses a built-in git library. +# Setting this to true can be helpful if you have special authentication requirements that cargo-deny does not support. +# See Git Authentication for more information about setting up git authentication. +#git-fetch-with-cli = true + +# This section is considered when running `cargo deny check licenses` +# More documentation for the licenses section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html +[licenses] +# List of explicitly allowed licenses +# See https://spdx.org/licenses/ for list of possible licenses +# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. 
+allow = [ + #OSI approved FOSS licenses (will expand as needed) + "Apache-2.0 WITH LLVM-exception", + "Apache-2.0", + "BSD-3-Clause", + "ISC", + "MIT", + "Unicode-DFS-2016", +] +# The confidence threshold for detecting a license from license text. +# The higher the value, the more closely the license text must be to the +# canonical license text of a valid SPDX license file. +# [possible values: any between 0.0 and 1.0]. +confidence-threshold = 0.8 +# Allow 1 or more licenses on a per-crate basis, so that particular licenses +# aren't accepted for every possible crate as with the normal allow list +exceptions = [ + # Each entry is the crate and version constraint, and its specific allow + # list + #{ allow = ["Zlib"], crate = "adler32" }, +] + +# Some crates don't have (easily) machine readable licensing information, +# adding a clarification entry for it allows you to manually specify the +# licensing information +#[[licenses.clarify]] +# The package spec the clarification applies to +#crate = "ring" +# The SPDX expression for the license requirements of the crate +#expression = "MIT AND ISC AND OpenSSL" +# One or more files in the crate's source used as the "source of truth" for +# the license expression. If the contents match, the clarification will be used +# when running the license check, otherwise the clarification will be ignored +# and the crate will be checked normally, which may produce warnings or errors +# depending on the rest of your configuration +#license-files = [ +# Each entry is a crate relative path, and the (opaque) hash of its contents +#{ path = "LICENSE", hash = 0xbd0eed23 } +#] + +[licenses.private] +# If true, ignores workspace crates that aren't published, or are only +# published to private registries. +# To see how to mark a crate as unpublished (to the official registry), +# visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. +ignore = true +# One or more private registries that you might publish crates to, if a crate +# is only published to private registries, and ignore is true, the crate will +# not have its license(s) checked +registries = [ + #"https://sekretz.com/registry +] + +# This section is considered when running `cargo deny check bans`. +# More documentation about the 'bans' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html +[bans] +# Lint level for when multiple versions of the same crate are detected +multiple-versions = "warn" +# Lint level for when a crate version requirement is `*` +wildcards = "allow" +# The graph highlighting used when creating dotgraphs for crates +# with multiple versions +# * lowest-version - The path to the lowest versioned duplicate is highlighted +# * simplest-path - The path to the version with the fewest edges is highlighted +# * all - Both lowest-version and simplest-path are used +highlight = "all" +# The default lint level for `default` features for crates that are members of +# the workspace that is being checked. This can be overridden by allowing/denying +# `default` on a crate-by-crate basis if desired. +workspace-default-features = "allow" +# The default lint level for `default` features for external crates that are not +# members of the workspace. This can be overridden by allowing/denying `default` +# on a crate-by-crate basis if desired. +external-default-features = "allow" +# List of crates that are allowed. Use with care! 
+allow = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, +] +# List of crates to deny +deny = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, + # Wrapper crates can optionally be specified to allow the crate when it + # is a direct dependency of the otherwise banned crate + #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, +] + +# List of features to allow/deny +# Each entry the name of a crate and a version range. If version is +# not specified, all versions will be matched. +#[[bans.features]] +#crate = "reqwest" +# Features to not allow +#deny = ["json"] +# Features to allow +#allow = [ +# "rustls", +# "__rustls", +# "__tls", +# "hyper-rustls", +# "rustls", +# "rustls-pemfile", +# "rustls-tls-webpki-roots", +# "tokio-rustls", +# "webpki-roots", +#] +# If true, the allowed features must exactly match the enabled feature set. If +# this is set there is no point setting `deny` +#exact = true + +# Certain crates/versions that will be skipped when doing duplicate detection. +skip = [ + #"ansi_term@0.11.0", + #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, +] +# Similarly to `skip` allows you to skip certain crates during duplicate +# detection. Unlike skip, it also includes the entire tree of transitive +# dependencies starting at the specified crate, up to a certain depth, which is +# by default infinite. +skip-tree = [ + #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies + #{ crate = "ansi_term@0.11.0", depth = 20 }, +] + +# This section is considered when running `cargo deny check sources`. +# More documentation about the 'sources' section can be found here: +# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html +[sources] +# Lint level for what to happen when a crate from a crate registry that is not +# in the allow list is encountered +unknown-registry = "warn" +# Lint level for what to happen when a crate from a git repository that is not +# in the allow list is encountered +unknown-git = "warn" +# List of URLs for allowed crate registries. Defaults to the crates.io index +# if not specified. If it is specified but empty, no registries are allowed. 
+allow-registry = ["https://github.com/rust-lang/crates.io-index"] +# List of URLs for allowed Git repositories +allow-git = [] + +[sources.allow-org] +# 1 or more github.com organizations to allow git sources for +github = [""] +# 1 or more gitlab.com organizations to allow git sources for +gitlab = [""] +# 1 or more bitbucket.org organizations to allow git sources for +bitbucket = [""] diff --git a/probe_src/probe_frontend/flake.lock b/probe_src/probe_frontend/flake.lock index 7934bbe5..fd766282 100644 --- a/probe_src/probe_frontend/flake.lock +++ b/probe_src/probe_frontend/flake.lock @@ -1,5 +1,21 @@ { "nodes": { + "advisory-db": { + "flake": false, + "locked": { + "lastModified": 1719411196, + "narHash": "sha256-EdryZFXPjkK2F2J1re/bOl2oezKAB7dpFNi9mLUygmI=", + "owner": "rustsec", + "repo": "advisory-db", + "rev": "34f191da603f67b491a2e12af0b93c9c794ae1d1", + "type": "github" + }, + "original": { + "owner": "rustsec", + "repo": "advisory-db", + "type": "github" + } + }, "crane": { "inputs": { "nixpkgs": [ @@ -7,11 +23,11 @@ ] }, "locked": { - "lastModified": 1718078026, - "narHash": "sha256-LbQabH6h86ZzTvDnaZHmMwedRZNB2jYtUQzmoqWQoJ8=", + "lastModified": 1719249093, + "narHash": "sha256-0q1haa3sw6GbmJ+WhogMnducZGjEaCa/iR6hF2vq80I=", "owner": "ipetkov", "repo": "crane", - "rev": "a3f0c63eed74a516298932b9b1627dd80b9c3892", + "rev": "9791c77eb7e98b8d8ac5b0305d47282f994411ca", "type": "github" }, "original": { @@ -40,11 +56,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1718276985, - "narHash": "sha256-u1fA0DYQYdeG+5kDm1bOoGcHtX0rtC7qs2YA2N1X++I=", + "lastModified": 1719379843, + "narHash": "sha256-u+D+IOAMMl70+CJ9NKB+RMrASjInuIWMHzjLWQjPZ6c=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "3f84a279f1a6290ce154c5531378acc827836fbb", + "rev": "b3f3c1b13fb08f3828442ee86630362e81136bbc", "type": "github" }, "original": { @@ -56,6 +72,7 @@ }, "root": { "inputs": { + "advisory-db": "advisory-db", "crane": "crane", "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 01088355..e4f45e38 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -1,44 +1,157 @@ { + description = "libprobe frontend"; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; - crane.url = "github:ipetkov/crane"; - crane.inputs.nixpkgs.follows = "nixpkgs"; + + crane = { + url = "github:ipetkov/crane"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + flake-utils.url = "github:numtide/flake-utils"; + + advisory-db = { + url = "github:rustsec/advisory-db"; + flake = false; + }; }; + # FIXME: currently all the different crates get their dependencies grouped + # together, this means you can't build even the pure-rust crates without + # python, I'd like to figure out how to avoid this; a rust-bindgen crate + # and a PyO3 crate is really pushing what crane was designed to do (but the + # other options are worse). outputs = { self, nixpkgs, crane, flake-utils, + advisory-db, ... }: flake-utils.lib.eachDefaultSystem (system: let pkgs = nixpkgs.legacyPackages.${system}; + # inherit (pkgs) lib; + craneLib = crane.mkLib pkgs; + src = ./.; - crate = craneLib.buildPackage { - src = ./.; + # Common arguments can be set here to avoid repeating them later + commonArgs = { + inherit src; + strictDeps = true; - # Add extra inputs here or any other derivation settings - doCheck = true; - # buildInputs = []; + # all the crates in this workspace either use rust-bindgen or depend + # on local crate that does. 
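+        # (rustPlatform.bindgenHook supplies libclang and the clang include
+        # flags that rust-bindgen needs inside the build sandbox; without it
+        # the bindings.rs generation step in lib/build.rs would fail.)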
nativeBuildInputs = [ pkgs.rustPlatform.bindgenHook ]; }; + + # Build *just* the cargo dependencies (of the entire workspace), + # so we can reuse all of that work (e.g. via cachix) when running in CI + # It is *highly* recommended to use something like cargo-hakari to avoid + # cache misses when building individual top-level-crates + cargoArtifacts = craneLib.buildDepsOnly commonArgs; + + individualCrateArgs = + commonArgs + // { + inherit cargoArtifacts; + inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; + # NB: we disable tests since we'll run them all via cargo-nextest + doCheck = false; + }; + + # Build the top-level crates of the workspace as individual derivations. + # This allows consumers to only depend on (and build) only what they need. + # Though it is possible to build the entire workspace as a single derivation, + # so this is left up to you on how to organize things + probe-frontend = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-frontend"; + cargoExtraArgs = "-p probe_frontend"; + }); + probe-cli = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-cli"; + cargoExtraArgs = "-p probe_cli"; + }); + probe-macros = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-macros"; + cargoExtraArgs = "-p probe_macros"; + installPhase = '' + cp -r python/ $out + ''; + }); in { - packages.default = crate; checks = { - inherit crate; + # Build the crates as part of `nix flake check` for convenience + inherit probe-frontend probe-cli probe-macros; + + # Run clippy (and deny all warnings) on the workspace source, + # again, reusing the dependency artifacts from above. + # + # Note that this is done as a separate derivation so that + # we can block the CI if there are issues here, but not + # prevent downstream consumers from building our crate by itself. + probe-workspace-clippy = craneLib.cargoClippy (commonArgs + // { + inherit cargoArtifacts; + cargoClippyExtraArgs = "--all-targets -- --deny warnings"; + }); + + probe-workspace-doc = craneLib.cargoDoc (commonArgs + // { + inherit cargoArtifacts; + }); + + # Check formatting + probe-workspace-fmt = craneLib.cargoFmt { + inherit src; + }; + + # Audit dependencies + probe-workspace-audit = craneLib.cargoAudit { + inherit src advisory-db; + }; + + # Audit licenses + probe-workspace-deny = craneLib.cargoDeny { + inherit src; + }; + + # Run tests with cargo-nextest + # this is why `doCheck = false` on other crate derivations, to not run + # the tests twice. + workspace-nextest = craneLib.cargoNextest (commonArgs + // { + inherit cargoArtifacts; + partitions = 1; + partitionType = "count"; + }); }; + + packages = { + inherit probe-cli probe-frontend probe-macros; + }; + devShells.default = craneLib.devShell { + # Inherit inputs from checks. 
checks = self.checks.${system}; - packages = with pkgs; [ - cargo-audit - cargo-flamegraph - cargo-watch - rust-analyzer + + shellHook = '' + export __PROBE_LIB=$(realpath ../libprobe/build) + ''; + + packages = [ + pkgs.cargo-audit + pkgs.cargo-expand + pkgs.cargo-flamegraph + pkgs.cargo-watch + pkgs.rust-analyzer ]; }; }); diff --git a/probe_src/probe_frontend/lib/Cargo.toml b/probe_src/probe_frontend/lib/Cargo.toml new file mode 100644 index 00000000..d683b29e --- /dev/null +++ b/probe_src/probe_frontend/lib/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "probe_frontend" +version.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +name = "probe_frontend" +path = "src/lib.rs" + +[dependencies] +libc = "0.2.155" +log = "0.4.21" +machine-info = "1.0.9" +probe_macros = { path = "../macros" } +rayon = "1.10.0" +serde = { version = "1.0.203", features = ["serde_derive"] } +serde_json = "1.0.118" +thiserror = "1.0.61" + +[build-dependencies] +bindgen = "0.69.4" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/build.rs b/probe_src/probe_frontend/lib/build.rs similarity index 57% rename from probe_src/probe_frontend/build.rs rename to probe_src/probe_frontend/lib/build.rs index 33f14227..9123b297 100644 --- a/probe_src/probe_frontend/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -6,40 +6,86 @@ use std::sync::OnceLock; use bindgen::callbacks::ParseCallbacks; #[derive(Debug)] -struct SerdeDeriveCallback; +struct LibprobeCallback; -fn derive_list(name: &str) -> bool { - static DERIVE_LIST: OnceLock> = OnceLock::new(); - DERIVE_LIST - .get_or_init(|| { - HashSet::from([ - "CloneOp", - "CloseOp", - "ExitOp", - "GetRUsageOp", - "InitProcessOp", - "InitThreadOp", - "MetadataValue__bindgen_ty_1", - "MetadataValue__bindgen_ty_2", - "WaitOp", - "rusage", - "statx", - "statx_timestamp", - "timespec", - "timeval", - ]) - }) - .contains(name) +/// These C-structs get prefixed with "Bindgen_" because a rust version of the struct will be +/// either generated or manually implemented. 
+fn should_prefix(name: &str) -> bool { + static LIST: OnceLock> = OnceLock::new(); + LIST.get_or_init(|| { + HashSet::from([ + "Path", + "InitProcessOp", + "InitExecEpochOp", + "InitThreadOp", + "OpenOp", + "CloseOp", + "ChdirOp", + "ExecOp", + "CloneOp", + "ExitOp", + "AccessOp", + "StatOp", + "ReaddirOp", + "WaitOp", + "GetRUsageOp", + "MetadataKind", + "MetadataValue", + "UpdateMetadataOp", + "ReadLinkOp", + "OpCode", + "Op", + "statx", + "rusage", + "statx_timestamp", + "timespec", + "timeval", + ]) + }) + .contains(name) } -impl ParseCallbacks for SerdeDeriveCallback { - fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec { - if derive_list(info.name) { - vec!["Serialize".to_owned(), "Deserialize".to_owned()] +/// These structs are parts of tagged unions and so the rust versions of the structs can't (yet) be +/// autogenerated and have to be implemented manually +fn no_derive(name: &str) -> bool { + static LIST: OnceLock> = OnceLock::new(); + LIST.get_or_init(|| { + HashSet::from([ + "MetadataKind", + "MetadataValue", + "UpdateMetadataOp", + "OpCode", + "Op", + ]) + }) + .contains(name) +} + +impl ParseCallbacks for LibprobeCallback { + fn item_name(&self, _original_item_name: &str) -> Option { + if should_prefix(_original_item_name) { + Some(format!("Bindgen_{}", _original_item_name)) } else { - vec![] + None } } + + fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec { + let mut ret = vec![]; + + match info.kind { + bindgen::callbacks::TypeKind::Struct => { + let orig_name = info.name.strip_prefix("Bindgen_"); + if orig_name.is_some() && !no_derive(orig_name.unwrap()) { + ret.push("MakeRustOp".to_owned()); + } + } + bindgen::callbacks::TypeKind::Enum => (), + bindgen::callbacks::TypeKind::Union => (), + }; + + ret + } } fn main() { @@ -91,7 +137,7 @@ fn main() { #define BORROWED #define OWNED - ", + ", ) // The input header we would like to generate // bindings for. @@ -104,7 +150,7 @@ fn main() { // Tell cargo to invalidate the built crate whenever any of the // included header files changed. .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) - .parse_callbacks(Box::new(SerdeDeriveCallback {})) + .parse_callbacks(Box::new(LibprobeCallback {})) // Finish the builder and generate the bindings. .generate() // Unwrap the Result and panic on failure. 
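+        // (The generated bindings end up in $OUT_DIR/bindings.rs and are pulled
+        // into the library via `include!(concat!(env!("OUT_DIR"), "/bindings.rs"))`
+        // in src/ops.rs; every whitelisted C struct is renamed to `Bindgen_*` by
+        // item_name() above, and MakeRustOp emits the clean owned counterpart.)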
diff --git a/probe_src/probe_frontend/lib/src/error.rs b/probe_src/probe_frontend/lib/src/error.rs new file mode 100644 index 00000000..380a1d07 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/error.rs @@ -0,0 +1,107 @@ +use std::num::ParseIntError; + +pub type Result = std::result::Result; + +#[non_exhaustive] +#[derive(Debug, thiserror::Error)] +pub enum ProbeError { + #[error("{msg}: {inner}")] + FFiConversionError { + msg: &'static str, + inner: Box, + }, + + #[error("Invalid variant of tagged union")] + InvalidVariant(u32), + + #[error("Unable to decode pointer {0:#x}")] + InvalidPointer(usize), + + #[error("Expected null byte but none found")] + MissingNull, + + #[error("Reached code believed unreachable, please report this bug")] + UnreachableCode, + + #[error("(de)serialization error ({context}):\n{error}")] + JsonError { + context: &'static str, + error: serde_json::Error, + }, + + #[error("{context}:\n{error}")] + Context { + context: &'static str, + error: Box, + }, + + #[error("{context}:\n{error}")] + ContextIO { + context: &'static str, + error: std::io::Error, + }, + + #[error("{context}:\nNeeded Option was None")] + MissingOption { + context: &'static str, + }, + + #[error("{0}")] + ArenaError(crate::transcribe::ArenaError), + + #[error("{0}")] + ParseIntError(ParseIntError), +} + +impl From for ProbeError { + fn from(value: crate::transcribe::ArenaError) -> Self { + Self::ArenaError(value) + } +} + +impl From for ProbeError { + fn from(value: ParseIntError) -> Self { + Self::ParseIntError(value) + } +} + +/// create new [`ProbeError::MissingOption`] with the given context +pub fn option_err(context: &'static str) -> ProbeError { + ProbeError::MissingOption { context } +} + +pub(crate) trait WrapErr { + fn wrap_err(self, context: &'static str) -> Result; +} + +impl WrapErr for std::result::Result { + fn wrap_err(self, context: &'static str) -> Result { + match self { + Ok(x) => Ok(x), + Err(e) => Err(e.convert(context)), + } + } +} + +pub(crate) trait ConvertErr { + fn convert(self, context: &'static str) -> ProbeError; +} + +impl ConvertErr for std::io::Error { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::ContextIO { context, error: self } + } +} + +impl ConvertErr for ProbeError { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::Context { context, error: Box::new(self) } + } +} + +impl ConvertErr for serde_json::Error { + fn convert(self, context: &'static str) -> ProbeError { + ProbeError::JsonError { context, error: self } + } +} + diff --git a/probe_src/probe_frontend/lib/src/ffi.rs b/probe_src/probe_frontend/lib/src/ffi.rs new file mode 100644 index 00000000..4016c4bf --- /dev/null +++ b/probe_src/probe_frontend/lib/src/ffi.rs @@ -0,0 +1,251 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +use crate::transcribe::ArenaContext; +use color_eyre::eyre::{Result, WrapErr, eyre}; +use probe_macros::MakeRustOp; +use pyo3::pyclass; +use serde::{Deserialize, Serialize}; + +/// Specialized version of [`std::convert::From`] for working with libprobe arena structs. +/// +/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because +/// they came from a different virtual memory space). This trait and It's sibling [`FfiInto`] +/// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be +/// used to decode pointers. 
+pub(crate) trait FfiFrom { + fn ffi_from(value: &T, ctx: &ArenaContext) -> Result + where + Self: Sized; +} + +/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. +/// +/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket +/// implementation as the reciprocal of [`FfiFrom`]. +pub(crate) trait FfiInto { + fn ffi_into(&self, ctx: &ArenaContext) -> Result; +} + +impl FfiFrom for T { + fn ffi_from(value: &T, _ctx: &ArenaContext) -> Result { + Ok(*value) + } +} + +impl FfiInto for T +where + U: FfiFrom, +{ + #[inline] + fn ffi_into(&self, ctx: &ArenaContext) -> Result { + U::ffi_from(self, ctx) + } +} + +impl FfiFrom<*const i8> for std::ffi::CString { + fn ffi_from(value: &*const i8, ctx: &ArenaContext) -> Result { + let str = *value; + if str.is_null() { + std::ffi::CString::new("").wrap_err("Failed to create empty CString") + } else { + match ctx.try_get_slice(str as usize) { + Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x) + .wrap_err("Failed to create CString")? + .to_owned()), + None => Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), + } + } + } +} + +impl FfiFrom<*mut i8> for std::ffi::CString { + fn ffi_from(value: &*mut i8, ctx: &ArenaContext) -> Result { + let str = *value; + if str.is_null() { + std::ffi::CString::new("").wrap_err("Failed to create empty CString") + } else { + match ctx.try_get_slice(str as usize) { + Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x) + .wrap_err("Failed to create CString")? + .to_owned()), + None => Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), + } + } + } +} + +// Bindings are generated by `../build.sh` and the MakeRustOp proc-macro +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + + +// NOTE: the raw versions of these Ops are tagged unions, so currently they have to be manually +// implemented, this is somewhat confusing since they extensively use types and trait +// implementations that are auto-generated. + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum OpInternal { + InitProcess(InitProcessOp), + InitExecEpoch(InitExecEpochOp), + InitThread(InitThreadOp), + Open(OpenOp), + Close(CloseOp), + Chdir(ChdirOp), + Exec(ExecOp), + Clone(CloneOp), + Exit(ExitOp), + Access(AccessOp), + Stat(StatOp), + Readdir(ReaddirOp), + Wait(WaitOp), + GetRUsage(GetRUsageOp), + UpdateMetadata(UpdateMetadataOp), + ReadLink(ReadLinkOp), +} + +impl FfiFrom for OpInternal { + fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { + let kind = value.op_code; + let value = value.data; + + log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); + Ok(match kind { + Bindgen_OpCode_init_process_op_code => { + Self::InitProcess(unsafe { value.init_process_epoch }.ffi_into(ctx)?) 
+ } + Bindgen_OpCode_init_exec_epoch_op_code => Self::InitExecEpoch( + unsafe { value.init_exec_epoch } + .ffi_into(ctx) + .wrap_err("Unable to decode InitExecEpochOp")?, + ), + Bindgen_OpCode_init_thread_op_code => Self::InitThread(unsafe { value.init_thread }.ffi_into(ctx)?), + Bindgen_OpCode_open_op_code => Self::Open( + unsafe { value.open } + .ffi_into(ctx) + .wrap_err("Unable to decode OpenOp")?, + ), + Bindgen_OpCode_close_op_code => Self::Close(unsafe { value.close }.ffi_into(ctx)?), + Bindgen_OpCode_chdir_op_code => Self::Chdir( + unsafe { value.chdir } + .ffi_into(ctx) + .wrap_err("Unable to decode ChdirOp")?, + ), + Bindgen_OpCode_exec_op_code => Self::Exec( + unsafe { value.exec } + .ffi_into(ctx) + .wrap_err("Unable to decode ExecOp")?, + ), + Bindgen_OpCode_clone_op_code => Self::Clone(unsafe { value.clone }.ffi_into(ctx)?), + Bindgen_OpCode_exit_op_code => Self::Exit(unsafe { value.exit }.ffi_into(ctx)?), + Bindgen_OpCode_access_op_code => Self::Access( + unsafe { value.access } + .ffi_into(ctx) + .wrap_err("Unable to decode AccessOp")?, + ), + Bindgen_OpCode_stat_op_code => Self::Stat( + unsafe { value.stat } + .ffi_into(ctx) + .wrap_err("Unable to decode StatOp")?, + ), + Bindgen_OpCode_readdir_op_code => Self::Readdir( + unsafe { value.readdir } + .ffi_into(ctx) + .wrap_err("Unable to decode ReaddirOp")?, + ), + Bindgen_OpCode_wait_op_code => Self::Wait(unsafe { value.wait }.ffi_into(ctx)?), + Bindgen_OpCode_getrusage_op_code => Self::GetRUsage(unsafe { value.getrusage }.ffi_into(ctx)?), + Bindgen_OpCode_update_metadata_op_code => Self::UpdateMetadata( + unsafe { value.update_metadata } + .ffi_into(ctx) + .wrap_err("Unable to decode UpdateMetadataOp")?, + ), + Bindgen_OpCode_read_link_op_code => Self::ReadLink( + unsafe { value.read_link } + .ffi_into(ctx) + .wrap_err("Unable to decode ReadlinkOp")?, + ), + _ => { + if kind < Bindgen_OpCode_LAST_OP_CODE && kind > Bindgen_OpCode_FIRST_OP_CODE { + return Err(eyre!( + "Valid OpCode not handled (this is a bug, please report it)" + )); + } else { + return Err(eyre!("Invalid OpCode")); + } + } + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Op { + pub data: OpInternal, + pub time: timespec, +} + +impl FfiFrom for Op { + fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { + Ok(Self { + data: value + .ffi_into(ctx) + .wrap_err("Unable to decode OpInternal")?, + time: value.time.ffi_into(ctx)?, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Metadata { + Mode(mode_t), + Ownership { + uid: uid_t, + gid: gid_t, + }, + Times { + is_null: bool, + atime: timeval, + mtime: timeval, + }, +} + +impl FfiFrom for Metadata { + fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + let kind = value.kind; + let value = value.value; + + log::debug!("[unsafe] decoding Metadata tagged union"); + Ok(match kind { + Bindgen_MetadataKind_MetadataMode => Metadata::Mode(unsafe { value.mode }), + Bindgen_MetadataKind_MetadataOwnership => Metadata::Ownership { + uid: unsafe { value.ownership }.uid, + gid: unsafe { value.ownership }.gid, + }, + Bindgen_MetadataKind_MetadataTimes => Metadata::Times { + is_null: unsafe { value.times }.is_null, + atime: unsafe { value.times }.atime.ffi_into(ctx)?, + mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, + }, + _ => return Err(eyre!("Invalid MetadataKind Variant")), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UpdateMetadataOp { + pub path: Path, + pub flags: ::std::os::raw::c_int, + pub 
metadata: Metadata, + pub ferrno: ::std::os::raw::c_int, +} + +impl FfiFrom for UpdateMetadataOp { + fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.ffi_into(ctx)?, + flags: value.flags, + metadata: value.ffi_into(ctx).wrap_err("Unable to decode Metadata")?, + ferrno: value.ferrno, + }) + } +} diff --git a/probe_src/probe_frontend/lib/src/lib.rs b/probe_src/probe_frontend/lib/src/lib.rs new file mode 100644 index 00000000..4ec768fe --- /dev/null +++ b/probe_src/probe_frontend/lib/src/lib.rs @@ -0,0 +1,32 @@ + +/// Op definitions +/// +/// While simple Ops containing only Integral values can be used/serialized directory from +/// libprobe, more complicated structs containing pointers (usually in the form of strings) need to +/// be manually converted to versions so they can be serialized. This module re-exports the trivial +/// structs and defines new ones (as well as methods for converting) for the non-trivial structs. +/// +/// Raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with +/// rust-bindgen (these start with `Bindgen_`. +/// +/// If you're trying to make sense of this it's going to be much easier if you have `prov_ops.h` +/// open as well. +pub mod ops; + +/// Transcribe raw Bindgen Ops from libprobe to usable, serializable data. +/// +/// # Serialization format +/// +/// The serialization format output is very similar to the raw libprobe arena format. It's a +/// filesystem hierarchy of `//` but instead of `` being a directory containing +/// `ops` and `data` directories with the raw C-struct arenas, `` is a +/// [jsonlines](https://jsonlines.org/) file, where each line is a json representation of an +/// [`ops::Op`]. +pub mod transcribe; + +// currently unused, get system metadata +mod metadata; + +/// Library error type and definitions. +pub mod error; + diff --git a/probe_src/probe_frontend/src/metadata.rs b/probe_src/probe_frontend/lib/src/metadata.rs similarity index 88% rename from probe_src/probe_frontend/src/metadata.rs rename to probe_src/probe_frontend/lib/src/metadata.rs index 60dc20d7..9a9d6617 100644 --- a/probe_src/probe_frontend/src/metadata.rs +++ b/probe_src/probe_frontend/lib/src/metadata.rs @@ -2,13 +2,13 @@ use machine_info::{Machine, SystemInfo}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] -pub struct Metadata { +pub struct SystemMetadata { entry_pid: libc::pid_t, arch: &'static str, system: SystemInfo, } -impl Metadata { +impl SystemMetadata { pub fn new(pid: libc::pid_t) -> Self { Self { entry_pid: pid, diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs new file mode 100644 index 00000000..c9aa9da7 --- /dev/null +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -0,0 +1,225 @@ +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(unsafe_op_in_unsafe_fn)] // <- PyO3 breaks without this + +use crate::error::{ProbeError, Result}; +use crate::transcribe::ArenaContext; +use probe_macros::{MakePyDataclass, MakeRustOp}; +use serde::{Deserialize, Serialize}; +use std::ffi::CString; + +/// Specialized version of [`std::convert::From`] for working with libprobe arena structs. +/// +/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because +/// they came from a different virtual memory space). 
This trait and It's sibling [`FfiInto`] +/// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be +/// used to decode pointers. +pub(crate) trait FfiFrom { + fn ffi_from(value: &T, ctx: &ArenaContext) -> Result + where + Self: Sized; +} + +/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. +/// +/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket +/// implementation as the reciprocal of [`FfiFrom`]. +pub(crate) trait FfiInto { + fn ffi_into(&self, ctx: &ArenaContext) -> Result; +} + +impl FfiInto for T +where + U: FfiFrom, +{ + #[inline] + fn ffi_into(&self, ctx: &ArenaContext) -> Result { + U::ffi_from(self, ctx) + } +} + +// these are the three base implementations of FFiFrom; each generated Op implements FFiFrom by +// calling ffi_into(ctx) on each of it's fields, each fields *must* be either: +// - Another generated struct +// - A Copy-able value +// - An i8 pointer, which maps to the C *char and are converted to CStrings +impl FfiFrom for T { + #[inline] + fn ffi_from(value: &T, _: &ArenaContext) -> Result { + Ok(*value) + } +} +impl FfiFrom<*const i8> for CString { + #[inline] + fn ffi_from(value: &*const i8, ctx: &ArenaContext) -> Result { + try_cstring(*value, ctx) + } +} +impl FfiFrom<*mut i8> for CString { + #[inline] + fn ffi_from(value: &*mut i8, ctx: &ArenaContext) -> Result { + try_cstring(*value, ctx) + } +} + +fn try_cstring(str: *const i8, ctx: &ArenaContext) -> Result { + if str.is_null() { + std::ffi::CString::new("").map_err(|_| ProbeError::MissingNull) + } else { + match ctx.try_get_slice(str as usize) { + Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x) + .map_err(|_| ProbeError::MissingNull)? + .to_owned()), + None => Err(ProbeError::InvalidPointer(str as usize)), + } + } +} + +// Bindings are generated by `../build.sh` and the MakeRustOp proc-macro +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + +// NOTE: the raw versions of these Ops are tagged unions, so currently they have to be manually +// implemented, this is somewhat confusing since they extensively use types and trait +// implementations that are auto-generated. 
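+
+// For orientation, the MakeRustOp derive attached to a prefixed bindgen struct
+// expands to roughly the following. This is an illustrative sketch for a
+// hypothetical `Bindgen_ExampleOp { path: *const i8, ferrno: c_int }`, not the
+// actual macro output (which is produced by probe_macros and maps *every*
+// field through ffi_into()):
+//
+//     #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)]
+//     pub struct ExampleOp {
+//         pub path: ::std::ffi::CString,
+//         pub ferrno: ::std::os::raw::c_int,
+//     }
+//
+//     impl FfiFrom<Bindgen_ExampleOp> for ExampleOp {
+//         fn ffi_from(value: &Bindgen_ExampleOp, ctx: &ArenaContext) -> Result<Self> {
+//             Ok(Self {
+//                 path: value.path.ffi_into(ctx).map_err(|e| {
+//                     ProbeError::FFiConversionError {
+//                         msg: "Error calling ffi_into() on path creating ExampleOp",
+//                         inner: Box::new(e),
+//                     }
+//                 })?,
+//                 ferrno: value.ferrno.ffi_into(ctx)?,
+//             })
+//         }
+//     }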
+ +#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +pub enum Metadata { + Mode { + mode: mode_t, + }, + Ownership { + uid: uid_t, + gid: gid_t, + }, + Times { + is_null: bool, + atime: timeval, + mtime: timeval, + }, +} + +impl FfiFrom for Metadata { + fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + let kind = value.kind; + let value = value.value; + + log::debug!("[unsafe] decoding Metadata tagged union"); + Ok(match kind { + Bindgen_MetadataKind_MetadataMode => Metadata::Mode { + mode: unsafe { value.mode }, + }, + Bindgen_MetadataKind_MetadataOwnership => Metadata::Ownership { + uid: unsafe { value.ownership }.uid, + gid: unsafe { value.ownership }.gid, + }, + Bindgen_MetadataKind_MetadataTimes => Metadata::Times { + is_null: unsafe { value.times }.is_null, + atime: unsafe { value.times }.atime.ffi_into(ctx)?, + mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, + }, + _ => return Err(ProbeError::InvalidVariant(kind)), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +pub struct UpdateMetadataOp { + pub path: Path, + pub flags: ::std::os::raw::c_int, + pub metadata: Metadata, + pub ferrno: ::std::os::raw::c_int, +} + +impl FfiFrom for UpdateMetadataOp { + fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { + Ok(Self { + path: value.path.ffi_into(ctx)?, + flags: value.flags, + metadata: value + .ffi_into(ctx) + .map_err(|e| ProbeError::FFiConversionError { + msg: "Unable to decode Metadata", + inner: Box::new(e), + })?, + ferrno: value.ferrno, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +pub enum OpInternal { + InitProcessOp(InitProcessOp), + InitExecEpochOp(InitExecEpochOp), + InitThreadOp(InitThreadOp), + OpenOp(OpenOp), + CloseOp(CloseOp), + ChdirOp(ChdirOp), + ExecOp(ExecOp), + CloneOp(CloneOp), + ExitOp(ExitOp), + AccessOp(AccessOp), + StatOp(StatOp), + ReaddirOp(ReaddirOp), + WaitOp(WaitOp), + GetRUsageOp(GetRUsageOp), + UpdateMetadataOp(UpdateMetadataOp), + ReadLinkOp(ReadLinkOp), +} + +impl FfiFrom for OpInternal { + fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { + let kind = value.op_code; + let value = value.data; + + log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); + Ok(match kind { + Bindgen_OpCode_init_process_op_code => { + Self::InitProcessOp(unsafe { value.init_process_epoch }.ffi_into(ctx)?) + } + Bindgen_OpCode_init_exec_epoch_op_code => { + Self::InitExecEpochOp(unsafe { value.init_exec_epoch }.ffi_into(ctx)?) + } + Bindgen_OpCode_init_thread_op_code => { + Self::InitThreadOp(unsafe { value.init_thread }.ffi_into(ctx)?) + } + Bindgen_OpCode_open_op_code => Self::OpenOp(unsafe { value.open }.ffi_into(ctx)?), + Bindgen_OpCode_close_op_code => Self::CloseOp(unsafe { value.close }.ffi_into(ctx)?), + Bindgen_OpCode_chdir_op_code => Self::ChdirOp(unsafe { value.chdir }.ffi_into(ctx)?), + Bindgen_OpCode_exec_op_code => Self::ExecOp(unsafe { value.exec }.ffi_into(ctx)?), + Bindgen_OpCode_clone_op_code => Self::CloneOp(unsafe { value.clone }.ffi_into(ctx)?), + Bindgen_OpCode_exit_op_code => Self::ExitOp(unsafe { value.exit }.ffi_into(ctx)?), + Bindgen_OpCode_access_op_code => Self::AccessOp(unsafe { value.access }.ffi_into(ctx)?), + Bindgen_OpCode_stat_op_code => Self::StatOp(unsafe { value.stat }.ffi_into(ctx)?), + Bindgen_OpCode_readdir_op_code => { + Self::ReaddirOp(unsafe { value.readdir }.ffi_into(ctx)?) 
+ } + Bindgen_OpCode_wait_op_code => Self::WaitOp(unsafe { value.wait }.ffi_into(ctx)?), + Bindgen_OpCode_getrusage_op_code => { + Self::GetRUsageOp(unsafe { value.getrusage }.ffi_into(ctx)?) + } + Bindgen_OpCode_update_metadata_op_code => { + Self::UpdateMetadataOp(unsafe { value.update_metadata }.ffi_into(ctx)?) + } + Bindgen_OpCode_read_link_op_code => { + Self::ReadLinkOp(unsafe { value.read_link }.ffi_into(ctx)?) + } + _ => return Err(ProbeError::InvalidVariant(kind)), + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +pub struct Op { + pub data: OpInternal, + pub time: timespec, +} + +impl FfiFrom for Op { + fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { + Ok(Self { + data: value.ffi_into(ctx)?, + time: value.time.ffi_into(ctx)?, + }) + } +} diff --git a/probe_src/probe_frontend/src/arena.rs b/probe_src/probe_frontend/lib/src/transcribe.rs similarity index 73% rename from probe_src/probe_frontend/src/arena.rs rename to probe_src/probe_frontend/lib/src/transcribe.rs index 6721ef32..26d163e0 100644 --- a/probe_src/probe_frontend/src/arena.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -1,4 +1,3 @@ -use color_eyre::eyre::{eyre, ContextCompat, Report, Result, WrapErr}; use rayon::iter::{ParallelBridge, ParallelIterator}; use std::{ collections::HashMap, @@ -11,17 +10,20 @@ use std::{ }; use crate::{ - ffi, + error::{option_err, ConvertErr, ProbeError, Result, WrapErr}, ops::{self, FfiFrom}, }; -/// Recursively parse a top-level libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. +type RawOp = ops::Bindgen_Op; + +// pub mod ops; + +/// Recursively parse a Top-level arena allocator directory and write it in serialized. /// /// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. /// /// on success, returns the number of Ops processed in the top-level directory -pub fn parse_arena_dir, P2: AsRef + Sync>( +pub fn parse_top_level, P2: AsRef + Sync>( in_dir: P1, out_dir: P2, ) -> Result { @@ -34,11 +36,11 @@ pub fn parse_arena_dir, P2: AsRef + Sync>( let start = SystemTime::now(); let count = fs::read_dir(in_dir) - .wrap_err("Error opening Arena directory")? + .wrap_err("Error opening record directory")? .par_bridge() .map(|x| { parse_pid( - x.wrap_err("Error reading DirEntry from Arena directory")? + x.wrap_err("Error reading DirEntry from record directory")? .path(), &out_dir, ) @@ -54,13 +56,12 @@ pub fn parse_arena_dir, P2: AsRef + Sync>( Ok(count) } -/// Recursively parse a PID libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. +/// Recursively parse a PID arena allocator directory and write it in serialized. /// /// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. /// /// On success, returns the number of Ops processed in the PID directory. -fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { +pub fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { let pid = filename_numeric(&in_dir)?; let dir = { @@ -85,13 +86,15 @@ fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } -/// Recursively parse a ExecEpoch libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. +/// Recursively parse a Epoch arena allocator directory and write it in serialized. /// /// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. 
/// /// On success, returns the number of Ops processed in the ExecEpoch directory. -fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { +pub fn parse_exec_epoch, P2: AsRef>( + in_dir: P1, + out_dir: P2, +) -> Result { let epoch = filename_numeric(&in_dir)?; let dir = { @@ -116,8 +119,7 @@ fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) - .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } -/// Recursively parse a TID libprobe arena allocator directory from `in_dir` and write it in -/// serialized format to `out_dir`. +/// Recursively parse a TID arena allocator directory and write it in serialized. /// /// This function parses a TID directory in 6 steps: /// @@ -133,17 +135,17 @@ fn parse_exec_epoch, P2: AsRef>(in_dir: P1, out_dir: P2) - /// (steps 5 & 6 are done lazily with iterators to reduce unnecessary memory allocations) /// /// On success, returns the number of Ops processed in the TID directory. -fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { - fn try_files_from_arena_dir>(dir: P) -> Result> { +pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Result { + fn try_files_from_dir>(dir: P) -> Result> { match fs::read_dir(&dir) { Ok(entry_iter) => entry_iter .map(|entry_result| { entry_result .map(|entry| entry.path()) - .wrap_err("Error reading DirEntry from arena directory") + .wrap_err("Error reading DirEntry from record TID subdirectory") }) - .collect::, _>>(), - Err(e) => Err(Report::from(e).wrap_err("Error opening arena directory")), + .collect::>>(), + Err(e) => Err(e.convert("Error opening record TID directory")), } } @@ -157,10 +159,7 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul // STEP 2 let paths = fs::read_dir(&in_dir) - .wrap_err(format!( - "Error reading directory '{}'", - in_dir.as_ref().to_string_lossy() - ))? + .wrap_err("Error reading record TID directory")? .filter_map(|entry_result| match entry_result { Ok(entry) => Some((entry.file_name(), entry)), Err(e) => { @@ -172,10 +171,10 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul // STEP 3 let ctx = ArenaContext( - try_files_from_arena_dir( + try_files_from_dir( paths .get(OsStr::new("data")) - .wrap_err("Missing data directory from TID directory")? + .ok_or_else(|| option_err("Missing data directory from TID directory"))? .path(), )? .into_iter() @@ -184,15 +183,15 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul std::fs::read(data_dat_file).wrap_err("Failed to read file from data directory")?, ) }) - .collect::, _>>()?, + .collect::>>()?, ); // STEP 4 let mut count: usize = 0; - try_files_from_arena_dir( + try_files_from_dir( paths .get(OsStr::new("ops")) - .wrap_err("Missing ops directory from TID directory")? + .ok_or_else(|| option_err("Missing ops directory from TID directory"))? .path(), )? // STEP 5 @@ -223,7 +222,7 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul count += 1; } - Ok::<(), Report>(()) + Ok::<(), ProbeError>(()) })?; Ok(count) @@ -233,19 +232,23 @@ fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> Resul /// /// errors if the path has no filename or the filename can't be parsed as an integer. 
fn filename_numeric>(dir: P) -> Result { - let filename = dir - .as_ref() - .file_name() - .ok_or_else(|| eyre!("'{}' has no filename", dir.as_ref().to_string_lossy()))?; + let filename = dir.as_ref().file_name().ok_or_else(|| { + log::error!("'{}' has no filename", dir.as_ref().to_string_lossy()); + option_err("path has no filename") + })?; filename .to_str() - .ok_or_else(|| eyre!("filename '{}' not valid UTF-8", filename.to_string_lossy()))? + .ok_or_else(|| { + log::error!("'{}' not valid UTF-8", filename.to_string_lossy()); + option_err("filename not valid UTF-8") + })? .parse::() - .wrap_err(format!( - "unable to convert filename '{}' to integer", - filename.to_string_lossy() - )) + .map_err(|e| { + log::error!("Parsing filename '{}' to integer", filename.to_string_lossy()); + ProbeError::from(e) + }) + .wrap_err("Failed to parse filename to integer") } /// this struct represents a `/data` directory from libprobe. @@ -270,20 +273,13 @@ pub struct DataArena { impl DataArena { pub fn from_bytes(bytes: Vec) -> Result { - if bytes.len() < size_of::() { - return Err(eyre!( - "Arena buffer too small, got {}, minimum size {}", - bytes.len(), - size_of::() - )); - } let header = ArenaHeader::from_bytes(&bytes) .wrap_err("Failed to create ArenaHeader for DataArena")?; Ok(Self { header, raw: bytes }) } - pub fn try_get_slice<'a>(&'a self, ptr: usize) -> Option<&'a [u8]> { + pub fn try_get_slice(&self, ptr: usize) -> Option<&[u8]> { let end = self.header.base_address + self.header.used; match ptr >= self.header.base_address && ptr <= end { false => None, @@ -305,7 +301,7 @@ pub struct OpsArena<'a> { /// raw byte buffer of Ops arena allocator. raw: Vec, /// slice over Ops of the raw buffer. - ops: &'a [ffi::Op], + ops: &'a [RawOp], } impl<'a> OpsArena<'a> { @@ -313,20 +309,15 @@ impl<'a> OpsArena<'a> { let header = ArenaHeader::from_bytes(&bytes) .wrap_err("Failed to create ArenaHeader for OpsArena")?; - if ((header.used - size_of::()) % size_of::()) != 0 { - return Err(eyre!( - "Arena alignment error: used arena size minus header isn't a multiple of op size" - )); + if ((header.used - size_of::()) % size_of::()) != 0 { + return Err(ArenaError::Misaligned.into()); } - let count = (header.used - size_of::()) / size_of::(); + let count = (header.used - size_of::()) / size_of::(); - log::debug!( - "[unsafe] converting Vec to &[ffi::Op] of size {}", - count - ); + log::debug!("[unsafe] converting Vec to &[RawOp] of size {}", count); let ops = unsafe { - let ptr = bytes.as_ptr().add(size_of::()) as *const ffi::Op; + let ptr = bytes.as_ptr().add(size_of::()) as *const RawOp; std::slice::from_raw_parts(ptr, count) }; @@ -358,11 +349,11 @@ impl ArenaHeader { let ptr = bytes as *const [u8] as *const Self; if bytes.len() < size_of::() { - return Err(eyre!( - "Arena buffer too small, got {}, minimum size {}", - bytes.len(), - size_of::() - )); + return Err(ArenaError::BufferTooSmall { + got: bytes.len(), + needed: size_of::(), + } + .into()); } log::debug!("[unsafe] converting byte buffer into ArenaHeader"); @@ -383,20 +374,35 @@ impl ArenaHeader { ); if header.capacity != bytes.len() { - return Err(eyre!( - "Invalid arena capacity, expected {}, got {}", - header.capacity, - bytes.len(), - )); + return Err(ArenaError::InvalidCapacity { + expected: header.capacity, + actual: bytes.len(), + } + .into()); } if header.used > header.capacity { - return Err(eyre!( - "Arena size {} is greater than capacity {}", - header.used, - header.capacity, - )); + return Err(ArenaError::InvalidSize { + size: header.used, + 
capacity: header.capacity, + } + .into()); } Ok(header) } } + +#[derive(Debug, thiserror::Error)] +pub enum ArenaError { + #[error("Arena buffer too small, got {got}, minimum size {needed}")] + BufferTooSmall { got: usize, needed: usize }, + + #[error("Invalid arena capacity, expected {expected}, got {actual}")] + InvalidCapacity { expected: usize, actual: usize }, + + #[error("Arena size {size} is greater than capacity {capacity}")] + InvalidSize { size: usize, capacity: usize }, + + #[error("Arena alignment error: used arena size minus header isn't a multiple of op size")] + Misaligned, +} diff --git a/probe_src/probe_frontend/macros/Cargo.toml b/probe_src/probe_frontend/macros/Cargo.toml new file mode 100644 index 00000000..6b04c001 --- /dev/null +++ b/probe_src/probe_frontend/macros/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "probe_macros" +version.workspace = true +authors.workspace = true +publish.workspace = true +edition.workspace = true + +[lib] +name = "probe_macros" +proc-macro = true + +[dependencies] +# darling = "0.20.9" +proc-macro2 = "1.0.86" +quote = "1.0.36" +syn = "2.0.68" + +[lints] +workspace = true diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs new file mode 100644 index 00000000..52c3445f --- /dev/null +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -0,0 +1,113 @@ +use proc_macro::TokenStream; +use proc_macro2::Span; +use quote::quote; +use syn::parse_quote; +use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident, Type}; + +mod pygen; + +#[proc_macro_derive(MakeRustOp)] +pub fn make_rust_op(input: TokenStream) -> TokenStream { + let original_struct = parse_macro_input!(input as DeriveInput); + let DeriveInput { data, ident, .. } = original_struct.clone(); + + match data { + Data::Struct(data_struct) => { + let fields = match data_struct.fields { + Fields::Named(x) => x, + _ => unimplemented!("unnamed and unit structs not implemented"), + }; + + let pairs = fields + .named + .iter() + .filter_map(|x| { + let ident = x.ident.as_ref().unwrap(); + if ident.to_string().starts_with("__") { + return None; + } + Some((ident, convert_bindgen_type(&x.ty))) + }) + .collect::>(); + + let field_idents = pairs.iter().map(|x| x.0).collect::>(); + + let field_types = pairs.into_iter().map(|x| x.1).collect::>(); + + let new_name = Ident::new( + ident + .to_string() + .strip_prefix("Bindgen_") + .expect("struct name doesn't start with 'Bindgen_'"), + Span::call_site(), + ); + + // This is rather bad macro hygiene, but this macro is only intend for probe_frontend's + // op struct generation, so we're playing a little fast-n'-loose with scoping. + quote! 
{ + #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] + pub struct #new_name { + #(pub #field_idents: #field_types,)* + } + + impl FfiFrom<#ident> for #new_name { + fn ffi_from(value: &#ident, ctx: &ArenaContext) -> Result { + Ok(Self { + #( + #field_idents: value.#field_idents + .ffi_into(ctx) + .map_err(|e| { + ProbeError::FFiConversionError { + msg: "Error calling ffi_into() on\ + #field_idents creating #new_name", + inner: Box::new(e), + } + })?, + )* + }) + } + } + } + .into() + } + _ => unimplemented!("MakeRustOp only supports structs"), + } +} + +fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { + match ty { + syn::Type::Ptr(_inner) => parse_quote!(::std::ffi::CString), + syn::Type::Array(inner) => { + let mut new = inner.clone(); + new.elem = Box::new(convert_bindgen_type(&new.elem)); + Type::Array(new) + } + syn::Type::Path(inner) => { + if let Some(name) = type_basename(inner).to_string().strip_prefix("Bindgen_") { + let name = Ident::new(name, Span::mixed_site()); + parse_quote!(#name) + } else { + Type::Path(inner.clone()) + } + } + // FIXME: return a proper error instead of just panicking + _ => unimplemented!("unsupported bindgen type conversion"), + } +} + +// FIXME: return a proper error instead of just panicking +pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { + if ty.qself.is_some() { + unimplemented!("qualified self-typs not supported"); + } + + &ty.path.segments.last().expect("type has no segments").ident +} + +#[proc_macro_derive(MakePyDataclass)] +pub fn make_py_dataclass(input: TokenStream) -> TokenStream { + let source = parse_macro_input!(input as DeriveInput); + pygen::make_py_dataclass_internal(source); + // return empty token stream, we're not actually writing rust here + TokenStream::new() +} diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs new file mode 100644 index 00000000..ba0673c4 --- /dev/null +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -0,0 +1,371 @@ +use std::fmt::Display; +use std::fs::File; +use std::io::Write; +use std::sync::{OnceLock, RwLock}; +use syn::{Data, Fields}; + +/// statically defined python code that gets added to the begining of the outputed file +const PYGEN_PREAMBLE: &str = " +# This file is automatically @generated by probe_macros + +import sys +import typing +from dataclasses import dataclass + +mod = sys.modules[__name__] + +"; + +pub fn make_py_dataclass_internal(input: syn::DeriveInput) { + let syn::DeriveInput { data, ident, .. 
} = input.clone(); + + match data { + Data::Struct(data_struct) => { + let fields = match data_struct.fields { + Fields::Named(x) => x, + _ => unimplemented!("unnamed and unit structs not implemented"), + }; + + let pairs = fields + .named + .iter() + .map(|x| { + let ident = x.ident.as_ref().unwrap(); + (ident.to_string(), convert_to_pytype(&x.ty)) + }) + .collect::>(); + + write_pygen(basic_dataclass(ident.to_string(), &pairs)); + } + Data::Enum(data_enum) => { + // let mut dataclass = format!("@dataclass(init=False)\nclass {}:\n", ident); + let mut dataclass = Dataclass::new(ident.to_string()); + let mut init = DataclassInit::new(); + let mut args = InitArgs::new(); + + // this is the types that the produced union is over + let mut variants = vec![]; + + for variant in data_enum.variants { + match variant.fields { + syn::Fields::Named(inner) => { + let name = variant.ident.to_string(); + + let pairs = inner + .named + .iter() + .map(|x| { + let name = x.ident.as_ref().unwrap(); + (name.to_string(), convert_to_pytype(&x.ty)) + }) + .collect::>(); + + dataclass.add_inclass(basic_dataclass(name.clone(), &pairs)); + variants.push(name); + } + syn::Fields::Unnamed(inner) => { + let fields = inner.unnamed.iter().collect::>(); + if fields.len() != 1 { + unimplemented!("Tuple enums of length != 1 not supported") + } + variants.push(convert_to_pytype(&fields[0].ty)); + } + syn::Fields::Unit => unimplemented!("Unit enum variants not supported"), + } + } + + // here we merge the variants together in a python union + let union_type = format!( + "typing.Union[{}]", + variants + .iter() + .fold(String::new(), |mut acc, x| { + acc.push_str(x); + acc.push_str(", "); + + acc + }) + .strip_suffix(", ") + .expect("union had no variants") + ); + dataclass.add_item(DataclassItem::new("value".to_owned(), union_type)); + + args.add( + "**kwargs".to_owned(), + "typing.Mapping[str, typing.Any]".to_owned(), + ); + // add custom init that does some quasi-quoting hackery + [ + "if len(kwargs) != 1:", + " raise ValueError(\"Malformed Enum constructor args\")", + "key = list(kwargs.keys())[0]", + "if key in self.__class__.__dict__:", + " self.value = self.__class__.__dict__[key](**kwargs[key])", + "else:", + " self.value = mod.__dict__[key](**kwargs[key])", + ] + .into_iter() + .for_each(|line| init.add_line(line.to_owned())); + + init.set_args(args); + dataclass.set_init(Some(init)); + write_pygen(dataclass); + } + Data::Union(_data_union) => unimplemented!(), + }; +} + +fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass { + // this function take a type and identifier that's part of the argumetns to the init fucnction + // and creates the expression for converting it for sotrage in the dataclass, basically this + // means running primitive types through their type constructor to validate them and for other + // dataclasses the arg get unpacked and passed to the relevant class constructor. 
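+    //
+    // For example, a field declared as `pid: int` becomes `self.pid = int(pid)`,
+    // while a dataclass-typed field like `path: Path` becomes
+    // `self.path = Path(**path)` (see the generated ops.py for more of these).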
+ fn make_conversion(ident: &str, ty: &str) -> String { + match ty { + // don't unpack primitive types + "bytes" | "int" | "str" | "bool" => format!("{}({})", ty, ident), + _ => format!("{}(**{})", ty, ident), + } + } + + let mut dataclass = Dataclass::new(name); + let mut init = DataclassInit::new(); + let mut args = InitArgs::new(); + + for (ident, ty) in pairs { + dataclass.add_item(DataclassItem::new(ident.clone(), ty.clone())); + init.add_line(format!("self.{} = {}", ident, make_conversion(ident, ty))); + args.add(ident.clone(), ty.clone()); + } + + init.set_args(args); + dataclass.set_init(Some(init)); + + dataclass +} + +fn convert_to_pytype(ty: &syn::Type) -> String { + match ty { + syn::Type::Array(inner) => { + format!("list[{}]", convert_to_pytype(inner.elem.as_ref())) + } + syn::Type::Path(inner) => { + let name = crate::type_basename(inner).to_string(); + match name.as_str() { + "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" + | "__suseconds_t" | "__syscall_slong_t" | "__syseconds_t" | "__time_t" + | "__u16" | "__u32" | "__u64" | "__uid_t" | "c_int" | "c_long" | "c_uint" + | "dev_t" | "gid_t" | "i32" | "ino_t" | "mode_t" | "pid_t" | "uid_t" => { + "int".to_owned() + } + + "CString" => "bytes".to_owned(), + + _ => name, + } + } + _ => unimplemented!("unsupported type type"), + } +} + +fn write_pygen(item: impl Display) { + static DATACLASSES: OnceLock> = OnceLock::new(); + let mut writer = DATACLASSES + .get_or_init(|| { + let mut file = File::create(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../python/generated/ops.py" + )) + .expect("unable to create ops.py"); + file.write_all(PYGEN_PREAMBLE.as_bytes()) + .expect("failed to write preamble"); + RwLock::new(file) + }) + .write() + .expect("python dataclasses rwlock poisioned"); + writeln!(writer, "{}", item).expect("failed to write pygen"); +} + +struct Dataclass { + indent: usize, + name: String, + inclasses: Vec, + items: Vec, + init: Option, +} + +impl Dataclass { + pub fn new(name: String) -> Self { + Self { + indent: 0, + name, + inclasses: vec![], + items: vec![], + init: None, + } + } + + pub fn add_inclass(&mut self, mut inclass: Dataclass) { + inclass.set_indent(self.indent + 4); + self.inclasses.push(inclass) + } + + pub fn add_item(&mut self, mut item: DataclassItem) { + item.set_indent(self.indent + 4); + self.items.push(item) + } + + pub fn set_init(&mut self, init: Option) { + self.init = init.map(|mut x| { + x.set_indent(self.indent + 4); + x + }); + } + + pub fn set_indent(&mut self, mut indent: usize) -> usize { + for inclass in &mut self.inclasses { + inclass.set_indent(indent + 4); + } + for item in &mut self.items { + item.set_indent(indent + 4); + } + if let Some(init) = &mut self.init { + init.set_indent(indent + 4); + } + + std::mem::swap(&mut self.indent, &mut indent); + indent + } +} + +impl Display for Dataclass { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = self.name.as_str(); + let indent_str = " ".repeat(self.indent); + let gen_init = match self.init { + Some(_) => "False", + None => "True", + }; + + // write class signature + writeln!( + f, + "{indent_str}@dataclass(init={gen_init})\n\ + {indent_str}class {name}:" + )?; + + // write inner class definitions + for inclass in &self.inclasses { + writeln!(f, "{inclass}",)?; + } + + // write dataclass fields + for item in &self.items { + writeln!(f, "{item}")?; + } + + // write init definition (if any) + if let Some(init) = &self.init { + write!(f, "{init}")?; + } + + Ok(()) + } +} + +struct 
DataclassItem { + indent: usize, + name: String, + ty: String, +} + +impl DataclassItem { + pub fn new(name: String, ty: String) -> Self { + Self { + indent: 0, + name, + ty, + } + } + + pub fn set_indent(&mut self, mut indent: usize) -> usize { + std::mem::swap(&mut self.indent, &mut indent); + indent + } +} + +impl Display for DataclassItem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { name, ty, .. } = &self; + let indent_str = " ".repeat(self.indent); + write!(f, "{indent_str}{name}: {ty}") + } +} + +struct DataclassInit { + indent: usize, + args: InitArgs, + body: Vec, +} + +impl DataclassInit { + pub fn new() -> Self { + Self { + indent: 0, + args: InitArgs::new(), + body: vec![], + } + } + + pub fn add_line(&mut self, line: String) { + self.body.push(line) + } + + pub fn set_args(&mut self, args: InitArgs) { + self.args = args; + } + + pub fn set_indent(&mut self, mut indent: usize) -> usize { + std::mem::swap(&mut self.indent, &mut indent); + indent + } +} + +impl Display for DataclassInit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { args, .. } = &self; + let indent_str = " ".repeat(self.indent); + + writeln!(f, "{indent_str}def __init__(self{args}):")?; + + for line in &self.body { + writeln!(f, "{indent_str} {line}")?; + } + + Ok(()) + } +} + +struct InitArgs { + pairs: Vec<(String, String)>, +} + +impl InitArgs { + pub fn new() -> Self { + Self { pairs: vec![] } + } + + pub fn add(&mut self, name: String, ty: String) { + self.pairs.push((name, ty)) + } +} + +impl Display for InitArgs { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for arg in &self.pairs { + let (name, ty) = arg; + write!(f, ", {name}: {ty}")?; + } + Ok(()) + } +} diff --git a/probe_src/probe_frontend/python/generated/ops.py b/probe_src/probe_frontend/python/generated/ops.py new file mode 100644 index 00000000..e7188e17 --- /dev/null +++ b/probe_src/probe_frontend/python/generated/ops.py @@ -0,0 +1,361 @@ + +# This file is automatically @generated by probe_macros + +import sys +import typing +from dataclasses import dataclass + +mod = sys.modules[__name__] + +@dataclass(init=False) +class timespec: + tv_sec: int + tv_nsec: int + def __init__(self, tv_sec: int, tv_nsec: int): + self.tv_sec = int(tv_sec) + self.tv_nsec = int(tv_nsec) + +@dataclass(init=False) +class statx_timestamp: + tv_sec: int + tv_nsec: int + def __init__(self, tv_sec: int, tv_nsec: int): + self.tv_sec = int(tv_sec) + self.tv_nsec = int(tv_nsec) + +@dataclass(init=False) +class statx: + stx_mask: int + stx_blksize: int + stx_attributes: int + stx_nlink: int + stx_uid: int + stx_gid: int + stx_mode: int + stx_ino: int + stx_size: int + stx_blocks: int + stx_attributes_mask: int + stx_atime: statx_timestamp + stx_btime: statx_timestamp + stx_ctime: statx_timestamp + stx_mtime: statx_timestamp + stx_rdev_major: int + stx_rdev_minor: int + stx_dev_major: int + stx_dev_minor: int + stx_mnt_id: int + stx_dio_mem_align: int + stx_dio_offset_align: int + def __init__(self, stx_mask: int, stx_blksize: int, stx_attributes: int, stx_nlink: int, stx_uid: int, stx_gid: int, stx_mode: int, stx_ino: int, stx_size: int, stx_blocks: int, stx_attributes_mask: int, stx_atime: statx_timestamp, stx_btime: statx_timestamp, stx_ctime: statx_timestamp, stx_mtime: statx_timestamp, stx_rdev_major: int, stx_rdev_minor: int, stx_dev_major: int, stx_dev_minor: int, stx_mnt_id: int, stx_dio_mem_align: int, stx_dio_offset_align: int): + self.stx_mask = int(stx_mask) + 
self.stx_blksize = int(stx_blksize) + self.stx_attributes = int(stx_attributes) + self.stx_nlink = int(stx_nlink) + self.stx_uid = int(stx_uid) + self.stx_gid = int(stx_gid) + self.stx_mode = int(stx_mode) + self.stx_ino = int(stx_ino) + self.stx_size = int(stx_size) + self.stx_blocks = int(stx_blocks) + self.stx_attributes_mask = int(stx_attributes_mask) + self.stx_atime = statx_timestamp(**stx_atime) + self.stx_btime = statx_timestamp(**stx_btime) + self.stx_ctime = statx_timestamp(**stx_ctime) + self.stx_mtime = statx_timestamp(**stx_mtime) + self.stx_rdev_major = int(stx_rdev_major) + self.stx_rdev_minor = int(stx_rdev_minor) + self.stx_dev_major = int(stx_dev_major) + self.stx_dev_minor = int(stx_dev_minor) + self.stx_mnt_id = int(stx_mnt_id) + self.stx_dio_mem_align = int(stx_dio_mem_align) + self.stx_dio_offset_align = int(stx_dio_offset_align) + +@dataclass(init=False) +class timeval: + tv_sec: int + tv_usec: int + def __init__(self, tv_sec: int, tv_usec: int): + self.tv_sec = int(tv_sec) + self.tv_usec = int(tv_usec) + +@dataclass(init=False) +class rusage: + ru_utime: timeval + ru_stime: timeval + ru_maxrss: int + ru_ixrss: int + ru_idrss: int + ru_isrss: int + ru_minflt: int + ru_majflt: int + ru_nswap: int + ru_inblock: int + ru_oublock: int + ru_msgsnd: int + ru_msgrcv: int + ru_nsignals: int + ru_nvcsw: int + ru_nivcsw: int + def __init__(self, ru_utime: timeval, ru_stime: timeval, ru_maxrss: int, ru_ixrss: int, ru_idrss: int, ru_isrss: int, ru_minflt: int, ru_majflt: int, ru_nswap: int, ru_inblock: int, ru_oublock: int, ru_msgsnd: int, ru_msgrcv: int, ru_nsignals: int, ru_nvcsw: int, ru_nivcsw: int): + self.ru_utime = timeval(**ru_utime) + self.ru_stime = timeval(**ru_stime) + self.ru_maxrss = int(ru_maxrss) + self.ru_ixrss = int(ru_ixrss) + self.ru_idrss = int(ru_idrss) + self.ru_isrss = int(ru_isrss) + self.ru_minflt = int(ru_minflt) + self.ru_majflt = int(ru_majflt) + self.ru_nswap = int(ru_nswap) + self.ru_inblock = int(ru_inblock) + self.ru_oublock = int(ru_oublock) + self.ru_msgsnd = int(ru_msgsnd) + self.ru_msgrcv = int(ru_msgrcv) + self.ru_nsignals = int(ru_nsignals) + self.ru_nvcsw = int(ru_nvcsw) + self.ru_nivcsw = int(ru_nivcsw) + +@dataclass(init=False) +class Path: + dirfd_minus_at_fdcwd: int + path: bytes + device_major: int + device_minor: int + inode: int + mtime: statx_timestamp + ctime: statx_timestamp + stat_valid: bool + dirfd_valid: bool + def __init__(self, dirfd_minus_at_fdcwd: int, path: bytes, device_major: int, device_minor: int, inode: int, mtime: statx_timestamp, ctime: statx_timestamp, stat_valid: bool, dirfd_valid: bool): + self.dirfd_minus_at_fdcwd = int(dirfd_minus_at_fdcwd) + self.path = bytes(path) + self.device_major = int(device_major) + self.device_minor = int(device_minor) + self.inode = int(inode) + self.mtime = statx_timestamp(**mtime) + self.ctime = statx_timestamp(**ctime) + self.stat_valid = bool(stat_valid) + self.dirfd_valid = bool(dirfd_valid) + +@dataclass(init=False) +class InitProcessOp: + pid: int + def __init__(self, pid: int): + self.pid = int(pid) + +@dataclass(init=False) +class InitExecEpochOp: + epoch: int + program_name: bytes + def __init__(self, epoch: int, program_name: bytes): + self.epoch = int(epoch) + self.program_name = bytes(program_name) + +@dataclass(init=False) +class InitThreadOp: + tid: int + def __init__(self, tid: int): + self.tid = int(tid) + +@dataclass(init=False) +class OpenOp: + path: Path + flags: int + mode: int + fd: int + ferrno: int + def __init__(self, path: Path, flags: int, mode: int, fd: 
int, ferrno: int): + self.path = Path(**path) + self.flags = int(flags) + self.mode = int(mode) + self.fd = int(fd) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class CloseOp: + low_fd: int + high_fd: int + ferrno: int + def __init__(self, low_fd: int, high_fd: int, ferrno: int): + self.low_fd = int(low_fd) + self.high_fd = int(high_fd) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class ChdirOp: + path: Path + ferrno: int + def __init__(self, path: Path, ferrno: int): + self.path = Path(**path) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class ExecOp: + path: Path + ferrno: int + def __init__(self, path: Path, ferrno: int): + self.path = Path(**path) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class CloneOp: + flags: int + run_pthread_atfork_handlers: bool + child_process_id: int + child_thread_id: int + ferrno: int + def __init__(self, flags: int, run_pthread_atfork_handlers: bool, child_process_id: int, child_thread_id: int, ferrno: int): + self.flags = int(flags) + self.run_pthread_atfork_handlers = bool(run_pthread_atfork_handlers) + self.child_process_id = int(child_process_id) + self.child_thread_id = int(child_thread_id) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class ExitOp: + status: int + run_atexit_handlers: bool + def __init__(self, status: int, run_atexit_handlers: bool): + self.status = int(status) + self.run_atexit_handlers = bool(run_atexit_handlers) + +@dataclass(init=False) +class AccessOp: + path: Path + mode: int + flags: int + ferrno: int + def __init__(self, path: Path, mode: int, flags: int, ferrno: int): + self.path = Path(**path) + self.mode = int(mode) + self.flags = int(flags) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class StatOp: + path: Path + flags: int + statx_buf: statx + ferrno: int + def __init__(self, path: Path, flags: int, statx_buf: statx, ferrno: int): + self.path = Path(**path) + self.flags = int(flags) + self.statx_buf = statx(**statx_buf) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class ReaddirOp: + dir: Path + child: bytes + all_children: bool + ferrno: int + def __init__(self, dir: Path, child: bytes, all_children: bool, ferrno: int): + self.dir = Path(**dir) + self.child = bytes(child) + self.all_children = bool(all_children) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class WaitOp: + pid: int + options: int + status: int + ret: int + ferrno: int + def __init__(self, pid: int, options: int, status: int, ret: int, ferrno: int): + self.pid = int(pid) + self.options = int(options) + self.status = int(status) + self.ret = int(ret) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class GetRUsageOp: + waitpid_arg: int + getrusage_arg: int + usage: rusage + ferrno: int + def __init__(self, waitpid_arg: int, getrusage_arg: int, usage: rusage, ferrno: int): + self.waitpid_arg = int(waitpid_arg) + self.getrusage_arg = int(getrusage_arg) + self.usage = rusage(**usage) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class ReadLinkOp: + path: Path + resolved: bytes + ferrno: int + def __init__(self, path: Path, resolved: bytes, ferrno: int): + self.path = Path(**path) + self.resolved = bytes(resolved) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class Metadata: + @dataclass(init=False) + class Mode: + mode: int + def __init__(self, mode: int): + self.mode = int(mode) + + @dataclass(init=False) + class Ownership: + uid: int + gid: int + def __init__(self, uid: int, gid: int): + self.uid = int(uid) + self.gid = int(gid) + + @dataclass(init=False) 
+ class Times: + is_null: bool + atime: timeval + mtime: timeval + def __init__(self, is_null: bool, atime: timeval, mtime: timeval): + self.is_null = bool(is_null) + self.atime = timeval(**atime) + self.mtime = timeval(**mtime) + + value: typing.Union[Mode, Ownership, Times] + def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): + if len(kwargs) != 1: + raise ValueError("Malformed Enum constructor args") + key = list(kwargs.keys())[0] + if key in self.__class__.__dict__: + self.value = self.__class__.__dict__[key](**kwargs[key]) + else: + self.value = mod.__dict__[key](**kwargs[key]) + +@dataclass(init=False) +class UpdateMetadataOp: + path: Path + flags: int + metadata: Metadata + ferrno: int + def __init__(self, path: Path, flags: int, metadata: Metadata, ferrno: int): + self.path = Path(**path) + self.flags = int(flags) + self.metadata = Metadata(**metadata) + self.ferrno = int(ferrno) + +@dataclass(init=False) +class OpInternal: + value: typing.Union[InitProcessOp, InitExecEpochOp, InitThreadOp, OpenOp, CloseOp, ChdirOp, ExecOp, CloneOp, ExitOp, AccessOp, StatOp, ReaddirOp, WaitOp, GetRUsageOp, UpdateMetadataOp, ReadLinkOp] + def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): + if len(kwargs) != 1: + raise ValueError("Malformed Enum constructor args") + key = list(kwargs.keys())[0] + if key in self.__class__.__dict__: + self.value = self.__class__.__dict__[key](**kwargs[key]) + else: + self.value = mod.__dict__[key](**kwargs[key]) + +@dataclass(init=False) +class Op: + data: OpInternal + time: timespec + def __init__(self, data: OpInternal, time: timespec): + self.data = OpInternal(**data) + self.time = timespec(**time) + diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py new file mode 100644 index 00000000..94f830be --- /dev/null +++ b/probe_src/probe_frontend/python/probe.py @@ -0,0 +1,35 @@ + +import typing +import json +import subprocess +import generated.ops as ops + +OpTable = typing.Mapping[int, typing.Mapping[int, typing.Mapping[int, typing.List[ops.Op]]]] + +def load_log(path: str) -> OpTable: + ret: dict[int, dict[int, dict[int, list[ops.Op]]]] = {} + + + lines = subprocess.run( + ["probe", "dump", "--json", "--input", path], + capture_output=True, + encoding="utf-8" + ) + jsonlines = [json.loads(x) for x in lines.stdout.strip().split('\n')] + + for item in jsonlines: + pid: int = item['pid'] + epoch: int = item['exec_epoch'] + tid: int = item['tid'] + op: ops.Op = ops.Op(**item['op']) + + if not pid in ret: + ret[pid] = {} + if not epoch in ret[pid]: + ret[pid][epoch] = {} + if not tid in ret[pid][epoch]: + ret[pid][epoch][tid] = [] + + ret[pid][epoch][tid].append(op) + + return ret diff --git a/probe_src/probe_frontend/src/display.rs b/probe_src/probe_frontend/src/display.rs deleted file mode 100644 index f6cf9c5e..00000000 --- a/probe_src/probe_frontend/src/display.rs +++ /dev/null @@ -1,267 +0,0 @@ -use std::fmt::Display; - -use crate::ops; -use chrono::{DateTime, SecondsFormat}; - -impl Display for ops::statx_timestamp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match DateTime::from_timestamp(self.tv_sec, self.tv_nsec) { - Some(x) => f.write_str(&x.to_rfc3339_opts(SecondsFormat::Secs, true)), - None => f.write_str("[INVALID TIMESTAMP]"), - } - } -} - -impl Display for ops::timeval { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match DateTime::from_timestamp(self.tv_sec, self.tv_usec as u32 * 1000) { - Some(x) => 
f.write_str(&x.to_rfc3339_opts(SecondsFormat::Secs, true)), - None => f.write_str("[INVALID TIMESTAMP]"), - } - } -} - -impl Display for ops::statx { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ uid={}, gid={}, mode={:#06o} ino={}, size={}, mtime={} ]", - self.stx_uid, - self.stx_gid, - self.stx_mode, - self.stx_ino, - self.stx_size, - self.stx_mtime, - ) - } -} - -impl Display for ops::rusage { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ utime={}, stime={}, maxrss={} ]", - self.ru_utime, - self.ru_stime, - self.ru_maxrss, - ) - } -} - -impl Display for ops::Path { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ dirfd={}, path='{}', inode={}, mtime={} ]", - self.dirfd_minus_at_fdcwd + libc::AT_FDCWD, - self.path.to_string_lossy(), - self.inode, - self.mtime, - ) - } -} - -impl Display for ops::CloneOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ child_process_id={}, child_thread_id={}, errno={} ]", - self.child_process_id, - self.child_thread_id, - self.ferrno, - ) - } -} - -impl Display for ops::CloseOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ low_fd={}, high_fd={}, errno={} ]", - self.low_fd, self.high_fd, self.ferrno, - ) - } -} - -impl Display for ops::ExitOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ satus={}, run_atexit_handlers={} ]", - self.status, self.run_atexit_handlers, - ) - } -} - -impl Display for ops::GetRUsageOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ waitpid_arg={}, getrusage_arg={}, usage={}, errno={} ]", - self.waitpid_arg, self.getrusage_arg, self.usage, self.ferrno, - ) - } -} - -impl Display for ops::InitProcessOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[ pid={} ]", self.pid) - } -} - -impl Display for ops::InitThreadOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[ tid={} ]", self.tid) - } -} - -impl Display for ops::WaitOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ pid={}, options={}, status={}, ret={}, errno={} ]", - self.pid, self.options, self.status, self.ret, self.ferrno, - ) - } -} - -impl Display for ops::InitExecEpochOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ epoch={}, program_name={} ]", - self.epoch, - self.program_name.to_string_lossy(), - ) - } -} - -impl Display for ops::OpenOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ path={}, flags={}, mode={:#06o} fd={}, errno={} ]", - self.path, self.flags, self.mode, self.fd, self.ferrno, - ) - } -} - -impl Display for ops::ChdirOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[ path={}, errno={} ]", self.path, self.ferrno,) - } -} - -impl Display for ops::ExecOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "[ path={}, errno={} ]", self.path, self.ferrno,) - } -} - -impl Display for ops::AccessOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ path={}, mode={:#06o}, flags={}, errno={} ]", - self.path, self.mode, self.flags, self.ferrno, - ) - } -} - -impl Display for ops::StatOp { - fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ path={}, flags={}, statx_buf={}, errno={} ]", - self.path, self.flags, self.statx_buf, self.ferrno, - ) - } -} - -impl Display for ops::ReaddirOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ dir={}, child='{}', all_children={}, errno={} ]", - self.dir, - self.child.to_string_lossy(), - self.all_children, - self.ferrno, - ) - } -} - -impl Display for ops::Metadata { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ops::Metadata::Mode(mode) => write!(f, "Mode[ mode={:#06o} ]", mode), - ops::Metadata::Ownership { uid, gid } => { - write!(f, "Ownership[ uid={}, gid={} ]", uid, gid) - } - ops::Metadata::Times { - is_null, - atime, - mtime, - } => write!( - f, - "Times[ is_null={}, atime={}, mtime={} ]", - is_null, atime, mtime - ), - } - } -} - -impl Display for ops::UpdateMetadataOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ path={}, flags={}, metadata={}, errno={} ]", - self.path, self.flags, self.metadata, self.ferrno, - ) - } -} - -impl Display for ops::ReadLinkOp { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "[ path={}, resolved='{}', errno={} ]", - self.path, - self.resolved.to_string_lossy(), - self.ferrno - ) - } -} - -impl Display for ops::OpInternal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fn wfmt(f: &mut std::fmt::Formatter<'_>, x: &str, y: impl Display) -> std::fmt::Result { - write!(f, "{}{}", x, y) - } - - match self { - ops::OpInternal::InitProcess(x) => wfmt(f, "InitProcessOp", x), - ops::OpInternal::InitExecEpoch(x) => wfmt(f, "InitExecEpochOp", x), - ops::OpInternal::InitThread(x) => wfmt(f, "InitThreadOp", x), - ops::OpInternal::Open(x) => wfmt(f, "OpenOp", x), - ops::OpInternal::Close(x) => wfmt(f, "CloseOp", x), - ops::OpInternal::Chdir(x) => wfmt(f, "ChdirOp", x), - ops::OpInternal::Exec(x) => wfmt(f, "ExecOp", x), - ops::OpInternal::Clone(x) => wfmt(f, "CloneOp", x), - ops::OpInternal::Exit(x) => wfmt(f, "ExitOp", x), - ops::OpInternal::Access(x) => wfmt(f, "AccessOp", x), - ops::OpInternal::Stat(x) => wfmt(f, "StatOp", x), - ops::OpInternal::Readdir(x) => wfmt(f, "ReadirOp", x), - ops::OpInternal::Wait(x) => wfmt(f, "WaitOp", x), - ops::OpInternal::GetRUsage(x) => wfmt(f, "GetRUsageOp", x), - ops::OpInternal::UpdateMetadata(x) => wfmt(f, "UpdateMetadataOp", x), - ops::OpInternal::ReadLink(x) => wfmt(f, "ReadLinkOp", x), - } - } -} - -impl Display for ops::Op { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.data.fmt(f) - } -} diff --git a/probe_src/probe_frontend/src/ffi.rs b/probe_src/probe_frontend/src/ffi.rs deleted file mode 100644 index 3597c519..00000000 --- a/probe_src/probe_frontend/src/ffi.rs +++ /dev/null @@ -1,8 +0,0 @@ -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] - -use serde::{Deserialize, Serialize}; - -// Bindings are generated by `../build.sh` -include!(concat!(env!("OUT_DIR"), "/bindings.rs")); diff --git a/probe_src/probe_frontend/src/main.rs b/probe_src/probe_frontend/src/main.rs deleted file mode 100644 index 56fe28a1..00000000 --- a/probe_src/probe_frontend/src/main.rs +++ /dev/null @@ -1,317 +0,0 @@ -use std::{ - ffi::OsString, - fs::{self, File}, - io::{Read, Write}, - path::{Path, PathBuf}, -}; - -use clap::Parser; -use color_eyre::eyre::{eyre, Context, Report, Result}; -use 
flate2::Compression; - -/// Raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with -/// rust-bindgen. -/// -/// If you're trying to make sense of this it's going to be much easier if you have `prov_ops.h` -/// open as well. -mod ffi; - -/// Rust versions of Arena structs from [`ffi`]. -/// -/// While simple Ops containing only Integral values can be used directly from [`ffi`], more -/// complicated structs with paths or other strings need to be manually converted to more rusty -/// versions so they can be serialized. This module re-exports the trivial Ops and defines new ones -/// (as well as methods for converting) for the non-trivial structs. -mod ops; - -/// [`std::fmt::Display`] trait implementations for [`ops::Op`] and all the Op variants and other -/// structs. -/// -/// This is used by the `dump` command to print out the Ops in as close as possible to a -/// human-readable format, I hate to say this but for specific questions its probably better to -/// just look at the source code. -mod display; - -/// Parsing of arena directories created by libprobe into a cross-platform -/// serialized format. -/// -/// # Serialization format -/// -/// The serialization format output is very similar to the raw libprobe arena format. It's a -/// filesystem hierarchy of `//` but instead of `` being a directory containing -/// `ops` and `data` directories with the raw C-struct arenas, `` is a -/// [jsonlines](https://jsonlines.org/) file, where each line is a json representation of an -/// [`ops::Op`]. -mod arena; - -/// System metadata recorded into probe logs. -mod metadata; - -/// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. -#[derive(clap::Parser, Debug, Clone)] -#[command(author, version, about, long_about = None)] -#[command(propagate_version = true)] -struct Cli { - #[command(subcommand)] - command: Command, -} - -#[derive(clap::Subcommand, Debug, Clone)] -enum Command { - /// Execute a command and record its provenance - Record { - /// Directory to output PROBE log to - #[arg(short, long, required = false, default_value = "probe_log")] - output: OsString, - - /// Overwrite existing output directory if it exists - #[arg(short = 'f', long)] - overwrite: bool, - - /// Run in gdb - #[arg(long)] - gdb: bool, - - /// Override the path to libprobe.so (this path will be canonicalized) - #[arg(long)] - lib_path: Option, - - /// Run in verbose & debug build of libprobe - #[arg(long)] - debug: bool, - - /// Command to execute under provenance - #[arg(required = true)] - cmd: Vec, - }, - - /// Write the data from probe log data in a human-readable manner - Dump { - /// Directory to load PROBE log from - #[arg(short, long, required = false, default_value = "probe_log")] - input: OsString, - }, -} - -// TODO: break out each sub-command as a separate function -fn main() -> Result<()> { - color_eyre::install()?; - env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); - log::info!("Logger Facility Initialized"); - - match Cli::parse().command { - Command::Record { - output, - overwrite, - gdb, - lib_path, - debug, - cmd, - } => { - // if -f is set, we should clear-out the old probe_log - if overwrite { - match fs::remove_file(&output) { - Ok(_) => (), - Err(e) => match e.kind() { - std::io::ErrorKind::NotFound => (), - _ => return Err(e).wrap_err("Error deleting old output file"), - }, - }; - } - let mut tar = tar::Builder::new(flate2::write::GzEncoder::new( - File::create_new(output).wrap_err("Failed 
to create output file")?, - Compression::default(), - )); - - // the path to the libprobe.so directory is searched for as follows: - // - --lib-path argument if set - // - __PROBE_LIB env var if set - // - /usr/share/probe - // - error - let mut ld_preload = fs::canonicalize(match lib_path { - Some(x) => x, - None => match std::env::var_os("__PROBE_LIB") { - Some(x) => PathBuf::from(x), - None => match Path::new("/usr/share/probe").exists() { - true => PathBuf::from("/usr/share/probe"), - false => { - return Err(eyre!( - "Can't find libprobe lib path, ensure libprobe is installed in \ - /usr/share/probe or set --lib-path or __PROBE_LIB" - )) - } - }, - }, - }) - .wrap_err("unable to canonicalize lib path")?; - - if debug || gdb { - log::debug!("Using debug version of libprobe"); - ld_preload.push("libprobe-dbg.so"); - } else { - ld_preload.push("libprobe.so"); - } - - // append any existing LD_PRELOAD overrides - if let Some(x) = std::env::var_os("LD_PRELOAD") { - ld_preload.push(":"); - ld_preload.push(&x); - } - - let arena_dir = tempfile::tempdir().wrap_err("Failed to create arena directory")?; - - let mut child = if gdb { - let mut dir_env = OsString::from("__PROBE_DIR="); - dir_env.push(arena_dir.path()); - let mut preload_env = OsString::from("LD_PRELOAD="); - preload_env.push(ld_preload); - - std::process::Command::new("gdb") - .arg("--args") - .arg("env") - .arg(dir_env) - .arg(preload_env) - .args(&cmd) - .env_remove("__PROBE_LIB") - .env_remove("__PROBE_LOG") - .spawn() - .wrap_err("Failed to launch gdb")? - } else { - std::process::Command::new(&cmd[0]) - .args(&cmd[1..]) - .env_remove("__PROBE_LIB") - .env_remove("__PROBE_LOG") - .env("__PROBE_DIR", arena_dir.path()) - .env("LD_PRELOAD", ld_preload) - .spawn() - .wrap_err("Failed to launch child process")? - }; - - let metadata = metadata::Metadata::new(child.id() as i32); - - let outdir = tempfile::tempdir()?; - - File::create_new(outdir.path().join("_metadata")) - .wrap_err("failed to create metadata file in output directory")? - .write_all( - serde_json::to_string(&metadata) - .wrap_err("Error serializng metadata")? - .as_bytes(), - ) - .wrap_err("Error writing metadata")?; - - match Path::read_dir(arena_dir.path()) { - Ok(x) => { - if !(x - .into_iter() - .try_fold(false, |_, x| x.map(|x| x.path().exists()))?) - { - log::warn!( - "No arean files detected, something is \ - wrong, you should probably abort!" - ); - } - } - Err(e) => { - return Err(e).wrap_err( - "Unable to read arena directory during post-startup sanity check", - ) - } - } - - child.wait().wrap_err("Failed to await child process")?; - arena::parse_arena_dir(arena_dir.path(), &outdir) - .wrap_err("Unable to decode arena directory")?; - - tar.append_dir_all(".", &outdir) - .wrap_err("Failed to copy output dir into archive")?; - tar.finish().wrap_err("Failed to finish writing tarball")?; - - if let Err(e) = outdir.close() { - log::warn!("Failed to close output directory: {}", e); - } - - if let Err(e) = arena_dir.close() { - log::warn!("Failed to close arena directory: {}", e); - } - - Ok::<(), Report>(()) - } - Command::Dump { input } => { - let file = flate2::read::GzDecoder::new(File::open(&input).wrap_err(format!( - "Failed to open input file '{}'", - input.to_string_lossy() - ))?); - - let mut tar = tar::Archive::new(file); - - tar.entries() - .wrap_err("Unable to get tarball entry iterator")? 
- .try_for_each(|x| { - let mut entry = x.wrap_err("Unable to extract tarball entry")?; - - let path = entry - .path() - .wrap_err("Error getting path of tarball entry")? - .as_ref() - .to_str() - .ok_or_else(|| eyre!("Tarball entry path not valid UTF-8"))? - .to_owned(); - - if path == "_metadata" { - return Ok(()); - } - - let mut buf = String::new(); - let size = entry - .read_to_string(&mut buf) - .wrap_err("unable to read contents of tarball entry")?; - - // this is the case where the entry is a directory - if size == 0 { - return Ok(()); - } - - let hierarchy = path - .split('/') - .map(|x| { - x.parse::().wrap_err(format!( - "Unable to convert path component '{x}' to integer" - )) - }) - .collect::, _>>() - .wrap_err("Unable to extract PID.EPOCH.TID hierarchy")?; - - if hierarchy.len() != 3 { - return Err(eyre!("malformed PID.EPOCH.TID hierarchy")); - } - - let ops = buf - .split('\n') - .filter_map(|x| { - if x.is_empty() { - return None; - } - Some( - serde_json::from_str::(x) - .wrap_err("Error deserializing Op"), - ) - }) - .collect::, _>>() - .wrap_err("Failed to deserialize TID file")?; - - let mut stdout = std::io::stdout().lock(); - for op in ops { - writeln!( - stdout, - "{}.{}.{} >>> {}", - hierarchy[0], hierarchy[1], hierarchy[2], op, - ) - .wrap_err("Error printing Op")?; - } - - Ok(()) - }) - } - } -} diff --git a/probe_src/probe_frontend/src/ops.rs b/probe_src/probe_frontend/src/ops.rs deleted file mode 100644 index 1dd747da..00000000 --- a/probe_src/probe_frontend/src/ops.rs +++ /dev/null @@ -1,410 +0,0 @@ -#[allow(unused_imports)] -pub use crate::ffi::{ - dev_t, gid_t, ino_t, mode_t, rusage, statx, statx_timestamp, timespec, timeval, uid_t, CloneOp, - CloseOp, ExitOp, GetRUsageOp, InitProcessOp, InitThreadOp, WaitOp, -}; -pub use std::ffi::{c_int, c_uint}; - -use color_eyre::eyre::{eyre, Context, Result}; -use serde::{Deserialize, Serialize}; -use std::ffi::{CStr, CString}; - -use crate::{arena::ArenaContext, ffi}; - -/// Specialized version of [`std::convert::From`] for working with libprobe arena structs. -/// -/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because -/// they came from a different virtual memory space). This trait and It's sibling [`FfiInto`] -/// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be -/// used to decode pointers. -pub(crate) trait FfiFrom { - fn ffi_from(value: &T, ctx: &ArenaContext) -> Result - where - Self: Sized; -} - -/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. -/// -/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket -/// implementation as the reciprocal of [`FfiFrom`]. -pub(crate) trait FfiInto { - fn ffi_into(&self, ctx: &ArenaContext) -> Result; -} - -impl FfiInto for T -where - U: FfiFrom, -{ - #[inline] - fn ffi_into(&self, ctx: &ArenaContext) -> Result { - U::ffi_from(self, ctx) - } -} - -/// Try to convert an invalid pointer from and ffi libprobe struct into a string type. -/// -/// The strings emitted by libprobe are from C code, so they're pointers to an arbitrary sequence -/// of non-null bytes terminated by a null byte. This means we can't use the [`String`] type since -/// rust requires that all [`String`]s are valid UTF-8. -/// -/// Instead we use [`CString`] which is provided by the standard library for ffi code like this. 
-fn try_to_cstring(str: *const i8, ctx: &ArenaContext) -> Result { - if str.is_null() { - CString::new("").wrap_err("Failed to create empty CString") - } else { - match ctx.try_get_slice(str as usize) { - Some(x) => Ok(CStr::from_bytes_until_nul(x) - .wrap_err("Failed to create CString")? - .to_owned()), - None => return Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Path { - pub dirfd_minus_at_fdcwd: i32, - pub path: CString, - pub device_major: dev_t, - pub device_minor: dev_t, - pub inode: ino_t, - pub mtime: statx_timestamp, - pub ctime: statx_timestamp, - pub stat_valid: bool, - pub dirfd_valid: bool, -} - -impl FfiFrom for Path { - fn ffi_from(value: &ffi::Path, ctx: &ArenaContext) -> Result { - Ok(Self { - dirfd_minus_at_fdcwd: value.dirfd_minus_at_fdcwd, - path: try_to_cstring(value.path, ctx) - .wrap_err("Unable to decode char* into path string")?, - device_major: value.device_major, - device_minor: value.device_minor, - inode: value.inode, - mtime: value.mtime, - ctime: value.ctime, - stat_valid: value.stat_valid, - dirfd_valid: value.dirfd_valid, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct InitExecEpochOp { - pub epoch: c_uint, - pub program_name: CString, -} - -impl FfiFrom for InitExecEpochOp { - fn ffi_from(value: &ffi::InitExecEpochOp, ctx: &ArenaContext) -> Result { - Ok(Self { - epoch: value.epoch, - program_name: try_to_cstring(value.program_name, ctx) - .wrap_err("Unable to decode program name char* into string")?, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct OpenOp { - pub path: Path, - pub flags: c_int, - pub mode: mode_t, - pub fd: i32, - pub ferrno: c_int, -} - -impl FfiFrom for OpenOp { - fn ffi_from(value: &ffi::OpenOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - flags: value.flags, - mode: value.mode, - fd: value.fd, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChdirOp { - pub path: Path, - pub ferrno: c_int, -} - -impl FfiFrom for ChdirOp { - fn ffi_from(value: &ffi::ChdirOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExecOp { - pub path: Path, - pub ferrno: c_int, -} - -impl FfiFrom for ExecOp { - fn ffi_from(value: &ffi::ExecOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AccessOp { - pub path: Path, - pub mode: c_int, - pub flags: c_int, - pub ferrno: c_int, -} - -impl FfiFrom for AccessOp { - fn ffi_from(value: &ffi::AccessOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - mode: value.mode, - flags: value.flags, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StatOp { - pub path: Path, - pub flags: c_int, - pub statx_buf: statx, - pub ferrno: c_int, -} - -impl FfiFrom for StatOp { - fn ffi_from(value: &ffi::StatOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - flags: value.flags, - statx_buf: value.statx_buf, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReaddirOp { - pub dir: Path, - pub child: CString, - pub all_children: bool, - pub ferrno: c_int, -} - -impl FfiFrom for ReaddirOp { - 
fn ffi_from(value: &ffi::ReaddirOp, ctx: &ArenaContext) -> Result { - Ok(Self { - dir: value.dir.ffi_into(ctx)?, - child: try_to_cstring(value.child, ctx) - .wrap_err("Unable to decode child char* into string")?, - all_children: value.all_children, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum Metadata { - Mode(mode_t), - Ownership { - uid: uid_t, - gid: gid_t, - }, - Times { - is_null: bool, - atime: timeval, - mtime: timeval, - }, -} - -/// # safety -/// the [[`ffi::MetadataKind`]] passed to this function must a valid variant of MetadataKind enum -/// and be accurate for the passed value because it directly effects the interpretation of the -/// [[`ffi::MetadataValue`]] union with no additional checks -impl Metadata { - pub unsafe fn from_kind_and_value( - kind: ffi::MetadataKind, - value: ffi::MetadataValue, - ) -> Result { - log::debug!("[unsafe] decoding Metadata tagged union"); - Ok(match kind { - ffi::MetadataKind_MetadataMode => Metadata::Mode(unsafe { value.mode }), - ffi::MetadataKind_MetadataOwnership => Metadata::Ownership { - uid: unsafe { value.ownership }.uid, - gid: unsafe { value.ownership }.gid, - }, - ffi::MetadataKind_MetadataTimes => Metadata::Times { - is_null: unsafe { value.times }.is_null, - atime: unsafe { value.times }.atime, - mtime: unsafe { value.times }.mtime, - }, - _ => return Err(eyre!("Invalid MetadataKind Variant")), - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct UpdateMetadataOp { - pub path: Path, - pub flags: c_int, - pub metadata: Metadata, - pub ferrno: c_int, -} - -impl FfiFrom for UpdateMetadataOp { - fn ffi_from(value: &ffi::UpdateMetadataOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - flags: value.flags, - metadata: unsafe { Metadata::from_kind_and_value(value.kind, value.value) } - .wrap_err("Unable to decode Metadata tagged union")?, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReadLinkOp { - pub path: Path, - pub resolved: CString, - pub ferrno: c_int, -} - -impl FfiFrom for ReadLinkOp { - fn ffi_from(value: &ffi::ReadLinkOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - resolved: try_to_cstring(value.resolved, ctx) - .wrap_err("Unable to decode symlink resolve char* to string")?, - ferrno: value.ferrno, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum OpInternal { - InitProcess(InitProcessOp), - InitExecEpoch(InitExecEpochOp), - InitThread(InitThreadOp), - Open(OpenOp), - Close(CloseOp), - Chdir(ChdirOp), - Exec(ExecOp), - Clone(CloneOp), - Exit(ExitOp), - Access(AccessOp), - Stat(StatOp), - Readdir(ReaddirOp), - Wait(WaitOp), - GetRUsage(GetRUsageOp), - UpdateMetadata(UpdateMetadataOp), - ReadLink(ReadLinkOp), -} - -/// # safety -/// the [[`ffi::OpCode`]] passed to this function must a valid variant of OpCode enum -/// and be accurate for the passed value because it directly effects the interpretation of the -/// value union with no additional checks -impl OpInternal { - pub unsafe fn from_kind_and_value( - kind: ffi::OpCode, - value: &ffi::Op__bindgen_ty_1, - ctx: &ArenaContext, - ) -> Result { - log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); - Ok(match kind { - ffi::OpCode_init_process_op_code => { - Self::InitProcess(unsafe { value.init_process_epoch }) - } - ffi::OpCode_init_exec_epoch_op_code => Self::InitExecEpoch( - unsafe { value.init_exec_epoch } - .ffi_into(ctx) - .wrap_err("Unable 
to decode InitExecEpochOp")?, - ), - ffi::OpCode_init_thread_op_code => Self::InitThread(unsafe { value.init_thread }), - ffi::OpCode_open_op_code => Self::Open( - unsafe { value.open } - .ffi_into(ctx) - .wrap_err("Unable to decode OpenOp")?, - ), - ffi::OpCode_close_op_code => Self::Close(unsafe { value.close }), - ffi::OpCode_chdir_op_code => Self::Chdir( - unsafe { value.chdir } - .ffi_into(ctx) - .wrap_err("Unable to decode ChdirOp")?, - ), - ffi::OpCode_exec_op_code => Self::Exec( - unsafe { value.exec } - .ffi_into(ctx) - .wrap_err("Unable to decode ExecOp")?, - ), - ffi::OpCode_clone_op_code => Self::Clone(unsafe { value.clone }), - ffi::OpCode_exit_op_code => Self::Exit(unsafe { value.exit }), - ffi::OpCode_access_op_code => Self::Access( - unsafe { value.access } - .ffi_into(ctx) - .wrap_err("Unable to decode AccessOp")?, - ), - ffi::OpCode_stat_op_code => Self::Stat( - unsafe { value.stat } - .ffi_into(ctx) - .wrap_err("Unable to decode StatOp")?, - ), - ffi::OpCode_readdir_op_code => Self::Readdir( - unsafe { value.readdir } - .ffi_into(ctx) - .wrap_err("Unable to decode ReaddirOp")?, - ), - ffi::OpCode_wait_op_code => Self::Wait(unsafe { value.wait }), - ffi::OpCode_getrusage_op_code => Self::GetRUsage(unsafe { value.getrusage }), - ffi::OpCode_update_metadata_op_code => Self::UpdateMetadata( - unsafe { value.update_metadata } - .ffi_into(ctx) - .wrap_err("Unable to decode UpdateMetadataOp")?, - ), - ffi::OpCode_read_link_op_code => Self::ReadLink( - unsafe { value.read_link } - .ffi_into(ctx) - .wrap_err("Unable to decode ReadlinkOp")?, - ), - _ => { - if kind < ffi::OpCode_LAST_OP_CODE && kind > ffi::OpCode_FIRST_OP_CODE { - return Err(eyre!( - "Valid OpCode not handled (this is a bug, please report it)" - )); - } else { - return Err(eyre!("Invalid OpCode")); - } - } - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Op { - pub data: OpInternal, - pub time: timespec, -} - -impl FfiFrom for Op { - fn ffi_from(value: &ffi::Op, ctx: &ArenaContext) -> Result { - Ok(Self { - data: unsafe { OpInternal::from_kind_and_value(value.op_code, &value.data, ctx) } - .wrap_err("Unable to decode Op tagged union")?, - time: value.time, - }) - } -} From 89d3a2f2036041b2e13b30e40e95b4dbbf63ef7e Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 00:57:59 -0500 Subject: [PATCH 11/37] fix cargo fmt/clippy --- probe_src/probe_frontend/lib/src/error.rs | 20 ++++++++++++------- probe_src/probe_frontend/lib/src/lib.rs | 4 +--- .../probe_frontend/lib/src/transcribe.rs | 5 ++++- probe_src/probe_frontend/macros/src/lib.rs | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/probe_src/probe_frontend/lib/src/error.rs b/probe_src/probe_frontend/lib/src/error.rs index 380a1d07..7d595c3e 100644 --- a/probe_src/probe_frontend/lib/src/error.rs +++ b/probe_src/probe_frontend/lib/src/error.rs @@ -42,9 +42,7 @@ pub enum ProbeError { }, #[error("{context}:\nNeeded Option was None")] - MissingOption { - context: &'static str, - }, + MissingOption { context: &'static str }, #[error("{0}")] ArenaError(crate::transcribe::ArenaError), @@ -89,19 +87,27 @@ pub(crate) trait ConvertErr { impl ConvertErr for std::io::Error { fn convert(self, context: &'static str) -> ProbeError { - ProbeError::ContextIO { context, error: self } + ProbeError::ContextIO { + context, + error: self, + } } } impl ConvertErr for ProbeError { fn convert(self, context: &'static str) -> ProbeError { - ProbeError::Context { context, error: Box::new(self) } + ProbeError::Context { + context, + 
error: Box::new(self), + } } } impl ConvertErr for serde_json::Error { fn convert(self, context: &'static str) -> ProbeError { - ProbeError::JsonError { context, error: self } + ProbeError::JsonError { + context, + error: self, + } } } - diff --git a/probe_src/probe_frontend/lib/src/lib.rs b/probe_src/probe_frontend/lib/src/lib.rs index 4ec768fe..5c6865c5 100644 --- a/probe_src/probe_frontend/lib/src/lib.rs +++ b/probe_src/probe_frontend/lib/src/lib.rs @@ -1,4 +1,3 @@ - /// Op definitions /// /// While simple Ops containing only Integral values can be used/serialized directory from @@ -25,8 +24,7 @@ pub mod ops; pub mod transcribe; // currently unused, get system metadata -mod metadata; +// mod metadata; /// Library error type and definitions. pub mod error; - diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index 26d163e0..21b5971c 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -245,7 +245,10 @@ fn filename_numeric>(dir: P) -> Result { })? .parse::() .map_err(|e| { - log::error!("Parsing filename '{}' to integer", filename.to_string_lossy()); + log::error!( + "Parsing filename '{}' to integer", + filename.to_string_lossy() + ); ProbeError::from(e) }) .wrap_err("Failed to parse filename to integer") diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 52c3445f..442c040c 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -59,7 +59,7 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { .map_err(|e| { ProbeError::FFiConversionError { msg: "Error calling ffi_into() on\ - #field_idents creating #new_name", + #field_idents creating #new_name", inner: Box::new(e), } })?, From 1554dfc12cf646f0b0e736d705871f544d5295ba Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 10:30:22 -0500 Subject: [PATCH 12/37] Added pygen sanity check to flake checks --- probe_src/probe_frontend/flake.nix | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index e4f45e38..4a705f70 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -83,7 +83,8 @@ pname = "probe-macros"; cargoExtraArgs = "-p probe_macros"; installPhase = '' - cp -r python/ $out + mkdir -p $out + cp -r python $out/python ''; }); in { @@ -132,6 +133,11 @@ partitions = 1; partitionType = "count"; }); + + pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' + cp ${probe-macros}/python/generated/ops.py $out + ${pkgs.python312}/bin/python $out + ''; }; packages = { From 85c37cc96e6db45b500c6ed311929392b412b77b Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 17:51:18 -0500 Subject: [PATCH 13/37] :sparkles: documentation :sparkles: --- probe_src/probe_frontend/README.md | 124 +++++++++ probe_src/probe_frontend/cli/src/dump.rs | 34 ++- probe_src/probe_frontend/cli/src/main.rs | 96 +------ probe_src/probe_frontend/cli/src/record.rs | 106 +++++++- probe_src/probe_frontend/cli/src/util.rs | 47 ++-- probe_src/probe_frontend/flake.nix | 15 +- probe_src/probe_frontend/lib/build.rs | 6 +- probe_src/probe_frontend/lib/src/error.rs | 34 ++- probe_src/probe_frontend/lib/src/ffi.rs | 251 ------------------ probe_src/probe_frontend/lib/src/lib.rs | 14 +- probe_src/probe_frontend/lib/src/ops.rs | 111 ++++---- .../probe_frontend/lib/src/transcribe.rs | 40 +-- 
 probe_src/probe_frontend/macros/src/lib.rs    |  10 +-
 probe_src/probe_frontend/macros/src/pygen.rs  |  53 +++-
 .../python/{generated => }/ops.py             |   0
 probe_src/probe_frontend/python/probe.py      |   2 +-
 16 files changed, 471 insertions(+), 472 deletions(-)
 create mode 100644 probe_src/probe_frontend/README.md
 delete mode 100644 probe_src/probe_frontend/lib/src/ffi.rs
 rename probe_src/probe_frontend/python/{generated => }/ops.py (100%)

diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md
new file mode 100644
index 00000000..f57bf3fb
--- /dev/null
+++ b/probe_src/probe_frontend/README.md
@@ -0,0 +1,124 @@
+
+# PROBE Frontend
+
+Tools for recording and manipulating libprobe provenance.
+
+## Terminology
+
+The documentation in this project assumes the reader understands a couple of
+pieces of terminology specific to this tool.
+
+- **Probe record** (or probe recording)
+This is a directory (`probe_record` by default) that contains raw arena
+allocator `*.dat` files created by libprobe, arranged in a
+`<PID>/<EXEC_EPOCH>/<TID>` hierarchy (see the top-level repo glossary for an
+explanation of an exec epoch). These files contain
+[mmap(2)](https://www.man7.org/linux/man-pages/man2/mmap.2.html)-ed C structures
+and are not guaranteed to be valid if moved to a computer with a different
+architecture, kernel version, or C compiler (or if any of those things change on
+the same computer).
+
+- **Probe log**
+This is a directory **or** file (`probe_log` by default) that encodes the data
+from a probe record in a format that is cross-platform and much easier to use; a
+probe log file is just a gzipped tarball containing a probe log directory (see
+the section on serialization formats below).
+
+- **Transcription**
+This is the process of converting a probe record to a probe log.
+
+- **Translation**
+This is the process of polypeptide synthesis from mRNA strands generated during
+[**transcription**](https://en.wikipedia.org/wiki/Transcription_(biology)).
+(joke)
+
+## Using the CLI to create probe logs
+
+The simplest invocation of the `probe` CLI is
+
+```bash
+probe record <CMD>
+```
+
+This will run `<CMD>` under the benevolent supervision of libprobe, outputting
+the probe record to a temporary directory. When the process exits, `probe` will
+transcribe the record directory and write a probe log file named `probe_log` in
+the current directory.
+
+If you run this again you'll notice it throws an error because the output file
+already exists; solve this by passing `-o` to specify a different file to write
+the log to, or by passing `-f` to overwrite the previous log.
+
+The transcription process can take a while after the program exits. If you don't
+want to transcribe the record automatically, you can pass the `-n` flag; this
+changes the default output path from `probe_log` to `probe_record` and outputs a
+probe record directory that can be transcribed to a probe log later with the
+`probe transcribe` command.
+
+### Subshells
+
+`probe record` does **not** pass your command through a shell; any subshell or
+environment substitutions will still be performed by your shell before the
+arguments are passed to `probe`. But it won't understand flow-control statements
+like `if` and `for`, shell builtins like `cd`, or shell aliases/functions.
+
+If you need these you can either write a shell script and invoke `probe record`
+on that, or else run:
+
+```bash
+probe record -- bash -c '<COMMANDS>'
+```
+
+(note the `--` so that `probe` doesn't try to parse `-c` as a flag).
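+
+For example, a small shell loop (illustrative only; any shell one-liner works
+the same way) can be recorded by handing the whole thing to `bash`:
+
+```bash
+probe record -- bash -c 'for f in *.txt; do wc -l "$f"; done'
+```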
+ +## Serialization formats + +### Probe record directory + +The exact format of the probe record directory is defined by libprobe and not +part of this tool's spec, however a best-effort explanation is still given. + +- Each probe record directory is composed of a top-level directory containing +one or more PID directories. + +- Each PID directory has a numeric name corresponding to the PID of the process +who's provenance is recorded inside it, and in turn contains one or more exec +epoch directories. + +- Each exec epoch directory has a numeric name corresponding to the exec epoch +of the virtual memory space who's provenance is recorded inside it, and in turn +contains one or more TID directories. + +- Each TID directory has a numeric name corresponding to the TID of the thread +who's provenance is recorded inside it, it contains two subdirectories named +`data` and `ops` + +- The `data` and `ops` directories both contains one or more files of the form +`X.dat` where `X` is a number, the `.dat` files inside the `data` directory are +called "data arenas", while those in the `ops` directory are called "op arenas". + +- Each op arena is a binary file containing an arena header followed by zero or +more raw op c-structs, followed by zero or more null bytes. + +- Each data arena is a binary file containing and arena header followed by zero +or more bytes of arbitrary data, followed by zero or more null bytes. + +### Probe log directory + +This format **is** part of this tool's spec, and this tool is the source of +truth for its format. + +- The format of the top-level, PID, and exec epoch directories is the same as +for the probe record directory described above, but rather than containing TID +directories, each exec epoch directory contains one or more TID files. + +- Each TID file has a numeric name corresponding to the TID of the thread who's +provenance is recorded inside it. It is a [jsonlines](https://jsonlines.org/) +file, where each line is an op (as defined in this library) serialized as json. + +### Probe log file + +This format is simply a probe log directory that's bundled into a tar archive +and compressed with gzip, since its easier to move as a single file and +compresses well. diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index 76733d2f..d1f21df0 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -9,14 +9,10 @@ use color_eyre::eyre::{eyre, Result, WrapErr}; use probe_frontend::ops; use serde::{Deserialize, Serialize}; -#[derive(Debug, Clone, Serialize, Deserialize)] -struct DumpOp { - pid: usize, - exec_epoch: usize, - tid: usize, - op: ops::Op, -} - +/// Print the ops from a probe log out for humans. +/// +/// This hides some of the data and so is not suitable for machine consumption use +/// [`to_stdout_json()`] instead. pub fn to_stdout>(tar_path: P) -> Result<()> { dump_internal(tar_path, |(pid, epoch, tid), ops| { let mut stdout = std::io::stdout().lock(); @@ -27,6 +23,15 @@ pub fn to_stdout>(tar_path: P) -> Result<()> { }) } +/// Prints the ops from a probe log out for machine consumption. 
+/// +/// The ops are emitted one on each line, in the form: +/// +/// ``` +/// { "pid": X, "exec_epoch": Y, "tid": Z, "op": {...} } +/// ``` +/// +/// (without whitespace) pub fn to_stdout_json>(tar_path: P) -> Result<()> { dump_internal(tar_path, |(pid, epoch, tid), ops| { let mut stdout = std::io::stdout().lock(); @@ -117,6 +122,19 @@ fn dump_internal, F: Fn((usize, usize, usize), Vec) -> R }) } +/// Helper struct constructed from pid/epoch/tid hierarchy information and an op. Used for +/// serialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DumpOp { + pid: usize, + exec_epoch: usize, + tid: usize, + op: ops::Op, +} + +// TODO: Display won't work (foreign trait rule) but some kind of streaming would be better; if we +// don't care about UTF-8 guarantees we might be able to do some kind of byte iterator approach and +// evaluate it all lazily trait Dump { fn dump(&self) -> String; } diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs index 87306a8b..2f26004c 100644 --- a/probe_src/probe_frontend/cli/src/main.rs +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -1,20 +1,19 @@ -use std::{ - ffi::OsString, - fs::{self, File}, -}; +use std::{ffi::OsString, fs::File}; use clap::Parser; use color_eyre::eyre::{Context, Result}; use flate2::Compression; -use util::Dir; +/// Output the ops from a probe log file to stdout. mod dump; + +/// Run commands under provenance and generate probe record directory. mod record; -/// Wrapper over [`probe_frontend::transcribe`] which provides high-level commands +/// Wrapper over [`probe_frontend::transcribe`]. mod transcribe; -/// Utility code for creating temporary directories +/// Utility code for creating temporary directories. mod util; /// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. 
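As a sketch of how the machine-readable form documented for `to_stdout_json()` above (one `{ "pid": ..., "exec_epoch": ..., "tid": ..., "op": {...} }` object per line) might be consumed, the following Python groups ops per thread. How the JSON output mode is selected on the command line is not shown in this patch, so the sketch simply reads the emitted lines from stdin.

```python
# Minimal sketch (not part of this patch): consume the per-line JSON records
# documented for to_stdout_json() and count ops per (pid, exec_epoch, tid).
import json
import sys
from collections import defaultdict

ops_by_thread = defaultdict(list)
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    record = json.loads(line)
    key = (record["pid"], record["exec_epoch"], record["tid"])
    ops_by_thread[key].append(record["op"])

for (pid, epoch, tid), ops in sorted(ops_by_thread.items()):
    print(f"pid={pid} exec_epoch={epoch} tid={tid}: {len(ops)} ops")
```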
@@ -96,9 +95,9 @@ fn main() -> Result<()> { debug, cmd, } => if no_transcribe { - record_no_transcribe(output, overwrite, gdb, debug, cmd) + record::record_no_transcribe(output, overwrite, gdb, debug, cmd) } else { - record_transcribe(output, overwrite, gdb, debug, cmd) + record::record_transcribe(output, overwrite, gdb, debug, cmd) } .wrap_err("Record command failed"), @@ -124,82 +123,3 @@ fn main() -> Result<()> { .wrap_err("Dump command failed"), } } - -fn record_no_transcribe( - output: Option, - overwrite: bool, - gdb: bool, - debug: bool, - cmd: Vec, -) -> Result<()> { - let output = match output { - Some(x) => fs::canonicalize(x).wrap_err("Failed to canonicalize record directory path")?, - None => { - let mut output = std::env::current_dir().wrap_err("Failed to get CWD")?; - output.push("probe_record"); - output - } - }; - - if overwrite { - if let Err(e) = fs::remove_dir_all(&output) { - match e.kind() { - std::io::ErrorKind::NotFound => (), - _ => return Err(e).wrap_err("Failed to remove exisitng record directory"), - } - } - } - - let record_dir = Dir::new(output).wrap_err("Failed to create record directory")?; - - record::Recorder::new(cmd, record_dir) - .gdb(gdb) - .debug(debug) - .record()?; - - Ok(()) -} - -fn record_transcribe( - output: Option, - overwrite: bool, - gdb: bool, - debug: bool, - cmd: Vec, -) -> Result<()> { - let output = match output { - Some(x) => x, - None => OsString::from("probe_log"), - }; - - let file = if overwrite { - File::create(&output) - } else { - File::create_new(&output) - } - .wrap_err("Failed to create output file")?; - - let mut tar = tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); - - let mut record_dir = record::Recorder::new( - cmd, - util::Dir::temp(true).wrap_err("Failed to create record directory")?, - ) - .gdb(gdb) - .debug(debug) - .record()?; - - match transcribe::transcribe(&record_dir, &mut tar) { - Ok(_) => (), - Err(e) => { - log::error!( - "Error transcribing record directory, saving directory '{}'", - record_dir.as_ref().to_string_lossy() - ); - record_dir.drop = false; - return Err(e).wrap_err("Failed to transcirbe record directory"); - } - }; - - Ok(()) -} diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index 1ae6c4c5..b59db563 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -1,14 +1,100 @@ use std::{ ffi::OsString, - fs, + fs::{self, File}, path::{Path, PathBuf}, thread, }; use color_eyre::eyre::{eyre, Result, WrapErr}; +use flate2::Compression; -use crate::util::Dir; +use crate::{transcribe, util::Dir}; +// TODO: modularize and improve ergonomics (maybe expand builder pattern?) 
+ +/// create a probe record directory from subset of a [`Command::Record`](crate::Command::Record) +pub fn record_no_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => fs::canonicalize(x).wrap_err("Failed to canonicalize record directory path")?, + None => { + let mut output = std::env::current_dir().wrap_err("Failed to get CWD")?; + output.push("probe_record"); + output + } + }; + + if overwrite { + if let Err(e) = fs::remove_dir_all(&output) { + match e.kind() { + std::io::ErrorKind::NotFound => (), + _ => return Err(e).wrap_err("Failed to remove exisitng record directory"), + } + } + } + + let record_dir = Dir::new(output).wrap_err("Failed to create record directory")?; + + Recorder::new(cmd, record_dir) + .gdb(gdb) + .debug(debug) + .record()?; + + Ok(()) +} + +/// create a probe log file from subset of a [`Command::Record`](crate::Command::Record) +pub fn record_transcribe( + output: Option, + overwrite: bool, + gdb: bool, + debug: bool, + cmd: Vec, +) -> Result<()> { + let output = match output { + Some(x) => x, + None => OsString::from("probe_log"), + }; + + let file = if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file")?; + + let mut tar = tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())); + + let mut record_dir = Recorder::new( + cmd, + Dir::temp(true).wrap_err("Failed to create record directory")?, + ) + .gdb(gdb) + .debug(debug) + .record()?; + + match transcribe::transcribe(&record_dir, &mut tar) { + Ok(_) => (), + Err(e) => { + log::error!( + "Error transcribing record directory, saving directory '{}'", + record_dir.as_ref().to_string_lossy() + ); + record_dir.drop = false; + return Err(e).wrap_err("Failed to transcirbe record directory"); + } + }; + + Ok(()) +} + +/// Builder for running processes under provenance. +// TODO: extract this into the library part of this project #[derive(Debug)] pub struct Recorder { gdb: bool, @@ -35,7 +121,9 @@ impl Recorder { libprobe.push("libprobe.so"); } - // append any existing LD_PRELOAD overrides + // append any existing LD_PRELOAD overrides; libprobe needs to be explicitly converted from + // a PathBuf to a OsString because PathBuf::push() automatically adds path separators which + // is incorrect here. let mut ld_preload = OsString::from(libprobe); if let Some(x) = std::env::var_os("LD_PRELOAD") { ld_preload.push(":"); @@ -69,6 +157,8 @@ impl Recorder { .wrap_err("Failed to launch child process")? }; + // without this the child process typically won't have written it's first op by the time we + // do our sanity check, since we're about to wait on child anyway, this isn't a big deal. thread::sleep(std::time::Duration::from_millis(50)); match Path::read_dir(self.output.path()) { @@ -78,7 +168,8 @@ impl Recorder { .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; if !any_files { log::warn!( - "No arean files detected, something is wrong, you should probably abort!" + "No arean files detected after 50ms, \ + something is wrong, you should probably abort!" ); } } @@ -92,6 +183,11 @@ impl Recorder { Ok(self.output) } + + /// Create new [`Recorder`] from a command and the directory where it should write the probe + /// record. + /// + /// `cmd[0]` will be used as the command while `cmd[1..]` will be used as the arguments. 
pub fn new(cmd: Vec, output: Dir) -> Self { Self { gdb: false, @@ -102,11 +198,13 @@ impl Recorder { } } + /// Set if the process should be run under gdb, implies debug. pub fn gdb(mut self, gdb: bool) -> Self { self.gdb = gdb; self } + /// Set if the debug version of libprobe should be used. pub fn debug(mut self, debug: bool) -> Self { self.debug = debug; self diff --git a/probe_src/probe_frontend/cli/src/util.rs b/probe_src/probe_frontend/cli/src/util.rs index c8b09c57..f530b31e 100644 --- a/probe_src/probe_frontend/cli/src/util.rs +++ b/probe_src/probe_frontend/cli/src/util.rs @@ -6,20 +6,52 @@ use std::{ use color_eyre::eyre::{Context, Result}; use rand::Rng; +/// Represents a newly created directory and optionally acts as a RAII guard that (attempts to) +/// delete the directory and anything in it when dropped. #[derive(Debug)] pub struct Dir { + /// path to created directory path: PathBuf, + + /// drop flag, if this is `true` when [`Dir`] is dropped then the drop hook will call + /// [`fs::remove_dir_all()`] on `path`, if this fails it will log a warning but take no other + /// action. pub drop: bool, } impl Dir { + /// Attempts to create a new directory at `path`. + /// + /// By default directories created this way **are not** deleted when [`Dir`] is dropped. #[inline] pub fn new(path: PathBuf) -> Result { fs::create_dir(&path).wrap_err("Failed to create named directory")?; Ok(Self { path, drop: false }) } + /// Attempts to create a new tempoerary directory + /// + /// The directory is created in the path retunred by [`std::env::temp_dir()`] and is named + /// `probe-XXXXXXXX` where `X` is a random alphanumeric digit. Will try again (indefinitely) if + /// directory creation errors with [`AlreadyExists`](io::ErrorKind::AlreadyExists). + /// + /// By default directories created this way **are** deleted when [`Dir`] is dropped. pub fn temp(drop: bool) -> Result { + fn rand_alphanumeric(len: usize) -> String { + const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ + abcdefghijklmnopqrstuvwxyz\ + 0123456789"; + + let mut rng = rand::thread_rng(); + + (0..len) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect() + } + let mut path = std::env::temp_dir(); path.push(format!("probe-{}", rand_alphanumeric(8))); @@ -57,18 +89,3 @@ impl Drop for Dir { } } } - -fn rand_alphanumeric(len: usize) -> String { - const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ - abcdefghijklmnopqrstuvwxyz\ - 0123456789"; - - let mut rng = rand::thread_rng(); - - (0..len) - .map(|_| { - let idx = rng.gen_range(0..CHARSET.len()); - CHARSET[idx] as char - }) - .collect() -} diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 4a705f70..1141e844 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -17,11 +17,10 @@ }; }; - # FIXME: currently all the different crates get their dependencies grouped - # together, this means you can't build even the pure-rust crates without - # python, I'd like to figure out how to avoid this; a rust-bindgen crate - # and a PyO3 crate is really pushing what crane was designed to do (but the - # other options are worse). 
+ # TODO: cleanup derivations and make more usable: + # - version of probe cli with bundled libprobe and wrapper script + # - python code as actual module + # (this may require merging this flake with the top-level one) outputs = { self, nixpkgs, @@ -127,15 +126,15 @@ # Run tests with cargo-nextest # this is why `doCheck = false` on other crate derivations, to not run # the tests twice. - workspace-nextest = craneLib.cargoNextest (commonArgs + probe-workspace-nextest = craneLib.cargoNextest (commonArgs // { inherit cargoArtifacts; partitions = 1; partitionType = "count"; }); - pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' - cp ${probe-macros}/python/generated/ops.py $out + probe-pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' + cp ${probe-macros}/python/ops.py $out ${pkgs.python312}/bin/python $out ''; }; diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index 9123b297..70d8b1ad 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -8,7 +8,7 @@ use bindgen::callbacks::ParseCallbacks; #[derive(Debug)] struct LibprobeCallback; -/// These C-structs get prefixed with "Bindgen_" because a rust version of the struct will be +/// These C-structs get prefixed with "C_" because a rust version of the struct will be /// either generated or manually implemented. fn should_prefix(name: &str) -> bool { static LIST: OnceLock> = OnceLock::new(); @@ -64,7 +64,7 @@ fn no_derive(name: &str) -> bool { impl ParseCallbacks for LibprobeCallback { fn item_name(&self, _original_item_name: &str) -> Option { if should_prefix(_original_item_name) { - Some(format!("Bindgen_{}", _original_item_name)) + Some(format!("C_{}", _original_item_name)) } else { None } @@ -75,7 +75,7 @@ impl ParseCallbacks for LibprobeCallback { match info.kind { bindgen::callbacks::TypeKind::Struct => { - let orig_name = info.name.strip_prefix("Bindgen_"); + let orig_name = info.name.strip_prefix("C_"); if orig_name.is_some() && !no_derive(orig_name.unwrap()) { ret.push("MakeRustOp".to_owned()); } diff --git a/probe_src/probe_frontend/lib/src/error.rs b/probe_src/probe_frontend/lib/src/error.rs index 7d595c3e..8473f011 100644 --- a/probe_src/probe_frontend/lib/src/error.rs +++ b/probe_src/probe_frontend/lib/src/error.rs @@ -5,52 +5,84 @@ pub type Result = std::result::Result; #[non_exhaustive] #[derive(Debug, thiserror::Error)] pub enum ProbeError { + /// wrapper explaining where an occurred converting a [`C_` struct](crate::ops) to its rust + /// version, call [`root_cause()`](Self::root_cause()) to return the underlying error. #[error("{msg}: {inner}")] FFiConversionError { msg: &'static str, inner: Box, }, + /// The tag of a tagged union type from an [`C_` struct](crate::ops) isn't a valid variant of + /// that union #[error("Invalid variant of tagged union")] InvalidVariant(u32), + /// A pointer from an [`C_` struct](crate::ops) couldn't be decoded into a byte slice. #[error("Unable to decode pointer {0:#x}")] InvalidPointer(usize), + /// Unable to generate a [`CString`](std::ffi::CString) from a byte slice because it had no null byte. #[error("Expected null byte but none found")] MissingNull, + /// Used instead of [`unreachable`] so that functions up the call stack can add + /// [context](Self::Context). #[error("Reached code believed unreachable, please report this bug")] UnreachableCode, + /// An error occurred serializing or deserializing a struct into/from json. 
#[error("(de)serialization error ({context}):\n{error}")] JsonError { context: &'static str, error: serde_json::Error, }, + /// A generic wrapper around another [`ProbeError`] type that adds additional context, call + /// [`root_cause()`](Self::root_cause()) to return the underlying error. #[error("{context}:\n{error}")] Context { context: &'static str, error: Box, }, + /// A wrapper over a [`std::io::Error`] with a description of what the was being done when an + /// IO error occurred #[error("{context}:\n{error}")] ContextIO { context: &'static str, error: std::io::Error, }, + /// An external function returned [`None`] when [`Some`] was required, contains explanation. + // FIXME: this is an unhelpful error #[error("{context}:\nNeeded Option was None")] MissingOption { context: &'static str }, + /// A wrapper over [`ArenaError`](crate::transcribe::ArenaError), see that type for variant + /// details. #[error("{0}")] ArenaError(crate::transcribe::ArenaError), + /// An error occured trying to parse a string into an integer, this error is generally wrapped + /// in [context](Self::Context). #[error("{0}")] ParseIntError(ParseIntError), } +impl ProbeError { + /// Walks down the inner value(s) of one or more layers of [`Context`](Self::Context) or + /// [`FfiConversionError`](Self::FFiConversionError) and returns a reference to the underlying + /// error type, returns `&self` for other variants. + pub fn root_cause(&self) -> &ProbeError { + match self { + Self::Context { error, .. } => error.as_ref().root_cause(), + Self::FFiConversionError { inner, .. } => inner.as_ref().root_cause(), + _ => self, + } + } +} + impl From for ProbeError { fn from(value: crate::transcribe::ArenaError) -> Self { Self::ArenaError(value) @@ -64,7 +96,7 @@ impl From for ProbeError { } /// create new [`ProbeError::MissingOption`] with the given context -pub fn option_err(context: &'static str) -> ProbeError { +pub(crate) fn option_err(context: &'static str) -> ProbeError { ProbeError::MissingOption { context } } diff --git a/probe_src/probe_frontend/lib/src/ffi.rs b/probe_src/probe_frontend/lib/src/ffi.rs deleted file mode 100644 index 4016c4bf..00000000 --- a/probe_src/probe_frontend/lib/src/ffi.rs +++ /dev/null @@ -1,251 +0,0 @@ -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] - -use crate::transcribe::ArenaContext; -use color_eyre::eyre::{Result, WrapErr, eyre}; -use probe_macros::MakeRustOp; -use pyo3::pyclass; -use serde::{Deserialize, Serialize}; - -/// Specialized version of [`std::convert::From`] for working with libprobe arena structs. -/// -/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because -/// they came from a different virtual memory space). This trait and It's sibling [`FfiInto`] -/// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be -/// used to decode pointers. -pub(crate) trait FfiFrom { - fn ffi_from(value: &T, ctx: &ArenaContext) -> Result - where - Self: Sized; -} - -/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. -/// -/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket -/// implementation as the reciprocal of [`FfiFrom`]. 
-pub(crate) trait FfiInto { - fn ffi_into(&self, ctx: &ArenaContext) -> Result; -} - -impl FfiFrom for T { - fn ffi_from(value: &T, _ctx: &ArenaContext) -> Result { - Ok(*value) - } -} - -impl FfiInto for T -where - U: FfiFrom, -{ - #[inline] - fn ffi_into(&self, ctx: &ArenaContext) -> Result { - U::ffi_from(self, ctx) - } -} - -impl FfiFrom<*const i8> for std::ffi::CString { - fn ffi_from(value: &*const i8, ctx: &ArenaContext) -> Result { - let str = *value; - if str.is_null() { - std::ffi::CString::new("").wrap_err("Failed to create empty CString") - } else { - match ctx.try_get_slice(str as usize) { - Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x) - .wrap_err("Failed to create CString")? - .to_owned()), - None => Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), - } - } - } -} - -impl FfiFrom<*mut i8> for std::ffi::CString { - fn ffi_from(value: &*mut i8, ctx: &ArenaContext) -> Result { - let str = *value; - if str.is_null() { - std::ffi::CString::new("").wrap_err("Failed to create empty CString") - } else { - match ctx.try_get_slice(str as usize) { - Some(x) => Ok(std::ffi::CStr::from_bytes_until_nul(x) - .wrap_err("Failed to create CString")? - .to_owned()), - None => Err(eyre!("Unable to lookup pointer {0:#x}", (str as usize))), - } - } - } -} - -// Bindings are generated by `../build.sh` and the MakeRustOp proc-macro -include!(concat!(env!("OUT_DIR"), "/bindings.rs")); - - -// NOTE: the raw versions of these Ops are tagged unions, so currently they have to be manually -// implemented, this is somewhat confusing since they extensively use types and trait -// implementations that are auto-generated. - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum OpInternal { - InitProcess(InitProcessOp), - InitExecEpoch(InitExecEpochOp), - InitThread(InitThreadOp), - Open(OpenOp), - Close(CloseOp), - Chdir(ChdirOp), - Exec(ExecOp), - Clone(CloneOp), - Exit(ExitOp), - Access(AccessOp), - Stat(StatOp), - Readdir(ReaddirOp), - Wait(WaitOp), - GetRUsage(GetRUsageOp), - UpdateMetadata(UpdateMetadataOp), - ReadLink(ReadLinkOp), -} - -impl FfiFrom for OpInternal { - fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { - let kind = value.op_code; - let value = value.data; - - log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); - Ok(match kind { - Bindgen_OpCode_init_process_op_code => { - Self::InitProcess(unsafe { value.init_process_epoch }.ffi_into(ctx)?) 
- } - Bindgen_OpCode_init_exec_epoch_op_code => Self::InitExecEpoch( - unsafe { value.init_exec_epoch } - .ffi_into(ctx) - .wrap_err("Unable to decode InitExecEpochOp")?, - ), - Bindgen_OpCode_init_thread_op_code => Self::InitThread(unsafe { value.init_thread }.ffi_into(ctx)?), - Bindgen_OpCode_open_op_code => Self::Open( - unsafe { value.open } - .ffi_into(ctx) - .wrap_err("Unable to decode OpenOp")?, - ), - Bindgen_OpCode_close_op_code => Self::Close(unsafe { value.close }.ffi_into(ctx)?), - Bindgen_OpCode_chdir_op_code => Self::Chdir( - unsafe { value.chdir } - .ffi_into(ctx) - .wrap_err("Unable to decode ChdirOp")?, - ), - Bindgen_OpCode_exec_op_code => Self::Exec( - unsafe { value.exec } - .ffi_into(ctx) - .wrap_err("Unable to decode ExecOp")?, - ), - Bindgen_OpCode_clone_op_code => Self::Clone(unsafe { value.clone }.ffi_into(ctx)?), - Bindgen_OpCode_exit_op_code => Self::Exit(unsafe { value.exit }.ffi_into(ctx)?), - Bindgen_OpCode_access_op_code => Self::Access( - unsafe { value.access } - .ffi_into(ctx) - .wrap_err("Unable to decode AccessOp")?, - ), - Bindgen_OpCode_stat_op_code => Self::Stat( - unsafe { value.stat } - .ffi_into(ctx) - .wrap_err("Unable to decode StatOp")?, - ), - Bindgen_OpCode_readdir_op_code => Self::Readdir( - unsafe { value.readdir } - .ffi_into(ctx) - .wrap_err("Unable to decode ReaddirOp")?, - ), - Bindgen_OpCode_wait_op_code => Self::Wait(unsafe { value.wait }.ffi_into(ctx)?), - Bindgen_OpCode_getrusage_op_code => Self::GetRUsage(unsafe { value.getrusage }.ffi_into(ctx)?), - Bindgen_OpCode_update_metadata_op_code => Self::UpdateMetadata( - unsafe { value.update_metadata } - .ffi_into(ctx) - .wrap_err("Unable to decode UpdateMetadataOp")?, - ), - Bindgen_OpCode_read_link_op_code => Self::ReadLink( - unsafe { value.read_link } - .ffi_into(ctx) - .wrap_err("Unable to decode ReadlinkOp")?, - ), - _ => { - if kind < Bindgen_OpCode_LAST_OP_CODE && kind > Bindgen_OpCode_FIRST_OP_CODE { - return Err(eyre!( - "Valid OpCode not handled (this is a bug, please report it)" - )); - } else { - return Err(eyre!("Invalid OpCode")); - } - } - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Op { - pub data: OpInternal, - pub time: timespec, -} - -impl FfiFrom for Op { - fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { - Ok(Self { - data: value - .ffi_into(ctx) - .wrap_err("Unable to decode OpInternal")?, - time: value.time.ffi_into(ctx)?, - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum Metadata { - Mode(mode_t), - Ownership { - uid: uid_t, - gid: gid_t, - }, - Times { - is_null: bool, - atime: timeval, - mtime: timeval, - }, -} - -impl FfiFrom for Metadata { - fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { - let kind = value.kind; - let value = value.value; - - log::debug!("[unsafe] decoding Metadata tagged union"); - Ok(match kind { - Bindgen_MetadataKind_MetadataMode => Metadata::Mode(unsafe { value.mode }), - Bindgen_MetadataKind_MetadataOwnership => Metadata::Ownership { - uid: unsafe { value.ownership }.uid, - gid: unsafe { value.ownership }.gid, - }, - Bindgen_MetadataKind_MetadataTimes => Metadata::Times { - is_null: unsafe { value.times }.is_null, - atime: unsafe { value.times }.atime.ffi_into(ctx)?, - mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, - }, - _ => return Err(eyre!("Invalid MetadataKind Variant")), - }) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct UpdateMetadataOp { - pub path: Path, - pub flags: ::std::os::raw::c_int, - pub 
metadata: Metadata, - pub ferrno: ::std::os::raw::c_int, -} - -impl FfiFrom for UpdateMetadataOp { - fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { - Ok(Self { - path: value.path.ffi_into(ctx)?, - flags: value.flags, - metadata: value.ffi_into(ctx).wrap_err("Unable to decode Metadata")?, - ferrno: value.ferrno, - }) - } -} diff --git a/probe_src/probe_frontend/lib/src/lib.rs b/probe_src/probe_frontend/lib/src/lib.rs index 5c6865c5..1ea39fa4 100644 --- a/probe_src/probe_frontend/lib/src/lib.rs +++ b/probe_src/probe_frontend/lib/src/lib.rs @@ -1,18 +1,18 @@ -/// Op definitions +/// transcribe probe record directories created by libprobe to log directories + +/// Op definitions from `prov_ops.h` +/// +/// This module contains ffi bindings for the raw C-structs emitted by libprobe, generated automatically with +/// rust-bindgen (these start with `C_`), as well as the converted version which can be serialized /// /// While simple Ops containing only Integral values can be used/serialized directory from /// libprobe, more complicated structs containing pointers (usually in the form of strings) need to /// be manually converted to versions so they can be serialized. This module re-exports the trivial /// structs and defines new ones (as well as methods for converting) for the non-trivial structs. /// -/// Raw ffi bindings for the raw C-structs emitted by libprobe, generated automatically with -/// rust-bindgen (these start with `Bindgen_`. -/// -/// If you're trying to make sense of this it's going to be much easier if you have `prov_ops.h` -/// open as well. pub mod ops; -/// Transcribe raw Bindgen Ops from libprobe to usable, serializable data. +/// Convert part of all of a probe record directory to a probe log directory. /// /// # Serialization format /// diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index c9aa9da7..c92e1ddb 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -1,7 +1,6 @@ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] -#![allow(unsafe_op_in_unsafe_fn)] // <- PyO3 breaks without this use crate::error::{ProbeError, Result}; use crate::transcribe::ArenaContext; @@ -11,39 +10,23 @@ use std::ffi::CString; /// Specialized version of [`std::convert::From`] for working with libprobe arena structs. /// -/// Since [`ffi`] structs from arena allocator files have intrinsically invalid pointers (because +/// Since `C_*` structs from arena allocator files have intrinsically invalid pointers (because /// they came from a different virtual memory space). This trait and It's sibling [`FfiInto`] /// exist to act as [`From`] and [`Into`] with an added parameter of a [`ArenaContext`] that can be /// used to decode pointers. -pub(crate) trait FfiFrom { +/// +/// The autogenerated, rust versions of `C_*` structs implement this trait by recursively calling it +/// on each of it's fields. In order to make this work there are three base case implementations: +/// +/// - `*mut i8` and `*const i8` can (try to) be converted to [`CString`]s by looking up the +/// pointers in the [`ArenaContext`], +/// - Any type implementing [`Copy`], this base case just returns itself. +pub trait FfiFrom { fn ffi_from(value: &T, ctx: &ArenaContext) -> Result where Self: Sized; } -/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. 
-/// -/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket -/// implementation as the reciprocal of [`FfiFrom`]. -pub(crate) trait FfiInto { - fn ffi_into(&self, ctx: &ArenaContext) -> Result; -} - -impl FfiInto for T -where - U: FfiFrom, -{ - #[inline] - fn ffi_into(&self, ctx: &ArenaContext) -> Result { - U::ffi_from(self, ctx) - } -} - -// these are the three base implementations of FFiFrom; each generated Op implements FFiFrom by -// calling ffi_into(ctx) on each of it's fields, each fields *must* be either: -// - Another generated struct -// - A Copy-able value -// - An i8 pointer, which maps to the C *char and are converted to CStrings impl FfiFrom for T { #[inline] fn ffi_from(value: &T, _: &ArenaContext) -> Result { @@ -63,6 +46,26 @@ impl FfiFrom<*mut i8> for CString { } } +/// Specialized version of [`std::convert::Into`] for working with libprobe arena structs. +/// +/// Much like [`std::convert::Into`] this trait is implemented automatically with a blanket +/// implementation as the reciprocal of [`FfiFrom`]. +/// +/// See [`FfiFrom`] for an explanation of how this is used in the conversion of `C_` structs +pub trait FfiInto { + fn ffi_into(&self, ctx: &ArenaContext) -> Result; +} + +impl FfiInto for T +where + U: FfiFrom, +{ + #[inline] + fn ffi_into(&self, ctx: &ArenaContext) -> Result { + U::ffi_from(self, ctx) + } +} + fn try_cstring(str: *const i8, ctx: &ArenaContext) -> Result { if str.is_null() { std::ffi::CString::new("").map_err(|_| ProbeError::MissingNull) @@ -99,21 +102,21 @@ pub enum Metadata { }, } -impl FfiFrom for Metadata { - fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for Metadata { + fn ffi_from(value: &C_UpdateMetadataOp, ctx: &ArenaContext) -> Result { let kind = value.kind; let value = value.value; log::debug!("[unsafe] decoding Metadata tagged union"); Ok(match kind { - Bindgen_MetadataKind_MetadataMode => Metadata::Mode { + C_MetadataKind_MetadataMode => Metadata::Mode { mode: unsafe { value.mode }, }, - Bindgen_MetadataKind_MetadataOwnership => Metadata::Ownership { + C_MetadataKind_MetadataOwnership => Metadata::Ownership { uid: unsafe { value.ownership }.uid, gid: unsafe { value.ownership }.gid, }, - Bindgen_MetadataKind_MetadataTimes => Metadata::Times { + C_MetadataKind_MetadataTimes => Metadata::Times { is_null: unsafe { value.times }.is_null, atime: unsafe { value.times }.atime.ffi_into(ctx)?, mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, @@ -131,8 +134,8 @@ pub struct UpdateMetadataOp { pub ferrno: ::std::os::raw::c_int, } -impl FfiFrom for UpdateMetadataOp { - fn ffi_from(value: &Bindgen_UpdateMetadataOp, ctx: &ArenaContext) -> Result { +impl FfiFrom for UpdateMetadataOp { + fn ffi_from(value: &C_UpdateMetadataOp, ctx: &ArenaContext) -> Result { Ok(Self { path: value.path.ffi_into(ctx)?, flags: value.flags, @@ -167,41 +170,39 @@ pub enum OpInternal { ReadLinkOp(ReadLinkOp), } -impl FfiFrom for OpInternal { - fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { +impl FfiFrom for OpInternal { + fn ffi_from(value: &C_Op, ctx: &ArenaContext) -> Result { let kind = value.op_code; let value = value.data; log::debug!("[unsafe] decoding Op tagged union [ OpCode={} ]", kind); Ok(match kind { - Bindgen_OpCode_init_process_op_code => { + C_OpCode_init_process_op_code => { Self::InitProcessOp(unsafe { value.init_process_epoch }.ffi_into(ctx)?) 
} - Bindgen_OpCode_init_exec_epoch_op_code => { + C_OpCode_init_exec_epoch_op_code => { Self::InitExecEpochOp(unsafe { value.init_exec_epoch }.ffi_into(ctx)?) } - Bindgen_OpCode_init_thread_op_code => { + C_OpCode_init_thread_op_code => { Self::InitThreadOp(unsafe { value.init_thread }.ffi_into(ctx)?) } - Bindgen_OpCode_open_op_code => Self::OpenOp(unsafe { value.open }.ffi_into(ctx)?), - Bindgen_OpCode_close_op_code => Self::CloseOp(unsafe { value.close }.ffi_into(ctx)?), - Bindgen_OpCode_chdir_op_code => Self::ChdirOp(unsafe { value.chdir }.ffi_into(ctx)?), - Bindgen_OpCode_exec_op_code => Self::ExecOp(unsafe { value.exec }.ffi_into(ctx)?), - Bindgen_OpCode_clone_op_code => Self::CloneOp(unsafe { value.clone }.ffi_into(ctx)?), - Bindgen_OpCode_exit_op_code => Self::ExitOp(unsafe { value.exit }.ffi_into(ctx)?), - Bindgen_OpCode_access_op_code => Self::AccessOp(unsafe { value.access }.ffi_into(ctx)?), - Bindgen_OpCode_stat_op_code => Self::StatOp(unsafe { value.stat }.ffi_into(ctx)?), - Bindgen_OpCode_readdir_op_code => { - Self::ReaddirOp(unsafe { value.readdir }.ffi_into(ctx)?) - } - Bindgen_OpCode_wait_op_code => Self::WaitOp(unsafe { value.wait }.ffi_into(ctx)?), - Bindgen_OpCode_getrusage_op_code => { + C_OpCode_open_op_code => Self::OpenOp(unsafe { value.open }.ffi_into(ctx)?), + C_OpCode_close_op_code => Self::CloseOp(unsafe { value.close }.ffi_into(ctx)?), + C_OpCode_chdir_op_code => Self::ChdirOp(unsafe { value.chdir }.ffi_into(ctx)?), + C_OpCode_exec_op_code => Self::ExecOp(unsafe { value.exec }.ffi_into(ctx)?), + C_OpCode_clone_op_code => Self::CloneOp(unsafe { value.clone }.ffi_into(ctx)?), + C_OpCode_exit_op_code => Self::ExitOp(unsafe { value.exit }.ffi_into(ctx)?), + C_OpCode_access_op_code => Self::AccessOp(unsafe { value.access }.ffi_into(ctx)?), + C_OpCode_stat_op_code => Self::StatOp(unsafe { value.stat }.ffi_into(ctx)?), + C_OpCode_readdir_op_code => Self::ReaddirOp(unsafe { value.readdir }.ffi_into(ctx)?), + C_OpCode_wait_op_code => Self::WaitOp(unsafe { value.wait }.ffi_into(ctx)?), + C_OpCode_getrusage_op_code => { Self::GetRUsageOp(unsafe { value.getrusage }.ffi_into(ctx)?) } - Bindgen_OpCode_update_metadata_op_code => { + C_OpCode_update_metadata_op_code => { Self::UpdateMetadataOp(unsafe { value.update_metadata }.ffi_into(ctx)?) } - Bindgen_OpCode_read_link_op_code => { + C_OpCode_read_link_op_code => { Self::ReadLinkOp(unsafe { value.read_link }.ffi_into(ctx)?) } _ => return Err(ProbeError::InvalidVariant(kind)), @@ -215,8 +216,8 @@ pub struct Op { pub time: timespec, } -impl FfiFrom for Op { - fn ffi_from(value: &Bindgen_Op, ctx: &ArenaContext) -> Result { +impl FfiFrom for Op { + fn ffi_from(value: &C_Op, ctx: &ArenaContext) -> Result { Ok(Self { data: value.ffi_into(ctx)?, time: value.time.ffi_into(ctx)?, diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index 21b5971c..52b49023 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -11,14 +11,12 @@ use std::{ use crate::{ error::{option_err, ConvertErr, ProbeError, Result, WrapErr}, - ops::{self, FfiFrom}, + ops::{self, C_Op, FfiFrom}, }; -type RawOp = ops::Bindgen_Op; - // pub mod ops; -/// Recursively parse a Top-level arena allocator directory and write it in serialized. +/// Recursively parse a whole probe record directory and write it to a probe log directory. /// /// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. 
/// @@ -56,7 +54,7 @@ pub fn parse_top_level, P2: AsRef + Sync>( Ok(count) } -/// Recursively parse a PID arena allocator directory and write it in serialized. +/// Recursively parse a probe record PID directory and write it as a probe log PID directory. /// /// This function calls [`parse_exec_epoch()`] on each sub-directory in `in_dir`. /// @@ -86,7 +84,8 @@ pub fn parse_pid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } -/// Recursively parse a Epoch arena allocator directory and write it in serialized. +/// Recursively parse a probe record exec epoch directory and write it as a probe log exec epoch +/// directory. /// /// This function calls [`parse_tid()`] on each sub-directory in `in_dir`. /// @@ -119,7 +118,7 @@ pub fn parse_exec_epoch, P2: AsRef>( .try_fold(0usize, |acc, x| x.map(|x| acc + x)) } -/// Recursively parse a TID arena allocator directory and write it in serialized. +/// Recursively parse a probe record TID directory and write it as a probe log TID directory. /// /// This function parses a TID directory in 6 steps: /// @@ -230,7 +229,9 @@ pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R /// Gets the filename from a path and returns it parsed as an integer. /// -/// errors if the path has no filename or the filename can't be parsed as an integer. +/// Errors if the path has no filename, the filename isn't valid UTF-8, or the filename can't be +/// parsed as an integer. +// TODO: cleanup errors, better context fn filename_numeric>(dir: P) -> Result { let filename = dir.as_ref().file_name().ok_or_else(|| { log::error!("'{}' has no filename", dir.as_ref().to_string_lossy()); @@ -254,7 +255,7 @@ fn filename_numeric>(dir: P) -> Result { .wrap_err("Failed to parse filename to integer") } -/// this struct represents a `/data` directory from libprobe. +/// this struct represents a `/data` probe record directory. pub struct ArenaContext(pub Vec); impl ArenaContext { @@ -268,7 +269,7 @@ impl ArenaContext { } } -/// This struct represents a single `data/*.dat` arena allocator file emitted by libprobe. +/// This struct represents a single `data/*.dat` file from a probe record directory. pub struct DataArena { header: ArenaHeader, raw: Vec, @@ -296,7 +297,7 @@ impl DataArena { } } -/// This struct represents a single `ops/*.dat` arena allocator file emitted by libprobe. +/// This struct represents a single `ops/*.dat` file from a probe record directory. pub struct OpsArena<'a> { // raw is needed even though it's unused since ops is a reference to it; // the compiler doesn't know this since it's constructed using unsafe code. @@ -304,7 +305,7 @@ pub struct OpsArena<'a> { /// raw byte buffer of Ops arena allocator. raw: Vec, /// slice over Ops of the raw buffer. 
- ops: &'a [RawOp], + ops: &'a [C_Op], } impl<'a> OpsArena<'a> { @@ -312,15 +313,15 @@ impl<'a> OpsArena<'a> { let header = ArenaHeader::from_bytes(&bytes) .wrap_err("Failed to create ArenaHeader for OpsArena")?; - if ((header.used - size_of::()) % size_of::()) != 0 { + if ((header.used - size_of::()) % size_of::()) != 0 { return Err(ArenaError::Misaligned.into()); } - let count = (header.used - size_of::()) / size_of::(); + let count = (header.used - size_of::()) / size_of::(); log::debug!("[unsafe] converting Vec to &[RawOp] of size {}", count); let ops = unsafe { - let ptr = bytes.as_ptr().add(size_of::()) as *const RawOp; + let ptr = bytes.as_ptr().add(size_of::()) as *const C_Op; std::slice::from_raw_parts(ptr, count) }; @@ -336,10 +337,11 @@ impl<'a> OpsArena<'a> { } } -/// Arena allocator metadata placed at the beginning of allocator files by libprobe. +/// Arena allocator metadata placed at the beginning of arena files by libprobe. #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ArenaHeader { + // TODO: check instantiation (requires filename) instantiation: libc::size_t, base_address: libc::uintptr_t, capacity: libc::uintptr_t, @@ -397,15 +399,21 @@ impl ArenaHeader { #[derive(Debug, thiserror::Error)] pub enum ArenaError { + /// Returned if an [`ArenaHeader`] was construction was attempted with a byte buffer smaller + /// than an [`ArenaHeader`]. #[error("Arena buffer too small, got {got}, minimum size {needed}")] BufferTooSmall { got: usize, needed: usize }, + /// Returned if the [`ArenaHeader`]'s capacity value doesn't match the size of the byte buffer. #[error("Invalid arena capacity, expected {expected}, got {actual}")] InvalidCapacity { expected: usize, actual: usize }, + /// Returned if the [`ArenaHeader`]'s size value is larger than the capacity value. This #[error("Arena size {size} is greater than capacity {capacity}")] InvalidSize { size: usize, capacity: usize }, + /// Returned if an [`OpsArena`]'s size isn't isn't `HEADER_SIZE + (N * OP_SIZE)` when `N` is + /// some integer. 
#[error("Arena alignment error: used arena size minus header isn't a multiple of op size")] Misaligned, } diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 442c040c..1aab9c30 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -6,6 +6,7 @@ use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident, Type}; mod pygen; +// TODO: return compiler error instead of panicking on error #[proc_macro_derive(MakeRustOp)] pub fn make_rust_op(input: TokenStream) -> TokenStream { let original_struct = parse_macro_input!(input as DeriveInput); @@ -37,8 +38,8 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { let new_name = Ident::new( ident .to_string() - .strip_prefix("Bindgen_") - .expect("struct name doesn't start with 'Bindgen_'"), + .strip_prefix("C_") + .expect("struct name doesn't start with 'C_'"), Span::call_site(), ); @@ -83,19 +84,17 @@ fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { Type::Array(new) } syn::Type::Path(inner) => { - if let Some(name) = type_basename(inner).to_string().strip_prefix("Bindgen_") { + if let Some(name) = type_basename(inner).to_string().strip_prefix("C_") { let name = Ident::new(name, Span::mixed_site()); parse_quote!(#name) } else { Type::Path(inner.clone()) } } - // FIXME: return a proper error instead of just panicking _ => unimplemented!("unsupported bindgen type conversion"), } } -// FIXME: return a proper error instead of just panicking pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { if ty.qself.is_some() { unimplemented!("qualified self-typs not supported"); @@ -104,6 +103,7 @@ pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { &ty.path.segments.last().expect("type has no segments").ident } +// TODO: return compiler error instead of panicking on error #[proc_macro_derive(MakePyDataclass)] pub fn make_py_dataclass(input: TokenStream) -> TokenStream { let source = parse_macro_input!(input as DeriveInput); diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index ba0673c4..7346a3b7 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -1,9 +1,14 @@ +use std::collections::HashSet; use std::fmt::Display; use std::fs::File; use std::io::Write; use std::sync::{OnceLock, RwLock}; use syn::{Data, Fields}; +// hashset of previously generated dataclass, this is used during type conversion to ensure that +// every type in a dataclass is an already generated (or is a primitive type). 
+static GENERATED_TYPES: OnceLock>> = OnceLock::new(); + /// statically defined python code that gets added to the begining of the outputed file const PYGEN_PREAMBLE: &str = " # This file is automatically @generated by probe_macros @@ -35,7 +40,7 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { }) .collect::>(); - write_pygen(basic_dataclass(ident.to_string(), &pairs)); + write_dataclass(basic_dataclass(ident.to_string(), &pairs)); } Data::Enum(data_enum) => { // let mut dataclass = format!("@dataclass(init=False)\nclass {}:\n", ident); @@ -109,7 +114,7 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { init.set_args(args); dataclass.set_init(Some(init)); - write_pygen(dataclass); + write_dataclass(dataclass); } Data::Union(_data_union) => unimplemented!(), }; @@ -152,6 +157,8 @@ fn convert_to_pytype(ty: &syn::Type) -> String { syn::Type::Path(inner) => { let name = crate::type_basename(inner).to_string(); match name.as_str() { + // that's a lot of ways to say "int", python ints are bigints so we don't have to + // care about size "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" | "__suseconds_t" | "__syscall_slong_t" | "__syseconds_t" | "__time_t" | "__u16" | "__u32" | "__u64" | "__uid_t" | "c_int" | "c_long" | "c_uint" @@ -159,24 +166,44 @@ fn convert_to_pytype(ty: &syn::Type) -> String { "int".to_owned() } + // CStrings are serialized as an array of bytes, so it makes sense to load them + // into python as bytes "CString" => "bytes".to_owned(), - _ => name, + // bool types are basically the same everywhere + "bool" => name, + + // other types are checked to see if it's a dataclass we've already written, if it + // is we can simply pass it through unchanged, otherwise we don't know how to + // convert this type and we panic. + // + // FIXME: this approach works fine for generation when running `cargo build` but + // rust-analyzer indicates that the proc-macro paniced; they're probably being + // processed in a compartmentalized manner in rust-analyzer. 
+ _ => { + let types = GENERATED_TYPES + .get_or_init(|| RwLock::new(HashSet::new())) + .read() + .expect("python generated types rwlock poisioned"); + + if !types.contains(&name) { + panic!("Can't convert type '{}' to a python type", name); + } + + name + } } } _ => unimplemented!("unsupported type type"), } } -fn write_pygen(item: impl Display) { +fn write_dataclass(item: Dataclass) { static DATACLASSES: OnceLock> = OnceLock::new(); let mut writer = DATACLASSES .get_or_init(|| { - let mut file = File::create(concat!( - env!("CARGO_MANIFEST_DIR"), - "/../python/generated/ops.py" - )) - .expect("unable to create ops.py"); + let mut file = File::create(concat!(env!("CARGO_MANIFEST_DIR"), "/../python/ops.py")) + .expect("unable to create ops.py"); file.write_all(PYGEN_PREAMBLE.as_bytes()) .expect("failed to write preamble"); RwLock::new(file) @@ -184,11 +211,17 @@ fn write_pygen(item: impl Display) { .write() .expect("python dataclasses rwlock poisioned"); writeln!(writer, "{}", item).expect("failed to write pygen"); + + GENERATED_TYPES + .get_or_init(|| RwLock::new(HashSet::new())) + .write() + .expect("python generated types rwlock poisioned") + .insert(item.name); } struct Dataclass { indent: usize, - name: String, + pub name: String, inclasses: Vec, items: Vec, init: Option, diff --git a/probe_src/probe_frontend/python/generated/ops.py b/probe_src/probe_frontend/python/ops.py similarity index 100% rename from probe_src/probe_frontend/python/generated/ops.py rename to probe_src/probe_frontend/python/ops.py diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py index 94f830be..c0d6cf9e 100644 --- a/probe_src/probe_frontend/python/probe.py +++ b/probe_src/probe_frontend/python/probe.py @@ -2,7 +2,7 @@ import typing import json import subprocess -import generated.ops as ops +from . import ops OpTable = typing.Mapping[int, typing.Mapping[int, typing.Mapping[int, typing.List[ops.Op]]]] From 436408bee20d6f71a80550574b91a8a8cc594839 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 18:17:53 -0500 Subject: [PATCH 14/37] Update README.md --- probe_src/probe_frontend/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md index f57bf3fb..48c90f04 100644 --- a/probe_src/probe_frontend/README.md +++ b/probe_src/probe_frontend/README.md @@ -10,9 +10,7 @@ of terminology specific to this tool. 
- **Probe record** (or probe recording) This is a directory (`probe_record` by default) that contains raw arena -allocator `*.dat` files created by libprobe aranged in a `//` -(see the top-level repo glossary for an explanation of an Exec epoch) hierarchy, -these files contain +allocator `*.dat` files created by libprobe, these files contain [mmap(2)](https://www.man7.org/linux/man-pages/man2/mmap.2.html)-ed c structures and are not guaranteed to valid if moved to a computer with a different architecture, kernel version, or c compiler (or if any of those things change on From 316c1c62a4dcd803fb2d84b4d7ae179912194281 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 30 Jun 2024 18:24:57 -0500 Subject: [PATCH 15/37] typos --- probe_src/probe_frontend/README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md index 48c90f04..1134f68f 100644 --- a/probe_src/probe_frontend/README.md +++ b/probe_src/probe_frontend/README.md @@ -17,10 +17,9 @@ architecture, kernel version, or c compiler (or if any of those things change on the same computer). - **Probe log** -This is a directory **or** file (`probe_log` by default) that encodes the data -from a probe record in a format that is cross-platform and much easier to use; a -probe log file is just a gzip-ed tarball containing a probe log directory. (see -the section on serialization format) +This is a directory or file (`probe_log` by default) that encodes the data +from a probe record in a format that is cross-platform and much easier to use. +(see the section on serialization format for details). - **Transcription** This is the process of converting a probe record to a probe log. @@ -74,8 +73,8 @@ probe record -- bash -c ''` ### Probe record directory -The exact format of the probe record directory is defined by libprobe and not -part of this tool's spec, however a best-effort explanation is still given. +The format of the probe record directory is defined by libprobe and not part of +this tool's spec, however a best-effort explanation is still given. - Each probe record directory is composed of a top-level directory containing one or more PID directories. @@ -97,9 +96,9 @@ who's provenance is recorded inside it, it contains two subdirectories named called "data arenas", while those in the `ops` directory are called "op arenas". - Each op arena is a binary file containing an arena header followed by zero or -more raw op c-structs, followed by zero or more null bytes. +more op c structs, followed by zero or more null bytes. -- Each data arena is a binary file containing and arena header followed by zero +- Each data arena is a binary file containing an arena header followed by zero or more bytes of arbitrary data, followed by zero or more null bytes. 
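As a rough illustration of the record-directory layout described by the bullets above, the following Python sketch inventories the arena files of a probe record directory. It assumes the `<pid>/<exec_epoch>/<tid>/{data,ops}/*.dat` layout and the default `probe_record` name, and it deliberately does not parse the libprobe-defined binary contents.

```python
# Rough sketch: list the arena files of a probe record directory laid out as
# <pid>/<exec_epoch>/<tid>/{data,ops}/*.dat; contents are libprobe-defined,
# so only file names and sizes are reported here.
from pathlib import Path

def inventory(record_dir: str) -> None:
    for dat in sorted(Path(record_dir).glob("*/*/*/*/*.dat")):
        pid, epoch, tid, kind = dat.parts[-5:-1]
        print(f"pid={pid} exec_epoch={epoch} tid={tid} {kind}: "
              f"{dat.name} ({dat.stat().st_size} bytes)")

inventory("probe_record")
```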
### Probe log directory From 3e332b2b7de9770ba0fa854968b31d37873ed331 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Mon, 1 Jul 2024 19:33:02 -0500 Subject: [PATCH 16/37] Fixed typo Co-authored-by: Sam Grayson --- probe_src/probe_frontend/cli/src/record.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index b59db563..8497afae 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -168,7 +168,7 @@ impl Recorder { .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; if !any_files { log::warn!( - "No arean files detected after 50ms, \ + "No arena files detected after 50ms, \ something is wrong, you should probably abort!" ); } From 1dd3bee5c493b1d18b58a21bd871cd9b2061d389 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 2 Jul 2024 09:52:04 -0500 Subject: [PATCH 17/37] code review improvements --- .../probe_frontend/lib/src/transcribe.rs | 4 +- probe_src/probe_frontend/macros/src/pygen.rs | 54 ++++++--- probe_src/probe_frontend/python/ops.py | 108 +++++++++--------- 3 files changed, 97 insertions(+), 69 deletions(-) diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index 52b49023..31232479 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -206,8 +206,8 @@ pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R }) }) // STEP 6 - .try_for_each(|x| { - for op in x? { + .try_for_each(|arena_file_ops| { + for op in arena_file_ops? { outfile .write_all( serde_json::to_string(&op) diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 7346a3b7..0f256722 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -23,6 +23,7 @@ mod = sys.modules[__name__] pub fn make_py_dataclass_internal(input: syn::DeriveInput) { let syn::DeriveInput { data, ident, .. 
} = input.clone(); + let ident = pascal_to_snake_case(&ident.to_string()); match data { Data::Struct(data_struct) => { @@ -34,17 +35,19 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { let pairs = fields .named .iter() - .map(|x| { - let ident = x.ident.as_ref().unwrap(); - (ident.to_string(), convert_to_pytype(&x.ty)) + .map(|field| { + ( + field.ident.as_ref().unwrap().to_string(), + convert_to_pytype(&field.ty), + ) }) .collect::>(); - write_dataclass(basic_dataclass(ident.to_string(), &pairs)); + write_dataclass(basic_dataclass(ident, &pairs)); } Data::Enum(data_enum) => { // let mut dataclass = format!("@dataclass(init=False)\nclass {}:\n", ident); - let mut dataclass = Dataclass::new(ident.to_string()); + let mut dataclass = Dataclass::new(ident); let mut init = DataclassInit::new(); let mut args = InitArgs::new(); @@ -54,14 +57,16 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { for variant in data_enum.variants { match variant.fields { syn::Fields::Named(inner) => { - let name = variant.ident.to_string(); + let name = pascal_to_snake_case(&variant.ident.to_string()); let pairs = inner .named .iter() - .map(|x| { - let name = x.ident.as_ref().unwrap(); - (name.to_string(), convert_to_pytype(&x.ty)) + .map(|field| { + ( + pascal_to_snake_case(&field.ident.as_ref().unwrap().to_string()), + convert_to_pytype(&field.ty), + ) }) .collect::>(); @@ -181,16 +186,18 @@ fn convert_to_pytype(ty: &syn::Type) -> String { // rust-analyzer indicates that the proc-macro paniced; they're probably being // processed in a compartmentalized manner in rust-analyzer. _ => { + let snake_case = pascal_to_snake_case(&name); + let types = GENERATED_TYPES .get_or_init(|| RwLock::new(HashSet::new())) .read() .expect("python generated types rwlock poisioned"); - if !types.contains(&name) { - panic!("Can't convert type '{}' to a python type", name); + if types.contains(&snake_case) { + snake_case + } else { + panic!("Can't convert type '{}' to a python type", snake_case); } - - name } } } @@ -198,6 +205,27 @@ fn convert_to_pytype(ty: &syn::Type) -> String { } } +fn pascal_to_snake_case(ident: &str) -> String { + // this primitive lookback is needed so that names with repeated capitals like SocketTCP get + // turned into socket_tcp and not socket_t_c_p + let mut prior_upper = true; + ident + .chars() + .fold(String::new(), |mut acc, ch| { + if ch.is_uppercase() { + if !prior_upper { + acc.push('_') + } + ch.to_lowercase().for_each(|lower_ch| acc.push(lower_ch)); + prior_upper = true; + } else { + acc.push(ch); + prior_upper = false; + } + acc + }) +} + fn write_dataclass(item: Dataclass) { static DATACLASSES: OnceLock> = OnceLock::new(); let mut writer = DATACLASSES diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index e7188e17..fe2106f6 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -116,7 +116,7 @@ def __init__(self, ru_utime: timeval, ru_stime: timeval, ru_maxrss: int, ru_ixrs self.ru_nivcsw = int(ru_nivcsw) @dataclass(init=False) -class Path: +class path: dirfd_minus_at_fdcwd: int path: bytes device_major: int @@ -138,13 +138,13 @@ def __init__(self, dirfd_minus_at_fdcwd: int, path: bytes, device_major: int, de self.dirfd_valid = bool(dirfd_valid) @dataclass(init=False) -class InitProcessOp: +class init_process_op: pid: int def __init__(self, pid: int): self.pid = int(pid) @dataclass(init=False) -class InitExecEpochOp: +class init_exec_epoch_op: epoch: int program_name: bytes def 
__init__(self, epoch: int, program_name: bytes): @@ -152,27 +152,27 @@ def __init__(self, epoch: int, program_name: bytes): self.program_name = bytes(program_name) @dataclass(init=False) -class InitThreadOp: +class init_thread_op: tid: int def __init__(self, tid: int): self.tid = int(tid) @dataclass(init=False) -class OpenOp: - path: Path +class open_op: + path: path flags: int mode: int fd: int ferrno: int - def __init__(self, path: Path, flags: int, mode: int, fd: int, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, flags: int, mode: int, fd: int, ferrno: int): + self.path = path(**path) self.flags = int(flags) self.mode = int(mode) self.fd = int(fd) self.ferrno = int(ferrno) @dataclass(init=False) -class CloseOp: +class close_op: low_fd: int high_fd: int ferrno: int @@ -182,23 +182,23 @@ def __init__(self, low_fd: int, high_fd: int, ferrno: int): self.ferrno = int(ferrno) @dataclass(init=False) -class ChdirOp: - path: Path +class chdir_op: + path: path ferrno: int - def __init__(self, path: Path, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, ferrno: int): + self.path = path(**path) self.ferrno = int(ferrno) @dataclass(init=False) -class ExecOp: - path: Path +class exec_op: + path: path ferrno: int - def __init__(self, path: Path, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, ferrno: int): + self.path = path(**path) self.ferrno = int(ferrno) @dataclass(init=False) -class CloneOp: +class clone_op: flags: int run_pthread_atfork_handlers: bool child_process_id: int @@ -212,7 +212,7 @@ def __init__(self, flags: int, run_pthread_atfork_handlers: bool, child_process_ self.ferrno = int(ferrno) @dataclass(init=False) -class ExitOp: +class exit_op: status: int run_atexit_handlers: bool def __init__(self, status: int, run_atexit_handlers: bool): @@ -220,43 +220,43 @@ def __init__(self, status: int, run_atexit_handlers: bool): self.run_atexit_handlers = bool(run_atexit_handlers) @dataclass(init=False) -class AccessOp: - path: Path +class access_op: + path: path mode: int flags: int ferrno: int - def __init__(self, path: Path, mode: int, flags: int, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, mode: int, flags: int, ferrno: int): + self.path = path(**path) self.mode = int(mode) self.flags = int(flags) self.ferrno = int(ferrno) @dataclass(init=False) -class StatOp: - path: Path +class stat_op: + path: path flags: int statx_buf: statx ferrno: int - def __init__(self, path: Path, flags: int, statx_buf: statx, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, flags: int, statx_buf: statx, ferrno: int): + self.path = path(**path) self.flags = int(flags) self.statx_buf = statx(**statx_buf) self.ferrno = int(ferrno) @dataclass(init=False) -class ReaddirOp: - dir: Path +class readdir_op: + dir: path child: bytes all_children: bool ferrno: int - def __init__(self, dir: Path, child: bytes, all_children: bool, ferrno: int): - self.dir = Path(**dir) + def __init__(self, dir: path, child: bytes, all_children: bool, ferrno: int): + self.dir = path(**dir) self.child = bytes(child) self.all_children = bool(all_children) self.ferrno = int(ferrno) @dataclass(init=False) -class WaitOp: +class wait_op: pid: int options: int status: int @@ -270,7 +270,7 @@ def __init__(self, pid: int, options: int, status: int, ret: int, ferrno: int): self.ferrno = int(ferrno) @dataclass(init=False) -class GetRUsageOp: +class get_rusage_op: waitpid_arg: int getrusage_arg: int usage: rusage @@ -282,25 
+282,25 @@ def __init__(self, waitpid_arg: int, getrusage_arg: int, usage: rusage, ferrno: self.ferrno = int(ferrno) @dataclass(init=False) -class ReadLinkOp: - path: Path +class read_link_op: + path: path resolved: bytes ferrno: int - def __init__(self, path: Path, resolved: bytes, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, resolved: bytes, ferrno: int): + self.path = path(**path) self.resolved = bytes(resolved) self.ferrno = int(ferrno) @dataclass(init=False) -class Metadata: +class metadata: @dataclass(init=False) - class Mode: + class mode: mode: int def __init__(self, mode: int): self.mode = int(mode) @dataclass(init=False) - class Ownership: + class ownership: uid: int gid: int def __init__(self, uid: int, gid: int): @@ -308,7 +308,7 @@ def __init__(self, uid: int, gid: int): self.gid = int(gid) @dataclass(init=False) - class Times: + class times: is_null: bool atime: timeval mtime: timeval @@ -317,7 +317,7 @@ def __init__(self, is_null: bool, atime: timeval, mtime: timeval): self.atime = timeval(**atime) self.mtime = timeval(**mtime) - value: typing.Union[Mode, Ownership, Times] + value: typing.Union[mode, ownership, times] def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): if len(kwargs) != 1: raise ValueError("Malformed Enum constructor args") @@ -328,20 +328,20 @@ def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): self.value = mod.__dict__[key](**kwargs[key]) @dataclass(init=False) -class UpdateMetadataOp: - path: Path +class update_metadata_op: + path: path flags: int - metadata: Metadata + metadata: metadata ferrno: int - def __init__(self, path: Path, flags: int, metadata: Metadata, ferrno: int): - self.path = Path(**path) + def __init__(self, path: path, flags: int, metadata: metadata, ferrno: int): + self.path = path(**path) self.flags = int(flags) - self.metadata = Metadata(**metadata) + self.metadata = metadata(**metadata) self.ferrno = int(ferrno) @dataclass(init=False) -class OpInternal: - value: typing.Union[InitProcessOp, InitExecEpochOp, InitThreadOp, OpenOp, CloseOp, ChdirOp, ExecOp, CloneOp, ExitOp, AccessOp, StatOp, ReaddirOp, WaitOp, GetRUsageOp, UpdateMetadataOp, ReadLinkOp] +class op_internal: + value: typing.Union[init_process_op, init_exec_epoch_op, init_thread_op, open_op, close_op, chdir_op, exec_op, clone_op, exit_op, access_op, stat_op, readdir_op, wait_op, get_rusage_op, update_metadata_op, read_link_op] def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): if len(kwargs) != 1: raise ValueError("Malformed Enum constructor args") @@ -352,10 +352,10 @@ def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): self.value = mod.__dict__[key](**kwargs[key]) @dataclass(init=False) -class Op: - data: OpInternal +class op: + data: op_internal time: timespec - def __init__(self, data: OpInternal, time: timespec): - self.data = OpInternal(**data) + def __init__(self, data: op_internal, time: timespec): + self.data = op_internal(**data) self.time = timespec(**time) From c9be3e26bcc80d91465d670a3ef2b6acb3faba89 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Tue, 2 Jul 2024 23:26:17 -0500 Subject: [PATCH 18/37] improved pygen code --- probe_src/probe_frontend/Cargo.lock | 57 ++- probe_src/probe_frontend/cli/src/dump.rs | 5 +- probe_src/probe_frontend/flake.nix | 18 +- probe_src/probe_frontend/lib/src/ops.rs | 93 ++++ probe_src/probe_frontend/macros/Cargo.toml | 1 + probe_src/probe_frontend/macros/src/lib.rs | 42 +- probe_src/probe_frontend/macros/src/pygen.rs | 487 +++++++++---------- 
probe_src/probe_frontend/python/ops.py | 333 ++++--------- probe_src/probe_frontend/python/probe.py | 56 ++- 9 files changed, 569 insertions(+), 523 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index 62e571e1..713b236c 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -409,7 +409,7 @@ checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.4.1", "windows-sys", ] @@ -582,6 +582,16 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.21" @@ -694,6 +704,29 @@ version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.2", + "smallvec", + "windows-targets", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -753,6 +786,7 @@ dependencies = [ name = "probe_macros" version = "0.2.0" dependencies = [ + "parking_lot", "proc-macro2", "quote", "syn", @@ -835,6 +869,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "regex" version = "1.10.5" @@ -895,6 +938,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.203" @@ -941,6 +990,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index d1f21df0..6256829d 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -328,14 +328,15 @@ impl Dump for ops::ReaddirOp { impl Dump for ops::Metadata { fn dump(&self) -> String { match self { - 
ops::Metadata::Mode { mode } => format!("Mode[ mode={:#06o} ]", mode), - ops::Metadata::Ownership { uid, gid } => { + ops::Metadata::Mode { mode, .. } => format!("Mode[ mode={:#06o} ]", mode), + ops::Metadata::Ownership { uid, gid, .. } => { format!("Ownership[ uid={}, gid={} ]", uid, gid) } ops::Metadata::Times { is_null, atime, mtime, + .. } => format!( "Times[ is_null={}, atime={}, mtime={} ]", is_null, diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 1141e844..d8b4c87b 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -46,6 +46,11 @@ nativeBuildInputs = [ pkgs.rustPlatform.bindgenHook ]; + + # pygen needs to know where to write the python file + postUnpack = '' + export PYGEN_OUTFILE="$(realpath ./python)" + ''; }; # Build *just* the cargo dependencies (of the entire workspace), @@ -71,6 +76,10 @@ // { pname = "probe-frontend"; cargoExtraArgs = "-p probe_frontend"; + installPhase = '' + mkdir -p $out/python + cp -r ./python $out/ + ''; }); probe-cli = craneLib.buildPackage (individualCrateArgs // { @@ -81,10 +90,6 @@ // { pname = "probe-macros"; cargoExtraArgs = "-p probe_macros"; - installPhase = '' - mkdir -p $out - cp -r python $out/python - ''; }); in { checks = { @@ -134,7 +139,7 @@ }); probe-pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' - cp ${probe-macros}/python/ops.py $out + cp ${probe-frontend}/python/ops.py $out ${pkgs.python312}/bin/python $out ''; }; @@ -148,7 +153,8 @@ checks = self.checks.${system}; shellHook = '' - export __PROBE_LIB=$(realpath ../libprobe/build) + export __PROBE_LIB="$(realpath ../libprobe/build)" + export PYGEN_OUTFILE="$(realpath ./python/ops.py)" ''; packages = [ diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index c92e1ddb..eaddfaa5 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -88,20 +88,56 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs")); #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] pub enum Metadata { + #[serde(untagged)] Mode { mode: mode_t, + + #[serde(serialize_with = "Metadata::serialize_variant_mode")] + #[serde(skip_deserializing)] + _type: (), }, + #[serde(untagged)] Ownership { uid: uid_t, gid: gid_t, + + #[serde(serialize_with = "Metadata::serialize_variant_ownership")] + #[serde(skip_deserializing)] + _type: (), }, + #[serde(untagged)] Times { is_null: bool, atime: timeval, mtime: timeval, + + #[serde(serialize_with = "Metadata::serialize_variant_times")] + #[serde(skip_deserializing)] + _type: (), }, } +impl Metadata { + fn serialize_variant_mode( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Mode") + } + fn serialize_variant_ownership( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Ownership") + } + fn serialize_variant_times( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Times") + } +} + impl FfiFrom for Metadata { fn ffi_from(value: &C_UpdateMetadataOp, ctx: &ArenaContext) -> Result { let kind = value.kind; @@ -111,15 +147,21 @@ impl FfiFrom for Metadata { Ok(match kind { C_MetadataKind_MetadataMode => Metadata::Mode { mode: unsafe { value.mode }, + + _type: (), }, C_MetadataKind_MetadataOwnership => Metadata::Ownership { uid: unsafe { value.ownership }.uid, gid: unsafe { value.ownership }.gid, + + _type: (), }, C_MetadataKind_MetadataTimes => Metadata::Times { is_null: unsafe { value.times 
}.is_null, atime: unsafe { value.times }.atime.ffi_into(ctx)?, mtime: unsafe { value.times }.mtime.ffi_into(ctx)?, + + _type: (), }, _ => return Err(ProbeError::InvalidVariant(kind)), }) @@ -132,6 +174,19 @@ pub struct UpdateMetadataOp { pub flags: ::std::os::raw::c_int, pub metadata: Metadata, pub ferrno: ::std::os::raw::c_int, + + #[serde(serialize_with = "UpdateMetadataOp::serialize_type")] + #[serde(skip_deserializing)] + pub _type: (), +} + +impl UpdateMetadataOp { + fn serialize_type( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("UpdateMetadataOp") + } } impl FfiFrom for UpdateMetadataOp { @@ -146,27 +201,45 @@ impl FfiFrom for UpdateMetadataOp { inner: Box::new(e), })?, ferrno: value.ferrno, + + _type: (), }) } } #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] pub enum OpInternal { + #[serde(untagged)] InitProcessOp(InitProcessOp), + #[serde(untagged)] InitExecEpochOp(InitExecEpochOp), + #[serde(untagged)] InitThreadOp(InitThreadOp), + #[serde(untagged)] OpenOp(OpenOp), + #[serde(untagged)] CloseOp(CloseOp), + #[serde(untagged)] ChdirOp(ChdirOp), + #[serde(untagged)] ExecOp(ExecOp), + #[serde(untagged)] CloneOp(CloneOp), + #[serde(untagged)] ExitOp(ExitOp), + #[serde(untagged)] AccessOp(AccessOp), + #[serde(untagged)] StatOp(StatOp), + #[serde(untagged)] ReaddirOp(ReaddirOp), + #[serde(untagged)] WaitOp(WaitOp), + #[serde(untagged)] GetRUsageOp(GetRUsageOp), + #[serde(untagged)] UpdateMetadataOp(UpdateMetadataOp), + #[serde(untagged)] ReadLinkOp(ReadLinkOp), } @@ -214,6 +287,19 @@ impl FfiFrom for OpInternal { pub struct Op { pub data: OpInternal, pub time: timespec, + + #[serde(serialize_with = "Op::serialize_type")] + #[serde(skip_deserializing)] + pub _type: (), +} + +impl Op { + fn serialize_type( + _: &(), + serializer: S, + ) -> std::result::Result { + serializer.serialize_str("Op") + } } impl FfiFrom for Op { @@ -221,6 +307,13 @@ impl FfiFrom for Op { Ok(Self { data: value.ffi_into(ctx)?, time: value.time.ffi_into(ctx)?, + + _type: (), }) } } + +// WARNING: this macro invokation must come after all structs that implement MakePyDataclass +// (including classes that implement MakeRustOp, who's daughter classes implement MakePyDataclass) +// for python codegen to work properly +probe_macros::write_pygen_file_from_env!("PYGEN_OUTFILE"); diff --git a/probe_src/probe_frontend/macros/Cargo.toml b/probe_src/probe_frontend/macros/Cargo.toml index 6b04c001..bbf84e6f 100644 --- a/probe_src/probe_frontend/macros/Cargo.toml +++ b/probe_src/probe_frontend/macros/Cargo.toml @@ -10,6 +10,7 @@ name = "probe_macros" proc-macro = true [dependencies] +parking_lot = "0.12.3" # darling = "0.20.9" proc-macro2 = "1.0.86" quote = "1.0.36" diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 1aab9c30..98c6c6ab 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -43,24 +43,51 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { Span::call_site(), ); + let msgs = field_idents + .iter() + .map(|field_ident| { + format!( + "Error calling ffi_into() on {} while creating {}", + field_ident, new_name + ) + }) + .collect::>(); + + let serialize_type_path = format!("{}::serialize_type", new_name); + let type_name = new_name.to_string(); + // This is rather bad macro hygiene, but this macro is only intend for probe_frontend's // op struct generation, so we're playing a little fast-n'-loose with scoping. quote! 
{ #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] pub struct #new_name { #(pub #field_idents: #field_types,)* + + /// this is a placeholder field that get's serialized as the type name + #[serde(serialize_with = #serialize_type_path)] + #[serde(skip_deserializing)] + pub _type: (), + } + + impl #new_name { + fn serialize_type( + _: &(), + serializer: S + ) -> std::result::Result { + serializer.serialize_str(#type_name) + } } impl FfiFrom<#ident> for #new_name { fn ffi_from(value: &#ident, ctx: &ArenaContext) -> Result { Ok(Self { + _type: (), #( #field_idents: value.#field_idents .ffi_into(ctx) .map_err(|e| { ProbeError::FFiConversionError { - msg: "Error calling ffi_into() on\ - #field_idents creating #new_name", + msg: #msgs, inner: Box::new(e), } })?, @@ -91,7 +118,7 @@ fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { Type::Path(inner.clone()) } } - _ => unimplemented!("unsupported bindgen type conversion"), + _ => unreachable!("unsupported bindgen type conversion"), } } @@ -111,3 +138,12 @@ pub fn make_py_dataclass(input: TokenStream) -> TokenStream { // return empty token stream, we're not actually writing rust here TokenStream::new() } + +// TODO: return compiler error instead of panicking on error +#[proc_macro] +pub fn write_pygen_file_from_env(item: TokenStream) -> TokenStream { + let path = parse_macro_input!(item as syn::LitStr); + pygen::write_pygen_internal(path); + // return empty token stream, we're not actually writing rust here + TokenStream::new() +} diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 0f256722..7f7e2714 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -1,29 +1,18 @@ -use std::collections::HashSet; +use parking_lot::RwLock; use std::fmt::Display; use std::fs::File; use std::io::Write; -use std::sync::{OnceLock, RwLock}; +use std::sync::OnceLock; use syn::{Data, Fields}; -// hashset of previously generated dataclass, this is used during type conversion to ensure that -// every type in a dataclass is an already generated (or is a primitive type). -static GENERATED_TYPES: OnceLock>> = OnceLock::new(); - -/// statically defined python code that gets added to the begining of the outputed file -const PYGEN_PREAMBLE: &str = " -# This file is automatically @generated by probe_macros - -import sys -import typing -from dataclasses import dataclass - -mod = sys.modules[__name__] - -"; +fn pygen_file() -> &'static RwLock { + static INNER: OnceLock> = OnceLock::new(); + INNER.get_or_init(|| RwLock::new(PygenFile::new())) +} pub fn make_py_dataclass_internal(input: syn::DeriveInput) { let syn::DeriveInput { data, ident, .. } = input.clone(); - let ident = pascal_to_snake_case(&ident.to_string()); + let ident = snake_case_to_pascal(&ident.to_string()); match data { Data::Struct(data_struct) => { @@ -35,21 +24,26 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { let pairs = fields .named .iter() - .map(|field| { - ( + .filter_map(|field| { + if let syn::Type::Tuple(syn::TypeTuple { elems, .. 
}) = &field.ty { + // this is the unit type, so we just skip it + if elems.is_empty() { + return None; + } + } + + Some(( field.ident.as_ref().unwrap().to_string(), convert_to_pytype(&field.ty), - ) + )) }) .collect::>(); - write_dataclass(basic_dataclass(ident, &pairs)); + let dataclass = basic_dataclass(ident, &pairs); + pygen_file().write().add_class(dataclass); } Data::Enum(data_enum) => { - // let mut dataclass = format!("@dataclass(init=False)\nclass {}:\n", ident); - let mut dataclass = Dataclass::new(ident); - let mut init = DataclassInit::new(); - let mut args = InitArgs::new(); + let mut enu = Enum::new(ident); // this is the types that the produced union is over let mut variants = vec![]; @@ -57,20 +51,28 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { for variant in data_enum.variants { match variant.fields { syn::Fields::Named(inner) => { - let name = pascal_to_snake_case(&variant.ident.to_string()); + let name = variant.ident.to_string(); let pairs = inner .named .iter() - .map(|field| { - ( - pascal_to_snake_case(&field.ident.as_ref().unwrap().to_string()), + .filter_map(|field| { + if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &field.ty { + // this is the unit type, so we just skip it + if elems.is_empty() { + return None; + } + } + + Some(( + field.ident.as_ref().unwrap().to_string(), convert_to_pytype(&field.ty), - ) + )) }) .collect::>(); - dataclass.add_inclass(basic_dataclass(name.clone(), &pairs)); + // dataclass.add_inclass(basic_dataclass(name.clone(), &pairs)); + enu.add_variant_owned_class(basic_dataclass(name.clone(), &pairs)); variants.push(name); } syn::Fields::Unnamed(inner) => { @@ -78,79 +80,25 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { if fields.len() != 1 { unimplemented!("Tuple enums of length != 1 not supported") } - variants.push(convert_to_pytype(&fields[0].ty)); + enu.add_variant_ref(convert_to_pytype(&fields[0].ty)); } syn::Fields::Unit => unimplemented!("Unit enum variants not supported"), } } - // here we merge the variants together in a python union - let union_type = format!( - "typing.Union[{}]", - variants - .iter() - .fold(String::new(), |mut acc, x| { - acc.push_str(x); - acc.push_str(", "); - - acc - }) - .strip_suffix(", ") - .expect("union had no variants") - ); - dataclass.add_item(DataclassItem::new("value".to_owned(), union_type)); - - args.add( - "**kwargs".to_owned(), - "typing.Mapping[str, typing.Any]".to_owned(), - ); - // add custom init that does some quasi-quoting hackery - [ - "if len(kwargs) != 1:", - " raise ValueError(\"Malformed Enum constructor args\")", - "key = list(kwargs.keys())[0]", - "if key in self.__class__.__dict__:", - " self.value = self.__class__.__dict__[key](**kwargs[key])", - "else:", - " self.value = mod.__dict__[key](**kwargs[key])", - ] - .into_iter() - .for_each(|line| init.add_line(line.to_owned())); - - init.set_args(args); - dataclass.set_init(Some(init)); - write_dataclass(dataclass); + pygen_file().write().add_enum(enu); } Data::Union(_data_union) => unimplemented!(), }; } fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass { - // this function take a type and identifier that's part of the argumetns to the init fucnction - // and creates the expression for converting it for sotrage in the dataclass, basically this - // means running primitive types through their type constructor to validate them and for other - // dataclasses the arg get unpacked and passed to the relevant class constructor. 
- fn make_conversion(ident: &str, ty: &str) -> String { - match ty { - // don't unpack primitive types - "bytes" | "int" | "str" | "bool" => format!("{}({})", ty, ident), - _ => format!("{}(**{})", ty, ident), - } - } - let mut dataclass = Dataclass::new(name); - let mut init = DataclassInit::new(); - let mut args = InitArgs::new(); for (ident, ty) in pairs { dataclass.add_item(DataclassItem::new(ident.clone(), ty.clone())); - init.add_line(format!("self.{} = {}", ident, make_conversion(ident, ty))); - args.add(ident.clone(), ty.clone()); } - init.set_args(args); - dataclass.set_init(Some(init)); - dataclass } @@ -178,83 +126,154 @@ fn convert_to_pytype(ty: &syn::Type) -> String { // bool types are basically the same everywhere "bool" => name, - // other types are checked to see if it's a dataclass we've already written, if it - // is we can simply pass it through unchanged, otherwise we don't know how to - // convert this type and we panic. - // - // FIXME: this approach works fine for generation when running `cargo build` but - // rust-analyzer indicates that the proc-macro paniced; they're probably being - // processed in a compartmentalized manner in rust-analyzer. - _ => { - let snake_case = pascal_to_snake_case(&name); - - let types = GENERATED_TYPES - .get_or_init(|| RwLock::new(HashSet::new())) - .read() - .expect("python generated types rwlock poisioned"); - - if types.contains(&snake_case) { - snake_case - } else { - panic!("Can't convert type '{}' to a python type", snake_case); - } - } + _ => snake_case_to_pascal(&name), } } _ => unimplemented!("unsupported type type"), } } -fn pascal_to_snake_case(ident: &str) -> String { - // this primitive lookback is needed so that names with repeated capitals like SocketTCP get - // turned into socket_tcp and not socket_t_c_p - let mut prior_upper = true; - ident +pub(crate) fn snake_case_to_pascal(input: &str) -> String { + input .chars() - .fold(String::new(), |mut acc, ch| { - if ch.is_uppercase() { - if !prior_upper { - acc.push('_') - } - ch.to_lowercase().for_each(|lower_ch| acc.push(lower_ch)); - prior_upper = true; + .fold((true, String::new()), |(prior_underscore, mut acc), ch| { + if ch == '_' { + return (true, acc); + } else if prior_underscore { + ch.to_uppercase().for_each(|x| acc.push(x)) } else { - acc.push(ch); - prior_upper = false; + acc.push(ch) } - acc + (false, acc) }) + .1 } -fn write_dataclass(item: Dataclass) { - static DATACLASSES: OnceLock> = OnceLock::new(); - let mut writer = DATACLASSES - .get_or_init(|| { - let mut file = File::create(concat!(env!("CARGO_MANIFEST_DIR"), "/../python/ops.py")) - .expect("unable to create ops.py"); - file.write_all(PYGEN_PREAMBLE.as_bytes()) - .expect("failed to write preamble"); - RwLock::new(file) - }) - .write() - .expect("python dataclasses rwlock poisioned"); - writeln!(writer, "{}", item).expect("failed to write pygen"); - - GENERATED_TYPES - .get_or_init(|| RwLock::new(HashSet::new())) - .write() - .expect("python generated types rwlock poisioned") - .insert(item.name); +pub(crate) fn write_pygen_internal(path: syn::LitStr) { + let path = path.value(); + let path = std::env::var_os(&path) + .unwrap_or_else(|| panic!("Environment variable '{}' not defined", path)); + + let mut file = File::create(&path).unwrap_or_else(|e| { + panic!( + "unable to create file '{}' when writing pygen file: {}", + path.to_string_lossy(), + e + ) + }); + + pygen_file().write().prepend_preamble( + [ + "from __future__ import annotations", + "import typing", + "from dataclasses import 
dataclass\n", + ] + .into_iter() + .map(|x| x.to_owned()) + .collect(), + ); + + writeln!(file, "{}", pygen_file().read()).expect("Failed to write pygen file"); +} + +#[derive(Debug, Clone)] +struct PygenFile { + preamble: Vec, + classes: Vec, + enums: Vec, } +#[derive(Debug, Clone)] +struct Enum { + indent: usize, + pub name: String, + variants_owned_class: Vec, + variants_owned_enum: Vec, + variants_ref: Vec, +} + +#[derive(Debug, Clone)] struct Dataclass { indent: usize, pub name: String, inclasses: Vec, items: Vec, - init: Option, } +#[derive(Debug, Clone)] +struct DataclassItem { + indent: usize, + name: String, + ty: String, +} + +#[allow(dead_code)] +impl PygenFile { + pub fn new() -> Self { + Self { + preamble: vec![], + classes: vec![], + enums: vec![], + } + } + + pub fn add_class(&mut self, class: Dataclass) { + self.classes.push(class); + } + + pub fn add_enum(&mut self, enu: Enum) { + self.enums.push(enu); + } + + pub fn prepend_preamble(&mut self, mut lines: Vec) { + lines.extend(std::mem::take(&mut self.preamble)); + self.preamble = lines; + } + + pub fn append_preamble(&mut self, lines: Vec) { + self.preamble.extend(lines); + } +} + +#[allow(dead_code)] +impl Enum { + pub fn new(name: String) -> Self { + Self { + indent: 0, + name, + variants_owned_class: vec![], + variants_owned_enum: vec![], + variants_ref: vec![], + } + } + + pub fn add_variant_owned_class(&mut self, mut item: Dataclass) { + item.set_indent(self.indent); + self.variants_owned_class.push(item); + } + + pub fn add_variant_owned_enum(&mut self, mut item: Enum) { + item.set_indent(self.indent); + self.variants_owned_enum.push(item); + } + + pub fn add_variant_ref(&mut self, item: String) { + self.variants_ref.push(item); + } + + pub fn set_indent(&mut self, indent: usize) { + for class in &mut self.variants_owned_class { + class.set_indent(indent); + } + for enu in &mut self.variants_owned_enum { + enu.set_indent(indent); + } + + self.indent = indent; + } +} + +#[allow(dead_code)] impl Dataclass { pub fn new(name: String) -> Self { Self { @@ -262,7 +281,6 @@ impl Dataclass { name, inclasses: vec![], items: vec![], - init: None, } } @@ -276,157 +294,124 @@ impl Dataclass { self.items.push(item) } - pub fn set_init(&mut self, init: Option) { - self.init = init.map(|mut x| { - x.set_indent(self.indent + 4); - x - }); - } - - pub fn set_indent(&mut self, mut indent: usize) -> usize { + pub fn set_indent(&mut self, indent: usize) { for inclass in &mut self.inclasses { inclass.set_indent(indent + 4); } for item in &mut self.items { item.set_indent(indent + 4); } - if let Some(init) = &mut self.init { - init.set_indent(indent + 4); + + self.indent = indent; + } +} + +impl DataclassItem { + pub fn new(name: String, ty: String) -> Self { + Self { + indent: 0, + name, + ty, } + } - std::mem::swap(&mut self.indent, &mut indent); - indent + pub fn set_indent(&mut self, indent: usize) { + self.indent = indent; } } -impl Display for Dataclass { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let name = self.name.as_str(); - let indent_str = " ".repeat(self.indent); - let gen_init = match self.init { - Some(_) => "False", - None => "True", - }; +// Display trait implementations for actual codegen - // write class signature - writeln!( - f, - "{indent_str}@dataclass(init={gen_init})\n\ - {indent_str}class {name}:" - )?; +impl Display for PygenFile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "# This file was @generated by probe_macros")?; - // write inner 
class definitions - for inclass in &self.inclasses { - writeln!(f, "{inclass}",)?; + for line in self.preamble.iter() { + writeln!(f, "{line}")?; } - // write dataclass fields - for item in &self.items { - writeln!(f, "{item}")?; + for class in self.classes.iter() { + writeln!(f, "{class}")?; } - // write init definition (if any) - if let Some(init) = &self.init { - write!(f, "{init}")?; + for enu in self.enums.iter() { + writeln!(f, "{enu}")?; } Ok(()) } } -struct DataclassItem { - indent: usize, - name: String, - ty: String, -} +impl Display for Enum { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn print_union_type(types: &[&str], f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if types.is_empty() { + write!(f, "None")?; + return Ok(()); + } + let mut iter = types.iter(); -impl DataclassItem { - pub fn new(name: String, ty: String) -> Self { - Self { - indent: 0, - name, - ty, - } - } + let first = iter.next().unwrap(); + write!(f, "{first}")?; - pub fn set_indent(&mut self, mut indent: usize) -> usize { - std::mem::swap(&mut self.indent, &mut indent); - indent - } -} + for ty in iter { + write!(f, " | {ty}")?; + } -impl Display for DataclassItem { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let &Self { name, ty, .. } = &self; - let indent_str = " ".repeat(self.indent); - write!(f, "{indent_str}{name}: {ty}") - } -} + Ok(()) + } -struct DataclassInit { - indent: usize, - args: InitArgs, - body: Vec, -} + let name = &self.name; + let mut acc = Vec::new(); -impl DataclassInit { - pub fn new() -> Self { - Self { - indent: 0, - args: InitArgs::new(), - body: vec![], + for owned_variant in self.variants_owned_class.iter() { + writeln!(f, "{owned_variant}")?; + acc.push(owned_variant.name.as_str()); } - } - pub fn add_line(&mut self, line: String) { - self.body.push(line) - } + for owned_variant in self.variants_owned_enum.iter() { + writeln!(f, "{owned_variant}")?; + acc.push(owned_variant.name.as_str()); + } - pub fn set_args(&mut self, args: InitArgs) { - self.args = args; - } + self.variants_ref.iter().for_each(|x| acc.push(x)); - pub fn set_indent(&mut self, mut indent: usize) -> usize { - std::mem::swap(&mut self.indent, &mut indent); - indent + let indent_str = " ".repeat(self.indent); + write!(f, "{indent_str}{name}: typing.TypeAlias = ")?; + print_union_type(acc.as_slice(), f) } } -impl Display for DataclassInit { +impl Display for Dataclass { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let &Self { args, .. 
} = &self; + let name = self.name.as_str(); let indent_str = " ".repeat(self.indent); - writeln!(f, "{indent_str}def __init__(self{args}):")?; + // write class signature + writeln!( + f, + "{indent_str}@dataclass(init=True, frozen=True)\n\ + {indent_str}class {name}:" + )?; + + // write inner class definitions + for inclass in &self.inclasses { + writeln!(f, "{inclass}",)?; + } - for line in &self.body { - writeln!(f, "{indent_str} {line}")?; + // write dataclass fields + for item in &self.items { + writeln!(f, "{item}")?; } Ok(()) } } -struct InitArgs { - pairs: Vec<(String, String)>, -} - -impl InitArgs { - pub fn new() -> Self { - Self { pairs: vec![] } - } - - pub fn add(&mut self, name: String, ty: String) { - self.pairs.push((name, ty)) - } -} - -impl Display for InitArgs { +impl Display for DataclassItem { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for arg in &self.pairs { - let (name, ty) = arg; - write!(f, ", {name}: {ty}")?; - } - Ok(()) + let &Self { name, ty, .. } = &self; + let indent_str = " ".repeat(self.indent); + write!(f, "{indent_str}{name}: {ty}") } } diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index fe2106f6..947d6fe7 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -1,30 +1,20 @@ - -# This file is automatically @generated by probe_macros - -import sys +# This file was @generated by probe_macros +from __future__ import annotations import typing from dataclasses import dataclass -mod = sys.modules[__name__] - -@dataclass(init=False) -class timespec: +@dataclass(init=True, frozen=True) +class Timespec: tv_sec: int tv_nsec: int - def __init__(self, tv_sec: int, tv_nsec: int): - self.tv_sec = int(tv_sec) - self.tv_nsec = int(tv_nsec) -@dataclass(init=False) -class statx_timestamp: +@dataclass(init=True, frozen=True) +class StatxTimestamp: tv_sec: int tv_nsec: int - def __init__(self, tv_sec: int, tv_nsec: int): - self.tv_sec = int(tv_sec) - self.tv_nsec = int(tv_nsec) -@dataclass(init=False) -class statx: +@dataclass(init=True, frozen=True) +class Statx: stx_mask: int stx_blksize: int stx_attributes: int @@ -36,10 +26,10 @@ class statx: stx_size: int stx_blocks: int stx_attributes_mask: int - stx_atime: statx_timestamp - stx_btime: statx_timestamp - stx_ctime: statx_timestamp - stx_mtime: statx_timestamp + stx_atime: StatxTimestamp + stx_btime: StatxTimestamp + stx_ctime: StatxTimestamp + stx_mtime: StatxTimestamp stx_rdev_major: int stx_rdev_minor: int stx_dev_major: int @@ -47,42 +37,16 @@ class statx: stx_mnt_id: int stx_dio_mem_align: int stx_dio_offset_align: int - def __init__(self, stx_mask: int, stx_blksize: int, stx_attributes: int, stx_nlink: int, stx_uid: int, stx_gid: int, stx_mode: int, stx_ino: int, stx_size: int, stx_blocks: int, stx_attributes_mask: int, stx_atime: statx_timestamp, stx_btime: statx_timestamp, stx_ctime: statx_timestamp, stx_mtime: statx_timestamp, stx_rdev_major: int, stx_rdev_minor: int, stx_dev_major: int, stx_dev_minor: int, stx_mnt_id: int, stx_dio_mem_align: int, stx_dio_offset_align: int): - self.stx_mask = int(stx_mask) - self.stx_blksize = int(stx_blksize) - self.stx_attributes = int(stx_attributes) - self.stx_nlink = int(stx_nlink) - self.stx_uid = int(stx_uid) - self.stx_gid = int(stx_gid) - self.stx_mode = int(stx_mode) - self.stx_ino = int(stx_ino) - self.stx_size = int(stx_size) - self.stx_blocks = int(stx_blocks) - self.stx_attributes_mask = int(stx_attributes_mask) - self.stx_atime = 
statx_timestamp(**stx_atime) - self.stx_btime = statx_timestamp(**stx_btime) - self.stx_ctime = statx_timestamp(**stx_ctime) - self.stx_mtime = statx_timestamp(**stx_mtime) - self.stx_rdev_major = int(stx_rdev_major) - self.stx_rdev_minor = int(stx_rdev_minor) - self.stx_dev_major = int(stx_dev_major) - self.stx_dev_minor = int(stx_dev_minor) - self.stx_mnt_id = int(stx_mnt_id) - self.stx_dio_mem_align = int(stx_dio_mem_align) - self.stx_dio_offset_align = int(stx_dio_offset_align) -@dataclass(init=False) -class timeval: +@dataclass(init=True, frozen=True) +class Timeval: tv_sec: int tv_usec: int - def __init__(self, tv_sec: int, tv_usec: int): - self.tv_sec = int(tv_sec) - self.tv_usec = int(tv_usec) -@dataclass(init=False) -class rusage: - ru_utime: timeval - ru_stime: timeval +@dataclass(init=True, frozen=True) +class Rusage: + ru_utime: Timeval + ru_stime: Timeval ru_maxrss: int ru_ixrss: int ru_idrss: int @@ -97,265 +61,138 @@ class rusage: ru_nsignals: int ru_nvcsw: int ru_nivcsw: int - def __init__(self, ru_utime: timeval, ru_stime: timeval, ru_maxrss: int, ru_ixrss: int, ru_idrss: int, ru_isrss: int, ru_minflt: int, ru_majflt: int, ru_nswap: int, ru_inblock: int, ru_oublock: int, ru_msgsnd: int, ru_msgrcv: int, ru_nsignals: int, ru_nvcsw: int, ru_nivcsw: int): - self.ru_utime = timeval(**ru_utime) - self.ru_stime = timeval(**ru_stime) - self.ru_maxrss = int(ru_maxrss) - self.ru_ixrss = int(ru_ixrss) - self.ru_idrss = int(ru_idrss) - self.ru_isrss = int(ru_isrss) - self.ru_minflt = int(ru_minflt) - self.ru_majflt = int(ru_majflt) - self.ru_nswap = int(ru_nswap) - self.ru_inblock = int(ru_inblock) - self.ru_oublock = int(ru_oublock) - self.ru_msgsnd = int(ru_msgsnd) - self.ru_msgrcv = int(ru_msgrcv) - self.ru_nsignals = int(ru_nsignals) - self.ru_nvcsw = int(ru_nvcsw) - self.ru_nivcsw = int(ru_nivcsw) -@dataclass(init=False) -class path: +@dataclass(init=True, frozen=True) +class Path: dirfd_minus_at_fdcwd: int path: bytes device_major: int device_minor: int inode: int - mtime: statx_timestamp - ctime: statx_timestamp + mtime: StatxTimestamp + ctime: StatxTimestamp stat_valid: bool dirfd_valid: bool - def __init__(self, dirfd_minus_at_fdcwd: int, path: bytes, device_major: int, device_minor: int, inode: int, mtime: statx_timestamp, ctime: statx_timestamp, stat_valid: bool, dirfd_valid: bool): - self.dirfd_minus_at_fdcwd = int(dirfd_minus_at_fdcwd) - self.path = bytes(path) - self.device_major = int(device_major) - self.device_minor = int(device_minor) - self.inode = int(inode) - self.mtime = statx_timestamp(**mtime) - self.ctime = statx_timestamp(**ctime) - self.stat_valid = bool(stat_valid) - self.dirfd_valid = bool(dirfd_valid) -@dataclass(init=False) -class init_process_op: +@dataclass(init=True, frozen=True) +class InitProcessOp: pid: int - def __init__(self, pid: int): - self.pid = int(pid) -@dataclass(init=False) -class init_exec_epoch_op: +@dataclass(init=True, frozen=True) +class InitExecEpochOp: epoch: int program_name: bytes - def __init__(self, epoch: int, program_name: bytes): - self.epoch = int(epoch) - self.program_name = bytes(program_name) -@dataclass(init=False) -class init_thread_op: +@dataclass(init=True, frozen=True) +class InitThreadOp: tid: int - def __init__(self, tid: int): - self.tid = int(tid) -@dataclass(init=False) -class open_op: - path: path +@dataclass(init=True, frozen=True) +class OpenOp: + path: Path flags: int mode: int fd: int ferrno: int - def __init__(self, path: path, flags: int, mode: int, fd: int, ferrno: int): - self.path = path(**path) - 
self.flags = int(flags) - self.mode = int(mode) - self.fd = int(fd) - self.ferrno = int(ferrno) -@dataclass(init=False) -class close_op: +@dataclass(init=True, frozen=True) +class CloseOp: low_fd: int high_fd: int ferrno: int - def __init__(self, low_fd: int, high_fd: int, ferrno: int): - self.low_fd = int(low_fd) - self.high_fd = int(high_fd) - self.ferrno = int(ferrno) -@dataclass(init=False) -class chdir_op: - path: path +@dataclass(init=True, frozen=True) +class ChdirOp: + path: Path ferrno: int - def __init__(self, path: path, ferrno: int): - self.path = path(**path) - self.ferrno = int(ferrno) -@dataclass(init=False) -class exec_op: - path: path +@dataclass(init=True, frozen=True) +class ExecOp: + path: Path ferrno: int - def __init__(self, path: path, ferrno: int): - self.path = path(**path) - self.ferrno = int(ferrno) -@dataclass(init=False) -class clone_op: +@dataclass(init=True, frozen=True) +class CloneOp: flags: int run_pthread_atfork_handlers: bool child_process_id: int child_thread_id: int ferrno: int - def __init__(self, flags: int, run_pthread_atfork_handlers: bool, child_process_id: int, child_thread_id: int, ferrno: int): - self.flags = int(flags) - self.run_pthread_atfork_handlers = bool(run_pthread_atfork_handlers) - self.child_process_id = int(child_process_id) - self.child_thread_id = int(child_thread_id) - self.ferrno = int(ferrno) -@dataclass(init=False) -class exit_op: +@dataclass(init=True, frozen=True) +class ExitOp: status: int run_atexit_handlers: bool - def __init__(self, status: int, run_atexit_handlers: bool): - self.status = int(status) - self.run_atexit_handlers = bool(run_atexit_handlers) -@dataclass(init=False) -class access_op: - path: path +@dataclass(init=True, frozen=True) +class AccessOp: + path: Path mode: int flags: int ferrno: int - def __init__(self, path: path, mode: int, flags: int, ferrno: int): - self.path = path(**path) - self.mode = int(mode) - self.flags = int(flags) - self.ferrno = int(ferrno) -@dataclass(init=False) -class stat_op: - path: path +@dataclass(init=True, frozen=True) +class StatOp: + path: Path flags: int - statx_buf: statx + statx_buf: Statx ferrno: int - def __init__(self, path: path, flags: int, statx_buf: statx, ferrno: int): - self.path = path(**path) - self.flags = int(flags) - self.statx_buf = statx(**statx_buf) - self.ferrno = int(ferrno) -@dataclass(init=False) -class readdir_op: - dir: path +@dataclass(init=True, frozen=True) +class ReaddirOp: + dir: Path child: bytes all_children: bool ferrno: int - def __init__(self, dir: path, child: bytes, all_children: bool, ferrno: int): - self.dir = path(**dir) - self.child = bytes(child) - self.all_children = bool(all_children) - self.ferrno = int(ferrno) -@dataclass(init=False) -class wait_op: +@dataclass(init=True, frozen=True) +class WaitOp: pid: int options: int status: int ret: int ferrno: int - def __init__(self, pid: int, options: int, status: int, ret: int, ferrno: int): - self.pid = int(pid) - self.options = int(options) - self.status = int(status) - self.ret = int(ret) - self.ferrno = int(ferrno) -@dataclass(init=False) -class get_rusage_op: +@dataclass(init=True, frozen=True) +class GetRUsageOp: waitpid_arg: int getrusage_arg: int - usage: rusage + usage: Rusage ferrno: int - def __init__(self, waitpid_arg: int, getrusage_arg: int, usage: rusage, ferrno: int): - self.waitpid_arg = int(waitpid_arg) - self.getrusage_arg = int(getrusage_arg) - self.usage = rusage(**usage) - self.ferrno = int(ferrno) -@dataclass(init=False) -class read_link_op: - path: path 
+@dataclass(init=True, frozen=True) +class ReadLinkOp: + path: Path resolved: bytes ferrno: int - def __init__(self, path: path, resolved: bytes, ferrno: int): - self.path = path(**path) - self.resolved = bytes(resolved) - self.ferrno = int(ferrno) -@dataclass(init=False) -class metadata: - @dataclass(init=False) - class mode: - mode: int - def __init__(self, mode: int): - self.mode = int(mode) - - @dataclass(init=False) - class ownership: - uid: int - gid: int - def __init__(self, uid: int, gid: int): - self.uid = int(uid) - self.gid = int(gid) +@dataclass(init=True, frozen=True) +class UpdateMetadataOp: + path: Path + flags: int + metadata: Metadata + ferrno: int - @dataclass(init=False) - class times: - is_null: bool - atime: timeval - mtime: timeval - def __init__(self, is_null: bool, atime: timeval, mtime: timeval): - self.is_null = bool(is_null) - self.atime = timeval(**atime) - self.mtime = timeval(**mtime) +@dataclass(init=True, frozen=True) +class Op: + data: OpInternal + time: Timespec - value: typing.Union[mode, ownership, times] - def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): - if len(kwargs) != 1: - raise ValueError("Malformed Enum constructor args") - key = list(kwargs.keys())[0] - if key in self.__class__.__dict__: - self.value = self.__class__.__dict__[key](**kwargs[key]) - else: - self.value = mod.__dict__[key](**kwargs[key]) +@dataclass(init=True, frozen=True) +class Mode: + mode: int -@dataclass(init=False) -class update_metadata_op: - path: path - flags: int - metadata: metadata - ferrno: int - def __init__(self, path: path, flags: int, metadata: metadata, ferrno: int): - self.path = path(**path) - self.flags = int(flags) - self.metadata = metadata(**metadata) - self.ferrno = int(ferrno) +@dataclass(init=True, frozen=True) +class Ownership: + uid: int + gid: int -@dataclass(init=False) -class op_internal: - value: typing.Union[init_process_op, init_exec_epoch_op, init_thread_op, open_op, close_op, chdir_op, exec_op, clone_op, exit_op, access_op, stat_op, readdir_op, wait_op, get_rusage_op, update_metadata_op, read_link_op] - def __init__(self, **kwargs: typing.Mapping[str, typing.Any]): - if len(kwargs) != 1: - raise ValueError("Malformed Enum constructor args") - key = list(kwargs.keys())[0] - if key in self.__class__.__dict__: - self.value = self.__class__.__dict__[key](**kwargs[key]) - else: - self.value = mod.__dict__[key](**kwargs[key]) +@dataclass(init=True, frozen=True) +class Times: + is_null: bool + atime: Timeval + mtime: Timeval -@dataclass(init=False) -class op: - data: op_internal - time: timespec - def __init__(self, data: op_internal, time: timespec): - self.data = op_internal(**data) - self.time = timespec(**time) +Metadata: typing.TypeAlias = Mode | Ownership | Times +OpInternal: typing.TypeAlias = InitProcessOp | InitExecEpochOp | InitThreadOp | OpenOp | CloseOp | ChdirOp | ExecOp | CloneOp | ExitOp | AccessOp | StatOp | ReaddirOp | WaitOp | GetRUsageOp | UpdateMetadataOp | ReadLinkOp diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py index c0d6cf9e..c9db6fa9 100644 --- a/probe_src/probe_frontend/python/probe.py +++ b/probe_src/probe_frontend/python/probe.py @@ -1,6 +1,7 @@ import typing import json +import tarfile import subprocess from . 
import ops @@ -9,20 +10,22 @@ def load_log(path: str) -> OpTable: ret: dict[int, dict[int, dict[int, list[ops.Op]]]] = {} + tar = tarfile.open(path, mode='r') - lines = subprocess.run( - ["probe", "dump", "--json", "--input", path], - capture_output=True, - encoding="utf-8" - ) - jsonlines = [json.loads(x) for x in lines.stdout.strip().split('\n')] + for item in tar: + # items with size zero are directories in the tarball + if item.size == 0: + continue - for item in jsonlines: - pid: int = item['pid'] - epoch: int = item['exec_epoch'] - tid: int = item['tid'] - op: ops.Op = ops.Op(**item['op']) + # extract and name the hierarchy components + parts = item.name.split("/") + if len(parts) != 3: + raise ValueError("malformed probe log") + pid: int = int(parts[0]) + epoch: int = int(parts[1]) + tid: int = int(parts[2]) + # ensure necessary dict objects have been created if not pid in ret: ret[pid] = {} if not epoch in ret[pid]: @@ -30,6 +33,35 @@ def load_log(path: str) -> OpTable: if not tid in ret[pid][epoch]: ret[pid][epoch][tid] = [] - ret[pid][epoch][tid].append(op) + # extract file contents as byte buffer + file = tar.extractfile(item) + if file is None: + raise IOError("Unable to read jsonlines from probe log") + + # read, split, comprehend, deserialize, extend + jsonlines = file.read().strip().split(b"\n") + ops = [json.loads(x, object_hook=op_hook) for x in jsonlines] + ret[pid][epoch][tid].extend(ops) return ret + +def op_hook(json_map: typing.Dict[str, typing.Any]): + ty: str = json_map["_type"] + json_map.pop("_type") + + return ops.__dict__[snake_case_to_pascal(ty)](**json_map) + +def snake_case_to_pascal(input: str) -> str: + ret: str = "" + prior_underscore: bool = True + for ch in input: + if ch == '_': + prior_underscore = True + continue + if prior_underscore: + ret += ch.upper() + else: + ret += ch + prior_underscore = False + + return ret From 698443b55cab74ed1f326003bce331d93b8321cf Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 3 Jul 2024 10:12:21 -0500 Subject: [PATCH 19/37] improved rust struct generation the structs now get converted to camel case at the rust level instead of the python level meaning python doesn't have to do any type name translation. Also strips out redundant field prefixes from the rust structs (which also solves the issue for python). 
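For illustration only, the rename described here boils down to two small helpers; this standalone sketch mirrors the snake_case_to_pascal fold and the ru_/tv_/stx_ prefix strip that the diff below adds to macros/src/lib.rs, pulled out of the proc-macro plumbing so it can be run on its own:

    // Condensed sketch of the rename logic; the real version lives inside the
    // MakeRustOp derive in macros/src/lib.rs (see the hunks below).
    fn snake_case_to_pascal(input: &str) -> String {
        input
            .chars()
            .fold((true, String::new()), |(boundary, mut acc), ch| {
                if ch == '_' {
                    // drop the underscore and uppercase the next character
                    (true, acc)
                } else {
                    if boundary {
                        acc.extend(ch.to_uppercase());
                    } else {
                        acc.push(ch);
                    }
                    (false, acc)
                }
            })
            .1
    }

    fn strip_field_prefix(field: &str) -> &str {
        // e.g. stx_mtime -> mtime, ru_utime -> utime, tv_sec -> sec
        ["ru_", "tv_", "stx_"]
            .iter()
            .find_map(|prefix| field.strip_prefix(*prefix))
            .unwrap_or(field)
    }

    fn main() {
        assert_eq!(snake_case_to_pascal("init_exec_epoch_op"), "InitExecEpochOp");
        assert_eq!(strip_field_prefix("stx_mtime"), "mtime");
    }

With both transformations done while the Rust structs are generated, the emitted Python dataclasses (and the JSON written by transcription) already carry the PascalCase type names and unprefixed field names, so probe.py no longer needs its own name translation.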
--- probe_src/probe_frontend/cli/src/dump.rs | 30 +++---- probe_src/probe_frontend/flake.nix | 2 + probe_src/probe_frontend/lib/src/ops.rs | 6 +- probe_src/probe_frontend/macros/src/lib.rs | 51 ++++++++++-- probe_src/probe_frontend/macros/src/pygen.rs | 23 +---- probe_src/probe_frontend/python/ops.py | 88 ++++++++++---------- probe_src/probe_frontend/python/probe.py | 20 ++--- 7 files changed, 116 insertions(+), 104 deletions(-) diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index 6256829d..8befd9d6 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -139,45 +139,45 @@ trait Dump { fn dump(&self) -> String; } -impl Dump for ops::statx_timestamp { +impl Dump for ops::StatxTimestamp { fn dump(&self) -> String { - match DateTime::from_timestamp(self.tv_sec, self.tv_nsec) { + match DateTime::from_timestamp(self.sec, self.nsec) { Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), None => "[INVALID TIMESTAMP]".to_owned(), } } } -impl Dump for ops::timeval { +impl Dump for ops::Timeval { fn dump(&self) -> String { - match DateTime::from_timestamp(self.tv_sec, self.tv_usec as u32 * 1000) { + match DateTime::from_timestamp(self.sec, self.usec as u32 * 1000) { Some(x) => x.to_rfc3339_opts(SecondsFormat::Secs, true), None => "[INVALID TIMESTAMP]".to_owned(), } } } -impl Dump for ops::statx { +impl Dump for ops::Statx { fn dump(&self) -> String { format!( "[ uid={}, gid={}, mode={:#06o} ino={}, size={}, mtime={} ]", - self.stx_uid, - self.stx_gid, - self.stx_mode, - self.stx_ino, - self.stx_size, - self.stx_mtime.dump(), + self.uid, + self.gid, + self.mode, + self.ino, + self.size, + self.mtime.dump(), ) } } -impl Dump for ops::rusage { +impl Dump for ops::Rusage { fn dump(&self) -> String { format!( "[ utime={}, stime={}, maxrss={} ]", - self.ru_utime.dump(), - self.ru_stime.dump(), - self.ru_maxrss, + self.utime.dump(), + self.stime.dump(), + self.maxrss, ) } } diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index d8b4c87b..f65676b3 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -162,6 +162,8 @@ pkgs.cargo-expand pkgs.cargo-flamegraph pkgs.cargo-watch + pkgs.gdb + pkgs.python312 pkgs.rust-analyzer ]; }; diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index eaddfaa5..6c0f230a 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -108,8 +108,8 @@ pub enum Metadata { #[serde(untagged)] Times { is_null: bool, - atime: timeval, - mtime: timeval, + atime: Timeval, + mtime: Timeval, #[serde(serialize_with = "Metadata::serialize_variant_times")] #[serde(skip_deserializing)] @@ -286,7 +286,7 @@ impl FfiFrom for OpInternal { #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] pub struct Op { pub data: OpInternal, - pub time: timespec, + pub time: Timespec, #[serde(serialize_with = "Op::serialize_type")] #[serde(skip_deserializing)] diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 98c6c6ab..039ef5e6 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -33,13 +33,33 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { let field_idents = pairs.iter().map(|x| x.0).collect::>(); + let field_idents_stripped = field_idents + .iter() + .map(|old| { + let span = old.span(); + let str = old.to_string(); + let 
mut slice = str.as_str(); + + for prefix in ["ru_", "tv_", "stx_"] { + if let Some(stripped) = str.strip_prefix(prefix) { + slice = stripped; + break; + } + } + + Ident::new(slice, span) + }) + .collect::>(); + let field_types = pairs.into_iter().map(|x| x.1).collect::>(); let new_name = Ident::new( - ident - .to_string() - .strip_prefix("C_") - .expect("struct name doesn't start with 'C_'"), + &snake_case_to_pascal( + ident + .to_string() + .strip_prefix("C_") + .expect("struct name doesn't start with 'C_'"), + ), Span::call_site(), ); @@ -61,7 +81,7 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { quote! { #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] pub struct #new_name { - #(pub #field_idents: #field_types,)* + #(pub #field_idents_stripped: #field_types,)* /// this is a placeholder field that get's serialized as the type name #[serde(serialize_with = #serialize_type_path)] @@ -83,7 +103,7 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { Ok(Self { _type: (), #( - #field_idents: value.#field_idents + #field_idents_stripped: value.#field_idents .ffi_into(ctx) .map_err(|e| { ProbeError::FFiConversionError { @@ -112,7 +132,8 @@ fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { } syn::Type::Path(inner) => { if let Some(name) = type_basename(inner).to_string().strip_prefix("C_") { - let name = Ident::new(name, Span::mixed_site()); + let name = snake_case_to_pascal(name); + let name = Ident::new(&name, Span::mixed_site()); parse_quote!(#name) } else { Type::Path(inner.clone()) @@ -130,6 +151,22 @@ pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { &ty.path.segments.last().expect("type has no segments").ident } +pub(crate) fn snake_case_to_pascal(input: &str) -> String { + input + .chars() + .fold((true, String::new()), |(prior_underscore, mut acc), ch| { + if ch == '_' { + return (true, acc); + } else if prior_underscore { + ch.to_uppercase().for_each(|x| acc.push(x)) + } else { + acc.push(ch) + } + (false, acc) + }) + .1 +} + // TODO: return compiler error instead of panicking on error #[proc_macro_derive(MakePyDataclass)] pub fn make_py_dataclass(input: TokenStream) -> TokenStream { diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 7f7e2714..954260eb 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -12,7 +12,6 @@ fn pygen_file() -> &'static RwLock { pub fn make_py_dataclass_internal(input: syn::DeriveInput) { let syn::DeriveInput { data, ident, .. 
} = input.clone(); - let ident = snake_case_to_pascal(&ident.to_string()); match data { Data::Struct(data_struct) => { @@ -39,11 +38,11 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { }) .collect::>(); - let dataclass = basic_dataclass(ident, &pairs); + let dataclass = basic_dataclass(ident.to_string(), &pairs); pygen_file().write().add_class(dataclass); } Data::Enum(data_enum) => { - let mut enu = Enum::new(ident); + let mut enu = Enum::new(ident.to_string()); // this is the types that the produced union is over let mut variants = vec![]; @@ -126,29 +125,13 @@ fn convert_to_pytype(ty: &syn::Type) -> String { // bool types are basically the same everywhere "bool" => name, - _ => snake_case_to_pascal(&name), + _ => name, } } _ => unimplemented!("unsupported type type"), } } -pub(crate) fn snake_case_to_pascal(input: &str) -> String { - input - .chars() - .fold((true, String::new()), |(prior_underscore, mut acc), ch| { - if ch == '_' { - return (true, acc); - } else if prior_underscore { - ch.to_uppercase().for_each(|x| acc.push(x)) - } else { - acc.push(ch) - } - (false, acc) - }) - .1 -} - pub(crate) fn write_pygen_internal(path: syn::LitStr) { let path = path.value(); let path = std::env::var_os(&path) diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index 947d6fe7..a60e148f 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -5,62 +5,62 @@ @dataclass(init=True, frozen=True) class Timespec: - tv_sec: int - tv_nsec: int + sec: int + nsec: int @dataclass(init=True, frozen=True) class StatxTimestamp: - tv_sec: int - tv_nsec: int + sec: int + nsec: int @dataclass(init=True, frozen=True) class Statx: - stx_mask: int - stx_blksize: int - stx_attributes: int - stx_nlink: int - stx_uid: int - stx_gid: int - stx_mode: int - stx_ino: int - stx_size: int - stx_blocks: int - stx_attributes_mask: int - stx_atime: StatxTimestamp - stx_btime: StatxTimestamp - stx_ctime: StatxTimestamp - stx_mtime: StatxTimestamp - stx_rdev_major: int - stx_rdev_minor: int - stx_dev_major: int - stx_dev_minor: int - stx_mnt_id: int - stx_dio_mem_align: int - stx_dio_offset_align: int + mask: int + blksize: int + attributes: int + nlink: int + uid: int + gid: int + mode: int + ino: int + size: int + blocks: int + attributes_mask: int + atime: StatxTimestamp + btime: StatxTimestamp + ctime: StatxTimestamp + mtime: StatxTimestamp + rdev_major: int + rdev_minor: int + dev_major: int + dev_minor: int + mnt_id: int + dio_mem_align: int + dio_offset_align: int @dataclass(init=True, frozen=True) class Timeval: - tv_sec: int - tv_usec: int + sec: int + usec: int @dataclass(init=True, frozen=True) class Rusage: - ru_utime: Timeval - ru_stime: Timeval - ru_maxrss: int - ru_ixrss: int - ru_idrss: int - ru_isrss: int - ru_minflt: int - ru_majflt: int - ru_nswap: int - ru_inblock: int - ru_oublock: int - ru_msgsnd: int - ru_msgrcv: int - ru_nsignals: int - ru_nvcsw: int - ru_nivcsw: int + utime: Timeval + stime: Timeval + maxrss: int + ixrss: int + idrss: int + isrss: int + minflt: int + majflt: int + nswap: int + inblock: int + oublock: int + msgsnd: int + msgrcv: int + nsignals: int + nvcsw: int + nivcsw: int @dataclass(init=True, frozen=True) class Path: diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py index c9db6fa9..3ae28714 100644 --- a/probe_src/probe_frontend/python/probe.py +++ b/probe_src/probe_frontend/python/probe.py @@ -2,7 +2,6 @@ import typing import json import 
tarfile -import subprocess from . import ops OpTable = typing.Mapping[int, typing.Mapping[int, typing.Mapping[int, typing.List[ops.Op]]]] @@ -49,19 +48,10 @@ def op_hook(json_map: typing.Dict[str, typing.Any]): ty: str = json_map["_type"] json_map.pop("_type") - return ops.__dict__[snake_case_to_pascal(ty)](**json_map) + constructor = ops.__dict__[ty] -def snake_case_to_pascal(input: str) -> str: - ret: str = "" - prior_underscore: bool = True - for ch in input: - if ch == '_': - prior_underscore = True - continue - if prior_underscore: - ret += ch.upper() - else: - ret += ch - prior_underscore = False + for ident, ty in constructor.__annotations__.items(): + if ty == "bytes" and ident in json_map: + json_map[ident] = bytes(json_map[ident]) - return ret + return constructor(**json_map) From 7e9072db129caf70785ec3c6158db9266c309f95 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 3 Jul 2024 19:48:50 -0500 Subject: [PATCH 20/37] pygen custom `@property`s --- probe_src/probe_frontend/.envrc | 2 - probe_src/probe_frontend/flake.nix | 7 +- probe_src/probe_frontend/lib/src/ops.rs | 27 ++++-- probe_src/probe_frontend/macros/src/lib.rs | 84 ++++++++++++++--- probe_src/probe_frontend/macros/src/pygen.rs | 96 ++++++++++++++++---- probe_src/probe_frontend/python/ops.py | 32 +++++++ 6 files changed, 205 insertions(+), 43 deletions(-) diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc index 76f69446..86255ac0 100644 --- a/probe_src/probe_frontend/.envrc +++ b/probe_src/probe_frontend/.envrc @@ -1,4 +1,2 @@ use_flake -(cd ../libprobe && make) -export __PROBE_LOG=info diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index f65676b3..55c8d114 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -64,7 +64,7 @@ // { inherit cargoArtifacts; inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; - # NB: we disable tests since we'll run them all via cargo-nextest + # disable tests since we'll run them all via cargo-nextest doCheck = false; }; @@ -129,8 +129,8 @@ }; # Run tests with cargo-nextest - # this is why `doCheck = false` on other crate derivations, to not run - # the tests twice. + # this is why `doCheck = false` on the crate derivations, so as to not + # run the tests twice. probe-workspace-nextest = craneLib.cargoNextest (commonArgs // { inherit cargoArtifacts; @@ -163,7 +163,6 @@ pkgs.cargo-flamegraph pkgs.cargo-watch pkgs.gdb - pkgs.python312 pkgs.rust-analyzer ]; }; diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index 6c0f230a..0e1634fb 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -4,7 +4,7 @@ use crate::error::{ProbeError, Result}; use crate::transcribe::ArenaContext; -use probe_macros::{MakePyDataclass, MakeRustOp}; +use probe_macros::{MakeRustOp, PygenDataclass}; use serde::{Deserialize, Serialize}; use std::ffi::CString; @@ -86,7 +86,7 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs")); // implemented, this is somewhat confusing since they extensively use types and trait // implementations that are auto-generated. 
-#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] pub enum Metadata { #[serde(untagged)] Mode { @@ -168,7 +168,7 @@ impl FfiFrom for Metadata { } } -#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] pub struct UpdateMetadataOp { pub path: Path, pub flags: ::std::os::raw::c_int, @@ -207,7 +207,7 @@ impl FfiFrom for UpdateMetadataOp { } } -#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] pub enum OpInternal { #[serde(untagged)] InitProcessOp(InitProcessOp), @@ -283,7 +283,7 @@ impl FfiFrom for OpInternal { } } -#[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] +#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] pub struct Op { pub data: OpInternal, pub time: Timespec, @@ -313,7 +313,16 @@ impl FfiFrom for Op { } } -// WARNING: this macro invokation must come after all structs that implement MakePyDataclass -// (including classes that implement MakeRustOp, who's daughter classes implement MakePyDataclass) -// for python codegen to work properly -probe_macros::write_pygen_file_from_env!("PYGEN_OUTFILE"); +probe_macros::pygen_add_preamble!("AT_FDCWD: int = -100"); +#[test] +fn at_fdcwd_sanity_check() { + assert_eq!(libc::AT_FDCWD, -100); +} + +probe_macros::pygen_add_prop!(Path impl dirfd -> int: + "return self.dirfd_minus_at_fdcwd + AT_FDCWD" +); + +// WARNING: this macro invocation must come after all other pygen calls for those calls to be +// included in the written file +probe_macros::pygen_write_to_env!("PYGEN_OUTFILE"); diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 039ef5e6..470f8202 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -1,8 +1,9 @@ use proc_macro::TokenStream; use proc_macro2::Span; use quote::quote; -use syn::parse_quote; +use syn::parse::Parse; use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident, Type}; +use syn::{parse_quote, LitStr, Token}; mod pygen; @@ -79,7 +80,7 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { // This is rather bad macro hygiene, but this macro is only intend for probe_frontend's // op struct generation, so we're playing a little fast-n'-loose with scoping. quote! 
{ - #[derive(Debug, Clone, Serialize, Deserialize, MakePyDataclass)] + #[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)] pub struct #new_name { #(pub #field_idents_stripped: #field_types,)* @@ -143,7 +144,7 @@ fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { } } -pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { +fn type_basename(ty: &syn::TypePath) -> &syn::Ident { if ty.qself.is_some() { unimplemented!("qualified self-typs not supported"); } @@ -151,7 +152,7 @@ pub(crate) fn type_basename(ty: &syn::TypePath) -> &syn::Ident { &ty.path.segments.last().expect("type has no segments").ident } -pub(crate) fn snake_case_to_pascal(input: &str) -> String { +fn snake_case_to_pascal(input: &str) -> String { input .chars() .fold((true, String::new()), |(prior_underscore, mut acc), ch| { @@ -168,19 +169,80 @@ pub(crate) fn snake_case_to_pascal(input: &str) -> String { } // TODO: return compiler error instead of panicking on error -#[proc_macro_derive(MakePyDataclass)] -pub fn make_py_dataclass(input: TokenStream) -> TokenStream { +#[proc_macro_derive(PygenDataclass)] +pub fn pygen_dataclass(input: TokenStream) -> TokenStream { let source = parse_macro_input!(input as DeriveInput); - pygen::make_py_dataclass_internal(source); + pygen::pygen_dataclass_internal(source); // return empty token stream, we're not actually writing rust here TokenStream::new() } // TODO: return compiler error instead of panicking on error #[proc_macro] -pub fn write_pygen_file_from_env(item: TokenStream) -> TokenStream { - let path = parse_macro_input!(item as syn::LitStr); - pygen::write_pygen_internal(path); - // return empty token stream, we're not actually writing rust here +pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { + let path = parse_macro_input!(input as syn::LitStr); + pygen::pygen_write_internal(path); + TokenStream::new() +} + +// TODO: return compiler error instead of panicking on error +#[proc_macro] +pub fn pygen_add_prop(input: TokenStream) -> TokenStream { + let args = parse_macro_input!(input as AddPropArgs); + pygen::pygen_add_prop_internal(args); + TokenStream::new() +} + +pub(crate) struct AddPropArgs { + class: Ident, + name: Ident, + ret: Ident, + body: Vec, +} + +impl Parse for AddPropArgs { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let class = input.parse()?; + input.parse::()?; + let name = input.parse()?; + input.parse::]>()?; + let ret = input.parse()?; + input.parse::()?; + + let mut body = vec![]; + body.push(input.parse::()?.value()); + while !input.is_empty() { + input.parse::()?; + body.push(input.parse::()?.value()); + } + + Ok(Self { + class, + name, + ret, + body, + }) + } +} + +#[proc_macro] +pub fn pygen_add_preamble(input: TokenStream) -> TokenStream { + let args = parse_macro_input!(input as AddPreambleArgs); + pygen::pygen_add_preamble(args); TokenStream::new() } + +pub(crate) struct AddPreambleArgs(pub Vec); + +impl Parse for AddPreambleArgs { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let mut lines = vec![]; + lines.push(input.parse::()?.value()); + while !input.is_empty() { + input.parse::()?; + lines.push(input.parse::()?.value()); + } + + Ok(Self(lines)) + } +} diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 954260eb..52497fc5 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -10,7 +10,7 @@ fn pygen_file() -> &'static RwLock { INNER.get_or_init(|| 
RwLock::new(PygenFile::new())) } -pub fn make_py_dataclass_internal(input: syn::DeriveInput) { +pub fn pygen_dataclass_internal(input: syn::DeriveInput) { let syn::DeriveInput { data, ident, .. } = input.clone(); match data { @@ -39,7 +39,7 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { .collect::>(); let dataclass = basic_dataclass(ident.to_string(), &pairs); - pygen_file().write().add_class(dataclass); + pygen_file().write().classes.push(dataclass); } Data::Enum(data_enum) => { let mut enu = Enum::new(ident.to_string()); @@ -85,7 +85,7 @@ pub fn make_py_dataclass_internal(input: syn::DeriveInput) { } } - pygen_file().write().add_enum(enu); + pygen_file().write().enums.push(enu); } Data::Union(_data_union) => unimplemented!(), }; @@ -132,7 +132,7 @@ fn convert_to_pytype(ty: &syn::Type) -> String { } } -pub(crate) fn write_pygen_internal(path: syn::LitStr) { +pub(crate) fn pygen_write_internal(path: syn::LitStr) { let path = path.value(); let path = std::env::var_os(&path) .unwrap_or_else(|| panic!("Environment variable '{}' not defined", path)); @@ -159,11 +159,29 @@ pub(crate) fn write_pygen_internal(path: syn::LitStr) { writeln!(file, "{}", pygen_file().read()).expect("Failed to write pygen file"); } +pub(crate) fn pygen_add_prop_internal(args: crate::AddPropArgs) { + let class = args.class.to_string(); + let mut prop = DataclassProp::new(args.name.to_string(), args.ret.to_string()); + args.body.into_iter().for_each(|x| prop.body.push(x)); + + for dataclass in pygen_file().write().classes.iter_mut() { + if dataclass.name != class { + continue; + } + + dataclass.add_prop(prop.clone()); + } +} + +pub(crate) fn pygen_add_preamble(args: crate::AddPreambleArgs) { + pygen_file().write().append_preamble(args.0) +} + #[derive(Debug, Clone)] struct PygenFile { preamble: Vec, - classes: Vec, - enums: Vec, + pub classes: Vec, + pub enums: Vec, } #[derive(Debug, Clone)] @@ -181,6 +199,7 @@ struct Dataclass { pub name: String, inclasses: Vec, items: Vec, + properties: Vec, } #[derive(Debug, Clone)] @@ -190,6 +209,14 @@ struct DataclassItem { ty: String, } +#[derive(Debug, Clone)] +struct DataclassProp { + indent: usize, + name: String, + ret: String, + pub body: Vec, +} + #[allow(dead_code)] impl PygenFile { pub fn new() -> Self { @@ -200,14 +227,6 @@ impl PygenFile { } } - pub fn add_class(&mut self, class: Dataclass) { - self.classes.push(class); - } - - pub fn add_enum(&mut self, enu: Enum) { - self.enums.push(enu); - } - pub fn prepend_preamble(&mut self, mut lines: Vec) { lines.extend(std::mem::take(&mut self.preamble)); self.preamble = lines; @@ -264,6 +283,7 @@ impl Dataclass { name, inclasses: vec![], items: vec![], + properties: vec![], } } @@ -277,6 +297,11 @@ impl Dataclass { self.items.push(item) } + pub fn add_prop(&mut self, mut prop: DataclassProp) { + prop.set_indent(self.indent + 4); + self.properties.push(prop) + } + pub fn set_indent(&mut self, indent: usize) { for inclass in &mut self.inclasses { inclass.set_indent(indent + 4); @@ -303,6 +328,21 @@ impl DataclassItem { } } +impl DataclassProp { + pub fn new(name: String, ret: String) -> Self { + Self { + indent: 0, + name, + ret, + body: vec![], + } + } + + pub fn set_indent(&mut self, indent: usize) { + self.indent = indent; + } +} + // Display trait implementations for actual codegen impl Display for PygenFile { @@ -312,6 +352,7 @@ impl Display for PygenFile { for line in self.preamble.iter() { writeln!(f, "{line}")?; } + writeln!(f)?; for class in self.classes.iter() { writeln!(f, "{class}")?; @@ -370,22 
+411,24 @@ impl Display for Dataclass { let name = self.name.as_str(); let indent_str = " ".repeat(self.indent); - // write class signature writeln!( f, "{indent_str}@dataclass(init=True, frozen=True)\n\ {indent_str}class {name}:" )?; - // write inner class definitions for inclass in &self.inclasses { writeln!(f, "{inclass}",)?; } - // write dataclass fields for item in &self.items { writeln!(f, "{item}")?; } + writeln!(f)?; + + for prop in &self.properties { + writeln!(f, "{prop}")?; + } Ok(()) } @@ -398,3 +441,22 @@ impl Display for DataclassItem { write!(f, "{indent_str}{name}: {ty}") } } + +impl Display for DataclassProp { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let &Self { name, ret, .. } = &self; + let indent_str = " ".repeat(self.indent); + + writeln!( + f, + "{indent_str}@property\n\ + {indent_str}def {name}() -> {ret}:", + )?; + + for line in &self.body { + writeln!(f, "{indent_str} {line}")?; + } + + Ok(()) + } +} diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index a60e148f..03549cc0 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -3,16 +3,20 @@ import typing from dataclasses import dataclass +AT_FDCWD: int = -100 + @dataclass(init=True, frozen=True) class Timespec: sec: int nsec: int + @dataclass(init=True, frozen=True) class StatxTimestamp: sec: int nsec: int + @dataclass(init=True, frozen=True) class Statx: mask: int @@ -38,11 +42,13 @@ class Statx: dio_mem_align: int dio_offset_align: int + @dataclass(init=True, frozen=True) class Timeval: sec: int usec: int + @dataclass(init=True, frozen=True) class Rusage: utime: Timeval @@ -62,6 +68,7 @@ class Rusage: nvcsw: int nivcsw: int + @dataclass(init=True, frozen=True) class Path: dirfd_minus_at_fdcwd: int @@ -74,19 +81,27 @@ class Path: stat_valid: bool dirfd_valid: bool + @property + def dirfd() -> int: + return self.dirfd_minus_at_fdcwd + AT_FDCWD + + @dataclass(init=True, frozen=True) class InitProcessOp: pid: int + @dataclass(init=True, frozen=True) class InitExecEpochOp: epoch: int program_name: bytes + @dataclass(init=True, frozen=True) class InitThreadOp: tid: int + @dataclass(init=True, frozen=True) class OpenOp: path: Path @@ -95,22 +110,26 @@ class OpenOp: fd: int ferrno: int + @dataclass(init=True, frozen=True) class CloseOp: low_fd: int high_fd: int ferrno: int + @dataclass(init=True, frozen=True) class ChdirOp: path: Path ferrno: int + @dataclass(init=True, frozen=True) class ExecOp: path: Path ferrno: int + @dataclass(init=True, frozen=True) class CloneOp: flags: int @@ -119,11 +138,13 @@ class CloneOp: child_thread_id: int ferrno: int + @dataclass(init=True, frozen=True) class ExitOp: status: int run_atexit_handlers: bool + @dataclass(init=True, frozen=True) class AccessOp: path: Path @@ -131,6 +152,7 @@ class AccessOp: flags: int ferrno: int + @dataclass(init=True, frozen=True) class StatOp: path: Path @@ -138,6 +160,7 @@ class StatOp: statx_buf: Statx ferrno: int + @dataclass(init=True, frozen=True) class ReaddirOp: dir: Path @@ -145,6 +168,7 @@ class ReaddirOp: all_children: bool ferrno: int + @dataclass(init=True, frozen=True) class WaitOp: pid: int @@ -153,6 +177,7 @@ class WaitOp: ret: int ferrno: int + @dataclass(init=True, frozen=True) class GetRUsageOp: waitpid_arg: int @@ -160,12 +185,14 @@ class GetRUsageOp: usage: Rusage ferrno: int + @dataclass(init=True, frozen=True) class ReadLinkOp: path: Path resolved: bytes ferrno: int + @dataclass(init=True, frozen=True) class 
UpdateMetadataOp: path: Path @@ -173,26 +200,31 @@ class UpdateMetadataOp: metadata: Metadata ferrno: int + @dataclass(init=True, frozen=True) class Op: data: OpInternal time: Timespec + @dataclass(init=True, frozen=True) class Mode: mode: int + @dataclass(init=True, frozen=True) class Ownership: uid: int gid: int + @dataclass(init=True, frozen=True) class Times: is_null: bool atime: Timeval mtime: Timeval + Metadata: typing.TypeAlias = Mode | Ownership | Times OpInternal: typing.TypeAlias = InitProcessOp | InitExecEpochOp | InitThreadOp | OpenOp | CloseOp | ChdirOp | ExecOp | CloneOp | ExitOp | AccessOp | StatOp | ReaddirOp | WaitOp | GetRUsageOp | UpdateMetadataOp | ReadLinkOp From 5ad8f9b69eb04352e2cd7f4be44da70477ffafc3 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 3 Jul 2024 22:03:09 -0500 Subject: [PATCH 21/37] improved arg parsing --- probe_src/probe_frontend/.envrc | 1 + probe_src/probe_frontend/Cargo.lock | 19 --- probe_src/probe_frontend/cli/Cargo.toml | 2 +- probe_src/probe_frontend/cli/src/main.rs | 202 ++++++++++++----------- probe_src/probe_frontend/lib/src/ops.rs | 6 +- probe_src/probe_frontend/python/ops.py | 1 + 6 files changed, 111 insertions(+), 120 deletions(-) diff --git a/probe_src/probe_frontend/.envrc b/probe_src/probe_frontend/.envrc index 86255ac0..36551f7f 100644 --- a/probe_src/probe_frontend/.envrc +++ b/probe_src/probe_frontend/.envrc @@ -1,2 +1,3 @@ use_flake +export __PROBE_LOG=info diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index 713b236c..e8657e00 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -211,7 +211,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" dependencies = [ "clap_builder", - "clap_derive", ] [[package]] @@ -226,18 +225,6 @@ dependencies = [ "strsim", ] -[[package]] -name = "clap_derive" -version = "4.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "clap_lex" version = "0.7.1" @@ -452,12 +439,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - [[package]] name = "home" version = "0.5.9" diff --git a/probe_src/probe_frontend/cli/Cargo.toml b/probe_src/probe_frontend/cli/Cargo.toml index 5a3add18..e6802e0c 100644 --- a/probe_src/probe_frontend/cli/Cargo.toml +++ b/probe_src/probe_frontend/cli/Cargo.toml @@ -11,7 +11,7 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" -clap = { version = "4.5.7", features = ["derive"] } +clap = { version = "4.5.7", features = ["cargo"] } color-eyre = "0.6.3" env_logger = "0.11.3" flate2 = "1.0.30" diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs index 2f26004c..4a0ed526 100644 --- a/probe_src/probe_frontend/cli/src/main.rs +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -1,7 +1,7 @@ use std::{ffi::OsString, fs::File}; -use clap::Parser; -use color_eyre::eyre::{Context, Result}; +use clap::{arg, command, value_parser, Command}; +use color_eyre::eyre::{eyre, Context, Result}; 
use flate2::Compression; /// Output the ops from a probe log file to stdout. @@ -16,110 +16,114 @@ mod transcribe; /// Utility code for creating temporary directories. mod util; -/// Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs. -#[derive(clap::Parser, Debug, Clone)] -#[command(author, version, about, long_about = None)] -#[command(propagate_version = true)] -struct Cli { - #[command(subcommand)] - command: Command, -} - -#[derive(clap::Subcommand, Debug, Clone)] -enum Command { - /// Execute a command and record its provenance - Record { - /// Path to output to - #[arg(short, long)] - output: Option, - - /// Overwrite existing output directory if it exists - #[arg(short = 'f', long)] - overwrite: bool, - - /// emit PROBE record rather than PROBE log. - #[arg(short, long)] - no_transcribe: bool, - - /// Run in gdb - #[arg(long)] - gdb: bool, - - /// Run in verbose & debug build of libprobe - #[arg(long)] - debug: bool, - - /// Command to execute under provenance - #[arg(required = true)] - cmd: Vec, - }, - - /// Convert PROBE records to PROBE logs. - Transcribe { - /// Overwrite existing output directory if it exists - #[arg(short = 'f', long)] - overwrite: bool, - - /// Path to write the transcribed PROBE log. - #[arg(short, long, required = false, default_value = "probe_log")] - output: OsString, - - /// Path to read the PROBE record from. - #[arg(short, long, required = false, default_value = "probe_record")] - input: OsString, - }, - - /// Write the data from probe log data in a human-readable manner - Dump { - /// output json - #[arg(long)] - json: bool, - - /// Path to load PROBE log from - #[arg(short, long, required = false, default_value = "probe_log")] - input: OsString, - }, -} - fn main() -> Result<()> { color_eyre::install()?; env_logger::Builder::from_env(env_logger::Env::new().filter_or("__PROBE_LOG", "warn")).init(); log::debug!("Logger initialized"); - match Cli::parse().command { - Command::Record { - output, - overwrite, - no_transcribe, - gdb, - debug, - cmd, - } => if no_transcribe { - record::record_no_transcribe(output, overwrite, gdb, debug, cmd) - } else { - record::record_transcribe(output, overwrite, gdb, debug, cmd) + let matches = command!() + .about("Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs.") + .subcommands([ + Command::new("record") + .args([ + arg!(-o --output "Set destinaton for recording.") + .required(false) + .value_parser(value_parser!(OsString)), + arg!(-f --overwrite "Overwrite existing output if it exists.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-n --"no-transcribe" "Emit PROBE record rather than PROBE log.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(--gdb "Run under gdb.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(--debug "Run in verbose & debug build of libprobe.") + .required(false) + .value_parser(value_parser!(bool)), + arg!( ... 
"Command to execute under provenance.") + .required(true) + .trailing_var_arg(true) + .value_parser(value_parser!(OsString)), + ]) + .about("Execute a command and record its provenance"), + Command::new("transcribe") + .args([ + arg!(-f --overwrite "Overwrite existing output if it exists.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-o --output "Path to write the transcribed PROBE log.") + .required(false) + .default_value("probe_log") + .value_parser(value_parser!(OsString)), + arg!(-i --input "Path to read the PROBE record from.") + .required(false) + .default_value("probe_record") + .value_parser(value_parser!(OsString)), + ]) + .about("Convert PROBE records to PROBE logs."), + Command::new("dump") + .args([ + arg!(--json "Output JSON.") + .required(false) + .value_parser(value_parser!(bool)), + arg!(-i --input "Path to load PROBE log from.") + .required(false) + .default_value("probe_log") + .value_parser(value_parser!(OsString)), + ]) + .about("Write the data from probe log data in a human-readable manne"), + ]) + .get_matches(); + + match matches.subcommand() { + Some(("record", sub)) => { + let output = sub.get_one::("output").cloned(); + let overwrite = sub.get_flag("overwrite"); + let no_transcribe = sub.get_flag("no-transcribe"); + let gdb = sub.get_flag("gdb"); + let debug = sub.get_flag("debug"); + let cmd = sub + .get_many::("CMD") + .unwrap() + .cloned() + .collect::>(); + + if no_transcribe { + record::record_no_transcribe(output, overwrite, gdb, debug, cmd) + } else { + record::record_transcribe(output, overwrite, gdb, debug, cmd) + } + .wrap_err("Record command failed") } - .wrap_err("Record command failed"), - - Command::Transcribe { - overwrite, - output, - input, - } => if overwrite { - File::create(&output) - } else { - File::create_new(&output) + Some(("transcribe", sub)) => { + let overwrite = sub.get_flag("overwrite"); + let output = sub.get_one::("output").unwrap().clone(); + let input = sub.get_one::("input").unwrap().clone(); + + if overwrite { + File::create(&output) + } else { + File::create_new(&output) + } + .wrap_err("Failed to create output file") + .map(|file| { + tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default())) + }) + .and_then(|mut tar| transcribe::transcribe(input, &mut tar)) + .wrap_err("Transcribe command failed") } - .wrap_err("Failed to create output file") - .map(|file| tar::Builder::new(flate2::write::GzEncoder::new(file, Compression::default()))) - .and_then(|mut tar| transcribe::transcribe(input, &mut tar)) - .wrap_err("Transcribe command failed"), - - Command::Dump { json, input } => if json { - dump::to_stdout_json(input) - } else { - dump::to_stdout(input) + Some(("dump", sub)) => { + let json = sub.get_flag("json"); + let input = sub.get_one::("input").unwrap().clone(); + + if json { + dump::to_stdout_json(input) + } else { + dump::to_stdout(input) + } + .wrap_err("Dump command failed") } - .wrap_err("Dump command failed"), + _ => Err(eyre!("unexpected subcommand")), } } diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index 0e1634fb..dad9222a 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -313,7 +313,11 @@ impl FfiFrom for Op { } } -probe_macros::pygen_add_preamble!("AT_FDCWD: int = -100"); +probe_macros::pygen_add_preamble!( + "# https://github.com/torvalds/linux/blob/\ + 73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98", + "AT_FDCWD: int = -100" +); #[test] fn 
at_fdcwd_sanity_check() {
     assert_eq!(libc::AT_FDCWD, -100);

diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py
index 03549cc0..33a2bd4c 100644
--- a/probe_src/probe_frontend/python/ops.py
+++ b/probe_src/probe_frontend/python/ops.py
@@ -3,6 +3,7 @@
 import typing
 from dataclasses import dataclass
 
+# https://github.com/torvalds/linux/blob/73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98
 AT_FDCWD: int = -100
 
 @dataclass(init=True, frozen=True)

From 3bf3d4457214ed58ca298d9c6f86661a8ecb7445 Mon Sep 17 00:00:00 2001
From: Jenna Fligor
Date: Wed, 3 Jul 2024 22:56:47 -0500
Subject: [PATCH 22/37] improved readme

---
 probe_src/probe_frontend/README.md         | 33 ++++++++++++++--------
 probe_src/probe_frontend/cli/src/record.rs |  4 +--
 probe_src/probe_frontend/macros/src/lib.rs | 12 ++++++++
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md
index 1134f68f..5cac5b4e 100644
--- a/probe_src/probe_frontend/README.md
+++ b/probe_src/probe_frontend/README.md
@@ -9,17 +9,13 @@ The documentation in this project assumes the reader understands a couple
 pieces of terminology specific to this tool.
 
 - **Probe record** (or probe recording)
-This is a directory (`probe_record` by default) that contains raw arena
-allocator `*.dat` files created by libprobe, these files contain
-[mmap(2)](https://www.man7.org/linux/man-pages/man2/mmap.2.html)-ed c structures
-and are not guaranteed to valid if moved to a computer with a different
-architecture, kernel version, or c compiler (or if any of those things change on
-the same computer).
+This is an intermediate representation when creating a probe log. (see the section
+on serialization formats for more details)
 
 - **Probe log**
-This is a directory or file (`probe_log` by default) that encodes the data
-from a probe record in a format that is cross-platform and much easier to use.
-(see the section on serialization format for details).
+This is a file (`probe_log` by default) that encodes the data from a probe
+record in a format that is cross-platform and much easier to use. (see the
+section on serialization format for details)
 
 - **Transcription**
 This is the process of converting a probe record to a probe log.
@@ -50,7 +46,12 @@ The transcription process can take a while after the program exits, if you
 don't want to automatically transcribe the record, you can pass the `-n` flag,
 this will change the default output path from `probe_log` to `probe_record`, and
 will output a probe record directory that can be transcribed to a probe log later
-with the `probe transcribe` command.
+with the `probe transcribe` command. However, the probe record format is not
+stable, so users are strongly encouraged to have `probe record` automatically
+transcribe the record directory immediately after the process exits. If you do
+separate the transcription step from recording, then transcription **must** be
+done on the same machine with the exact same version of the cli (and other
+constraints, see the section on serialization format for more details).
 
 ### Subshells
 
@@ -64,10 +65,11 @@ If you need these you can either write a shell script and invoke `probe record`
 on that, or else run:
 
 ```bash
-probe record -- bash -c ''`
+probe record bash -c ''`
 ```
 
-(note the `--` so that `probe` doesn't try to parse `-c` as a flag).
+(any flag after the first positional argument is not parsed by `probe` and is
+instead treated as part of the command).
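For illustration, here is a minimal sketch tying together the subshell form above and the record-then-transcribe workflow described earlier in this README hunk. The pipeline being traced is a placeholder (not part of this repository); only the `probe` flags and the default `probe_record`/`probe_log` paths come from the patches themselves.

```bash
# shell constructs (pipes, redirection, ...) need a real shell process under probe
probe record bash -c 'cat flake.nix | wc -l'

# record now, transcribe later (same machine, same version of probe)
probe record -n bash -c 'cat flake.nix | wc -l'
probe transcribe -i probe_record -o probe_log
```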
## Serialization formats @@ -101,6 +103,13 @@ more op c structs, followed by zero or more null bytes. - Each data arena is a binary file containing an arena header followed by zero or more bytes of arbitrary data, followed by zero or more null bytes. +**note:** these files contain +[mmap(2)](https://www.man7.org/linux/man-pages/man2/mmap.2.html)-ed c structures +and are not guaranteed to valid if moved to a computer with a different +architecture, kernel version, or c compiler (or if any of those things change on +the same computer), and may not be properly decoded by versions of the cli with +even patch version differences. + ### Probe log directory This format **is** part of this tool's spec, and this tool is the source of diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index 8497afae..d7b83c98 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -12,7 +12,7 @@ use crate::{transcribe, util::Dir}; // TODO: modularize and improve ergonomics (maybe expand builder pattern?) -/// create a probe record directory from subset of a [`Command::Record`](crate::Command::Record) +/// create a probe record directory from command arguments pub fn record_no_transcribe( output: Option, overwrite: bool, @@ -48,7 +48,7 @@ pub fn record_no_transcribe( Ok(()) } -/// create a probe log file from subset of a [`Command::Record`](crate::Command::Record) +/// create a probe log file from command arguments pub fn record_transcribe( output: Option, overwrite: bool, diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 470f8202..ce496f32 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -185,6 +185,15 @@ pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { TokenStream::new() } +/// add a property to a python dataclass with the following syntax: +/// +/// ``` +/// pygen_add_prop!(ClassName impl prop_name -> return_type: +/// "line1", +/// "return line2" +/// ... +/// ); +/// ``` // TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_add_prop(input: TokenStream) -> TokenStream { @@ -225,6 +234,9 @@ impl Parse for AddPropArgs { } } +/// Add one or more lines to the generated python file, after the imports, but before any generated +/// class or enum. 
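For reference, the in-tree invocations of these helper macros, as added to `lib/src/ops.rs` earlier in this series (shown here in their short form), give a concrete picture of the expected syntax:

```rust
// preamble line emitted near the top of the generated ops.py
probe_macros::pygen_add_preamble!("AT_FDCWD: int = -100");

// extra @property added to the generated Path dataclass
probe_macros::pygen_add_prop!(Path impl dirfd -> int:
    "return self.dirfd_minus_at_fdcwd + AT_FDCWD"
);
```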
+// TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_add_preamble(input: TokenStream) -> TokenStream { let args = parse_macro_input!(input as AddPreambleArgs); From 8a46d6c7ce046382feafb7044bea49bfc209085c Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Thu, 4 Jul 2024 11:10:16 -0500 Subject: [PATCH 23/37] cleanup and documentation --- probe_src/probe_frontend/Cargo.toml | 2 +- .../probe_frontend/lib/src/transcribe.rs | 2 -- probe_src/probe_frontend/macros/Cargo.toml | 1 - probe_src/probe_frontend/macros/src/lib.rs | 34 ++++++++++++++++++- probe_src/probe_frontend/macros/src/pygen.rs | 12 ++++--- 5 files changed, 41 insertions(+), 10 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index a4e4dc97..5e5d8417 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -13,7 +13,7 @@ publish = false edition = "2021" [workspace.lints.rust] -unsafe_op_in_unsafe_fn = "deny" +unsafe_op_in_unsafe_fn = "forbid" [workspace.metadata.crane] name = "probe" diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index 31232479..b692776e 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -14,8 +14,6 @@ use crate::{ ops::{self, C_Op, FfiFrom}, }; -// pub mod ops; - /// Recursively parse a whole probe record directory and write it to a probe log directory. /// /// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. diff --git a/probe_src/probe_frontend/macros/Cargo.toml b/probe_src/probe_frontend/macros/Cargo.toml index bbf84e6f..b85d3753 100644 --- a/probe_src/probe_frontend/macros/Cargo.toml +++ b/probe_src/probe_frontend/macros/Cargo.toml @@ -11,7 +11,6 @@ proc-macro = true [dependencies] parking_lot = "0.12.3" -# darling = "0.20.9" proc-macro2 = "1.0.86" quote = "1.0.36" syn = "2.0.68" diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index ce496f32..5c8735cc 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -7,6 +7,26 @@ use syn::{parse_quote, LitStr, Token}; mod pygen; +/// Generate a native rust struct from a rust-bindgen struct. +/// +/// In order to successfully generate a new struct, the struct it's invoked on must have the +/// following characteristics: +/// +/// - be a named struct (tuple and unit structs not supported). +/// - Name starts with `C_`. +/// - contain only types that implement `FfiFrom` (defined in probe_frontend, see ops module for +/// details). +/// +/// In will generate a struct with the following characteristics: +/// +/// - same name, but without the `C_` prefix, and converted from snake_case to PascalCase. +/// - any field in the original struct starting with `__` is ignored. +/// - any field in the original struct starting with `ru_`, `tv_`, or `stx_` will have that prefix +/// removed. +/// - derives serde's `Serialize`, `Deserialize` traits. +/// - contains a unit field `_type` that serializes to the struct's name. +/// - implements `FfiFrom` by calling it recursively on each field. +/// - derives [`PygenDataclass`]. 
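To make the mapping above concrete, here is an abbreviated sketch of the transformation. The input struct `C_example_op` is invented for illustration (it is not one of the real bindgen bindings), and the "generated" struct is a simplified rendering of the rules listed above, not the macro's literal expansion.

```rust
// Hypothetical bindgen input (illustrative only):
#[repr(C)]
pub struct C_example_op {
    pub tv_sec: i64,
    pub path: *const ::std::os::raw::c_char,
    pub __reserved: u32,
}

// Roughly what deriving MakeRustOp on it would produce (simplified):
#[derive(Debug, Clone, Serialize, Deserialize, PygenDataclass)]
pub struct ExampleOp {
    pub sec: i64,                  // `tv_` prefix stripped
    pub path: ::std::ffi::CString, // raw C string pointer becomes an owned CString
    // `__reserved` is skipped because its name starts with `__`
    pub _type: (),                 // placeholder, serialized as the string "ExampleOp"
}
// ...plus an `FfiFrom<C_example_op>` impl that converts each field via `ffi_into()`.
```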
// TODO: return compiler error instead of panicking on error #[proc_macro_derive(MakeRustOp)] pub fn make_rust_op(input: TokenStream) -> TokenStream { @@ -167,7 +187,15 @@ fn snake_case_to_pascal(input: &str) -> String { }) .1 } - +/// Generate a python dataclass from a rust struct. +/// +/// In order to successfully generate a dataclass, the struct it's invoked on must have the +/// following characteristics: +/// +/// - be a named struct (tuple and unit structs not supported). +/// - OR be an enum with either named variants or tuple enums containing only one item. +/// - contain only primitives, [`CString`](std::ffi::CString)s, or other generated dataclasses. +/// - field with the unit type are also allowed, but they're ignored. // TODO: return compiler error instead of panicking on error #[proc_macro_derive(PygenDataclass)] pub fn pygen_dataclass(input: TokenStream) -> TokenStream { @@ -177,6 +205,7 @@ pub fn pygen_dataclass(input: TokenStream) -> TokenStream { TokenStream::new() } +/// write the generated python to a path contained in a environment variable. // TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { @@ -252,6 +281,9 @@ impl Parse for AddPreambleArgs { lines.push(input.parse::()?.value()); while !input.is_empty() { input.parse::()?; + if input.is_empty() { + break; + } lines.push(input.parse::()?.value()); } diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 52497fc5..8d9c2264 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -56,8 +56,8 @@ pub fn pygen_dataclass_internal(input: syn::DeriveInput) { .named .iter() .filter_map(|field| { + // skip any field who's type is the unit type if let syn::Type::Tuple(syn::TypeTuple { elems, .. 
}) = &field.ty { - // this is the unit type, so we just skip it if elems.is_empty() { return None; } @@ -70,7 +70,6 @@ pub fn pygen_dataclass_internal(input: syn::DeriveInput) { }) .collect::>(); - // dataclass.add_inclass(basic_dataclass(name.clone(), &pairs)); enu.add_variant_owned_class(basic_dataclass(name.clone(), &pairs)); variants.push(name); } @@ -114,9 +113,12 @@ fn convert_to_pytype(ty: &syn::Type) -> String { "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" | "__suseconds_t" | "__syscall_slong_t" | "__syseconds_t" | "__time_t" | "__u16" | "__u32" | "__u64" | "__uid_t" | "c_int" | "c_long" | "c_uint" - | "dev_t" | "gid_t" | "i32" | "ino_t" | "mode_t" | "pid_t" | "uid_t" => { - "int".to_owned() - } + | "dev_t" | "gid_t" | "i128" | "i16" | "i32" | "i64" | "i8" | "ino_t" | "isize" + | "mode_t" | "pid_t" | "u128" | "u16" | "u32" | "u64" | "u8" | "uid_t" + | "usize" => "int".to_owned(), + + // float, python uses doubles for everything + "f32" | "f64" => "float".to_owned(), // CStrings are serialized as an array of bytes, so it makes sense to load them // into python as bytes From e4b6384c39ed86b80efb5f94960a4aa41b5d6443 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Thu, 4 Jul 2024 22:33:13 -0500 Subject: [PATCH 24/37] `probe record` warn on non-zero exit code --- probe_src/probe_frontend/cli/src/record.rs | 21 ++++++++++- probe_src/probe_frontend/cli/src/util.rs | 44 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index d7b83c98..b20ea276 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -1,6 +1,7 @@ use std::{ ffi::OsString, fs::{self, File}, + os::unix::process::ExitStatusExt, path::{Path, PathBuf}, thread, }; @@ -179,7 +180,25 @@ impl Recorder { } } - child.wait().wrap_err("Failed to await child process")?; + let exit = child.wait().wrap_err("Failed to await child process")?; + if !exit.success() { + match exit.code() { + Some(code) => log::warn!("Recorded process exited with code {code}"), + None => match exit.signal() { + Some(sig) => match crate::util::sig_to_name(sig) { + Some(name) => log::warn!("Recorded process exited with signal {name}"), + None => { + if sig < libc::SIGRTMAX() { + log::warn!("Recorded process exited with realtime signal {sig}"); + } else { + log::warn!("Recorded process exited with unknown signal {sig}"); + } + } + }, + None => log::warn!("Recorded process exited with unknown error"), + }, + } + } Ok(self.output) } diff --git a/probe_src/probe_frontend/cli/src/util.rs b/probe_src/probe_frontend/cli/src/util.rs index f530b31e..3547a38c 100644 --- a/probe_src/probe_frontend/cli/src/util.rs +++ b/probe_src/probe_frontend/cli/src/util.rs @@ -89,3 +89,47 @@ impl Drop for Dir { } } } + +pub(crate) fn sig_to_name(sig: i32) -> Option<&'static str> { + Some(match sig { + libc::SIGHUP => "SIGHUP", + libc::SIGINT => "SIGINT", + libc::SIGQUIT => "SIGQUIT", + libc::SIGILL => "SIGILL", + libc::SIGTRAP => "SIGTRAP", + libc::SIGABRT => "SIGABRT/SIGIOT", // SIGABRT and SIGIOT have the same code + libc::SIGBUS => "SIGBUS", + libc::SIGFPE => "SIGFPE", + libc::SIGKILL => "SIGKILL", + libc::SIGUSR1 => "SIGUSR1", + libc::SIGSEGV => "SIGSEGV", + libc::SIGUSR2 => "SIGUSR2", + libc::SIGPIPE => "SIGPIPE", + libc::SIGALRM => "SIGALRM", + libc::SIGTERM => "SIGTERM", + libc::SIGSTKFLT => "SIGSTKFLT", + libc::SIGCHLD => "SIGCHLD", + libc::SIGCONT => "SIGCONT", + libc::SIGSTOP => 
"SIGSTOP", + libc::SIGTSTP => "SIGTSTP", + libc::SIGTTIN => "SIGTTIN", + libc::SIGTTOU => "SIGTTOU", + libc::SIGURG => "SIGURG", + libc::SIGXCPU => "SIGXCPU", + libc::SIGXFSZ => "SIGXFSZ", + libc::SIGVTALRM => "SIGVTALRM", + libc::SIGPROF => "SIGPROF", + libc::SIGWINCH => "SIGWINCH", + libc::SIGIO => "SIGIO/SIGPOLL", // SIGIO and SIGPOLL have the same code + libc::SIGPWR => "SIGPWR", + libc::SIGSYS => "SIGSYS", + + _ => return None, + }) +} + +#[test] +fn sig_eq() { + assert_eq!(libc::SIGABRT, libc::SIGIOT); + assert_eq!(libc::SIGIO, libc::SIGPOLL); +} From 76515bdd6c40d0d9090c176692a5735ac71fc83f Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Fri, 5 Jul 2024 07:46:03 -0500 Subject: [PATCH 25/37] disable overzelous warning when running under gdb --- probe_src/probe_frontend/cli/src/record.rs | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index b20ea276..9d769be7 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -158,25 +158,29 @@ impl Recorder { .wrap_err("Failed to launch child process")? }; - // without this the child process typically won't have written it's first op by the time we - // do our sanity check, since we're about to wait on child anyway, this isn't a big deal. - thread::sleep(std::time::Duration::from_millis(50)); - - match Path::read_dir(self.output.path()) { - Ok(x) => { - let any_files = x - .into_iter() - .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; - if !any_files { - log::warn!( - "No arena files detected after 50ms, \ - something is wrong, you should probably abort!" - ); + if !self.gdb { + // without this the child process typically won't have written it's first op by the + // time we do our sanity check, since we're about to wait on child anyway, this isn't a + // big deal. + thread::sleep(std::time::Duration::from_millis(50)); + + match Path::read_dir(self.output.path()) { + Ok(x) => { + let any_files = x + .into_iter() + .try_fold(false, |_, x| x.map(|x| x.path().exists()))?; + if !any_files { + log::warn!( + "No arena files detected after 50ms, \ + something is wrong, you should probably abort!" 
+ ); + } + } + Err(e) => { + return Err(e).wrap_err( + "Unable to read record directory during post-startup sanity check", + ) } - } - Err(e) => { - return Err(e) - .wrap_err("Unable to read record directory during post-startup sanity check") } } From e8790962b18d7354c665a20550d1e104b3ffbdcd Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sat, 6 Jul 2024 07:46:20 -0500 Subject: [PATCH 26/37] switched gdb to internal binary shim --- probe_src/probe_frontend/Cargo.lock | 34 +++++++++++++++++++++- probe_src/probe_frontend/cli/Cargo.toml | 1 + probe_src/probe_frontend/cli/src/main.rs | 17 +++++++++++ probe_src/probe_frontend/cli/src/record.rs | 12 +++++--- 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/probe_src/probe_frontend/Cargo.lock b/probe_src/probe_frontend/Cargo.lock index e8657e00..04d1a6d1 100644 --- a/probe_src/probe_frontend/Cargo.lock +++ b/probe_src/probe_frontend/Cargo.lock @@ -368,6 +368,17 @@ dependencies = [ "log", ] +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + [[package]] name = "errno" version = "0.3.9" @@ -378,6 +389,26 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "exec" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "886b70328cba8871bfc025858e1de4be16b1d5088f2ba50b57816f4210672615" +dependencies = [ + "errno 0.2.8", + "libc", +] + [[package]] name = "eyre" version = "0.6.12" @@ -738,6 +769,7 @@ dependencies = [ "clap", "color-eyre", "env_logger", + "exec", "flate2", "libc", "log", @@ -907,7 +939,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.6.0", - "errno", + "errno 0.3.9", "libc", "linux-raw-sys", "windows-sys", diff --git a/probe_src/probe_frontend/cli/Cargo.toml b/probe_src/probe_frontend/cli/Cargo.toml index e6802e0c..1bf470ae 100644 --- a/probe_src/probe_frontend/cli/Cargo.toml +++ b/probe_src/probe_frontend/cli/Cargo.toml @@ -14,6 +14,7 @@ chrono = "0.4.38" clap = { version = "4.5.7", features = ["cargo"] } color-eyre = "0.6.3" env_logger = "0.11.3" +exec = "0.3.1" flate2 = "1.0.30" libc = "0.2.155" log = "0.4.21" diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs index 4a0ed526..ec3af790 100644 --- a/probe_src/probe_frontend/cli/src/main.rs +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -73,6 +73,12 @@ fn main() -> Result<()> { .value_parser(value_parser!(OsString)), ]) .about("Write the data from probe log data in a human-readable manne"), + Command::new("__gdb-exec-shim").hide(true).arg( + arg!( ... 
"Command to run") + .required(true) + .trailing_var_arg(true) + .value_parser(value_parser!(OsString)), + ), ]) .get_matches(); @@ -124,6 +130,17 @@ fn main() -> Result<()> { } .wrap_err("Dump command failed") } + Some(("__gdb-exec-shim", sub)) => { + let cmd = sub + .get_many::("CMD") + .unwrap() + .cloned() + .collect::>(); + + let e = exec::Command::new(&cmd[0]).args(&cmd[1..]).exec(); + + Err(e).wrap_err("Shim failed to exec") + } _ => Err(eyre!("unexpected subcommand")), } } diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index 9d769be7..7cff61e0 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -132,16 +132,20 @@ impl Recorder { } let mut child = if self.gdb { - let mut dir_env = OsString::from("__PROBE_DIR="); + let mut dir_env = OsString::from("--init-eval-command=set environmnet __PROBE_DIR="); dir_env.push(self.output.path()); - let mut preload_env = OsString::from("LD_PRELOAD="); + let mut preload_env = OsString::from("--init-eval-command=set environmnet LD_PRELOAD="); preload_env.push(ld_preload); + let self_bin = + std::env::current_exe().wrap_err("Failed to get path to current executable")?; + std::process::Command::new("gdb") - .arg("--args") - .arg("env") .arg(dir_env) .arg(preload_env) + .arg("--args") + .arg(self_bin) + .arg("__gdb-exec-shim") .args(&self.cmd) .env_remove("__PROBE_LIB") .env_remove("__PROBE_LOG") From 678ade83553e0ecf54eda3cdf00d25e66fe335e1 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 7 Jul 2024 13:33:59 -0500 Subject: [PATCH 27/37] proc macros return compile_error!() instead of panicking --- probe_src/probe_frontend/cli/src/dump.rs | 6 +- probe_src/probe_frontend/cli/src/record.rs | 2 + probe_src/probe_frontend/lib/build.rs | 2 +- .../probe_frontend/lib/src/transcribe.rs | 1 + probe_src/probe_frontend/macros/src/lib.rs | 113 ++++++++---- probe_src/probe_frontend/macros/src/pygen.rs | 166 +++++++++++++----- 6 files changed, 211 insertions(+), 79 deletions(-) diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index 8befd9d6..b387d0f7 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -132,9 +132,9 @@ struct DumpOp { op: ops::Op, } -// TODO: Display won't work (foreign trait rule) but some kind of streaming would be better; if we -// don't care about UTF-8 guarantees we might be able to do some kind of byte iterator approach and -// evaluate it all lazily +// OPTIMIZE: Display won't work (foreign trait rule) but some kind of streaming would greatly +// reduce unnecessary heap allocations and mem-copies; if we don't care about UTF-8 guarantees we +// might be able to do some kind of byte iterator approach and evaluate it all lazily trait Dump { fn dump(&self) -> String; } diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index 7cff61e0..897b9651 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -188,6 +188,8 @@ impl Recorder { } } + // OPTIMIZE: consider background serialization of ops as threads/processes exit instead of + // waiting until the end; large increase to complexity but potentially huge gains. 
let exit = child.wait().wrap_err("Failed to await child process")?; if !exit.success() { match exit.code() { diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index 70d8b1ad..eef37c9b 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -115,7 +115,7 @@ fn main() { // a huge hack, but it greatly reduces the generated code complexity // since in glibc all the long ints are unions over two types that // both alias to long int, this is done for kernel-userland - // compatibilityreasons that don't matter here. + // compatibility reasons that don't matter here. struct rusage { struct timeval ru_utime; struct timeval ru_stime; diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index b692776e..e155d8da 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -19,6 +19,7 @@ use crate::{ /// This function calls [`parse_pid()`] on each sub-directory in `in_dir` **in parallel**. /// /// on success, returns the number of Ops processed in the top-level directory +// OPTIMIZE: consider improved parallelism heuristic. pub fn parse_top_level, P2: AsRef + Sync>( in_dir: P1, out_dir: P2, diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 5c8735cc..535d6288 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -1,12 +1,15 @@ use proc_macro::TokenStream; use proc_macro2::Span; -use quote::quote; +use quote::{quote, quote_spanned}; use syn::parse::Parse; +use syn::spanned::Spanned; use syn::{parse_macro_input, Data, DeriveInput, Fields, Ident, Type}; use syn::{parse_quote, LitStr, Token}; mod pygen; +type MacroResult = Result; + /// Generate a native rust struct from a rust-bindgen struct. /// /// In order to successfully generate a new struct, the struct it's invoked on must have the @@ -27,7 +30,6 @@ mod pygen; /// - contains a unit field `_type` that serializes to the struct's name. /// - implements `FfiFrom` by calling it recursively on each field. /// - derives [`PygenDataclass`]. -// TODO: return compiler error instead of panicking on error #[proc_macro_derive(MakeRustOp)] pub fn make_rust_op(input: TokenStream) -> TokenStream { let original_struct = parse_macro_input!(input as DeriveInput); @@ -37,20 +39,43 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { Data::Struct(data_struct) => { let fields = match data_struct.fields { Fields::Named(x) => x, - _ => unimplemented!("unnamed and unit structs not implemented"), + _ => { + return quote_spanned! { + original_struct.span() => + compile_error!("Unit and Tuple structs not supported"); + } + .into() + } }; - let pairs = fields + let pairs = match fields .named .iter() - .filter_map(|x| { - let ident = x.ident.as_ref().unwrap(); + .filter_map(|field| { + let ident = match field.ident.as_ref() { + Some(x) => x, + None => { + return Some(Err(quote_spanned! { + field.ident.span() => + compile_error!("Field had no identifier"); + } + .into())) + } + }; + // filter out any identifier starting with __ since every example i've seen in + // glibc of "__ident" is padding or reserved space. 
if ident.to_string().starts_with("__") { return None; } - Some((ident, convert_bindgen_type(&x.ty))) + + let pair = convert_bindgen_type(&field.ty).map(|ty| (ident, ty)); + Some(pair) }) - .collect::>(); + .collect::>>() + { + Ok(x) => x, + Err(e) => return e, + }; let field_idents = pairs.iter().map(|x| x.0).collect::>(); @@ -139,37 +164,56 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { } .into() } - _ => unimplemented!("MakeRustOp only supports structs"), + _ => quote_spanned! { + original_struct.span() => + compile_error!("MakeRustOp only supports structs"); + } + .into(), } } -fn convert_bindgen_type(ty: &syn::Type) -> syn::Type { +fn convert_bindgen_type(ty: &syn::Type) -> MacroResult { match ty { - syn::Type::Ptr(_inner) => parse_quote!(::std::ffi::CString), + syn::Type::Ptr(_inner) => Ok(parse_quote!(::std::ffi::CString)), syn::Type::Array(inner) => { let mut new = inner.clone(); - new.elem = Box::new(convert_bindgen_type(&new.elem)); - Type::Array(new) + new.elem = Box::new(convert_bindgen_type(&new.elem)?); + Ok(Type::Array(new)) } syn::Type::Path(inner) => { - if let Some(name) = type_basename(inner).to_string().strip_prefix("C_") { + if let Some(name) = type_basename(inner)?.to_string().strip_prefix("C_") { let name = snake_case_to_pascal(name); let name = Ident::new(&name, Span::mixed_site()); - parse_quote!(#name) + Ok(parse_quote!(#name)) } else { - Type::Path(inner.clone()) + Ok(Type::Path(inner.clone())) } } - _ => unreachable!("unsupported bindgen type conversion"), + _ => Err(quote_spanned! { + ty.span() => + compile_error!("Unable to convert bindgen type"); + } + .into()), } } -fn type_basename(ty: &syn::TypePath) -> &syn::Ident { - if ty.qself.is_some() { - unimplemented!("qualified self-typs not supported"); +fn type_basename(ty: &syn::TypePath) -> MacroResult<&syn::Ident> { + if let Some(qself) = &ty.qself { + return Err(quote_spanned! { + qself.span() => + compile_error!("Qualified self types not supported"); + } + .into()); } - &ty.path.segments.last().expect("type has no segments").ident + match ty.path.segments.last() { + Some(x) => Ok(&x.ident), + None => Err(quote_spanned! { + ty.path.segments.span() => + compile_error!("Type path has no segments"); + } + .into()), + } } fn snake_case_to_pascal(input: &str) -> String { @@ -187,6 +231,7 @@ fn snake_case_to_pascal(input: &str) -> String { }) .1 } + /// Generate a python dataclass from a rust struct. /// /// In order to successfully generate a dataclass, the struct it's invoked on must have the @@ -196,22 +241,23 @@ fn snake_case_to_pascal(input: &str) -> String { /// - OR be an enum with either named variants or tuple enums containing only one item. /// - contain only primitives, [`CString`](std::ffi::CString)s, or other generated dataclasses. /// - field with the unit type are also allowed, but they're ignored. -// TODO: return compiler error instead of panicking on error #[proc_macro_derive(PygenDataclass)] pub fn pygen_dataclass(input: TokenStream) -> TokenStream { let source = parse_macro_input!(input as DeriveInput); - pygen::pygen_dataclass_internal(source); - // return empty token stream, we're not actually writing rust here - TokenStream::new() + match pygen::pygen_dataclass_internal(source) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } } /// write the generated python to a path contained in a environment variable. 
-// TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { let path = parse_macro_input!(input as syn::LitStr); - pygen::pygen_write_internal(path); - TokenStream::new() + match pygen::pygen_write_internal(path) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } } /// add a property to a python dataclass with the following syntax: @@ -223,12 +269,13 @@ pub fn pygen_write_to_env(input: TokenStream) -> TokenStream { /// ... /// ); /// ``` -// TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_add_prop(input: TokenStream) -> TokenStream { let args = parse_macro_input!(input as AddPropArgs); - pygen::pygen_add_prop_internal(args); - TokenStream::new() + match pygen::pygen_add_prop_internal(args) { + Ok(_) => TokenStream::new(), + Err(e) => e, + } } pub(crate) struct AddPropArgs { @@ -251,6 +298,9 @@ impl Parse for AddPropArgs { body.push(input.parse::()?.value()); while !input.is_empty() { input.parse::()?; + if input.is_empty() { + break; + } body.push(input.parse::()?.value()); } @@ -265,7 +315,6 @@ impl Parse for AddPropArgs { /// Add one or more lines to the generated python file, after the imports, but before any generated /// class or enum. -// TODO: return compiler error instead of panicking on error #[proc_macro] pub fn pygen_add_preamble(input: TokenStream) -> TokenStream { let args = parse_macro_input!(input as AddPreambleArgs); diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 8d9c2264..ee3bd819 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -1,23 +1,32 @@ use parking_lot::RwLock; +use quote::quote_spanned; use std::fmt::Display; use std::fs::File; use std::io::Write; use std::sync::OnceLock; -use syn::{Data, Fields}; +use syn::{spanned::Spanned, Data, Fields}; + +use crate::MacroResult; fn pygen_file() -> &'static RwLock { static INNER: OnceLock> = OnceLock::new(); INNER.get_or_init(|| RwLock::new(PygenFile::new())) } -pub fn pygen_dataclass_internal(input: syn::DeriveInput) { +pub fn pygen_dataclass_internal(input: syn::DeriveInput) -> MacroResult<()> { let syn::DeriveInput { data, ident, .. } = input.clone(); match data { Data::Struct(data_struct) => { let fields = match data_struct.fields { Fields::Named(x) => x, - _ => unimplemented!("unnamed and unit structs not implemented"), + _ => { + return Err(quote_spanned! { + input.span() => + compile_error!("Unnamed and unit structs not implemented") + } + .into()) + } }; let pairs = fields @@ -31,12 +40,19 @@ pub fn pygen_dataclass_internal(input: syn::DeriveInput) { } } - Some(( - field.ident.as_ref().unwrap().to_string(), - convert_to_pytype(&field.ty), - )) + let pair = + convert_to_pytype(&field.ty).and_then(|ty| match field.ident.as_ref() { + Some(ident) => Ok((ident.to_string(), ty)), + None => Err(quote_spanned! 
{ + field.span() => + compile_error!("Field doesn't have identifier"); + } + .into()), + }); + + Some(pair) }) - .collect::>(); + .collect::>>()?; let dataclass = basic_dataclass(ident.to_string(), &pairs); pygen_file().write().classes.push(dataclass); @@ -63,12 +79,21 @@ pub fn pygen_dataclass_internal(input: syn::DeriveInput) { } } - Some(( - field.ident.as_ref().unwrap().to_string(), - convert_to_pytype(&field.ty), - )) + let pair = convert_to_pytype(&field.ty).and_then(|ty| match field + .ident + .as_ref() + { + Some(ident) => Ok((ident.to_string(), ty)), + None => Err(quote_spanned! { + field.span() => + compile_error!("Field doesn't have identifier"); + } + .into()), + }); + + Some(pair) }) - .collect::>(); + .collect::>>()?; enu.add_variant_owned_class(basic_dataclass(name.clone(), &pairs)); variants.push(name); @@ -76,18 +101,36 @@ pub fn pygen_dataclass_internal(input: syn::DeriveInput) { syn::Fields::Unnamed(inner) => { let fields = inner.unnamed.iter().collect::>(); if fields.len() != 1 { - unimplemented!("Tuple enums of length != 1 not supported") + return Err(quote_spanned! { + inner.span() => + compile_error!("Tuple enums of length != 1 not supported") + } + .into()); + } + enu.add_variant_ref(convert_to_pytype(&fields[0].ty)?); + } + syn::Fields::Unit => { + return Err(quote_spanned! { + variant.fields.span() => + compile_error!("Unit enum variants not supported") } - enu.add_variant_ref(convert_to_pytype(&fields[0].ty)); + .into()) } - syn::Fields::Unit => unimplemented!("Unit enum variants not supported"), } } pygen_file().write().enums.push(enu); } - Data::Union(_data_union) => unimplemented!(), + Data::Union(_data_union) => { + return Err(quote_spanned! { + input.span() => + compile_error!("Unions not supported") + } + .into()) + } }; + + Ok(()) } fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass { @@ -100,14 +143,12 @@ fn basic_dataclass(name: String, pairs: &[(String, String)]) -> Dataclass { dataclass } -fn convert_to_pytype(ty: &syn::Type) -> String { +fn convert_to_pytype(ty: &syn::Type) -> MacroResult { match ty { - syn::Type::Array(inner) => { - format!("list[{}]", convert_to_pytype(inner.elem.as_ref())) - } + syn::Type::Array(inner) => Ok(format!("list[{}]", convert_to_pytype(inner.elem.as_ref())?)), syn::Type::Path(inner) => { - let name = crate::type_basename(inner).to_string(); - match name.as_str() { + let name = crate::type_basename(inner)?.to_string(); + Ok(match name.as_str() { // that's a lot of ways to say "int", python ints are bigints so we don't have to // care about size "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" @@ -128,24 +169,40 @@ fn convert_to_pytype(ty: &syn::Type) -> String { "bool" => name, _ => name, - } + }) + } + _ => Err(quote_spanned! { + ty.span() => + compile_error!("Unsupported type type"); } - _ => unimplemented!("unsupported type type"), + .into()), } } -pub(crate) fn pygen_write_internal(path: syn::LitStr) { - let path = path.value(); - let path = std::env::var_os(&path) - .unwrap_or_else(|| panic!("Environment variable '{}' not defined", path)); +pub(crate) fn pygen_write_internal(path: syn::LitStr) -> MacroResult<()> { + let path_str = path.value(); + let path_str = match std::env::var_os(path_str) { + Some(x) => x, + None => { + return Err(quote_spanned! 
{ + path.span() => + compile_error!("Environmnet variable not defined"); + } + .into()) + } + }; - let mut file = File::create(&path).unwrap_or_else(|e| { - panic!( - "unable to create file '{}' when writing pygen file: {}", - path.to_string_lossy(), - e - ) - }); + let mut file = match File::create(path_str) { + Ok(x) => x, + Err(e) => { + eprintln!("pygen IO error: {}", e); + return Err(quote_spanned! { + path.span() => + compile_error!("Failed to create pygen file"); + } + .into()); + } + }; pygen_file().write().prepend_preamble( [ @@ -158,21 +215,43 @@ pub(crate) fn pygen_write_internal(path: syn::LitStr) { .collect(), ); - writeln!(file, "{}", pygen_file().read()).expect("Failed to write pygen file"); + if let Err(e) = writeln!(file, "{}", pygen_file().read()) { + eprintln!("pygen IO error: {}", e); + return Err(quote_spanned! { + path.span() => + compile_error!("Failed to write pygen file"); + } + .into()); + } + + Ok(()) } -pub(crate) fn pygen_add_prop_internal(args: crate::AddPropArgs) { +pub(crate) fn pygen_add_prop_internal(args: crate::AddPropArgs) -> MacroResult<()> { let class = args.class.to_string(); let mut prop = DataclassProp::new(args.name.to_string(), args.ret.to_string()); args.body.into_iter().for_each(|x| prop.body.push(x)); - for dataclass in pygen_file().write().classes.iter_mut() { - if dataclass.name != class { - continue; + let mut write_lock = pygen_file().write(); + + let dataclass = match write_lock + .classes + .iter_mut() + .find(|dataclass| dataclass.name == class) + { + Some(x) => x, + None => { + return Err(quote_spanned! { + args.class.span() => + compile_error!("No such dataclass found"); + } + .into()) } + }; - dataclass.add_prop(prop.clone()); - } + dataclass.add_prop(prop); + + Ok(()) } pub(crate) fn pygen_add_preamble(args: crate::AddPreambleArgs) { @@ -377,6 +456,7 @@ impl Display for Enum { } let mut iter = types.iter(); + // unwrap allowed because we checked that types isn't empty let first = iter.next().unwrap(); write!(f, "{first}")?; From 89a822dab8c8b2c08352ca022d9e7bc32a2f98f2 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 7 Jul 2024 20:19:56 -0500 Subject: [PATCH 28/37] added runtime check for arena instantiation --- probe_src/probe_frontend/lib/src/ops.rs | 26 ++++++++-- .../probe_frontend/lib/src/transcribe.rs | 49 ++++++++++++------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index dad9222a..9a37b9b5 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -318,10 +318,6 @@ probe_macros::pygen_add_preamble!( 73e931504f8e0d42978bfcda37b323dbbd1afc08/include/uapi/linux/fcntl.h#L98", "AT_FDCWD: int = -100" ); -#[test] -fn at_fdcwd_sanity_check() { - assert_eq!(libc::AT_FDCWD, -100); -} probe_macros::pygen_add_prop!(Path impl dirfd -> int: "return self.dirfd_minus_at_fdcwd + AT_FDCWD" @@ -330,3 +326,25 @@ probe_macros::pygen_add_prop!(Path impl dirfd -> int: // WARNING: this macro invocation must come after all other pygen calls for those calls to be // included in the written file probe_macros::pygen_write_to_env!("PYGEN_OUTFILE"); + +#[cfg(test)] +mod tests { + use super::*; + + // we define this constant in the generated python code, so we should make sure we get it + // right. 
+ #[test] + fn at_fdcwd_sanity_check() { + assert_eq!(libc::AT_FDCWD, -100); + } + + // since we're defining a custom version of the rusage struct (indirectly through rust-bindgen) + // we should at least check that they're the same size. + #[test] + fn rusage_size() { + assert_eq!( + std::mem::size_of::(), + std::mem::size_of::() + ); + } +} diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index e155d8da..30c63a7e 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -178,7 +178,9 @@ pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R .into_iter() .map(|data_dat_file| { DataArena::from_bytes( - std::fs::read(data_dat_file).wrap_err("Failed to read file from data directory")?, + std::fs::read(&data_dat_file) + .wrap_err("Failed to read file from data directory")?, + filename_numeric(&data_dat_file)?, ) }) .collect::>>()?, @@ -195,10 +197,10 @@ pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R // STEP 5 .into_iter() .map(|ops_dat_file| { - std::fs::read(ops_dat_file) + std::fs::read(&ops_dat_file) .wrap_err("Failed to read file from ops directory") .and_then(|file_contents| { - OpsArena::from_bytes(file_contents) + OpsArena::from_bytes(file_contents, filename_numeric(&ops_dat_file)?) .wrap_err("Error constructing OpsArena")? .decode(&ctx) .wrap_err("Error decoding OpsArena") @@ -226,28 +228,28 @@ pub fn parse_tid, P2: AsRef>(in_dir: P1, out_dir: P2) -> R Ok(count) } -/// Gets the filename from a path and returns it parsed as an integer. +/// Gets the [`file stem`](Path::file_stem()) from a path and returns it parsed as an integer. /// -/// Errors if the path has no filename, the filename isn't valid UTF-8, or the filename can't be -/// parsed as an integer. +/// Errors if the path has no file stem (see [`Path::file_stem()`] for details), the file stem +/// isn't valid UTF-8, or the filename can't be parsed as an integer. // TODO: cleanup errors, better context fn filename_numeric>(dir: P) -> Result { - let filename = dir.as_ref().file_name().ok_or_else(|| { - log::error!("'{}' has no filename", dir.as_ref().to_string_lossy()); - option_err("path has no filename") + let file_stem = dir.as_ref().file_stem().ok_or_else(|| { + log::error!("'{}' has no file stem", dir.as_ref().to_string_lossy()); + option_err("path has no file stem") })?; - filename + file_stem .to_str() .ok_or_else(|| { - log::error!("'{}' not valid UTF-8", filename.to_string_lossy()); + log::error!("'{}' not valid UTF-8", file_stem.to_string_lossy()); option_err("filename not valid UTF-8") })? 
.parse::() .map_err(|e| { log::error!( "Parsing filename '{}' to integer", - filename.to_string_lossy() + file_stem.to_string_lossy() ); ProbeError::from(e) }) @@ -275,8 +277,8 @@ pub struct DataArena { } impl DataArena { - pub fn from_bytes(bytes: Vec) -> Result { - let header = ArenaHeader::from_bytes(&bytes) + pub fn from_bytes(bytes: Vec, instantiation: usize) -> Result { + let header = ArenaHeader::from_bytes(&bytes, instantiation) .wrap_err("Failed to create ArenaHeader for DataArena")?; Ok(Self { header, raw: bytes }) @@ -308,8 +310,8 @@ pub struct OpsArena<'a> { } impl<'a> OpsArena<'a> { - pub fn from_bytes(bytes: Vec) -> Result { - let header = ArenaHeader::from_bytes(&bytes) + pub fn from_bytes(bytes: Vec, instantiation: usize) -> Result { + let header = ArenaHeader::from_bytes(&bytes, instantiation) .wrap_err("Failed to create ArenaHeader for OpsArena")?; if ((header.used - size_of::()) % size_of::()) != 0 { @@ -340,7 +342,6 @@ impl<'a> OpsArena<'a> { #[repr(C)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ArenaHeader { - // TODO: check instantiation (requires filename) instantiation: libc::size_t, base_address: libc::uintptr_t, capacity: libc::uintptr_t, @@ -349,7 +350,7 @@ pub struct ArenaHeader { impl ArenaHeader { /// Parse the front of a raw byte buffer into a libprobe arena header - fn from_bytes(bytes: &[u8]) -> Result { + fn from_bytes(bytes: &[u8], instantiation: usize) -> Result { let ptr = bytes as *const [u8] as *const Self; if bytes.len() < size_of::() { @@ -392,6 +393,14 @@ impl ArenaHeader { .into()); } + if header.instantiation != instantiation { + return Err(ArenaError::InstantiationMismatch { + header: header.instantiation, + passed: instantiation, + } + .into()); + } + Ok(header) } } @@ -415,4 +424,8 @@ pub enum ArenaError { /// some integer. 
#[error("Arena alignment error: used arena size minus header isn't a multiple of op size")] Misaligned, + + /// Returned if the instantiation in a [`ArenaHeader`] doesn't match the indicated one + #[error("Header contained Instantiation ID {header}, but {passed} was indicated")] + InstantiationMismatch { header: usize, passed: usize }, } From c0e8d02b58c2972f4f75a512482a924a71ebdd5a Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Sun, 7 Jul 2024 20:38:04 -0500 Subject: [PATCH 29/37] fixed README.md --- probe_src/probe_frontend/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/probe_src/probe_frontend/README.md b/probe_src/probe_frontend/README.md index 5cac5b4e..4d993678 100644 --- a/probe_src/probe_frontend/README.md +++ b/probe_src/probe_frontend/README.md @@ -65,7 +65,7 @@ If you need these you can either write a shell script and invoke `probe record` on that, or else run: ```bash -probe record bash -c ''` +probe record bash -c '' ``` (any flag after the first positional argument is ignored and treated like a From afd70c0c56f163ffd8abb2a70238f9b95dd0dc3e Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 10 Jul 2024 13:40:18 -0500 Subject: [PATCH 30/37] fixed `--gdb` --- probe_src/probe_frontend/cli/src/record.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/probe_src/probe_frontend/cli/src/record.rs b/probe_src/probe_frontend/cli/src/record.rs index 897b9651..396970ee 100644 --- a/probe_src/probe_frontend/cli/src/record.rs +++ b/probe_src/probe_frontend/cli/src/record.rs @@ -132,9 +132,9 @@ impl Recorder { } let mut child = if self.gdb { - let mut dir_env = OsString::from("--init-eval-command=set environmnet __PROBE_DIR="); + let mut dir_env = OsString::from("--init-eval-command=set environment __PROBE_DIR="); dir_env.push(self.output.path()); - let mut preload_env = OsString::from("--init-eval-command=set environmnet LD_PRELOAD="); + let mut preload_env = OsString::from("--init-eval-command=set environment LD_PRELOAD="); preload_env.push(ld_preload); let self_bin = From 8ad2b16de212af6dd6e4a33e77cdce11d5da0369 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 10 Jul 2024 16:51:17 -0500 Subject: [PATCH 31/37] probe_log decoding now matches parse_probe_log.py --- probe_src/probe_frontend/python/probe.py | 51 ++++++++++++++++++------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py index 3ae28714..f96a925a 100644 --- a/probe_src/probe_frontend/python/probe.py +++ b/probe_src/probe_frontend/python/probe.py @@ -2,12 +2,32 @@ import typing import json import tarfile +from dataclasses import dataclass from . 
import ops -OpTable = typing.Mapping[int, typing.Mapping[int, typing.Mapping[int, typing.List[ops.Op]]]] +@dataclass(frozen=True) +class ThreadProvLog: + tid: int + ops: typing.Sequence[ops.Op] -def load_log(path: str) -> OpTable: - ret: dict[int, dict[int, dict[int, list[ops.Op]]]] = {} +@dataclass(frozen=True) +class ExecEpochProvLog: + epoch: int + threads: typing.Mapping[int, ThreadProvLog] + + +@dataclass(frozen=True) +class ProcessProvLog: + pid: int + exec_epochs: typing.Mapping[int, ExecEpochProvLog] + + +@dataclass(frozen=True) +class ProvLog: + processes: typing.Mapping[int, ProcessProvLog] + +def load_log(path: str) -> ProvLog: + op_map: typing.Dict[int, typing.Dict[int, typing.Dict[int, ThreadProvLog]]] = {} tar = tarfile.open(path, mode='r') @@ -25,12 +45,10 @@ def load_log(path: str) -> OpTable: tid: int = int(parts[2]) # ensure necessary dict objects have been created - if not pid in ret: - ret[pid] = {} - if not epoch in ret[pid]: - ret[pid][epoch] = {} - if not tid in ret[pid][epoch]: - ret[pid][epoch][tid] = [] + if not pid in op_map: + op_map[pid] = {} + if not epoch in op_map[pid]: + op_map[pid][epoch] = {} # extract file contents as byte buffer file = tar.extractfile(item) @@ -39,10 +57,19 @@ def load_log(path: str) -> OpTable: # read, split, comprehend, deserialize, extend jsonlines = file.read().strip().split(b"\n") - ops = [json.loads(x, object_hook=op_hook) for x in jsonlines] - ret[pid][epoch][tid].extend(ops) + ops = ThreadProvLog(tid, [json.loads(x, object_hook=op_hook) for x in jsonlines]) + op_map[pid][epoch][tid] = ops - return ret + return ProvLog({ + pid: ProcessProvLog( + pid, + { + epoch: ExecEpochProvLog(epoch, threads) + for epoch, threads in epochs.items() + }, + ) + for pid, epochs in op_map.items() + }) def op_hook(json_map: typing.Dict[str, typing.Any]): ty: str = json_map["_type"] From 4dae6ea4383503a3d456f3032fdc62538bc09871 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 10 Jul 2024 18:23:21 -0500 Subject: [PATCH 32/37] build probe cli with static linking building the probe cli with `cargo build` inside the devShell will still produce a dynamically linked binary, but building with `nix build .#probe-cli` should produce a statically linked binary --- probe_src/probe_frontend/flake.lock | 41 ++++++++++++++++++++++------- probe_src/probe_frontend/flake.nix | 38 +++++++++++++++++++++----- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/probe_src/probe_frontend/flake.lock b/probe_src/probe_frontend/flake.lock index fd766282..3594030f 100644 --- a/probe_src/probe_frontend/flake.lock +++ b/probe_src/probe_frontend/flake.lock @@ -3,11 +3,11 @@ "advisory-db": { "flake": false, "locked": { - "lastModified": 1719411196, - "narHash": "sha256-EdryZFXPjkK2F2J1re/bOl2oezKAB7dpFNi9mLUygmI=", + "lastModified": 1720572893, + "narHash": "sha256-EQfU1yMnebn7LoJNjjsQimyuWwz+2YzazqUZu8aX/r4=", "owner": "rustsec", "repo": "advisory-db", - "rev": "34f191da603f67b491a2e12af0b93c9c794ae1d1", + "rev": "97a2dc75838f19a5fd63dc3f8e3f57e0c4c8cfe6", "type": "github" }, "original": { @@ -23,11 +23,11 @@ ] }, "locked": { - "lastModified": 1719249093, - "narHash": "sha256-0q1haa3sw6GbmJ+WhogMnducZGjEaCa/iR6hF2vq80I=", + "lastModified": 1720546058, + "narHash": "sha256-iU2yVaPIZm5vMGdlT0+57vdB/aPq/V5oZFBRwYw+HBM=", "owner": "ipetkov", "repo": "crane", - "rev": "9791c77eb7e98b8d8ac5b0305d47282f994411ca", + "rev": "2d83156f23c43598cf44e152c33a59d3892f8b29", "type": "github" }, "original": { @@ -56,11 +56,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 
1719379843, - "narHash": "sha256-u+D+IOAMMl70+CJ9NKB+RMrASjInuIWMHzjLWQjPZ6c=", + "lastModified": 1720594544, + "narHash": "sha256-w6dlBUQYvS65f0Z33TvkcAj7ITr4NFqhF5ywss5T5bU=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b3f3c1b13fb08f3828442ee86630362e81136bbc", + "rev": "aa9461550594533c29866d42f861b6ff079a7fb6", "type": "github" }, "original": { @@ -75,7 +75,28 @@ "advisory-db": "advisory-db", "crane": "crane", "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1720577957, + "narHash": "sha256-RZuzLdB/8FaXaSzEoWLg3au/mtbuH7MGn2LmXUKT62g=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "a434177dfcc53bf8f1f348a3c39bfb336d760286", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" } }, "systems": { diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 55c8d114..d58fb262 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -15,6 +15,11 @@ url = "github:rustsec/advisory-db"; flake = false; }; + + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; # TODO: cleanup derivations and make more usable: @@ -27,13 +32,28 @@ crane, flake-utils, advisory-db, + rust-overlay, ... - }: - flake-utils.lib.eachDefaultSystem (system: let - pkgs = nixpkgs.legacyPackages.${system}; - # inherit (pkgs) lib; + }: let + systems = { + # "nix system" = "rust target"; + "x86_64-linux" = "x86_64-unknown-linux-musl"; + "i686-linux" = "i686-unknown-linux-musl"; + "aarch64-linux" = "aarch64-unknown-linux-musl"; + "armv7l-linux" = "armv7-unknown-linux-musleabi"; + }; + in + flake-utils.lib.eachSystem (builtins.attrNames systems) (system: let + pkgs = import nixpkgs { + inherit system; + overlays = [(import rust-overlay)]; + }; + + craneLib = (crane.mkLib pkgs).overrideToolchain (p: + p.rust-bin.stable.latest.default.override { + targets = [systems.${system}]; + }); - craneLib = crane.mkLib pkgs; src = ./.; # Common arguments can be set here to avoid repeating them later @@ -49,8 +69,12 @@ # pygen needs to know where to write the python file postUnpack = '' - export PYGEN_OUTFILE="$(realpath ./python)" + mkdir -p ./python + export PYGEN_OUTFILE="$(realpath ./python/ops.py)" ''; + + CARGO_BUILD_TARGET = "${systems.${system}}"; + CARGO_BUILD_RUSTFLAGS = "-C target-feature=+crt-static"; }; # Build *just* the cargo dependencies (of the entire workspace), @@ -62,7 +86,7 @@ individualCrateArgs = commonArgs // { - inherit cargoArtifacts; + # inherit cargoArtifacts; inherit (craneLib.crateNameFromCargoToml {inherit src;}) version; # disable tests since we'll run them all via cargo-nextest doCheck = false; From edaf78a0731620be7dd774f648fb283b7de21147 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 24 Jul 2024 15:11:33 -0500 Subject: [PATCH 33/37] update frontend code to work with main-branch libprobe --- probe_src/probe_frontend/cli/src/dump.rs | 8 ++++---- probe_src/probe_frontend/cli/src/main.rs | 6 ++++-- probe_src/probe_frontend/lib/build.rs | 2 ++ probe_src/probe_frontend/lib/src/ops.rs | 19 ++++++++++++------- .../probe_frontend/lib/src/transcribe.rs | 8 ++++---- probe_src/probe_frontend/macros/src/lib.rs | 9 +++++---- probe_src/probe_frontend/macros/src/pygen.rs | 8 ++++---- probe_src/probe_frontend/python/ops.py | 10 ++++++---- 8 files changed, 41 
insertions(+), 29 deletions(-) diff --git a/probe_src/probe_frontend/cli/src/dump.rs b/probe_src/probe_frontend/cli/src/dump.rs index b387d0f7..c7d3aaf8 100644 --- a/probe_src/probe_frontend/cli/src/dump.rs +++ b/probe_src/probe_frontend/cli/src/dump.rs @@ -197,8 +197,8 @@ impl Dump for ops::Path { impl Dump for ops::CloneOp { fn dump(&self) -> String { format!( - "[ child_process_id={}, child_thread_id={}, errno={} ]", - self.child_process_id, self.child_thread_id, self.ferrno, + "[ task_type={}, task_id={}, errno={} ]", + self.task_type, self.task_id, self.ferrno, ) } } @@ -248,8 +248,8 @@ impl Dump for ops::InitThreadOp { impl Dump for ops::WaitOp { fn dump(&self) -> String { format!( - "[ pid={}, options={}, status={}, ret={}, errno={} ]", - self.pid, self.options, self.status, self.ret, self.ferrno, + "[ task_type={}, task_id={}, options={}, status={}, errno={} ]", + self.task_type, self.task_id, self.options, self.status, self.ferrno, ) } } diff --git a/probe_src/probe_frontend/cli/src/main.rs b/probe_src/probe_frontend/cli/src/main.rs index ec3af790..1c0b7a5c 100644 --- a/probe_src/probe_frontend/cli/src/main.rs +++ b/probe_src/probe_frontend/cli/src/main.rs @@ -23,6 +23,7 @@ fn main() -> Result<()> { let matches = command!() .about("Generate or manipulate Provenance for Replay OBservation Engine (PROBE) logs.") + .propagate_version(true) .subcommands([ Command::new("record") .args([ @@ -72,7 +73,7 @@ fn main() -> Result<()> { .default_value("probe_log") .value_parser(value_parser!(OsString)), ]) - .about("Write the data from probe log data in a human-readable manne"), + .about("Write the data from probe log data in a human-readable manner"), Command::new("__gdb-exec-shim").hide(true).arg( arg!( ... "Command to run") .required(true) @@ -141,6 +142,7 @@ fn main() -> Result<()> { Err(e).wrap_err("Shim failed to exec") } - _ => Err(eyre!("unexpected subcommand")), + None => Err(eyre!("Subcommand expected, try --help for more info")), + _ => Err(eyre!("Unknown subcommand")), } } diff --git a/probe_src/probe_frontend/lib/build.rs b/probe_src/probe_frontend/lib/build.rs index eef37c9b..a6a3e228 100644 --- a/probe_src/probe_frontend/lib/build.rs +++ b/probe_src/probe_frontend/lib/build.rs @@ -110,6 +110,8 @@ fn main() { #include #include #include + #include + #include // HACK: defining this manually instead of using is // a huge hack, but it greatly reduces the generated code complexity diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index 9a37b9b5..dca3c431 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -287,6 +287,8 @@ impl FfiFrom for OpInternal { pub struct Op { pub data: OpInternal, pub time: Timespec, + pub pthread_id: pthread_t, + pub iso_c_thread_id: thrd_t, #[serde(serialize_with = "Op::serialize_type")] #[serde(skip_deserializing)] @@ -307,6 +309,8 @@ impl FfiFrom for Op { Ok(Self { data: value.ffi_into(ctx)?, time: value.time.ffi_into(ctx)?, + pthread_id: value.pthread_id, + iso_c_thread_id: value.iso_c_thread_id, _type: (), }) @@ -340,11 +344,12 @@ mod tests { // since we're defining a custom version of the rusage struct (indirectly through rust-bindgen) // we should at least check that they're the same size. 
- #[test] - fn rusage_size() { - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - } + // FIXME: muslc has a different sized rusage struct so libc::rusage doesn't match + // #[test] + // fn rusage_size() { + // assert_eq!( + // std::mem::size_of::(), + // std::mem::size_of::() + // ); + // } } diff --git a/probe_src/probe_frontend/lib/src/transcribe.rs b/probe_src/probe_frontend/lib/src/transcribe.rs index 30c63a7e..a87d6c89 100644 --- a/probe_src/probe_frontend/lib/src/transcribe.rs +++ b/probe_src/probe_frontend/lib/src/transcribe.rs @@ -315,12 +315,12 @@ impl<'a> OpsArena<'a> { .wrap_err("Failed to create ArenaHeader for OpsArena")?; if ((header.used - size_of::()) % size_of::()) != 0 { - return Err(ArenaError::Misaligned.into()); + return Err(ArenaError::Misaligned { size: header.used }.into()); } let count = (header.used - size_of::()) / size_of::(); - log::debug!("[unsafe] converting Vec to &[RawOp] of size {}", count); + log::debug!("[unsafe] converting Vec to &[C_Op] of size {}", count); let ops = unsafe { let ptr = bytes.as_ptr().add(size_of::()) as *const C_Op; std::slice::from_raw_parts(ptr, count) @@ -422,8 +422,8 @@ pub enum ArenaError { /// Returned if an [`OpsArena`]'s size isn't isn't `HEADER_SIZE + (N * OP_SIZE)` when `N` is /// some integer. - #[error("Arena alignment error: used arena size minus header isn't a multiple of op size")] - Misaligned, + #[error("Arena alignment error: arena size ({size}) minus header isn't a multiple of op size")] + Misaligned { size: usize }, /// Returned if the instantiation in a [`ArenaHeader`] doesn't match the indicated one #[error("Header contained Instantiation ID {header}, but {passed} was indicated")] diff --git a/probe_src/probe_frontend/macros/src/lib.rs b/probe_src/probe_frontend/macros/src/lib.rs index 535d6288..a7b57942 100644 --- a/probe_src/probe_frontend/macros/src/lib.rs +++ b/probe_src/probe_frontend/macros/src/lib.rs @@ -62,10 +62,11 @@ pub fn make_rust_op(input: TokenStream) -> TokenStream { .into())) } }; - // filter out any identifier starting with __ since every example i've seen in - // glibc of "__ident" is padding or reserved space. 
- if ident.to_string().starts_with("__") { - return None; + let ident_str = ident.to_string(); + for prefix in ["__spare", "__reserved"] { + if ident_str.starts_with(prefix) { + return None; + } } let pair = convert_bindgen_type(&field.ty).map(|ty| (ident, ty)); diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index ee3bd819..07a0fb0c 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -151,12 +151,12 @@ fn convert_to_pytype(ty: &syn::Type) -> MacroResult { Ok(match name.as_str() { // that's a lot of ways to say "int", python ints are bigints so we don't have to // care about size - "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" + "TaskType" | "__dev_t" | "__gid_t" | "__ino_t" | "__mode_t" | "__s32" | "__s64" | "__suseconds_t" | "__syscall_slong_t" | "__syseconds_t" | "__time_t" | "__u16" | "__u32" | "__u64" | "__uid_t" | "c_int" | "c_long" | "c_uint" - | "dev_t" | "gid_t" | "i128" | "i16" | "i32" | "i64" | "i8" | "ino_t" | "isize" - | "mode_t" | "pid_t" | "u128" | "u16" | "u32" | "u64" | "u8" | "uid_t" - | "usize" => "int".to_owned(), + | "c_ulong" | "dev_t" | "gid_t" | "i128" | "i16" | "i32" | "i64" | "i8" + | "ino_t" | "isize" | "mode_t" | "pid_t" | "pthread_t" | "thrd_t" | "u128" + | "u16" | "u32" | "u64" | "u8" | "uid_t" | "usize" => "int".to_owned(), // float, python uses doubles for everything "f32" | "f64" => "float".to_owned(), diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index 33a2bd4c..e48bb8f2 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -135,8 +135,8 @@ class ExecOp: class CloneOp: flags: int run_pthread_atfork_handlers: bool - child_process_id: int - child_thread_id: int + task_type: int + task_id: int ferrno: int @@ -172,10 +172,10 @@ class ReaddirOp: @dataclass(init=True, frozen=True) class WaitOp: - pid: int + task_type: int + task_id: int options: int status: int - ret: int ferrno: int @@ -206,6 +206,8 @@ class UpdateMetadataOp: class Op: data: OpInternal time: Timespec + pthread_id: int + iso_c_thread_id: int @dataclass(init=True, frozen=True) From 311261ddce0f8c1e961871d29cbcc3e002af6ebf Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 24 Jul 2024 15:18:43 -0500 Subject: [PATCH 34/37] fixed ruff errors in generated python code --- probe_src/probe_frontend/macros/src/pygen.rs | 2 +- probe_src/probe_frontend/python/ops.py | 2 +- probe_src/probe_frontend/python/probe.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/probe_src/probe_frontend/macros/src/pygen.rs b/probe_src/probe_frontend/macros/src/pygen.rs index 07a0fb0c..6008b8f3 100644 --- a/probe_src/probe_frontend/macros/src/pygen.rs +++ b/probe_src/probe_frontend/macros/src/pygen.rs @@ -532,7 +532,7 @@ impl Display for DataclassProp { writeln!( f, "{indent_str}@property\n\ - {indent_str}def {name}() -> {ret}:", + {indent_str}def {name}(self) -> {ret}:", )?; for line in &self.body { diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/ops.py index e48bb8f2..03bed957 100644 --- a/probe_src/probe_frontend/python/ops.py +++ b/probe_src/probe_frontend/python/ops.py @@ -83,7 +83,7 @@ class Path: dirfd_valid: bool @property - def dirfd() -> int: + def dirfd(self) -> int: return self.dirfd_minus_at_fdcwd + AT_FDCWD diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe.py index 
f96a925a..a4bd52cc 100644 --- a/probe_src/probe_frontend/python/probe.py +++ b/probe_src/probe_frontend/python/probe.py @@ -45,9 +45,9 @@ def load_log(path: str) -> ProvLog: tid: int = int(parts[2]) # ensure necessary dict objects have been created - if not pid in op_map: + if pid not in op_map: op_map[pid] = {} - if not epoch in op_map[pid]: + if epoch not in op_map[pid]: op_map[pid][epoch] = {} # extract file contents as byte buffer From 55c9b522de46eb92b5334bb42d75b97c8896f483 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 24 Jul 2024 16:26:52 -0500 Subject: [PATCH 35/37] added LICENSE --- probe_src/probe_frontend/Cargo.toml | 1 + probe_src/probe_frontend/LICENSE | 21 +++++++++++++++++++++ probe_src/probe_frontend/cli/Cargo.toml | 1 + probe_src/probe_frontend/deny.toml | 14 +++++++------- probe_src/probe_frontend/lib/Cargo.toml | 1 + probe_src/probe_frontend/macros/Cargo.toml | 1 + 6 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 probe_src/probe_frontend/LICENSE diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index 5e5d8417..6d58f229 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -8,6 +8,7 @@ members = [ [workspace.package] version = "0.2.0" +license = "MIT" authors = ["Jenna Fligor "] publish = false edition = "2021" diff --git a/probe_src/probe_frontend/LICENSE b/probe_src/probe_frontend/LICENSE new file mode 100644 index 00000000..00f314c1 --- /dev/null +++ b/probe_src/probe_frontend/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Jenna Fligor + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/probe_src/probe_frontend/cli/Cargo.toml b/probe_src/probe_frontend/cli/Cargo.toml index 1bf470ae..4c1ebdc8 100644 --- a/probe_src/probe_frontend/cli/Cargo.toml +++ b/probe_src/probe_frontend/cli/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "probe_cli" version.workspace = true +license.workspace = true authors.workspace = true publish.workspace = true edition.workspace = true diff --git a/probe_src/probe_frontend/deny.toml b/probe_src/probe_frontend/deny.toml index b074f444..539bf8b0 100644 --- a/probe_src/probe_frontend/deny.toml +++ b/probe_src/probe_frontend/deny.toml @@ -103,13 +103,13 @@ exceptions = [ # published to private registries. # To see how to mark a crate as unpublished (to the official registry), # visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. 
-ignore = true +#ignore = false # One or more private registries that you might publish crates to, if a crate # is only published to private registries, and ignore is true, the crate will # not have its license(s) checked -registries = [ - #"https://sekretz.com/registry -] +#registries = [ +# #"https://sekretz.com/registry +#] # This section is considered when running `cargo deny check bans`. # More documentation about the 'bans' section can be found here: @@ -202,8 +202,8 @@ allow-git = [] [sources.allow-org] # 1 or more github.com organizations to allow git sources for -github = [""] +github = [] # 1 or more gitlab.com organizations to allow git sources for -gitlab = [""] +gitlab = [] # 1 or more bitbucket.org organizations to allow git sources for -bitbucket = [""] +bitbucket = [] diff --git a/probe_src/probe_frontend/lib/Cargo.toml b/probe_src/probe_frontend/lib/Cargo.toml index d683b29e..90b871e2 100644 --- a/probe_src/probe_frontend/lib/Cargo.toml +++ b/probe_src/probe_frontend/lib/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "probe_frontend" version.workspace = true +license.workspace = true authors.workspace = true publish.workspace = true edition.workspace = true diff --git a/probe_src/probe_frontend/macros/Cargo.toml b/probe_src/probe_frontend/macros/Cargo.toml index b85d3753..4fad29d4 100644 --- a/probe_src/probe_frontend/macros/Cargo.toml +++ b/probe_src/probe_frontend/macros/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "probe_macros" version.workspace = true +license.workspace = true authors.workspace = true publish.workspace = true edition.workspace = true From d9b79a4fcec9f30b99ff7340cf0929fc8041e0a6 Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 24 Jul 2024 16:27:19 -0500 Subject: [PATCH 36/37] added probe-py derivation to output generated python module --- probe_src/probe_frontend/flake.nix | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index d58fb262..34d91331 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -55,6 +55,7 @@ }); src = ./.; + workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; # Common arguments can be set here to avoid repeating them later commonArgs = { @@ -68,7 +69,10 @@ ]; # pygen needs to know where to write the python file - postUnpack = '' + preConfigurePhases = [ + "pygenConfigPhase" + ]; + pygenConfigPhase = '' mkdir -p ./python export PYGEN_OUTFILE="$(realpath ./python/ops.py)" ''; @@ -100,9 +104,25 @@ // { pname = "probe-frontend"; cargoExtraArgs = "-p probe_frontend"; + }); + probe-py = craneLib.buildPackage (individualCrateArgs + // { + pname = "probe-py"; + cargoExtraArgs = "-p probe_frontend"; + installPhase = '' - mkdir -p $out/python - cp -r ./python $out/ + mkdir -p $out/probe_py/generated/ + cp -r ./python/*.py $out/probe_py/generated/ + touch $out/probe_py/generated/__init__.py + + cp ./LICENSE $out/LICENSE + cat > $out/pyproject.toml << EOF + [project] + name = "probe_py" + version = "${workspace.package.version}" + license = {file = "LICENSE"} + classifiers = [ "License :: OSI Approved :: MIT License" ] + EOF ''; }); probe-cli = craneLib.buildPackage (individualCrateArgs @@ -118,7 +138,7 @@ in { checks = { # Build the crates as part of `nix flake check` for convenience - inherit probe-frontend probe-cli probe-macros; + inherit probe-frontend probe-py probe-cli probe-macros; # Run clippy (and deny all warnings) on the workspace source, # again, 
reusing the dependency artifacts from above. @@ -169,7 +189,7 @@ }; packages = { - inherit probe-cli probe-frontend probe-macros; + inherit probe-cli probe-py probe-frontend probe-macros; }; devShells.default = craneLib.devShell { From 977dc157fc323f1de33f83dc15f2763eb2c77aed Mon Sep 17 00:00:00 2001 From: Jenna Fligor Date: Wed, 24 Jul 2024 23:43:46 -0500 Subject: [PATCH 37/37] revised probe-py derivation --- probe_src/probe_frontend/Cargo.toml | 6 ++- probe_src/probe_frontend/LICENSE | 2 +- probe_src/probe_frontend/flake.nix | 49 +++++++++++-------- probe_src/probe_frontend/lib/src/ops.rs | 2 +- .../python/probe_py/generated/__init__.py | 7 +++ .../python/{ => probe_py/generated}/ops.py | 0 .../python/{ => probe_py/generated}/probe.py | 0 .../probe_frontend/python/pyproject.toml | 15 ++++++ 8 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 probe_src/probe_frontend/python/probe_py/generated/__init__.py rename probe_src/probe_frontend/python/{ => probe_py/generated}/ops.py (100%) rename probe_src/probe_frontend/python/{ => probe_py/generated}/probe.py (100%) create mode 100644 probe_src/probe_frontend/python/pyproject.toml diff --git a/probe_src/probe_frontend/Cargo.toml b/probe_src/probe_frontend/Cargo.toml index 6d58f229..5b25b713 100644 --- a/probe_src/probe_frontend/Cargo.toml +++ b/probe_src/probe_frontend/Cargo.toml @@ -9,7 +9,11 @@ members = [ [workspace.package] version = "0.2.0" license = "MIT" -authors = ["Jenna Fligor "] +# authors *MUST* be defined in the form "name " for parsing reasons +authors = [ + "Jenna Fligor ", + "Samuel Grayson " +] publish = false edition = "2021" diff --git a/probe_src/probe_frontend/LICENSE b/probe_src/probe_frontend/LICENSE index 00f314c1..404acc08 100644 --- a/probe_src/probe_frontend/LICENSE +++ b/probe_src/probe_frontend/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Jenna Fligor +Copyright (c) 2024 Jenna Fligor and Samuel Grayson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/probe_src/probe_frontend/flake.nix b/probe_src/probe_frontend/flake.nix index 34d91331..298dd67d 100644 --- a/probe_src/probe_frontend/flake.nix +++ b/probe_src/probe_frontend/flake.nix @@ -55,7 +55,6 @@ }); src = ./.; - workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; # Common arguments can be set here to avoid repeating them later commonArgs = { @@ -74,7 +73,7 @@ ]; pygenConfigPhase = '' mkdir -p ./python - export PYGEN_OUTFILE="$(realpath ./python/ops.py)" + export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" ''; CARGO_BUILD_TARGET = "${systems.${system}}"; @@ -104,27 +103,35 @@ // { pname = "probe-frontend"; cargoExtraArgs = "-p probe_frontend"; - }); - probe-py = craneLib.buildPackage (individualCrateArgs - // { - pname = "probe-py"; - cargoExtraArgs = "-p probe_frontend"; - installPhase = '' - mkdir -p $out/probe_py/generated/ - cp -r ./python/*.py $out/probe_py/generated/ - touch $out/probe_py/generated/__init__.py - + cp -r ./python/ $out cp ./LICENSE $out/LICENSE - cat > $out/pyproject.toml << EOF - [project] - name = "probe_py" - version = "${workspace.package.version}" - license = {file = "LICENSE"} - classifiers = [ "License :: OSI Approved :: MIT License" ] - EOF ''; }); + probe-py = let + workspace = (builtins.fromTOML (builtins.readFile ./Cargo.toml)).workspace; + in + pkgs.substituteAllFiles rec { + name = "probe-py-${version}"; + src = probe-frontend; + files = [ + 
"./pyproject.toml" + "./LICENSE" + "./probe_py/generated/__init__.py" + "./probe_py/generated/ops.py" + "./probe_py/generated/probe.py" + ]; + + authors = builtins.concatStringsSep "" (builtins.map (match: let + name = builtins.elemAt match 0; + email = builtins.elemAt match 1; + in "\n {name = \"${name}\", email = \"${email}\"},") ( + builtins.map + (author-str: builtins.match "(.+) <(.+)>" author-str) + (workspace.package.authors) + )); + version = workspace.package.version; + }; probe-cli = craneLib.buildPackage (individualCrateArgs // { pname = "probe-cli"; @@ -183,7 +190,7 @@ }); probe-pygen-sanity = pkgs.runCommand "pygen-sanity-check" {} '' - cp ${probe-frontend}/python/ops.py $out + cp ${probe-py}/probe_py/generated/ops.py $out ${pkgs.python312}/bin/python $out ''; }; @@ -198,7 +205,7 @@ shellHook = '' export __PROBE_LIB="$(realpath ../libprobe/build)" - export PYGEN_OUTFILE="$(realpath ./python/ops.py)" + export PYGEN_OUTFILE="$(realpath ./python/probe_py/generated/ops.py)" ''; packages = [ diff --git a/probe_src/probe_frontend/lib/src/ops.rs b/probe_src/probe_frontend/lib/src/ops.rs index dca3c431..a49c0c2c 100644 --- a/probe_src/probe_frontend/lib/src/ops.rs +++ b/probe_src/probe_frontend/lib/src/ops.rs @@ -333,7 +333,7 @@ probe_macros::pygen_write_to_env!("PYGEN_OUTFILE"); #[cfg(test)] mod tests { - use super::*; + // use super::*; // we define this constant in the generated python code, so we should make sure we get it // right. diff --git a/probe_src/probe_frontend/python/probe_py/generated/__init__.py b/probe_src/probe_frontend/python/probe_py/generated/__init__.py new file mode 100644 index 00000000..9f8e34d3 --- /dev/null +++ b/probe_src/probe_frontend/python/probe_py/generated/__init__.py @@ -0,0 +1,7 @@ +""" +Generated code for reading with PROBE logs. + +See https://github.com/charmoniumQ/PROBE +""" + +__version__ = "@version@" diff --git a/probe_src/probe_frontend/python/ops.py b/probe_src/probe_frontend/python/probe_py/generated/ops.py similarity index 100% rename from probe_src/probe_frontend/python/ops.py rename to probe_src/probe_frontend/python/probe_py/generated/ops.py diff --git a/probe_src/probe_frontend/python/probe.py b/probe_src/probe_frontend/python/probe_py/generated/probe.py similarity index 100% rename from probe_src/probe_frontend/python/probe.py rename to probe_src/probe_frontend/python/probe_py/generated/probe.py diff --git a/probe_src/probe_frontend/python/pyproject.toml b/probe_src/probe_frontend/python/pyproject.toml new file mode 100644 index 00000000..2be4c8d5 --- /dev/null +++ b/probe_src/probe_frontend/python/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "probe_py.generated" +authors = [ + # authors generated from Cargo.toml@authors@: +] +license = {file = "LICENSE"} +classifiers = ["License :: OSI Approved :: MIT License"] +dynamic = ["version", "description"] + +[project.urls] +Home = "https://github.com/charmoniumQ/PROBE"