Skip to main content

tsffs/
lib.rs

1// Copyright (C) 2024 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4//! TSFFS Module for SIMICS
5//!
6//! # Overview
7//!
8//! This crate provides a client and module loadable by SIMICS to enable fuzzing on the SIMICS
9//! platform. The client is intended to be used by the `simics-fuzz` crate, but it can be used
10//! manually to enable additional use cases.
11//!
12//! # Capabilities
13//!
14//! The Module can:
15//!
16//! - Trace branch hits during an execution of a target on an x86_64 processor. These branches
17//!   are traced into shared memory in the format understood by the AFL family of tools.
18//! - Catch exception/fault events registered in an initial configuration or dynamically using
19//!   a SIMICS Python script
20//! - Catch timeout events registered in an initial configuration or dynamically using a SIMICS
21//!   Python script
22//! - Manage the state of a target under test by taking and restoring a snapshot of its state for
23//!   deterministic snapshot fuzzing
24
25#![deny(clippy::all)]
26// NOTE: We have to do this a lot, and it sucks to have all these functions be unsafe
27#![allow(clippy::not_unsafe_ptr_arg_deref)]
28#![deny(clippy::unwrap_used)]
29#![warn(missing_docs)]
30
31use crate::interfaces::{config::config, fuzz::fuzz};
32use crate::state::{SnapshotRestorePolicy, SolutionKind, StopReason};
33#[cfg(simics_version = "6")]
34use crate::util::Utils;
35use anyhow::{anyhow, Result};
36use arch::{Architecture, ArchitectureHint, ArchitectureOperations};
37use fuzzer::{messages::FuzzerMessage, ShutdownMessage, Testcase};
38use indoc::indoc;
39use lcov2::Records;
40use libafl::{inputs::HasBytesVec, prelude::ExitKind};
41use libafl_bolts::prelude::OwnedMutSlice;
42use libafl_targets::AFLppCmpLogMap;
43use log::LogMessage;
44use magic::MagicNumber;
45use num_traits::FromPrimitive as _;
46use os::windows::WindowsOsInfo;
47use serde::{Deserialize, Serialize};
48use serde_json::to_writer;
49use simics::continue_simulation;
50use simics::{
51    break_simulation, class, debug, error, get_class, get_interface, get_processor_number, info,
52    lookup_file, object_clock, run_alone, run_command, run_python, simics_init, sys::save_flags_t,
53    trace, version_base, warn, write_configuration_to_file, AsConfObject, BreakpointId,
54    ClassCreate, ClassObjectsFinalize, ConfObject, CoreBreakpointMemopHap,
55    CoreControlRegisterWriteHap, CoreExceptionHap, CoreMagicInstructionHap,
56    CoreSimulationStoppedHap, CpuInstrumentationSubscribeInterface, Event, EventClassFlag,
57    FromConfObject, HapHandle, Interface,
58};
59#[cfg(simics_version = "6")]
60use simics::free_attribute;
61#[cfg(simics_version = "6")]
62use simics::{
63    discard_future, restore_micro_checkpoint, save_micro_checkpoint, MicroCheckpointFlags,
64};
65#[cfg(simics_version = "7")]
66// NOTE: save_snapshot used because it is a stable alias for both save_snapshot and take_snapshot
67// which is necessary because this module is compatible with base versions which cross the
68// deprecation boundary
69use simics::{restore_snapshot, save_snapshot};
70use source_cov::SourceCache;
71use std::{
72    alloc::{alloc_zeroed, Layout},
73    cell::OnceCell,
74    collections::{hash_map::Entry, BTreeSet, HashMap, HashSet},
75    fs::{create_dir_all, remove_dir_all, File},
76    hash::{DefaultHasher, Hash, Hasher},
77    path::PathBuf,
78    ptr::null_mut,
79    str::FromStr,
80    sync::mpsc::{Receiver, Sender},
81    thread::JoinHandle,
82    time::SystemTime,
83};
84use tracer::{
85    tsffs::{on_instruction_after, on_instruction_before},
86    ExecutionTrace,
87};
88use typed_builder::TypedBuilder;
89use versions::{Requirement, Versioning};
90
91pub(crate) mod arch;
92pub(crate) mod fuzzer;
93pub(crate) mod haps;
94pub(crate) mod interfaces;
95pub(crate) mod log;
96pub(crate) mod magic;
97pub(crate) mod os;
98pub(crate) mod source_cov;
99pub(crate) mod state;
100pub(crate) mod tracer;
101pub(crate) mod traits;
102pub(crate) mod util;
103
104/// The class name used for all operations interfacing with SIMICS
105pub const CLASS_NAME: &str = env!("CARGO_PKG_NAME");
106
107#[derive(Serialize, Deserialize, Clone, Debug)]
108/// An address that was formerly virtual or formerly physical. The actual
109/// address *must* be physical.
110pub(crate) enum StartPhysicalAddress {
111    /// The address was formerly virtual
112    WasVirtual(u64),
113    /// The address was formerly physical
114    WasPhysical(u64),
115}
116
117impl StartPhysicalAddress {
118    /// Get the physical address
119    pub fn physical_address(&self) -> u64 {
120        match self {
121            StartPhysicalAddress::WasVirtual(addr) => *addr,
122            StartPhysicalAddress::WasPhysical(addr) => *addr,
123        }
124    }
125}
126
127#[derive(Serialize, Deserialize, Clone, Debug)]
128pub(crate) enum ManualStartAddress {
129    Virtual(u64),
130    Physical(u64),
131}
132
133impl ManualStartAddress {
134    pub fn address(&self) -> u64 {
135        match self {
136            ManualStartAddress::Virtual(addr) => *addr,
137            ManualStartAddress::Physical(addr) => *addr,
138        }
139    }
140}
141
142#[derive(TypedBuilder, Serialize, Deserialize, Clone, Debug)]
143pub(crate) struct StartInfo {
144    /// The physical address of the buffer. Must be physical, if the input address was
145    /// virtual, it should be pre-translated
146    pub address: StartPhysicalAddress,
147    /// The initial contents of the buffer
148    pub contents: Vec<u8>,
149    /// The initial size of the buffer. This will either be only an address, in which
150    /// case the initial size will be `*size_ptr` and the actual size of each testcase
151    /// will be written back to `*size_ptr`, a `max_size` in which case the size will
152    /// not be written, or a `size_ptr` and `max_size` in which case the size will be
153    /// written back to `*size_ptr` and the maximum size will be `max_size`.
154    pub size: StartSize,
155}
156
157#[derive(Serialize, Deserialize, Clone, Debug)]
158/// Exactly the same as `StartInfo` except with the semantic difference that the address
159/// may not always be stored as physical, the user may provide a virtual address for both
160/// the address and the size pointer (if there is one).
161pub(crate) struct ManualStartInfo {
162    pub address: ManualStartAddress,
163    pub size: ManualStartSize,
164}
165
166#[derive(Serialize, Deserialize, Clone, Debug)]
167pub(crate) enum StartSize {
168    SizePtr {
169        address: StartPhysicalAddress,
170        maximum_size: usize,
171    },
172    MaxSize(usize),
173    SizePtrAndMaxSize {
174        address: StartPhysicalAddress,
175        maximum_size: usize,
176    },
177}
178
179impl StartSize {
180    pub fn maximum_size(&self) -> usize {
181        match self {
182            StartSize::SizePtr { maximum_size, .. } => *maximum_size,
183            StartSize::MaxSize(maximum_size) => *maximum_size,
184            StartSize::SizePtrAndMaxSize { maximum_size, .. } => *maximum_size,
185        }
186    }
187
188    pub fn physical_address(&self) -> Option<StartPhysicalAddress> {
189        match self {
190            StartSize::SizePtr { address, .. } => Some(address.clone()),
191            StartSize::MaxSize(_) => None,
192            StartSize::SizePtrAndMaxSize { address, .. } => Some(address.clone()),
193        }
194    }
195}
196
197#[derive(Serialize, Deserialize, Clone, Debug)]
198pub(crate) enum ManualStartSize {
199    SizePtr {
200        address: ManualStartAddress,
201    },
202    MaxSize(usize),
203    SizePtrAndMaxSize {
204        address: ManualStartAddress,
205        maximum_size: usize,
206    },
207}
208
209#[class(name = "tsffs", skip_objects_finalize)]
210#[derive(AsConfObject, FromConfObject, Default)]
211/// The main module class for the TSFFS fuzzer, stores state and configuration information
212pub(crate) struct Tsffs {
213    #[class(attribute(optional, default = false))]
214    /// Whether all breakpoints are treated as solutions. When set to `True`, any breakpoint
215    /// which triggers a `Core_Breakpoint_Memop` HAP will be treated as a solution. This allows
216    /// setting memory breakpoints on specific memory locations to trigger a solution when the
217    /// memory is read, written, or executed. Not all breakpoints cause this HAP to occur.
218    ///
219    /// For example, to set an execution breakpoint on the address $addr:
220    ///
221    /// $addr = 0x100000
222    /// $bp = (bp.memory.break -x $addr)
223    /// @tsffs.all_breakpoints_are_solutions = True
224    ///
225    /// Tsffs will treat the breakpoint as a solution (along with all other
226    /// breakpoints), and the fuzzer will stop when the breakpoint is hit.
227    pub all_breakpoints_are_solutions: bool,
228    #[class(attribute(optional, default = false))]
229    /// Whether all exceptions are treated as solutions. When set to `True`, any CPU exception
230    /// or interrupt which triggers a `Core_Exception` HAP will be treated as a solution. This
231    /// can be useful when enabled in a callback after which any exception is considered a
232    /// solution and is typically not useful when enabled during the start-up process because
233    /// most processors will generate exceptions during start-up and during normal operation.
234    pub all_exceptions_are_solutions: bool,
235    #[class(attribute(optional))]
236    /// The set of exceptions which are treated as solutions. For example on x86_64, setting:
237    ///
238    /// @tsffs.exceptions = [14]
239    ///
240    /// would treat any page fault as a solution.
241    pub exceptions: BTreeSet<i64>,
242    #[class(attribute(optional))]
243    /// The set of breakpoints which are treated as solutions. For example, to set a solution
244    /// breakpoint on the address $addr (note the breakpoint set from the Simics command is
245    /// accessed through the simenv namespace):
246    ///
247    /// $addr = 0x100000
248    /// $bp = (bp.memory.break -x $addr)
249    /// @tsffs.breakpoints = [simenv.bp]
250    pub breakpoints: BTreeSet<BreakpointId>,
251    #[class(attribute(optional, default = 5.0))]
252    /// The timeout in seconds of virtual time for each iteration of the fuzzer. If the virtual
253    /// time timeout is exceeded for a single iteration, the iteration is stopped and the testcase
254    /// is saved as a solution.
255    pub timeout: f64,
256    #[class(attribute(optional, default = SnapshotRestorePolicy::Always))]
257    /// Snapshot restore policy for normal iteration boundaries.
258    ///
259    /// Accepted values:
260    /// - `1` restores on every normal iteration (default)
261    /// - `N > 1` restores every N iterations based on the global iteration count
262    /// - `0` disables restores after startup
263    ///
264    /// Solution iterations always restore the initial snapshot before resuming if one exists.
265    pub snapshot_restore_interval: SnapshotRestorePolicy,
266    #[class(attribute(optional, default = true))]
267    /// Whether the fuzzer should start on compiled-in harnesses. If set to `True`, the fuzzer
268    /// will start fuzzing when a harness macro is executed.
269    pub start_on_harness: bool,
270    #[class(attribute(optional, default = true))]
271    /// Whether the fuzzer should stop on compiled-in harnesses. If set to `True`, the fuzzer
272    /// will start fuzzing when a harness macro is executed.
273    pub stop_on_harness: bool,
274    #[class(attribute(optional, default = true))]
275    /// Whether TSFFS should automatically resume simulation after preparing a repro testcase.
276    ///
277    /// When set to `False`, TSFFS will prepare repro execution state (snapshot restore,
278    /// testcase write, timeout event, bookmark) but not call continue. This is useful when
279    /// an external debugger (for example a GDB stub) should control resume.
280    pub repro_auto_continue: bool,
281    #[class(attribute(optional, default = 0))]
282    /// The index number which is passed to the platform-specific magic instruction HAP
283    /// by a compiled-in harness to signal that the fuzzer should start the fuzzing loop.
284    ///
285    /// This option is useful when fuzzing a target which has multiple start harnesses compiled
286    /// into it, and the fuzzer should start on a specific harness.
287    ///
288    /// There can only be one magic start value, because only one fuzzing loop can be running
289    /// (and they cannot be nested). This only has an effect if `start_on_harness` is set.
290    pub magic_start_index: u64,
291    #[class(attribute(optional, default = vec![0]))]
292    /// The magic numbers which is passed to the platform-specific magic instruction HAP
293    /// by a compiled-in harness to signal that the fuzzer should stop execution of the current
294    /// iteration.
295    ///
296    /// This option is useful when fuzzing a target which has multiple stop harnesses compiled
297    /// into it, and the fuzzer should stop on a specific subset of stop harness macro calls.
298    ///
299    /// This only has an effect if `stop_on_harness` is set.
300    pub magic_stop_indices: Vec<u64>,
301    #[class(attribute(optional, default = vec![0]))]
302    /// The numbers which are passed to the platform-specific magic instruction HAP by a
303    /// compiled-in harness to signal that the fuzzer should stop execution of the
304    /// current iteration and save the testcase as a solution.
305    ///
306    /// This only has an effect if `stop_on_harness` is set.
307    pub magic_assert_indices: Vec<u64>,
308    #[class(attribute(optional))]
309    /// The limit on the number of fuzzing iterations to execute. If set to 0, the fuzzer will
310    /// run indefinitely. If set to a positive integer, the fuzzer will run until the limit is
311    /// reached.
312    pub iteration_limit: usize,
313    #[class(attribute(optional, default = 8))]
314    /// The size of the corpus to generate randomly. If `generate_random_corpus` is set to
315    /// `True`, the fuzzer will generate a random corpus of this size before starting the
316    /// fuzzing loop.
317    pub initial_random_corpus_size: usize,
318    #[class(attribute(optional, default = lookup_file("%simics%")?.join("corpus")))]
319    /// The directory to load the corpus from and save new corpus items to. This directory
320    /// may be a SIMICS relative path prefixed with "%simics%". It is an error to provide no
321    /// corpus directory when `set_generate_random_corpus(True)` has not been called prior to
322    /// fuzzer startup. It is also an error to provide an *empty* corpus directory without
323    /// calling `set_generate_random_corpus(True)`.  If not provided, "%simics%/corpus" will
324    /// be used by default.
325    pub corpus_directory: PathBuf,
326    #[class(attribute(optional, default = lookup_file("%simics%")?.join("solutions")))]
327    /// The directory to save solutions to. This directory may be a SIMICS relative path
328    /// prefixed with "%simics%". If not provided, "%simics%/solutions" will be used by
329    /// default.
330    pub solutions_directory: PathBuf,
331    #[class(attribute(optional, default = false))]
332    /// Whether to generate a random corpus before starting the fuzzing loop. If set to `True`,
333    /// the fuzzer will generate a random corpus of size `initial_random_corpus_size` before
334    /// starting the fuzzing loop. This should generally be used only for debugging and testing
335    /// purposes, and is not recommended for use in production. A real corpus representative of
336    /// both valid and invalid inputs should be used in production.
337    pub generate_random_corpus: bool,
338    #[class(attribute(optional, default = true))]
339    /// Whether comparison logging should be used during fuzzing to enable value-driven
340    /// mutations. If set to `True`, the fuzzer will use comparison logging to enable
341    /// value-driven mutations. This should always be enabled unless the target is known to
342    /// not benefit from value-driven mutations or run too slowly when solving for comparison
343    /// values.
344    pub cmplog: bool,
345    #[class(attribute(optional, default = true))]
346    /// Whether coverage reporting should be enabled. When enabled, new edge addresses will
347    /// be logged.
348    pub coverage_reporting: bool,
349    #[class(attribute(optional))]
350    /// A set of executable files to tokenize. Tokens will be extracted from these files and
351    /// used to drive token mutations of testcases.
352    pub token_executables: Vec<PathBuf>,
353    #[class(attribute(optional))]
354    /// A set of source files to tokenize. Tokens will be extracted from these files and used
355    /// to drive token mutations of testcases. C source files are expected, and strings and
356    /// tokens will be extracted from strings in the source files.
357    pub token_src_files: Vec<PathBuf>,
358    #[class(attribute(optional))]
359    /// Files in the format of:
360    ///
361    /// x = "hello"
362    /// y = "foo\x41bar"
363    ///
364    /// which will be used to drive token mutations of testcases.
365    pub token_files: Vec<PathBuf>,
366    #[class(attribute(optional))]
367    /// Sets of tokens to use to drive token mutations of testcases. Each token set is a
368    /// bytes which will be randomically inserted into testcases.
369    pub tokens: Vec<Vec<u8>>,
370    #[class(attribute(optional, default = lookup_file("%simics%")?.join("checkpoint.ckpt")))]
371    /// The path to the checkpoint saved prior to fuzzing when using snapshots
372    pub checkpoint_path: PathBuf,
373    #[class(attribute(optional, default = true))]
374    pub pre_snapshot_checkpoint: bool,
375    #[class(attribute(optional, default = lookup_file("%simics%")?.join("log.json")))]
376    /// The path to the log file which will be used to log the fuzzer's output statistics
377    pub log_path: PathBuf,
378    #[class(attribute(optional, default = true))]
379    pub log_to_file: bool,
380    #[class(attribute(optional, default = false))]
381    pub keep_all_corpus: bool,
382    #[class(attribute(optional, default = false))]
383    /// Whether to use the initial contents of the testcase buffer as an entry in the corpus
384    pub use_initial_as_corpus: bool,
385    #[class(attribute(optional, default = false))]
386    /// Whether to enable extra debug logging for LibAFL
387    pub debug_log_libafl: bool,
388    #[class(attribute(optional, default = true))]
389    /// Whether to send shut down on stops without reason. This means fuzzing cannot be resumed.
390    pub shutdown_on_stop_without_reason: bool,
391    #[class(attribute(optional, default = true))]
392    /// Whether to quit on iteration limit
393    pub quit_on_iteration_limit: bool,
394    #[class(attribute(optional, default = false))]
395    /// Whether to save execution traces of test cases which result in a timeout
396    pub save_timeout_execution_traces: bool,
397    #[class(attribute(optional, default = false))]
398    /// Whether to save execution traces of test cases which result in a solution
399    pub save_solution_execution_traces: bool,
400    #[class(attribute(optional, default = false))]
401    /// Whether to save execution traces of test cases which result in an interesting input
402    pub save_interesting_execution_traces: bool,
403    #[class(attribute(optional, default = false))]
404    /// Whether to save all execution traces. This will consume a very large amount of resources
405    /// and should only be used for debugging and testing purposes.
406    pub save_all_execution_traces: bool,
407    #[class(attribute(optional, default = lookup_file("%simics%")?.join("execution-traces")))]
408    /// The directory to save execution traces to, if any are set to be saved. This
409    /// directory may be a SIMICS relative path prefixed with "%simics%". If not
410    /// provided, "%simics%/execution-traces" will be used by default.
411    pub execution_trace_directory: PathBuf,
412    #[class(attribute(optional, default = false))]
413    /// Whether execution traces should include just PC (vs instruction text and bytes)
414    pub execution_trace_pc_only: bool,
415    #[class(attribute(optional, default = true))]
416    /// Whether a heartbeat message should be emitted every `heartbeat_interval` seconds
417    pub heartbeat: bool,
418    #[class(attribute(optional, default = 60))]
419    /// The interval in seconds between heartbeat messages
420    pub heartbeat_interval: u64,
421
422    #[class(attribute(optional, default = false))]
423    /// Whether symbolic coverage should be used during fuzzing
424    pub symbolic_coverage: bool,
425    #[class(attribute(optional, default = false))]
426    /// Whether windows is being run in the simulation
427    pub windows: bool,
428    #[class(attribute(optional, default = lookup_file("%simics%")?.join("debuginfo-cache")))]
429    /// Directory in which to download PDB and EXE files from symbol servers on Windows
430    pub debuginfo_download_directory: PathBuf,
431    #[class(attribute(optional))]
432    /// Mapping of file name (name and extension e.g. fuzzer-app.exe or target.sys)
433    /// to a tuple of (exe path, debuginfo path) where debuginfo is either a PDB or DWARF
434    /// file
435    pub debug_info: HashMap<String, Vec<PathBuf>>,
436    #[class(attribute(optional, default = lookup_file("%simics%")?.join("debuginfo-source")))]
437    /// Directory in which source files are located. Source files do not need to be arranged in
438    /// the same directory structure as the compiled source, and are looked up by hash.
439    pub debuginfo_source_directory: PathBuf,
440    #[class(attribute(optional, default = false))]
441    /// Whether symbolic coverage should be collected for system components by downloading
442    /// executable and debug info files where possible.
443    pub symbolic_coverage_system: bool,
444    #[class(attribute(optional, default = lookup_file("%simics%")?.join("symbolic-coverage")))]
445    /// Directory in which source files are located. Source files do not need to be arranged in
446    /// the same directory structure as the compiled source, and are looked up by hash.
447    pub symbolic_coverage_directory: PathBuf,
448
449    /// Handle for the core simulation stopped hap
450    stop_hap_handle: HapHandle,
451    /// Handle for the core breakpoint memop hap
452    breakpoint_memop_hap_handle: HapHandle,
453    /// Handle for exception HAP
454    exception_hap_handle: HapHandle,
455    /// The handle for the registered magic HAP, used to
456    /// listen for magic start and stop if `start_on_harness`
457    /// or `stop_on_harness` are set.
458    magic_hap_handle: HapHandle,
459    /// Handle for the core control register write hap
460    control_register_write_hap_handle: HapHandle,
461
462    /// A mapping of architecture hints from CPU index to architecture hint. This architecture
463    /// hint overrides the detected architecture of the CPU core. This is useful when the
464    /// architecture of the CPU core is not detected correctly, or when the architecture of the
465    /// CPU core is not known at the time the fuzzer is started. Specifically, x86 cores which
466    /// report their architecture as x86_64 can be overridden to x86.
467    pub architecture_hints: HashMap<i32, ArchitectureHint>,
468    // Threads and message channels
469    /// Fuzzer thread
470    fuzz_thread: OnceCell<JoinHandle<Result<()>>>,
471    /// Message sender to the fuzzer thread. TSFFS sends exit kinds to the fuzzer thread to
472    /// report whether testcases resulted in normal exit, timeout, or solutions.
473    fuzzer_tx: OnceCell<Sender<ExitKind>>,
474    /// Message receiver from the fuzzer thread. TSFFS receives new testcases and run configuration
475    /// from the fuzzer thread.
476    fuzzer_rx: OnceCell<Receiver<Testcase>>,
477    /// A message sender to inform the fuzzer thread that it should exit.
478    fuzzer_shutdown: OnceCell<Sender<ShutdownMessage>>,
479    /// Reciever from the fuzzer thread to receive messages from the fuzzer thread
480    /// including status messages and structured introspection data like new edge findings.
481    fuzzer_messages: OnceCell<Receiver<FuzzerMessage>>,
482
483    // Fuzzer coverage maps
484    /// The coverage map
485    coverage_map: OnceCell<OwnedMutSlice<'static, u8>>,
486    /// A pointer to the AFL++ comparison map
487    aflpp_cmp_map_ptr: OnceCell<*mut AFLppCmpLogMap>,
488    /// The owned AFL++ comparison map
489    aflpp_cmp_map: OnceCell<&'static mut AFLppCmpLogMap>,
490    /// The previous location for coverage for calculating the hash of edges.
491    coverage_prev_loc: u64,
492    /// The registered timeout event which is registered and used to detect timeouts in
493    /// virtual time
494    timeout_event: OnceCell<Event>,
495    /// The set of edges which have been seen at least once.
496    edges_seen: HashSet<u64>,
497    /// A map of the new edges to their AFL indices seen since the last time the fuzzer
498    /// provided an update. This is not cleared every execution.
499    edges_seen_since_last: HashMap<u64, u64>,
500    /// The set of PCs comprising the current execution trace. This is cleared every execution.
501    execution_trace: ExecutionTrace,
502    /// The current line coverage state comprising the total execution. This is not
503    /// cleared and is persistent across the full campaign until the fuzzer stops.
504    coverage: Records,
505
506    /// The name of the fuzz snapshot, if saved
507    snapshot_name: OnceCell<String>,
508    /// The index of the micro checkpoint saved for the fuzzer. Only present if not using
509    /// snapshots.
510    micro_checkpoint_index: OnceCell<i32>,
511
512    /// The reason the current stop occurred
513    stop_reason: Option<StopReason>,
514    /// The buffer and size information, if saved
515    start_info: OnceCell<StartInfo>,
516
517    // #[builder(default = SystemTime::now())]
518    /// The time the fuzzer was started at
519    start_time: OnceCell<SystemTime>,
520    // #[builder(default = SystemTime::now())]
521    /// The time the fuzzer was started at
522    last_heartbeat_time: Option<SystemTime>,
523
524    log: OnceCell<File>,
525
526    /// Whether cmplog is currently enabled
527    coverage_enabled: bool,
528    /// Whether cmplog is currently enabled
529    cmplog_enabled: bool,
530    /// The number of the processor which starts the fuzzing loop (via magic or manual methods)
531    start_processor_number: OnceCell<i32>,
532    /// Tracked processors. This always includes the start processor, and may include
533    /// additional processors that are manually added by the user
534    processors: HashMap<i32, Architecture>,
535    /// A testcase to use for repro
536    repro_testcase: Option<Vec<u8>>,
537    /// Whether a bookmark has been set for repro mode
538    repro_bookmark_set: bool,
539    /// Whether the fuzzer is currently stopped in repro mode
540    stopped_for_repro: bool,
541    /// The number of iterations which have been executed so far
542    iterations: usize,
543    /// Whether snapshots are used. Snapshots are used on Simics 7.0.0 and later.
544    use_snapshots: bool,
545    /// The number of timeouts so far
546    timeouts: usize,
547    /// The number of solutions so far
548    solutions: usize,
549
550    windows_os_info: WindowsOsInfo,
551    cr3_cache: HashMap<i32, i64>,
552    source_file_cache: SourceCache,
553}
554
555impl ClassObjectsFinalize for Tsffs {
556    unsafe fn objects_finalized(instance: *mut ConfObject) -> simics::Result<()> {
557        let tsffs: &'static mut Tsffs = instance.into();
558        tsffs.stop_hap_handle = CoreSimulationStoppedHap::add_callback(
559            // NOTE: Core_Simulation_Stopped is called with an object, exception and
560            // error string, but the exception is always
561            // SimException::SimExc_No_Exception and the error string is always
562            // null_mut.
563            move |_, _, _| {
564                // On stops, call the module's stop callback method, which will in turn call the
565                // stop callback methods on each of the module's components. The stop reason will
566                // be retrieved from the module, if one is set. It is an error for the module to
567                // stop itself without setting a reason
568                let tsffs: &'static mut Tsffs = instance.into();
569                tsffs
570                    .on_simulation_stopped()
571                    .expect("Error calling simulation stopped callback");
572            },
573        )?;
574        tsffs.breakpoint_memop_hap_handle =
575            CoreBreakpointMemopHap::add_callback(move |trigger_obj, breakpoint_number, memop| {
576                let tsffs: &'static mut Tsffs = instance.into();
577                tsffs
578                    .on_breakpoint_memop(trigger_obj, breakpoint_number, memop)
579                    .expect("Error calling breakpoint memop callback");
580            })?;
581        tsffs.exception_hap_handle =
582            CoreExceptionHap::add_callback(move |trigger_obj, exception_number| {
583                let tsffs: &'static mut Tsffs = instance.into();
584                tsffs
585                    .on_exception(trigger_obj, exception_number)
586                    .expect("Error calling breakpoint memop callback");
587            })?;
588        tsffs.magic_hap_handle =
589            CoreMagicInstructionHap::add_callback(move |trigger_obj, magic_number| {
590                let tsffs: &'static mut Tsffs = instance.into();
591
592                // NOTE: Some things (notably, the x86_64 UEFI app loader) do a
593                // legitimate CPUID (in the UEFI loader, with number 0xc aka
594                // eax=0xc4711) that registers as a magic number. We therefore permit
595                // non-valid magic numbers to be executed, but we do nothing for them.
596                if let Some(magic_number) = MagicNumber::from_i64(magic_number) {
597                    tsffs
598                        .on_magic_instruction(trigger_obj, magic_number)
599                        .expect("Failed to execute on_magic_instruction callback")
600                }
601            })?;
602        tsffs.control_register_write_hap_handle =
603            CoreControlRegisterWriteHap::add_callback(move |trigger_obj, register_nr, value| {
604                let tsffs: &'static mut Tsffs = instance.into();
605                tsffs
606                    .on_control_register_write(trigger_obj, register_nr, value)
607                    .expect("Failed to execute on_control_register_write callback")
608            })?;
609        tsffs
610            .coverage_map
611            .set(OwnedMutSlice::from(vec![0; Tsffs::COVERAGE_MAP_SIZE]))
612            .map_err(|_e| anyhow!("Value already set"))?;
613
614        tsffs
615            .aflpp_cmp_map_ptr
616            .set(unsafe { alloc_zeroed(Layout::new::<AFLppCmpLogMap>()) as *mut _ })
617            .map_err(|_e| anyhow!("Value already set"))?;
618
619        tsffs
620            .aflpp_cmp_map
621            .set(unsafe {
622                &mut **tsffs
623                    .aflpp_cmp_map_ptr
624                    .get()
625                    .expect("Value just set and known to be valid")
626            })
627            .map_err(|_e| anyhow!("Value already set"))?;
628
629        tsffs
630            .timeout_event
631            .set(
632                Event::builder()
633                    .name(Tsffs::TIMEOUT_EVENT_NAME)
634                    .cls(get_class(CLASS_NAME).expect("Error getting class"))
635                    .flags(EventClassFlag::Sim_EC_No_Flags)
636                    .build(),
637            )
638            .map_err(|_e| anyhow!("Value already set"))?;
639
640        // Check whether snapshots should be used. This is a runtime check because the module
641        // may be loaded in either Simics 6 or Simics 7.
642        let version = version_base()
643            .map_err(|e| anyhow!("Error getting version string: {}", e))
644            .and_then(|v| {
645                v.split(' ')
646                    .next_back()
647                    .ok_or_else(|| anyhow!("Error parsing version string '{}'", v))
648                    .map(|s| s.to_string())
649            })
650            .and_then(|v| {
651                Versioning::from_str(&v).map_err(|e| anyhow!("Error parsing version string: {e}"))
652            })?;
653
654        tsffs.use_snapshots = Requirement::from_str(">=7.0.0")
655            .map_err(|e| anyhow!("Error parsing requirement: {}", e))?
656            .matches(&version);
657
658        Ok(())
659    }
660}
661
662impl Tsffs {
663    /// The size of the coverage map in bytes
664    pub const COVERAGE_MAP_SIZE: usize = 128 * 1024;
665    /// The name of the registered timeout event
666    pub const TIMEOUT_EVENT_NAME: &'static str = "detector_timeout_event";
667    /// The name of the initial snapshot
668    pub const SNAPSHOT_NAME: &'static str = "tsffs-origin-snapshot";
669}
670
671impl Tsffs {
672    /// CLI command shown to the user to restore the origin state when stopped for repro.
673    /// Simics 6 uses reverse-execution bookmarks; Simics 7 uses snapshots.
674    pub fn repro_restore_command() -> String {
675        #[cfg(simics_version = "6")]
676        return "reverse-to start".to_string();
677        #[cfg(simics_version = "7")]
678        return format!("restore-snapshot {}", Self::SNAPSHOT_NAME);
679    }
680}
681
682/// Implementations for controlling the simulation
683impl Tsffs {
684    /// Stop the simulation with a reason
685    pub fn stop_simulation(&mut self, reason: StopReason) -> Result<()> {
686        let break_string = reason.to_string();
687
688        self.stop_reason = Some(reason);
689
690        break_simulation(break_string)?;
691
692        Ok(())
693    }
694}
695
696/// Implementations for common functionality
697impl Tsffs {
698    /// Add a monitored processor to the simulation and whether the processor is the
699    /// "start processor" which is the processor running when the fuzzing loop begins
700    pub fn add_processor(&mut self, cpu: *mut ConfObject, is_start: bool) -> Result<()> {
701        let cpu_number = get_processor_number(cpu)?;
702        debug!(
703            self.as_conf_object(),
704            "Adding {}processor {} to fuzzer",
705            if is_start { "start " } else { "" },
706            cpu_number
707        );
708
709        if let Entry::Vacant(e) = self.processors.entry(cpu_number) {
710            let architecture = if let Some(hint) = self.architecture_hints.get(&cpu_number) {
711                hint.architecture(cpu)?
712            } else {
713                Architecture::new(cpu)?
714            };
715            e.insert(architecture);
716            let mut cpu_interface: CpuInstrumentationSubscribeInterface = get_interface(cpu)?;
717            cpu_interface.register_instruction_after_cb(
718                null_mut(),
719                Some(on_instruction_after),
720                self as *mut Self as *mut _,
721            )?;
722            cpu_interface.register_instruction_before_cb(
723                null_mut(),
724                Some(on_instruction_before),
725                self as *mut Self as *mut _,
726            )?;
727        }
728
729        if is_start {
730            self.start_processor_number
731                .set(cpu_number)
732                .map_err(|_| anyhow!("Start processor number already set"))?;
733        }
734
735        Ok(())
736    }
737
738    /// Return a reference to the saved "start processor" if there is one. There will be no
739    /// "start processor" before a start harness (manual or magic) is executed.
740    pub fn start_processor(&mut self) -> Option<&mut Architecture> {
741        self.start_processor_number
742            .get()
743            .and_then(|n| self.processors.get_mut(n))
744    }
745}
746
747impl Tsffs {
748    /// Save the initial snapshot using the configured method (either rev-exec micro checkpoints
749    /// or snapshots)
750    pub fn save_initial_snapshot(&mut self) -> Result<()> {
751        if self.have_initial_snapshot() {
752            return Ok(());
753        }
754
755        // Disable VMP if it is enabled
756        info!(self.as_conf_object(), "Disabling VMP");
757        if let Err(e) = run_command("disable-vmp") {
758            warn!(self.as_conf_object(), "Failed to disable VMP: {}", e);
759        }
760
761        // Initialize the source cache for source/line lookups
762        info!(self.as_conf_object(), "Initializing source cache");
763        self.source_file_cache = SourceCache::new(&self.debuginfo_source_directory)?;
764
765        self.log(LogMessage::startup())?;
766
767        #[cfg(simics_version = "7")]
768        {
769            if self.pre_snapshot_checkpoint {
770                debug!(
771                    self.as_conf_object(),
772                    "Saving checkpoint to {}",
773                    self.checkpoint_path.display()
774                );
775
776                if self.checkpoint_path.exists() {
777                    remove_dir_all(&self.checkpoint_path)?;
778                }
779
780                write_configuration_to_file(&self.checkpoint_path, save_flags_t(0))?;
781            }
782
783            debug!(self.as_conf_object(), "Saving initial snapshot");
784
785            save_snapshot(Self::SNAPSHOT_NAME)?;
786            self.snapshot_name
787                .set(Self::SNAPSHOT_NAME.to_string())
788                .map_err(|_| anyhow!("Snapshot name already set"))?;
789        }
790
791        #[cfg(simics_version = "6")]
792        {
793            if self.pre_snapshot_checkpoint {
794                debug!(
795                    self.as_conf_object(),
796                    "Saving checkpoint to {}",
797                    self.checkpoint_path.display()
798                );
799
800                if self.checkpoint_path.exists() {
801                    remove_dir_all(&self.checkpoint_path)?;
802                }
803
804                write_configuration_to_file(&self.checkpoint_path, save_flags_t(0))?;
805            }
806
807            debug!(self.as_conf_object(), "Saving initial micro checkpoint");
808
809            save_micro_checkpoint(
810                Self::SNAPSHOT_NAME,
811                MicroCheckpointFlags::Sim_MC_ID_User | MicroCheckpointFlags::Sim_MC_Persistent,
812            )?;
813
814            self.snapshot_name
815                .set(Self::SNAPSHOT_NAME.to_string())
816                .map_err(|_| anyhow!("Snapshot name already set"))?;
817
818            self.micro_checkpoint_index
819                .set(
820                    Utils::get_micro_checkpoints()?
821                        .iter()
822                        .enumerate()
823                        .find_map(|(i, c)| (c.name == Self::SNAPSHOT_NAME).then_some(i as i32))
824                        .ok_or_else(|| {
825                            anyhow!("No micro checkpoint with just-registered name found")
826                        })?,
827                )
828                .map_err(|_| anyhow!("Micro checkpoint index already set"))?;
829        }
830
831        Ok(())
832    }
833
834    /// Restore the initial snapshot using the configured method (either rev-exec micro checkpoints
835    /// or snapshots)
836    pub fn restore_initial_snapshot(&mut self) -> Result<()> {
837        #[cfg(simics_version = "7")]
838        restore_snapshot(Self::SNAPSHOT_NAME)?;
839        #[cfg(simics_version = "6")]
840        {
841            restore_micro_checkpoint(*self.micro_checkpoint_index.get().ok_or_else(|| {
842                anyhow!("Not using snapshots and no micro checkpoint index present")
843            })?)?;
844
845            discard_future()?;
846        }
847
848        Ok(())
849    }
850
851    /// Whether the initial snapshot should be restored at the current iteration boundary.
852    ///
853    /// This is evaluated after `self.iterations` has been incremented.
854    pub fn should_restore_snapshot_this_iteration(&self) -> bool {
855        match self.snapshot_restore_interval {
856            SnapshotRestorePolicy::Never => false,
857            SnapshotRestorePolicy::Always => true,
858            SnapshotRestorePolicy::Every(n) => self.iterations.is_multiple_of(n),
859        }
860    }
861
862    /// Whether an initial snapshot has been saved
863    pub fn have_initial_snapshot(&self) -> bool {
864        let have = if cfg!(simics_version = "7") {
865            self.snapshot_name.get().is_some()
866        } else if cfg!(simics_version = "6") {
867            self.snapshot_name.get().is_some() && self.micro_checkpoint_index.get().is_some()
868        } else {
869            error!(self.as_conf_object(), "Unsupported SIMICS version");
870            false
871        };
872        have
873    }
874
875    /// Save a repro bookmark if one is needed
876    pub fn save_repro_bookmark_if_needed(&mut self) -> Result<()> {
877        if self.repro_testcase.is_some() && !self.repro_bookmark_set {
878            // On Simics 7 the reverse-execution `set-bookmark` CLI command is gone;
879            // the existing `Self::SNAPSHOT_NAME` snapshot is what gets restored for repro.
880            #[cfg(simics_version = "6")]
881            free_attribute(run_command("set-bookmark start")?)?;
882            self.repro_bookmark_set = true;
883        }
884
885        Ok(())
886    }
887
888    /// Return true if TSFFS should continue simulation after preparing repro state.
889    pub fn should_auto_continue_repro(&self) -> bool {
890        self.repro_testcase.is_none() || self.repro_auto_continue
891    }
892
893    /// Resume immediately after preparing repro state, or log that external resume is required.
894    pub fn continue_after_repro_prepared(&self) -> Result<()> {
895        if self.should_auto_continue_repro() {
896            debug!(self.as_conf_object(), "Resuming simulation");
897
898            run_alone(|| {
899                continue_simulation(0)?;
900                Ok(())
901            })?;
902        } else {
903            info!(
904                self.as_conf_object(),
905                "Repro testcase prepared; waiting for external resume."
906            );
907        }
908
909        Ok(())
910    }
911}
912
913impl Tsffs {
914    /// Get a testcase from the fuzzer and write it to memory along with, optionally, a size
915    pub fn get_and_write_testcase(&mut self) -> Result<()> {
916        let testcase = self.get_testcase()?;
917
918        // TODO: Fix cloning - refcell?
919        let start_info = self
920            .start_info
921            .get()
922            .ok_or_else(|| anyhow!("No start info"))?
923            .clone();
924
925        let start_processor = self
926            .start_processor()
927            .ok_or_else(|| anyhow!("No start processor"))?;
928
929        start_processor.write_start(testcase.testcase.bytes(), &start_info)?;
930
931        Ok(())
932    }
933
934    /// Post a new timeout event on the start processor with the configured timeout in
935    /// seconds
936    pub fn post_timeout_event(&mut self) -> Result<()> {
937        let tsffs_ptr = self.as_conf_object_mut();
938        let start_processor = self
939            .start_processor()
940            .ok_or_else(|| anyhow!("No start processor"))?;
941        let start_processor_time = start_processor.cycle().get_time()?;
942        let start_processor_cpu = start_processor.cpu();
943        let start_processor_clock = object_clock(start_processor_cpu)?;
944        let timeout_time = self.timeout + start_processor_time;
945        trace!(
946            self.as_conf_object(),
947            "Posting event on processor at time {} for {}s (time {})",
948            start_processor_time,
949            self.timeout,
950            timeout_time
951        );
952        self.timeout_event
953            .get_mut()
954            .ok_or_else(|| anyhow!("No timeout event set"))?
955            .post_time(
956                start_processor_cpu,
957                start_processor_clock,
958                self.timeout,
959                move |_obj| {
960                    let tsffs: &'static mut Tsffs = tsffs_ptr.into();
961                    tsffs
962                        .stop_simulation(StopReason::Solution {
963                            kind: SolutionKind::Timeout,
964                        })
965                        .expect("Error calling timeout callback");
966                },
967            )?;
968
969        Ok(())
970    }
971
972    /// Cancel a pending timeout event, if there is one. Used when execution reaches a
973    /// solution or normal stop condition before a timeout occurs.
974    pub fn cancel_timeout_event(&mut self) -> Result<()> {
975        if let Some(start_processor) = self.start_processor() {
976            let start_processor_time = start_processor.cycle().get_time()?;
977            let start_processor_cpu = start_processor.cpu();
978            let start_processor_clock = object_clock(start_processor_cpu)?;
979            match self
980                .timeout_event
981                .get()
982                .ok_or_else(|| anyhow!("No timeout event set"))?
983                .find_next_time(start_processor_clock, start_processor_cpu)
984            {
985                Ok(next_time) => trace!(
986                    self.as_conf_object(),
987                    "Cancelling event with next time {} (current time {})",
988                    next_time,
989                    start_processor_time
990                ),
991                // NOTE: This is not an error, it almost always means we did not find a next
992                // time, which always happens if the timeout goes off.
993                Err(e) => trace!(
994                    self.as_conf_object(),
995                    "Not cancelling event with next time due to error: {e}"
996                ),
997            }
998            self.timeout_event
999                .get()
1000                .ok_or_else(|| anyhow!("No timeout event set"))?
1001                .cancel_time(start_processor_cpu, start_processor_clock)?;
1002        }
1003        Ok(())
1004    }
1005
1006    pub fn save_symbolic_coverage(&mut self) -> Result<()> {
1007        if self.symbolic_coverage_directory.is_dir() {
1008            create_dir_all(&self.symbolic_coverage_directory)?;
1009        }
1010
1011        debug!(
1012            self.as_conf_object(),
1013            "Saving symbolic coverage to {}",
1014            self.symbolic_coverage_directory.display()
1015        );
1016
1017        self.coverage.to_html(&self.symbolic_coverage_directory)?;
1018
1019        debug!(
1020            self.as_conf_object(),
1021            "Symbolic coverage saved to {}",
1022            self.symbolic_coverage_directory.display()
1023        );
1024
1025        Ok(())
1026    }
1027
1028    /// Save the current execution trace to a file
1029    pub fn save_execution_trace(&mut self) -> Result<()> {
1030        let mut hasher = DefaultHasher::new();
1031        self.execution_trace.hash(&mut hasher);
1032        let hash = hasher.finish();
1033
1034        if !self.execution_trace_directory.is_dir() {
1035            create_dir_all(&self.execution_trace_directory)?;
1036        }
1037
1038        let trace_path = self
1039            .execution_trace_directory
1040            .join(format!("{:x}.json", hash));
1041
1042        if !trace_path.exists() {
1043            let trace_file = File::create(&trace_path)?;
1044            to_writer(trace_file, &self.execution_trace)?;
1045        }
1046        Ok(())
1047    }
1048}
1049
1050#[simics_init(name = "tsffs", class = "tsffs")]
1051/// Initialize TSFFS
1052fn init() {
1053    let tsffs = Tsffs::create().expect("Failed to create class tsffs");
1054    config::register(tsffs).expect("Failed to register config interface for tsffs");
1055    fuzz::register(tsffs).expect("Failed to register fuzz interface for tsffs");
1056    run_python(indoc! {r#"
1057        def init_tsffs_cmd():
1058            try:
1059                global tsffs
1060                tsffs = SIM_create_object(SIM_get_class("tsffs"), "tsffs", [])
1061            except Exception as e:
1062                raise CliError(f"Failed to create tsffs: {e}")
1063            
1064            print("TSFFS initialized. Configure and use it as @tsffs.")
1065    "#})
1066    .expect("Failed to run python");
1067    run_python(indoc! {r#"
1068        new_command(
1069            "init-tsffs",
1070            init_tsffs_cmd,
1071            [],
1072            type = ["Fuzzing"],
1073            see_also = [],
1074            short = "Initialize the TSFFS fuzzer",
1075            doc = "Initialize the TSFFS fuzzer"
1076        )
1077    "#})
1078    .map_err(|e| {
1079        error!(tsffs, "{e}");
1080        e
1081    })
1082    .expect("Failed to run python");
1083}