tsffs/
lib.rs

1// Copyright (C) 2024 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4//! TSFFS Module for SIMICS
5//!
6//! # Overview
7//!
8//! This crate provides a client and module loadable by SIMICS to enable fuzzing on the SIMICS
9//! platform. The client is intended to be used by the `simics-fuzz` crate, but it can be used
10//! manually to enable additional use cases.
11//!
12//! # Capabilities
13//!
14//! The Module can:
15//!
16//! - Trace branch hits during an execution of a target on an x86_64 processor. These branches
17//!   are traced into shared memory in the format understood by the AFL family of tools.
18//! - Catch exception/fault events registered in an initial configuration or dynamically using
19//!   a SIMICS Python script
20//! - Catch timeout events registered in an initial configuration or dynamically using a SIMICS
21//!   Python script
22//! - Manage the state of a target under test by taking and restoring a snapshot of its state for
23//!   deterministic snapshot fuzzing
24
25#![deny(clippy::all)]
26// NOTE: We have to do this a lot, and it sucks to have all these functions be unsafe
27#![allow(clippy::not_unsafe_ptr_arg_deref)]
28#![deny(clippy::unwrap_used)]
29#![warn(missing_docs)]
30
31use crate::interfaces::{config::config, fuzz::fuzz};
32use crate::state::SolutionKind;
33#[cfg(simics_version = "6")]
34use crate::util::Utils;
35use anyhow::{anyhow, Result};
36use arch::{Architecture, ArchitectureHint, ArchitectureOperations};
37use fuzzer::{messages::FuzzerMessage, ShutdownMessage, Testcase};
38use indoc::indoc;
39use lcov2::Records;
40use libafl::{inputs::HasBytesVec, prelude::ExitKind};
41use libafl_bolts::prelude::OwnedMutSlice;
42use libafl_targets::AFLppCmpLogMap;
43use log::LogMessage;
44use magic::MagicNumber;
45use num_traits::FromPrimitive as _;
46use os::windows::WindowsOsInfo;
47use serde::{Deserialize, Serialize};
48use serde_json::to_writer;
49use simics::{
50    break_simulation, class, debug, error, free_attribute, get_class, get_interface,
51    get_processor_number, info, lookup_file, object_clock, run_command, run_python, simics_init,
52    sys::save_flags_t, trace, version_base, warn, write_configuration_to_file, AsConfObject,
53    BreakpointId, ClassCreate, ClassObjectsFinalize, ConfObject, CoreBreakpointMemopHap,
54    CoreControlRegisterWriteHap, CoreExceptionHap, CoreMagicInstructionHap,
55    CoreSimulationStoppedHap, CpuInstrumentationSubscribeInterface, Event, EventClassFlag,
56    FromConfObject, HapHandle, Interface,
57};
58#[cfg(simics_version = "6")]
59use simics::{
60    discard_future, restore_micro_checkpoint, save_micro_checkpoint, MicroCheckpointFlags,
61};
62#[cfg(simics_version = "7")]
63// NOTE: save_snapshot used because it is a stable alias for both save_snapshot and take_snapshot
64// which is necessary because this module is compatible with base versions which cross the
65// deprecation boundary
66use simics::{restore_snapshot, save_snapshot};
67use source_cov::SourceCache;
68use state::StopReason;
69use std::{
70    alloc::{alloc_zeroed, Layout},
71    cell::OnceCell,
72    collections::{hash_map::Entry, BTreeSet, HashMap, HashSet},
73    fs::{create_dir_all, remove_dir_all, File},
74    hash::{DefaultHasher, Hash, Hasher},
75    path::PathBuf,
76    ptr::null_mut,
77    str::FromStr,
78    sync::mpsc::{Receiver, Sender},
79    thread::JoinHandle,
80    time::SystemTime,
81};
82use tracer::{
83    tsffs::{on_instruction_after, on_instruction_before},
84    ExecutionTrace,
85};
86use typed_builder::TypedBuilder;
87use versions::{Requirement, Versioning};
88
89pub(crate) mod arch;
90pub(crate) mod fuzzer;
91pub(crate) mod haps;
92pub(crate) mod interfaces;
93pub(crate) mod log;
94pub(crate) mod magic;
95pub(crate) mod os;
96pub(crate) mod source_cov;
97pub(crate) mod state;
98pub(crate) mod tracer;
99pub(crate) mod traits;
100pub(crate) mod util;
101
102/// The class name used for all operations interfacing with SIMICS
103pub const CLASS_NAME: &str = env!("CARGO_PKG_NAME");
104
105#[derive(Serialize, Deserialize, Clone, Debug)]
106/// An address that was formerly virtual or formerly physical. The actual
107/// address *must* be physical.
108pub(crate) enum StartPhysicalAddress {
109    /// The address was formerly virtual
110    WasVirtual(u64),
111    /// The address was formerly physical
112    WasPhysical(u64),
113}
114
115impl StartPhysicalAddress {
116    /// Get the physical address
117    pub fn physical_address(&self) -> u64 {
118        match self {
119            StartPhysicalAddress::WasVirtual(addr) => *addr,
120            StartPhysicalAddress::WasPhysical(addr) => *addr,
121        }
122    }
123}
124
125#[derive(Serialize, Deserialize, Clone, Debug)]
126pub(crate) enum ManualStartAddress {
127    Virtual(u64),
128    Physical(u64),
129}
130
131impl ManualStartAddress {
132    pub fn address(&self) -> u64 {
133        match self {
134            ManualStartAddress::Virtual(addr) => *addr,
135            ManualStartAddress::Physical(addr) => *addr,
136        }
137    }
138}
139
140#[derive(TypedBuilder, Serialize, Deserialize, Clone, Debug)]
141pub(crate) struct StartInfo {
142    /// The physical address of the buffer. Must be physical, if the input address was
143    /// virtual, it should be pre-translated
144    pub address: StartPhysicalAddress,
145    /// The initial contents of the buffer
146    pub contents: Vec<u8>,
147    /// The initial size of the buffer. This will either be only an address, in which
148    /// case the initial size will be `*size_ptr` and the actual size of each testcase
149    /// will be written back to `*size_ptr`, a `max_size` in which case the size will
150    /// not be written, or a `size_ptr` and `max_size` in which case the size will be
151    /// written back to `*size_ptr` and the maximum size will be `max_size`.
152    pub size: StartSize,
153}
154
155#[derive(Serialize, Deserialize, Clone, Debug)]
156/// Exactly the same as `StartInfo` except with the semantic difference that the address
157/// may not always be stored as physical, the user may provide a virtual address for both
158/// the address and the size pointer (if there is one).
159pub(crate) struct ManualStartInfo {
160    pub address: ManualStartAddress,
161    pub size: ManualStartSize,
162}
163
164#[derive(Serialize, Deserialize, Clone, Debug)]
165pub(crate) enum StartSize {
166    SizePtr {
167        address: StartPhysicalAddress,
168        maximum_size: usize,
169    },
170    MaxSize(usize),
171    SizePtrAndMaxSize {
172        address: StartPhysicalAddress,
173        maximum_size: usize,
174    },
175}
176
177impl StartSize {
178    pub fn maximum_size(&self) -> usize {
179        match self {
180            StartSize::SizePtr { maximum_size, .. } => *maximum_size,
181            StartSize::MaxSize(maximum_size) => *maximum_size,
182            StartSize::SizePtrAndMaxSize { maximum_size, .. } => *maximum_size,
183        }
184    }
185
186    pub fn physical_address(&self) -> Option<StartPhysicalAddress> {
187        match self {
188            StartSize::SizePtr { address, .. } => Some(address.clone()),
189            StartSize::MaxSize(_) => None,
190            StartSize::SizePtrAndMaxSize { address, .. } => Some(address.clone()),
191        }
192    }
193}
194
195#[derive(Serialize, Deserialize, Clone, Debug)]
196pub(crate) enum ManualStartSize {
197    SizePtr {
198        address: ManualStartAddress,
199    },
200    MaxSize(usize),
201    SizePtrAndMaxSize {
202        address: ManualStartAddress,
203        maximum_size: usize,
204    },
205}
206
207#[class(name = "tsffs", skip_objects_finalize)]
208#[derive(AsConfObject, FromConfObject, Default)]
209/// The main module class for the TSFFS fuzzer, stores state and configuration information
210pub(crate) struct Tsffs {
211    #[class(attribute(optional, default = false))]
212    /// Whether all breakpoints are treated as solutions. When set to `True`, any breakpoint
213    /// which triggers a `Core_Breakpoint_Memop` HAP will be treated as a solution. This allows
214    /// setting memory breakpoints on specific memory locations to trigger a solution when the
215    /// memory is read, written, or executed. Not all breakpoints cause this HAP to occur.
216    ///
217    /// For example, to set an execution breakpoint on the address $addr:
218    ///
219    /// $addr = 0x100000
220    /// $bp = (bp.memory.break -x $addr)
221    /// @tsffs.all_breakpoints_are_solutions = True
222    ///
223    /// Tsffs will treat the breakpoint as a solution (along with all other
224    /// breakpoints), and the fuzzer will stop when the breakpoint is hit.
225    pub all_breakpoints_are_solutions: bool,
226    #[class(attribute(optional, default = false))]
227    /// Whether all exceptions are treated as solutions. When set to `True`, any CPU exception
228    /// or interrupt which triggers a `Core_Exception` HAP will be treated as a solution. This
229    /// can be useful when enabled in a callback after which any exception is considered a
230    /// solution and is typically not useful when enabled during the start-up process because
231    /// most processors will generate exceptions during start-up and during normal operation.
232    pub all_exceptions_are_solutions: bool,
233    #[class(attribute(optional))]
234    /// The set of exceptions which are treated as solutions. For example on x86_64, setting:
235    ///
236    /// @tsffs.exceptions = [14]
237    ///
238    /// would treat any page fault as a solution.
239    pub exceptions: BTreeSet<i64>,
240    #[class(attribute(optional))]
241    /// The set of breakpoints which are treated as solutions. For example, to set a solution
242    /// breakpoint on the address $addr (note the breakpoint set from the Simics command is
243    /// accessed through the simenv namespace):
244    ///
245    /// $addr = 0x100000
246    /// $bp = (bp.memory.break -x $addr)
247    /// @tsffs.breakpoints = [simenv.bp]
248    pub breakpoints: BTreeSet<BreakpointId>,
249    #[class(attribute(optional, default = 5.0))]
250    /// The timeout in seconds of virtual time for each iteration of the fuzzer. If the virtual
251    /// time timeout is exceeded for a single iteration, the iteration is stopped and the testcase
252    /// is saved as a solution.
253    pub timeout: f64,
254    #[class(attribute(optional, default = true))]
255    /// Whether the fuzzer should start on compiled-in harnesses. If set to `True`, the fuzzer
256    /// will start fuzzing when a harness macro is executed.
257    pub start_on_harness: bool,
258    #[class(attribute(optional, default = true))]
259    /// Whether the fuzzer should stop on compiled-in harnesses. If set to `True`, the fuzzer
260    /// will start fuzzing when a harness macro is executed.
261    pub stop_on_harness: bool,
262    #[class(attribute(optional, default = 0))]
263    /// The index number which is passed to the platform-specific magic instruction HAP
264    /// by a compiled-in harness to signal that the fuzzer should start the fuzzing loop.
265    ///
266    /// This option is useful when fuzzing a target which has multiple start harnesses compiled
267    /// into it, and the fuzzer should start on a specific harness.
268    ///
269    /// There can only be one magic start value, because only one fuzzing loop can be running
270    /// (and they cannot be nested). This only has an effect if `start_on_harness` is set.
271    pub magic_start_index: u64,
272    #[class(attribute(optional, default = vec![0]))]
273    /// The magic numbers which is passed to the platform-specific magic instruction HAP
274    /// by a compiled-in harness to signal that the fuzzer should stop execution of the current
275    /// iteration.
276    ///
277    /// This option is useful when fuzzing a target which has multiple stop harnesses compiled
278    /// into it, and the fuzzer should stop on a specific subset of stop harness macro calls.
279    ///
280    /// This only has an effect if `stop_on_harness` is set.
281    pub magic_stop_indices: Vec<u64>,
282    #[class(attribute(optional, default = vec![0]))]
283    /// The numbers which are passed to the platform-specific magic instruction HAP by a
284    /// compiled-in harness to signal that the fuzzer should stop execution of the
285    /// current iteration and save the testcase as a solution.
286    ///
287    /// This only has an effect if `stop_on_harness` is set.
288    pub magic_assert_indices: Vec<u64>,
289    #[class(attribute(optional))]
290    /// The limit on the number of fuzzing iterations to execute. If set to 0, the fuzzer will
291    /// run indefinitely. If set to a positive integer, the fuzzer will run until the limit is
292    /// reached.
293    pub iteration_limit: usize,
294    #[class(attribute(optional, default = 8))]
295    /// The size of the corpus to generate randomly. If `generate_random_corpus` is set to
296    /// `True`, the fuzzer will generate a random corpus of this size before starting the
297    /// fuzzing loop.
298    pub initial_random_corpus_size: usize,
299    #[class(attribute(optional, default = lookup_file("%simics%")?.join("corpus")))]
300    /// The directory to load the corpus from and save new corpus items to. This directory
301    /// may be a SIMICS relative path prefixed with "%simics%". It is an error to provide no
302    /// corpus directory when `set_generate_random_corpus(True)` has not been called prior to
303    /// fuzzer startup. It is also an error to provide an *empty* corpus directory without
304    /// calling `set_generate_random_corpus(True)`.  If not provided, "%simics%/corpus" will
305    /// be used by default.
306    pub corpus_directory: PathBuf,
307    #[class(attribute(optional, default = lookup_file("%simics%")?.join("solutions")))]
308    /// The directory to save solutions to. This directory may be a SIMICS relative path
309    /// prefixed with "%simics%". If not provided, "%simics%/solutions" will be used by
310    /// default.
311    pub solutions_directory: PathBuf,
312    #[class(attribute(optional, default = false))]
313    /// Whether to generate a random corpus before starting the fuzzing loop. If set to `True`,
314    /// the fuzzer will generate a random corpus of size `initial_random_corpus_size` before
315    /// starting the fuzzing loop. This should generally be used only for debugging and testing
316    /// purposes, and is not recommended for use in production. A real corpus representative of
317    /// both valid and invalid inputs should be used in production.
318    pub generate_random_corpus: bool,
319    #[class(attribute(optional, default = true))]
320    /// Whether comparison logging should be used during fuzzing to enable value-driven
321    /// mutations. If set to `True`, the fuzzer will use comparison logging to enable
322    /// value-driven mutations. This should always be enabled unless the target is known to
323    /// not benefit from value-driven mutations or run too slowly when solving for comparison
324    /// values.
325    pub cmplog: bool,
326    #[class(attribute(optional, default = true))]
327    /// Whether coverage reporting should be enabled. When enabled, new edge addresses will
328    /// be logged.
329    pub coverage_reporting: bool,
330    #[class(attribute(optional))]
331    /// A set of executable files to tokenize. Tokens will be extracted from these files and
332    /// used to drive token mutations of testcases.
333    pub token_executables: Vec<PathBuf>,
334    #[class(attribute(optional))]
335    /// A set of source files to tokenize. Tokens will be extracted from these files and used
336    /// to drive token mutations of testcases. C source files are expected, and strings and
337    /// tokens will be extracted from strings in the source files.
338    pub token_src_files: Vec<PathBuf>,
339    #[class(attribute(optional))]
340    /// Files in the format of:
341    ///
342    /// x = "hello"
343    /// y = "foo\x41bar"
344    ///
345    /// which will be used to drive token mutations of testcases.
346    pub token_files: Vec<PathBuf>,
347    #[class(attribute(optional))]
348    /// Sets of tokens to use to drive token mutations of testcases. Each token set is a
349    /// bytes which will be randomically inserted into testcases.
350    pub tokens: Vec<Vec<u8>>,
351    #[class(attribute(optional, default = lookup_file("%simics%")?.join("checkpoint.ckpt")))]
352    /// The path to the checkpoint saved prior to fuzzing when using snapshots
353    pub checkpoint_path: PathBuf,
354    #[class(attribute(optional, default = true))]
355    pub pre_snapshot_checkpoint: bool,
356    #[class(attribute(optional, default = lookup_file("%simics%")?.join("log.json")))]
357    /// The path to the log file which will be used to log the fuzzer's output statistics
358    pub log_path: PathBuf,
359    #[class(attribute(optional, default = true))]
360    pub log_to_file: bool,
361    #[class(attribute(optional, default = false))]
362    pub keep_all_corpus: bool,
363    #[class(attribute(optional, default = false))]
364    /// Whether to use the initial contents of the testcase buffer as an entry in the corpus
365    pub use_initial_as_corpus: bool,
366    #[class(attribute(optional, default = false))]
367    /// Whether to enable extra debug logging for LibAFL
368    pub debug_log_libafl: bool,
369    #[class(attribute(optional, default = true))]
370    /// Whether to send shut down on stops without reason. This means fuzzing cannot be resumed.
371    pub shutdown_on_stop_without_reason: bool,
372    #[class(attribute(optional, default = true))]
373    /// Whether to quit on iteration limit
374    pub quit_on_iteration_limit: bool,
375    #[class(attribute(optional, default = false))]
376    /// Whether to save execution traces of test cases which result in a timeout
377    pub save_timeout_execution_traces: bool,
378    #[class(attribute(optional, default = false))]
379    /// Whether to save execution traces of test cases which result in a solution
380    pub save_solution_execution_traces: bool,
381    #[class(attribute(optional, default = false))]
382    /// Whether to save execution traces of test cases which result in an interesting input
383    pub save_interesting_execution_traces: bool,
384    #[class(attribute(optional, default = false))]
385    /// Whether to save all execution traces. This will consume a very large amount of resources
386    /// and should only be used for debugging and testing purposes.
387    pub save_all_execution_traces: bool,
388    #[class(attribute(optional, default = lookup_file("%simics%")?.join("execution-traces")))]
389    /// The directory to save execution traces to, if any are set to be saved. This
390    /// directory may be a SIMICS relative path prefixed with "%simics%". If not
391    /// provided, "%simics%/execution-traces" will be used by default.
392    pub execution_trace_directory: PathBuf,
393    #[class(attribute(optional, default = false))]
394    /// Whether execution traces should include just PC (vs instruction text and bytes)
395    pub execution_trace_pc_only: bool,
396    #[class(attribute(optional, default = true))]
397    /// Whether a heartbeat message should be emitted every `heartbeat_interval` seconds
398    pub heartbeat: bool,
399    #[class(attribute(optional, default = 60))]
400    /// The interval in seconds between heartbeat messages
401    pub heartbeat_interval: u64,
402
403    #[class(attribute(optional, default = false))]
404    /// Whether symbolic coverage should be used during fuzzing
405    pub symbolic_coverage: bool,
406    #[class(attribute(optional, default = false))]
407    /// Whether windows is being run in the simulation
408    pub windows: bool,
409    #[class(attribute(optional, default = lookup_file("%simics%")?.join("debuginfo-cache")))]
410    /// Directory in which to download PDB and EXE files from symbol servers on Windows
411    pub debuginfo_download_directory: PathBuf,
412    #[class(attribute(optional))]
413    /// Mapping of file name (name and extension e.g. fuzzer-app.exe or target.sys)
414    /// to a tuple of (exe path, debuginfo path) where debuginfo is either a PDB or DWARF
415    /// file
416    pub debug_info: HashMap<String, Vec<PathBuf>>,
417    #[class(attribute(optional, default = lookup_file("%simics%")?.join("debuginfo-source")))]
418    /// Directory in which source files are located. Source files do not need to be arranged in
419    /// the same directory structure as the compiled source, and are looked up by hash.
420    pub debuginfo_source_directory: PathBuf,
421    #[class(attribute(optional, default = false))]
422    /// Whether symbolic coverage should be collected for system components by downloading
423    /// executable and debug info files where possible.
424    pub symbolic_coverage_system: bool,
425    #[class(attribute(optional, default = lookup_file("%simics%")?.join("symbolic-coverage")))]
426    /// Directory in which source files are located. Source files do not need to be arranged in
427    /// the same directory structure as the compiled source, and are looked up by hash.
428    pub symbolic_coverage_directory: PathBuf,
429
430    /// Handle for the core simulation stopped hap
431    stop_hap_handle: HapHandle,
432    /// Handle for the core breakpoint memop hap
433    breakpoint_memop_hap_handle: HapHandle,
434    /// Handle for exception HAP
435    exception_hap_handle: HapHandle,
436    /// The handle for the registered magic HAP, used to
437    /// listen for magic start and stop if `start_on_harness`
438    /// or `stop_on_harness` are set.
439    magic_hap_handle: HapHandle,
440    /// Handle for the core control register write hap
441    control_register_write_hap_handle: HapHandle,
442
443    /// A mapping of architecture hints from CPU index to architecture hint. This architecture
444    /// hint overrides the detected architecture of the CPU core. This is useful when the
445    /// architecture of the CPU core is not detected correctly, or when the architecture of the
446    /// CPU core is not known at the time the fuzzer is started. Specifically, x86 cores which
447    /// report their architecture as x86_64 can be overridden to x86.
448    pub architecture_hints: HashMap<i32, ArchitectureHint>,
449    // Threads and message channels
450    /// Fuzzer thread
451    fuzz_thread: OnceCell<JoinHandle<Result<()>>>,
452    /// Message sender to the fuzzer thread. TSFFS sends exit kinds to the fuzzer thread to
453    /// report whether testcases resulted in normal exit, timeout, or solutions.
454    fuzzer_tx: OnceCell<Sender<ExitKind>>,
455    /// Message receiver from the fuzzer thread. TSFFS receives new testcases and run configuration
456    /// from the fuzzer thread.
457    fuzzer_rx: OnceCell<Receiver<Testcase>>,
458    /// A message sender to inform the fuzzer thread that it should exit.
459    fuzzer_shutdown: OnceCell<Sender<ShutdownMessage>>,
460    /// Reciever from the fuzzer thread to receive messages from the fuzzer thread
461    /// including status messages and structured introspection data like new edge findings.
462    fuzzer_messages: OnceCell<Receiver<FuzzerMessage>>,
463
464    // Fuzzer coverage maps
465    /// The coverage map
466    coverage_map: OnceCell<OwnedMutSlice<'static, u8>>,
467    /// A pointer to the AFL++ comparison map
468    aflpp_cmp_map_ptr: OnceCell<*mut AFLppCmpLogMap>,
469    /// The owned AFL++ comparison map
470    aflpp_cmp_map: OnceCell<&'static mut AFLppCmpLogMap>,
471    /// The previous location for coverage for calculating the hash of edges.
472    coverage_prev_loc: u64,
473    /// The registered timeout event which is registered and used to detect timeouts in
474    /// virtual time
475    timeout_event: OnceCell<Event>,
476    /// The set of edges which have been seen at least once.
477    edges_seen: HashSet<u64>,
478    /// A map of the new edges to their AFL indices seen since the last time the fuzzer
479    /// provided an update. This is not cleared every execution.
480    edges_seen_since_last: HashMap<u64, u64>,
481    /// The set of PCs comprising the current execution trace. This is cleared every execution.
482    execution_trace: ExecutionTrace,
483    /// The current line coverage state comprising the total execution. This is not
484    /// cleared and is persistent across the full campaign until the fuzzer stops.
485    coverage: Records,
486
487    /// The name of the fuzz snapshot, if saved
488    snapshot_name: OnceCell<String>,
489    /// The index of the micro checkpoint saved for the fuzzer. Only present if not using
490    /// snapshots.
491    micro_checkpoint_index: OnceCell<i32>,
492
493    /// The reason the current stop occurred
494    stop_reason: Option<StopReason>,
495    /// The buffer and size information, if saved
496    start_info: OnceCell<StartInfo>,
497
498    // #[builder(default = SystemTime::now())]
499    /// The time the fuzzer was started at
500    start_time: OnceCell<SystemTime>,
501    // #[builder(default = SystemTime::now())]
502    /// The time the fuzzer was started at
503    last_heartbeat_time: Option<SystemTime>,
504
505    log: OnceCell<File>,
506
507    /// Whether cmplog is currently enabled
508    coverage_enabled: bool,
509    /// Whether cmplog is currently enabled
510    cmplog_enabled: bool,
511    /// The number of the processor which starts the fuzzing loop (via magic or manual methods)
512    start_processor_number: OnceCell<i32>,
513    /// Tracked processors. This always includes the start processor, and may include
514    /// additional processors that are manually added by the user
515    processors: HashMap<i32, Architecture>,
516    /// A testcase to use for repro
517    repro_testcase: Option<Vec<u8>>,
518    /// Whether a bookmark has been set for repro mode
519    repro_bookmark_set: bool,
520    /// Whether the fuzzer is currently stopped in repro mode
521    stopped_for_repro: bool,
522    /// The number of iterations which have been executed so far
523    iterations: usize,
524    /// Whether snapshots are used. Snapshots are used on Simics 7.0.0 and later.
525    use_snapshots: bool,
526    /// The number of timeouts so far
527    timeouts: usize,
528    /// The number of solutions so far
529    solutions: usize,
530
531    windows_os_info: WindowsOsInfo,
532    cr3_cache: HashMap<i32, i64>,
533    source_file_cache: SourceCache,
534}
535
536impl ClassObjectsFinalize for Tsffs {
537    unsafe fn objects_finalized(instance: *mut ConfObject) -> simics::Result<()> {
538        let tsffs: &'static mut Tsffs = instance.into();
539        tsffs.stop_hap_handle = CoreSimulationStoppedHap::add_callback(
540            // NOTE: Core_Simulation_Stopped is called with an object, exception and
541            // error string, but the exception is always
542            // SimException::SimExc_No_Exception and the error string is always
543            // null_mut.
544            move |_, _, _| {
545                // On stops, call the module's stop callback method, which will in turn call the
546                // stop callback methods on each of the module's components. The stop reason will
547                // be retrieved from the module, if one is set. It is an error for the module to
548                // stop itself without setting a reason
549                let tsffs: &'static mut Tsffs = instance.into();
550                tsffs
551                    .on_simulation_stopped()
552                    .expect("Error calling simulation stopped callback");
553            },
554        )?;
555        tsffs.breakpoint_memop_hap_handle =
556            CoreBreakpointMemopHap::add_callback(move |trigger_obj, breakpoint_number, memop| {
557                let tsffs: &'static mut Tsffs = instance.into();
558                tsffs
559                    .on_breakpoint_memop(trigger_obj, breakpoint_number, memop)
560                    .expect("Error calling breakpoint memop callback");
561            })?;
562        tsffs.exception_hap_handle =
563            CoreExceptionHap::add_callback(move |trigger_obj, exception_number| {
564                let tsffs: &'static mut Tsffs = instance.into();
565                tsffs
566                    .on_exception(trigger_obj, exception_number)
567                    .expect("Error calling breakpoint memop callback");
568            })?;
569        tsffs.magic_hap_handle =
570            CoreMagicInstructionHap::add_callback(move |trigger_obj, magic_number| {
571                let tsffs: &'static mut Tsffs = instance.into();
572
573                // NOTE: Some things (notably, the x86_64 UEFI app loader) do a
574                // legitimate CPUID (in the UEFI loader, with number 0xc aka
575                // eax=0xc4711) that registers as a magic number. We therefore permit
576                // non-valid magic numbers to be executed, but we do nothing for them.
577                if let Some(magic_number) = MagicNumber::from_i64(magic_number) {
578                    tsffs
579                        .on_magic_instruction(trigger_obj, magic_number)
580                        .expect("Failed to execute on_magic_instruction callback")
581                }
582            })?;
583        tsffs.control_register_write_hap_handle =
584            CoreControlRegisterWriteHap::add_callback(move |trigger_obj, register_nr, value| {
585                let tsffs: &'static mut Tsffs = instance.into();
586                tsffs
587                    .on_control_register_write(trigger_obj, register_nr, value)
588                    .expect("Failed to execute on_control_register_write callback")
589            })?;
590        tsffs
591            .coverage_map
592            .set(OwnedMutSlice::from(vec![0; Tsffs::COVERAGE_MAP_SIZE]))
593            .map_err(|_e| anyhow!("Value already set"))?;
594
595        tsffs
596            .aflpp_cmp_map_ptr
597            .set(unsafe { alloc_zeroed(Layout::new::<AFLppCmpLogMap>()) as *mut _ })
598            .map_err(|_e| anyhow!("Value already set"))?;
599
600        tsffs
601            .aflpp_cmp_map
602            .set(unsafe {
603                &mut **tsffs
604                    .aflpp_cmp_map_ptr
605                    .get()
606                    .expect("Value just set and known to be valid")
607            })
608            .map_err(|_e| anyhow!("Value already set"))?;
609
610        tsffs
611            .timeout_event
612            .set(
613                Event::builder()
614                    .name(Tsffs::TIMEOUT_EVENT_NAME)
615                    .cls(get_class(CLASS_NAME).expect("Error getting class"))
616                    .flags(EventClassFlag::Sim_EC_No_Flags)
617                    .build(),
618            )
619            .map_err(|_e| anyhow!("Value already set"))?;
620
621        // Check whether snapshots should be used. This is a runtime check because the module
622        // may be loaded in either Simics 6 or Simics 7.
623        let version = version_base()
624            .map_err(|e| anyhow!("Error getting version string: {}", e))
625            .and_then(|v| {
626                v.split(' ')
627                    .next_back()
628                    .ok_or_else(|| anyhow!("Error parsing version string '{}'", v))
629                    .map(|s| s.to_string())
630            })
631            .and_then(|v| {
632                Versioning::from_str(&v).map_err(|e| anyhow!("Error parsing version string: {e}"))
633            })?;
634
635        tsffs.use_snapshots = Requirement::from_str(">=7.0.0")
636            .map_err(|e| anyhow!("Error parsing requirement: {}", e))?
637            .matches(&version);
638
639        Ok(())
640    }
641}
642
643impl Tsffs {
644    /// The size of the coverage map in bytes
645    pub const COVERAGE_MAP_SIZE: usize = 128 * 1024;
646    /// The name of the registered timeout event
647    pub const TIMEOUT_EVENT_NAME: &'static str = "detector_timeout_event";
648    /// The name of the initial snapshot
649    pub const SNAPSHOT_NAME: &'static str = "tsffs-origin-snapshot";
650}
651
652/// Implementations for controlling the simulation
653impl Tsffs {
654    /// Stop the simulation with a reason
655    pub fn stop_simulation(&mut self, reason: StopReason) -> Result<()> {
656        let break_string = reason.to_string();
657
658        self.stop_reason = Some(reason);
659
660        break_simulation(break_string)?;
661
662        Ok(())
663    }
664}
665
666/// Implementations for common functionality
667impl Tsffs {
668    /// Add a monitored processor to the simulation and whether the processor is the
669    /// "start processor" which is the processor running when the fuzzing loop begins
670    pub fn add_processor(&mut self, cpu: *mut ConfObject, is_start: bool) -> Result<()> {
671        let cpu_number = get_processor_number(cpu)?;
672        debug!(
673            self.as_conf_object(),
674            "Adding {}processor {} to fuzzer",
675            if is_start { "start " } else { "" },
676            cpu_number
677        );
678
679        if let Entry::Vacant(e) = self.processors.entry(cpu_number) {
680            let architecture = if let Some(hint) = self.architecture_hints.get(&cpu_number) {
681                hint.architecture(cpu)?
682            } else {
683                Architecture::new(cpu)?
684            };
685            e.insert(architecture);
686            let mut cpu_interface: CpuInstrumentationSubscribeInterface = get_interface(cpu)?;
687            cpu_interface.register_instruction_after_cb(
688                null_mut(),
689                Some(on_instruction_after),
690                self as *mut Self as *mut _,
691            )?;
692            cpu_interface.register_instruction_before_cb(
693                null_mut(),
694                Some(on_instruction_before),
695                self as *mut Self as *mut _,
696            )?;
697        }
698
699        if is_start {
700            self.start_processor_number
701                .set(cpu_number)
702                .map_err(|_| anyhow!("Start processor number already set"))?;
703        }
704
705        Ok(())
706    }
707
708    /// Return a reference to the saved "start processor" if there is one. There will be no
709    /// "start processor" before a start harness (manual or magic) is executed.
710    pub fn start_processor(&mut self) -> Option<&mut Architecture> {
711        self.start_processor_number
712            .get()
713            .and_then(|n| self.processors.get_mut(n))
714    }
715}
716
717impl Tsffs {
718    /// Save the initial snapshot using the configured method (either rev-exec micro checkpoints
719    /// or snapshots)
720    pub fn save_initial_snapshot(&mut self) -> Result<()> {
721        if self.have_initial_snapshot() {
722            return Ok(());
723        }
724
725        // Disable VMP if it is enabled
726        info!(self.as_conf_object(), "Disabling VMP");
727        if let Err(e) = run_command("disable-vmp") {
728            warn!(self.as_conf_object(), "Failed to disable VMP: {}", e);
729        }
730
731        // Initialize the source cache for source/line lookups
732        info!(self.as_conf_object(), "Initializing source cache");
733        self.source_file_cache = SourceCache::new(&self.debuginfo_source_directory)?;
734
735        self.log(LogMessage::startup())?;
736
737        #[cfg(simics_version = "7")]
738        {
739            if self.pre_snapshot_checkpoint {
740                debug!(
741                    self.as_conf_object(),
742                    "Saving checkpoint to {}",
743                    self.checkpoint_path.display()
744                );
745
746                if self.checkpoint_path.exists() {
747                    remove_dir_all(&self.checkpoint_path)?;
748                }
749
750                write_configuration_to_file(&self.checkpoint_path, save_flags_t(0))?;
751            }
752
753            debug!(self.as_conf_object(), "Saving initial snapshot");
754
755            save_snapshot(Self::SNAPSHOT_NAME)?;
756            self.snapshot_name
757                .set(Self::SNAPSHOT_NAME.to_string())
758                .map_err(|_| anyhow!("Snapshot name already set"))?;
759        }
760
761        #[cfg(simics_version = "6")]
762        {
763            if self.pre_snapshot_checkpoint {
764                debug!(
765                    self.as_conf_object(),
766                    "Saving checkpoint to {}",
767                    self.checkpoint_path.display()
768                );
769
770                if self.checkpoint_path.exists() {
771                    remove_dir_all(&self.checkpoint_path)?;
772                }
773
774                write_configuration_to_file(&self.checkpoint_path, save_flags_t(0))?;
775            }
776
777            debug!(self.as_conf_object(), "Saving initial micro checkpoint");
778
779            save_micro_checkpoint(
780                Self::SNAPSHOT_NAME,
781                MicroCheckpointFlags::Sim_MC_ID_User | MicroCheckpointFlags::Sim_MC_Persistent,
782            )?;
783
784            self.snapshot_name
785                .set(Self::SNAPSHOT_NAME.to_string())
786                .map_err(|_| anyhow!("Snapshot name already set"))?;
787
788            self.micro_checkpoint_index
789                .set(
790                    Utils::get_micro_checkpoints()?
791                        .iter()
792                        .enumerate()
793                        .find_map(|(i, c)| (c.name == Self::SNAPSHOT_NAME).then_some(i as i32))
794                        .ok_or_else(|| {
795                            anyhow!("No micro checkpoint with just-registered name found")
796                        })?,
797                )
798                .map_err(|_| anyhow!("Micro checkpoint index already set"))?;
799        }
800
801        Ok(())
802    }
803
804    /// Restore the initial snapshot using the configured method (either rev-exec micro checkpoints
805    /// or snapshots)
806    pub fn restore_initial_snapshot(&mut self) -> Result<()> {
807        #[cfg(simics_version = "7")]
808        restore_snapshot(Self::SNAPSHOT_NAME)?;
809        #[cfg(simics_version = "6")]
810        {
811            restore_micro_checkpoint(*self.micro_checkpoint_index.get().ok_or_else(|| {
812                anyhow!("Not using snapshots and no micro checkpoint index present")
813            })?)?;
814
815            discard_future()?;
816        }
817
818        Ok(())
819    }
820
821    /// Whether an initial snapshot has been saved
822    pub fn have_initial_snapshot(&self) -> bool {
823        let have = if cfg!(simics_version = "7") {
824            self.snapshot_name.get().is_some()
825        } else if cfg!(simics_version = "6") {
826            self.snapshot_name.get().is_some() && self.micro_checkpoint_index.get().is_some()
827        } else {
828            error!(self.as_conf_object(), "Unsupported SIMICS version");
829            false
830        };
831        have
832    }
833
834    /// Save a repro bookmark if one is needed
835    pub fn save_repro_bookmark_if_needed(&mut self) -> Result<()> {
836        if self.repro_testcase.is_some() && !self.repro_bookmark_set {
837            free_attribute(run_command("set-bookmark start")?)?;
838            self.repro_bookmark_set = true;
839        }
840
841        Ok(())
842    }
843}
844
845impl Tsffs {
846    /// Get a testcase from the fuzzer and write it to memory along with, optionally, a size
847    pub fn get_and_write_testcase(&mut self) -> Result<()> {
848        let testcase = self.get_testcase()?;
849
850        // TODO: Fix cloning - refcell?
851        let start_info = self
852            .start_info
853            .get()
854            .ok_or_else(|| anyhow!("No start info"))?
855            .clone();
856
857        let start_processor = self
858            .start_processor()
859            .ok_or_else(|| anyhow!("No start processor"))?;
860
861        start_processor.write_start(testcase.testcase.bytes(), &start_info)?;
862
863        Ok(())
864    }
865
866    /// Post a new timeout event on the start processor with the configured timeout in
867    /// seconds
868    pub fn post_timeout_event(&mut self) -> Result<()> {
869        let tsffs_ptr = self.as_conf_object_mut();
870        let start_processor = self
871            .start_processor()
872            .ok_or_else(|| anyhow!("No start processor"))?;
873        let start_processor_time = start_processor.cycle().get_time()?;
874        let start_processor_cpu = start_processor.cpu();
875        let start_processor_clock = object_clock(start_processor_cpu)?;
876        let timeout_time = self.timeout + start_processor_time;
877        trace!(
878            self.as_conf_object(),
879            "Posting event on processor at time {} for {}s (time {})",
880            start_processor_time,
881            self.timeout,
882            timeout_time
883        );
884        self.timeout_event
885            .get_mut()
886            .ok_or_else(|| anyhow!("No timeout event set"))?
887            .post_time(
888                start_processor_cpu,
889                start_processor_clock,
890                self.timeout,
891                move |_obj| {
892                    let tsffs: &'static mut Tsffs = tsffs_ptr.into();
893                    tsffs
894                        .stop_simulation(StopReason::Solution {
895                            kind: SolutionKind::Timeout,
896                        })
897                        .expect("Error calling timeout callback");
898                },
899            )?;
900
901        Ok(())
902    }
903
904    /// Cancel a pending timeout event, if there is one. Used when execution reaches a
905    /// solution or normal stop condition before a timeout occurs.
906    pub fn cancel_timeout_event(&mut self) -> Result<()> {
907        if let Some(start_processor) = self.start_processor() {
908            let start_processor_time = start_processor.cycle().get_time()?;
909            let start_processor_cpu = start_processor.cpu();
910            let start_processor_clock = object_clock(start_processor_cpu)?;
911            match self
912                .timeout_event
913                .get()
914                .ok_or_else(|| anyhow!("No timeout event set"))?
915                .find_next_time(start_processor_clock, start_processor_cpu)
916            {
917                Ok(next_time) => trace!(
918                    self.as_conf_object(),
919                    "Cancelling event with next time {} (current time {})",
920                    next_time,
921                    start_processor_time
922                ),
923                // NOTE: This is not an error, it almost always means we did not find a next
924                // time, which always happens if the timeout goes off.
925                Err(e) => trace!(
926                    self.as_conf_object(),
927                    "Not cancelling event with next time due to error: {e}"
928                ),
929            }
930            self.timeout_event
931                .get()
932                .ok_or_else(|| anyhow!("No timeout event set"))?
933                .cancel_time(start_processor_cpu, start_processor_clock)?;
934        }
935        Ok(())
936    }
937
938    pub fn save_symbolic_coverage(&mut self) -> Result<()> {
939        if self.symbolic_coverage_directory.is_dir() {
940            create_dir_all(&self.symbolic_coverage_directory)?;
941        }
942
943        debug!(
944            self.as_conf_object(),
945            "Saving symbolic coverage to {}",
946            self.symbolic_coverage_directory.display()
947        );
948
949        self.coverage.to_html(&self.symbolic_coverage_directory)?;
950
951        debug!(
952            self.as_conf_object(),
953            "Symbolic coverage saved to {}",
954            self.symbolic_coverage_directory.display()
955        );
956
957        Ok(())
958    }
959
960    /// Save the current execution trace to a file
961    pub fn save_execution_trace(&mut self) -> Result<()> {
962        let mut hasher = DefaultHasher::new();
963        self.execution_trace.hash(&mut hasher);
964        let hash = hasher.finish();
965
966        if !self.execution_trace_directory.is_dir() {
967            create_dir_all(&self.execution_trace_directory)?;
968        }
969
970        let trace_path = self
971            .execution_trace_directory
972            .join(format!("{:x}.json", hash));
973
974        if !trace_path.exists() {
975            let trace_file = File::create(&trace_path)?;
976            to_writer(trace_file, &self.execution_trace)?;
977        }
978        Ok(())
979    }
980}
981
982#[simics_init(name = "tsffs", class = "tsffs")]
983/// Initialize TSFFS
984fn init() {
985    let tsffs = Tsffs::create().expect("Failed to create class tsffs");
986    config::register(tsffs).expect("Failed to register config interface for tsffs");
987    fuzz::register(tsffs).expect("Failed to register fuzz interface for tsffs");
988    run_python(indoc! {r#"
989        def init_tsffs_cmd():
990            try:
991                global tsffs
992                tsffs = SIM_create_object(SIM_get_class("tsffs"), "tsffs", [])
993            except Exception as e:
994                raise CliError(f"Failed to create tsffs: {e}")
995            
996            print("TSFFS initialized. Configure and use it as @tsffs.")
997    "#})
998    .expect("Failed to run python");
999    run_python(indoc! {r#"
1000        new_command(
1001            "init-tsffs",
1002            init_tsffs_cmd,
1003            [],
1004            type = ["Fuzzing"],
1005            see_also = [],
1006            short = "Initialize the TSFFS fuzzer",
1007            doc = "Initialize the TSFFS fuzzer"
1008        )
1009    "#})
1010    .map_err(|e| {
1011        error!(tsffs, "{e}");
1012        e
1013    })
1014    .expect("Failed to run python");
1015}