tsffs/tracer/
mod.rs

1// Copyright (C) 2024 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4use anyhow::{anyhow, bail, Error, Result};
5use cpp_demangle::{DemangleOptions, Symbol};
6use ffi2::ffi;
7use libafl::prelude::CmpValues;
8use libafl_bolts::{AsMutSlice, AsSlice};
9use libafl_targets::{AFLppCmpLogOperands, AFL_CMP_TYPE_INS, CMPLOG_MAP_H};
10use rustc_demangle::try_demangle;
11use serde::{Deserialize, Serialize};
12use simics::{
13    api::{
14        get_processor_number, sys::instruction_handle_t, AsConfObject, AttrValue, AttrValueType,
15        ConfObject,
16    },
17    get_interface, trace, ProcessorInfoV2Interface,
18};
19use std::{
20    collections::HashMap, ffi::c_void, fmt::Display, hash::Hash, num::Wrapping,
21    slice::from_raw_parts, str::FromStr,
22};
23use typed_builder::TypedBuilder;
24
25use crate::{arch::ArchitectureOperations, Tsffs};
26
/// Symbol information attached to an execution trace entry.
///
/// Built in `on_instruction_before` from the symbol lookup tree entry that contains the
/// current program counter.
#[derive(Clone, Deserialize, Serialize, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct ExecutionTraceSymbol {
    /// The symbol name
    pub symbol: String,
    /// The demangled symbol name, if it demangles correctly
    pub symbol_demangled: Option<String>,
    /// The offset into the symbol
    pub offset: u64,
    /// The containing module's name (usually the path to the executable or dll)
    pub module: String,
}
38
/// Recorded execution trace: for each processor number, the entries recorded for that
/// processor in the order they were pushed.
#[derive(Clone, Deserialize, Serialize, Debug, Default)]
pub(crate) struct ExecutionTrace(pub HashMap<i32, Vec<ExecutionTraceEntry>>);
41
42impl Hash for ExecutionTrace {
43    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
44        for (k, v) in self.0.iter() {
45            for entry in v.iter() {
46                k.hash(state);
47                entry.hash(state);
48            }
49        }
50    }
51}
52
/// A single recorded instruction in an execution trace.
///
/// Only `pc` is required by the builder; the remaining fields default to `None`.
#[derive(
    Clone, TypedBuilder, Deserialize, Serialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash,
)]
pub(crate) struct ExecutionTraceEntry {
    /// Program counter at which the entry was recorded
    pc: u64,
    /// Disassembly of the instruction, or `"(unknown)"` when disassembly failed
    #[builder(default, setter(into, strip_option))]
    insn: Option<String>,
    /// Raw bytes of the instruction
    #[builder(default, setter(into, strip_option))]
    insn_bytes: Option<Vec<u8>>,
    /// Symbol containing `pc`, when a symbol lookup was performed and succeeded
    #[builder(default, setter(into))]
    symbol: Option<ExecutionTraceSymbol>,
}
65
/// The kind of shift used by a [`CmpExpr::Shift`] expression.
///
/// NOTE(review): the variant names presumably follow the usual LSL/LSR/ASR/ROR mnemonics
/// (logical shift left/right, arithmetic shift right, rotate right) — confirm against the
/// architecture code that constructs these values.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum CmpExprShift {
    Lsl,
    Lsr,
    Asr,
    Ror,
}
73
/// An expression tree describing one operand of a comparison.
///
/// Leaf variants carry a fixed-width integer, an address or a register; the remaining
/// variants combine boxed sub-expressions. Only the integer leaves can be converted
/// directly into a [`CmpValue`].
///
/// NOTE(review): the meaning of the tuple payloads is not visible in this file. The
/// `Option<u8>` of `Deref` and the `u8` of `Reg` presumably describe an access or register
/// width — confirm against the architecture-specific code that builds these expressions.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum CmpExpr {
    Deref((Box<CmpExpr>, Option<u8>)),
    Reg((String, u8)),
    Mul((Box<CmpExpr>, Box<CmpExpr>)),
    Add((Box<CmpExpr>, Box<CmpExpr>)),
    Sub((Box<CmpExpr>, Box<CmpExpr>)),
    Shift((Box<CmpExpr>, Box<CmpExpr>, CmpExprShift)),
    U8(u8),
    I8(i8),
    U16(u16),
    I16(i16),
    U32(u32),
    I32(i32),
    U64(u64),
    I64(i64),
    Addr(u64),
}
92
/// The kind of a logged comparison.
///
/// Each variant is a distinct bit, so several types can be combined with bitwise OR;
/// `log_cmp` does exactly that to build the attribute stored in the comparison map header.
///
/// NOTE(review): the bit values presumably mirror the AFL++ cmplog attribute encoding —
/// confirm against the AFL++ headers before changing them.
#[allow(unused)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub(crate) enum CmpType {
    Equal = 1,
    Greater = 2,
    Lesser = 4,
    Fp = 8,
    FpMod = 16,
    IntMod = 32,
    Transform = 64,
}
105
/// A comparison operand value: either a concrete fixed-width integer or a boxed
/// [`CmpExpr`] expression.
#[allow(unused)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum CmpValue {
    U8(u8),
    I8(i8),
    U16(u16),
    I16(i16),
    U32(u32),
    I32(i32),
    U64(u64),
    I64(i64),
    Expr(Box<CmpExpr>),
}
119
120impl TryFrom<&CmpExpr> for CmpValue {
121    type Error = Error;
122    fn try_from(value: &CmpExpr) -> Result<Self> {
123        Ok(match value {
124            CmpExpr::U8(u) => CmpValue::U8(*u),
125            CmpExpr::I8(i) => CmpValue::I8(*i),
126            CmpExpr::U16(u) => CmpValue::U16(*u),
127            CmpExpr::I16(i) => CmpValue::I16(*i),
128            CmpExpr::U32(u) => CmpValue::U32(*u),
129            CmpExpr::I32(i) => CmpValue::I32(*i),
130            CmpExpr::U64(u) => CmpValue::U64(*u),
131            CmpExpr::I64(i) => CmpValue::I64(*i),
132            _ => bail!("Can't convert directly from non-integral expr"),
133        })
134    }
135}
136
137fn cmp_shape(cmp: &CmpValues) -> Result<u32> {
138    match cmp {
139        CmpValues::U8(_) => Ok(0),
140        CmpValues::U16(_) => Ok(1),
141        CmpValues::U32(_) => Ok(3),
142        CmpValues::U64(_) => Ok(7),
143        _ => bail!("Shape not implemented for non-integral types"),
144    }
145}
146
/// Return the smallest of 1, 2, 4 or 8 bytes that can represent `value`.
fn byte_width(value: u64) -> usize {
    match value {
        0..=0xff => 1,
        0x100..=0xffff => 2,
        0x1_0000..=0xffff_ffff => 4,
        _ => 8,
    }
}
160
161/// Hash a value into an index into an array lf length `len`
162fn hash_index(value: u64, len: u64) -> u64 {
163    let value_bytes = value.to_le_bytes();
164    let hash_width = byte_width(len - 1);
165    let hash_iters = value_bytes.len() / hash_width;
166    let mut buffer = [0u8; 8];
167
168    for i in 0..hash_iters {
169        if i == 0 {
170            buffer[0..hash_width]
171                .clone_from_slice(&value_bytes[i * hash_width..(i + 1) * hash_width])
172        } else {
173            (0..hash_width).for_each(|j| {
174                buffer[j] ^= value_bytes[i * hash_width..(i + 1) * hash_width][j];
175            });
176        }
177    }
178
179    u64::from_le_bytes(buffer)
180}
181
/// The result of tracing a single instruction: an optional control-flow edge and an
/// optional comparison. Both fields default to `None` in the builder.
#[derive(TypedBuilder, Debug, Clone, PartialEq, Eq)]
pub(crate) struct TraceEntry {
    #[builder(default, setter(into, strip_option))]
    /// The target of an edge in the trace
    edge: Option<u64>,
    #[builder(default, setter(into, strip_option))]
    /// A traced comparison, as the tuple `(pc, comparison types, operand values)` that is
    /// handed to `log_cmp`
    cmp: Option<(u64, Vec<CmpType>, CmpValues)>,
}
190
191impl Default for TraceEntry {
192    fn default() -> Self {
193        Self::builder().build()
194    }
195}
196
/// The coverage mode setting, written as `"hit-count"` or `"once"` in its string form.
///
/// NOTE(review): how each mode changes coverage collection is not visible in this file.
/// Presumably `HitCount` counts every hit of an edge while `Once` only records whether it
/// was hit — confirm against the code that consumes this setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub(crate) enum CoverageMode {
    HitCount,
    Once,
}
202
impl CoverageMode {
    /// Table pairing each variant with its string name. It is the single source of truth
    /// for both parsing (`FromStr`) and printing (`Display`), so every variant must have
    /// exactly one row here.
    const AS_STRING: &'static [(&'static str, Self)] =
        &[("hit-count", Self::HitCount), ("once", Self::Once)];
}
207
208impl Default for CoverageMode {
209    fn default() -> Self {
210        Self::HitCount
211    }
212}
213
214impl FromStr for CoverageMode {
215    type Err = Error;
216
217    fn from_str(s: &str) -> Result<Self> {
218        let as_string = Self::AS_STRING.iter().cloned().collect::<HashMap<_, _>>();
219
220        as_string.get(s).cloned().ok_or_else(|| {
221            anyhow!(
222                "Invalid coverage mode {}. Expected one of {}",
223                s,
224                Self::AS_STRING
225                    .iter()
226                    .map(|i| i.0)
227                    .collect::<Vec<_>>()
228                    .join(", ")
229            )
230        })
231    }
232}
233
234impl Display for CoverageMode {
235    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236        let to_string = Self::AS_STRING
237            .iter()
238            .map(|(k, v)| (v, k))
239            .collect::<HashMap<_, _>>();
240        if let Some(name) = to_string.get(self) {
241            write!(f, "{}", name)
242        } else {
243            panic!("Invalid state for enum");
244        }
245    }
246}
247
248impl TryFrom<AttrValue> for CoverageMode {
249    type Error = Error;
250
251    fn try_from(value: AttrValue) -> Result<Self> {
252        String::try_from(value)?.parse()
253    }
254}
255
256impl From<CoverageMode> for AttrValueType {
257    fn from(value: CoverageMode) -> Self {
258        value.to_string().into()
259    }
260}
261
262impl Tsffs {
263    fn log_pc(&mut self, pc: u64) -> Result<()> {
264        let coverage_map = self.coverage_map.get_mut().ok_or_else(|| {
265            anyhow!("Coverage map not initialized. This is a bug in the fuzzer or the target")
266        })?;
267        let afl_idx = (pc ^ self.coverage_prev_loc) % coverage_map.as_slice().len() as u64;
268        let mut cur_byte: Wrapping<u8> = Wrapping(coverage_map.as_slice()[afl_idx as usize]);
269        cur_byte += 1;
270        coverage_map.as_mut_slice()[afl_idx as usize] = cur_byte.0;
271        self.coverage_prev_loc = (pc >> 1) % coverage_map.as_slice().len() as u64;
272
273        Ok(())
274    }
275
276    fn log_cmp(&mut self, pc: u64, types: Vec<CmpType>, cmp: CmpValues) -> Result<()> {
277        // Consistently hash pc to the same header index
278        let aflpp_cmp_map = self.aflpp_cmp_map.get_mut().ok_or_else(|| {
279            anyhow!("AFL++ cmp map not initialized. This is a bug in the fuzzer or the target")
280        })?;
281        let shape = cmp_shape(&cmp)?;
282        let operands = cmp
283            .to_u64_tuple()
284            .ok_or_else(|| anyhow!("Conversion to tuple of non-integral operands not supported"))?;
285        let pc_index = hash_index(pc, aflpp_cmp_map.headers().len() as u64);
286
287        let hits = aflpp_cmp_map.headers_mut()[pc_index as usize].hits();
288
289        aflpp_cmp_map.headers_mut()[pc_index as usize].set_hits(hits + 1);
290        aflpp_cmp_map.headers_mut()[pc_index as usize].set_shape(shape);
291        aflpp_cmp_map.headers_mut()[pc_index as usize].set__type(AFL_CMP_TYPE_INS);
292
293        if let Some(attribute) = types.iter().map(|t| *t as u32).reduce(|acc, t| acc | t) {
294            aflpp_cmp_map.headers_mut()[pc_index as usize].set_attribute(attribute);
295            // NOTE: overflow isn't used by aflppredqueen
296        } else {
297            // Naively use EQ if we don't have a value
298            aflpp_cmp_map.headers_mut()[pc_index as usize].set_attribute(CmpType::Equal as u32);
299        }
300
301        aflpp_cmp_map.values_mut().operands_mut()[pc_index as usize]
302            [hits as usize % CMPLOG_MAP_H] = AFLppCmpLogOperands::new(operands.0, operands.1);
303
304        if hits == 0 {
305            trace!(
306                self.as_conf_object(),
307                "Logged first hit of comparison with types {types:?} and values {cmp:?} (assume == if no types)"
308            );
309        }
310
311        Ok(())
312    }
313}
314
315#[ffi(from_ptr, expect, self_ty = "*mut c_void")]
316impl Tsffs {
317    #[ffi(arg(rest), arg(self))]
318    /// Callback after each instruction executed
319    ///
320    /// # Arguments
321    ///
322    /// * `obj`
323    /// * `cpu` - The processor the instruction is being executed by
324    /// * `handle` - An opaque handle to the instruction being executed
325    pub fn on_instruction_after(
326        &mut self,
327        _obj: *mut ConfObject,
328        cpu: *mut ConfObject,
329        handle: *mut instruction_handle_t,
330    ) -> Result<()> {
331        let processor_number = get_processor_number(cpu)?;
332
333        if self.coverage_enabled {
334            if let Some(arch) = self.processors.get_mut(&processor_number) {
335                match arch.trace_pc(handle) {
336                    Ok(r) => {
337                        if let Some(pc) = r.edge {
338                            if self.coverage_reporting && self.edges_seen.insert(pc) {
339                                let coverage_map = self.coverage_map.get_mut().ok_or_else(|| {
340                                    anyhow!("Coverage map not initialized. This is a bug in the fuzzer or the target")
341                                })?;
342                                let afl_idx = (pc ^ self.coverage_prev_loc)
343                                    % coverage_map.as_slice().len() as u64;
344                                self.edges_seen_since_last.insert(pc, afl_idx);
345                            }
346                            self.log_pc(pc)?;
347                        }
348                    }
349                    Err(_) => {
350                        // This is not really an error, but we may want to know  about it
351                        // sometimes when debugging
352                        // trace!(self.as_conf_object(), "Error tracing for PC: {e}");
353                    }
354                }
355            }
356        }
357
358        Ok(())
359    }
360
361    #[ffi(arg(rest), arg(self))]
362    /// Callback after each instruction executed
363    ///
364    /// # Arguments
365    ///
366    /// * `obj`
367    /// * `cpu` - The processor the instruction is being executed by
368    /// * `handle` - An opaque handle to the instruction being executed
369    pub fn on_instruction_before(
370        &mut self,
371        _obj: *mut ConfObject,
372        cpu: *mut ConfObject,
373        handle: *mut instruction_handle_t,
374    ) -> Result<()> {
375        let processor_number = get_processor_number(cpu)?;
376
377        if self.coverage_enabled && self.cmplog && self.cmplog_enabled {
378            if let Some(arch) = self.processors.get_mut(&processor_number) {
379                match arch.trace_cmp(handle) {
380                    Ok(r) => {
381                        if let Some((pc, types, cmp)) = r.cmp {
382                            self.log_cmp(pc, types.clone(), cmp.clone())?;
383                        }
384                    }
385                    Err(_) => {
386                        // This is not really an error, but we may want to know  about it
387                        // sometimes when debugging
388                        // trace!(self.as_conf_object(), "Error tracing for CMP: {e}");
389                    }
390                }
391            }
392        }
393
394        let symcov = if self.coverage_enabled
395            && self.windows
396            && self.symbolic_coverage
397            && (self.save_all_execution_traces
398                || self.save_interesting_execution_traces
399                || self.save_solution_execution_traces)
400        {
401            // Get the current instruction address
402            let mut processor_information_v2 = get_interface::<ProcessorInfoV2Interface>(cpu)?;
403            let pc = processor_information_v2.get_program_counter()?;
404            self.windows_os_info
405                .symbol_lookup_trees
406                .get(&processor_number)
407                .and_then(|lookup_tree| {
408                    lookup_tree
409                        .query(pc..pc + 1)
410                        .next()
411                        .map(|symbol_for_query| {
412                            let offset =
413                                pc - symbol_for_query.value.base + symbol_for_query.value.rva;
414                            let symbol_demangled = try_demangle(&symbol_for_query.value.name)
415                                .map(|d| d.to_string())
416                                .ok()
417                                .or_else(|| {
418                                    Symbol::new(&symbol_for_query.value.name)
419                                        .ok()
420                                        .and_then(|s| s.demangle(&DemangleOptions::new()).ok())
421                                });
422
423                            if let Some(function_start_line) = symbol_for_query.value.lines.first()
424                            {
425                                let record = self
426                                    .coverage
427                                    .get_or_insert_mut(&function_start_line.file_path);
428                                let pc_lines = symbol_for_query
429                                    .value
430                                    .lines
431                                    .iter()
432                                    .filter(|line_info| {
433                                        pc - symbol_for_query.value.base >= line_info.rva
434                                            && pc - symbol_for_query.value.base
435                                                < line_info.rva + line_info.size as u64
436                                    })
437                                    .flat_map(|l| (l.start_line..=l.end_line).map(|i| i as usize))
438                                    .collect::<Vec<_>>();
439
440                                if pc_lines.contains(&(function_start_line.start_line as usize)) {
441                                    // Increment function hit counter if we just hit the fn
442                                    record.increment_function_data(&symbol_for_query.value.name);
443                                }
444
445                                pc_lines.iter().for_each(|line| {
446                                    record.increment_line(*line);
447                                });
448                            }
449                            ExecutionTraceSymbol {
450                                symbol: symbol_for_query.value.name.clone(),
451                                symbol_demangled,
452                                offset,
453                                module: symbol_for_query.value.module.clone(),
454                            }
455                        })
456                })
457        } else if self.coverage_enabled && self.symbolic_coverage {
458            let mut processor_information_v2 = get_interface::<ProcessorInfoV2Interface>(cpu)?;
459            let pc = processor_information_v2.get_program_counter()?;
460            if let Some(lookup_tree) = self
461                .windows_os_info
462                .symbol_lookup_trees
463                .get(&processor_number)
464            {
465                if let Some(symbol_for_query) = lookup_tree.query(pc..pc + 1).next() {
466                    if let Some(function_start_line) = symbol_for_query.value.lines.first() {
467                        let record = self
468                            .coverage
469                            .get_or_insert_mut(&function_start_line.file_path);
470                        let pc_lines = symbol_for_query
471                            .value
472                            .lines
473                            .iter()
474                            .filter(|line_info| {
475                                pc - symbol_for_query.value.base >= line_info.rva
476                                    && pc - symbol_for_query.value.base
477                                        < line_info.rva + line_info.size as u64
478                            })
479                            .flat_map(|l| (l.start_line..=l.end_line).map(|i| i as usize))
480                            .collect::<Vec<_>>();
481
482                        if pc_lines.contains(&(function_start_line.start_line as usize)) {
483                            // Increment function hit counter if we just hit the fn
484                            record.increment_function_data(&symbol_for_query.value.name);
485                        }
486
487                        pc_lines.iter().for_each(|line| {
488                            record.increment_line(*line);
489                        });
490                    }
491                }
492            }
493            None
494        } else {
495            None
496        };
497
498        if self.coverage_enabled
499            && (self.save_all_execution_traces
500                || self.save_interesting_execution_traces
501                || self.save_solution_execution_traces)
502        {
503            if let Some(arch) = self.processors.get_mut(&processor_number) {
504                self.execution_trace
505                    .0
506                    .entry(processor_number)
507                    .or_default()
508                    .push(if self.execution_trace_pc_only {
509                        ExecutionTraceEntry::builder()
510                            .pc(arch.processor_info_v2().get_program_counter()?)
511                            .build()
512                    } else {
513                        let instruction_bytes =
514                            arch.cpu_instruction_query().get_instruction_bytes(handle)?;
515                        let instruction_bytes = unsafe {
516                            from_raw_parts(instruction_bytes.data, instruction_bytes.size)
517                        };
518
519                        if let Ok(disassembly_string) =
520                            arch.disassembler().disassemble_to_string(instruction_bytes)
521                        {
522                            ExecutionTraceEntry::builder()
523                                .pc(arch.processor_info_v2().get_program_counter()?)
524                                .insn(disassembly_string)
525                                .insn_bytes(instruction_bytes.to_vec())
526                                .symbol(symcov)
527                                .build()
528                        } else {
529                            ExecutionTraceEntry::builder()
530                                .pc(arch.processor_info_v2().get_program_counter()?)
531                                .insn("(unknown)".to_string())
532                                .insn_bytes(instruction_bytes.to_vec())
533                                .symbol(symcov)
534                                .build()
535                        }
536                    });
537            }
538        }
539
540        Ok(())
541    }
542}