tsffs/os/windows/
debug_info.rs

1use anyhow::{anyhow, bail, Result};
2use goblin::pe::PE;
3use intervaltree::Element;
4use pdb::{FallibleIterator, SymbolData, PDB};
5use reqwest::blocking::get;
6use std::{
7    collections::{HashMap, HashSet},
8    fs::{create_dir_all, File},
9    io::{copy, Write},
10    path::{Path, PathBuf},
11};
12
13use lending_iterator::{windows_mut, LendingIterator};
14use simics::{debug, get_object, info, warn, ConfObject};
15use windows_sys::Win32::System::{
16    Diagnostics::Debug::{
17        IMAGE_DEBUG_DIRECTORY, IMAGE_DEBUG_TYPE_CODEVIEW, IMAGE_DIRECTORY_ENTRY_DEBUG,
18        IMAGE_NT_HEADERS64,
19    },
20    SystemServices::{FILE_NOTIFY_FULL_INFORMATION, IMAGE_DOS_HEADER},
21};
22
23use crate::{os::DebugInfoConfig, source_cov::SourceCache};
24
25use super::{
26    pdb::{CvInfoPdb70, Export},
27    util::{read_virtual, read_virtual_dtb},
28};
29
30#[derive(Debug)]
31/// Debug info for an executable (which may be a .exe, .sys, etc)
32pub struct DebugInfo<'a> {
33    /// The path to the executable file on the local system
34    pub exe_path: PathBuf,
35    /// The path to the PDB file corresponding to the executable on the local system
36    pub pdb_path: PathBuf,
37    /// The contents of the executable file
38    pub exe_file_contents: Vec<u8>,
39    /// The loaded PDB info
40    pub pdb: PDB<'a, File>,
41}
42
43impl DebugInfo<'_> {
44    /// Instantiate a new debug info for an object
45    pub fn new<P>(
46        processor: *mut ConfObject,
47        name: &str,
48        base: u64,
49        download_directory: P,
50        not_found_full_name_cache: &mut HashSet<String>,
51        user_debug_info: &DebugInfoConfig,
52    ) -> Result<Option<Self>>
53    where
54        P: AsRef<Path>,
55    {
56        if let Some(info) = user_debug_info.user_debug_info.get(name) {
57            debug!(
58                get_object("tsffs")?,
59                "Have user-provided debug info for {name}"
60            );
61            let exe_path = info[0].clone();
62            let pdb_path = info[1].clone();
63
64            let exe_file_contents = std::fs::read(&exe_path)?;
65
66            let pdb_file = File::open(&pdb_path)?;
67
68            let pdb = PDB::open(pdb_file)?;
69
70            Ok(Some(Self {
71                exe_path,
72                pdb_path,
73                exe_file_contents,
74                pdb,
75            }))
76        } else if user_debug_info.system {
77            let dos_header = read_virtual::<IMAGE_DOS_HEADER>(processor, base)?;
78            let nt_header =
79                read_virtual::<IMAGE_NT_HEADERS64>(processor, base + dos_header.e_lfanew as u64)?;
80            let debug_data_directory_offset = nt_header.OptionalHeader.DataDirectory
81                [IMAGE_DIRECTORY_ENTRY_DEBUG as usize]
82                .VirtualAddress;
83            let debug_data_directory_size =
84                nt_header.OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG as usize].Size;
85            let debug_directory = (base + debug_data_directory_offset as u64
86                ..base + debug_data_directory_offset as u64 + debug_data_directory_size as u64)
87                .step_by(std::mem::size_of::<IMAGE_DEBUG_DIRECTORY>())
88                .filter_map(|offset| read_virtual::<IMAGE_DEBUG_DIRECTORY>(processor, offset).ok())
89                .filter(|dd| dd.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
90                .take(1)
91                .next()
92                .ok_or_else(|| anyhow!("Failed to find debug data directory with codeview type"))?;
93
94            if debug_directory.SizeOfData == 0 || debug_directory.AddressOfRawData == 0 {
95                bail!("Invalid debug data directory");
96            }
97
98            let cv_info_pdb70 =
99                CvInfoPdb70::new(processor, base + debug_directory.AddressOfRawData as u64)?;
100
101            let exe_guid = format!(
102                "{:08X}{:05X}",
103                nt_header.FileHeader.TimeDateStamp, nt_header.OptionalHeader.SizeOfImage
104            );
105
106            // Download kernel PDB file
107            let pdb_url = format!(
108                "https://msdl.microsoft.com/download/symbols/{}/{}/{}",
109                cv_info_pdb70.file_name(),
110                cv_info_pdb70.guid(),
111                cv_info_pdb70.file_name()
112            );
113
114            let exe_url = format!(
115                "https://msdl.microsoft.com/download/symbols/{}/{}/{}",
116                name, exe_guid, name
117            );
118
119            if !download_directory.as_ref().is_dir() {
120                create_dir_all(&download_directory)?;
121            }
122
123            // Download kernel PE file
124            let exe_path = download_directory
125                .as_ref()
126                .join(format!("{}.exe", &exe_guid));
127
128            if !exe_path.exists() && !not_found_full_name_cache.contains(name) {
129                info!(get_object("tsffs")?, "Downloading PE file from {}", exe_url);
130                match get(&exe_url)?.error_for_status() {
131                    Ok(response) => {
132                        let mut file = File::create(&exe_path)?;
133                        copy(&mut response.bytes()?.as_ref(), &mut file)?;
134                        file.flush()?;
135                    }
136                    Err(e) => {
137                        not_found_full_name_cache.insert(name.to_string());
138                        bail!("Failed to download PE file: {}", e);
139                    }
140                }
141            }
142
143            let pdb_path = download_directory
144                .as_ref()
145                .join(format!("{}.pdb", cv_info_pdb70.guid()));
146
147            if !pdb_path.exists() && !not_found_full_name_cache.contains(cv_info_pdb70.file_name())
148            {
149                info!(
150                    get_object("tsffs")?,
151                    "Downloading PDB file from {}", pdb_url
152                );
153                match get(&pdb_url)?.error_for_status() {
154                    Ok(response) => {
155                        let mut file = File::create(&pdb_path)?;
156                        copy(&mut response.bytes()?.as_ref(), &mut file)?;
157                        file.flush()?;
158                    }
159                    Err(e) => {
160                        not_found_full_name_cache.insert(cv_info_pdb70.guid().to_string());
161                        bail!("Failed to download PDB file: {}", e);
162                    }
163                }
164            }
165
166            let exe_file_contents = std::fs::read(&exe_path)?;
167
168            let pdb_file = File::open(&pdb_path)?;
169
170            let pdb = PDB::open(pdb_file)?;
171
172            Ok(Some(Self {
173                exe_path,
174                pdb_path,
175                exe_file_contents,
176                pdb,
177            }))
178        } else {
179            // bail!("No debug info provided for {name}");
180            Ok(None)
181        }
182    }
183
184    /// Instantiate a new debug info for an object with a specific directory table base
185    pub fn new_dtb<P>(
186        processor: *mut ConfObject,
187        name: &str,
188        base: u64,
189        download_directory: P,
190        directory_table_base: u64,
191        not_found_full_name_cache: &mut HashSet<String>,
192        user_debug_info: DebugInfoConfig,
193    ) -> Result<Option<Self>>
194    where
195        P: AsRef<Path>,
196    {
197        if let Some(info) = user_debug_info.user_debug_info.get(name) {
198            debug!(
199                get_object("tsffs")?,
200                "Have user-provided debug info for {name}"
201            );
202            let exe_path = info[0].clone();
203            let pdb_path = info[1].clone();
204
205            let exe_file_contents = std::fs::read(&exe_path)?;
206
207            let pdb_file = File::open(&pdb_path)?;
208
209            let pdb = PDB::open(pdb_file)?;
210
211            Ok(Some(Self {
212                exe_path,
213                pdb_path,
214                exe_file_contents,
215                pdb,
216            }))
217        } else if user_debug_info.system {
218            let dos_header =
219                read_virtual_dtb::<IMAGE_DOS_HEADER>(processor, directory_table_base, base)?;
220            let nt_header = read_virtual_dtb::<IMAGE_NT_HEADERS64>(
221                processor,
222                directory_table_base,
223                base + dos_header.e_lfanew as u64,
224            )?;
225            let debug_data_directory_offset = nt_header.OptionalHeader.DataDirectory
226                [IMAGE_DIRECTORY_ENTRY_DEBUG as usize]
227                .VirtualAddress;
228            let debug_data_directory_size =
229                nt_header.OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG as usize].Size;
230            let debug_directory = (base + debug_data_directory_offset as u64
231                ..base + debug_data_directory_offset as u64 + debug_data_directory_size as u64)
232                .step_by(std::mem::size_of::<IMAGE_DEBUG_DIRECTORY>())
233                .filter_map(|offset| {
234                    read_virtual_dtb::<IMAGE_DEBUG_DIRECTORY>(
235                        processor,
236                        directory_table_base,
237                        offset,
238                    )
239                    .ok()
240                })
241                .filter(|dd| dd.Type == IMAGE_DEBUG_TYPE_CODEVIEW)
242                .take(1)
243                .next()
244                .ok_or_else(|| anyhow!("Failed to find debug data directory with codeview type"))?;
245
246            if debug_directory.SizeOfData == 0 || debug_directory.AddressOfRawData == 0 {
247                bail!("Invalid debug data directory");
248            }
249
250            let cv_info_pdb70 =
251                CvInfoPdb70::new(processor, base + debug_directory.AddressOfRawData as u64)?;
252
253            let exe_guid = format!(
254                "{:08X}{:05X}",
255                nt_header.FileHeader.TimeDateStamp, nt_header.OptionalHeader.SizeOfImage
256            );
257
258            // Download kernel PDB file
259            let pdb_url = format!(
260                "https://msdl.microsoft.com/download/symbols/{}/{}/{}",
261                cv_info_pdb70.file_name(),
262                cv_info_pdb70.guid(),
263                cv_info_pdb70.file_name()
264            );
265
266            let exe_url = format!(
267                "https://msdl.microsoft.com/download/symbols/{}/{}/{}",
268                name, exe_guid, name
269            );
270
271            if !download_directory.as_ref().is_dir() {
272                create_dir_all(&download_directory)?;
273            }
274
275            // Download kernel PE file
276            let exe_path = download_directory
277                .as_ref()
278                .join(format!("{}.exe", &exe_guid));
279
280            if !exe_path.exists() && !not_found_full_name_cache.contains(name) {
281                info!(get_object("tsffs")?, "Downloading PE file from {}", exe_url);
282                match get(&exe_url)?.error_for_status() {
283                    Ok(response) => {
284                        let mut file = File::create(&exe_path)?;
285                        copy(&mut response.bytes()?.as_ref(), &mut file)?;
286                        file.flush()?;
287                    }
288                    Err(e) => {
289                        not_found_full_name_cache.insert(name.to_string());
290                        bail!("Failed to download PE file: {}", e);
291                    }
292                }
293            }
294
295            let exe_file_contents = std::fs::read(&exe_path)?;
296
297            let pdb_path = download_directory
298                .as_ref()
299                .join(format!("{}.pdb", cv_info_pdb70.guid()));
300
301            if !pdb_path.exists() && !not_found_full_name_cache.contains(cv_info_pdb70.file_name())
302            {
303                info!(
304                    get_object("tsffs")?,
305                    "Downloading PDB file from {}", pdb_url
306                );
307                match get(&pdb_url)?.error_for_status() {
308                    Ok(response) => {
309                        let mut file = File::create(&pdb_path)?;
310                        copy(&mut response.bytes()?.as_ref(), &mut file)?;
311                        file.flush()?;
312                    }
313                    Err(e) => {
314                        not_found_full_name_cache.insert(cv_info_pdb70.guid().to_string());
315                        bail!("Failed to download PDB file: {}", e);
316                    }
317                }
318            }
319
320            let pdb_file = File::open(&pdb_path)?;
321
322            let pdb = PDB::open(pdb_file)?;
323
324            Ok(Some(Self {
325                exe_path,
326                pdb_path,
327                exe_file_contents,
328                pdb,
329            }))
330        } else {
331            Ok(None)
332        }
333    }
334
335    /// Return the parsed PE file
336    pub fn exe(&self) -> Result<PE<'_>> {
337        PE::parse(&self.exe_file_contents)
338            .map_err(move |e| anyhow!("Failed to parse PE file: {}", e))
339    }
340
341    /// Get a list of exports from the PE file
342    pub fn exports(&self) -> Result<Vec<Export>> {
343        Ok(self.exe()?.exports.iter().map(Export::from).collect())
344    }
345}
346
347#[derive(Debug)]
348/// A module (or object) loaded in a specific process
349pub struct ProcessModule {
350    /// The base of the object
351    pub base: u64,
352    /// The size of the object
353    pub size: u64,
354    /// The full name (typically a path) of the object on disk
355    pub full_name: String,
356    /// The base name of the object
357    pub base_name: String,
358    /// Loaded debug info for the object
359    pub debug_info: Option<DebugInfo<'static>>,
360}
361
362impl ProcessModule {
363    /// Return lookup intervals for symbols in the process module which can be used to build
364    /// an interval tree
365    pub fn intervals(
366        &mut self,
367        source_cache: &SourceCache,
368    ) -> Result<Vec<Element<u64, SymbolInfo>>> {
369        let Some(debug_info) = self.debug_info.as_mut() else {
370            bail!("No debug info for module {}", self.full_name);
371        };
372
373        let string_table = debug_info.pdb.string_table()?;
374        let address_map = debug_info.pdb.address_map()?;
375        let symbols = debug_info
376            .pdb
377            .debug_information()?
378            .modules()?
379            .iterator()
380            .filter_map(|module| module.ok())
381            .filter_map(|module| {
382                debug_info
383                    .pdb
384                    .module_info(&module)
385                    .ok()
386                    .flatten()
387                    .map(|module_info| (module, module_info))
388            })
389            .flat_map(|(_module, module_info)| {
390                let Ok(line_program) = module_info.line_program() else {
391                    return Vec::new();
392                };
393
394                let Ok(symbols) = module_info.symbols() else {
395                    return Vec::new();
396                };
397
398                symbols
399                    .iterator()
400                    .filter_map(|symbol| symbol.ok())
401                    .filter_map(|symbol| {
402                        symbol.parse().ok().map(|symbol_data| (symbol, symbol_data))
403                    })
404                    .filter_map(|(_symbol, symbol_data)| {
405                        let SymbolData::Procedure(procedure_symbol) = symbol_data else {
406                            return None;
407                        };
408                        let symbol_name = symbol_data.name()?;
409                        let procedure_rva = procedure_symbol.offset.to_rva(&address_map)?;
410
411                        let lines = line_program
412                            .lines_for_symbol(procedure_symbol.offset)
413                            .iterator()
414                            .filter_map(|line| line.ok())
415                            .filter_map(|line_info| {
416                                line_program
417                                    .get_file_info(line_info.file_index)
418                                    .ok()
419                                    .and_then(|line_file_info| {
420                                        string_table
421                                            .get(line_file_info.name)
422                                            .map(|line_file_name| (line_file_info, line_file_name))
423                                            .ok()
424                                    })
425                                    .and_then(|(line_file_info, line_file_name)| {
426                                        line_info.offset.to_rva(&address_map).map(|line_rva| {
427                                            (line_file_info, line_file_name, line_rva, line_info)
428                                        })
429                                    })
430                                    .and_then(
431                                        |(line_file_info, line_file_name, line_rva, line_info)| {
432                                            source_cache
433                                                .lookup_pdb(
434                                                    &line_file_info,
435                                                    &line_file_name.to_string(),
436                                                )
437                                                .ok()
438                                                .flatten()
439                                                .map(|p| p.to_path_buf())
440                                                .map(|file_path| LineInfo {
441                                                    rva: line_rva.0 as u64,
442                                                    size: line_info.length.unwrap_or(1),
443                                                    file_path,
444                                                    start_line: line_info.line_start,
445                                                    end_line: line_info.line_end,
446                                                })
447                                        },
448                                    )
449                            })
450                            .collect::<Vec<_>>();
451                        let info = SymbolInfo::new(
452                            procedure_rva.0 as u64,
453                            self.base,
454                            procedure_symbol.len as u64,
455                            symbol_name.to_string().to_string(),
456                            self.full_name.clone(),
457                            lines,
458                        );
459
460                        Some(info)
461                    })
462                    .collect::<Vec<_>>()
463            })
464            .collect::<Vec<_>>();
465
466        Ok(symbols
467            .into_iter()
468            .map(|s| (self.base + s.rva..self.base + s.rva + s.size, s).into())
469            .collect())
470    }
471}
472
473#[derive(Debug)]
474/// A process
475pub struct Process {
476    /// The unique PID of the process
477    pub pid: u64,
478    /// The file name of the process's main object
479    pub file_name: String,
480    /// The base address of the process's main object
481    pub base_address: u64,
482    /// The list of modules/objects loaded into the process's address space
483    pub modules: Vec<ProcessModule>,
484}
485
/// Information about a line in a source file from a PDB
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LineInfo {
    /// Relative virtual address of the line's code in the executable image
    pub rva: u64,
    /// Number of bytes of code this line represents
    pub size: u32,
    /// Path of the source file on the *local* filesystem. It is obtained on a best-effort
    /// basis by looking up the source path recorded in the PDB in the source cache
    pub file_path: PathBuf,
    /// Line number in the source file at which this line starts
    pub start_line: u32,
    /// Line number in the source file at which this line ends
    pub end_line: u32,
}
501
502#[derive(Debug, Clone, PartialEq, Eq, Hash)]
503/// Information about a symbol in a PDB, including the member lines of the symbol, if any.
504pub struct SymbolInfo {
505    /// The relative virtual address in the executable image
506    pub rva: u64,
507    /// The base address of the executable image
508    pub base: u64,
509    /// The size of the symbol (e.g. function size)
510    pub size: u64,
511    /// The (possibly mangled) name of the symbol
512    pub name: String,
513    /// The name of the module the symbol is in
514    pub module: String,
515    /// The source lines of code for the symbol
516    pub lines: Vec<LineInfo>,
517}
518
519impl SymbolInfo {
520    pub fn new(
521        rva: u64,
522        base: u64,
523        size: u64,
524        name: String,
525        module: String,
526        lines: Vec<LineInfo>,
527    ) -> Self {
528        Self {
529            rva,
530            base,
531            size,
532            name,
533            module,
534            lines,
535        }
536    }
537}
538
539#[derive(Debug)]
540/// A kernel module/driver
541pub struct Module {
542    /// The base address of the module
543    pub base: u64,
544    /// The entrypoint of the module
545    pub entry: u64,
546    /// The size of the module
547    pub size: u64,
548    /// The full name of the module
549    pub full_name: String,
550    /// The base name of the module
551    pub base_name: String,
552    /// The loaded debug info for the module
553    pub debug_info: Option<DebugInfo<'static>>,
554}
555
556impl Module {
557    /// Return lookup intervals for symbols in the module which can be used to build an interval tree
558    pub fn intervals(
559        &mut self,
560        source_cache: &SourceCache,
561    ) -> Result<Vec<Element<u64, SymbolInfo>>> {
562        let Some(debug_info) = self.debug_info.as_mut() else {
563            bail!("No debug info for module {}", self.full_name);
564        };
565
566        let string_table = debug_info.pdb.string_table()?;
567        let address_map = debug_info.pdb.address_map()?;
568        let symbols = debug_info
569            .pdb
570            .debug_information()?
571            .modules()?
572            .iterator()
573            .filter_map(|module| module.ok())
574            .filter_map(|module| {
575                debug_info
576                    .pdb
577                    .module_info(&module)
578                    .ok()
579                    .flatten()
580                    .map(|module_info| (module, module_info))
581            })
582            .flat_map(|(_module, module_info)| {
583                let Ok(line_program) = module_info.line_program() else {
584                    return Vec::new();
585                };
586
587                let Ok(symbols) = module_info.symbols() else {
588                    return Vec::new();
589                };
590
591                symbols
592                    .iterator()
593                    .filter_map(|symbol| symbol.ok())
594                    .filter_map(|symbol| {
595                        symbol.parse().ok().map(|symbol_data| (symbol, symbol_data))
596                    })
597                    .filter_map(|(_symbol, symbol_data)| {
598                        let SymbolData::Procedure(procedure_symbol) = symbol_data else {
599                            return None;
600                        };
601                        let symbol_name = symbol_data.name()?;
602                        let procedure_rva = procedure_symbol.offset.to_rva(&address_map)?;
603
604                        let lines = line_program
605                            .lines_for_symbol(procedure_symbol.offset)
606                            .iterator()
607                            .filter_map(|line| line.ok())
608                            .filter_map(|line_info| {
609                                let Ok(line_file_info) =
610                                    line_program.get_file_info(line_info.file_index)
611                                else {
612                                    if let Ok(o) = get_object("tsffs") {
613                                        debug!(o, "No file info for line {:?}", line_info);
614                                    }
615                                    return None;
616                                };
617
618                                let Ok(line_file_name) = string_table.get(line_file_info.name)
619                                else {
620                                    if let Ok(o) = get_object("tsffs") {
621                                        debug!(o, "No file name for line {:?}", line_file_info);
622                                    }
623                                    return None;
624                                };
625
626                                let Some(line_rva) = line_info.offset.to_rva(&address_map) else {
627                                    if let Ok(o) = get_object("tsffs") {
628                                        debug!(o, "No RVA for line {:?}", line_info);
629                                    }
630                                    return None;
631                                };
632
633                                let Ok(Some(source_file)) = source_cache
634                                    .lookup_pdb(&line_file_info, &line_file_name.to_string())
635                                else {
636                                    if let Ok(o) = get_object("tsffs") {
637                                        debug!(o, "No source file path for line {:?}", line_info);
638                                    }
639                                    return None;
640                                };
641
642                                let info = LineInfo {
643                                    rva: line_rva.0 as u64,
644                                    size: line_info.length.unwrap_or(1),
645                                    file_path: source_file.to_path_buf(),
646                                    start_line: line_info.line_start,
647                                    end_line: line_info.line_end,
648                                };
649                                if let Ok(o) = get_object("tsffs") {
650                                    debug!(o, "Got line info {:?}", line_info);
651                                }
652
653                                Some(info)
654                            })
655                            .collect::<Vec<_>>();
656
657                        let info = SymbolInfo::new(
658                            procedure_rva.0 as u64,
659                            self.base,
660                            procedure_symbol.len as u64,
661                            symbol_name.to_string().to_string(),
662                            self.full_name.clone(),
663                            lines,
664                        );
665
666                        Some(info)
667                    })
668                    .collect::<Vec<_>>()
669            })
670            .collect::<Vec<_>>();
671
672        Ok(symbols
673            .into_iter()
674            .map(|s| (self.base + s.rva..self.base + s.rva + s.size, s).into())
675            .collect())
676    }
677}