diff --git a/volatility3/framework/automagic/banner_scanners.py b/volatility3/framework/automagic/banner_scanners.py new file mode 100644 index 0000000000..7ee9fe99f9 --- /dev/null +++ b/volatility3/framework/automagic/banner_scanners.py @@ -0,0 +1,52 @@ +from typing import Iterator, Optional, Tuple +from volatility3.framework.layers import scanners + +VALID_BANNER_CHARSET = ( + b" #()+,;/-.0123456789:@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~\n" +) +BANNER_READ_SIZE = 0xFFF + + +class BannerScanner(scanners.RegExScanner): + """Scanner for Linux and macOS kernel version strings.""" + + BANNER_PATTERN = ( + rb"(Linux version|Darwin Kernel Version) [0-9]+\.[0-9]+\.[0-9]+[^\x00]+" + ) + + _version = (1, 0, 0) + + _required_framework_version = (2, 0, 0) + + def __init__(self) -> None: + super().__init__(pattern=self.BANNER_PATTERN) + + def _get_valid_banner(self, offset: int) -> Optional[bytes]: + """Gets the banner at a layer offset and validates it.""" + layer = self.context.layers[self.layer_name] + data = layer.read(offset, BANNER_READ_SIZE, pad=True) + # See symbol_cache's _normalize_identifier + data_index = data.find(b"\x00") + if data_index <= 0: + return None + + data = data[:data_index].strip() + failed = any(char not in VALID_BANNER_CHARSET for char in data) + if not failed: + return data + + return None + + def __call__(self, data: bytes, data_offset: int) -> Iterator[Tuple[int, bytes]]: + for off in super().__call__(data, data_offset): + banner = self._get_valid_banner(off) + if banner is not None: + yield off, banner + + +class LinuxBannerScanner(BannerScanner): + BANNER_PATTERN = rb"Linux version [0-9]+\.[0-9]+\.[0-9]+[^\x00]+" + + +class MacBannerScanner(BannerScanner): + BANNER_PATTERN = rb"Darwin Kernel Version [0-9]+\.[0-9]+\.[0-9]+[^\x00]+" diff --git a/volatility3/framework/automagic/linux.py b/volatility3/framework/automagic/linux.py index 511b957310..2385a6cf23 100644 --- a/volatility3/framework/automagic/linux.py +++ b/volatility3/framework/automagic/linux.py @@ -45,11 +45,21 @@ def stack( ) return None - mss = scanners.MultiStringScanner([x for x in linux_banners if x is not None]) - for _, banner in layer.scan( + banners = [banner for banner in linux_banners if banner is not None] + max_banner_length = max(map(len, banners), default=0) + # Arbitrary constant, reduces memory usage when the cache + # has thousands of ISF banners. + mss = scanners.MultiStringScanner(banners, max_depth=40) + for offset, _ in layer.scan( context=context, scanner=mss, progress_callback=progress_callback ): - dtb = None + banner_raw = layer.read(offset, max_banner_length, pad=True) + null_index = banner_raw.find(b"\x00") + if null_index <= 0: + continue + + # See symbol_cache's _normalize_identifier + banner = banner_raw[:null_index].rstrip() vollog.debug(f"Identified banner: {repr(banner)}") isf_path = linux_banners.get(banner, None) diff --git a/volatility3/framework/automagic/symbol_cache.py b/volatility3/framework/automagic/symbol_cache.py index 327575e969..92ea273a70 100644 --- a/volatility3/framework/automagic/symbol_cache.py +++ b/volatility3/framework/automagic/symbol_cache.py @@ -296,6 +296,12 @@ def get_hash(self, location: str) -> Optional[str]: return row["hash"] return None + def _normalize_linux_identifier(self, identifier: bytes): + # Unify banner ending, accounts for "\x00\n" and "\n\x00" cases + identifier = identifier.rstrip() + identifier = identifier.rstrip(b"\x00") + return identifier.rstrip() + def update(self, progress_callback=None): """Locates all files under the symbol directories. Updates the cache with additions, modifications and removals. This also updates remote locations based on a cache timeout. @@ -397,6 +403,10 @@ def dummy_progress(*args, **kargs) -> None: identifier = idextractor.get_identifier(json_obj) if identifier is not None: operating_system = idextractor.operating_system + if operating_system == "linux": + identifier = self._normalize_linux_identifier( + identifier + ) break # We don't try to validate schemas here, we do that on first use @@ -447,10 +457,9 @@ def dummy_progress(*args, **kargs) -> None: {}, operating_system=operating_system ) for identifier, location in identifiers: - identifier = identifier.rstrip() - identifier = ( - identifier[:-1] if identifier.endswith(b"\x00") else identifier - ) # Linux banners dumped by dwarf2json end with "\x00\n". If not stripped, the banner cannot match. + if operating_system == "linux": + identifier = self._normalize_linux_identifier(identifier) + cursor.execute( "INSERT OR REPLACE INTO cache(identifier, location, operating_system, local, cached) VALUES (?, ?, ?, ?, datetime('now'))", (identifier, location, operating_system, False), diff --git a/volatility3/framework/automagic/symbol_finder.py b/volatility3/framework/automagic/symbol_finder.py index d7c6a22c18..3491398bca 100644 --- a/volatility3/framework/automagic/symbol_finder.py +++ b/volatility3/framework/automagic/symbol_finder.py @@ -127,22 +127,24 @@ def _banner_scan( if not self.banners: return None - mss = scanners.MultiStringScanner([x for x in self.banners if x is not None]) - layer = context.layers[layer_name] # Check if the Stacker has already found what we're looking for if layer.config.get(self.banner_config_key, None): banner_list = [ (0, bytes(layer.config[self.banner_config_key], "raw_unicode_escape")) - ] # type: Iterable[Any] + ] else: # Swap to the physical layer for scanning # Only traverse down a layer if it's an intel layer # TODO: Fix this so it works for layers other than just Intel - if isinstance(layer, layers.intel.Intel): - layer = context.layers[layer.config["memory_layer"]] - banner_list = layer.scan( + mss = scanners.MultiStringScanner( + [x for x in self.banners if x is not None] + ) + scan_layer = layer + if isinstance(scan_layer, layers.intel.Intel): + scan_layer = context.layers[scan_layer.config["memory_layer"]] + banner_list = scan_layer.scan( context=context, scanner=mss, progress_callback=progress_callback ) diff --git a/volatility3/framework/constants/_version.py b/volatility3/framework/constants/_version.py index 9e6add3cdc..456d4e67c9 100644 --- a/volatility3/framework/constants/_version.py +++ b/volatility3/framework/constants/_version.py @@ -1,7 +1,7 @@ # We use the SemVer 2.0.0 versioning scheme VERSION_MAJOR = 2 # Number of releases of the library with a breaking change -VERSION_MINOR = 28 # Number of changes that only add to the interface -VERSION_PATCH = 1 # Number of changes that do not change the interface +VERSION_MINOR = 29 # Number of changes that only add to the interface +VERSION_PATCH = 0 # Number of changes that do not change the interface VERSION_SUFFIX = "" PACKAGE_VERSION = ( diff --git a/volatility3/framework/interfaces/objects.py b/volatility3/framework/interfaces/objects.py index 1228c4d0a3..64a00398b8 100644 --- a/volatility3/framework/interfaces/objects.py +++ b/volatility3/framework/interfaces/objects.py @@ -79,7 +79,7 @@ def __getitem__(self, key): raise KeyError(f"No {key} present in ObjectInformation") def __contains__(self, key): - return key in [field.name for field in dataclasses.fields(self)] + return any(field.name == key for field in dataclasses.fields(self)) class ObjectInterface(metaclass=abc.ABCMeta): diff --git a/volatility3/framework/layers/scanners/__init__.py b/volatility3/framework/layers/scanners/__init__.py index f07849f42a..dc14e469ab 100644 --- a/volatility3/framework/layers/scanners/__init__.py +++ b/volatility3/framework/layers/scanners/__init__.py @@ -61,14 +61,14 @@ def __call__(self, data: bytes, data_offset: int) -> Generator[int, None, None]: class MultiStringScanner(layers.ScannerInterface): thread_safe = True - _version = (1, 0, 0) + _version = (1, 0, 1) _required_framework_version = (2, 0, 0) - def __init__(self, patterns: List[bytes]) -> None: + def __init__(self, patterns: List[bytes], max_depth: int = None) -> None: super().__init__() self._pattern_trie: Optional[Dict[int, Optional[Dict]]] = {} for pattern in patterns: - self._process_pattern(pattern) + self._process_pattern(pattern[: max_depth or len(pattern)]) self._regex = self._process_trie(self._pattern_trie) def _process_pattern(self, value: bytes) -> None: diff --git a/volatility3/framework/plugins/banners.py b/volatility3/framework/plugins/banners.py index 69a1ee2b78..f322c0f18c 100644 --- a/volatility3/framework/plugins/banners.py +++ b/volatility3/framework/plugins/banners.py @@ -9,6 +9,7 @@ from volatility3.framework.layers import scanners from volatility3.framework.renderers import format_hints from volatility3.framework.symbols.windows import pdbutil +from volatility3.framework.automagic import banner_scanners vollog = logging.getLogger(__name__) @@ -17,7 +18,7 @@ class Banners(interfaces.plugins.PluginInterface): """Attempts to identify potential linux banners in an image""" _required_framework_version = (2, 0, 0) - _version = (1, 1, 0) + _version = (1, 2, 0) @classmethod def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface]: @@ -35,48 +36,48 @@ def get_requirements(cls) -> List[interfaces.configuration.RequirementInterface] component=pdbutil.PdbSignatureScanner, version=(1, 0, 0), ), + requirements.VersionRequirement( + name="banner_scanners_bannerscanner", + component=banner_scanners.BannerScanner, + version=(1, 0, 0), + ), ] def _generator(self): layer = self.context.layers[self.config["primary"]] if isinstance(layer, layers.intel.Intel): layer = self.context.layers[layer.config["memory_layer"]] - for offset, banner in self.locate_banners(self.context, layer.name): + for offset, banner in self.locate_banners( + self.context, layer.name, self._progress_callback + ): yield 0, (offset, banner) @classmethod def locate_banners( - cls, context: interfaces.context.ContextInterface, layer_name: str + cls, + context: interfaces.context.ContextInterface, + layer_name: str, + progress_callback: constants.ProgressCallback = None, ): """Identifies banners from a memory image""" # Look for likely linux/mac banners layer = context.layers[layer_name] - for offset in layer.scan( - context=context, - scanner=scanners.RegExScanner( - rb"(Linux version|Darwin Kernel Version) [0-9]+\.[0-9]+\.[0-9]+" - ), + scanner = banner_scanners.BannerScanner + for offset, banner in layer.scan( + context=context, scanner=scanner(), progress_callback=progress_callback ): - data = layer.read(offset, 0xFFF) - data_index = data.find(b"\x00") - if data_index > 0: - data = data[:data_index].strip() - failed = [ - char - for char in data - if char - not in b" #()+,;/-.0123456789:@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" - ] - if not failed: - yield ( - format_hints.Hex(offset), - str(data, encoding="latin-1", errors="?"), - ) + yield ( + format_hints.Hex(offset), + str(banner, encoding="latin-1", errors="?"), + ) yield from cls.locate_windows_banners(context, layer_name) @classmethod def locate_windows_banners( - cls, context: interfaces.context.ContextInterface, layer_name: str + cls, + context: interfaces.context.ContextInterface, + layer_name: str, + progress_callback: constants.ProgressCallback = None, ): layer = context.layers[layer_name] kernel_pdb_names = [ @@ -86,6 +87,7 @@ def locate_windows_banners( for guid, age, pdb_name, offset in layer.scan( context=context, scanner=pdbutil.PdbSignatureScanner(kernel_pdb_names), + progress_callback=progress_callback, ): yield ( format_hints.Hex(offset), diff --git a/volatility3/framework/plugins/linux/pagecache.py b/volatility3/framework/plugins/linux/pagecache.py index 2d20a2fb1d..49bd5f6325 100644 --- a/volatility3/framework/plugins/linux/pagecache.py +++ b/volatility3/framework/plugins/linux/pagecache.py @@ -528,7 +528,8 @@ def write_inode_content_to_stream( max_length = inode_size - current_fp page_bytes_len = min(max_length, len(page_content)) if current_fp >= inode_size or current_fp + page_bytes_len > inode_size: - vollog.error( + vollog.log( + constants.LOGLEVEL_VVV, "Page out of file bounds: inode 0x%x, inode size %d, page index %d", inode.vol.offset, inode_size, @@ -560,8 +561,9 @@ def _generate_inode_fields( try: for page_obj in inode.get_pages(): if page_obj.mapping != inode.i_mapping: - vollog.warning( - f"Cached page at {page_obj.vol.offset:#x} has a mismatched address space with the inode. Skipping page" + vollog.log( + constants.LOGLEVEL_VVV, + f"Cached page at {page_obj.vol.offset:#x} has a mismatched address space with the inode. Skipping page", ) continue page_vaddr = page_obj.vol.offset @@ -660,7 +662,7 @@ class RecoverFs(plugins.PluginInterface): Troubleshooting: to fix extraction errors related to long paths, please consider using https://github.com/mxmlnkn/ratarmount. """ - _version = (1, 0, 1) + _version = (1, 0, 2) _required_framework_version = (2, 21, 0) @classmethod @@ -793,9 +795,11 @@ def _generator(self): vmlinux_module_name = self.config["kernel"] vmlinux = self.context.modules[vmlinux_module_name] vmlinux_layer = self.context.layers[vmlinux.layer_name] - tar_buffer = BytesIO() + + output_filename = f"recovered_fs.tar.{self.config['compression_format']}" + output_file = self.open(output_filename) tar = tarfile.open( - fileobj=tar_buffer, + fileobj=output_file, mode=f"w:{self.config['compression_format']}", ) # Set a unique timestamp for all extracted files @@ -896,10 +900,7 @@ def _generator(self): yield (0, astuple(inode_out) + (extracted_file_size,)) tar.close() - tar_buffer.seek(0) - output_filename = f"recovered_fs.tar.{self.config['compression_format']}" - with self.open(output_filename) as f: - f.write(tar_buffer.getvalue()) + output_file.close() def run(self): headers = [ diff --git a/volatility3/framework/symbols/linux/extensions/__init__.py b/volatility3/framework/symbols/linux/extensions/__init__.py index 836810a6de..653c6754cc 100644 --- a/volatility3/framework/symbols/linux/extensions/__init__.py +++ b/volatility3/framework/symbols/linux/extensions/__init__.py @@ -1615,7 +1615,9 @@ def to_list( vmlinux = linux.LinuxUtilities.get_module_from_volobj_type(self._context, self) current = self.first - while current and current.is_readable(): + seen = set() + while current and current.is_readable() and current.vol.offset not in seen: + seen.add(current.vol.offset) yield linux.LinuxUtilities.container_of( current, symbol_type, member, vmlinux )