From 94725b9814db34a9249fa1c172be845d41dbbdc7 Mon Sep 17 00:00:00 2001
From: q9982 <980754729@qq.com>
Date: Wed, 13 May 2026 17:49:54 +0800
Subject: [PATCH] Fix `tarfile.ReadError: unexpected end of data` when corpus
 files change while the tar archive is being created.

Corpus files can still change while the runner is creating a tar
archive. Adding them directly from disk can therefore write a tar header
for one size and then read a different amount of data, producing a
truncated tar member.

Stage regular files in a temporary file before writing them to the tar
archive. Non-regular entries only need their tar header, so avoid
reading file contents for them.
---
 experiment/runner.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/experiment/runner.py b/experiment/runner.py
index b8c95fcca..f9308554f 100644
--- a/experiment/runner.py
+++ b/experiment/runner.py
@@ -23,6 +23,7 @@
 import subprocess
 import sys
 import tarfile
+import tempfile
 import threading
 import time
 import zipfile
@@ -392,9 +393,10 @@ def archive_corpus(self):
                     last_modified_time = stat_info.st_mtime
                     if last_modified_time <= self.last_archive_time:
                         continue  # We've saved this file already.
-                    new_archive_time = max(new_archive_time, last_modified_time)
                     arcname = os.path.relpath(file_path, self.output_corpus)
-                    tar.add(file_path, arcname=arcname)
+                    if _add_corpus_file_to_archive(tar, file_path, arcname):
+                        new_archive_time = max(new_archive_time,
+                                               last_modified_time)
                 except (FileNotFoundError, OSError):
                     # We will get these errors if files or directories are being
                     # deleted from |directory| as we archive it. Don't bother
@@ -451,6 +453,26 @@ def get_fuzzer_module(fuzzer):
     return fuzzer_module
 
 
+def _add_corpus_file_to_archive(tar, file_path, arcname):
+    """Add |file_path| to |tar| without writing a truncated tar member."""
+    tarinfo = tar.gettarinfo(file_path, arcname=arcname)
+    if tarinfo is None:
+        return False
+    # Non-regular entries are written as a header only, with no file data.
+    if not tarinfo.isreg():
+        tar.addfile(tarinfo)
+        return True
+    # Stage the contents first so the header size matches the bytes written.
+    with tempfile.SpooledTemporaryFile(
+            max_size=CORPUS_ELEMENT_BYTES_LIMIT) as staged_file:
+        with open(file_path, 'rb') as file_handle:
+            shutil.copyfileobj(file_handle, staged_file)
+        tarinfo.size = staged_file.tell()
+        staged_file.seek(0)
+        tar.addfile(tarinfo, staged_file)
+    return True
+
+
 def get_corpus_elements(corpus_dir):
     """Returns a list of absolute paths to corpus elements in |corpus_dir|."""
     corpus_dir = os.path.abspath(corpus_dir)