diff --git a/Sources/FluidAudio/DownloadUtils.swift b/Sources/FluidAudio/DownloadUtils.swift index fe9fde44d..bce451ffa 100644 --- a/Sources/FluidAudio/DownloadUtils.swift +++ b/Sources/FluidAudio/DownloadUtils.swift @@ -100,12 +100,55 @@ public class DownloadUtils { } } + static func looksLikeHTML(_ data: Data) -> Bool { + let prefix = data.prefix(512) + let text = String(data: prefix, encoding: .utf8) ?? String(decoding: prefix, as: UTF8.self) + let lowered = text.lowercased().trimmingCharacters(in: .whitespacesAndNewlines) + return lowered.hasPrefix(" 0 && actualSize != expectedSize { + throw HuggingFaceDownloadError.invalidArtifact( + path: path, + reason: "size mismatch (expected \(expectedSize) bytes, got \(actualSize))") + } + } + public enum HuggingFaceDownloadError: LocalizedError { case invalidResponse case rateLimited(statusCode: Int, message: String) case downloadFailed(path: String, underlying: Error) case modelNotFound(path: String) case htmlErrorResponse(path: String, snippet: String) + case invalidArtifact(path: String, reason: String) public var errorDescription: String? { switch self { @@ -119,6 +162,8 @@ public class DownloadUtils { return "HuggingFace returned HTML instead of JSON for \(path) (rate limit or server issue): \(snippet)" case .modelNotFound(let path): return "Model file not found: \(path)" + case .invalidArtifact(let path, let reason): + return "Downloaded artifact for \(path) is invalid (\(reason)); refusing to cache it." } } } @@ -581,6 +626,7 @@ public class DownloadUtils { let tempFileURL = try await downloadFileWithRetry( request: request, path: file.path, + expectedSize: file.size, onProgress: onProgress ) @@ -714,6 +760,7 @@ public class DownloadUtils { private static func downloadFileWithRetry( request: URLRequest, path: String, + expectedSize: Int, onProgress: (@Sendable (Int64, Int64) -> Void)?, maxAttempts: Int = 4, minBackoff: TimeInterval = 1.0 @@ -750,6 +797,10 @@ public class DownloadUtils { ) } + // Validate before the caller moves the temp file into the cache. + try validateDownloadedArtifact( + at: tempURL, response: httpResponse, path: path, expectedSize: expectedSize) + return tempURL } catch { lastError = error @@ -787,6 +838,9 @@ public class DownloadUtils { switch error { case HuggingFaceDownloadError.rateLimited: return true + case HuggingFaceDownloadError.invalidArtifact: + // Usually a transient unhealthy network path (proxy, mirror 5xx) — retry. + return true case HuggingFaceDownloadError.downloadFailed(_, let underlying): let nsError = underlying as NSError return nsError.domain == "HTTP" && (500...599).contains(nsError.code) @@ -917,6 +971,10 @@ public class DownloadUtils { ) } + // Reject HTML error pages / truncated bodies before caching. + try validateDownloadedArtifact( + at: tempURL, response: httpResponse, path: file.path, expectedSize: file.size) + if FileManager.default.fileExists(atPath: destPath.path) { try? FileManager.default.removeItem(at: destPath) } diff --git a/Tests/FluidAudioTests/Shared/DownloadArtifactValidationTests.swift b/Tests/FluidAudioTests/Shared/DownloadArtifactValidationTests.swift new file mode 100644 index 000000000..e9c6caee3 --- /dev/null +++ b/Tests/FluidAudioTests/Shared/DownloadArtifactValidationTests.swift @@ -0,0 +1,158 @@ +import XCTest + +@testable import FluidAudio + +/// `DownloadUtils.validateDownloadedArtifact` rejects HTML error pages and +/// truncated bodies before they reach the cache (issue #740). +final class DownloadArtifactValidationTests: XCTestCase { + + private var tempDir: URL! + + override func setUpWithError() throws { + tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("fluidaudio-artifact-tests-\(UUID().uuidString)") + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + } + + override func tearDownWithError() throws { + if let tempDir { try? FileManager.default.removeItem(at: tempDir) } + } + + // MARK: - helpers + + private func writeTemp(_ data: Data, name: String = UUID().uuidString) throws -> URL { + let url = tempDir.appendingPathComponent(name) + try data.write(to: url) + return url + } + + private func response( + contentType: String? = "application/octet-stream" + ) -> HTTPURLResponse { + var headers: [String: String] = [:] + if let contentType { headers["Content-Type"] = contentType } + return HTTPURLResponse( + url: URL(string: "https://huggingface.co/test/file")!, + statusCode: 200, + httpVersion: "HTTP/1.1", + headerFields: headers + )! + } + + private func assertInvalid( + _ body: @autoclosure () throws -> Void, + reasonContains: String, + file: StaticString = #filePath, + line: UInt = #line + ) { + do { + try body() + XCTFail("expected invalidArtifact to be thrown", file: file, line: line) + } catch let DownloadUtils.HuggingFaceDownloadError.invalidArtifact(_, reason) { + XCTAssertTrue( + reason.lowercased().contains(reasonContains.lowercased()), + "reason \"\(reason)\" should mention \"\(reasonContains)\"", + file: file, line: line + ) + } catch { + XCTFail("expected invalidArtifact, got: \(error)", file: file, line: line) + } + } + + // MARK: - looksLikeHTML + + func testLooksLikeHTMLDetectsDoctype() { + XCTAssertTrue(DownloadUtils.looksLikeHTML(Data("".utf8))) + } + + func testLooksLikeHTMLDetectsLeadingWhitespaceAndCasing() { + XCTAssertTrue(DownloadUtils.looksLikeHTML(Data("\n\n ".utf8))) + } + + func testLooksLikeHTMLDetectsXMLProxyEnvelope() { + XCTAssertTrue(DownloadUtils.looksLikeHTML(Data("".utf8))) + } + + func testLooksLikeHTMLAllowsBinaryWeights() { + // Markup-like bytes mid-stream ('\nProxy error".utf8) + let url = try writeTemp(html) + assertInvalid( + try DownloadUtils.validateDownloadedArtifact( + at: url, response: response(contentType: "application/octet-stream"), + path: "Model.mlmodelc/weights/weight.bin", expectedSize: -1), + reasonContains: "html") + } + + func testRejectsTruncatedBody() throws { + let url = try writeTemp(Data(repeating: 0x7F, count: 500)) + assertInvalid( + try DownloadUtils.validateDownloadedArtifact( + at: url, response: response(), path: "Model.mlmodelc/weights/weight.bin", + expectedSize: 1000), + reasonContains: "size mismatch") + } + + func testRejectsOversizedBody() throws { + let url = try writeTemp(Data(repeating: 0x7F, count: 2000)) + assertInvalid( + try DownloadUtils.validateDownloadedArtifact( + at: url, response: response(), path: "file.bin", expectedSize: 1000), + reasonContains: "size mismatch") + } + + // MARK: - error description + + func testInvalidArtifactErrorDescription() { + let err = DownloadUtils.HuggingFaceDownloadError.invalidArtifact( + path: "Encoder.mlmodelc/weights/weight.bin", reason: "empty file") + XCTAssertEqual( + err.errorDescription, + "Downloaded artifact for Encoder.mlmodelc/weights/weight.bin is invalid (empty file); refusing to cache it." + ) + } +}