Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions Sources/FluidAudio/DownloadUtils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,55 @@ public class DownloadUtils {
}
}

/// Heuristically decides whether `data` begins with HTML/XML markup rather than a binary payload.
///
/// Only the first 512 bytes are inspected. They are decoded as UTF-8 (lossily if the bytes are
/// not valid UTF-8), lowercased, and stripped of surrounding whitespace and byte-order marks
/// before being compared against the known markup openers.
///
/// - Parameter data: The leading bytes (or entire body) of a downloaded file.
/// - Returns: `true` when the text starts with `<!doctype html`, `<html`, or `<?xml`
///   (case-insensitive, ignoring leading whitespace and a leading UTF-8 BOM).
static func looksLikeHTML(_ data: Data) -> Bool {
    let prefix = data.prefix(512)
    // Strict decoding fails on binary data or on a multi-byte sequence cut at byte 512;
    // fall back to lossy decoding so the leading characters can still be examined.
    let text = String(data: prefix, encoding: .utf8) ?? String(decoding: prefix, as: UTF8.self)

    // A UTF-8 byte-order mark decodes to U+FEFF, which is not part of `.whitespacesAndNewlines`.
    // Without trimming it, an error page that starts with a BOM would not match any prefix below.
    let ignorable = CharacterSet.whitespacesAndNewlines.union(CharacterSet(charactersIn: "\u{FEFF}"))
    let lowered = text.lowercased().trimmingCharacters(in: ignorable)

    let markupOpeners = ["<!doctype html", "<html", "<?xml"]
    return markupOpeners.contains { lowered.hasPrefix($0) }
}

/// Sanity-checks a downloaded temp file so that error pages and wrong-sized bodies are rejected.
///
/// Checks run in this order, and the first failure is thrown:
/// 1. The `Content-Type` response header must not contain `text/html`.
/// 2. The file on disk must be non-empty (an unreadable size is treated as zero).
/// 3. The first 512 bytes must not look like HTML/XML markup (skipped if the file cannot be opened).
/// 4. When `expectedSize` is positive, the on-disk size must equal it exactly.
///
/// - Parameters:
///   - tempURL: Location of the downloaded file.
///   - response: The HTTP response that accompanied the download.
///   - path: Path of the artifact, used only for the error payload.
///   - expectedSize: Expected byte count; values `<= 0` disable the size comparison.
/// - Throws: `HuggingFaceDownloadError.invalidArtifact` describing the first failed check.
static func validateDownloadedArtifact(
    at tempURL: URL,
    response: HTTPURLResponse,
    path: String,
    expectedSize: Int
) throws {
    // Every rejection carries the same artifact path; only the reason differs.
    func rejection(_ reason: String) -> HuggingFaceDownloadError {
        return .invalidArtifact(path: path, reason: reason)
    }

    // 1. Header check: an HTML content type means the server sent a page, not the artifact.
    let declaredType = response.value(forHTTPHeaderField: "Content-Type")?.lowercased()
    if let htmlType = declaredType, htmlType.contains("text/html") {
        throw rejection("server returned Content-Type: \(htmlType)")
    }

    // 2. Size on disk; a missing or unreadable attribute dictionary counts as zero bytes.
    let attributes = try? FileManager.default.attributesOfItem(atPath: tempURL.path)
    let byteCount = (attributes?[.size] as? Int) ?? 0
    guard byteCount != 0 else {
        throw rejection("empty file")
    }

    // 3. Body sniff: catches HTML served under a binary content type.
    if let reader = try? FileHandle(forReadingFrom: tempURL) {
        defer { try? reader.close() }
        let leadingBytes = reader.readData(ofLength: 512)
        if looksLikeHTML(leadingBytes) {
            throw rejection("response body begins with HTML markup")
        }
    }

    // 4. Exact size comparison: both short and oversized bodies are rejected.
    guard expectedSize <= 0 || byteCount == expectedSize else {
        throw rejection("size mismatch (expected \(expectedSize) bytes, got \(byteCount))")
    }
}

public enum HuggingFaceDownloadError: LocalizedError {
case invalidResponse
case rateLimited(statusCode: Int, message: String)
case downloadFailed(path: String, underlying: Error)
case modelNotFound(path: String)
case htmlErrorResponse(path: String, snippet: String)
case invalidArtifact(path: String, reason: String)

public var errorDescription: String? {
switch self {
Expand All @@ -119,6 +162,8 @@ public class DownloadUtils {
return "HuggingFace returned HTML instead of JSON for \(path) (rate limit or server issue): \(snippet)"
case .modelNotFound(let path):
return "Model file not found: \(path)"
case .invalidArtifact(let path, let reason):
return "Downloaded artifact for \(path) is invalid (\(reason)); refusing to cache it."
}
}
}
Expand Down Expand Up @@ -581,6 +626,7 @@ public class DownloadUtils {
let tempFileURL = try await downloadFileWithRetry(
request: request,
path: file.path,
expectedSize: file.size,
onProgress: onProgress
)

Expand Down Expand Up @@ -714,6 +760,7 @@ public class DownloadUtils {
private static func downloadFileWithRetry(
request: URLRequest,
path: String,
expectedSize: Int,
onProgress: (@Sendable (Int64, Int64) -> Void)?,
maxAttempts: Int = 4,
minBackoff: TimeInterval = 1.0
Expand Down Expand Up @@ -750,6 +797,10 @@ public class DownloadUtils {
)
}

// Validate before the caller moves the temp file into the cache.
try validateDownloadedArtifact(
at: tempURL, response: httpResponse, path: path, expectedSize: expectedSize)

return tempURL
} catch {
lastError = error
Expand Down Expand Up @@ -787,6 +838,9 @@ public class DownloadUtils {
switch error {
case HuggingFaceDownloadError.rateLimited:
return true
case HuggingFaceDownloadError.invalidArtifact:
// Usually a transient unhealthy network path (proxy, mirror 5xx) — retry.
return true
case HuggingFaceDownloadError.downloadFailed(_, let underlying):
let nsError = underlying as NSError
return nsError.domain == "HTTP" && (500...599).contains(nsError.code)
Expand Down Expand Up @@ -917,6 +971,10 @@ public class DownloadUtils {
)
}

// Reject HTML error pages / truncated bodies before caching.
try validateDownloadedArtifact(
at: tempURL, response: httpResponse, path: file.path, expectedSize: file.size)

if FileManager.default.fileExists(atPath: destPath.path) {
try? FileManager.default.removeItem(at: destPath)
}
Expand Down
158 changes: 158 additions & 0 deletions Tests/FluidAudioTests/Shared/DownloadArtifactValidationTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import XCTest

@testable import FluidAudio

/// Covers `DownloadUtils.looksLikeHTML` and `DownloadUtils.validateDownloadedArtifact`:
/// HTML error pages, empty bodies, and wrong-sized bodies must be rejected before
/// they reach the cache (issue #740).
final class DownloadArtifactValidationTests: XCTestCase {

    /// Per-test scratch directory, created in `setUpWithError` and removed in `tearDownWithError`.
    private var tempDir: URL!

    override func setUpWithError() throws {
        let scratchName = "fluidaudio-artifact-tests-\(UUID().uuidString)"
        tempDir = FileManager.default.temporaryDirectory.appendingPathComponent(scratchName)
        try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
    }

    override func tearDownWithError() throws {
        guard let scratch = tempDir else { return }
        try? FileManager.default.removeItem(at: scratch)
    }

    // MARK: - helpers

    /// Writes `data` to a file inside the scratch directory and returns its URL.
    private func writeTemp(_ data: Data, name: String = UUID().uuidString) throws -> URL {
        let fileURL = tempDir.appendingPathComponent(name)
        try data.write(to: fileURL)
        return fileURL
    }

    /// Builds a 200 response whose only header is `Content-Type` (omitted when `nil`).
    private func response(
        contentType: String? = "application/octet-stream"
    ) -> HTTPURLResponse {
        let headerFields: [String: String] = contentType.map { ["Content-Type": $0] } ?? [:]
        return HTTPURLResponse(
            url: URL(string: "https://huggingface.co/test/file")!,
            statusCode: 200,
            httpVersion: "HTTP/1.1",
            headerFields: headerFields
        )!
    }

    /// Asserts that `body` throws `invalidArtifact` and that its reason mentions
    /// `reasonContains` (compared case-insensitively).
    private func assertInvalid(
        _ body: @autoclosure () throws -> Void,
        reasonContains: String,
        file: StaticString = #filePath,
        line: UInt = #line
    ) {
        var caught: Error?
        do {
            try body()
        } catch {
            caught = error
        }

        guard let failure = caught else {
            XCTFail("expected invalidArtifact to be thrown", file: file, line: line)
            return
        }

        switch failure {
        case DownloadUtils.HuggingFaceDownloadError.invalidArtifact(_, let reason):
            XCTAssertTrue(
                reason.lowercased().contains(reasonContains.lowercased()),
                "reason \"\(reason)\" should mention \"\(reasonContains)\"",
                file: file, line: line
            )
        default:
            XCTFail("expected invalidArtifact, got: \(failure)", file: file, line: line)
        }
    }

    // MARK: - looksLikeHTML

    func testLooksLikeHTMLDetectsDoctype() {
        let page = Data("<!DOCTYPE html><html></html>".utf8)
        XCTAssertTrue(DownloadUtils.looksLikeHTML(page))
    }

    func testLooksLikeHTMLDetectsLeadingWhitespaceAndCasing() {
        let page = Data("\n\n <HTML lang=\"en\">".utf8)
        XCTAssertTrue(DownloadUtils.looksLikeHTML(page))
    }

    func testLooksLikeHTMLDetectsXMLProxyEnvelope() {
        let envelope = Data("<?xml version=\"1.0\"?><error/>".utf8)
        XCTAssertTrue(DownloadUtils.looksLikeHTML(envelope))
    }

    func testLooksLikeHTMLAllowsBinaryWeights() {
        // The trailing bytes spell "<h"; markup-like bytes that are not at the start must not match.
        let weights = Data([0x00, 0x01, 0x02, 0xFF, 0xFE, 0x3C, 0x68])
        XCTAssertFalse(DownloadUtils.looksLikeHTML(weights))
    }

    func testLooksLikeHTMLAllowsJSON() {
        let json = Data("{\"vocab\": 1}".utf8)
        XCTAssertFalse(DownloadUtils.looksLikeHTML(json))
    }

    // MARK: - validateDownloadedArtifact

    func testValidArtifactPasses() throws {
        let artifactURL = try writeTemp(Data(repeating: 0xAB, count: 1024))
        XCTAssertNoThrow(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(),
                path: "Model.mlmodelc/weights/weight.bin",
                expectedSize: 1024))
    }

    func testValidArtifactWithUnknownSizeSkipsSizeCheck() throws {
        // A non-positive expected size disables the size comparison.
        let artifactURL = try writeTemp(Data(repeating: 0x01, count: 50))
        XCTAssertNoThrow(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(),
                path: "file.json",
                expectedSize: -1))
    }

    func testRejectsHTMLContentType() throws {
        let artifactURL = try writeTemp(Data(repeating: 0xAB, count: 1024))
        assertInvalid(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(contentType: "text/html; charset=utf-8"),
                path: "Model.mlmodelc/coremldata.bin",
                expectedSize: 1024),
            reasonContains: "content-type")
    }

    func testRejectsEmptyBody() throws {
        let artifactURL = try writeTemp(Data())
        assertInvalid(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(),
                path: "file.bin",
                expectedSize: 0),
            reasonContains: "empty")
    }

    func testRejectsHTMLBodyServedAsBinaryContentType() throws {
        let errorPage = Data("<!DOCTYPE html>\n<html><body>Proxy error</body></html>".utf8)
        let artifactURL = try writeTemp(errorPage)
        assertInvalid(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(contentType: "application/octet-stream"),
                path: "Model.mlmodelc/weights/weight.bin",
                expectedSize: -1),
            reasonContains: "html")
    }

    func testRejectsTruncatedBody() throws {
        let artifactURL = try writeTemp(Data(repeating: 0x7F, count: 500))
        assertInvalid(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(),
                path: "Model.mlmodelc/weights/weight.bin",
                expectedSize: 1000),
            reasonContains: "size mismatch")
    }

    func testRejectsOversizedBody() throws {
        let artifactURL = try writeTemp(Data(repeating: 0x7F, count: 2000))
        assertInvalid(
            try DownloadUtils.validateDownloadedArtifact(
                at: artifactURL,
                response: response(),
                path: "file.bin",
                expectedSize: 1000),
            reasonContains: "size mismatch")
    }

    // MARK: - error description

    func testInvalidArtifactErrorDescription() {
        let invalid = DownloadUtils.HuggingFaceDownloadError.invalidArtifact(
            path: "Encoder.mlmodelc/weights/weight.bin", reason: "empty file")
        let expectedMessage =
            "Downloaded artifact for Encoder.mlmodelc/weights/weight.bin is invalid (empty file); refusing to cache it."
        XCTAssertEqual(invalid.errorDescription, expectedMessage)
    }
}
Loading