Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/iceberg/manifest/manifest_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#include "iceberg/partition_spec.h"
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/snapshot.h"
#include "iceberg/table_metadata.h"
#include "iceberg/type.h"

Expand Down Expand Up @@ -107,7 +106,7 @@ struct ICEBERG_EXPORT ManifestFile {
int64_t min_sequence_number = TableMetadata::kInitialSequenceNumber;
/// Field id: 503
/// ID of the snapshot where the manifest file was added
int64_t added_snapshot_id = Snapshot::kInvalidSnapshotId;
int64_t added_snapshot_id = -1; // Snapshot::kInvalidSnapshotId
/// Field id: 504
/// Number of entries in the manifest that have status ADDED (1), when null this is
/// assumed to be non-zero
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/manifest/manifest_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "iceberg/partition_summary_internal.h"
#include "iceberg/result.h"
#include "iceberg/schema.h"
#include "iceberg/snapshot.h"
#include "iceberg/table_metadata.h"
#include "iceberg/util/macros.h"

Expand Down
61 changes: 61 additions & 0 deletions src/iceberg/snapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@

#include "iceberg/snapshot.h"

#include "iceberg/file_io.h"
#include "iceberg/manifest/manifest_list.h"
#include "iceberg/manifest/manifest_reader.h"
#include "iceberg/util/macros.h"

namespace iceberg {

bool SnapshotRef::Branch::Equals(const SnapshotRef::Branch& other) const {
Expand Down Expand Up @@ -80,4 +85,60 @@ bool Snapshot::Equals(const Snapshot& other) const {
schema_id == other.schema_id;
}

/// \brief Read the manifest list of a snapshot and lay it out for caching.
///
/// The returned pair holds a single vector with all data manifests first,
/// followed by all delete manifests, plus the number of data manifests (i.e. the
/// index at which the delete manifests start). The relative order inside each
/// group is the order in which the manifest list reader returned the entries.
///
/// \param snapshot The snapshot whose `manifest_list` location is read
/// \param file_io The FileIO used to open the manifest list; must not be null
/// \return The populated cache, InvalidArgument when `file_io` is null, or the
/// error reported while creating the reader or reading its files
Result<CachedSnapshot::ManifestsCache> CachedSnapshot::InitManifestsCache(
    const Snapshot& snapshot, std::shared_ptr<FileIO> file_io) {
  if (file_io == nullptr) {
    return InvalidArgument("Cannot cache manifests: FileIO is null");
  }

  // Read manifest list
  ICEBERG_ASSIGN_OR_RAISE(auto reader,
                          ManifestListReader::Make(snapshot.manifest_list, file_io));
  ICEBERG_ASSIGN_OR_RAISE(auto manifest_files, reader->Files());

  std::vector<ManifestFile> manifests;
  manifests.reserve(manifest_files.size());

  // `manifest_files` is owned by this function, so its elements are moved rather
  // than deep-copied. Delete manifests are parked in a side vector so that a
  // single pass suffices and no moved-from element is ever inspected.
  std::vector<ManifestFile> delete_manifests;
  for (auto& manifest_file : manifest_files) {
    if (manifest_file.content == ManifestContent::kData) {
      manifests.push_back(std::move(manifest_file));
    } else if (manifest_file.content == ManifestContent::kDeletes) {
      delete_manifests.push_back(std::move(manifest_file));
    }
  }
  const size_t data_manifests_count = manifests.size();

  // Append the delete manifests after the data manifests.
  for (auto& manifest_file : delete_manifests) {
    manifests.push_back(std::move(manifest_file));
  }

  return std::make_pair(std::move(manifests), data_manifests_count);
}

/// \brief View over every manifest held in the cache.
///
/// \param file_io Forwarded to `manifests_cache_.Get` together with the snapshot
/// \return A span covering the whole cached vector, or the error produced while
/// obtaining the cache
Result<std::span<ManifestFile>> CachedSnapshot::Manifests(
    std::shared_ptr<FileIO> file_io) const {
  ICEBERG_ASSIGN_OR_RAISE(auto loaded, manifests_cache_.Get(snapshot_, file_io));
  auto& all_manifests = loaded.get().first;
  return std::span<ManifestFile>(all_manifests.data(), all_manifests.size());
}

/// \brief View over the leading part of the cached vector: the data manifests.
///
/// \param file_io Forwarded to `manifests_cache_.Get` together with the snapshot
/// \return A span of the first `data_count` cached entries, where `data_count`
/// is the second element of the cache pair, or the error produced while
/// obtaining the cache
Result<std::span<ManifestFile>> CachedSnapshot::DataManifests(
    std::shared_ptr<FileIO> file_io) const {
  ICEBERG_ASSIGN_OR_RAISE(auto loaded, manifests_cache_.Get(snapshot_, file_io));
  auto& [all_manifests, data_count] = loaded.get();
  return std::span<ManifestFile>(all_manifests.data(), data_count);
}

/// \brief View over the trailing part of the cached vector: the delete manifests.
///
/// \param file_io Forwarded to `manifests_cache_.Get` together with the snapshot
/// \return A span starting at index `data_count` (the second element of the
/// cache pair) and running to the end of the cached vector, or the error
/// produced while obtaining the cache
Result<std::span<ManifestFile>> CachedSnapshot::DeleteManifests(
    std::shared_ptr<FileIO> file_io) const {
  ICEBERG_ASSIGN_OR_RAISE(auto loaded, manifests_cache_.Get(snapshot_, file_io));
  auto& [all_manifests, data_count] = loaded.get();
  // Everything past the data manifests belongs to the delete group.
  auto* first_delete = all_manifests.data() + data_count;
  return std::span<ManifestFile>(first_delete, all_manifests.size() - data_count);
}

} // namespace iceberg
55 changes: 55 additions & 0 deletions src/iceberg/snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,20 @@

#pragma once

#include <memory>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <variant>

#include "iceberg/iceberg_export.h"
#include "iceberg/manifest/manifest_list.h"
#include "iceberg/result.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/lazy.h"
#include "iceberg/util/timepoint.h"

namespace iceberg {
Expand Down Expand Up @@ -260,4 +265,54 @@ struct ICEBERG_EXPORT Snapshot {
bool Equals(const Snapshot& other) const;
};

/// \brief A snapshot with cached manifest loading capabilities.
///
/// This class wraps a Snapshot reference and provides lazy-loading of manifests.
///
/// \note Only a reference to the Snapshot is stored (no copy is made), so the
/// referenced Snapshot must outlive this object. Because the constructor takes a
/// const reference, it also accepts a temporary; doing so leaves the stored
/// reference dangling once the temporary is destroyed.
class ICEBERG_EXPORT CachedSnapshot {
public:
/// \brief Wrap an existing Snapshot without copying it.
///
/// \param snapshot The snapshot to wrap; it is held by reference
explicit CachedSnapshot(const Snapshot& snapshot) : snapshot_(snapshot) {}

/// \brief Get the underlying Snapshot reference
const Snapshot& snapshot() const { return snapshot_; }

/// \brief Returns every ManifestFile in this snapshot, covering both data
/// manifests and delete manifests.
///
/// \param file_io The FileIO instance to use for reading the manifest list
/// \return A span of ManifestFile instances, or an error
/// \note NOTE(review): the span presumably views storage owned by
/// manifests_cache_ and so should not be used after this object is destroyed -
/// confirm against Lazy in iceberg/util/lazy.h.
Result<std::span<ManifestFile>> Manifests(std::shared_ptr<FileIO> file_io) const;

/// \brief Returns a ManifestFile for each data manifest in this snapshot.
///
/// \param file_io The FileIO instance to use for reading the manifest list
/// \return A span of ManifestFile instances, or an error
Result<std::span<ManifestFile>> DataManifests(std::shared_ptr<FileIO> file_io) const;

/// \brief Returns a ManifestFile for each delete manifest in this snapshot.
///
/// \param file_io The FileIO instance to use for reading the manifest list
/// \return A span of ManifestFile instances, or an error
Result<std::span<ManifestFile>> DeleteManifests(std::shared_ptr<FileIO> file_io) const;

private:
/// \brief Cache structure for storing loaded manifests
///
/// \note Manifests are stored in a single vector with data manifests at the head
/// and delete manifests at the tail, separated by the number of data manifests.
/// `first` is that vector; `second` is the number of data manifests, which is
/// also the index of the first delete manifest.
using ManifestsCache = std::pair<std::vector<ManifestFile>, size_t>;

/// \brief Initialize manifests cache by loading them from the manifest list file.
/// \param snapshot The snapshot to initialize the manifests cache for
/// \param file_io The FileIO instance to use for reading the manifest list
/// \return A result containing the manifests cache
/// \note Must be declared before manifests_cache_, whose type names this
/// function as its template argument.
static Result<ManifestsCache> InitManifestsCache(const Snapshot& snapshot,
std::shared_ptr<FileIO> file_io);

/// The underlying snapshot data (non-owning reference; see the class-level note
/// on lifetime)
const Snapshot& snapshot_;

/// Lazy-loaded manifests cache, parameterized on InitManifestsCache.
/// NOTE(review): presumably Lazy runs InitManifestsCache on the first Get() and
/// reuses the result afterwards - confirm in iceberg/util/lazy.h.
Lazy<InitManifestsCache> manifests_cache_;
};

} // namespace iceberg
Loading