Skip to content

Commit b9ce88f

Browse files
authored
feat: implement manifest group (#455)
1 parent 40565f4 commit b9ce88f

19 files changed

+1234
-103
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ set(ICEBERG_SOURCES
4242
json_internal.cc
4343
manifest/manifest_adapter.cc
4444
manifest/manifest_entry.cc
45+
manifest/manifest_group.cc
4546
manifest/manifest_list.cc
4647
manifest/manifest_reader.cc
4748
manifest/manifest_writer.cc

src/iceberg/constants.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,24 @@
1919

2020
#pragma once
2121

22+
/// \file iceberg/constants.h
23+
/// This file defines constants used commonly and shared across multiple
24+
/// source files. It is mostly useful to add constants that are used as
25+
/// default values in the class definitions in the header files without
26+
/// including other headers just for the constant definitions.
27+
2228
#include <cstdint>
2329
#include <string_view>
2430

2531
namespace iceberg {
2632

2733
constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id";
2834
constexpr int64_t kInvalidSnapshotId = -1;
35+
/// \brief Stand-in for the current sequence number that will be assigned when the commit
36+
/// is successful. This is replaced when writing a manifest list by the ManifestFile
37+
/// adapter.
38+
constexpr int64_t kUnassignedSequenceNumber = -1;
39+
40+
// TODO(gangwu): move other commonly used constants here.
2941

3042
} // namespace iceberg

src/iceberg/delete_file_index.cc

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -453,34 +453,32 @@ Result<std::shared_ptr<DataFile>> DeleteFileIndex::FindDV(
453453
}
454454

455455
Result<DeleteFileIndex::Builder> DeleteFileIndex::BuilderFor(
456-
std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests) {
456+
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
457+
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
458+
std::vector<ManifestFile> delete_manifests) {
457459
ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null");
458-
return Builder(std::move(io), std::move(delete_manifests));
460+
ICEBERG_PRECHECK(schema != nullptr, "Schema cannot be null");
461+
ICEBERG_PRECHECK(!specs_by_id.empty(), "Partition specs cannot be empty");
462+
return Builder(std::move(io), std::move(schema), std::move(specs_by_id),
463+
std::move(delete_manifests));
459464
}
460465

461466
// Builder implementation
462467

463-
DeleteFileIndex::Builder::Builder(std::shared_ptr<FileIO> io,
464-
std::vector<ManifestFile> delete_manifests)
465-
: io_(std::move(io)), delete_manifests_(std::move(delete_manifests)) {}
468+
DeleteFileIndex::Builder::Builder(
469+
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
470+
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
471+
std::vector<ManifestFile> delete_manifests)
472+
: io_(std::move(io)),
473+
schema_(std::move(schema)),
474+
specs_by_id_(std::move(specs_by_id)),
475+
delete_manifests_(std::move(delete_manifests)) {}
466476

467477
DeleteFileIndex::Builder::~Builder() = default;
468478
DeleteFileIndex::Builder::Builder(Builder&&) noexcept = default;
469479
DeleteFileIndex::Builder& DeleteFileIndex::Builder::operator=(Builder&&) noexcept =
470480
default;
471481

472-
DeleteFileIndex::Builder& DeleteFileIndex::Builder::SpecsById(
473-
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id) {
474-
specs_by_id_ = std::move(specs_by_id);
475-
return *this;
476-
}
477-
478-
DeleteFileIndex::Builder& DeleteFileIndex::Builder::WithSchema(
479-
std::shared_ptr<Schema> schema) {
480-
schema_ = std::move(schema);
481-
return *this;
482-
}
483-
484482
DeleteFileIndex::Builder& DeleteFileIndex::Builder::AfterSequenceNumber(int64_t seq) {
485483
min_sequence_number_ = seq;
486484
return *this;
@@ -721,10 +719,6 @@ Status DeleteFileIndex::Builder::AddEqualityDelete(
721719

722720
Result<std::unique_ptr<DeleteFileIndex>> DeleteFileIndex::Builder::Build() {
723721
ICEBERG_RETURN_UNEXPECTED(CheckErrors());
724-
ICEBERG_PRECHECK(io_ != nullptr, "FileIO is required to load delete files");
725-
ICEBERG_PRECHECK(schema_ != nullptr, "Schema is required to load delete files");
726-
ICEBERG_PRECHECK(!specs_by_id_.empty(),
727-
"Partition specs are required to load delete files");
728722

729723
std::vector<ManifestEntry> entries;
730724
if (!delete_manifests_.empty()) {

src/iceberg/delete_file_index.h

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,14 @@ class ICEBERG_EXPORT DeleteFileIndex {
268268
/// \brief Create a builder for constructing a DeleteFileIndex from manifest files.
269269
///
270270
/// \param io The FileIO to use for reading manifests
271+
/// \param schema Current table schema
272+
/// \param specs_by_id Partition specs by their IDs
271273
/// \param delete_manifests The delete manifests to index
272274
/// \return A Builder instance
273-
static Result<Builder> BuilderFor(std::shared_ptr<FileIO> io,
274-
std::vector<ManifestFile> delete_manifests);
275+
static Result<Builder> BuilderFor(
276+
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
277+
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
278+
std::vector<ManifestFile> delete_manifests);
275279

276280
private:
277281
friend class Builder;
@@ -318,7 +322,9 @@ class ICEBERG_EXPORT DeleteFileIndex {
318322
class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
319323
public:
320324
/// \brief Construct a builder from manifest files.
321-
Builder(std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests);
325+
Builder(std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
326+
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
327+
std::vector<ManifestFile> delete_manifests);
322328

323329
~Builder() override;
324330

@@ -327,15 +333,6 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
327333
Builder(const Builder&) = delete;
328334
Builder& operator=(const Builder&) = delete;
329335

330-
/// \brief Set the partition specs by ID.
331-
Builder& SpecsById(
332-
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id);
333-
334-
/// \brief Set the table schema.
335-
///
336-
/// Required for filtering and expression evaluation.
337-
Builder& WithSchema(std::shared_ptr<Schema> schema);
338-
339336
/// \brief Set the minimum sequence number for delete files.
340337
///
341338
/// Only delete files with sequence number > min_sequence_number will be included.
@@ -384,10 +381,10 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
384381
ManifestEntry&& entry);
385382

386383
std::shared_ptr<FileIO> io_;
384+
std::shared_ptr<Schema> schema_;
385+
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
387386
std::vector<ManifestFile> delete_manifests_;
388387
int64_t min_sequence_number_ = 0;
389-
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
390-
std::shared_ptr<Schema> schema_;
391388
std::shared_ptr<Expression> data_filter_;
392389
std::shared_ptr<Expression> partition_filter_;
393390
std::shared_ptr<PartitionSet> partition_set_;

0 commit comments

Comments
 (0)