Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ if(ICEBERG_BUILD_BUNDLE)
avro/avro_schema_util.cc
avro/avro_stream_internal.cc
parquet/parquet_data_util.cc
parquet/parquet_metrics.cc
parquet/parquet_reader.cc
parquet/parquet_register.cc
parquet/parquet_schema_util.cc
Expand Down
3 changes: 3 additions & 0 deletions src/iceberg/file_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "iceberg/arrow_c_data.h"
#include "iceberg/file_format.h"
#include "iceberg/metrics.h"
#include "iceberg/metrics_config.h"
#include "iceberg/result.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/config.h"
Expand Down Expand Up @@ -77,6 +78,8 @@ struct ICEBERG_EXPORT WriterOptions {
std::shared_ptr<class FileIO> io;
/// \brief Metadata to write to the file.
std::unordered_map<std::string, std::string> metadata;
/// \brief Metrics configuration.
std::shared_ptr<MetricsConfig> metrics_config = MetricsConfig::Default();
/// \brief Format-specific or implementation-specific properties.
WriterProperties properties;
};
Expand Down
29 changes: 29 additions & 0 deletions src/iceberg/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,35 @@

namespace iceberg {

/// \brief Field-level metrics for a single column.
///
/// This structure captures value counts, null counts, NaN counts, and optional
/// lower/upper bounds for a specific field identified by its field_id.
///
/// All three counters default to -1, which this struct uses as the "unknown"
/// marker; both bounds default to std::nullopt.
struct ICEBERG_EXPORT FieldMetrics {
/// \brief The ID of the field that these metrics describe.
///
/// NOTE(review): unlike the other members, this one has no default member
/// initializer, so a default-initialized FieldMetrics (`FieldMetrics m;`)
/// leaves it indeterminate. Callers must set it explicitly or use
/// value/aggregate initialization.
int32_t field_id;

/// \brief The total number of values (including nulls) for this field.
/// A negative value indicates the count is unknown (default: -1).
int64_t value_count = -1;

/// \brief The number of null values for this field.
/// A negative value indicates the count is unknown (default: -1).
int64_t null_value_count = -1;

/// \brief The number of NaN values for this field.
/// A negative value indicates the count is unknown (default: -1).
int64_t nan_value_count = -1;

/// \brief The lower bound value as a Literal.
/// std::nullopt (the default) if no lower bound is available.
std::optional<Literal> lower_bound = std::nullopt;

/// \brief The upper bound value as a Literal.
/// std::nullopt (the default) if no upper bound is available.
std::optional<Literal> upper_bound = std::nullopt;
};

/// \brief Iceberg file format metrics
struct ICEBERG_EXPORT Metrics {
std::optional<int64_t> row_count;
Expand Down
22 changes: 22 additions & 0 deletions src/iceberg/metrics_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "iceberg/metrics_config.h"

#include <limits>
#include <string>
#include <unordered_map>

Expand Down Expand Up @@ -100,6 +101,19 @@ Result<MetricsMode> MetricsMode::FromString(std::string_view mode) {
return InvalidArgument("Invalid metrics mode: {}", mode);
}

/// \brief Map this mode to a truncation length.
///
/// \return The int32_t stored in `length` when the kind is kTruncate, the
/// largest int32_t value when the kind is kFull, and 0 for every other kind.
int32_t MetricsMode::TruncateLength() const {
  // Truncate is the only kind that reads the length stored in the variant.
  // std::get throws std::bad_variant_access if `length` does not hold an int32_t.
  if (kind == Kind::kTruncate) {
    return std::get<int32_t>(length);
  }

  // Full mode reports the largest representable int32_t value.
  if (kind == Kind::kFull) {
    return std::numeric_limits<int32_t>::max();
  }

  // kNone, kCounts, and any value outside the handled enumerators yield 0.
  return 0;
}

/// \brief Construct a config from a column-mode map and a default mode.
///
/// \param column_modes Taken by value and moved into `column_modes_`.
/// \param default_mode Copied into `default_mode_`.
MetricsConfig::MetricsConfig(ColumnModeMap column_modes, MetricsMode default_mode)
    : column_modes_(std::move(column_modes)),  // sink parameter: avoid a second copy
      default_mode_(default_mode) {}

Expand All @@ -116,6 +130,14 @@ Result<std::shared_ptr<MetricsConfig>> MetricsConfig::Make(const Table& table) {
*sort_order.value_or(SortOrder::Unsorted()));
}

/// \brief Build a MetricsConfig directly from a property map.
///
/// The map is wrapped in a TableProperties and forwarded to MakeInternal
/// together with a Schema constructed from `{}` and the unsorted sort order.
///
/// \param properties Property key/value pairs; moved into the TableProperties.
/// \return Whatever MakeInternal returns for those inputs.
Result<std::shared_ptr<MetricsConfig>> MetricsConfig::Make(
    std::unordered_map<std::string, std::string> properties) {
  // Wrap the raw map so it can be passed where a TableProperties is expected.
  TableProperties table_props = TableProperties::FromMap(std::move(properties));

  // This overload has no table to take a schema or sort order from, so it
  // passes a Schema built from `{}` and SortOrder::Unsorted().
  Schema placeholder_schema({});
  return MakeInternal(table_props, placeholder_schema, *SortOrder::Unsorted());
}

Result<std::shared_ptr<MetricsConfig>> MetricsConfig::MakeInternal(
const TableProperties& props, const Schema& schema, const SortOrder& order) {
ColumnModeMap column_modes;
Expand Down
11 changes: 11 additions & 0 deletions src/iceberg/metrics_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ struct ICEBERG_EXPORT MetricsMode {

Kind kind;
std::variant<std::monostate, int32_t> length;

/// \brief Get the truncate length from this MetricsMode.
/// \return 0 for None/Counts modes, the truncate length for Truncate mode,
/// or std::numeric_limits<int32_t>::max() for Full mode.
int32_t TruncateLength() const;
};

/// \brief Configuration for collecting column metrics for an Iceberg table.
Expand All @@ -63,6 +68,12 @@ class ICEBERG_EXPORT MetricsConfig {
/// \brief Creates a metrics config from a table.
static Result<std::shared_ptr<MetricsConfig>> Make(const Table& table);

/// \brief Creates a metrics config from properties (for testing)
/// \param properties Map of property key-value pairs
/// \return A shared pointer to the created MetricsConfig
static Result<std::shared_ptr<MetricsConfig>> Make(
std::unordered_map<std::string, std::string> properties);

/// \brief Get `limit` number of primitive field IDs from the schema.
static Result<std::unordered_set<int32_t>> LimitFieldIds(const Schema& schema,
int32_t limit);
Expand Down
Loading
Loading