From 9a0875b1edf3f0f23515afa7472569d640b307fc Mon Sep 17 00:00:00 2001 From: Innocent Date: Fri, 23 Jan 2026 22:10:58 -0700 Subject: [PATCH 1/8] initial commit added json serializer for expressions --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/expression/json_internal.cc | 523 +++++++++++++++++++++++ src/iceberg/expression/json_internal.h | 113 +++++ src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/expression_json_test.cc | 522 ++++++++++++++++++++++ 5 files changed, 1160 insertions(+) create mode 100644 src/iceberg/expression/json_internal.cc create mode 100644 src/iceberg/expression/json_internal.h create mode 100644 src/iceberg/test/expression_json_test.cc diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 35c312f60..317eb75d9 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -31,6 +31,7 @@ set(ICEBERG_SOURCES expression/expression.cc expression/expressions.cc expression/inclusive_metrics_evaluator.cc + expression/json_internal.cc expression/literal.cc expression/manifest_evaluator.cc expression/predicate.cc diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_internal.cc new file mode 100644 index 000000000..ca66ed8bf --- /dev/null +++ b/src/iceberg/expression/json_internal.cc @@ -0,0 +1,523 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/expression/json_internal.h" + +#include +#include +#include +#include + +#include + +#include "iceberg/expression/expressions.h" +#include "iceberg/expression/literal.h" +#include "iceberg/expression/predicate.h" +#include "iceberg/expression/term.h" +#include "iceberg/transform.h" +#include "iceberg/type.h" +#include "iceberg/util/json_util_internal.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +namespace { + +// JSON field names +constexpr std::string_view kType = "type"; +constexpr std::string_view kTerm = "term"; +constexpr std::string_view kValue = "value"; +constexpr std::string_view kValues = "values"; +constexpr std::string_view kLeft = "left"; +constexpr std::string_view kRight = "right"; +constexpr std::string_view kChild = "child"; +constexpr std::string_view kTransform = "transform"; + +// Expression type strings +constexpr std::string_view kTypeTrue = "true"; +constexpr std::string_view kTypeFalse = "false"; +constexpr std::string_view kTypeAnd = "and"; +constexpr std::string_view kTypeOr = "or"; +constexpr std::string_view kTypeNot = "not"; +constexpr std::string_view kTypeEq = "eq"; +constexpr std::string_view kTypeNotEq = "not-eq"; +constexpr std::string_view kTypeLt = "lt"; +constexpr std::string_view kTypeLtEq = "lt-eq"; +constexpr std::string_view kTypeGt = "gt"; +constexpr std::string_view kTypeGtEq = "gt-eq"; +constexpr std::string_view kTypeIn = "in"; +constexpr std::string_view kTypeNotIn = "not-in"; +constexpr std::string_view kTypeIsNull = "is-null"; +constexpr std::string_view 
kTypeNotNull = "not-null";
+constexpr std::string_view kTypeIsNan = "is-nan";
+constexpr std::string_view kTypeNotNan = "not-nan";
+constexpr std::string_view kTypeStartsWith = "starts-with";
+constexpr std::string_view kTypeNotStartsWith = "not-starts-with";
+constexpr std::string_view kTypeReference = "reference";
+
+// Term type for transform
+constexpr std::string_view kTypeTransform = "transform";
+
+/// Serialize a term (NamedReference or UnboundTransform) to JSON.
+///
+/// A term whose kind() is kReference becomes a plain JSON string holding the
+/// reference name. A term whose kind() is kTransform becomes an object with
+/// "type": "transform", the transform's ToString() under "transform", and the
+/// referenced name under "term". Any other kind yields a default-constructed
+/// nlohmann::json.
+///
+/// NOTE(review): no call to this helper is visible elsewhere in this file --
+/// confirm whether it is still needed.
+nlohmann::json TermToJson(const Term& term) {
+  if (term.kind() == Term::Kind::kReference) {
+    // Simple references are serialized as plain strings
+    return std::string(dynamic_cast(term).name());
+  } else if (term.kind() == Term::Kind::kTransform) {
+    // Note: const_cast is safe here because reference() just returns a shared_ptr
+    // and we're only reading from it. The method is not const due to interface design.
+    auto& transform_term =
+        const_cast(dynamic_cast(term));
+    nlohmann::json json;
+    json[kType] = kTypeTransform;
+    json[kTransform] = transform_term.transform()->ToString();
+    json[kTerm] = std::string(transform_term.reference()->name());
+    return json;
+  }
+  // Fallback for unknown term types
+  return nlohmann::json{};
+}
+
+/// Parse a term from JSON into a reference term.
+///
+/// Accepted forms:
+///  - a JSON string, taken as the referenced name;
+///  - an object with "type": "reference" and the name under "term".
+///
+/// Objects with "type": "transform" are recognised but always rejected: this
+/// helper only ever produces a term built from a NamedReference.
+///
+/// \param json The JSON value describing the term
+/// \return The parsed reference term, or an error if the value is neither a
+///         string nor a supported object, or if a required field is missing
+Result>> TermFromJsonAsReference(
+    const nlohmann::json& json) {
+  // Handle string term (simple reference)
+  if (json.is_string()) {
+    ICEBERG_ASSIGN_OR_RAISE(auto name, GetTypedJsonValue(json));
+    ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReference::Make(std::move(name)));
+    return std::shared_ptr(std::move(ref));
+  }
+
+  // Anything that is neither a string nor an object cannot describe a term
+  if (!json.is_object()) {
+    return JsonParseError("Invalid term format, expected string or object: {}",
+                          SafeDumpJson(json));
+  }
+
+  // Handle object term
+  ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType));
+
+  if (type_str == kTypeReference) {
+    ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue(json, kTerm));
+    ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReference::Make(std::move(name)));
+    return std::shared_ptr(std::move(ref));
+  }
+
+  if (type_str == kTypeTransform) {
+    // UnboundTransform binds to BoundTransform, not BoundReference, so it cannot
+    // be returned through this function's result type. The Java implementation
+    // handles this by using a common Term interface. Until an equivalent exists
+    // here, reject transform terms up front instead of parsing a transform and
+    // a reference that would only be thrown away.
+    return JsonParseError("Transform terms are not supported in this context: {}",
+                          SafeDumpJson(json));
+  }
+
+  return JsonParseError("Unknown term type '{}' in {}", type_str, SafeDumpJson(json));
+}
+
+/// Check if an operation is a unary predicate (no values).
+///
+/// \return true for kIsNull, kNotNull, kIsNan and kNotNan; false otherwise
+bool IsUnaryOperation(Expression::Operation op) {
+  switch (op) {
+    case Expression::Operation::kIsNull:
+    case Expression::Operation::kNotNull:
+    case Expression::Operation::kIsNan:
+    case Expression::Operation::kNotNan:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/// Check if an operation is a set predicate (multiple values).
+///
+/// \return true for kIn and kNotIn; false otherwise
+bool IsSetOperation(Expression::Operation op) {
+  switch (op) {
+    case Expression::Operation::kIn:
+    case Expression::Operation::kNotIn:
+      return true;
+    default:
+      return false;
+  }
+}
+
+}  // namespace
+
+/// Map an operation to the string stored under the JSON "type" key.
+std::string_view OperationToJsonType(Expression::Operation op) {
+  switch (op) {
+    case Expression::Operation::kTrue:
+      return kTypeTrue;
+    case Expression::Operation::kFalse:
+      return kTypeFalse;
+    case Expression::Operation::kAnd:
+      return kTypeAnd;
+    case Expression::Operation::kOr:
+      return kTypeOr;
+    case Expression::Operation::kNot:
+      return kTypeNot;
+    case Expression::Operation::kEq:
+      return kTypeEq;
+    case
Expression::Operation::kNotEq:
+      return kTypeNotEq;
+    case Expression::Operation::kLt:
+      return kTypeLt;
+    case Expression::Operation::kLtEq:
+      return kTypeLtEq;
+    case Expression::Operation::kGt:
+      return kTypeGt;
+    case Expression::Operation::kGtEq:
+      return kTypeGtEq;
+    case Expression::Operation::kIn:
+      return kTypeIn;
+    case Expression::Operation::kNotIn:
+      return kTypeNotIn;
+    case Expression::Operation::kIsNull:
+      return kTypeIsNull;
+    case Expression::Operation::kNotNull:
+      return kTypeNotNull;
+    case Expression::Operation::kIsNan:
+      return kTypeIsNan;
+    case Expression::Operation::kNotNan:
+      return kTypeNotNan;
+    case Expression::Operation::kStartsWith:
+      return kTypeStartsWith;
+    case Expression::Operation::kNotStartsWith:
+      return kTypeNotStartsWith;
+    default:
+      // No JSON spelling exists for the remaining operations. Note that
+      // OperationFromJsonType has no entry for "unknown", so this value cannot
+      // be parsed back.
+      return "unknown";
+  }
+}
+
+/// Map a JSON "type" string to its Expression::Operation.
+///
+/// The string is compared for exact equality against each of the nineteen
+/// spellings below; anything else is reported as a JsonParseError that
+/// includes the offending string.
+Result OperationFromJsonType(std::string_view type_str) {
+  if (type_str == kTypeTrue) return Expression::Operation::kTrue;
+  if (type_str == kTypeFalse) return Expression::Operation::kFalse;
+  if (type_str == kTypeAnd) return Expression::Operation::kAnd;
+  if (type_str == kTypeOr) return Expression::Operation::kOr;
+  if (type_str == kTypeNot) return Expression::Operation::kNot;
+  if (type_str == kTypeEq) return Expression::Operation::kEq;
+  if (type_str == kTypeNotEq) return Expression::Operation::kNotEq;
+  if (type_str == kTypeLt) return Expression::Operation::kLt;
+  if (type_str == kTypeLtEq) return Expression::Operation::kLtEq;
+  if (type_str == kTypeGt) return Expression::Operation::kGt;
+  if (type_str == kTypeGtEq) return Expression::Operation::kGtEq;
+  if (type_str == kTypeIn) return Expression::Operation::kIn;
+  if (type_str == kTypeNotIn) return Expression::Operation::kNotIn;
+  if (type_str == kTypeIsNull) return Expression::Operation::kIsNull;
+  if (type_str == kTypeNotNull) return Expression::Operation::kNotNull;
+  if (type_str == kTypeIsNan) return Expression::Operation::kIsNan;
+  if (type_str == kTypeNotNan) return Expression::Operation::kNotNan;
+  if (type_str == kTypeStartsWith) return Expression::Operation::kStartsWith;
+  if (type_str == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith;
+
+  return JsonParseError("Unknown expression type: {}", type_str);
+}
+
+/// Serialize a literal to a JSON value.
+///
+/// A null literal becomes JSON null. Otherwise the alternative currently held
+/// by literal.value() decides the encoding: the first group of alternatives is
+/// handed to nlohmann::json unchanged, one alternative is written through its
+/// ToString(), a byte sequence becomes a hex string, and a decimal becomes a
+/// string. A value matching none of the checks falls back to
+/// literal.ToString().
+nlohmann::json LiteralToJson(const Literal& literal) {
+  if (literal.IsNull()) {
+    return nlohmann::json(nullptr);
+  }
+
+  const auto& value = literal.value();
+  const auto type_id = literal.type()->type_id();
+
+  // Handle based on the variant type
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value);
+  }
+  if (std::holds_alternative(value)) {
+    return std::get(value).ToString();
+  }
+  if (std::holds_alternative>(value)) {
+    // Binary and Fixed are serialized as hex strings
+    const auto& bytes = std::get>(value);
+    std::string hex;
+    hex.reserve(bytes.size() * 2);  // two hex digits per byte
+    for (uint8_t byte : bytes) {
+      // "{:02x}" gives zero-padded, lowercase hexadecimal
+      hex += std::format("{:02x}", byte);
+    }
+    return hex;
+  }
+  if (std::holds_alternative(value)) {
+    // Decimal is serialized as string representation
+    const auto& decimal = std::get(value);
+    // The scaled form is only attempted when the literal's type id is
+    // kDecimal, because the scale is read from that type.
+    if (type_id == TypeId::kDecimal) {
+      const auto& decimal_type = static_cast(*literal.type());
+      auto result = decimal.ToString(decimal_type.scale());
+      if (result.has_value()) {
+        return result.value();
+      }
+    }
+    // Fallback to integer string representation
+    return decimal.ToIntegerString();
+  }
+
+  // Fallback: use ToString()
+  return literal.ToString();
+}
+
+/// Deserialize a JSON value into a Literal, choosing the literal type from the
+/// kind of the JSON value alone.
+///
+/// boolean -> Boolean, integer -> Int or Long, floating point -> Double,
+/// string -> String. JSON null and every other kind of value are rejected with
+/// a JsonParseError.
+Result LiteralFromJson(const nlohmann::json& json) {
+  if (json.is_null()) {
+    // We don't have type information, so we can't create a proper null literal
+    return JsonParseError("Cannot deserialize null literal without type information");
+  }
+
+  if
(json.is_boolean()) {
+    return Literal::Boolean(json.get());
+  }
+
+  if (json.is_number_integer()) {
+    // Try to fit into int32, otherwise use int64
+    // Consequence: an integer that fits in 32 bits always comes back as an Int
+    // literal, even if it was written from a Long literal.
+    auto val = json.get();
+    if (val >= std::numeric_limits::min() &&
+        val <= std::numeric_limits::max()) {
+      return Literal::Int(static_cast(val));
+    }
+    return Literal::Long(val);
+  }
+
+  if (json.is_number_float()) {
+    return Literal::Double(json.get());
+  }
+
+  if (json.is_string()) {
+    return Literal::String(json.get());
+  }
+
+  // Remaining JSON kinds have no literal mapping here
+  return JsonParseError("Unsupported JSON literal type: {}", SafeDumpJson(json));
+}
+
+/// Serialize a named reference as a bare JSON string holding its name.
+nlohmann::json ToJson(const NamedReference& ref) { return std::string(ref.name()); }
+
+/// Serialize an unbound transform as an object with "type": "transform", the
+/// transform's ToString() under "transform", and the referenced name under
+/// "term".
+nlohmann::json ToJson(const UnboundTransform& transform) {
+  // Note: const_cast is safe here because reference() just returns a shared_ptr
+  // and we're only reading from it. The method is not const due to interface design.
+  auto& mutable_transform = const_cast(transform);
+  nlohmann::json json;
+  json[kType] = kTypeTransform;
+  json[kTransform] = transform.transform()->ToString();
+  json[kTerm] = std::string(mutable_transform.reference()->name());
+  return json;
+}
+
+/// Serialize an unbound predicate to a JSON object.
+///
+/// "type" is always written. "term" is written as the reference name when
+/// reference() returns a non-null pointer. Literals are only written when the
+/// predicate can be cast to the concrete implementation type and holds at
+/// least one literal: set operations write every literal under "values",
+/// other operations write "value" only when exactly one literal is present.
+nlohmann::json ToJson(const UnboundPredicate& predicate) {
+  nlohmann::json json;
+  json[kType] = OperationToJsonType(predicate.op());
+
+  // Get the term from the predicate
+  // Note: const_cast is safe here because reference() just returns a shared_ptr
+  // and we're only reading from it. The method is not const due to interface design.
+  auto& mutable_predicate = const_cast(predicate);
+  auto ref = mutable_predicate.reference();
+  if (ref) {
+    json[kTerm] = std::string(ref->name());
+  }
+
+  // For predicates with values, we need to cast to the concrete type
+  // UnboundPredicateImpl to access literals()
+  const auto* pred_impl =
+      dynamic_cast*>(&predicate);
+  if (pred_impl) {
+    auto literals = pred_impl->literals();
+    if (!literals.empty()) {
+      if (IsSetOperation(predicate.op())) {
+        nlohmann::json values_array = nlohmann::json::array();
+        for (const auto& lit : literals) {
+          values_array.push_back(LiteralToJson(lit));
+        }
+        json[kValues] = std::move(values_array);
+      } else if (literals.size() == 1) {
+        // A non-set predicate with more than one literal writes neither key
+        json[kValue] = LiteralToJson(literals[0]);
+      }
+    }
+  }
+
+  return json;
+}
+
+/// Serialize an expression to JSON.
+///
+/// kTrue and kFalse become the JSON booleans true and false. kAnd and kOr
+/// become objects with "type", "left" and "right"; kNot becomes an object with
+/// "type" and "child"; children are serialized recursively. Every other
+/// operation is delegated to the UnboundPredicate overload when the expression
+/// reports is_unbound_predicate() and the cast succeeds; otherwise only "type"
+/// is written.
+nlohmann::json ToJson(const Expression& expr) {
+  switch (expr.op()) {
+    case Expression::Operation::kTrue:
+      return true;
+
+    case Expression::Operation::kFalse:
+      return false;
+
+    case Expression::Operation::kAnd: {
+      const auto& and_expr = static_cast(expr);
+      nlohmann::json json;
+      json[kType] = kTypeAnd;
+      json[kLeft] = ToJson(*and_expr.left());
+      json[kRight] = ToJson(*and_expr.right());
+      return json;
+    }
+
+    case Expression::Operation::kOr: {
+      const auto& or_expr = static_cast(expr);
+      nlohmann::json json;
+      json[kType] = kTypeOr;
+      json[kLeft] = ToJson(*or_expr.left());
+      json[kRight] = ToJson(*or_expr.right());
+      return json;
+    }
+
+    case Expression::Operation::kNot: {
+      const auto& not_expr = static_cast(expr);
+      nlohmann::json json;
+      json[kType] = kTypeNot;
+      json[kChild] = ToJson(*not_expr.child());
+      return json;
+    }
+
+    default:
+      // Handle predicates
+      if (expr.is_unbound_predicate()) {
+        // Use dynamic_cast due to virtual inheritance
+        const auto* pred = dynamic_cast(&expr);
+        if (pred) {
+          return ToJson(*pred);
+        }
+      }
+      // Fallback for unknown expression types
+      nlohmann::json json;
+      json[kType] = OperationToJsonType(expr.op());
+      return json;
+  }
+}
+
+/// Deserialize an unbound predicate from a JSON object.
+///
+/// Reads "type" and "term" first. Unary operations need nothing more, set
+/// operations additionally require an array under "values", and every other
+/// operation requires a single literal under "value".
+Result> UnboundPredicateFromJson(
+    const
nlohmann::json& json) {
+  ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType));
+  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationFromJsonType(type_str));
+
+  // Parse the term
+  ICEBERG_ASSIGN_OR_RAISE(auto term_json, GetJsonValue(json, kTerm));
+  ICEBERG_ASSIGN_OR_RAISE(auto term, TermFromJsonAsReference(term_json));
+
+  // Handle unary predicates (no value)
+  if (IsUnaryOperation(op)) {
+    ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateImpl::Make(
+                                           op, std::move(term)));
+    return std::shared_ptr(std::move(pred));
+  }
+
+  // Handle set predicates (multiple values)
+  if (IsSetOperation(op)) {
+    // Checked explicitly so the error names the missing key and the predicate
+    if (!json.contains(kValues)) {
+      return JsonParseError("Missing '{}' for set predicate in {}", kValues,
+                            SafeDumpJson(json));
+    }
+    ICEBERG_ASSIGN_OR_RAISE(auto values_json, GetJsonValue(json, kValues));
+    if (!values_json.is_array()) {
+      return JsonParseError("Expected array for '{}' in {}", kValues, SafeDumpJson(json));
+    }
+
+    std::vector values;
+    values.reserve(values_json.size());
+    // The first element that fails to parse aborts the whole predicate
+    for (const auto& val_json : values_json) {
+      ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val_json));
+      values.push_back(std::move(lit));
+    }
+
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto pred,
+        UnboundPredicateImpl::Make(op, std::move(term), std::move(values)));
+    return std::shared_ptr(std::move(pred));
+  }
+
+  // Handle literal predicates (single value)
+  if (!json.contains(kValue)) {
+    return JsonParseError("Missing '{}' for predicate in {}", kValue, SafeDumpJson(json));
+  }
+  ICEBERG_ASSIGN_OR_RAISE(auto value_json, GetJsonValue(json, kValue));
+  ICEBERG_ASSIGN_OR_RAISE(auto value, LiteralFromJson(value_json));
+
+  ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateImpl::Make(
+                                         op, std::move(term), std::move(value)));
+  return std::shared_ptr(std::move(pred));
+}
+
+/// Deserialize an expression from JSON.
+///
+/// A JSON boolean maps to the True or False instance. Any other value must be
+/// an object whose "type" names an operation: "true" and "false" map to the
+/// same instances, "and" and "or" recurse into "left" and "right", "not"
+/// recurses into "child", and every remaining operation is parsed as an
+/// unbound predicate. Errors from nested values are returned to the caller.
+Result> ExpressionFromJson(const nlohmann::json& json) {
+  // Handle boolean literals
+  if (json.is_boolean()) {
+    return json.get() ? std::static_pointer_cast(True::Instance())
+                      : std::static_pointer_cast(False::Instance());
+  }
+
+  if (!json.is_object()) {
+    return JsonParseError("Expected boolean or object for expression: {}",
+                          SafeDumpJson(json));
+  }
+
+  ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType));
+  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationFromJsonType(type_str));
+
+  switch (op) {
+    case Expression::Operation::kTrue:
+      return True::Instance();
+
+    case Expression::Operation::kFalse:
+      return False::Instance();
+
+    case Expression::Operation::kAnd: {
+      ICEBERG_ASSIGN_OR_RAISE(auto left_json, GetJsonValue(json, kLeft));
+      ICEBERG_ASSIGN_OR_RAISE(auto right_json, GetJsonValue(json, kRight));
+      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(left_json));
+      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(right_json));
+      return And::MakeFolded(std::move(left), std::move(right));
+    }
+
+    case Expression::Operation::kOr: {
+      ICEBERG_ASSIGN_OR_RAISE(auto left_json, GetJsonValue(json, kLeft));
+      ICEBERG_ASSIGN_OR_RAISE(auto right_json, GetJsonValue(json, kRight));
+      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(left_json));
+      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(right_json));
+      return Or::MakeFolded(std::move(left), std::move(right));
+    }
+
+    case Expression::Operation::kNot: {
+      ICEBERG_ASSIGN_OR_RAISE(auto child_json, GetJsonValue(json, kChild));
+      ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(child_json));
+      return Not::MakeFolded(std::move(child));
+    }
+
+    default:
+      // Handle predicates
+      ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateFromJson(json));
+      return pred;
+  }
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_internal.h
new file mode 100644
index 000000000..435d54ad0
--- /dev/null
+++ b/src/iceberg/expression/json_internal.h
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/expression/json_internal.h +/// JSON serialization and deserialization for expressions. + +#include + +#include + +#include "iceberg/expression/expression.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +namespace iceberg { + +class UnboundPredicate; +class NamedReference; +class UnboundTransform; +class Literal; + +/// \brief Serializes an Expression to JSON. +/// +/// This function converts an Expression to its JSON representation following +/// the Iceberg REST API specification. It supports: +/// - Boolean constants: serialized as JSON boolean literals +/// - Logical expressions: And, Or, Not +/// - Unbound predicates: comparison, unary, and set operations +/// +/// \param expr The Expression to serialize +/// \return A JSON representation of the expression +ICEBERG_EXPORT nlohmann::json ToJson(const Expression& expr); + +/// \brief Deserializes a JSON object into an Expression. +/// +/// This function parses the provided JSON and creates an Expression object. 
+/// It expects the JSON to follow the Iceberg REST API specification: +/// - JSON boolean true/false for constant expressions +/// - Objects with "type" field for other expressions +/// +/// \param json The JSON representation of an expression +/// \return A shared pointer to the Expression or an error if parsing fails +ICEBERG_EXPORT Result> ExpressionFromJson( + const nlohmann::json& json); + +/// \brief Serializes an unbound predicate to JSON. +/// +/// \param predicate The UnboundPredicate to serialize +/// \return A JSON representation of the predicate +ICEBERG_EXPORT nlohmann::json ToJson(const UnboundPredicate& predicate); + +/// \brief Deserializes a JSON object into an UnboundPredicate. +/// +/// \param json The JSON representation of a predicate +/// \return A shared pointer to the UnboundPredicate or an error if parsing fails +ICEBERG_EXPORT Result> UnboundPredicateFromJson( + const nlohmann::json& json); + +/// \brief Serializes a NamedReference to JSON. +/// +/// \param ref The NamedReference to serialize +/// \return A JSON string representing the reference name +ICEBERG_EXPORT nlohmann::json ToJson(const NamedReference& ref); + +/// \brief Serializes an UnboundTransform to JSON. +/// +/// \param transform The UnboundTransform to serialize +/// \return A JSON object representing the transform term +ICEBERG_EXPORT nlohmann::json ToJson(const UnboundTransform& transform); + +/// \brief Serializes a Literal to JSON. +/// +/// \param literal The Literal to serialize +/// \return A JSON value representing the literal +ICEBERG_EXPORT nlohmann::json LiteralToJson(const Literal& literal); + +/// \brief Deserializes a JSON value into a Literal. +/// +/// \param json The JSON representation of a literal +/// \return A Literal or an error if parsing fails +ICEBERG_EXPORT Result LiteralFromJson(const nlohmann::json& json); + +/// \brief Converts an Expression::Operation to its JSON string representation. 
+/// +/// \param op The operation to convert +/// \return The JSON type string (e.g., "eq", "lt-eq", "is-null") +ICEBERG_EXPORT std::string_view OperationToJsonType(Expression::Operation op); + +/// \brief Converts a JSON type string to an Expression::Operation. +/// +/// \param type_str The JSON type string +/// \return The corresponding Operation or an error if unknown +ICEBERG_EXPORT Result OperationFromJsonType(std::string_view type_str); + +} // namespace iceberg diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index d243a48bf..00fff9f27 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -88,6 +88,7 @@ add_iceberg_test(table_test add_iceberg_test(expression_test SOURCES aggregate_test.cc + expression_json_test.cc expression_test.cc expression_visitor_test.cc inclusive_metrics_evaluator_test.cc diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc new file mode 100644 index 000000000..08f09eb94 --- /dev/null +++ b/src/iceberg/test/expression_json_test.cc @@ -0,0 +1,522 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/expression/json_internal.h" + +#include +#include + +#include +#include +#include + +#include "iceberg/expression/expression.h" +#include "iceberg/expression/expressions.h" +#include "iceberg/expression/literal.h" +#include "iceberg/expression/predicate.h" +#include "iceberg/expression/term.h" +#include "iceberg/test/matchers.h" + +namespace iceberg { + +class ExpressionJsonTest : public ::testing::Test { + protected: + // Helper to test round-trip serialization + // Uses string comparison since expressions may have different internal identity + // but the same semantic meaning (i.e., ToString() output matches) + void TestRoundTrip(const Expression& expr) { + auto json = ToJson(expr); + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()) << "Failed to parse JSON: " << json.dump(); + EXPECT_EQ(expr.ToString(), result.value()->ToString()) + << "Round-trip failed.\nJSON: " << json.dump(); + } +}; + +// Test boolean constant expressions +TEST_F(ExpressionJsonTest, TrueExpression) { + auto expr = True::Instance(); + auto json = ToJson(*expr); + + // True should serialize as JSON boolean true + EXPECT_TRUE(json.is_boolean()); + EXPECT_TRUE(json.get()); + + // Parse back + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result.value()->op(), Expression::Operation::kTrue); +} + +TEST_F(ExpressionJsonTest, FalseExpression) { + auto expr = False::Instance(); + auto json = ToJson(*expr); + + // False should serialize as JSON boolean false + EXPECT_TRUE(json.is_boolean()); + EXPECT_FALSE(json.get()); + + // Parse back + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result.value()->op(), Expression::Operation::kFalse); +} + +// Test And expression +TEST_F(ExpressionJsonTest, AndExpression) { + auto left = Expressions::GreaterThanOrEqual("col1", Literal::Int(50)); + auto right = Expressions::LessThan("col2", Literal::Int(100)); + auto expr = Expressions::And(left, 
right); + + auto json = ToJson(*expr); + + // Verify JSON structure + EXPECT_EQ(json["type"], "and"); + EXPECT_TRUE(json.contains("left")); + EXPECT_TRUE(json.contains("right")); + EXPECT_EQ(json["left"]["type"], "gt-eq"); + EXPECT_EQ(json["right"]["type"], "lt"); + + // Round-trip test + TestRoundTrip(*expr); +} + +// Test Or expression +TEST_F(ExpressionJsonTest, OrExpression) { + auto left = Expressions::Equal("status", Literal::String("active")); + auto right = Expressions::Equal("status", Literal::String("pending")); + auto expr = Expressions::Or(left, right); + + auto json = ToJson(*expr); + + // Verify JSON structure + EXPECT_EQ(json["type"], "or"); + EXPECT_EQ(json["left"]["type"], "eq"); + EXPECT_EQ(json["right"]["type"], "eq"); + + // Round-trip test + TestRoundTrip(*expr); +} + +// Test Not expression +TEST_F(ExpressionJsonTest, NotExpression) { + auto child = Expressions::IsNull("col"); + auto expr = Expressions::Not(child); + + auto json = ToJson(*expr); + + // Verify JSON structure + EXPECT_EQ(json["type"], "not"); + EXPECT_TRUE(json.contains("child")); + EXPECT_EQ(json["child"]["type"], "is-null"); + + // Round-trip test + TestRoundTrip(*expr); +} + +// Test unary predicates +TEST_F(ExpressionJsonTest, IsNullPredicate) { + auto expr = Expressions::IsNull("column_name"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "is-null"); + EXPECT_EQ(json["term"], "column_name"); + EXPECT_FALSE(json.contains("value")); + EXPECT_FALSE(json.contains("values")); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, NotNullPredicate) { + auto expr = Expressions::NotNull("column_name"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "not-null"); + EXPECT_EQ(json["term"], "column_name"); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, IsNanPredicate) { + auto expr = Expressions::IsNaN("float_col"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "is-nan"); + EXPECT_EQ(json["term"], "float_col"); + + 
TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, NotNanPredicate) { + auto expr = Expressions::NotNaN("float_col"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "not-nan"); + EXPECT_EQ(json["term"], "float_col"); + + TestRoundTrip(*expr); +} + +// Test comparison predicates +TEST_F(ExpressionJsonTest, EqualPredicate) { + auto expr = Expressions::Equal("name", Literal::String("test")); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "eq"); + EXPECT_EQ(json["term"], "name"); + EXPECT_EQ(json["value"], "test"); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, NotEqualPredicate) { + auto expr = Expressions::NotEqual("count", Literal::Int(0)); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "not-eq"); + EXPECT_EQ(json["term"], "count"); + EXPECT_EQ(json["value"], 0); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, LessThanPredicate) { + auto expr = Expressions::LessThan("age", Literal::Int(18)); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "lt"); + EXPECT_EQ(json["term"], "age"); + EXPECT_EQ(json["value"], 18); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, LessThanOrEqualPredicate) { + auto expr = Expressions::LessThanOrEqual("score", Literal::Double(99.5)); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "lt-eq"); + EXPECT_EQ(json["term"], "score"); + EXPECT_DOUBLE_EQ(json["value"].get(), 99.5); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, GreaterThanPredicate) { + auto expr = Expressions::GreaterThan("price", Literal::Long(1000)); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "gt"); + EXPECT_EQ(json["term"], "price"); + EXPECT_EQ(json["value"], 1000); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, GreaterThanOrEqualPredicate) { + auto expr = Expressions::GreaterThanOrEqual("quantity", Literal::Int(1)); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "gt-eq"); + EXPECT_EQ(json["term"], "quantity"); + 
EXPECT_EQ(json["value"], 1); + + TestRoundTrip(*expr); +} + +// Test string predicates +TEST_F(ExpressionJsonTest, StartsWithPredicate) { + auto expr = Expressions::StartsWith("path", "/home/user"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "starts-with"); + EXPECT_EQ(json["term"], "path"); + EXPECT_EQ(json["value"], "/home/user"); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, NotStartsWithPredicate) { + auto expr = Expressions::NotStartsWith("path", "/tmp"); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "not-starts-with"); + EXPECT_EQ(json["term"], "path"); + EXPECT_EQ(json["value"], "/tmp"); + + TestRoundTrip(*expr); +} + +// Test set predicates +TEST_F(ExpressionJsonTest, InPredicate) { + auto expr = Expressions::In("status", + {Literal::String("active"), Literal::String("pending"), + Literal::String("review")}); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "in"); + EXPECT_EQ(json["term"], "status"); + EXPECT_TRUE(json.contains("values")); + EXPECT_TRUE(json["values"].is_array()); + EXPECT_EQ(json["values"].size(), 3); + + TestRoundTrip(*expr); +} + +TEST_F(ExpressionJsonTest, NotInPredicate) { + auto expr = Expressions::NotIn("id", {Literal::Int(1), Literal::Int(2), Literal::Int(3)}); + + auto json = ToJson(*expr); + + EXPECT_EQ(json["type"], "not-in"); + EXPECT_EQ(json["term"], "id"); + EXPECT_TRUE(json["values"].is_array()); + EXPECT_EQ(json["values"].size(), 3); + + TestRoundTrip(*expr); +} + +// Test nested expressions +TEST_F(ExpressionJsonTest, NestedAndOr) { + auto cond1 = Expressions::Equal("a", Literal::Int(1)); + auto cond2 = Expressions::Equal("b", Literal::Int(2)); + auto cond3 = Expressions::Equal("c", Literal::Int(3)); + + auto or_expr = Expressions::Or(cond1, cond2); + auto and_expr = Expressions::And(or_expr, cond3); + + auto json = ToJson(*and_expr); + + EXPECT_EQ(json["type"], "and"); + EXPECT_EQ(json["left"]["type"], "or"); + EXPECT_EQ(json["right"]["type"], "eq"); + + 
TestRoundTrip(*and_expr); +} + +// Test deserialization from JSON strings (matching Java format) +TEST_F(ExpressionJsonTest, ParseAndExpression) { + nlohmann::json json = R"({ + "type": "and", + "left": { + "type": "gt-eq", + "term": "column-name-1", + "value": 50 + }, + "right": { + "type": "in", + "term": "column-name-2", + "values": ["one", "two"] + } + })"_json; + + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + + auto expr = result.value(); + EXPECT_EQ(expr->op(), Expression::Operation::kAnd); + + const auto& and_expr = static_cast(*expr); + EXPECT_EQ(and_expr.left()->op(), Expression::Operation::kGtEq); + EXPECT_EQ(and_expr.right()->op(), Expression::Operation::kIn); +} + +TEST_F(ExpressionJsonTest, ParseOrExpression) { + nlohmann::json json = R"({ + "type": "or", + "left": { + "type": "lt", + "term": "column-name-1", + "value": 50 + }, + "right": { + "type": "not-null", + "term": "column-name-2" + } + })"_json; + + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + + auto expr = result.value(); + EXPECT_EQ(expr->op(), Expression::Operation::kOr); +} + +TEST_F(ExpressionJsonTest, ParseNotExpression) { + nlohmann::json json = R"({ + "type": "not", + "child": { + "type": "gt-eq", + "term": "column-name-1", + "value": 50 + } + })"_json; + + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + + auto expr = result.value(); + EXPECT_EQ(expr->op(), Expression::Operation::kNot); +} + +// Test literal serialization +TEST_F(ExpressionJsonTest, LiteralSerialization) { + // Boolean + auto bool_json = LiteralToJson(Literal::Boolean(true)); + EXPECT_TRUE(bool_json.is_boolean()); + EXPECT_TRUE(bool_json.get()); + + // Integer + auto int_json = LiteralToJson(Literal::Int(42)); + EXPECT_TRUE(int_json.is_number_integer()); + EXPECT_EQ(int_json.get(), 42); + + // Long + auto long_json = LiteralToJson(Literal::Long(9876543210L)); + EXPECT_TRUE(long_json.is_number_integer()); + EXPECT_EQ(long_json.get(), 
9876543210L); + + // Float + auto float_json = LiteralToJson(Literal::Float(3.14f)); + EXPECT_TRUE(float_json.is_number_float()); + + // Double + auto double_json = LiteralToJson(Literal::Double(2.718281828)); + EXPECT_TRUE(double_json.is_number_float()); + + // String + auto string_json = LiteralToJson(Literal::String("hello")); + EXPECT_TRUE(string_json.is_string()); + EXPECT_EQ(string_json.get(), "hello"); +} + +// Test operation to JSON type conversion +TEST_F(ExpressionJsonTest, OperationToJsonType) { + EXPECT_EQ(OperationToJsonType(Expression::Operation::kTrue), "true"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kFalse), "false"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kAnd), "and"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kOr), "or"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNot), "not"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kEq), "eq"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotEq), "not-eq"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kLt), "lt"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kLtEq), "lt-eq"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kGt), "gt"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kGtEq), "gt-eq"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kIn), "in"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotIn), "not-in"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kIsNull), "is-null"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotNull), "not-null"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kIsNan), "is-nan"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotNan), "not-nan"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kStartsWith), "starts-with"); + EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotStartsWith), "not-starts-with"); +} + +// Test JSON type to operation conversion +TEST_F(ExpressionJsonTest, 
OperationFromJsonType) { + // Helper to test operation conversion + auto test_op = [](std::string_view type_str, Expression::Operation expected) { + auto result = OperationFromJsonType(type_str); + ASSERT_THAT(result, IsOk()) << "Failed to parse: " << type_str; + EXPECT_EQ(result.value(), expected) << "Mismatch for: " << type_str; + }; + + test_op("true", Expression::Operation::kTrue); + test_op("false", Expression::Operation::kFalse); + test_op("and", Expression::Operation::kAnd); + test_op("or", Expression::Operation::kOr); + test_op("not", Expression::Operation::kNot); + test_op("eq", Expression::Operation::kEq); + test_op("not-eq", Expression::Operation::kNotEq); + test_op("lt", Expression::Operation::kLt); + test_op("lt-eq", Expression::Operation::kLtEq); + test_op("gt", Expression::Operation::kGt); + test_op("gt-eq", Expression::Operation::kGtEq); + test_op("in", Expression::Operation::kIn); + test_op("not-in", Expression::Operation::kNotIn); + test_op("is-null", Expression::Operation::kIsNull); + test_op("not-null", Expression::Operation::kNotNull); + test_op("is-nan", Expression::Operation::kIsNan); + test_op("not-nan", Expression::Operation::kNotNan); + test_op("starts-with", Expression::Operation::kStartsWith); + test_op("not-starts-with", Expression::Operation::kNotStartsWith); + + // Unknown type should fail + auto result = OperationFromJsonType("unknown-type"); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +// Test error cases +TEST_F(ExpressionJsonTest, InvalidJsonType) { + nlohmann::json json = R"({"type": "invalid-op", "term": "col"})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingTypeField) { + nlohmann::json json = R"({"term": "col", "value": 42})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingTermField) { + nlohmann::json json 
= R"({"type": "eq", "value": 42})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingValueForLiteralPredicate) { + nlohmann::json json = R"({"type": "eq", "term": "col"})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingValuesForSetPredicate) { + nlohmann::json json = R"({"type": "in", "term": "col"})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingLeftForAnd) { + nlohmann::json json = R"({"type": "and", "right": true})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +TEST_F(ExpressionJsonTest, MissingChildForNot) { + nlohmann::json json = R"({"type": "not"})"_json; + auto result = ExpressionFromJson(json); + EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); +} + +} // namespace iceberg From ef343adf24638ca10d6652d28024a300904d7834 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sat, 24 Jan 2026 13:01:12 -0700 Subject: [PATCH 2/8] scope initial change to boolean support only --- src/iceberg/expression/json_internal.cc | 479 +---------------------- src/iceberg/expression/json_internal.h | 94 +---- src/iceberg/test/expression_json_test.cc | 441 --------------------- 3 files changed, 27 insertions(+), 987 deletions(-) diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_internal.cc index ca66ed8bf..ed8d2df84 100644 --- a/src/iceberg/expression/json_internal.cc +++ b/src/iceberg/expression/json_internal.cc @@ -28,343 +28,19 @@ #include "iceberg/expression/expressions.h" #include "iceberg/expression/literal.h" -#include "iceberg/expression/predicate.h" -#include "iceberg/expression/term.h" -#include "iceberg/transform.h" -#include "iceberg/type.h" #include 
"iceberg/util/json_util_internal.h" #include "iceberg/util/macros.h" namespace iceberg { -namespace { - -// JSON field names -constexpr std::string_view kType = "type"; -constexpr std::string_view kTerm = "term"; -constexpr std::string_view kValue = "value"; -constexpr std::string_view kValues = "values"; -constexpr std::string_view kLeft = "left"; -constexpr std::string_view kRight = "right"; -constexpr std::string_view kChild = "child"; -constexpr std::string_view kTransform = "transform"; - -// Expression type strings -constexpr std::string_view kTypeTrue = "true"; -constexpr std::string_view kTypeFalse = "false"; -constexpr std::string_view kTypeAnd = "and"; -constexpr std::string_view kTypeOr = "or"; -constexpr std::string_view kTypeNot = "not"; -constexpr std::string_view kTypeEq = "eq"; -constexpr std::string_view kTypeNotEq = "not-eq"; -constexpr std::string_view kTypeLt = "lt"; -constexpr std::string_view kTypeLtEq = "lt-eq"; -constexpr std::string_view kTypeGt = "gt"; -constexpr std::string_view kTypeGtEq = "gt-eq"; -constexpr std::string_view kTypeIn = "in"; -constexpr std::string_view kTypeNotIn = "not-in"; -constexpr std::string_view kTypeIsNull = "is-null"; -constexpr std::string_view kTypeNotNull = "not-null"; -constexpr std::string_view kTypeIsNan = "is-nan"; -constexpr std::string_view kTypeNotNan = "not-nan"; -constexpr std::string_view kTypeStartsWith = "starts-with"; -constexpr std::string_view kTypeNotStartsWith = "not-starts-with"; -constexpr std::string_view kTypeReference = "reference"; - -// Term type for transform -constexpr std::string_view kTypeTransform = "transform"; - -/// Serialize a term (NamedReference or UnboundTransform) to JSON -nlohmann::json TermToJson(const Term& term) { - if (term.kind() == Term::Kind::kReference) { - // Simple references are serialized as plain strings - return std::string(dynamic_cast(term).name()); - } else if (term.kind() == Term::Kind::kTransform) { - // Note: const_cast is safe here because reference() 
just returns a shared_ptr - // and we're only reading from it. The method is not const due to interface design. - auto& transform_term = - const_cast(dynamic_cast(term)); - nlohmann::json json; - json[kType] = kTypeTransform; - json[kTransform] = transform_term.transform()->ToString(); - json[kTerm] = std::string(transform_term.reference()->name()); - return json; - } - // Fallback for unknown term types - return nlohmann::json{}; -} - -/// Parse a term from JSON (returns NamedReference or UnboundTransform) -Result>> TermFromJsonAsReference( - const nlohmann::json& json) { - // Handle string term (simple reference) - if (json.is_string()) { - ICEBERG_ASSIGN_OR_RAISE(auto name, GetTypedJsonValue(json)); - ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReference::Make(std::move(name))); - return std::shared_ptr(std::move(ref)); - } - - // Handle object term - if (json.is_object()) { - ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType)); - - if (type_str == kTypeReference) { - ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue(json, kTerm)); - ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReference::Make(std::move(name))); - return std::shared_ptr(std::move(ref)); - } - - if (type_str == kTypeTransform) { - ICEBERG_ASSIGN_OR_RAISE(auto transform_str, GetJsonValue(json, kTransform)); - ICEBERG_ASSIGN_OR_RAISE(auto term_name, GetJsonValue(json, kTerm)); - ICEBERG_ASSIGN_OR_RAISE(auto transform, TransformFromString(transform_str)); - ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReference::Make(std::move(term_name))); - // For UnboundTransform, we need to return it as UnboundTerm - // However, UnboundTransform binds to BoundTransform, not BoundReference. - // The Java implementation handles this by using a common Term interface. - // For now, we'll handle this case by returning an error for transforms - // when expecting a reference term. 
- return JsonParseError("Transform terms are not supported in this context: {}", - SafeDumpJson(json)); - } - - return JsonParseError("Unknown term type '{}' in {}", type_str, SafeDumpJson(json)); - } - - return JsonParseError("Invalid term format, expected string or object: {}", - SafeDumpJson(json)); -} - -/// Check if an operation is a unary predicate (no values) -bool IsUnaryOperation(Expression::Operation op) { - switch (op) { - case Expression::Operation::kIsNull: - case Expression::Operation::kNotNull: - case Expression::Operation::kIsNan: - case Expression::Operation::kNotNan: - return true; - default: - return false; - } -} - -/// Check if an operation is a set predicate (multiple values) -bool IsSetOperation(Expression::Operation op) { - switch (op) { - case Expression::Operation::kIn: - case Expression::Operation::kNotIn: - return true; - default: - return false; - } -} - -} // namespace - -std::string_view OperationToJsonType(Expression::Operation op) { - switch (op) { - case Expression::Operation::kTrue: - return kTypeTrue; - case Expression::Operation::kFalse: - return kTypeFalse; - case Expression::Operation::kAnd: - return kTypeAnd; - case Expression::Operation::kOr: - return kTypeOr; - case Expression::Operation::kNot: - return kTypeNot; - case Expression::Operation::kEq: - return kTypeEq; - case Expression::Operation::kNotEq: - return kTypeNotEq; - case Expression::Operation::kLt: - return kTypeLt; - case Expression::Operation::kLtEq: - return kTypeLtEq; - case Expression::Operation::kGt: - return kTypeGt; - case Expression::Operation::kGtEq: - return kTypeGtEq; - case Expression::Operation::kIn: - return kTypeIn; - case Expression::Operation::kNotIn: - return kTypeNotIn; - case Expression::Operation::kIsNull: - return kTypeIsNull; - case Expression::Operation::kNotNull: - return kTypeNotNull; - case Expression::Operation::kIsNan: - return kTypeIsNan; - case Expression::Operation::kNotNan: - return kTypeNotNan; - case 
Expression::Operation::kStartsWith: - return kTypeStartsWith; - case Expression::Operation::kNotStartsWith: - return kTypeNotStartsWith; - default: - return "unknown"; - } -} - -Result OperationFromJsonType(std::string_view type_str) { - if (type_str == kTypeTrue) return Expression::Operation::kTrue; - if (type_str == kTypeFalse) return Expression::Operation::kFalse; - if (type_str == kTypeAnd) return Expression::Operation::kAnd; - if (type_str == kTypeOr) return Expression::Operation::kOr; - if (type_str == kTypeNot) return Expression::Operation::kNot; - if (type_str == kTypeEq) return Expression::Operation::kEq; - if (type_str == kTypeNotEq) return Expression::Operation::kNotEq; - if (type_str == kTypeLt) return Expression::Operation::kLt; - if (type_str == kTypeLtEq) return Expression::Operation::kLtEq; - if (type_str == kTypeGt) return Expression::Operation::kGt; - if (type_str == kTypeGtEq) return Expression::Operation::kGtEq; - if (type_str == kTypeIn) return Expression::Operation::kIn; - if (type_str == kTypeNotIn) return Expression::Operation::kNotIn; - if (type_str == kTypeIsNull) return Expression::Operation::kIsNull; - if (type_str == kTypeNotNull) return Expression::Operation::kNotNull; - if (type_str == kTypeIsNan) return Expression::Operation::kIsNan; - if (type_str == kTypeNotNan) return Expression::Operation::kNotNan; - if (type_str == kTypeStartsWith) return Expression::Operation::kStartsWith; - if (type_str == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith; - - return JsonParseError("Unknown expression type: {}", type_str); -} - -nlohmann::json LiteralToJson(const Literal& literal) { - if (literal.IsNull()) { - return nlohmann::json(nullptr); - } - - const auto& value = literal.value(); - const auto type_id = literal.type()->type_id(); - - // Handle based on the variant type - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - if 
(std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value); - } - if (std::holds_alternative(value)) { - return std::get(value).ToString(); - } - if (std::holds_alternative>(value)) { - // Binary and Fixed are serialized as hex strings - const auto& bytes = std::get>(value); - std::string hex; - hex.reserve(bytes.size() * 2); - for (uint8_t byte : bytes) { - hex += std::format("{:02x}", byte); - } - return hex; - } - if (std::holds_alternative(value)) { - // Decimal is serialized as string representation - const auto& decimal = std::get(value); - if (type_id == TypeId::kDecimal) { - const auto& decimal_type = static_cast(*literal.type()); - auto result = decimal.ToString(decimal_type.scale()); - if (result.has_value()) { - return result.value(); - } - } - // Fallback to integer string representation - return decimal.ToIntegerString(); - } - - // Fallback: use ToString() - return literal.ToString(); -} - -Result LiteralFromJson(const nlohmann::json& json) { - if (json.is_null()) { - // We don't have type information, so we can't create a proper null literal - return JsonParseError("Cannot deserialize null literal without type information"); - } +Result> ExpressionFromJson(const nlohmann::json& json) { + // Handle boolean literals if (json.is_boolean()) { - return Literal::Boolean(json.get()); - } - - if (json.is_number_integer()) { - // Try to fit into int32, otherwise use int64 - auto val = json.get(); - if (val >= std::numeric_limits::min() && - val <= std::numeric_limits::max()) { - return Literal::Int(static_cast(val)); - } - return Literal::Long(val); - } - - if (json.is_number_float()) { - return Literal::Double(json.get()); - } - - if (json.is_string()) { - return Literal::String(json.get()); - } - - return JsonParseError("Unsupported JSON literal type: 
{}", SafeDumpJson(json)); -} - -nlohmann::json ToJson(const NamedReference& ref) { return std::string(ref.name()); } - -nlohmann::json ToJson(const UnboundTransform& transform) { - // Note: const_cast is safe here because reference() just returns a shared_ptr - // and we're only reading from it. The method is not const due to interface design. - auto& mutable_transform = const_cast(transform); - nlohmann::json json; - json[kType] = kTypeTransform; - json[kTransform] = transform.transform()->ToString(); - json[kTerm] = std::string(mutable_transform.reference()->name()); - return json; -} - -nlohmann::json ToJson(const UnboundPredicate& predicate) { - nlohmann::json json; - json[kType] = OperationToJsonType(predicate.op()); - - // Get the term from the predicate - // Note: const_cast is safe here because reference() just returns a shared_ptr - // and we're only reading from it. The method is not const due to interface design. - auto& mutable_predicate = const_cast(predicate); - auto ref = mutable_predicate.reference(); - if (ref) { - json[kTerm] = std::string(ref->name()); - } - - // For predicates with values, we need to cast to the concrete type - // UnboundPredicateImpl to access literals() - const auto* pred_impl = - dynamic_cast*>(&predicate); - if (pred_impl) { - auto literals = pred_impl->literals(); - if (!literals.empty()) { - if (IsSetOperation(predicate.op())) { - nlohmann::json values_array = nlohmann::json::array(); - for (const auto& lit : literals) { - values_array.push_back(LiteralToJson(lit)); - } - json[kValues] = std::move(values_array); - } else if (literals.size() == 1) { - json[kValue] = LiteralToJson(literals[0]); - } - } + return json.get() ? 
std::static_pointer_cast(True::Instance()) + : std::static_pointer_cast(False::Instance()); } - - return json; + return JsonParseError("Only boolean literals are currently supported"); } nlohmann::json ToJson(const Expression& expr) { @@ -374,150 +50,17 @@ nlohmann::json ToJson(const Expression& expr) { case Expression::Operation::kFalse: return false; - - case Expression::Operation::kAnd: { - const auto& and_expr = static_cast(expr); - nlohmann::json json; - json[kType] = kTypeAnd; - json[kLeft] = ToJson(*and_expr.left()); - json[kRight] = ToJson(*and_expr.right()); - return json; - } - - case Expression::Operation::kOr: { - const auto& or_expr = static_cast(expr); - nlohmann::json json; - json[kType] = kTypeOr; - json[kLeft] = ToJson(*or_expr.left()); - json[kRight] = ToJson(*or_expr.right()); - return json; - } - - case Expression::Operation::kNot: { - const auto& not_expr = static_cast(expr); - nlohmann::json json; - json[kType] = kTypeNot; - json[kChild] = ToJson(*not_expr.child()); - return json; - } - default: - // Handle predicates - if (expr.is_unbound_predicate()) { - // Use dynamic_cast due to virtual inheritance - const auto* pred = dynamic_cast(&expr); - if (pred) { - return ToJson(*pred); - } - } - // Fallback for unknown expression types - nlohmann::json json; - json[kType] = OperationToJsonType(expr.op()); - return json; - } -} - -Result> UnboundPredicateFromJson( - const nlohmann::json& json) { - ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType)); - ICEBERG_ASSIGN_OR_RAISE(auto op, OperationFromJsonType(type_str)); - - // Parse the term - ICEBERG_ASSIGN_OR_RAISE(auto term_json, GetJsonValue(json, kTerm)); - ICEBERG_ASSIGN_OR_RAISE(auto term, TermFromJsonAsReference(term_json)); - - // Handle unary predicates (no value) - if (IsUnaryOperation(op)) { - ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateImpl::Make( - op, std::move(term))); - return std::shared_ptr(std::move(pred)); - } - - // Handle set predicates (multiple values) - if 
(IsSetOperation(op)) { - if (!json.contains(kValues)) { - return JsonParseError("Missing '{}' for set predicate in {}", kValues, - SafeDumpJson(json)); - } - ICEBERG_ASSIGN_OR_RAISE(auto values_json, GetJsonValue(json, kValues)); - if (!values_json.is_array()) { - return JsonParseError("Expected array for '{}' in {}", kValues, SafeDumpJson(json)); - } - - std::vector values; - values.reserve(values_json.size()); - for (const auto& val_json : values_json) { - ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val_json)); - values.push_back(std::move(lit)); - } - - ICEBERG_ASSIGN_OR_RAISE( - auto pred, - UnboundPredicateImpl::Make(op, std::move(term), std::move(values))); - return std::shared_ptr(std::move(pred)); - } - - // Handle literal predicates (single value) - if (!json.contains(kValue)) { - return JsonParseError("Missing '{}' for predicate in {}", kValue, SafeDumpJson(json)); + throw std::logic_error("Only booleans are currently supported"); } - ICEBERG_ASSIGN_OR_RAISE(auto value_json, GetJsonValue(json, kValue)); - ICEBERG_ASSIGN_OR_RAISE(auto value, LiteralFromJson(value_json)); - - ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateImpl::Make( - op, std::move(term), std::move(value))); - return std::shared_ptr(std::move(pred)); } -Result> ExpressionFromJson(const nlohmann::json& json) { - // Handle boolean literals - if (json.is_boolean()) { - return json.get() ? 
std::static_pointer_cast(True::Instance()) - : std::static_pointer_cast(False::Instance()); +#define ICEBERG_DEFINE_FROM_JSON(Model) \ + template <> \ + Result> FromJson(const nlohmann::json& json) { \ + return Model##FromJson(json); \ } - if (!json.is_object()) { - return JsonParseError("Expected boolean or object for expression: {}", - SafeDumpJson(json)); - } - - ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType)); - ICEBERG_ASSIGN_OR_RAISE(auto op, OperationFromJsonType(type_str)); - - switch (op) { - case Expression::Operation::kTrue: - return True::Instance(); - - case Expression::Operation::kFalse: - return False::Instance(); - - case Expression::Operation::kAnd: { - ICEBERG_ASSIGN_OR_RAISE(auto left_json, GetJsonValue(json, kLeft)); - ICEBERG_ASSIGN_OR_RAISE(auto right_json, GetJsonValue(json, kRight)); - ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(left_json)); - ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(right_json)); - return And::MakeFolded(std::move(left), std::move(right)); - } - - case Expression::Operation::kOr: { - ICEBERG_ASSIGN_OR_RAISE(auto left_json, GetJsonValue(json, kLeft)); - ICEBERG_ASSIGN_OR_RAISE(auto right_json, GetJsonValue(json, kRight)); - ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(left_json)); - ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(right_json)); - return Or::MakeFolded(std::move(left), std::move(right)); - } - - case Expression::Operation::kNot: { - ICEBERG_ASSIGN_OR_RAISE(auto child_json, GetJsonValue(json, kChild)); - ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(child_json)); - return Not::MakeFolded(std::move(child)); - } - - default: - // Handle predicates - ICEBERG_ASSIGN_OR_RAISE(auto pred, UnboundPredicateFromJson(json)); - return pred; - } -} +ICEBERG_DEFINE_FROM_JSON(Expression) } // namespace iceberg diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_internal.h index 435d54ad0..aede8ca9d 100644 --- 
a/src/iceberg/expression/json_internal.h +++ b/src/iceberg/expression/json_internal.h @@ -19,95 +19,33 @@ #pragma once -/// \file iceberg/expression/json_internal.h -/// JSON serialization and deserialization for expressions. - -#include - #include #include "iceberg/expression/expression.h" #include "iceberg/iceberg_export.h" #include "iceberg/result.h" -namespace iceberg { - -class UnboundPredicate; -class NamedReference; -class UnboundTransform; -class Literal; - -/// \brief Serializes an Expression to JSON. -/// -/// This function converts an Expression to its JSON representation following -/// the Iceberg REST API specification. It supports: -/// - Boolean constants: serialized as JSON boolean literals -/// - Logical expressions: And, Or, Not -/// - Unbound predicates: comparison, unary, and set operations -/// -/// \param expr The Expression to serialize -/// \return A JSON representation of the expression -ICEBERG_EXPORT nlohmann::json ToJson(const Expression& expr); - -/// \brief Deserializes a JSON object into an Expression. -/// -/// This function parses the provided JSON and creates an Expression object. -/// It expects the JSON to follow the Iceberg REST API specification: -/// - JSON boolean true/false for constant expressions -/// - Objects with "type" field for other expressions -/// -/// \param json The JSON representation of an expression -/// \return A shared pointer to the Expression or an error if parsing fails -ICEBERG_EXPORT Result> ExpressionFromJson( - const nlohmann::json& json); - -/// \brief Serializes an unbound predicate to JSON. -/// -/// \param predicate The UnboundPredicate to serialize -/// \return A JSON representation of the predicate -ICEBERG_EXPORT nlohmann::json ToJson(const UnboundPredicate& predicate); - -/// \brief Deserializes a JSON object into an UnboundPredicate. 
-/// -/// \param json The JSON representation of a predicate -/// \return A shared pointer to the UnboundPredicate or an error if parsing fails -ICEBERG_EXPORT Result> UnboundPredicateFromJson( - const nlohmann::json& json); +/// \file iceberg/expression/json_internal.h +/// JSON serialization and deserialization for expressions. -/// \brief Serializes a NamedReference to JSON. -/// -/// \param ref The NamedReference to serialize -/// \return A JSON string representing the reference name -ICEBERG_EXPORT nlohmann::json ToJson(const NamedReference& ref); +namespace iceberg { -/// \brief Serializes an UnboundTransform to JSON. -/// -/// \param transform The UnboundTransform to serialize -/// \return A JSON object representing the transform term -ICEBERG_EXPORT nlohmann::json ToJson(const UnboundTransform& transform); -/// \brief Serializes a Literal to JSON. -/// -/// \param literal The Literal to serialize -/// \return A JSON value representing the literal -ICEBERG_EXPORT nlohmann::json LiteralToJson(const Literal& literal); +template +Result FromJson(const nlohmann::json& json); -/// \brief Deserializes a JSON value into a Literal. -/// -/// \param json The JSON representation of a literal -/// \return A Literal or an error if parsing fails -ICEBERG_EXPORT Result LiteralFromJson(const nlohmann::json& json); +#define ICEBERG_DECLARE_JSON_SERDE(Model) \ + Result> Model##FromJson(const nlohmann::json& json); \ + \ + template \ + Result> FromJson(const nlohmann::json& json); \ + \ + nlohmann::json ToJson(const Model& model); -/// \brief Converts an Expression::Operation to its JSON string representation. -/// -/// \param op The operation to convert -/// \return The JSON type string (e.g., "eq", "lt-eq", "is-null") -ICEBERG_EXPORT std::string_view OperationToJsonType(Expression::Operation op); +/// \note Don't forget to add `ICEBERG_DEFINE_FROM_JSON` to the end of +/// `json_internal.cc` to define the `FromJson` function for the model. 
+ICEBERG_DECLARE_JSON_SERDE(Expression) -/// \brief Converts a JSON type string to an Expression::Operation. -/// -/// \param type_str The JSON type string -/// \return The corresponding Operation or an error if unknown -ICEBERG_EXPORT Result OperationFromJsonType(std::string_view type_str); +#undef ICEBERG_DECLARE_JSON_SERDE } // namespace iceberg diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc index 08f09eb94..6f551d2ad 100644 --- a/src/iceberg/test/expression_json_test.cc +++ b/src/iceberg/test/expression_json_test.cc @@ -78,445 +78,4 @@ TEST_F(ExpressionJsonTest, FalseExpression) { EXPECT_EQ(result.value()->op(), Expression::Operation::kFalse); } -// Test And expression -TEST_F(ExpressionJsonTest, AndExpression) { - auto left = Expressions::GreaterThanOrEqual("col1", Literal::Int(50)); - auto right = Expressions::LessThan("col2", Literal::Int(100)); - auto expr = Expressions::And(left, right); - - auto json = ToJson(*expr); - - // Verify JSON structure - EXPECT_EQ(json["type"], "and"); - EXPECT_TRUE(json.contains("left")); - EXPECT_TRUE(json.contains("right")); - EXPECT_EQ(json["left"]["type"], "gt-eq"); - EXPECT_EQ(json["right"]["type"], "lt"); - - // Round-trip test - TestRoundTrip(*expr); -} - -// Test Or expression -TEST_F(ExpressionJsonTest, OrExpression) { - auto left = Expressions::Equal("status", Literal::String("active")); - auto right = Expressions::Equal("status", Literal::String("pending")); - auto expr = Expressions::Or(left, right); - - auto json = ToJson(*expr); - - // Verify JSON structure - EXPECT_EQ(json["type"], "or"); - EXPECT_EQ(json["left"]["type"], "eq"); - EXPECT_EQ(json["right"]["type"], "eq"); - - // Round-trip test - TestRoundTrip(*expr); -} - -// Test Not expression -TEST_F(ExpressionJsonTest, NotExpression) { - auto child = Expressions::IsNull("col"); - auto expr = Expressions::Not(child); - - auto json = ToJson(*expr); - - // Verify JSON structure - EXPECT_EQ(json["type"], "not"); - 
EXPECT_TRUE(json.contains("child")); - EXPECT_EQ(json["child"]["type"], "is-null"); - - // Round-trip test - TestRoundTrip(*expr); -} - -// Test unary predicates -TEST_F(ExpressionJsonTest, IsNullPredicate) { - auto expr = Expressions::IsNull("column_name"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "is-null"); - EXPECT_EQ(json["term"], "column_name"); - EXPECT_FALSE(json.contains("value")); - EXPECT_FALSE(json.contains("values")); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, NotNullPredicate) { - auto expr = Expressions::NotNull("column_name"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "not-null"); - EXPECT_EQ(json["term"], "column_name"); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, IsNanPredicate) { - auto expr = Expressions::IsNaN("float_col"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "is-nan"); - EXPECT_EQ(json["term"], "float_col"); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, NotNanPredicate) { - auto expr = Expressions::NotNaN("float_col"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "not-nan"); - EXPECT_EQ(json["term"], "float_col"); - - TestRoundTrip(*expr); -} - -// Test comparison predicates -TEST_F(ExpressionJsonTest, EqualPredicate) { - auto expr = Expressions::Equal("name", Literal::String("test")); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "eq"); - EXPECT_EQ(json["term"], "name"); - EXPECT_EQ(json["value"], "test"); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, NotEqualPredicate) { - auto expr = Expressions::NotEqual("count", Literal::Int(0)); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "not-eq"); - EXPECT_EQ(json["term"], "count"); - EXPECT_EQ(json["value"], 0); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, LessThanPredicate) { - auto expr = Expressions::LessThan("age", Literal::Int(18)); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "lt"); - EXPECT_EQ(json["term"], 
"age"); - EXPECT_EQ(json["value"], 18); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, LessThanOrEqualPredicate) { - auto expr = Expressions::LessThanOrEqual("score", Literal::Double(99.5)); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "lt-eq"); - EXPECT_EQ(json["term"], "score"); - EXPECT_DOUBLE_EQ(json["value"].get(), 99.5); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, GreaterThanPredicate) { - auto expr = Expressions::GreaterThan("price", Literal::Long(1000)); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "gt"); - EXPECT_EQ(json["term"], "price"); - EXPECT_EQ(json["value"], 1000); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, GreaterThanOrEqualPredicate) { - auto expr = Expressions::GreaterThanOrEqual("quantity", Literal::Int(1)); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "gt-eq"); - EXPECT_EQ(json["term"], "quantity"); - EXPECT_EQ(json["value"], 1); - - TestRoundTrip(*expr); -} - -// Test string predicates -TEST_F(ExpressionJsonTest, StartsWithPredicate) { - auto expr = Expressions::StartsWith("path", "/home/user"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "starts-with"); - EXPECT_EQ(json["term"], "path"); - EXPECT_EQ(json["value"], "/home/user"); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, NotStartsWithPredicate) { - auto expr = Expressions::NotStartsWith("path", "/tmp"); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "not-starts-with"); - EXPECT_EQ(json["term"], "path"); - EXPECT_EQ(json["value"], "/tmp"); - - TestRoundTrip(*expr); -} - -// Test set predicates -TEST_F(ExpressionJsonTest, InPredicate) { - auto expr = Expressions::In("status", - {Literal::String("active"), Literal::String("pending"), - Literal::String("review")}); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "in"); - EXPECT_EQ(json["term"], "status"); - EXPECT_TRUE(json.contains("values")); - EXPECT_TRUE(json["values"].is_array()); - 
EXPECT_EQ(json["values"].size(), 3); - - TestRoundTrip(*expr); -} - -TEST_F(ExpressionJsonTest, NotInPredicate) { - auto expr = Expressions::NotIn("id", {Literal::Int(1), Literal::Int(2), Literal::Int(3)}); - - auto json = ToJson(*expr); - - EXPECT_EQ(json["type"], "not-in"); - EXPECT_EQ(json["term"], "id"); - EXPECT_TRUE(json["values"].is_array()); - EXPECT_EQ(json["values"].size(), 3); - - TestRoundTrip(*expr); -} - -// Test nested expressions -TEST_F(ExpressionJsonTest, NestedAndOr) { - auto cond1 = Expressions::Equal("a", Literal::Int(1)); - auto cond2 = Expressions::Equal("b", Literal::Int(2)); - auto cond3 = Expressions::Equal("c", Literal::Int(3)); - - auto or_expr = Expressions::Or(cond1, cond2); - auto and_expr = Expressions::And(or_expr, cond3); - - auto json = ToJson(*and_expr); - - EXPECT_EQ(json["type"], "and"); - EXPECT_EQ(json["left"]["type"], "or"); - EXPECT_EQ(json["right"]["type"], "eq"); - - TestRoundTrip(*and_expr); -} - -// Test deserialization from JSON strings (matching Java format) -TEST_F(ExpressionJsonTest, ParseAndExpression) { - nlohmann::json json = R"({ - "type": "and", - "left": { - "type": "gt-eq", - "term": "column-name-1", - "value": 50 - }, - "right": { - "type": "in", - "term": "column-name-2", - "values": ["one", "two"] - } - })"_json; - - auto result = ExpressionFromJson(json); - ASSERT_THAT(result, IsOk()); - - auto expr = result.value(); - EXPECT_EQ(expr->op(), Expression::Operation::kAnd); - - const auto& and_expr = static_cast(*expr); - EXPECT_EQ(and_expr.left()->op(), Expression::Operation::kGtEq); - EXPECT_EQ(and_expr.right()->op(), Expression::Operation::kIn); -} - -TEST_F(ExpressionJsonTest, ParseOrExpression) { - nlohmann::json json = R"({ - "type": "or", - "left": { - "type": "lt", - "term": "column-name-1", - "value": 50 - }, - "right": { - "type": "not-null", - "term": "column-name-2" - } - })"_json; - - auto result = ExpressionFromJson(json); - ASSERT_THAT(result, IsOk()); - - auto expr = result.value(); - 
EXPECT_EQ(expr->op(), Expression::Operation::kOr); -} - -TEST_F(ExpressionJsonTest, ParseNotExpression) { - nlohmann::json json = R"({ - "type": "not", - "child": { - "type": "gt-eq", - "term": "column-name-1", - "value": 50 - } - })"_json; - - auto result = ExpressionFromJson(json); - ASSERT_THAT(result, IsOk()); - - auto expr = result.value(); - EXPECT_EQ(expr->op(), Expression::Operation::kNot); -} - -// Test literal serialization -TEST_F(ExpressionJsonTest, LiteralSerialization) { - // Boolean - auto bool_json = LiteralToJson(Literal::Boolean(true)); - EXPECT_TRUE(bool_json.is_boolean()); - EXPECT_TRUE(bool_json.get()); - - // Integer - auto int_json = LiteralToJson(Literal::Int(42)); - EXPECT_TRUE(int_json.is_number_integer()); - EXPECT_EQ(int_json.get(), 42); - - // Long - auto long_json = LiteralToJson(Literal::Long(9876543210L)); - EXPECT_TRUE(long_json.is_number_integer()); - EXPECT_EQ(long_json.get(), 9876543210L); - - // Float - auto float_json = LiteralToJson(Literal::Float(3.14f)); - EXPECT_TRUE(float_json.is_number_float()); - - // Double - auto double_json = LiteralToJson(Literal::Double(2.718281828)); - EXPECT_TRUE(double_json.is_number_float()); - - // String - auto string_json = LiteralToJson(Literal::String("hello")); - EXPECT_TRUE(string_json.is_string()); - EXPECT_EQ(string_json.get(), "hello"); -} - -// Test operation to JSON type conversion -TEST_F(ExpressionJsonTest, OperationToJsonType) { - EXPECT_EQ(OperationToJsonType(Expression::Operation::kTrue), "true"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kFalse), "false"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kAnd), "and"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kOr), "or"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNot), "not"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kEq), "eq"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotEq), "not-eq"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kLt), 
"lt"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kLtEq), "lt-eq"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kGt), "gt"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kGtEq), "gt-eq"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kIn), "in"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotIn), "not-in"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kIsNull), "is-null"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotNull), "not-null"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kIsNan), "is-nan"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotNan), "not-nan"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kStartsWith), "starts-with"); - EXPECT_EQ(OperationToJsonType(Expression::Operation::kNotStartsWith), "not-starts-with"); -} - -// Test JSON type to operation conversion -TEST_F(ExpressionJsonTest, OperationFromJsonType) { - // Helper to test operation conversion - auto test_op = [](std::string_view type_str, Expression::Operation expected) { - auto result = OperationFromJsonType(type_str); - ASSERT_THAT(result, IsOk()) << "Failed to parse: " << type_str; - EXPECT_EQ(result.value(), expected) << "Mismatch for: " << type_str; - }; - - test_op("true", Expression::Operation::kTrue); - test_op("false", Expression::Operation::kFalse); - test_op("and", Expression::Operation::kAnd); - test_op("or", Expression::Operation::kOr); - test_op("not", Expression::Operation::kNot); - test_op("eq", Expression::Operation::kEq); - test_op("not-eq", Expression::Operation::kNotEq); - test_op("lt", Expression::Operation::kLt); - test_op("lt-eq", Expression::Operation::kLtEq); - test_op("gt", Expression::Operation::kGt); - test_op("gt-eq", Expression::Operation::kGtEq); - test_op("in", Expression::Operation::kIn); - test_op("not-in", Expression::Operation::kNotIn); - test_op("is-null", Expression::Operation::kIsNull); - test_op("not-null", Expression::Operation::kNotNull); - 
test_op("is-nan", Expression::Operation::kIsNan); - test_op("not-nan", Expression::Operation::kNotNan); - test_op("starts-with", Expression::Operation::kStartsWith); - test_op("not-starts-with", Expression::Operation::kNotStartsWith); - - // Unknown type should fail - auto result = OperationFromJsonType("unknown-type"); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -// Test error cases -TEST_F(ExpressionJsonTest, InvalidJsonType) { - nlohmann::json json = R"({"type": "invalid-op", "term": "col"})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingTypeField) { - nlohmann::json json = R"({"term": "col", "value": 42})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingTermField) { - nlohmann::json json = R"({"type": "eq", "value": 42})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingValueForLiteralPredicate) { - nlohmann::json json = R"({"type": "eq", "term": "col"})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingValuesForSetPredicate) { - nlohmann::json json = R"({"type": "in", "term": "col"})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingLeftForAnd) { - nlohmann::json json = R"({"type": "and", "right": true})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - -TEST_F(ExpressionJsonTest, MissingChildForNot) { - nlohmann::json json = R"({"type": "not"})"_json; - auto result = ExpressionFromJson(json); - EXPECT_THAT(result, IsError(ErrorKind::kJsonParseError)); -} - } // namespace iceberg From 
dbda110e7780f80d22bf27711b278ed753f79f5f Mon Sep 17 00:00:00 2001 From: Innocent Date: Sat, 24 Jan 2026 13:03:45 -0700 Subject: [PATCH 3/8] clang-format --- src/iceberg/expression/json_internal.cc | 7 +++---- src/iceberg/expression/json_internal.h | 15 +++++++-------- src/iceberg/test/expression_json_test.cc | 3 +-- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_internal.cc index ed8d2df84..f2961bf2c 100644 --- a/src/iceberg/expression/json_internal.cc +++ b/src/iceberg/expression/json_internal.cc @@ -33,7 +33,6 @@ namespace iceberg { - Result> ExpressionFromJson(const nlohmann::json& json) { // Handle boolean literals if (json.is_boolean()) { @@ -55,10 +54,10 @@ nlohmann::json ToJson(const Expression& expr) { } } -#define ICEBERG_DEFINE_FROM_JSON(Model) \ - template <> \ +#define ICEBERG_DEFINE_FROM_JSON(Model) \ + template <> \ Result> FromJson(const nlohmann::json& json) { \ - return Model##FromJson(json); \ + return Model##FromJson(json); \ } ICEBERG_DEFINE_FROM_JSON(Expression) diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_internal.h index aede8ca9d..df109d55b 100644 --- a/src/iceberg/expression/json_internal.h +++ b/src/iceberg/expression/json_internal.h @@ -30,17 +30,16 @@ namespace iceberg { - template Result FromJson(const nlohmann::json& json); -#define ICEBERG_DECLARE_JSON_SERDE(Model) \ - Result> Model##FromJson(const nlohmann::json& json); \ - \ - template \ - Result> FromJson(const nlohmann::json& json); \ - \ - nlohmann::json ToJson(const Model& model); +#define ICEBERG_DECLARE_JSON_SERDE(Model) \ + Result> Model##FromJson(const nlohmann::json& json); \ + \ + template \ + Result> FromJson(const nlohmann::json& json); \ + \ + nlohmann::json ToJson(const Model& model); /// \note Don't forget to add `ICEBERG_DEFINE_FROM_JSON` to the end of /// `json_internal.cc` to define the `FromJson` function for the model. 
diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc index 6f551d2ad..6e0a89bdb 100644 --- a/src/iceberg/test/expression_json_test.cc +++ b/src/iceberg/test/expression_json_test.cc @@ -17,8 +17,6 @@ * under the License. */ -#include "iceberg/expression/json_internal.h" - #include #include @@ -28,6 +26,7 @@ #include "iceberg/expression/expression.h" #include "iceberg/expression/expressions.h" +#include "iceberg/expression/json_internal.h" #include "iceberg/expression/literal.h" #include "iceberg/expression/predicate.h" #include "iceberg/expression/term.h" From 52e25019243b0815c417256ccac1bae4acf30e96 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sat, 24 Jan 2026 14:32:12 -0700 Subject: [PATCH 4/8] cleanup --- src/iceberg/expression/json_internal.cc | 4 ++-- src/iceberg/expression/json_internal.h | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_internal.cc index f2961bf2c..8d404f9f6 100644 --- a/src/iceberg/expression/json_internal.cc +++ b/src/iceberg/expression/json_internal.cc @@ -34,12 +34,12 @@ namespace iceberg { Result> ExpressionFromJson(const nlohmann::json& json) { - // Handle boolean literals + // Handle boolean if (json.is_boolean()) { return json.get() ? 
std::static_pointer_cast(True::Instance()) : std::static_pointer_cast(False::Instance()); } - return JsonParseError("Only boolean literals are currently supported"); + return JsonParseError("Only boolean are currently supported"); } nlohmann::json ToJson(const Expression& expr) { diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_internal.h index df109d55b..aea805cec 100644 --- a/src/iceberg/expression/json_internal.h +++ b/src/iceberg/expression/json_internal.h @@ -33,13 +33,14 @@ namespace iceberg { template Result FromJson(const nlohmann::json& json); -#define ICEBERG_DECLARE_JSON_SERDE(Model) \ - Result> Model##FromJson(const nlohmann::json& json); \ - \ - template \ - Result> FromJson(const nlohmann::json& json); \ - \ - nlohmann::json ToJson(const Model& model); +#define ICEBERG_DECLARE_JSON_SERDE(Model) \ + ICEBERG_EXPORT Result> Model##FromJson( \ + const nlohmann::json& json); \ + \ + template \ + ICEBERG_EXPORT Result> FromJson(const nlohmann::json& json); \ + \ + ICEBERG_EXPORT nlohmann::json ToJson(const Model& model); /// \note Don't forget to add `ICEBERG_DEFINE_FROM_JSON` to the end of /// `json_internal.cc` to define the `FromJson` function for the model. 
From 549b9ae3706a47f62b88de140302bcbecbc51498 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sun, 25 Jan 2026 11:37:15 -0700 Subject: [PATCH 5/8] Added helper for operation enum conversion --- src/iceberg/expression/json_internal.cc | 101 ++++++++++++++++++++++- src/iceberg/expression/json_internal.h | 13 +++ src/iceberg/test/expression_json_test.cc | 5 ++ 3 files changed, 118 insertions(+), 1 deletion(-) diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_internal.cc index 8d404f9f6..60671501f 100644 --- a/src/iceberg/expression/json_internal.cc +++ b/src/iceberg/expression/json_internal.cc @@ -32,6 +32,105 @@ #include "iceberg/util/macros.h" namespace iceberg { +namespace { +// Expression type strings +constexpr std::string_view kTypeTrue = "true"; +constexpr std::string_view kTypeFalse = "false"; +constexpr std::string_view kTypeEq = "eq"; +constexpr std::string_view kTypeAnd = "and"; +constexpr std::string_view kTypeOr = "or"; +constexpr std::string_view kTypeNot = "not"; +constexpr std::string_view kTypeIn = "in"; +constexpr std::string_view kTypeNotIn = "not-in"; +constexpr std::string_view kTypeLt = "lt"; +constexpr std::string_view kTypeLtEq = "lt-eq"; +constexpr std::string_view kTypeGt = "gt"; +constexpr std::string_view kTypeGtEq = "gt-eq"; +constexpr std::string_view kTypeNotEq = "not-eq"; +constexpr std::string_view kTypeStartsWith = "starts-with"; +constexpr std::string_view kTypeNotStartsWith = "not-starts-with"; +constexpr std::string_view kTypeIsNull = "is-null"; +constexpr std::string_view kTypeNotNull = "not-null"; +constexpr std::string_view kTypeIsNan = "is-nan"; +constexpr std::string_view kTypeNotNan = "not-nan"; +} // namespace + +/// \brief Converts a JSON type string to an Expression::Operation. 
+/// +/// \param type_str The JSON type string +/// \return The corresponding Operation or an error if unknown +Result OperationTypeFromString(const std::string_view type_str) { + if (type_str == kTypeTrue) return Expression::Operation::kTrue; + if (type_str == kTypeFalse) return Expression::Operation::kFalse; + if (type_str == kTypeAnd) return Expression::Operation::kAnd; + if (type_str == kTypeOr) return Expression::Operation::kOr; + if (type_str == kTypeNot) return Expression::Operation::kNot; + if (type_str == kTypeEq) return Expression::Operation::kEq; + if (type_str == kTypeNotEq) return Expression::Operation::kNotEq; + if (type_str == kTypeLt) return Expression::Operation::kLt; + if (type_str == kTypeLtEq) return Expression::Operation::kLtEq; + if (type_str == kTypeGt) return Expression::Operation::kGt; + if (type_str == kTypeGtEq) return Expression::Operation::kGtEq; + if (type_str == kTypeIn) return Expression::Operation::kIn; + if (type_str == kTypeNotIn) return Expression::Operation::kNotIn; + if (type_str == kTypeIsNull) return Expression::Operation::kIsNull; + if (type_str == kTypeNotNull) return Expression::Operation::kNotNull; + if (type_str == kTypeIsNan) return Expression::Operation::kIsNan; + if (type_str == kTypeNotNan) return Expression::Operation::kNotNan; + if (type_str == kTypeStartsWith) return Expression::Operation::kStartsWith; + if (type_str == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith; + + return JsonParseError("Unknown expression type: {}", type_str); +} + +/// \brief Converts an Expression::Operation to its JSON string representation. 
+/// +/// \param op The operation to convert +/// \return The JSON type string (e.g., "eq", "lt-eq", "is-null") +std::string_view ToStringOperationType(Expression::Operation op) { + switch (op) { + case Expression::Operation::kTrue: + return kTypeTrue; + case Expression::Operation::kFalse: + return kTypeFalse; + case Expression::Operation::kAnd: + return kTypeAnd; + case Expression::Operation::kOr: + return kTypeOr; + case Expression::Operation::kNot: + return kTypeNot; + case Expression::Operation::kEq: + return kTypeEq; + case Expression::Operation::kNotEq: + return kTypeNotEq; + case Expression::Operation::kLt: + return kTypeLt; + case Expression::Operation::kLtEq: + return kTypeLtEq; + case Expression::Operation::kGt: + return kTypeGt; + case Expression::Operation::kGtEq: + return kTypeGtEq; + case Expression::Operation::kIn: + return kTypeIn; + case Expression::Operation::kNotIn: + return kTypeNotIn; + case Expression::Operation::kIsNull: + return kTypeIsNull; + case Expression::Operation::kNotNull: + return kTypeNotNull; + case Expression::Operation::kIsNan: + return kTypeIsNan; + case Expression::Operation::kNotNan: + return kTypeNotNan; + case Expression::Operation::kStartsWith: + return kTypeStartsWith; + case Expression::Operation::kNotStartsWith: + return kTypeNotStartsWith; + default: + return "unknown"; + } +} Result> ExpressionFromJson(const nlohmann::json& json) { // Handle boolean @@ -39,7 +138,7 @@ Result> ExpressionFromJson(const nlohmann::json& jso return json.get() ? 
std::static_pointer_cast(True::Instance()) : std::static_pointer_cast(False::Instance()); } - return JsonParseError("Only boolean are currently supported"); + return JsonParseError("Only booleans are currently supported"); } nlohmann::json ToJson(const Expression& expr) { diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_internal.h index aea805cec..3bc3dd883 100644 --- a/src/iceberg/expression/json_internal.h +++ b/src/iceberg/expression/json_internal.h @@ -48,4 +48,17 @@ ICEBERG_DECLARE_JSON_SERDE(Expression) #undef ICEBERG_DECLARE_JSON_SERDE +/// \brief Converts an operation type string to an Expression::Operation. +/// +/// \param type_str The operation type string +/// \return The corresponding Operation or an error if unknown +ICEBERG_EXPORT Result OperationTypeFromString( + const std::string_view type_str); + +/// \brief Converts an Expression::Operation to its string representation. +/// +/// \param op The operation to convert +/// \return The operation type string (e.g., "eq", "lt-eq", "is-null") +ICEBERG_EXPORT std::string_view ToStringOperationType(Expression::Operation op); + } // namespace iceberg diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc index 6e0a89bdb..431983270 100644 --- a/src/iceberg/test/expression_json_test.cc +++ b/src/iceberg/test/expression_json_test.cc @@ -77,4 +77,9 @@ TEST_F(ExpressionJsonTest, FalseExpression) { EXPECT_EQ(result.value()->op(), Expression::Operation::kFalse); } +TEST_F(ExpressionJsonTest, OpToString) { + EXPECT_EQ(OperationTypeFromString("true"), Expression::Operation::kTrue); + EXPECT_EQ("true", ToStringOperationType(Expression::Operation::kTrue)); +} + } // namespace iceberg From 3feca74f6fadec1b7d5b783cadfe11f3bef8db16 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sun, 25 Jan 2026 21:51:00 -0700 Subject: [PATCH 6/8] review comment --- src/iceberg/CMakeLists.txt | 2 +- .../{json_internal.cc => json_serde.cc} | 3 +-- 
.../{json_internal.h => json_serde_internal.h} | 0 src/iceberg/test/expression_json_test.cc | 16 ++-------------- 4 files changed, 4 insertions(+), 17 deletions(-) rename src/iceberg/expression/{json_internal.cc => json_serde.cc} (99%) rename src/iceberg/expression/{json_internal.h => json_serde_internal.h} (100%) diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 317eb75d9..7e035cd02 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -31,7 +31,7 @@ set(ICEBERG_SOURCES expression/expression.cc expression/expressions.cc expression/inclusive_metrics_evaluator.cc - expression/json_internal.cc + expression/json_serde.cc expression/literal.cc expression/manifest_evaluator.cc expression/predicate.cc diff --git a/src/iceberg/expression/json_internal.cc b/src/iceberg/expression/json_serde.cc similarity index 99% rename from src/iceberg/expression/json_internal.cc rename to src/iceberg/expression/json_serde.cc index 60671501f..dc113dfaf 100644 --- a/src/iceberg/expression/json_internal.cc +++ b/src/iceberg/expression/json_serde.cc @@ -17,8 +17,6 @@ * under the License. 
*/ -#include "iceberg/expression/json_internal.h" - #include #include #include @@ -27,6 +25,7 @@ #include #include "iceberg/expression/expressions.h" +#include "iceberg/expression/json_serde_internal.h" #include "iceberg/expression/literal.h" #include "iceberg/util/json_util_internal.h" #include "iceberg/util/macros.h" diff --git a/src/iceberg/expression/json_internal.h b/src/iceberg/expression/json_serde_internal.h similarity index 100% rename from src/iceberg/expression/json_internal.h rename to src/iceberg/expression/json_serde_internal.h diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc index 431983270..34f800cd7 100644 --- a/src/iceberg/test/expression_json_test.cc +++ b/src/iceberg/test/expression_json_test.cc @@ -26,7 +26,7 @@ #include "iceberg/expression/expression.h" #include "iceberg/expression/expressions.h" -#include "iceberg/expression/json_internal.h" +#include "iceberg/expression/json_serde_internal.h" #include "iceberg/expression/literal.h" #include "iceberg/expression/predicate.h" #include "iceberg/expression/term.h" @@ -34,19 +34,7 @@ namespace iceberg { -class ExpressionJsonTest : public ::testing::Test { - protected: - // Helper to test round-trip serialization - // Uses string comparison since expressions may have different internal identity - // but the same semantic meaning (i.e., ToString() output matches) - void TestRoundTrip(const Expression& expr) { - auto json = ToJson(expr); - auto result = ExpressionFromJson(json); - ASSERT_THAT(result, IsOk()) << "Failed to parse JSON: " << json.dump(); - EXPECT_EQ(expr.ToString(), result.value()->ToString()) - << "Round-trip failed.\nJSON: " << json.dump(); - } -}; +class ExpressionJsonTest : public ::testing::Test {}; // Test boolean constant expressions TEST_F(ExpressionJsonTest, TrueExpression) { From 04ff61bc5bad67ec472ebe3a90a7d7c1b1529ff5 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sun, 25 Jan 2026 12:39:06 -0700 Subject: [PATCH 7/8] set or unary 
operation helper --- src/iceberg/expression/json_serde.cc | 24 ++++++++++++++++++++ src/iceberg/expression/json_serde_internal.h | 6 +++++ 2 files changed, 30 insertions(+) diff --git a/src/iceberg/expression/json_serde.cc b/src/iceberg/expression/json_serde.cc index dc113dfaf..dcce6c654 100644 --- a/src/iceberg/expression/json_serde.cc +++ b/src/iceberg/expression/json_serde.cc @@ -54,6 +54,30 @@ constexpr std::string_view kTypeIsNan = "is-nan"; constexpr std::string_view kTypeNotNan = "not-nan"; } // namespace +/// Check if an operation is a unary predicate (no values) +bool IsUnaryOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIsNull: + case Expression::Operation::kNotNull: + case Expression::Operation::kIsNan: + case Expression::Operation::kNotNan: + return true; + default: + return false; + } +} + +/// Check if an operation is a set predicate (multiple values) +bool IsSetOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIn: + case Expression::Operation::kNotIn: + return true; + default: + return false; + } +} + /// \brief Converts a JSON type string to an Expression::Operation. 
/// /// \param type_str The JSON type string diff --git a/src/iceberg/expression/json_serde_internal.h b/src/iceberg/expression/json_serde_internal.h index 3bc3dd883..a0dce8475 100644 --- a/src/iceberg/expression/json_serde_internal.h +++ b/src/iceberg/expression/json_serde_internal.h @@ -61,4 +61,10 @@ ICEBERG_EXPORT Result OperationTypeFromString( /// \return The operation type string (e.g., "eq", "lt-eq", "is-null") ICEBERG_EXPORT std::string_view ToStringOperationType(Expression::Operation op); +/// Check if an operation is a unary predicate (no values) +ICEBERG_EXPORT bool IsUnaryOperation(Expression::Operation op); + +/// Check if an operation is a set predicate (multiple values) +ICEBERG_EXPORT bool IsSetOperation(Expression::Operation op); + } // namespace iceberg From bd7710154dc2ca1db382386bd2894e0fd7d88ac6 Mon Sep 17 00:00:00 2001 From: Innocent Date: Sun, 25 Jan 2026 12:55:35 -0700 Subject: [PATCH 8/8] updated tests --- src/iceberg/test/expression_json_test.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc index 34f800cd7..dffaaa1ad 100644 --- a/src/iceberg/test/expression_json_test.cc +++ b/src/iceberg/test/expression_json_test.cc @@ -65,9 +65,14 @@ TEST_F(ExpressionJsonTest, FalseExpression) { EXPECT_EQ(result.value()->op(), Expression::Operation::kFalse); } -TEST_F(ExpressionJsonTest, OpToString) { +TEST_F(ExpressionJsonTest, OperationTypeTests) { EXPECT_EQ(OperationTypeFromString("true"), Expression::Operation::kTrue); EXPECT_EQ("true", ToStringOperationType(Expression::Operation::kTrue)); + EXPECT_TRUE(IsSetOperation(Expression::Operation::kIn)); + EXPECT_FALSE(IsSetOperation(Expression::Operation::kTrue)); + + EXPECT_TRUE(IsUnaryOperation(Expression::Operation::kIsNull)); + EXPECT_FALSE(IsUnaryOperation(Expression::Operation::kTrue)); } } // namespace iceberg