diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 35c312f60..7e035cd02 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -31,6 +31,7 @@ set(ICEBERG_SOURCES expression/expression.cc expression/expressions.cc expression/inclusive_metrics_evaluator.cc + expression/json_serde.cc expression/literal.cc expression/manifest_evaluator.cc expression/predicate.cc diff --git a/src/iceberg/expression/json_serde.cc b/src/iceberg/expression/json_serde.cc new file mode 100644 index 000000000..dcce6c654 --- /dev/null +++ b/src/iceberg/expression/json_serde.cc @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include +#include + +#include + +#include "iceberg/expression/expressions.h" +#include "iceberg/expression/json_serde_internal.h" +#include "iceberg/expression/literal.h" +#include "iceberg/util/json_util_internal.h" +#include "iceberg/util/macros.h" + +namespace iceberg { +namespace { +// JSON type strings, one per Expression::Operation; used by both +// OperationTypeFromString and ToStringOperationType below. +constexpr std::string_view kTypeTrue = "true"; +constexpr std::string_view kTypeFalse = "false"; +constexpr std::string_view kTypeEq = "eq"; +constexpr std::string_view kTypeAnd = "and"; +constexpr std::string_view kTypeOr = "or"; +constexpr std::string_view kTypeNot = "not"; +constexpr std::string_view kTypeIn = "in"; +constexpr std::string_view kTypeNotIn = "not-in"; +constexpr std::string_view kTypeLt = "lt"; +constexpr std::string_view kTypeLtEq = "lt-eq"; +constexpr std::string_view kTypeGt = "gt"; +constexpr std::string_view kTypeGtEq = "gt-eq"; +constexpr std::string_view kTypeNotEq = "not-eq"; +constexpr std::string_view kTypeStartsWith = "starts-with"; +constexpr std::string_view kTypeNotStartsWith = "not-starts-with"; +constexpr std::string_view kTypeIsNull = "is-null"; +constexpr std::string_view kTypeNotNull = "not-null"; +constexpr std::string_view kTypeIsNan = "is-nan"; +constexpr std::string_view kTypeNotNan = "not-nan"; +} // namespace + +/// Check if an operation is a unary predicate (no values). +/// +/// Returns true only for kIsNull, kNotNull, kIsNan and kNotNan; every other +/// operation yields false. +bool IsUnaryOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIsNull: + case Expression::Operation::kNotNull: + case Expression::Operation::kIsNan: + case Expression::Operation::kNotNan: + return true; + default: + return false; + } +} + +/// Check if an operation is a set predicate (multiple values). +/// +/// Returns true only for kIn and kNotIn; every other operation yields false. +bool IsSetOperation(Expression::Operation op) { + switch (op) { + case Expression::Operation::kIn: + case Expression::Operation::kNotIn: + return true; + default: + return false; + } +} + +/// \brief Converts a JSON type string to an Expression::Operation. 
+/// +/// \param type_str The JSON type string (e.g., "eq", "lt-eq", "is-null"); it is +/// compared for exact equality against each known type string +/// \return The corresponding Operation, or the error produced by JsonParseError +/// ("Unknown expression type: ...") when no known type string matches +Result OperationTypeFromString(const std::string_view type_str) { + if (type_str == kTypeTrue) return Expression::Operation::kTrue; + if (type_str == kTypeFalse) return Expression::Operation::kFalse; + if (type_str == kTypeAnd) return Expression::Operation::kAnd; + if (type_str == kTypeOr) return Expression::Operation::kOr; + if (type_str == kTypeNot) return Expression::Operation::kNot; + if (type_str == kTypeEq) return Expression::Operation::kEq; + if (type_str == kTypeNotEq) return Expression::Operation::kNotEq; + if (type_str == kTypeLt) return Expression::Operation::kLt; + if (type_str == kTypeLtEq) return Expression::Operation::kLtEq; + if (type_str == kTypeGt) return Expression::Operation::kGt; + if (type_str == kTypeGtEq) return Expression::Operation::kGtEq; + if (type_str == kTypeIn) return Expression::Operation::kIn; + if (type_str == kTypeNotIn) return Expression::Operation::kNotIn; + if (type_str == kTypeIsNull) return Expression::Operation::kIsNull; + if (type_str == kTypeNotNull) return Expression::Operation::kNotNull; + if (type_str == kTypeIsNan) return Expression::Operation::kIsNan; + if (type_str == kTypeNotNan) return Expression::Operation::kNotNan; + if (type_str == kTypeStartsWith) return Expression::Operation::kStartsWith; + if (type_str == kTypeNotStartsWith) return Expression::Operation::kNotStartsWith; + + return JsonParseError("Unknown expression type: {}", type_str); +} + +/// \brief Converts an Expression::Operation to its JSON string representation. 
+/// +/// \param op The operation to convert +/// \return The JSON type string (e.g., "eq", "lt-eq", "is-null"), or "unknown" +/// for an operation that has no case in the switch below +std::string_view ToStringOperationType(Expression::Operation op) { + switch (op) { + case Expression::Operation::kTrue: + return kTypeTrue; + case Expression::Operation::kFalse: + return kTypeFalse; + case Expression::Operation::kAnd: + return kTypeAnd; + case Expression::Operation::kOr: + return kTypeOr; + case Expression::Operation::kNot: + return kTypeNot; + case Expression::Operation::kEq: + return kTypeEq; + case Expression::Operation::kNotEq: + return kTypeNotEq; + case Expression::Operation::kLt: + return kTypeLt; + case Expression::Operation::kLtEq: + return kTypeLtEq; + case Expression::Operation::kGt: + return kTypeGt; + case Expression::Operation::kGtEq: + return kTypeGtEq; + case Expression::Operation::kIn: + return kTypeIn; + case Expression::Operation::kNotIn: + return kTypeNotIn; + case Expression::Operation::kIsNull: + return kTypeIsNull; + case Expression::Operation::kNotNull: + return kTypeNotNull; + case Expression::Operation::kIsNan: + return kTypeIsNan; + case Expression::Operation::kNotNan: + return kTypeNotNan; + case Expression::Operation::kStartsWith: + return kTypeStartsWith; + case Expression::Operation::kNotStartsWith: + return kTypeNotStartsWith; + default: + return "unknown"; + } +} + +/// \brief Parses an expression from its JSON form. +/// +/// Only JSON booleans are handled so far: true yields True::Instance() and +/// false yields False::Instance(). +/// +/// \param json The JSON value to parse +/// \return The parsed expression, or the error produced by JsonParseError for +/// any non-boolean value +Result> ExpressionFromJson(const nlohmann::json& json) { + // A JSON boolean maps to True::Instance() / False::Instance() + if (json.is_boolean()) { + return json.get() ? 
std::static_pointer_cast(True::Instance()) + : std::static_pointer_cast(False::Instance()); + } + return JsonParseError("Only booleans are currently supported"); +} + +nlohmann::json ToJson(const Expression& expr) { + switch (expr.op()) { + case Expression::Operation::kTrue: + return true; + + case Expression::Operation::kFalse: + return false; + default: + throw std::logic_error("Only booleans are currently supported"); + } +} + +#define ICEBERG_DEFINE_FROM_JSON(Model) \ + template <> \ + Result> FromJson(const nlohmann::json& json) { \ + return Model##FromJson(json); \ + } + +ICEBERG_DEFINE_FROM_JSON(Expression) + +} // namespace iceberg diff --git a/src/iceberg/expression/json_serde_internal.h b/src/iceberg/expression/json_serde_internal.h new file mode 100644 index 000000000..a0dce8475 --- /dev/null +++ b/src/iceberg/expression/json_serde_internal.h @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/expression/expression.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" + +/// \file iceberg/expression/json_serde_internal.h +/// JSON serialization and deserialization for expressions. 
+ +namespace iceberg { + +template +Result FromJson(const nlohmann::json& json); + +#define ICEBERG_DECLARE_JSON_SERDE(Model) \ + ICEBERG_EXPORT Result> Model##FromJson( \ + const nlohmann::json& json); \ + \ + template \ + ICEBERG_EXPORT Result> FromJson(const nlohmann::json& json); \ + \ + ICEBERG_EXPORT nlohmann::json ToJson(const Model& model); + +/// \note Don't forget to add `ICEBERG_DEFINE_FROM_JSON` to the end of +/// `json_serde.cc` to define the `FromJson` function for the model. +ICEBERG_DECLARE_JSON_SERDE(Expression) + +#undef ICEBERG_DECLARE_JSON_SERDE + +/// \brief Converts an operation type string to an Expression::Operation. +/// +/// \param type_str The operation type string (e.g., "eq", "lt-eq", "is-null") +/// \return The corresponding Operation or an error if unknown +ICEBERG_EXPORT Result OperationTypeFromString( + const std::string_view type_str); + +/// \brief Converts an Expression::Operation to its string representation. +/// +/// \param op The operation to convert +/// \return The operation type string (e.g., "eq", "lt-eq", "is-null"), or +/// "unknown" for an operation without a type string +ICEBERG_EXPORT std::string_view ToStringOperationType(Expression::Operation op); + +/// Check if an operation is a unary predicate (no values): kIsNull, kNotNull, +/// kIsNan or kNotNan +ICEBERG_EXPORT bool IsUnaryOperation(Expression::Operation op); + +/// Check if an operation is a set predicate (multiple values): kIn or kNotIn +ICEBERG_EXPORT bool IsSetOperation(Expression::Operation op); + +} // namespace iceberg diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index d243a48bf..00fff9f27 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -88,6 +88,7 @@ add_iceberg_test(table_test add_iceberg_test(expression_test SOURCES aggregate_test.cc + expression_json_test.cc expression_test.cc expression_visitor_test.cc inclusive_metrics_evaluator_test.cc diff --git a/src/iceberg/test/expression_json_test.cc b/src/iceberg/test/expression_json_test.cc new file mode 100644 index 000000000..dffaaa1ad --- /dev/null +++ 
b/src/iceberg/test/expression_json_test.cc @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include +#include +#include + +#include "iceberg/expression/expression.h" +#include "iceberg/expression/expressions.h" +#include "iceberg/expression/json_serde_internal.h" +#include "iceberg/expression/literal.h" +#include "iceberg/expression/predicate.h" +#include "iceberg/expression/term.h" +#include "iceberg/test/matchers.h" + +namespace iceberg { + +class ExpressionJsonTest : public ::testing::Test {}; + +// Boolean constant expressions: each must serialize to a JSON boolean and +// parse back to an expression with the same operation. +TEST_F(ExpressionJsonTest, TrueExpression) { + auto expr = True::Instance(); + auto json = ToJson(*expr); + + // True should serialize as JSON boolean true + EXPECT_TRUE(json.is_boolean()); + EXPECT_TRUE(json.get()); + + // Parse back and check the operation is still kTrue + auto result = ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result.value()->op(), Expression::Operation::kTrue); +} + +TEST_F(ExpressionJsonTest, FalseExpression) { + auto expr = False::Instance(); + auto json = ToJson(*expr); + + // False should serialize as JSON boolean false + EXPECT_TRUE(json.is_boolean()); + EXPECT_FALSE(json.get()); + + // Parse back and check the operation is still kFalse + auto result = 
ExpressionFromJson(json); + ASSERT_THAT(result, IsOk()); + EXPECT_EQ(result.value()->op(), Expression::Operation::kFalse); +} + +TEST_F(ExpressionJsonTest, OperationTypeTests) { + EXPECT_EQ(OperationTypeFromString("true"), Expression::Operation::kTrue); + EXPECT_EQ("true", ToStringOperationType(Expression::Operation::kTrue)); + EXPECT_TRUE(IsSetOperation(Expression::Operation::kIn)); + EXPECT_FALSE(IsSetOperation(Expression::Operation::kTrue)); + + EXPECT_TRUE(IsUnaryOperation(Expression::Operation::kIsNull)); + EXPECT_FALSE(IsUnaryOperation(Expression::Operation::kTrue)); +} + +} // namespace iceberg