diff --git a/benchmark/jsonschema.cc b/benchmark/jsonschema.cc index 3bf15345b..cefcd16bc 100644 --- a/benchmark/jsonschema.cc +++ b/benchmark/jsonschema.cc @@ -8,28 +8,14 @@ #include #include -static void Schema_Frame_WoT_Instances(benchmark::State &state) { +static void Schema_Frame_WoT_References(benchmark::State &state) { const auto schema{ sourcemeta::core::read_json(std::filesystem::path{CURRENT_DIRECTORY} / "schemas" / "draft7_w3c_wot_td_v1_1.json")}; for (auto _ : state) { sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Instances}; - frame.analyse(schema, sourcemeta::core::schema_walker, - sourcemeta::core::schema_resolver); - benchmark::DoNotOptimize(frame); - } -} - -static void Schema_Frame_OMC_Instances(benchmark::State &state) { - const auto schema{ - sourcemeta::core::read_json(std::filesystem::path{CURRENT_DIRECTORY} / - "schemas" / "2019_09_omc_json_v2.json")}; - - for (auto _ : state) { - sourcemeta::core::SchemaFrame frame{ - sourcemeta::core::SchemaFrame::Mode::Instances}; + sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(schema, sourcemeta::core::schema_walker, sourcemeta::core::schema_resolver); benchmark::DoNotOptimize(frame); @@ -178,8 +164,7 @@ static void Schema_Bundle_Meta_2020_12(benchmark::State &state) { } } -BENCHMARK(Schema_Frame_WoT_Instances); -BENCHMARK(Schema_Frame_OMC_Instances); +BENCHMARK(Schema_Frame_WoT_References); BENCHMARK(Schema_Frame_OMC_References); BENCHMARK(Schema_Frame_OMC_Locations); BENCHMARK(Schema_Frame_ISO_Language_Locations); diff --git a/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index cc6eaf338..a3057ece0 100644 --- a/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -492,6 +492,55 @@ template class GenericPointer { } } + /// Check whether a JSON Pointer starts with another JSON Pointer followed + /// by a property token. This is useful for checking container membership + /// without allocating a new pointer. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar"}; + /// const sourcemeta::core::Pointer prefix{"foo"}; + /// assert(pointer.starts_with(prefix, "$defs")); + /// assert(!pointer.starts_with(prefix, "other")); + /// ``` + template + requires(!std::is_same_v, Token>) + [[nodiscard]] auto starts_with(const GenericPointer &other, + const StringT &tail) const -> bool { + const auto prefix_size{other.size()}; + return this->size() > prefix_size && this->starts_with(other) && + this->data[prefix_size].is_property() && + this->data[prefix_size].to_property() == tail; + } + + /// Check whether a JSON Pointer starts with another JSON Pointer followed + /// by two property tokens. This is useful for checking nested container + /// membership without allocating a new pointer. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar", "baz"}; + /// const sourcemeta::core::Pointer prefix{"foo"}; + /// assert(pointer.starts_with(prefix, "$defs", "bar")); + /// assert(!pointer.starts_with(prefix, "$defs", "other")); + /// ``` + template + requires(!std::is_same_v, Token> && + !std::is_same_v, Token>) + [[nodiscard]] auto starts_with(const GenericPointer &other, + const StringLeftT &tail_left, + const StringRightT &tail_right) const -> bool { + const auto prefix_size{other.size()}; + return this->size() > prefix_size + 1 && + this->starts_with(other, tail_left) && + this->data[prefix_size + 1].is_property() && + this->data[prefix_size + 1].to_property() == tail_right; + } + /// Check whether a JSON Pointer starts with the initial part of another JSON /// Pointer. For example: /// diff --git a/src/core/jsonschema/transformer.cc b/src/core/jsonschema/transformer.cc index 32287b523..4e297d5c9 100644 --- a/src/core/jsonschema/transformer.cc +++ b/src/core/jsonschema/transformer.cc @@ -107,7 +107,7 @@ auto SchemaTransformer::check( const std::optional &default_dialect, const std::optional &default_id) const -> std::pair { - SchemaFrame frame{SchemaFrame::Mode::Instances}; + SchemaFrame frame{SchemaFrame::Mode::References}; // If we use the default id when there is already one, framing will duplicate // the locations leading to duplicate check reports @@ -173,7 +173,7 @@ auto SchemaTransformer::apply( std::size_t subschema_count{0}; std::size_t subschema_failures{0}; while (true) { - SchemaFrame frame{SchemaFrame::Mode::Instances}; + SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); std::unordered_set visited; diff --git a/src/extension/alterschema/common/orphan_definitions.h b/src/extension/alterschema/common/orphan_definitions.h index 812a7dc93..0ff2e3e62 100644 --- a/src/extension/alterschema/common/orphan_definitions.h +++ b/src/extension/alterschema/common/orphan_definitions.h @@ -15,6 +15,7 @@ class OrphanDefinitions final : public SchemaTransformRule { const sourcemeta::core::SchemaWalker &, const sourcemeta::core::SchemaResolver &) const -> sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(schema.is_object()); const bool has_modern_core{ vocabularies.contains(Vocabularies::Known::JSON_Schema_2020_12_Core) || vocabularies.contains(Vocabularies::Known::JSON_Schema_2019_09_Core)}; @@ -22,19 +23,41 @@ class OrphanDefinitions final : public SchemaTransformRule { vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_7) || vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_6) || vocabularies.contains(Vocabularies::Known::JSON_Schema_Draft_4)}; + const bool has_defs{has_modern_core && schema.defines("$defs")}; + const bool has_definitions{(has_modern_core || has_draft_definitions) && + schema.defines("definitions")}; + ONLY_CONTINUE_IF(has_defs || has_definitions); - ONLY_CONTINUE_IF(has_modern_core || has_draft_definitions); - ONLY_CONTINUE_IF(schema.is_object()); + const auto prefix_size{location.pointer.size()}; + bool has_external_to_defs{false}; + bool has_external_to_definitions{false}; + std::unordered_set outside_referenced_defs; + std::unordered_set outside_referenced_definitions; - std::vector orphans; + for (const auto &[key, reference] : frame.references()) { + const auto destination_location{frame.traverse(reference.destination)}; + if (destination_location.has_value()) { + if (has_defs) { + process_reference(key.second, destination_location->get().pointer, + location.pointer, prefix_size, "$defs", + has_external_to_defs, outside_referenced_defs); + } - if (has_modern_core) { - collect_orphans(frame, location, schema, "$defs", orphans); + if (has_definitions) { + process_reference(key.second, destination_location->get().pointer, + location.pointer, prefix_size, "definitions", + has_external_to_definitions, + outside_referenced_definitions); + } + } } - if (has_modern_core || has_draft_definitions) { - collect_orphans(frame, location, schema, "definitions", orphans); - } + std::vector orphans; + collect_orphans(schema, "$defs", has_defs, has_external_to_defs, + outside_referenced_defs, orphans); + collect_orphans(schema, "definitions", has_definitions, + has_external_to_definitions, outside_referenced_definitions, + orphans); ONLY_CONTINUE_IF(!orphans.empty()); return APPLIES_TO_POINTERS(std::move(orphans)); @@ -55,19 +78,43 @@ class OrphanDefinitions final : public SchemaTransformRule { private: static auto - collect_orphans(const sourcemeta::core::SchemaFrame &frame, - const sourcemeta::core::SchemaFrame::Location &root, - const JSON &schema, const JSON::String &container, - std::vector &orphans) -> void { - if (!schema.defines(container) || !schema.at(container).is_object()) { + process_reference(const Pointer &source_pointer, + const Pointer &destination_pointer, const Pointer &prefix, + const std::size_t prefix_size, std::string_view container, + bool &has_external, + std::unordered_set &referenced) -> void { + if (!destination_pointer.starts_with(prefix, container) || + destination_pointer.size() <= prefix_size + 1) { return; } - for (const auto &entry : schema.at(container).as_object()) { - auto entry_pointer{Pointer{container, entry.first}}; - const auto &entry_location{frame.traverse(root, entry_pointer)}; - if (frame.instance_locations(entry_location).empty()) { - orphans.push_back(std::move(entry_pointer)); + const auto &entry_token{destination_pointer.at(prefix_size + 1)}; + if (entry_token.is_property()) { + const auto &entry_name{entry_token.to_property()}; + if (!source_pointer.starts_with(prefix, container)) { + has_external = true; + referenced.insert(entry_name); + } else if (!source_pointer.starts_with(prefix, container, entry_name)) { + referenced.insert(entry_name); + } + } + } + + static auto + collect_orphans(const JSON &schema, const JSON::String &container, + const bool has_container, const bool has_external_reference, + const std::unordered_set &referenced, + std::vector &orphans) -> void { + if (has_container) { + const auto &maybe_object{schema.at(container)}; + if (maybe_object.is_object()) { + // If no external references to container, all definitions are orphans + // Otherwise, only unreferenced definitions are orphans + for (const auto &entry : maybe_object.as_object()) { + if (!has_external_reference || !referenced.contains(entry.first)) { + orphans.push_back(Pointer{container, entry.first}); + } + } } } } diff --git a/test/alterschema/alterschema_lint_2020_12_test.cc b/test/alterschema/alterschema_lint_2020_12_test.cc index b627c6dd7..91deadf52 100644 --- a/test/alterschema/alterschema_lint_2020_12_test.cc +++ b/test/alterschema/alterschema_lint_2020_12_test.cc @@ -6179,6 +6179,88 @@ TEST(AlterSchema_lint_2020_12, orphan_definitions_25) { EXPECT_EQ(document, expected); } +TEST(AlterSchema_lint_2020_12, orphan_definitions_26) { + sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "foo": { "$ref": "#/$defs/bar" } + }, + "$defs": { + "bar": { + "allOf": [ + { "$ref": "#/$defs/baz" } + ] + }, + "baz": { + "type": "object", + "properties": { + "qux": { "$ref": "#/$defs/extra" } + } + }, + "extra": { "type": "string" } + } + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::core::JSON expected = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "foo": { "$ref": "#/$defs/bar" } + }, + "$defs": { + "bar": { "$ref": "#/$defs/baz" }, + "baz": { + "type": "object", + "properties": { + "qux": { "$ref": "#/$defs/extra" } + } + }, + "extra": { "type": "string" } + } + })JSON"); + + EXPECT_EQ(document, expected); +} + +TEST(AlterSchema_lint_2020_12, orphan_definitions_27) { + sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "start": { "$ref": "#/$defs/L1" } + }, + "$defs": { + "L1": { "allOf": [ { "$ref": "#/$defs/L2" } ] }, + "L2": { "properties": { "a": { "$ref": "#/$defs/L3" } } }, + "L3": { "allOf": [ { "$ref": "#/$defs/L4" } ] }, + "L4": { "properties": { "b": { "$ref": "#/$defs/L5" } } }, + "L5": { "type": "string" } + } + })JSON"); + + LINT_AND_FIX(document); + + const sourcemeta::core::JSON expected = sourcemeta::core::parse_json(R"JSON({ + "$id": "https://www.sourcemeta.com/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "start": { "$ref": "#/$defs/L1" } + }, + "$defs": { + "L1": { "$ref": "#/$defs/L2" }, + "L2": { "properties": { "a": { "$ref": "#/$defs/L3" } } }, + "L3": { "$ref": "#/$defs/L4" }, + "L4": { "properties": { "b": { "$ref": "#/$defs/L5" } } }, + "L5": { "type": "string" } + } + })JSON"); + + EXPECT_EQ(document, expected); +} + TEST(AlterSchema_lint_2020_12, unnecessary_allof_wrapper_unevaluated_properties_depends_on_properties) { sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({ diff --git a/test/jsonpointer/jsonpointer_starts_with_test.cc b/test/jsonpointer/jsonpointer_starts_with_test.cc index 94a094cf5..85d262585 100644 --- a/test/jsonpointer/jsonpointer_starts_with_test.cc +++ b/test/jsonpointer/jsonpointer_starts_with_test.cc @@ -130,3 +130,75 @@ TEST(JSONPointer_starts_with, tail_empty_prefix) { const sourcemeta::core::Pointer prefix; EXPECT_TRUE(pointer.starts_with(prefix, tail)); } + +TEST(JSONPointer_starts_with, property_tail_true) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_TRUE(pointer.starts_with(prefix, "$defs")); +} + +TEST(JSONPointer_starts_with, property_tail_false_wrong_tail) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "other")); +} + +TEST(JSONPointer_starts_with, property_tail_false_wrong_prefix) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar"}; + const sourcemeta::core::Pointer prefix{"baz"}; + EXPECT_FALSE(pointer.starts_with(prefix, "$defs")); +} + +TEST(JSONPointer_starts_with, property_tail_pointer_too_short) { + const sourcemeta::core::Pointer pointer{"foo"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "$defs")); +} + +TEST(JSONPointer_starts_with, property_tail_empty_prefix) { + const sourcemeta::core::Pointer pointer{"$defs", "bar"}; + const sourcemeta::core::Pointer prefix; + EXPECT_TRUE(pointer.starts_with(prefix, "$defs")); +} + +TEST(JSONPointer_starts_with, property_tail_index_token) { + const sourcemeta::core::Pointer pointer{"foo", 0, "bar"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "0")); +} + +TEST(JSONPointer_starts_with, property_two_tails_true) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar", "baz"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_TRUE(pointer.starts_with(prefix, "$defs", "bar")); +} + +TEST(JSONPointer_starts_with, property_two_tails_false_wrong_left) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar", "baz"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "other", "bar")); +} + +TEST(JSONPointer_starts_with, property_two_tails_false_wrong_right) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", "bar", "baz"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "$defs", "other")); +} + +TEST(JSONPointer_starts_with, property_two_tails_pointer_too_short) { + const sourcemeta::core::Pointer pointer{"foo", "$defs"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "$defs", "bar")); +} + +TEST(JSONPointer_starts_with, property_two_tails_empty_prefix) { + const sourcemeta::core::Pointer pointer{"$defs", "bar", "baz"}; + const sourcemeta::core::Pointer prefix; + EXPECT_TRUE(pointer.starts_with(prefix, "$defs", "bar")); +} + +TEST(JSONPointer_starts_with, property_two_tails_index_in_right) { + const sourcemeta::core::Pointer pointer{"foo", "$defs", 0, "baz"}; + const sourcemeta::core::Pointer prefix{"foo"}; + EXPECT_FALSE(pointer.starts_with(prefix, "$defs", "0")); +}