From cc73438a1a76bf9bcdd1b5f016e535102ad8cb46 Mon Sep 17 00:00:00 2001 From: sujata-m Date: Fri, 10 Apr 2026 16:12:07 +0530 Subject: [PATCH] =?UTF-8?q?Fix=20OSM=E2=86=92OSW=20handling=20for=20ext-on?= =?UTF-8?q?ly=20geometries=20and=20node=20reference=20export?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Dev Board Ticket https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/3477 https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/3022 ## Changes **PR Title** Fix OSM→OSW handling for ext-only geometries and node reference export **PR Summary** This PR fixes several OSM→OSW regressions around `ext:*` tags and export classification. - Semantic feature recognition now uses canonical OSM tags only; `ext:*` tags are preserved as extensions instead of being treated as feature-defining tags. - Closed ext-only ways such as `ext:demolished:building=yes` now export correctly as polygons and no longer fall through to point geometry construction, which previously caused `KeyError: 'lon'`. - GeoJSON export now keeps topology endpoint points in `nodes.geojson`, ensuring edge `_u_id` and `_v_id` references remain OSW-compliant during roundtrip validation. - Added regression coverage for `bug_3477`, serializer-level node/point remapping, and the OSW compliance failure. - Bumped the package version to `0.3.3` and updated the changelog. ## Testing - Updated existing unit test cases - Added 2 new unit test cases --- CHANGELOG.md | 6 + requirements.txt | 2 +- setup.py | 8 +- .../serializer/osm/osm_graph.py | 65 +++++++--- .../serializer/osw/osw_normalizer.py | 33 +++-- src/osm_osw_reformatter/version.py | 2 +- tests/unit_tests/test_files/bug_3477.xml | 16 +++ tests/unit_tests/test_osm2osw/test_osm2osw.py | 28 +++++ .../test_serializer/test_osm_graph.py | 118 ++++++++++++++++++ .../test_serializer/test_osw_normalizer.py | 23 ++-- 10 files changed, 265 insertions(+), 36 deletions(-) create mode 100644 tests/unit_tests/test_files/bug_3477.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a6231f..9d4d417 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change log +### 0.3.3 +- Fix OSM→OSW export classification so canonical OSM tags are used for semantic recognition and `ext:*` tags are preserved as extensions instead of being treated as feature-defining tags. +- Fix closed ext-only ways such as `ext:demolished:building=yes` to emit polygon output without falling through to point geometry construction. +- Fix GeoJSON node/point export so topology endpoints remain in `nodes.geojson`, keeping edge `_u_id`/`_v_id` references OSW-compliant on roundtrip validation. +- Add regression coverage for bug 3477 and serializer/compliance cases around ext-only geometries and remapped node references. + ### 0.3.2 - Fix duplicate polygon `_id` generation in OSM→OSW export by assigning sequential IDs per feature type. - Remap edge `_u_id`/`_v_id` and zone `_w_id` references to exported node IDs so references stay consistent after ID normalization. diff --git a/requirements.txt b/requirements.txt index b000760..f075ae7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ shapely~=2.0.2 pyproj~=3.6.1 coverage~=7.5.1 ogr2osm==1.2.0 -python-osw-validation==0.3.1 \ No newline at end of file +python-osw-validation==0.3.5 \ No newline at end of file diff --git a/setup.py b/setup.py index 2dc03de..e040e16 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,13 @@ }, long_description_content_type='text/markdown', url='https://github.com/TaskarCenterAtUW/TDEI-python-lib-osw-formatter', - install_requires=install_requires, + install_requires=[ + 'osmium~=3.6.0', + 'asyncio~=3.4.3', + 'networkx~=3.2', + 'shapely~=2.0.2', + 'ogr2osm==1.2.0' + ], packages=find_packages(where='src'), classifiers=[ 'Programming Language :: Python :: 3', diff --git a/src/osm_osw_reformatter/serializer/osm/osm_graph.py b/src/osm_osw_reformatter/serializer/osm/osm_graph.py index ced5893..57617f2 100644 --- a/src/osm_osw_reformatter/serializer/osm/osm_graph.py +++ b/src/osm_osw_reformatter/serializer/osm/osm_graph.py @@ -150,13 +150,20 @@ def way(self, w): if self.progressbar: self.progressbar.update(1) - if not self.line_filter(w.tags): + tags = dict(w.tags) + if not self.line_filter(tags): return d = {} - tags = dict(w.tags) + normalizer = OSWLineNormalizer(tags) + + is_closed = len(w.nodes) > 2 and w.nodes[0].ref == w.nodes[-1].ref + if is_closed and normalizer.is_custom() and not ( + normalizer.is_fence() or normalizer.is_tree_row() + ): + return - d2 = {**d, **OSWLineNormalizer(tags).normalize()} + d2 = {**d, **normalizer.normalize()} ndref = [] for i in range(len(w.nodes)): @@ -264,13 +271,23 @@ def area(self, a): if self.progressbar: self.progressbar.update(1) - if not self.polygon_filter(a.tags): + tags = dict(a.tags) + if not self.polygon_filter(tags): return d = {} - tags = dict(a.tags) + normalizer = OSWPolygonNormalizer(tags) + line_normalizer = OSWLineNormalizer(tags) + zone_normalizer = OSWZoneNormalizer(tags) + + if normalizer.is_custom() and ( + line_normalizer.is_fence() + or line_normalizer.is_tree_row() + or zone_normalizer.is_pedestrian() + ): + return - d2 = {**d, **OSWPolygonNormalizer(tags).normalize()} + d2 = {**d, **normalizer.normalize()} exteriors_count = 0 for exterior in a.outer_rings(): @@ -534,7 +551,7 @@ def construct_geometries(self, progressbar: Optional[callable] = None) -> None: if progressbar: progressbar.update(1) - elif OSWPolygonNormalizer.osw_polygon_filter(d): + elif "ndref" in d and "indref" in d: ndref = d.get("ndref") indref = d.get("indref", []) if not ndref: @@ -547,7 +564,7 @@ def construct_geometries(self, progressbar: Optional[callable] = None) -> None: if progressbar: progressbar.update(1) - elif OSWLineNormalizer.osw_line_filter(d): + elif "ndref" in d: ndref = d.get("ndref") if not ndref: continue @@ -653,11 +670,28 @@ def _remap_node_ref(ref, node_id_map): for n, d in self.G.nodes(data=True): d_copy = {**d} source_id = _source_id(n) + geometry_obj = d_copy.pop("geometry") + geometry = mapping(geometry_obj) + geometry_type = geometry_obj.geom_type + is_topology_node = geometry_type == "Point" and self.G.degree(n) > 0 + + if is_topology_node: + _assign_ids(d_copy, node_id_counter, source_id) + node_id_map[n] = d_copy["_id"] + node_id_counter += 1 - if OSWPointNormalizer.osw_point_filter(d): + if 'lon' in d_copy: + d_copy.pop('lon') + + if 'lat' in d_copy: + d_copy.pop('lat') + + node_features.append( + {'type': 'Feature', 'geometry': geometry, 'properties': d_copy} + ) + elif geometry_type == "Point" and OSWPointNormalizer.osw_point_filter(d): _assign_ids(d_copy, point_id_counter, source_id) point_id_counter += 1 - geometry = mapping(d_copy.pop("geometry")) if "lon" in d_copy: d_copy.pop("lon") @@ -668,26 +702,23 @@ def _remap_node_ref(ref, node_id_map): point_features.append( {"type": "Feature", "geometry": geometry, "properties": d_copy} ) - elif OSWLineNormalizer.osw_line_filter(d): + elif geometry_type == "LineString": _assign_ids(d_copy, line_id_counter, source_id) line_id_counter += 1 - geometry = mapping(d_copy.pop("geometry")) line_features.append( {"type": "Feature", "geometry": geometry, "properties": d_copy} ) - elif OSWZoneNormalizer.osw_zone_filter(d): + elif geometry_type == "Polygon" and OSWZoneNormalizer.osw_zone_filter(d): _assign_ids(d_copy, zone_id_counter, source_id) zone_id_counter += 1 - geometry = mapping(d_copy.pop("geometry")) zone_features.append( {"type": "Feature", "geometry": geometry, "properties": d_copy} ) - elif OSWPolygonNormalizer.osw_polygon_filter(d): + elif geometry_type == "Polygon": _assign_ids(d_copy, polygon_id_counter, source_id) polygon_id_counter += 1 - geometry = mapping(d_copy.pop("geometry")) polygon_features.append( {"type": "Feature", "geometry": geometry, "properties": d_copy} @@ -697,8 +728,6 @@ def _remap_node_ref(ref, node_id_map): node_id_map[n] = d_copy["_id"] node_id_counter += 1 - geometry = mapping(d_copy.pop('geometry')) - if 'lon' in d_copy: d_copy.pop('lon') diff --git a/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py b/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py index b10c81b..a6ca28e 100644 --- a/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py +++ b/src/osm_osw_reformatter/serializer/osw/osw_normalizer.py @@ -20,9 +20,7 @@ def _tag_value(tags, key): - if key in tags: - return tags.get(key, "") - return tags.get(f"ext:{key}", "") + return tags.get(key, "") def _tags_to_dict(tags): @@ -43,6 +41,25 @@ def _tags_to_dict(tags): return {} +def _feature_tags(tags): + tag_dict = _tags_to_dict(tags) + internal_keys = { + "geometry", + "indref", + "lat", + "length", + "lon", + "ndref", + "osm_id", + "segment", + } + return { + k: v + for k, v in tag_dict.items() + if k not in internal_keys and not str(k).startswith("_") + } + + def _has_only_ext_tags(tags): if not tags: return False @@ -278,7 +295,7 @@ def _normalize_kerb(self, keep_keys = {}, defaults = {}): def is_kerb(self): kerb_value = _tag_value(self.tags, "kerb") barrier_value = _tag_value(self.tags, "barrier") - has_kerb_key = "kerb" in self.tags or "ext:kerb" in self.tags + has_kerb_key = "kerb" in self.tags return (kerb_value in self.KERB_VALUES) or ( barrier_value == "kerb" and (not has_kerb_key or kerb_value == "yes") ) @@ -361,7 +378,7 @@ def is_tree(self): return _tag_value(self.tags, "natural") == "tree" def is_custom(self): - tag_dict = _tags_to_dict(self.tags) + tag_dict = _feature_tags(self.tags) return _has_only_ext_tags(tag_dict) class OSWLineNormalizer: @@ -405,7 +422,7 @@ def is_tree_row(self): return _tag_value(self.tags, "natural") == "tree_row" def is_custom(self): - tag_dict = _tags_to_dict(self.tags) + tag_dict = _feature_tags(self.tags) return _has_only_ext_tags(tag_dict) class OSWPolygonNormalizer: @@ -556,13 +573,13 @@ def _normalize_polygon(self, keep_keys={}, defaults = {}): return new_tags def is_building(self): - return _tag_value(self.tags, "building") in self.BUILDING_VALUES + return self.tags.get("building", "") in self.BUILDING_VALUES def is_wood(self): return _tag_value(self.tags, "natural") == "wood" def is_custom(self): - tag_dict = _tags_to_dict(self.tags) + tag_dict = _feature_tags(self.tags) return _has_only_ext_tags(tag_dict) class OSWZoneNormalizer: diff --git a/src/osm_osw_reformatter/version.py b/src/osm_osw_reformatter/version.py index 73e3bb4..80eb7f9 100644 --- a/src/osm_osw_reformatter/version.py +++ b/src/osm_osw_reformatter/version.py @@ -1 +1 @@ -__version__ = '0.3.2' +__version__ = '0.3.3' diff --git a/tests/unit_tests/test_files/bug_3477.xml b/tests/unit_tests/test_files/bug_3477.xml new file mode 100644 index 0000000..7f7995d --- /dev/null +++ b/tests/unit_tests/test_files/bug_3477.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/tests/unit_tests/test_osm2osw/test_osm2osw.py b/tests/unit_tests/test_osm2osw/test_osm2osw.py index f478d12..b22506d 100644 --- a/tests/unit_tests/test_osm2osw/test_osm2osw.py +++ b/tests/unit_tests/test_osm2osw/test_osm2osw.py @@ -14,6 +14,7 @@ TEST_INCLINE_FILE = os.path.join(ROOT_DIR, 'test_files/incline-test.xml') TEST_INVALID_NODE_TAGS_FILE = os.path.join(ROOT_DIR, 'test_files/node_with_invalid_tags.xml') TEST_TREE_FILE = os.path.join(ROOT_DIR, 'test_files/tree-test.xml') +TEST_BUG_3477_FILE = os.path.join(ROOT_DIR, 'test_files/bug_3477.xml') def is_valid_float(value): @@ -322,6 +323,33 @@ async def run_test(): asyncio.run(run_test()) + def test_bug_3477_ext_only_closed_way_emits_polygon(self): + osm_file_path = TEST_BUG_3477_FILE + + async def run_test(): + osm2osw = OSM2OSW(osm_file=osm_file_path, workdir=OUTPUT_DIR, prefix='bug3477') + result = await osm2osw.convert() + self.assertTrue(result.status) + self.assertEqual(len(result.generated_files), 1) + + polygon_file = result.generated_files[0] + self.assertTrue(polygon_file.endswith('.graph.polygons.geojson')) + + with open(polygon_file) as f: + geojson = json.load(f) + + self.assertEqual(geojson.get("$schema"), OSW_SCHEMA_ID) + self.assertEqual(len(geojson.get("features", [])), 1) + + feature = geojson["features"][0] + self.assertEqual(feature["geometry"]["type"], "Polygon") + self.assertEqual(feature["properties"].get("ext:demolished:building"), "yes") + + for file_path in result.generated_files: + os.remove(file_path) + + asyncio.run(run_test()) + if __name__ == '__main__': unittest.main() diff --git a/tests/unit_tests/test_serializer/test_osm_graph.py b/tests/unit_tests/test_serializer/test_osm_graph.py index fcff537..7560f5f 100644 --- a/tests/unit_tests/test_serializer/test_osm_graph.py +++ b/tests/unit_tests/test_serializer/test_osm_graph.py @@ -16,7 +16,9 @@ OSMTaggedNodeParser, ) from src.osm_osw_reformatter.serializer.osw.osw_normalizer import ( + OSWLineNormalizer, OSWNodeNormalizer, + OSWPolygonNormalizer, OSWPointNormalizer, ) @@ -510,6 +512,25 @@ def test_line_parser_no_nodes(self): self.assertEqual(len(self.mock_graph.nodes), 1) self.assertEqual(len(self.mock_graph.edges), 0) + def test_line_parser_skips_closed_ambiguous_custom_way(self): + parser = OSMLineParser(self.mock_graph, OSWLineNormalizer.osw_line_filter) + nodes = [ + MagicMock(ref=1), + MagicMock(ref=2), + MagicMock(ref=3), + MagicMock(ref=1), + ] + + parser.way( + MagicMock( + tags={"ext:demolished:building": "yes"}, + id=301846, + nodes=nodes, + ) + ) + + self.assertEqual(len(self.mock_graph.nodes), 0) + def test_construct_geometries_missing_node_attributes(self): # Add edge with references to missing nodes self.mock_graph.add_node(1, lon=1) @@ -532,6 +553,47 @@ def test_simplify_circular_path(self): edges = list(self.osm_graph.get_graph().edges(data=True)) self.assertEqual(len(edges), 1) + def test_construct_geometries_custom_polygon_node(self): + self.mock_graph.add_node( + "g301846", + ndref=[ + [-122.3206583, 47.6151489], + [-122.3203266, 47.6151507], + [-122.3203263, 47.6149898], + [-122.3206509, 47.6149868], + [-122.3206583, 47.6151489], + ], + indref=[], + **{"ext:demolished:building": "yes"}, + ) + + self.osm_graph.construct_geometries() + + node_data = self.mock_graph.nodes["g301846"] + self.assertIn("geometry", node_data) + self.assertIsInstance(node_data["geometry"], Polygon) + + def test_polygon_parser_accepts_closed_custom_area(self): + parser = OSMPolygonParser(self.mock_graph, OSWPolygonNormalizer.osw_polygon_filter) + outer_ring = [[ + MagicMock(lon=-122.3206583, lat=47.6151489), + MagicMock(lon=-122.3203266, lat=47.6151507), + MagicMock(lon=-122.3203263, lat=47.6149898), + MagicMock(lon=-122.3206509, lat=47.6149868), + MagicMock(lon=-122.3206583, lat=47.6151489), + ]] + + parser.area( + MagicMock( + tags={"ext:demolished:building": "yes"}, + id=301846, + outer_rings=lambda: outer_ring, + inner_rings=lambda _: [], + ) + ) + + self.assertIn("g301846", self.mock_graph.nodes) + def test_to_geojson_empty_graph(self): # Paths for test files edges_path = "test_edges_empty.geojson" @@ -901,6 +963,62 @@ def test_to_geojson_edges_reference_remapped_node_ids(self): self.assertIn(edge["_u_id"], node_ids) self.assertIn(edge["_v_id"], node_ids) + def test_to_geojson_topology_points_stay_in_nodes_file(self): + graph = nx.MultiDiGraph() + graph.add_node( + 10, + geometry=Point(0, 0), + lon=0.0, + lat=0.0, + **{"ext:osm_version": "1"}, + ) + graph.add_node( + 20, + geometry=Point(1, 1), + lon=1.0, + lat=1.0, + **{"ext:osm_version": "1"}, + ) + graph.add_edge( + 10, + 20, + geometry=LineString([(0, 0), (1, 1)]), + osm_id="99", + ) + + osm_graph = OSMGraph(G=graph) + + with TemporaryDirectory() as tmpdir: + nodes_path = os.path.join(tmpdir, 'nodes.geojson') + edges_path = os.path.join(tmpdir, 'edges.geojson') + points_path = os.path.join(tmpdir, 'points.geojson') + lines_path = os.path.join(tmpdir, 'lines.geojson') + zones_path = os.path.join(tmpdir, 'zones.geojson') + polygons_path = os.path.join(tmpdir, 'polygons.geojson') + + osm_graph.to_geojson( + nodes_path, + edges_path, + points_path, + lines_path, + zones_path, + polygons_path, + ) + + self.assertTrue(os.path.exists(nodes_path)) + self.assertFalse(os.path.exists(points_path)) + + with open(nodes_path) as f: + node_data = json.load(f) + with open(edges_path) as f: + edge_data = json.load(f) + + self.assertEqual(len(node_data["features"]), 2) + node_ids = {feat["properties"]["_id"] for feat in node_data["features"]} + edge = edge_data["features"][0]["properties"] + self.assertIn(edge["_u_id"], node_ids) + self.assertIn(edge["_v_id"], node_ids) + def test_to_geojson_zones_reference_remapped_node_ids_in_w_id(self): graph = nx.MultiDiGraph() graph.add_node(10, geometry=Point(0, 0), lon=0.0, lat=0.0) diff --git a/tests/unit_tests/test_serializer/test_osw_normalizer.py b/tests/unit_tests/test_serializer/test_osw_normalizer.py index d0702a7..f36fea6 100644 --- a/tests/unit_tests/test_serializer/test_osw_normalizer.py +++ b/tests/unit_tests/test_serializer/test_osw_normalizer.py @@ -35,7 +35,7 @@ def test_is_sidewalk(self): def test_is_sidewalk_with_ext_tags(self): tags = {'ext:highway': 'footway', 'ext:footway': 'sidewalk'} normalizer = OSWWayNormalizer(tags) - self.assertTrue(normalizer.is_sidewalk()) + self.assertFalse(normalizer.is_sidewalk()) def test_is_crossing(self): tags = {'highway': 'footway', 'footway': 'crossing'} @@ -154,12 +154,12 @@ def test_is_kerb(self): def test_is_kerb_with_ext_tags(self): tags = {'ext:kerb': 'flush'} normalizer = OSWNodeNormalizer(tags) - self.assertTrue(normalizer.is_kerb()) + self.assertFalse(normalizer.is_kerb()) def test_is_kerb_with_ext_barrier_only(self): tags = {'ext:barrier': 'kerb'} normalizer = OSWNodeNormalizer(tags) - self.assertTrue(normalizer.is_kerb()) + self.assertFalse(normalizer.is_kerb()) def test_is_kerb_invalid(self): tags = {'kerb': 'invalid_type'} @@ -219,7 +219,7 @@ def test_is_tree(self): def test_is_tree_with_ext_tags(self): tags = {'ext:natural': 'tree'} normalizer = OSWPointNormalizer(tags) - self.assertTrue(normalizer.is_tree()) + self.assertFalse(normalizer.is_tree()) def test_normalize_tree(self): tags = {'natural': 'tree', 'leaf_cycle': 'Deciduous', 'leaf_type': 'needleLeaved'} @@ -238,7 +238,7 @@ def test_is_tree_row(self): def test_is_fence_with_ext_tags(self): tags = {'ext:barrier': 'fence'} normalizer = OSWLineNormalizer(tags) - self.assertTrue(normalizer.is_fence()) + self.assertFalse(normalizer.is_fence()) def test_normalize_tree_row(self): tags = {'natural': 'tree_row', 'leaf_cycle': 'evergreen', 'leaf_type': 'leafless'} @@ -263,7 +263,16 @@ def test_is_wood(self): def test_is_building_with_ext_tags(self): tags = {'ext:building': 'yes'} normalizer = OSWPolygonNormalizer(tags) - self.assertTrue(normalizer.is_building()) + self.assertFalse(normalizer.is_building()) + + def test_custom_polygon_ignores_internal_geometry_keys(self): + tags = { + 'ext:demolished:building': 'yes', + 'ndref': [[0.0, 0.0], [1.0, 0.0], [0.0, 0.0]], + 'indref': [], + } + normalizer = OSWPolygonNormalizer(tags) + self.assertTrue(normalizer.is_custom()) def test_normalize_wood(self): tags = {'natural': 'wood', 'leaf_cycle': 'mixed', 'leaf_type': 'broadleaved'} @@ -289,7 +298,7 @@ def test_invalid_zone_raises(self): def test_is_pedestrian_with_ext_tags(self): tags = {'ext:highway': 'pedestrian'} normalizer = OSWZoneNormalizer(tags) - self.assertTrue(normalizer.is_pedestrian()) + self.assertFalse(normalizer.is_pedestrian()) class TestCommonFunctions(unittest.TestCase):