Skip to content

ratiopath.parsers.GeoJSONParser

Parser for GeoJSON format annotation files.

GeoJSON is a format for encoding geographic data structures using JSON. This parser supports both polygon and point geometries.

Source code in ratiopath/parsers/geojson_parser.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
class GeoJSONParser:
    """Parser for GeoJSON format annotation files.

    GeoJSON is a format for encoding geographic data structures using JSON.
    This parser supports both polygon and point geometries.

    The loaded data is kept in ``self.gdf``; when it is not empty, multi-part
    geometries are exploded into one row per part.
    """

    def __init__(self, file_path: Path | str | TextIO) -> None:
        """Load the annotation file into ``self.gdf``.

        Args:
            file_path: Path or open text handle passed to ``gpd.read_file``.
        """
        self.gdf = gpd.read_file(file_path)

        if not self.gdf.empty:
            # Explode Multi-part geometries to simplify geometry handling
            self.gdf = self.gdf.explode(index_parts=True)

    def get_filtered_geodataframe(
        self, separator: str = "_", **kwargs: str
    ) -> GeoDataFrame:
        """Filter the GeoDataFrame based on property values.

        Every keyword names a property and supplies a regex that the string
        form of the property must match from its start (``Series.str.match``).
        All filters must hold for a row to be kept.

        A key that is exactly a column name is used as-is. Any other key is
        split on ``separator``: the first part selects a column, whose values
        are kept only when ``is_json_dict`` accepts them and are then decoded
        with ``json.loads``; the remaining parts walk into the decoded
        dictionaries. Rows lacking a nested key are dropped.

        Args:
            separator: The string used to separate keys in the filtering.
            **kwargs: Keyword arguments for filtering. Keys are column names,
                optionally followed by nested keys joined with ``separator``
                (e.g., ``classification_name`` with the default separator),
                and values are regex patterns to match against.

        Returns:
            A filtered GeoDataFrame. It is empty when a key refers to a column
            that does not exist. Missing values never match a pattern.
        """
        filtered_gdf = self.gdf
        for key, pattern in kwargs.items():
            if key in filtered_gdf.columns:
                # An exact column name wins, so columns whose own name
                # contains the separator (e.g. "object_type") stay reachable.
                column, nested = key, []
            else:
                column, *nested = key.split(separator)
                if column not in filtered_gdf.columns:
                    # If the first part of the key doesn't exist, return an
                    # empty frame
                    return self.gdf.iloc[0:0]

            series = filtered_gdf[column]
            if nested:
                text = series.astype(str)
                # ``apply`` on an empty Series yields an object-dtype result
                # that pandas does not treat as a boolean indexer; force bool
                # so the frame is filtered by rows and keeps its columns.
                mask = text.apply(is_json_dict).astype(bool)
                series = text[mask].apply(json.loads)
                filtered_gdf = filtered_gdf[mask]

            for subkey in nested:
                mask = series.apply(
                    lambda x, subkey=subkey: isinstance(x, dict) and subkey in x
                ).astype(bool)
                series = series[mask].apply(lambda x, subkey=subkey: x[subkey])
                filtered_gdf = filtered_gdf[mask]

            # Stringifying can turn missing values into "None"/"nan", which a
            # pattern could then match; exclude them explicitly.
            mask = series.notna() & series.astype(str).str.match(pattern, na=False)
            filtered_gdf = filtered_gdf[mask]

        return filtered_gdf

    def get_polygons(self, **kwargs: str) -> Iterable[Polygon]:
        """Get polygons from the GeoDataFrame.

        Args:
            **kwargs: Keyword arguments for filtering properties, forwarded to
                ``get_filtered_geodataframe``.

        Yields:
            Shapely Polygon objects; other geometry types are skipped.
        """
        filtered_gdf = self.get_filtered_geodataframe(**kwargs)
        for geom in filtered_gdf.geometry:
            if isinstance(geom, Polygon):
                yield geom

    def get_points(self, **kwargs: str) -> Iterable[Point]:
        """Get points from the GeoDataFrame.

        Args:
            **kwargs: Keyword arguments for filtering properties, forwarded to
                ``get_filtered_geodataframe``.

        Yields:
            Shapely Point objects; other geometry types are skipped.
        """
        filtered_gdf = self.get_filtered_geodataframe(**kwargs)
        for geom in filtered_gdf.geometry:
            if isinstance(geom, Point):
                yield geom

gdf = gpd.read_file(file_path) instance-attribute

__init__(file_path)

Source code in ratiopath/parsers/geojson_parser.py
18
19
20
21
22
23
def __init__(self, file_path: Path | str | TextIO) -> None:
    """Read the annotation file and store the resulting frame on ``self.gdf``.

    Args:
        file_path: Location or open text handle handed to ``gpd.read_file``.
    """
    frame = gpd.read_file(file_path)
    # An empty frame is stored untouched; otherwise multi-part geometries are
    # exploded to simplify geometry handling.
    self.gdf = frame if frame.empty else frame.explode(index_parts=True)

get_filtered_geodataframe(separator='_', **kwargs)

Filter the GeoDataFrame based on property values.

Parameters:

Name Type Description Default
separator str

The string used to separate keys in the filtering.

'_'
**kwargs str

Keyword arguments for filtering. Keys are column names, optionally followed by nested keys joined with the separator (e.g., 'classification_name' with the default separator), and values are regex patterns to match against.

{}

Returns:

Type Description
GeoDataFrame

A filtered GeoDataFrame.

Source code in ratiopath/parsers/geojson_parser.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def get_filtered_geodataframe(
    self, separator: str = "_", **kwargs: str
) -> GeoDataFrame:
    """Filter the GeoDataFrame based on property values.

    Every keyword names a property and supplies a regex that the string form
    of the property must match from its start (``Series.str.match``). All
    filters must hold for a row to be kept.

    A key that is exactly a column name is used as-is. Any other key is split
    on ``separator``: the first part selects a column, whose values are kept
    only when ``is_json_dict`` accepts them and are then decoded with
    ``json.loads``; the remaining parts walk into the decoded dictionaries.
    Rows lacking a nested key are dropped.

    Args:
        separator: The string used to separate keys in the filtering.
        **kwargs: Keyword arguments for filtering. Keys are column names,
            optionally followed by nested keys joined with ``separator``
            (e.g., ``classification_name`` with the default separator), and
            values are regex patterns to match against.

    Returns:
        A filtered GeoDataFrame. It is empty when a key refers to a column
        that does not exist. Missing values never match a pattern.
    """
    filtered_gdf = self.gdf
    for key, pattern in kwargs.items():
        if key in filtered_gdf.columns:
            # An exact column name wins, so columns whose own name contains
            # the separator (e.g. "object_type") stay reachable.
            column, nested = key, []
        else:
            column, *nested = key.split(separator)
            if column not in filtered_gdf.columns:
                # If the first part of the key doesn't exist, return an empty
                # frame
                return self.gdf.iloc[0:0]

        series = filtered_gdf[column]
        if nested:
            text = series.astype(str)
            # ``apply`` on an empty Series yields an object-dtype result that
            # pandas does not treat as a boolean indexer; force bool so the
            # frame is filtered by rows and keeps its columns.
            mask = text.apply(is_json_dict).astype(bool)
            series = text[mask].apply(json.loads)
            filtered_gdf = filtered_gdf[mask]

        for subkey in nested:
            mask = series.apply(
                lambda x, subkey=subkey: isinstance(x, dict) and subkey in x
            ).astype(bool)
            series = series[mask].apply(lambda x, subkey=subkey: x[subkey])
            filtered_gdf = filtered_gdf[mask]

        # Stringifying can turn missing values into "None"/"nan", which a
        # pattern could then match; exclude them explicitly.
        mask = series.notna() & series.astype(str).str.match(pattern, na=False)
        filtered_gdf = filtered_gdf[mask]

    return filtered_gdf

get_points(**kwargs)

Get points from the GeoDataFrame.

Parameters:

Name Type Description Default
**kwargs str

Keyword arguments for filtering properties.

{}

Yields:

Type Description
Iterable[Point]

Shapely Point objects.

Source code in ratiopath/parsers/geojson_parser.py
79
80
81
82
83
84
85
86
87
88
89
90
91
def get_points(self, **kwargs: str) -> Iterable[Point]:
    """Yield every point geometry that passes the property filters.

    Args:
        **kwargs: Property filters forwarded unchanged to
            ``get_filtered_geodataframe``.

    Yields:
        Shapely Point objects; geometries of any other type are skipped.
    """
    matching = self.get_filtered_geodataframe(**kwargs)
    yield from (
        shape for shape in matching.geometry if isinstance(shape, Point)
    )

get_polygons(**kwargs)

Get polygons from the GeoDataFrame.

Parameters:

Name Type Description Default
**kwargs str

Keyword arguments for filtering properties.

{}

Yields:

Type Description
Iterable[Polygon]

Shapely Polygon objects.

Source code in ratiopath/parsers/geojson_parser.py
65
66
67
68
69
70
71
72
73
74
75
76
77
def get_polygons(self, **kwargs: str) -> Iterable[Polygon]:
    """Yield every polygon geometry that passes the property filters.

    Args:
        **kwargs: Property filters forwarded unchanged to
            ``get_filtered_geodataframe``.

    Yields:
        Shapely Polygon objects; geometries of any other type are skipped.
    """
    matching = self.get_filtered_geodataframe(**kwargs)
    yield from (
        shape for shape in matching.geometry if isinstance(shape, Polygon)
    )