Skip to content

ratiopath.parsers.ASAPParser

Parser for ASAP format annotation files.

ASAP (Automated Slide Analysis Platform) uses an XML format to store annotations. This parser supports both polygon and point annotations.

Source code in ratiopath/parsers/asap_parser.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
class ASAPParser:
    """Read annotations out of an ASAP XML annotation file.

    ASAP (Automated Slide Analysis Platform) stores its annotations as XML.
    The document is parsed once at construction time; polygon-like and
    point-like annotations can then be retrieved, optionally filtered by
    annotation name and group.
    """

    def __init__(self, file_path: Path | str | TextIO):
        """Parse the annotation document and keep its tree and root element.

        Args:
            file_path: Location of the XML file, or an open text stream
                containing it. It is handed to ``ET.parse`` unchanged.
        """
        document = ET.parse(file_path)
        self.tree = document
        self.root = document.getroot()

    def _get_filtered_annotations(
        self, name: str, part_of_group: str
    ) -> Iterable[ET.Element]:
        """Iterate over ``Annotation`` elements accepted by both filters.

        Both patterns are applied with ``re.match`` semantics, i.e. they are
        anchored at the start of the attribute value but not at its end.

        Args:
            name: Regex pattern tested against the ``Name`` attribute.
            part_of_group: Regex pattern tested against the ``PartOfGroup``
                attribute.

        Yields:
            Every ``Annotation`` element below the root whose ``Name`` and
            ``PartOfGroup`` attributes both match, in document order.
        """
        name_pattern = re.compile(name)
        group_pattern = re.compile(part_of_group)

        for element in self.root.findall(".//Annotation"):
            if not name_pattern.match(element.attrib["Name"]):
                continue
            # The group is only inspected once the name has been accepted.
            if not group_pattern.match(element.attrib["PartOfGroup"]):
                continue
            yield element

    def _extract_coordinates(self, annotation: ET.Element) -> list[Point]:
        """Collect the coordinates stored beneath an annotation element.

        Args:
            annotation: XML annotation element.

        Returns:
            One ``Point`` per ``Coordinate`` descendant, in document order,
            built from its ``X`` and ``Y`` attributes converted to float.
        """
        points = []
        for node in annotation.findall(".//Coordinate"):
            x = float(node.attrib["X"])
            y = float(node.attrib["Y"])
            points.append(Point(x, y))
        return points

    def get_polygons(
        self, name: str = ".*", part_of_group: str = ".*"
    ) -> Iterable[Polygon]:
        """Generate the polygon annotations stored in the ASAP XML file.

        Only annotations whose ``Type`` attribute is ``"Polygon"`` or
        ``"Spline"`` are considered.

        Args:
            name: Regex pattern to match annotation names.
            part_of_group: Regex pattern to match annotation groups.

        Yields:
            One ``Polygon`` per matching annotation, built from that
            annotation's coordinates.
        """
        polygon_types = ("Polygon", "Spline")
        for element in self._get_filtered_annotations(name, part_of_group):
            if element.attrib["Type"] not in polygon_types:
                continue
            yield Polygon(self._extract_coordinates(element))

    def get_points(
        self, name: str = ".*", part_of_group: str = ".*"
    ) -> Iterable[Point]:
        """Generate the point annotations stored in the ASAP XML file.

        Only annotations whose ``Type`` attribute is ``"Point"`` or ``"Dot"``
        are considered.

        Args:
            name: Regex pattern to match annotation names.
            part_of_group: Regex pattern to match annotation groups.

        Yields:
            Every ``Point`` of every matching annotation, one at a time.
        """
        point_types = ("Point", "Dot")
        for element in self._get_filtered_annotations(name, part_of_group):
            if element.attrib["Type"] not in point_types:
                continue
            yield from self._extract_coordinates(element)

root = self.tree.getroot() instance-attribute

tree = ET.parse(file_path) instance-attribute

__init__(file_path)

Source code in ratiopath/parsers/asap_parser.py
19
20
21
def __init__(self, file_path: Path | str | TextIO):
    self.tree = ET.parse(file_path)
    self.root = self.tree.getroot()

get_points(name='.*', part_of_group='.*')

Parse point annotations from ASAP XML file.

Parameters:

Name Type Description Default
name str

Regex pattern to match annotation names.

'.*'
part_of_group str

Regex pattern to match annotation groups.

'.*'

Returns:

Type Description
Iterable[Point]

An iterable of shapely Point objects.

Source code in ratiopath/parsers/asap_parser.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def get_points(
    self, name: str = ".*", part_of_group: str = ".*"
) -> Iterable[Point]:
    """Generate the point annotations stored in the ASAP XML file.

    Only annotations whose ``Type`` attribute is ``"Point"`` or ``"Dot"``
    are considered.

    Args:
        name: Regex pattern to match annotation names.
        part_of_group: Regex pattern to match annotation groups.

    Yields:
        Every point of every matching annotation, one at a time, as
        returned by ``_extract_coordinates``.
    """
    point_annotations = (
        element
        for element in self._get_filtered_annotations(name, part_of_group)
        if element.attrib["Type"] in ("Point", "Dot")
    )
    for element in point_annotations:
        yield from self._extract_coordinates(element)

get_polygons(name='.*', part_of_group='.*')

Parse polygon annotations from ASAP XML file.

Parameters:

Name Type Description Default
name str

Regex pattern to match annotation names.

'.*'
part_of_group str

Regex pattern to match annotation groups.

'.*'

Returns:

Type Description
Iterable[Polygon]

An iterable of shapely Polygon objects.

Source code in ratiopath/parsers/asap_parser.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def get_polygons(
    self, name: str = ".*", part_of_group: str = ".*"
) -> Iterable[Polygon]:
    """Generate the polygon annotations stored in the ASAP XML file.

    Only annotations whose ``Type`` attribute is ``"Polygon"`` or
    ``"Spline"`` are considered.

    Args:
        name: Regex pattern to match annotation names.
        part_of_group: Regex pattern to match annotation groups.

    Yields:
        One ``Polygon`` per matching annotation, built from the result of
        ``_extract_coordinates`` for that annotation.
    """
    polygon_types = ("Polygon", "Spline")
    for element in self._get_filtered_annotations(name, part_of_group):
        if element.attrib["Type"] not in polygon_types:
            continue
        yield Polygon(self._extract_coordinates(element))