-
Notifications
You must be signed in to change notification settings - Fork 260
Expand file tree
/
Copy pathdataset_utils.py
More file actions
133 lines (109 loc) · 5.32 KB
/
dataset_utils.py
File metadata and controls
133 lines (109 loc) · 5.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import os
from itertools import chain
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from loguru import logger
from yolo.tools.data_conversion import discretize_categories
def locate_label_paths(dataset_path: Path, phase_name: str) -> Tuple[Path, Optional[str]]:
    """
    Find the label location for a dataset phase (e.g. training) and report its format.

    Two layouts are probed, in this order:
      1. ``<dataset_path>/annotations/instances_<phase_name>.json`` (a single JSON file)
      2. ``<dataset_path>/labels/<phase_name>/`` (a directory with at least one ``.txt`` file)

    Args:
        dataset_path (Path): The path to the root directory of the dataset.
        phase_name (str): The name of the phase for which labels are being searched
            (e.g., "train", "val", "test").

    Returns:
        Tuple[Path, Optional[str]]: The path to the labels (the JSON file, or the
            directory of txt files) and the format string ("json" or "txt").
            When neither layout is found a warning is logged and ``([], None)`` is
            returned; the empty list is kept as the first element so that existing
            callers relying on that falsy placeholder keep working.
    """
    json_labels_path = dataset_path / "annotations" / f"instances_{phase_name}.json"
    if json_labels_path.is_file():
        return json_labels_path, "json"

    txt_labels_path = dataset_path / "labels" / phase_name
    # Only the existence of one .txt file matters, so stop scanning at the first hit
    # instead of collecting every file name in the directory.
    if txt_labels_path.is_dir() and any(name.endswith(".txt") for name in os.listdir(txt_labels_path)):
        return txt_labels_path, "txt"

    logger.warning("No labels found in the specified dataset path and phase name.")
    return [], None
def create_image_metadata(
    labels_path: str
) -> Tuple[Dict[int, List], Dict[int, Dict], Dict[str, int]]:
    """
    Load a COCO formatted annotation file and build three lookup dictionaries:
    image id to list of annotations, image id to image information, and
    image file name to image id.

    Image id is the `int` `id` assigned to an image in the COCO formatted .json file.

    Args:
        labels_path (str): The path to the annotation json file.

    Returns:
        (annotations_dict, image_info_dict, image_name_to_id_dict):
            annotations_dict is a dictionary where keys are image ids and values
                are lists of annotation dictionaries, as produced by
                `organize_annotations_by_image`.
            image_info_dict is a dictionary where keys are image ids and
                values are the image information dictionaries from the file.
            image_name_to_id_dict is a dictionary with the image file name
                (directory part stripped, extension kept) as key and the int
                image id as value.

    Raises:
        KeyError: If the file has no "images" entry.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform default encoding.
    with open(labels_path, "r", encoding="utf-8") as file:
        json_data = json.load(file)

    images = json_data["images"]
    image_name_to_id_dict = {Path(img["file_name"]).name: img["id"] for img in images}
    image_info_dict = {img["id"]: img for img in images}

    # Category ids are only remapped when the file declares categories.
    # NOTE(review): discretize_categories presumably returns a dict from raw
    # category id to a contiguous index - confirm against yolo.tools.data_conversion.
    id_to_idx = discretize_categories(json_data["categories"]) if "categories" in json_data else None
    annotations_dict = organize_annotations_by_image(json_data, id_to_idx)
    return annotations_dict, image_info_dict, image_name_to_id_dict
def organize_annotations_by_image(
    json_data: Dict[str, Any],
    category_id_to_idx: Optional[Dict[int, int]],
) -> dict[int, list[dict]]:
    """
    Returns a dict mapping image id to a list of all corresponding annotations.

    Annotations with a truthy "iscrowd" flag are excluded; an annotation that
    has no "iscrowd" key is treated as a regular (non-crowd) annotation.
    Image id is the `int` `image_id` in the corresponding annotation dict
    stored in the COCO formatted .json file.

    The input is not modified: when category ids are remapped, the returned
    annotation is a shallow copy carrying the new "category_id", so calling
    this function twice on the same `json_data` gives the same result.

    Args:
        json_data: Data read from a COCO json file. Must contain "annotations".
        category_id_to_idx: A dict mapping each raw category_id to the index
            that should replace it. When None or empty, category ids are
            left untouched.

    Returns:
        image_id_to_annotation_dict_list: A dictionary where keys are image ids
            and values are lists of annotation dictionaries. Images without
            any kept annotation do not appear as keys.

    Raises:
        KeyError: If an annotation's category_id is absent from a non-empty
            `category_id_to_idx`, or an annotation lacks "image_id".
    """
    image_id_to_annotation_dict_list: dict[int, list[dict]] = {}
    for annotation_dict in json_data["annotations"]:
        if annotation_dict.get("iscrowd", 0):
            continue
        if category_id_to_idx:
            # Copy instead of overwriting in place, otherwise a second pass over the
            # same json_data would try to remap already-remapped ids.
            annotation_dict = {
                **annotation_dict,
                "category_id": category_id_to_idx[annotation_dict["category_id"]],
            }
        image_id_to_annotation_dict_list.setdefault(annotation_dict["image_id"], []).append(annotation_dict)
    return image_id_to_annotation_dict_list
def scale_segmentation(
    annotations: List[Dict[str, Any]], image_dimensions: Dict[str, int]
) -> Optional[List[List[float]]]:
    """
    Scale the geometry of each annotation by the image dimensions and return one flat row per annotation.

    For every annotation the "segmentation" entry is used when the key is present
    (all of its sub-lists are concatenated into one coordinate list); otherwise
    the "bbox" values are used as they are. The values are grouped into pairs,
    the first of each pair is divided by the image width and the second by the
    image height.

    Annotations that carry neither "segmentation" nor "bbox" are skipped, so a
    row is never built from the geometry of a previous annotation.

    Args:
        annotations (List[Dict[str, Any]]): A list of annotation dictionaries, each with a "category_id".
        image_dimensions (Dict[str, int]): A dictionary containing image dimensions ("height" and "width").

    Returns:
        Optional[List[List[float]]]: A list of rows, where each row contains the category_id
            followed by the scaled values. None when `annotations` is None.

    Raises:
        ValueError: If an annotation's coordinate list has an odd number of values.
    """
    if annotations is None:
        return None

    h, w = image_dimensions["height"], image_dimensions["width"]
    seg_array_with_cat = []
    for anno in annotations:
        if "segmentation" in anno:
            seg_list = list(chain.from_iterable(anno["segmentation"]))
        elif "bbox" in anno:
            seg_list = anno["bbox"]
        else:
            # No geometry at all: skip rather than crash or reuse a stale seg_list.
            continue

        # Group into (x, y) pairs, scale by (width, height), then flatten back to one row.
        scaled_flat_seg_data = (np.array(seg_list).reshape(-1, 2) / [w, h]).ravel().tolist()
        seg_array_with_cat.append([anno["category_id"]] + scaled_flat_seg_data)
    return seg_array_with_cat