-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathoptions.py
More file actions
296 lines (292 loc) · 10.5 KB
/
options.py
File metadata and controls
296 lines (292 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
import argparse
from pathlib import Path
from . import version
from pathos.helpers import cpu_count
def parse_arguments(my_string=None):
    """
    Parse and validate command line arguments for iCatcher+.

    :param my_string: if provided, will parse this (whitespace-separated)
        string instead of the real command line arguments. Useful for testing.
    :return: parsed arguments as an argparse.Namespace, augmented with derived
        fields (args.device, args.fd_confidence_threshold, args.fd_num_cpus)
        and with path-like options converted to pathlib.Path objects.
    :raises ValueError: for invalid or mutually exclusive option combinations.
    :raises FileNotFoundError: if --video_filter is neither a file nor a folder.
    """
    parser = _build_parser()
    if my_string is not None:
        args = parser.parse_args(my_string.split())
    else:
        args = parser.parse_args()
    return _validate_and_finalize(args)


def _build_parser():
    """Build the argparse parser holding every iCatcher+ command line option."""
    parser = argparse.ArgumentParser(prog="icatcher")
    parser.add_argument(
        "source",
        type=str,
        help="The source to use (path to video file, folder or webcam id). Required unless launching icatcher app (use `icatcher --app`).",
        nargs="?",
        default=None,
    )
    parser.add_argument(
        "-a",
        "--app",
        action="store_true",
        help="Launch iCatcher+ web app.",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="icatcher+_lookit_regnet.pth",
        choices=[
            "icatcher+_lookit.pth",
            "icatcher+_lookit_regnet.pth",
            "icatcher+_bw-cali.pth",
            "icatcher+_senegal.pth",
        ],
        help="Model file that will be used for gaze detection.",
    )
    parser.add_argument(
        "--fd_model",
        type=str,
        choices=["retinaface", "opencv_dnn"],
        default="retinaface",
        help="The face detector model used. opencv_dnn may be more suitable for cpu usage if speed is priority over accuracy.",
    )
    parser.add_argument(
        "--use_fc_model",
        action="store_true",
        help="If supplied, will use face classifier "
        "to decide which crop to use from every frame.",
    )
    parser.add_argument(
        "--fc_model",
        type=str,
        default="face_classifier_lookit.pth",
        choices=[
            "face_classifier_lookit.pth",
            "face_classifier_cali-bw.pth",
            "face_classifier_senegal.pth",
        ],
        help="Face classifier model file that will be used for deciding "
        "which crop should we select from every frame.",
    )
    parser.add_argument(
        "--source_type",
        type=str,
        default="file",
        choices=["file", "webcam"],
        help="Selects source of stream to use.",
    )
    parser.add_argument(
        "--crop_percent",
        type=int,
        default=0,
        help="A percent to crop video frames to prevent other people from appearing.",
    )
    parser.add_argument(
        "--crop_mode",
        type=str,
        choices=["top", "left", "right"],
        nargs="+",
        default=["top"],
        help="Where to crop video from, multi-choice.",
    )
    parser.add_argument(
        "--show_output",
        action="store_true",
        help="Show results online in a separate window.",
    )
    parser.add_argument(
        "--output_annotation", type=str, help="Folder to output annotations to."
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="If an output annotation file exists, will overwrite it. Without this flag iCatcher+ will terminate upon encountering an existing annotation file.",
    )
    parser.add_argument(
        "--on_off",
        action="store_true",
        help="Left/right/away annotations will be swapped with on/off.",
    )
    parser.add_argument(
        "--mirror_annotation",
        action="store_true",
        help="Left will be swapped with right, and right will be swapped with left.",
    )
    parser.add_argument(
        "--output_format",
        type=str,
        default="raw_output",
        choices=["raw_output", "compressed", "ui"],
        help="Selects output format.",
    )
    parser.add_argument(
        "--output_video_path",
        help="If present, annotated video will be saved to this folder.",
    )
    parser.add_argument(
        "--pic_in_pic",
        action="store_true",
        help="If present, a mini picture with detections will be shown in the output video.",
    )
    parser.add_argument(
        "--output_file_suffix", type=str, default=".txt", help="The output file suffix."
    )
    parser.add_argument(
        "--per_channel_mean",
        nargs=3,
        metavar=("Channel1_mean", "Channel2_mean", "Channel3_mean"),
        type=float,
        default=[0.485, 0.456, 0.406],
        help="Supply custom per-channel mean of data for normalization.",
    )
    parser.add_argument(
        "--per_channel_std",
        nargs=3,
        metavar=("Channel1_std", "Channel2_std", "Channel3_std"),
        type=float,
        default=[0.229, 0.224, 0.225],
        help="Supply custom per-channel std of data for normalization.",
    )
    parser.add_argument(
        "--gpu_id", type=int, default=-1, help="GPU id to use, use -1 for CPU."
    )
    parser.add_argument("--log", help="If present, writes log to this path")
    parser.add_argument(
        "--verbosity",
        type=str,
        choices=["debug", "info", "warning"],
        default="info",
        help="Selects verbosity level.",
    )
    parser.add_argument(
        "--video_filter",
        type=str,
        help="Provided file will be used to filter only test videos,"
        " will assume certain file structure using the lookit/cali-bw/senegal datasets.",
    )
    parser.add_argument(
        "--illegal_transitions_path",
        type=str,
        help="Path to CSV with illegal transitions to 'smooth' over.",
    )
    parser.add_argument("--version", action="version", version="%(prog)s " + version)
    # face detection options:
    parser.add_argument(
        "--fd_confidence_threshold",
        type=float,
        help="The score confidence threshold that needs to be met for a face to be detected.",
    )
    parser.add_argument(
        "--fd_parallel_processing",
        action="store_true",
        default=False,
        help="(cpu, retinaface only) face detection will be parallelized, by batching the frames (requires buffering them), increasing memory usage, but decreasing overall processing time. Disallows live stream of results.",
    )
    parser.add_argument(
        "--fd_num_cpus",
        type=int,
        default=-1,
        help="(cpu, retinaface only) amount of cpus to use if face detection parallel processing is true (-1: use all available cpus)).",
    )
    parser.add_argument(
        "--fd_batch_size",
        type=int,
        default=16,
        help="(cpu, retinaface only) amount of frames fed at once into face detector if parallel processing is true.",
    )
    parser.add_argument(
        "--fd_skip_frames",
        type=int,
        default=0,
        help="(cpu, retinaface only) amount of frames to skip between each face detection if parallel processing is true. previous bbox will be used.",
    )
    parser.add_argument(
        "--track_face",
        action="store_true",
        help="If detection is lost, will keep track of face using last known position. WARNING: untested experimental feature.",
    )
    parser.add_argument(
        "--image_size",
        type=int,
        default=100,
        help="All images will be resized to this size. WARNING: changing default results in untested behavior.",
    )
    parser.add_argument(
        "--sliding_window_size",
        type=int,
        default=9,
        help="Number of frames in rolling window of each datapoint. WARNING: changing default results in untested behavior.",
    )
    parser.add_argument(
        "--window_stride",
        type=int,
        default=2,
        help="Stride between frames in rolling window. WARNING: changing default results in untested behavior.",
    )
    return parser


def _select_device(gpu_id):
    """Map a gpu id (-1 means CPU) to a torch device string, importing torch lazily."""
    if gpu_id == -1:
        return "cpu"
    # torch is imported lazily so that CPU-only runs never need it loaded here
    import os

    import torch

    if torch.cuda.is_available():
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
        return f"cuda:{gpu_id}"
    if torch.backends.mps.is_available():
        # NOTE(review): torch mps devices are usually addressed as plain "mps";
        # "mps:<id>" is kept from the original code — confirm torch accepts it.
        return f"mps:{gpu_id}"
    raise ValueError("GPU is not available. Was torch compiled with CUDA or MPS?")


def _validate_and_finalize(args):
    """Cross-validate parsed options, convert paths, and fill in derived fields."""
    if args.source is None and not args.app:
        raise ValueError(
            """
            \nMust either run app (--app, -a) or specify a video to process.
            \nTry `icatcher --help` for more information.
            """
        )
    if args.source is not None and args.app:
        raise ValueError(
            "Cannot run app (--app, -a) and process video with one command. Please run separately."
        )
    if (
        args.fd_confidence_threshold is None
    ):  # set defaults outside argparse to avoid complication
        if args.fd_model == "retinaface":
            args.fd_confidence_threshold = 0.9
        elif args.fd_model == "opencv_dnn":
            args.fd_confidence_threshold = 0.7
    # plain range check; no need to materialize a list of all valid percents
    if not 0 <= args.crop_percent <= 99:
        raise ValueError("crop_video must be a percent between 0 - 99")
    if "left" in args.crop_mode and "right" in args.crop_mode:
        # both sides are cropped, so each side may take at most 49%
        if args.crop_percent > 49:
            raise ValueError(
                "crop_video must be a percent between 0 - 49 when cropping both sides"
            )
    if args.video_filter:
        args.video_filter = Path(args.video_filter)
        if not args.video_filter.is_file() and not args.video_filter.is_dir():
            raise FileNotFoundError("Video filter is not a file or a folder")
    if args.output_annotation:
        args.output_annotation = Path(args.output_annotation)
        args.output_annotation.mkdir(exist_ok=True, parents=True)
    if args.output_video_path:
        args.output_video_path = Path(args.output_video_path)
        args.output_video_path.mkdir(exist_ok=True, parents=True)
    if args.log:
        args.log = Path(args.log)
    if args.on_off and args.output_format != "raw_output":
        raise ValueError(
            "On off mode can only be used with raw output format. Pass raw_output with the --output_format flag."
        )
    if args.sliding_window_size % 2 == 0:
        raise ValueError("sliding_window_size must be odd.")
    # defensive: argparse already supplies these defaults, but guard against
    # callers that pass empty lists through a pre-built namespace
    if not args.per_channel_mean:
        args.per_channel_mean = [0.485, 0.456, 0.406]
    if not args.per_channel_std:
        args.per_channel_std = [0.229, 0.224, 0.225]
    args.device = _select_device(args.gpu_id)
    # figure out how many cpus can be used (only relevant on cpu)
    if args.gpu_id == -1:
        if args.fd_num_cpus == -1:
            args.fd_num_cpus = cpu_count()
        elif args.fd_num_cpus > cpu_count():
            raise ValueError(
                "Number of cpus requested is greater than available cpus"
            )
    return args