Skip to content

Commit b98e44b

Browse files
committed
Add support for admin-only evaluation feedback.
Admin-only feedback is either generated automatically by the white diff comparison step, or supplied by the checker, which can output an additional line on stderr that starts with `ADMIN_MESSAGE:`.
1 parent fcb1905 commit b98e44b

17 files changed

Lines changed: 138 additions & 63 deletions

File tree

cms/db/submission.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,9 @@ class Evaluation(Base):
766766
nullable=False,
767767
default=[])
768768

769+
# Admin-facing output from the grader.
770+
admin_text: str | None = Column(String, nullable=True, default=None)
771+
769772
# Evaluation's time and wall-clock time, in seconds.
770773
execution_time: float | None = Column(
771774
Float,

cms/grading/Job.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def __init__(
9393
info: str | None = None,
9494
success: bool | None = None,
9595
text: list[str] | None = None,
96+
admin_text: str | None = None,
9697
files: dict[str, File] | None = None,
9798
managers: dict[str, Manager] | None = None,
9899
executables: dict[str, Executable] | None = None,
@@ -121,6 +122,8 @@ def __init__(
121122
to be presented to the user. The first item is a string,
122123
potentially with %-escaping; the following items are the
123124
values to be %-formatted into the first.
125+
admin_text: description of the outcome of the job,
126+
to be shown to admins.
124127
files: files submitted by the user.
125128
managers: managers provided by the admins.
126129
executables: executables created in the compilation.
@@ -155,6 +158,7 @@ def __init__(
155158

156159
self.success = success
157160
self.text = text
161+
self.admin_text = admin_text
158162

159163
self.files = files
160164
self.managers = managers
@@ -178,6 +182,7 @@ def export_to_dict(self) -> dict:
178182
'info': self.info,
179183
'success': self.success,
180184
'text': self.text,
185+
'admin_text': self.admin_text,
181186
'files': dict((k, v.digest)
182187
for k, v in self.files.items()),
183188
'managers': dict((k, v.digest)
@@ -316,6 +321,7 @@ def __init__(
316321
compilation_success: bool | None = None,
317322
executables: dict[str, Executable] | None = None,
318323
text: list[str] | None = None,
324+
admin_text: str | None = None,
319325
plus: dict | None = None,
320326
):
321327
"""Initialization.
@@ -331,7 +337,7 @@ def __init__(
331337
Job.__init__(self, operation, task_type, task_type_parameters,
332338
language, multithreaded_sandbox, archive_sandbox,
333339
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
334-
text, files, managers, executables)
340+
text, admin_text, files, managers, executables)
335341
self.compilation_success = compilation_success
336342
self.plus = plus
337343

@@ -537,6 +543,7 @@ def __init__(
537543
success: bool | None = None,
538544
outcome: str | None = None,
539545
text: list[str] | None = None,
546+
admin_text: str | None = None,
540547
user_output: str | None = None,
541548
plus: dict | None = None,
542549
only_execution: bool | None = False,
@@ -567,7 +574,7 @@ def __init__(
567574
Job.__init__(self, operation, task_type, task_type_parameters,
568575
language, multithreaded_sandbox, archive_sandbox,
569576
shard, keep_sandbox, sandboxes, sandbox_digests, info, success,
570-
text, files, managers, executables)
577+
text, admin_text, files, managers, executables)
571578
self.input = input
572579
self.output = output
573580
self.time_limit = time_limit
@@ -653,6 +660,7 @@ def to_submission(self, sr: SubmissionResult):
653660

654661
sr.evaluations += [Evaluation(
655662
text=self.text,
663+
admin_text=self.admin_text,
656664
outcome=self.outcome,
657665
execution_time=self.plus.get('execution_time'),
658666
execution_wall_clock_time=self.plus.get(

cms/grading/scoretypes/abc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ def get_html_details(
144144
translation=translation,
145145
gettext=_, ngettext=n_)
146146
except Exception:
147-
logger.error("Found an invalid score details string. "
148-
"Try invalidating scores.")
147+
logger.exception("Found an invalid score details string. "
148+
"Try invalidating scores.")
149149
return _("Score details temporarily unavailable.")
150150

151151
@abstractmethod

cms/grading/steps/trusted.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -77,13 +77,14 @@ def _sanitize_message(string: str) -> str:
7777
return string.replace('%', '%%')
7878

7979

80-
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
80+
def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str], str | None]:
8181
"""Extract the outcome and the text from the a standard manager output.
8282
8383
sandbox: the sandbox whose last execution was a manager writing
8484
a standard manager output.
8585
86-
return: outcome and text.
86+
return: outcome, contestant-facing text and admin-facing text
87+
(not translated).
8788
8889
raise (ValueError): if cannot decode the data.
8990
raise (FileNotFoundError): if any of the sandbox stdout or stderr file
@@ -108,6 +109,23 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
108109
logger.error("Manager stderr (text) is malformed. %r", error)
109110
raise error
110111

112+
# Parse special commands
113+
admin_text = None
114+
for line in stderr_file.readlines():
115+
line = line.strip()
116+
if not line:
117+
continue
118+
119+
PREFIX = "ADMIN_MESSAGE:"
120+
if line.startswith(PREFIX):
121+
line = _sanitize_message(line[len(PREFIX):].strip())
122+
if admin_text is not None:
123+
admin_text = admin_text + " " + line
124+
else:
125+
admin_text = line
126+
else:
127+
logger.warning(f"Unknown special manager command `{line}`")
128+
111129
try:
112130
outcome = float(outcome)
113131
except ValueError:
@@ -125,7 +143,7 @@ def extract_outcome_and_text(sandbox: Sandbox) -> tuple[float, list[str]]:
125143
logger.warning("Manager asked to translate text, but string "
126144
"'%s' is not recognized." % remaining)
127145

128-
return outcome, [text]
146+
return outcome, [text], admin_text
129147

130148

131149
def trusted_step(
@@ -196,7 +214,7 @@ def checker_step(
196214
correct_output_digest: str,
197215
output_filename: str,
198216
extra_args: list[str] | None = None
199-
) -> tuple[bool, float | None, list[str] | None]:
217+
) -> tuple[bool, float | None, list[str] | None, str | None]:
200218
"""Run the explicit checker given by the admins
201219
202220
sandbox: the sandbox to run the checker in; should already
@@ -213,7 +231,8 @@ def checker_step(
213231
extra_args: extra arguments to pass to the checker.
214232
215233
return: success (true if the checker was able to check the solution
216-
successfully), outcome and text (both None if success is False).
234+
successfully), outcome, text and admin_text (all None if success
235+
is False).
217236
218237
"""
219238
# Check that the files we are going to inject in the sandbox are not already
@@ -224,12 +243,12 @@ def checker_step(
224243
if sandbox.file_exists(filename):
225244
logger.error("File %s already in the sandbox for the checker.",
226245
filename)
227-
return False, None, None
246+
return False, None, None, None
228247

229248
# Copy the checker in the sandbox, after making sure it was provided.
230249
if checker_digest is None:
231250
logger.error("Configuration error: missing checker in task managers.")
232-
return False, None, None
251+
return False, None, None, None
233252
sandbox.create_file_from_storage(CHECKER_FILENAME, checker_digest,
234253
executable=True)
235254

@@ -247,17 +266,17 @@ def checker_step(
247266
if not box_success or not success:
248267
logger.error("Sandbox failed during checker step. "
249268
"See previous logs for the reason.")
250-
return False, None, None
269+
return False, None, None, None
251270

252271
# Extract outcome and text assuming a standard manager output.
253272
try:
254-
outcome, text = extract_outcome_and_text(sandbox)
273+
outcome, text, admin_text = extract_outcome_and_text(sandbox)
255274
except ValueError as e:
256275
logger.error("Invalid output from checker: %s", e)
257-
return False, None, None
276+
return False, None, None, None
258277
except FileNotFoundError as e:
259278
# This should not happen, as the redirect is handled by the sandbox.
260279
logger.error("Missing stdout or stderr file from checker: %s", e)
261-
return False, None, None
280+
return False, None, None, None
262281

263-
return True, outcome, text
282+
return True, outcome, text, admin_text

cms/grading/steps/whitediff.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def _white_diff_canonicalize(string: bytes) -> bytes:
7272
return string
7373

7474

75-
def _white_diff(output: typing.BinaryIO, res: typing.BinaryIO) -> bool:
75+
def _white_diff(output: typing.BinaryIO, res: typing.BinaryIO) -> tuple[bool, str | None]:
7676
"""Compare the two output files. Two files are equal if for every
7777
integer i, line i of first file is equal to line i of second
7878
file. Two lines are equal if they differ only by number or type of
@@ -89,33 +89,45 @@ def _white_diff(output: typing.BinaryIO, res: typing.BinaryIO) -> bool:
8989
9090
"""
9191

92+
line = 0
93+
9294
while True:
9395
lout = output.readline()
9496
lres = res.readline()
97+
line += 1
9598

9699
# Both files finished: comparison succeeded
97100
if len(lres) == 0 and len(lout) == 0:
98-
return True
101+
return True, None
99102

100103
# Only one file finished: ok if the other contains only blanks
101104
elif len(lres) == 0 or len(lout) == 0:
102105
lout = lout.strip(b''.join(_WHITES))
103106
lres = lres.strip(b''.join(_WHITES))
104-
if len(lout) > 0 or len(lres) > 0:
105-
return False
107+
if len(lout) > 0:
108+
return False, "Contestant output too long"
109+
if len(lres) > 0:
110+
return False, "Contestant output too short"
106111

107112
# Both files still have lines to go: ok if they agree except
108113
# for the number of whitespaces
109114
else:
110115
lout = _white_diff_canonicalize(lout)
111116
lres = _white_diff_canonicalize(lres)
112117
if lout != lres:
113-
return False
118+
LENGTH_LIMIT = 100
119+
if len(lout) > LENGTH_LIMIT:
120+
lout = lout[:LENGTH_LIMIT] + b"..."
121+
if len(lres) > LENGTH_LIMIT:
122+
lres = lres[:LENGTH_LIMIT] + b"..."
123+
lout = lout.decode("utf-8", errors='backslashreplace')
124+
lres = lres.decode("utf-8", errors='backslashreplace')
125+
return False, f"Expected `{lres}`, found `{lout}` on line {line}"
114126

115127

116128
def white_diff_fobj_step(
117129
output_fobj: typing.BinaryIO, correct_output_fobj: typing.BinaryIO
118-
) -> tuple[float, list[str]]:
130+
) -> tuple[float, list[str], str | None]:
119131
"""Compare user output and correct output with a simple diff.
120132
121133
It gives an outcome 1.0 if the output and the reference output are
@@ -129,15 +141,16 @@ def white_diff_fobj_step(
129141
return: the outcome as above and a description text.
130142
131143
"""
132-
if _white_diff(output_fobj, correct_output_fobj):
133-
return 1.0, [EVALUATION_MESSAGES.get("success").message]
144+
correct, admin_text = _white_diff(output_fobj, correct_output_fobj)
145+
if correct:
146+
return 1.0, [EVALUATION_MESSAGES.get("success").message], admin_text
134147
else:
135-
return 0.0, [EVALUATION_MESSAGES.get("wrong").message]
148+
return 0.0, [EVALUATION_MESSAGES.get("wrong").message], admin_text
136149

137150

138151
def white_diff_step(
139152
sandbox: Sandbox, output_filename: str, correct_output_filename: str
140-
) -> tuple[float, list[str]]:
153+
) -> tuple[float, list[str], str | None]:
141154
"""Compare user output and correct output with a simple diff.
142155
143156
It gives an outcome 1.0 if the output and the reference output are
@@ -157,4 +170,4 @@ def white_diff_step(
157170
return white_diff_fobj_step(out_file, res_file)
158171
else:
159172
return 0.0, [
160-
EVALUATION_MESSAGES.get("nooutput").message, output_filename]
173+
EVALUATION_MESSAGES.get("nooutput").message, output_filename], None

cms/grading/tasktypes/Batch.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,12 @@ def _execution_step(self, job, file_cacher):
364364
return outcome, text, output_file_params, stats, box_success, sandbox
365365

366366
def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, stats, box_success, sandbox, extra_args):
367+
admin_text = None
368+
367369
if box_success:
368370
assert (output_file_params is None) == (outcome is not None)
369371
if output_file_params is not None:
370-
box_success, outcome, text = eval_output(
372+
box_success, outcome, text, admin_text = eval_output(
371373
file_cacher, job,
372374
self.CHECKER_CODENAME
373375
if self._uses_checker() else None,
@@ -378,6 +380,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
378380
job.outcome = str(outcome) if outcome is not None else None
379381
job.text = text
380382
job.plus = stats
383+
job.admin_text = admin_text
381384

382385
if sandbox is not None:
383386
delete_sandbox(sandbox, job)

cms/grading/tasktypes/Communication.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ def evaluate(self, job, file_cacher):
396396
and box_success_mgr and evaluation_success_mgr
397397
outcome = None
398398
text = None
399+
admin_text = None
399400

400401
# If at least one sandbox had problems, or the manager did not
401402
# terminate correctly, we report an error (and no need for user stats).
@@ -415,7 +416,7 @@ def evaluate(self, job, file_cacher):
415416

416417
# Otherwise, we use the manager to obtain the outcome.
417418
else:
418-
outcome, text = extract_outcome_and_text(sandbox_mgr)
419+
outcome, text, admin_text = extract_outcome_and_text(sandbox_mgr)
419420

420421
# If asked so, save the output file with additional information,
421422
# provided that it exists.
@@ -433,6 +434,7 @@ def evaluate(self, job, file_cacher):
433434
job.outcome = "%s" % outcome if outcome is not None else None
434435
job.text = text
435436
job.plus = stats_user
437+
job.admin_text = admin_text
436438

437439
delete_sandbox(sandbox_mgr, job)
438440
for s in sandbox_user:

cms/grading/tasktypes/OutputOnly.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def evaluate(self, job, file_cacher):
124124
return
125125

126126
# First and only step: eval the user output.
127-
box_success, outcome, text = eval_output(
127+
box_success, outcome, text, admin_text = eval_output(
128128
file_cacher, job,
129129
OutputOnly.CHECKER_CODENAME if self._uses_checker() else None,
130130
user_output_digest=job.files[user_output_filename].digest)
@@ -133,5 +133,6 @@ def evaluate(self, job, file_cacher):
133133
job.success = box_success
134134
job.outcome = str(outcome) if outcome is not None else None
135135
job.text = text
136+
job.admin_text = admin_text
136137
# There is no actual evaluation, so no statistics.
137138
job.plus = {} if box_success else None

cms/grading/tasktypes/TwoSteps.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def evaluate(self, job, file_cacher):
295295

296296
outcome = None
297297
text = None
298+
admin_text = None
298299

299300
# Error in the sandbox: nothing to do!
300301
if not box_success:
@@ -333,7 +334,7 @@ def evaluate(self, job, file_cacher):
333334

334335
# Otherwise evaluate the output file.
335336
else:
336-
box_success, outcome, text = eval_output(
337+
box_success, outcome, text, admin_text = eval_output(
337338
file_cacher, job,
338339
TwoSteps.CHECKER_CODENAME
339340
if self._uses_checker() else None,
@@ -344,6 +345,7 @@ def evaluate(self, job, file_cacher):
344345
job.success = box_success
345346
job.outcome = str(outcome) if outcome is not None else None
346347
job.text = text
348+
job.admin_text = admin_text
347349
job.plus = stats
348350

349351
delete_sandbox(first_sandbox, job)

0 commit comments

Comments
 (0)