From 268dab4a8e87262584b1a92966945e0beed62f3f Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Thu, 19 Feb 2026 14:39:08 -0800 Subject: [PATCH 1/6] Fix difference_msec epoch leak when start milestone is unset difference_msec() only checked whether ms_end was zero (unset). When ms_start was unset (zero) but ms_end was set, the subtraction produced a large epoch-relative value instead of returning the missing sentinel. Check both milestones before computing the delta. --- include/proxy/Milestones.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/proxy/Milestones.h b/include/proxy/Milestones.h index 622b5738ae8..8f681357397 100644 --- a/include/proxy/Milestones.h +++ b/include/proxy/Milestones.h @@ -64,7 +64,7 @@ template class Milestones int64_t difference_msec(T ms_start, T ms_end, int64_t missing = -1) const // Return "missing" when Milestone is not set { - if (this->_milestones[static_cast(ms_end)] == 0) { + if (this->_milestones[static_cast(ms_start)] == 0 || this->_milestones[static_cast(ms_end)] == 0) { return missing; } return ink_hrtime_to_msec(this->_milestones[static_cast(ms_end)] - this->_milestones[static_cast(ms_start)]); From 8f80c83640cd41241fe18a99eda08b1865a7dadf Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Thu, 19 Feb 2026 14:40:25 -0800 Subject: [PATCH 2/6] Fix: set UA_BEGIN_WRITE milestone unconditionally Mark TS_MILESTONE_UA_BEGIN_WRITE in the SERVER_READ and SERVE_FROM_CACHE paths of handle_api_return(). Previously this milestone was only set in the API_SEND_RESPONSE_HDR hook path, which requires a plugin to hook TS_HTTP_SEND_RESPONSE_HDR_HOOK. Without plugins the milestone stayed at zero, causing msdms log fields that reference UA_BEGIN_WRITE (c_ttfb, o_proc, o_body, hit_proc, hit_xfer) to report -1. --- src/proxy/http/HttpSM.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc index c6fd71a3cf4..4fbc532726e 100644 --- a/src/proxy/http/HttpSM.cc +++ b/src/proxy/http/HttpSM.cc @@ -1691,6 +1691,7 @@ HttpSM::handle_api_return() break; } case HttpTransact::StateMachineAction_t::SERVER_READ: { + milestones.mark(TS_MILESTONE_UA_BEGIN_WRITE); if (unlikely(t_state.did_upgrade_succeed)) { // We've successfully handled the upgrade, let's now setup // a blind tunnel. @@ -1727,6 +1728,7 @@ HttpSM::handle_api_return() break; } case HttpTransact::StateMachineAction_t::SERVE_FROM_CACHE: { + milestones.mark(TS_MILESTONE_UA_BEGIN_WRITE); HttpTunnelProducer *p = setup_cache_read_transfer(); tunnel.tunnel_run(p); break; From f7f9e447c1a94530d0bb3b967c2b24262f84c462 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Thu, 19 Feb 2026 14:43:21 -0800 Subject: [PATCH 3/6] Test: add milestone logging (msdms) field validation autest New end-to-end test that sends a cacheable request twice (miss then hit) and validates all Phase 1 msdms timing fields in the resulting log: - All 16 key=value fields present on every line - Values are integers with no epoch-length garbage - Cache miss: origin-phase chain sums to c_ttfb within 2ms - Cache hit: hit_proc and hit_xfer are non-negative Includes a reusable verify_milestone_fields.py validation script. Depends on #12897 (difference_msec fix) and #12898 (UA_BEGIN_WRITE fix). --- .../logging/log-milestone-fields.test.py | 202 +++++++++++++++++ .../logging/verify_milestone_fields.py | 211 ++++++++++++++++++ 2 files changed, 413 insertions(+) create mode 100644 tests/gold_tests/logging/log-milestone-fields.test.py create mode 100644 tests/gold_tests/logging/verify_milestone_fields.py diff --git a/tests/gold_tests/logging/log-milestone-fields.test.py b/tests/gold_tests/logging/log-milestone-fields.test.py new file mode 100644 index 00000000000..7dc01e11f89 --- /dev/null +++ b/tests/gold_tests/logging/log-milestone-fields.test.py @@ -0,0 +1,202 @@ +''' +Verify that msdms milestone difference fields produce valid output for both +cache-miss and cache-hit transactions. This exercises the Phase 1 timing +fields proposed for the squid.log local_disk format. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +Test.Summary = 'Verify msdms milestone logging fields for cache miss and cache hit paths' +Test.ContinueOnFail = True + + +class MilestoneFieldsTest: + """ + Sends two requests for the same cacheable URL: the first is a cache miss + that populates the cache, the second is a cache hit served from RAM cache. + A custom log format records all Phase 1 milestone timing fields plus the + cache result code. A validation script then parses the log and checks: + + - Every expected key=value pair is present on each line + - All values are integers (>= 0 or -1 for unset milestones) + - Cache miss line: ms > 0, origin-phase fields present + - Cache hit line: hit_proc >= 0 and hit_xfer >= 0 + - No epoch-length garbage values (> 1_000_000_000) + """ + + # All Phase 1 msdms fields plus ms and cache result code for identification. + LOG_FORMAT = ( + 'crc=% ms=%' + ' c_ttfb=%<{TS_MILESTONE_UA_BEGIN_WRITE-TS_MILESTONE_SM_START}msdms>' + ' c_tls=%<{TS_MILESTONE_TLS_HANDSHAKE_END-TS_MILESTONE_TLS_HANDSHAKE_START}msdms>' + ' c_hdr=%<{TS_MILESTONE_UA_READ_HEADER_DONE-TS_MILESTONE_SM_START}msdms>' + ' c_proc=%<{TS_MILESTONE_CACHE_OPEN_READ_BEGIN-TS_MILESTONE_UA_READ_HEADER_DONE}msdms>' + ' cache=%<{TS_MILESTONE_CACHE_OPEN_READ_END-TS_MILESTONE_CACHE_OPEN_READ_BEGIN}msdms>' + ' dns=%<{TS_MILESTONE_SERVER_FIRST_CONNECT-TS_MILESTONE_CACHE_OPEN_READ_END}msdms>' + ' o_tcp=%<{TS_MILESTONE_SERVER_CONNECT_END-TS_MILESTONE_SERVER_FIRST_CONNECT}msdms>' + ' o_wait=%<{TS_MILESTONE_SERVER_FIRST_READ-TS_MILESTONE_SERVER_CONNECT_END}msdms>' + ' o_hdr=%<{TS_MILESTONE_SERVER_READ_HEADER_DONE-TS_MILESTONE_SERVER_FIRST_READ}msdms>' + ' o_proc=%<{TS_MILESTONE_UA_BEGIN_WRITE-TS_MILESTONE_SERVER_READ_HEADER_DONE}msdms>' + ' o_body=%<{TS_MILESTONE_SERVER_CLOSE-TS_MILESTONE_UA_BEGIN_WRITE}msdms>' + ' c_xfer=%<{TS_MILESTONE_SM_FINISH-TS_MILESTONE_SERVER_CLOSE}msdms>' + ' hit_proc=%<{TS_MILESTONE_UA_BEGIN_WRITE-TS_MILESTONE_CACHE_OPEN_READ_END}msdms>' + ' hit_xfer=%<{TS_MILESTONE_SM_FINISH-TS_MILESTONE_UA_BEGIN_WRITE}msdms>') + + MISS_FIELDS = [ + 'ms', + 'c_ttfb', + 'c_tls', + 'c_hdr', + 'c_proc', + 'cache', + 'dns', + 'o_tcp', + 'o_wait', + 'o_hdr', + 'o_proc', + 'o_body', + 'c_xfer', + 'hit_proc', + 'hit_xfer', + ] + + HIT_FIELDS = [ + 'ms', + 'c_ttfb', + 'c_tls', + 'c_hdr', + 'c_proc', + 'cache', + 'hit_proc', + 'hit_xfer', + ] + + def __init__(self): + self._server = Test.MakeOriginServer("server") + self._nameserver = Test.MakeDNServer("dns", default='127.0.0.1') + self._setupOriginServer() + self._setupTS() + + def _setupOriginServer(self): + self._server.addResponse( + "sessionlog.json", + { + 'timestamp': 100, + "headers": "GET /cacheable HTTP/1.1\r\nHost: example.com\r\n\r\n", + "body": "", + }, + { + 'timestamp': 100, + "headers": + ( + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Cache-Control: max-age=300\r\n" + "Connection: close\r\n" + "\r\n"), + "body": "This is a cacheable response body for milestone testing.", + }, + ) + + def _setupTS(self): + self._ts = Test.MakeATSProcess("ts", enable_cache=True) + + self._ts.Disk.records_config.update( + { + 'proxy.config.diags.debug.enabled': 1, + 'proxy.config.diags.debug.tags': 'http|log', + 'proxy.config.dns.nameservers': f'127.0.0.1:{self._nameserver.Variables.Port}', + 'proxy.config.dns.resolv_conf': 'NULL', + 'proxy.config.http.cache.http': 1, + 'proxy.config.log.max_secs_per_buffer': 1, + }) + + self._ts.Disk.remap_config.AddLine(f'map / http://127.0.0.1:{self._server.Variables.Port}/') + + self._ts.Disk.logging_yaml.AddLines( + f''' +logging: + formats: + - name: milestone_test + format: '{self.LOG_FORMAT}' + logs: + - filename: milestone_fields + format: milestone_test + mode: ascii +'''.split("\n")) + + @property + def _log_path(self) -> str: + return os.path.join(self._ts.Variables.LOGDIR, 'milestone_fields.log') + + @property + def _validate_script(self) -> str: + return os.path.join(Test.TestDirectory, 'verify_milestone_fields.py') + + def run(self): + self._sendCacheMiss() + self._waitForCacheIO() + self._sendCacheHit() + self._waitForLog() + self._validateLog() + + def _sendCacheMiss(self): + tr = Test.AddTestRun('Cache miss request') + tr.Processes.Default.StartBefore(self._server) + tr.Processes.Default.StartBefore(self._nameserver) + tr.Processes.Default.StartBefore(self._ts) + tr.MakeCurlCommand( + f'--verbose --header "Host: example.com" ' + f'http://127.0.0.1:{self._ts.Variables.port}/cacheable', ts=self._ts) + tr.Processes.Default.ReturnCode = 0 + tr.StillRunningAfter = self._server + tr.StillRunningAfter = self._ts + + def _waitForCacheIO(self): + tr = Test.AddTestRun('Wait for cache write to complete') + tr.Processes.Default.Command = 'sleep 2' + tr.Processes.Default.ReturnCode = 0 + tr.StillRunningAfter = self._server + tr.StillRunningAfter = self._ts + + def _sendCacheHit(self): + tr = Test.AddTestRun('Cache hit request') + tr.MakeCurlCommand( + f'--verbose --header "Host: example.com" ' + f'http://127.0.0.1:{self._ts.Variables.port}/cacheable', ts=self._ts) + tr.Processes.Default.ReturnCode = 0 + tr.StillRunningAfter = self._server + tr.StillRunningAfter = self._ts + + def _waitForLog(self): + tr = Test.AddTestRun('Wait for log file to be written') + tr.Processes.Default.Command = (os.path.join(Test.Variables.AtsTestToolsDir, 'condwait') + f' 60 1 -f {self._log_path}') + tr.Processes.Default.ReturnCode = 0 + tr.StillRunningAfter = self._server + tr.StillRunningAfter = self._ts + + def _validateLog(self): + tr = Test.AddTestRun('Validate milestone fields in log') + tr.Processes.Default.Command = f'python3 {self._validate_script} {self._log_path}' + tr.Processes.Default.ReturnCode = 0 + tr.Processes.Default.TimeOut = 10 + tr.Processes.Default.Streams.stdout += Testers.ContainsExpression('PASS', 'Validation script should report PASS') + tr.Processes.Default.Streams.stdout += Testers.ExcludesExpression('FAIL', 'Validation script should not report FAIL') + + +MilestoneFieldsTest().run() diff --git a/tests/gold_tests/logging/verify_milestone_fields.py b/tests/gold_tests/logging/verify_milestone_fields.py new file mode 100644 index 00000000000..35a0921d912 --- /dev/null +++ b/tests/gold_tests/logging/verify_milestone_fields.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +''' +Validate milestone timing fields in an ATS log file. + +Parses key=value log lines and checks: + - All expected fields are present + - All values are integers + - No epoch-length garbage (> 1 billion) from the difference_msec bug + - Cache miss lines have ms > 0 and origin-phase fields populated + - Cache hit lines have hit_proc and hit_xfer populated + - The miss-path chain sums to approximately c_ttfb +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +ALL_FIELDS = [ + 'crc', + 'ms', + 'c_ttfb', + 'c_tls', + 'c_hdr', + 'c_proc', + 'cache', + 'dns', + 'o_tcp', + 'o_wait', + 'o_hdr', + 'o_proc', + 'o_body', + 'c_xfer', + 'hit_proc', + 'hit_xfer', +] + +TIMING_FIELDS = [f for f in ALL_FIELDS if f != 'crc'] + +# Fields that form the contiguous miss-path chain to c_ttfb: +# c_ttfb = c_hdr + c_proc + cache + dns + o_conn + o_wait + o_hdr + o_proc +MISS_CHAIN = ['c_hdr', 'c_proc', 'cache', 'dns', 'o_tcp', 'o_wait', 'o_hdr', 'o_proc'] + +EPOCH_THRESHOLD = 1_000_000_000 + + +def parse_line(line: str) -> dict[str, str]: + """Parse a space-separated key=value log line into a dict.""" + fields = {} + for token in line.strip().split(): + if '=' in token: + key, val = token.split('=', 1) + fields[key] = val + return fields + + +def validate_line(fields: dict[str, str], line_num: int) -> list[str]: + """Return a list of error strings (empty = pass).""" + errors = [] + + for name in ALL_FIELDS: + if name not in fields: + errors.append(f'line {line_num}: missing field "{name}"') + + for name in TIMING_FIELDS: + val_str = fields.get(name) + if val_str is None: + continue + # Accept '-' as a valid sentinel for unset milestones. + if val_str == '-': + continue + try: + val = int(val_str) + except ValueError: + errors.append(f'line {line_num}: field "{name}" is not an integer: {val_str!r}') + continue + + if val > EPOCH_THRESHOLD: + errors.append(f'line {line_num}: field "{name}" looks like an epoch leak: {val} ' + f'(> {EPOCH_THRESHOLD})') + + crc = fields.get('crc', '') + is_miss = 'MISS' in crc or 'NONE' in crc + is_hit = 'HIT' in crc and 'MISS' not in crc + + ms_str = fields.get('ms', '0') + try: + ms_val = int(ms_str) + except ValueError: + ms_val = -1 + + if ms_val < 0 and ms_str != '-': + errors.append(f'line {line_num}: ms should be >= 0, got {ms_val}') + + if is_miss: + for name in MISS_CHAIN: + val_str = fields.get(name) + if val_str is None or val_str == '-': + continue + try: + val = int(val_str) + except ValueError: + continue + if val < -1: + errors.append(f'line {line_num}: miss field "{name}" has unexpected value: {val}') + + # Verify chain sum approximates c_ttfb (within tolerance for rounding). + chain_vals = [] + for name in MISS_CHAIN: + val_str = fields.get(name) + if val_str is None or val_str == '-': + chain_vals.append(0) + continue + try: + v = int(val_str) + chain_vals.append(v if v >= 0 else 0) + except ValueError: + chain_vals.append(0) + + chain_sum = sum(chain_vals) + c_ttfb_str = fields.get('c_ttfb') + if c_ttfb_str and c_ttfb_str != '-': + try: + c_ttfb_val = int(c_ttfb_str) + # Allow 2ms tolerance for rounding across multiple sub-millisecond fields. + if c_ttfb_val >= 0 and abs(chain_sum - c_ttfb_val) > 2: + errors.append( + f'line {line_num}: chain sum ({chain_sum}) != c_ttfb ({c_ttfb_val}), ' + f'diff={abs(chain_sum - c_ttfb_val)}ms') + except ValueError: + pass + + if is_hit: + for name in ['hit_proc', 'hit_xfer']: + val_str = fields.get(name) + if val_str is None: + errors.append(f'line {line_num}: cache hit missing field "{name}"') + continue + if val_str == '-': + errors.append(f'line {line_num}: cache hit field "{name}" should not be "-"') + continue + try: + val = int(val_str) + if val < 0: + errors.append(f'line {line_num}: cache hit field "{name}" should be >= 0, got {val}') + except ValueError: + errors.append(f'line {line_num}: cache hit field "{name}" is not an integer: {val_str!r}') + + return errors + + +def main(): + if len(sys.argv) != 2: + print(f'Usage: {sys.argv[0]} ', file=sys.stderr) + sys.exit(1) + + log_path = sys.argv[1] + try: + with open(log_path) as f: + lines = [l for l in f.readlines() if l.strip()] + except FileNotFoundError: + print(f'FAIL: log file not found: {log_path}') + sys.exit(1) + + if len(lines) < 2: + print(f'FAIL: expected at least 2 log lines (miss + hit), got {len(lines)}') + sys.exit(1) + + all_errors = [] + miss_found = False + hit_found = False + + for i, line in enumerate(lines, start=1): + fields = parse_line(line) + crc = fields.get('crc', '') + if 'MISS' in crc: + miss_found = True + if 'HIT' in crc and 'MISS' not in crc: + hit_found = True + errors = validate_line(fields, i) + all_errors.extend(errors) + + if not miss_found: + all_errors.append('No cache miss line found in log') + if not hit_found: + all_errors.append('No cache hit line found in log') + + if all_errors: + for err in all_errors: + print(f'FAIL: {err}') + sys.exit(1) + else: + print(f'PASS: validated {len(lines)} log lines ' + f'(miss={miss_found}, hit={hit_found}), all fields correct') + sys.exit(0) + + +if __name__ == '__main__': + main() From 3a7c71a70300f3f5dd413253a493ce367626e338 Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Thu, 19 Feb 2026 15:25:46 -0800 Subject: [PATCH 4/6] Remove unused MISS_FIELDS and HIT_FIELDS constants These lists were development artifacts not referenced by the test. The validation logic lives in verify_milestone_fields.py which has its own field definitions. --- .../logging/log-milestone-fields.test.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/tests/gold_tests/logging/log-milestone-fields.test.py b/tests/gold_tests/logging/log-milestone-fields.test.py index 7dc01e11f89..e71271b8000 100644 --- a/tests/gold_tests/logging/log-milestone-fields.test.py +++ b/tests/gold_tests/logging/log-milestone-fields.test.py @@ -57,35 +57,6 @@ class MilestoneFieldsTest: ' hit_proc=%<{TS_MILESTONE_UA_BEGIN_WRITE-TS_MILESTONE_CACHE_OPEN_READ_END}msdms>' ' hit_xfer=%<{TS_MILESTONE_SM_FINISH-TS_MILESTONE_UA_BEGIN_WRITE}msdms>') - MISS_FIELDS = [ - 'ms', - 'c_ttfb', - 'c_tls', - 'c_hdr', - 'c_proc', - 'cache', - 'dns', - 'o_tcp', - 'o_wait', - 'o_hdr', - 'o_proc', - 'o_body', - 'c_xfer', - 'hit_proc', - 'hit_xfer', - ] - - HIT_FIELDS = [ - 'ms', - 'c_ttfb', - 'c_tls', - 'c_hdr', - 'c_proc', - 'cache', - 'hit_proc', - 'hit_xfer', - ] - def __init__(self): self._server = Test.MakeOriginServer("server") self._nameserver = Test.MakeDNServer("dns", default='127.0.0.1') From 90dc07dca4c74379556f51aea0aa65724f0c76da Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Fri, 20 Feb 2026 00:09:37 -0800 Subject: [PATCH 5/6] retrigger CI From 0de6e47e9317a79c4ff128bfbf124704cab87e1c Mon Sep 17 00:00:00 2001 From: Bryan Call Date: Fri, 20 Feb 2026 16:20:32 -0800 Subject: [PATCH 6/6] Fix: relax timing jitter threshold in milestone validation The dns field (SERVER_FIRST_CONNECT - CACHE_OPEN_READ_END) can have small negative values due to millisecond-granularity timing overlap. Allow up to -10ms jitter instead of rejecting anything below -1. --- tests/gold_tests/logging/verify_milestone_fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gold_tests/logging/verify_milestone_fields.py b/tests/gold_tests/logging/verify_milestone_fields.py index 35a0921d912..dd161cfccdd 100644 --- a/tests/gold_tests/logging/verify_milestone_fields.py +++ b/tests/gold_tests/logging/verify_milestone_fields.py @@ -113,7 +113,7 @@ def validate_line(fields: dict[str, str], line_num: int) -> list[str]: val = int(val_str) except ValueError: continue - if val < -1: + if val < -10: errors.append(f'line {line_num}: miss field "{name}" has unexpected value: {val}') # Verify chain sum approximates c_ttfb (within tolerance for rounding).