Skip to content

Commit dda118a

Browse files
authored
internal NTP: resolve boundary NTP sources from DNS in addition to being told explicitly (#6050)
This PR adds a special internal DNS name `boundary.ntp.control-plane.oxide.internal` which resolves to a set of AAAA records, one for each boundary NTP zone. We pass this name to chrony via a `pool` directive in its config file, allowing it to find the boundary NTP servers via internal DNS. This PR does not remove the explicit boundary NTP server names from either the sled-agent -> zone-setup or the zone-setup -> chrony config paths. Assuming this PR ships as part of R10, we can come back and remove those after R10 is out the door. We can't do both in one release, because we need to establish the new DNS name (via setting a new blueprint) in R10, at which point we can remove the explicit server names in R11, because NTP has to sync before we get the opportunity to modify DNS. Fixes #4791.
1 parent e4067e1 commit dda118a

7 files changed

Lines changed: 154 additions & 17 deletions

File tree

internal-dns/src/config.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
//!
6161
//! This module provides types used to assemble that configuration.
6262
63-
use crate::names::{ServiceName, DNS_ZONE};
63+
use crate::names::{ServiceName, BOUNDARY_NTP_DNS_NAME, DNS_ZONE};
6464
use anyhow::{anyhow, ensure};
6565
use core::fmt;
6666
use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord};
@@ -407,6 +407,27 @@ impl DnsConfigBuilder {
407407
(name, vec![DnsRecord::Aaaa(sled_ip)])
408408
});
409409

410+
// Assemble the special boundary NTP name to support chrony on internal
411+
// NTP zones.
412+
//
413+
// We leave this as `None` if there are no `BoundaryNtp` service zones,
414+
// which omits it from the final set of records.
415+
let boundary_ntp_records = self
416+
.service_instances_zones
417+
.get(&ServiceName::BoundaryNtp)
418+
.map(|zone2port| {
419+
let records = zone2port
420+
.iter()
421+
.map(|(zone, _port)| {
422+
let zone_ip = self.zones.get(&zone).expect(
423+
"service_backend_zone() ensures zones are defined",
424+
);
425+
DnsRecord::Aaaa(*zone_ip)
426+
})
427+
.collect::<Vec<DnsRecord>>();
428+
(BOUNDARY_NTP_DNS_NAME.to_string(), records)
429+
});
430+
410431
// Assemble the set of AAAA records for zones.
411432
let zone_records = self.zones.into_iter().map(|(zone, zone_ip)| {
412433
(zone.dns_name(), vec![DnsRecord::Aaaa(zone_ip)])
@@ -454,6 +475,7 @@ impl DnsConfigBuilder {
454475

455476
let all_records = sled_records
456477
.chain(zone_records)
478+
.chain(boundary_ntp_records)
457479
.chain(srv_records_sleds)
458480
.chain(srv_records_zones)
459481
.collect();
@@ -593,6 +615,11 @@ mod test {
593615
b.service_backend_zone(ServiceName::Oximeter, &zone2, 125).unwrap();
594616
b.service_backend_zone(ServiceName::Oximeter, &zone3, 126).unwrap();
595617

618+
// Add a boundary NTP service to one of the zones; this will also
619+
// populate the special `BOUNDARY_NTP_DNS_NAME`.
620+
b.service_backend_zone(ServiceName::BoundaryNtp, &zone2, 127)
621+
.unwrap();
622+
596623
// A sharded service
597624
b.service_backend_sled(
598625
ServiceName::SledAgent(sled1_uuid),

internal-dns/src/names.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@
66
77
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
88

9+
/// Name for the special boundary NTP DNS name
10+
///
11+
/// chrony does not support SRV records. This name resolves to AAAA records for
12+
/// each boundary NTP zone, and then we can point internal NTP chrony instances
13+
/// at this name for it to find the boundary NTP zones.
14+
pub const BOUNDARY_NTP_DNS_NAME: &str = "boundary-ntp";
15+
916
/// Name for the control plane DNS zone
1017
pub const DNS_ZONE: &str = "control-plane.oxide.internal";
1118

internal-dns/tests/output/internal-dns-zone.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@ builder: "non_trivial"
6868
"data": "::1:4"
6969
}
7070
],
71+
"_boundary-ntp._tcp": [
72+
{
73+
"type": "SRV",
74+
"data": {
75+
"port": 127,
76+
"prio": 0,
77+
"target": "001de000-c04e-4000-8000-000000000002.host.control-plane.oxide.internal",
78+
"weight": 0
79+
}
80+
}
81+
],
7182
"_nexus._tcp": [
7283
{
7384
"type": "SRV",
@@ -118,5 +129,11 @@ builder: "non_trivial"
118129
"weight": 0
119130
}
120131
}
132+
],
133+
"boundary-ntp": [
134+
{
135+
"type": "AAAA",
136+
"data": "::1:2"
137+
}
121138
]
122139
}

nexus/reconfigurator/execution/src/dns.rs

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,9 @@ mod test {
457457
use crate::overridables::Overridables;
458458
use crate::Sled;
459459
use dns_service_client::DnsDiff;
460+
use internal_dns::config::Host;
461+
use internal_dns::config::Zone;
462+
use internal_dns::names::BOUNDARY_NTP_DNS_NAME;
460463
use internal_dns::resolver::Resolver;
461464
use internal_dns::ServiceName;
462465
use internal_dns::DNS_ZONE;
@@ -662,7 +665,7 @@ mod test {
662665
})
663666
.collect();
664667

665-
let blueprint_dns_zone = blueprint_internal_dns_config(
668+
let mut blueprint_dns_zone = blueprint_internal_dns_config(
666669
&blueprint,
667670
&sleds_by_id,
668671
&Default::default(),
@@ -686,6 +689,10 @@ mod test {
686689
// 4. Our out-of-service zone does *not* appear in the DNS config,
687690
// neither with an AAAA record nor in an SRV record.
688691
//
692+
// 5. The boundary NTP zones' IP addresses are mapped to AAAA records in
693+
// the special boundary DNS name (in addition to having their normal
694+
// zone DNS name -> AAAA record from 1).
695+
//
689696
// Together, this tells us that we have SRV records for all services,
690697
// that those SRV records all point to at least one of the Omicron zones
691698
// for that service, and that we correctly ignored zones that were not
@@ -720,6 +727,33 @@ mod test {
720727
})
721728
.collect();
722729

730+
// Prune the special boundary NTP DNS name out, collecting their IP
731+
// addresses, and build a list of expected SRV targets to ensure these
732+
// IPs show up both in the special boundary NTP DNS name and as their
733+
// normal SRV records.
734+
let boundary_ntp_ips = blueprint_dns_zone
735+
.records
736+
.remove(BOUNDARY_NTP_DNS_NAME)
737+
.expect("missing boundary NTP DNS name")
738+
.into_iter()
739+
.map(|record| match record {
740+
DnsRecord::Aaaa(ip) => ip,
741+
_ => panic!("expected AAAA record; got {record:?}"),
742+
});
743+
let mut expected_boundary_ntp_srv_targets = boundary_ntp_ips
744+
.map(|ip| {
745+
let Some(zone_id) = omicron_zones_by_ip.get(&ip) else {
746+
panic!("did not find zone ID for boundary NTP IP {ip}");
747+
};
748+
let name = Host::Zone(Zone::Other(*zone_id)).fqdn();
749+
println!(
750+
"Boundary NTP IP {ip} maps to expected \
751+
SRV record target {name}"
752+
);
753+
name
754+
})
755+
.collect::<BTreeSet<_>>();
756+
723757
// Now go through all the DNS names that have AAAA records and remove
724758
// any corresponding Omicron zone. While doing this, construct a set of
725759
// the fully-qualified DNS names (i.e., with the zone name suffix
@@ -814,6 +848,16 @@ mod test {
814848
]);
815849

816850
for (name, records) in &blueprint_dns_zone.records {
851+
let mut this_kind = None;
852+
let kinds_left: Vec<_> =
853+
srv_kinds_expected.iter().copied().collect();
854+
for kind in kinds_left {
855+
if kind.dns_name() == *name {
856+
srv_kinds_expected.remove(&kind);
857+
this_kind = Some(kind);
858+
}
859+
}
860+
817861
let srvs: Vec<_> = records
818862
.iter()
819863
.filter_map(|dns_record| match dns_record {
@@ -828,19 +872,27 @@ mod test {
828872
correspond to a name that points to any Omicron zone",
829873
srv.target
830874
);
831-
}
832-
833-
let kinds_left: Vec<_> =
834-
srv_kinds_expected.iter().copied().collect();
835-
for kind in kinds_left {
836-
if kind.dns_name() == *name {
837-
srv_kinds_expected.remove(&kind);
875+
if this_kind == Some(ServiceName::BoundaryNtp) {
876+
assert!(
877+
expected_boundary_ntp_srv_targets.contains(&srv.target),
878+
"found boundary NTP SRV record with target {:?} \
879+
that does not correspond to an expected boundary \
880+
NTP zone",
881+
srv.target,
882+
);
883+
expected_boundary_ntp_srv_targets.remove(&srv.target);
838884
}
839885
}
840886
}
841887

842888
println!("SRV kinds with no records found: {:?}", srv_kinds_expected);
843889
assert!(srv_kinds_expected.is_empty());
890+
891+
println!(
892+
"Boundary NTP SRV targets not found: {:?}",
893+
expected_boundary_ntp_srv_targets
894+
);
895+
assert!(expected_boundary_ntp_srv_targets.is_empty());
844896
}
845897

846898
#[tokio::test]

sled-agent/src/services.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT;
6060
use illumos_utils::zone::AddressRequest;
6161
use illumos_utils::zpool::{PathInPool, ZpoolName};
6262
use illumos_utils::{execute, PFEXEC};
63+
use internal_dns::names::BOUNDARY_NTP_DNS_NAME;
64+
use internal_dns::names::DNS_ZONE;
6365
use internal_dns::resolver::Resolver;
6466
use itertools::Itertools;
6567
use nexus_config::{ConfigDropshotWithTls, DeploymentConfig};
@@ -1994,15 +1996,17 @@ impl ServiceManager {
19941996
.add_property(
19951997
"boundary",
19961998
"boolean",
1997-
&is_boundary.to_string(),
1999+
is_boundary.to_string(),
2000+
)
2001+
.add_property(
2002+
"boundary_pool",
2003+
"astring",
2004+
format!("{BOUNDARY_NTP_DNS_NAME}.{DNS_ZONE}"),
19982005
);
19992006

20002007
for s in ntp_servers {
2001-
chrony_config = chrony_config.add_property(
2002-
"server",
2003-
"astring",
2004-
&s.to_string(),
2005-
);
2008+
chrony_config =
2009+
chrony_config.add_property("server", "astring", s);
20062010
}
20072011

20082012
let dns_client_service;

smf/chrony-setup/manifest.xml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,32 @@
1212
</dependency>
1313

1414
<exec_method type='method' name='start'
15-
exec='/opt/oxide/zone-setup-cli/bin/zone-setup chrony-setup -b %{config/boundary} -s %{config/server} -a %{config/allow}'
15+
exec='/opt/oxide/zone-setup-cli/bin/zone-setup chrony-setup -b %{config/boundary} -p %{config/boundary_pool} -s %{config/server} -a %{config/allow}'
1616
timeout_seconds='0'>
1717
<method_context security_flags="aslr">
1818
<method_credential user="root" group="root"
1919
privileges="basic,file_chown" />
2020
</method_context>
2121
</exec_method>
2222

23-
2423
<property_group name='startd' type='framework'>
2524
<propval name='duration' type='astring' value='transient' />
2625
</property_group>
2726

2827
<property_group name="config" type="application">
28+
<!-- Whether this is a boundary or internal NTP zone -->
2929
<propval name="boundary" type="boolean" value="false" />
30+
<!--
31+
DNS name for the pool of boundary NTP servers. (Only used if this is
32+
an internal NTP zone.)
33+
-->
34+
<propval name="boundary_pool" type="astring" value="" />
35+
<!--
36+
Upstream NTP server. May be specifid more than once. (At least one is
37+
required.)
38+
-->
3039
<propval name="server" type="astring" value="" />
40+
<!-- Allowed IPv6 range for clients (typically the rack subnet) -->
3141
<propval name="allow" type="astring" value="" />
3242
</property_group>
3343

zone-setup/src/bin/zone-setup.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ struct ChronySetupArgs {
104104
/// allowed IPv6 range
105105
#[arg(short, long)]
106106
allow: Ipv6Net,
107+
/// DNS name for the boundary NTP zone pool
108+
#[arg(
109+
short = 'p',
110+
long,
111+
value_parser = NonEmptyStringValueParser::default(),
112+
)]
113+
boundary_pool: String,
107114
}
108115

109116
// The default clap parser for `serde_json::Value` is to wrap the argument in a
@@ -396,6 +403,9 @@ makestep 1.0 3
396403
leapsecmode slew
397404
maxslewrate 2708.333
398405
406+
# Refresh boundary NTP servers every two minutes instead of every two weeks
407+
refresh 120
408+
399409
";
400410

401411
let boundary_ntp_tpl = "#
@@ -447,6 +457,7 @@ maxslewrate 2708.333
447457
boundary: is_boundary,
448458
servers,
449459
allow,
460+
boundary_pool,
450461
} = args;
451462

452463
let mut new_config =
@@ -464,10 +475,19 @@ maxslewrate 2708.333
464475
.expect("write to String is infallible");
465476
}
466477
} else {
478+
// TODO-cleanup: Remove specific boundary NTP servers after R10 is cut;
479+
// once all racks are setting up the boundary NTP pool we can drop
480+
// individual server lines:
481+
// https://github.com/oxidecomputer/omicron/issues/6261
467482
for s in servers {
468483
writeln!(&mut new_config, "server {s} iburst minpoll 0 maxpoll 4")
469484
.expect("write to String is infallible");
470485
}
486+
writeln!(
487+
&mut new_config,
488+
"pool {boundary_pool} iburst maxdelay 0.1 maxsources 16",
489+
)
490+
.expect("write to String is infallible");
471491
}
472492

473493
// We read the contents from the old configuration file if it existed

0 commit comments

Comments
 (0)