diff --git a/tools/ec2_worker_manager.py b/tools/ec2_worker_manager.py index 86f6a78..6d21991 100644 --- a/tools/ec2_worker_manager.py +++ b/tools/ec2_worker_manager.py @@ -199,6 +199,7 @@ def build_network_interfaces( subnet_ids: list[str], security_group_ids: list[str], network_cards: list[dict[str, Any]] | None = None, + associate_public_ip: bool = False, ) -> list[dict[str, Any]]: """Construct the NetworkInterfaces parameter for ec2:RunInstances. @@ -211,8 +212,16 @@ def build_network_interfaces( `network_cards` (when supplied from DescribeInstanceTypes' NetworkInfo payload) drives ENI placement across physical network cards via `distribute_enis_across_cards` — required on multi-card instance - types like c6in.metal where the primary card holds only 5 of the 15 + types like c6in.metal where the primary card holds only 8 of the 16 available ENI slots. + + `associate_public_ip` (off by default) sets AssociatePublicIpAddress + on the primary ENI only — the entry with DeviceIndex=0 and (when + multi-card) NetworkCardIndex=0. AWS rejects the field on secondaries. + Needed because RunInstances ignores the subnet's MapPublicIpOnLaunch + when an explicit NetworkInterfaces[] is passed; without this opt-in + the launch comes up with no public IP. See PR #65 + issuecomment-4338158487 (anygpt-48) for the failure mode. """ if target_count < 1: raise ValueError("target_count must be >= 1") @@ -220,6 +229,7 @@ def build_network_interfaces( raise ValueError("subnet_ids must contain at least one subnet") placements = distribute_enis_across_cards(target_count, network_cards) interfaces: list[dict[str, Any]] = [] + primary_assigned = False for sequence_index, (card_index, device_index) in enumerate(placements): spec: dict[str, Any] = { "DeviceIndex": device_index, @@ -233,6 +243,17 @@ def build_network_interfaces( spec["NetworkCardIndex"] = card_index if security_group_ids: spec["Groups"] = list(security_group_ids) + # AssociatePublicIpAddress is only valid on the primary ENI. + # Set it on exactly one entry: DeviceIndex=0 plus (when we're + # emitting NetworkCardIndex) NetworkCardIndex=0. The placement + # helper guarantees the primary ENI lands first, so we only + # need to flip the flag the first time we see a DeviceIndex=0 + # entry on card 0. + if associate_public_ip and not primary_assigned and device_index == 0: + on_primary_card = (not network_cards) or card_index == 0 + if on_primary_card: + spec["AssociatePublicIpAddress"] = True + primary_assigned = True interfaces.append(spec) return interfaces @@ -308,6 +329,17 @@ class ManagerConfig: # primary ENI's security group. max_enis: int | None eni_subnet_ids: list[str] + # Public-IP association on the multi-ENI launch path. When the + # explicit NetworkInterfaces[] payload is used (ANYSCAN_MAX_ENIS + # set), AWS does NOT auto-associate a public IP from the subnet's + # MapPublicIpOnLaunch setting — the operator has to opt in + # explicitly via AssociatePublicIpAddress on the primary ENI. + # Default off so existing fleets stay unchanged. See PR #65 + # issuecomment-4338158487 (anygpt-48): the metal came up + # unreachable from outside the VPC because this entry was missing + # on the primary ENI and the operator had to manually + # allocate-address + associate-address post-launch. + associate_public_ip: bool @classmethod def from_env(cls) -> "ManagerConfig": @@ -365,6 +397,10 @@ def from_env(cls) -> "ManagerConfig": loop_interval_seconds=env_int("ANYSCAN_EC2_LOOP_INTERVAL_SECONDS", 60), max_enis=parse_max_enis(env_string("ANYSCAN_MAX_ENIS")), eni_subnet_ids=split_csv(env_string("ANYSCAN_EC2_ENI_SUBNET_IDS")), + associate_public_ip=( + env_string("ANYSCAN_EC2_ASSOCIATE_PUBLIC_IP", "false") or "false" + ).lower() + in {"1", "true", "yes", "on"}, ) @@ -946,6 +982,7 @@ def recreate_instance(self) -> dict[str, Any]: subnet_ids=subnet_pool, security_group_ids=security_group_ids, network_cards=network_cards, + associate_public_ip=self.config.associate_public_ip, ) eni_attach_info["attached"] = target_count eni_attach_info["subnet_pool"] = subnet_pool diff --git a/tools/test_ec2_worker_manager.py b/tools/test_ec2_worker_manager.py index 9a523a0..ca8dba7 100644 --- a/tools/test_ec2_worker_manager.py +++ b/tools/test_ec2_worker_manager.py @@ -302,6 +302,74 @@ def test_no_security_groups_omits_groups_key(self): ) self.assertNotIn("Groups", ifs[0]) + def test_associate_public_ip_default_omits_field(self): + """No NIC carries AssociatePublicIpAddress when the knob is off.""" + # Single-card path. + ifs = m.build_network_interfaces( + target_count=2, + subnet_ids=["subnet-aaa"], + security_group_ids=["sg-1"], + ) + for spec in ifs: + self.assertNotIn("AssociatePublicIpAddress", spec) + # Multi-card path. + cards = _c6in_metal_describe()["InstanceTypes"][0]["NetworkInfo"][ + "NetworkCards" + ] + ifs = m.build_network_interfaces( + target_count=4, + subnet_ids=["subnet-aaa"], + security_group_ids=["sg-1"], + network_cards=cards, + ) + for spec in ifs: + self.assertNotIn("AssociatePublicIpAddress", spec) + + def test_associate_public_ip_true_sets_on_primary_only(self): + """Multi-card: only DeviceIndex=0 + NetworkCardIndex=0 gets the flag.""" + cards = _c6in_metal_describe()["InstanceTypes"][0]["NetworkInfo"][ + "NetworkCards" + ] + ifs = m.build_network_interfaces( + target_count=4, + subnet_ids=["subnet-aaa"], + security_group_ids=["sg-1"], + network_cards=cards, + associate_public_ip=True, + ) + public_ip_entries = [ + spec for spec in ifs if spec.get("AssociatePublicIpAddress") is True + ] + self.assertEqual(len(public_ip_entries), 1) + primary = public_ip_entries[0] + self.assertEqual(primary["DeviceIndex"], 0) + self.assertEqual(primary["NetworkCardIndex"], 0) + # Every other entry is silent on the field — AWS rejects it on + # secondaries, so we must not emit a `False` either. + secondaries = [ + spec for spec in ifs if spec is not primary + ] + for spec in secondaries: + self.assertNotIn("AssociatePublicIpAddress", spec) + + def test_associate_public_ip_true_single_card_path(self): + """Single-card (network_cards=None): primary entry is just DeviceIndex=0.""" + ifs = m.build_network_interfaces( + target_count=3, + subnet_ids=["subnet-aaa"], + security_group_ids=["sg-1"], + associate_public_ip=True, + ) + public_ip_entries = [ + spec for spec in ifs if spec.get("AssociatePublicIpAddress") is True + ] + self.assertEqual(len(public_ip_entries), 1) + self.assertEqual(public_ip_entries[0]["DeviceIndex"], 0) + # NetworkCardIndex was not emitted on this path. + self.assertNotIn("NetworkCardIndex", public_ip_entries[0]) + for spec in ifs[1:]: + self.assertNotIn("AssociatePublicIpAddress", spec) + def _make_config(**overrides) -> Any: base = dict( @@ -331,6 +399,7 @@ def _make_config(**overrides) -> Any: loop_interval_seconds=60, max_enis=None, eni_subnet_ids=[], + associate_public_ip=False, ) base.update(overrides) return m.ManagerConfig(**base) @@ -492,6 +561,49 @@ def test_eni_subnet_ids_pool_round_robins(self): subnet_sequence, ["subnet-X", "subnet-Y", "subnet-X", "subnet-Y"] ) + def test_associate_public_ip_knob_propagates_to_primary_eni(self): + """ANYSCAN_EC2_ASSOCIATE_PUBLIC_IP=1 lands on the primary NIC only.""" + config = _make_config( + max_enis=15, + instance_type="c6in.metal", + associate_public_ip=True, + ) + ec2 = _FakeEc2Client(describe_payload=_c6in_metal_describe()) + manager = _make_manager(config, ec2) + + with mock.patch.object(manager, "current_instance_id", return_value=None), \ + mock.patch.object(manager, "ensure_ssh_access", return_value=None), \ + mock.patch.object(manager, "build_user_data", return_value="#!fake"): + manager.recreate_instance() + + call = ec2.run_instances_calls[0] + nics = call["NetworkInterfaces"] + primary_candidates = [ + nic for nic in nics + if nic.get("DeviceIndex") == 0 and nic.get("NetworkCardIndex") == 0 + ] + self.assertEqual(len(primary_candidates), 1) + self.assertIs(primary_candidates[0].get("AssociatePublicIpAddress"), True) + secondaries = [nic for nic in nics if nic is not primary_candidates[0]] + self.assertGreater(len(secondaries), 0) + for nic in secondaries: + self.assertNotIn("AssociatePublicIpAddress", nic) + + def test_associate_public_ip_default_off_emits_no_field(self): + """Default config (knob off) → no NIC carries AssociatePublicIpAddress.""" + config = _make_config(max_enis=15, instance_type="c6in.metal") + ec2 = _FakeEc2Client(describe_payload=_c6in_metal_describe()) + manager = _make_manager(config, ec2) + + with mock.patch.object(manager, "current_instance_id", return_value=None), \ + mock.patch.object(manager, "ensure_ssh_access", return_value=None), \ + mock.patch.object(manager, "build_user_data", return_value="#!fake"): + manager.recreate_instance() + + call = ec2.run_instances_calls[0] + for nic in call["NetworkInterfaces"]: + self.assertNotIn("AssociatePublicIpAddress", nic) + class FromEnvIntegrationTests(unittest.TestCase): """ManagerConfig.from_env honors ANYSCAN_MAX_ENIS / ENI_SUBNET_IDS.""" @@ -524,6 +636,25 @@ def test_invalid_max_enis_exits(self): with self.assertRaises(SystemExit): m.ManagerConfig.from_env() + def test_associate_public_ip_default_off(self): + os.environ.pop("ANYSCAN_EC2_ASSOCIATE_PUBLIC_IP", None) + cfg = m.ManagerConfig.from_env() + self.assertFalse(cfg.associate_public_ip) + + def test_associate_public_ip_truthy_values_enable(self): + for raw in ("1", "true", "yes", "on", "TRUE", "Yes"): + with self.subTest(value=raw): + os.environ["ANYSCAN_EC2_ASSOCIATE_PUBLIC_IP"] = raw + cfg = m.ManagerConfig.from_env() + self.assertTrue(cfg.associate_public_ip) + + def test_associate_public_ip_falsy_values_disable(self): + for raw in ("0", "false", "no", "off", "FALSE", ""): + with self.subTest(value=raw): + os.environ["ANYSCAN_EC2_ASSOCIATE_PUBLIC_IP"] = raw + cfg = m.ManagerConfig.from_env() + self.assertFalse(cfg.associate_public_ip) + if __name__ == "__main__": unittest.main()