Skip to content

Implement subnet pool utilization#10157

Open
david-crespo wants to merge 1 commit into `main` from `subnet-pool-utilization`
Open

Implement subnet pool utilization#10157
david-crespo wants to merge 1 commit into `main` from `subnet-pool-utilization`

Conversation

@david-crespo
Copy link
Contributor

@david-crespo david-crespo commented Mar 25, 2026

Closes #10109

Follows the same pattern as IP pool utilization. Counting addresses feels a little weird, since subnets are blocks of addresses, but there isn't really another unit we can use.

  • Datastore method wraps both queries (member subnets for capacity, allocated subnets for usage) in a single transaction, with a 10k row limit on each query to bound resource usage.
  • accumulate_subnet_sizes sums subnet sizes using checked 128-bit arithmetic, analogous to accumulate_ip_range_sizes.
  • App layer computes remaining = capacity - allocated in u128 before converting to f64, matching ip_pool_utilization_view.
  • Response type uses { remaining, capacity } (both f64), matching IpPoolUtilization.

Implement the subnet_pool_utilization_view endpoint, which was previously
a stub returning 500. The implementation follows the IP pool utilization
pattern: fetch subnet CIDRs from the database, compute sizes in Rust
using u128 arithmetic, and return remaining/capacity as f64.

Changes:
- Add a new API version (SUBNET_POOL_UTILIZATION_REMAINING) that changes
  SubnetPoolUtilization from allocated/capacity to remaining/capacity,
  matching IpPoolUtilization.
- Add datastore methods: subnet_pool_utilization, helpers for fetching
  member and external subnet CIDRs, and accumulate_subnet_sizes.
- Wire up the app layer to call the datastore and compute remaining.
- Add datastore unit tests for IPv4 and IPv6 pools.
- Add integration tests via assert_subnet_pool_utilization helper.
- Update verify_endpoints from GetUnimplemented to Get.
@david-crespo david-crespo force-pushed the subnet-pool-utilization branch from b699fb4 to 4e58295 Compare March 25, 2026 23:47
}
Ok(count)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare to IP pool utilization:

/// Return the number of IPs allocated from and the capacity of the provided
/// IP Pool.
///
/// Both values are fetched inside one retryable transaction so the
/// allocated count and the range list come from a consistent snapshot.
/// Capacity is accumulated as a `u128` because IPv6 range sizes can
/// exceed `i64`.
pub async fn ip_pool_utilization(
&self,
opctx: &OpContext,
authz_pool: &authz::IpPool,
) -> Result<(i64, u128), Error> {
// Reading utilization requires permission to read the pool itself and
// to list its children (ranges and allocated IPs).
opctx.authorize(authz::Action::Read, authz_pool).await?;
opctx.authorize(authz::Action::ListChildren, authz_pool).await?;
let conn = self.pool_connection_authorized(opctx).await?;
let (allocated, ranges) = self
.transaction_retry_wrapper("ip_pool_utilization")
.transaction(&conn, |conn| async move {
// Count of distinct IPs currently allocated from this pool.
let allocated = self
.ip_pool_allocated_count_on_connection(&conn, authz_pool)
.await?;
// All (first, last) address pairs for the pool's ranges; the
// helper caps the query at 10,000 rows.
let ranges = self
.ip_pool_list_ranges_batched_on_connection(
&conn, authz_pool,
)
.await?;
Ok((allocated, ranges))
})
.await
.map_err(|e| match &e {
// NotFound here means the pool itself is gone; surface a 404
// on the pool rather than a generic server error.
DieselError::NotFound => public_error_from_diesel(
e,
ErrorHandler::NotFoundByResource(authz_pool),
),
_ => public_error_from_diesel(e, ErrorHandler::Server),
})?;
// Sum range sizes in Rust rather than in SQL.
let capacity = Self::accumulate_ip_range_sizes(ranges)?;
Ok((allocated, capacity))
}
/// Test-only helper: return the total number of IPs allocated from the
/// provided pool, surfacing Diesel failures as generic server errors.
#[cfg(test)]
async fn ip_pool_allocated_count(
&self,
opctx: &OpContext,
authz_pool: &authz::IpPool,
) -> Result<i64, Error> {
// Only Read is required here; listing children is not involved.
opctx.authorize(authz::Action::Read, authz_pool).await?;
let conn = self.pool_connection_authorized(opctx).await?;
let result =
self.ip_pool_allocated_count_on_connection(&conn, authz_pool).await;
result.map_err(|err| public_error_from_diesel(err, ErrorHandler::Server))
}
/// Count the distinct, non-deleted external IPs drawn from this pool,
/// using an already-established connection.
async fn ip_pool_allocated_count_on_connection(
&self,
conn: &async_bb8_diesel::Connection<DbConnection>,
authz_pool: &authz::IpPool,
) -> Result<i64, DieselError> {
use nexus_db_schema::schema::external_ip;
// COUNT(DISTINCT ip) over live rows belonging to this pool.
let query = external_ip::table
.filter(external_ip::ip_pool_id.eq(authz_pool.id()))
.filter(external_ip::time_deleted.is_null())
.select(count(external_ip::ip).aggregate_distinct());
query.first_async::<i64>(conn).await
}
/// Test-only helper: return the total capacity (address count) of the
/// provided pool, summed over all of its live ranges.
#[cfg(test)]
async fn ip_pool_total_capacity(
&self,
opctx: &OpContext,
authz_pool: &authz::IpPool,
) -> Result<u128, Error> {
// Capacity reveals the pool's ranges, so require ListChildren as
// well as Read.
opctx.authorize(authz::Action::Read, authz_pool).await?;
opctx.authorize(authz::Action::ListChildren, authz_pool).await?;
let conn = self.pool_connection_authorized(opctx).await?;
let ranges = self
.ip_pool_list_ranges_batched_on_connection(&conn, authz_pool)
.await
.map_err(|err| {
public_error_from_diesel(
err,
ErrorHandler::NotFoundByResource(authz_pool),
)
})?;
Self::accumulate_ip_range_sizes(ranges)
}
/// Fetch the (first, last) address pair of every live range in the pool,
/// capped at a fixed batch size.
async fn ip_pool_list_ranges_batched_on_connection(
&self,
conn: &async_bb8_diesel::Connection<DbConnection>,
authz_pool: &authz::IpPool,
) -> Result<Vec<(IpNetwork, IpNetwork)>, DieselError> {
use nexus_db_schema::schema::ip_pool_range;
// NOTE: this query is deliberately unpaginated. To keep it from
// becoming a resource-exhaustion vector (someone creating a huge
// number of ranges to make it slow), it is capped at the current
// maximum page size, making it exactly as expensive as one paginated
// request. A pool with more than 10,000 ranges would be undercounted,
// which is hard to see as a practical problem.
const MAX_RANGES: i64 = 10000;
ip_pool_range::table
.filter(ip_pool_range::ip_pool_id.eq(authz_pool.id()))
.filter(ip_pool_range::time_deleted.is_null())
.select((ip_pool_range::first_address, ip_pool_range::last_address))
.limit(MAX_RANGES)
.get_results_async::<(IpNetwork, IpNetwork)>(conn)
.await
}
fn accumulate_ip_range_sizes(
ranges: Vec<(IpNetwork, IpNetwork)>,
) -> Result<u128, Error> {
let mut count: u128 = 0;
for range in ranges.into_iter() {
let first = range.0.ip();
let last = range.1.ip();
let r = IpRange::try_from((first, last))
.map_err(|e| Error::internal_error(e.as_str()))?;
match r {
IpRange::V4(r) => count += u128::from(r.len()),
IpRange::V6(r) => count += r.len(),
}
}
Ok(count)
}

// Back-convert the new { remaining, capacity } shape to the old
// { allocated, capacity } shape for the prior API version.
fn from(new: SubnetPoolUtilization) -> Self {
// Presumably both fields are f64 (per the response type described in
// the PR), so this subtraction can lose precision for very large
// capacities — acceptable since the old endpoint was an unimplemented
// stub and had no consumers.
Self { allocated: new.capacity - new.remaining, capacity: new.capacity }
}
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is silly and I think loses precision, but the endpoint was unimplemented, so there can't be anyone who is relying on the current version.

@david-crespo david-crespo marked this pull request as ready for review March 26, 2026 00:23
let capacity = (1u128 << 80) as f64;
assert_subnet_pool_utilization(client, SUBNET_POOL_NAME, 0.0, capacity)
.await;
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compare to

// This is mostly about testing the `total` field with huge numbers;
// testing `allocated` is done in a bunch of other places — look for
// assert_ip_pool_utilization calls.
#[nexus_test]
async fn test_ipv4_ip_pool_utilization_total(
cptestctx: &ControlPlaneTestContext,
) {
let client = &cptestctx.external_client;
let _pool = create_ipv4_pool(client, "p0").await;
// Freshly created pool: nothing allocated, zero capacity.
assert_ip_pool_utilization(client, "p0", 0, 0.0).await;
let add_url = "/v1/system/ip-pools/p0/ranges/add";
// add just 5 addresses to get the party started
let range = IpRange::V4(
Ipv4Range::new(
std::net::Ipv4Addr::new(10, 0, 0, 1),
std::net::Ipv4Addr::new(10, 0, 0, 5),
)
.unwrap(),
);
object_create::<IpRange, IpPoolRange>(client, &add_url, &range).await;
// Capacity now reflects the 5-address range; still nothing allocated.
assert_ip_pool_utilization(client, "p0", 0, 5.0).await;
}
// We're going to test adding an IPv6 pool and collecting its utilization, even
// though that requires operating directly on the datastore. When we resolve
// https://github.com/oxidecomputer/omicron/issues/8881, we can switch this to
// use the API to create the IPv6 pool and ranges instead.
#[nexus_test]
async fn test_ipv6_ip_pool_utilization_total(
cptestctx: &ControlPlaneTestContext,
) {
let client = &cptestctx.external_client;
let nexus = &cptestctx.server.server_context().nexus;
let datastore = nexus.datastore();
let log = cptestctx.logctx.log.new(o!());
let opctx = OpContext::for_tests(log, datastore.clone());
// Create an IPv6 pool directly through the datastore.
let identity = IdentityMetadataCreateParams {
name: "p0".parse().unwrap(),
description: String::new(),
};
let pool = datastore
.ip_pool_create(
&opctx,
nexus_db_model::IpPool::new(
&identity,
nexus_db_model::IpVersion::V6,
nexus_db_model::IpPoolReservationType::ExternalSilos,
),
)
.await
.expect("should be able to create IPv6 pool");
// Check the utilization is zero.
assert_ip_pool_utilization(client, "p0", 0, 0.0).await;
// Now let's add a gigantic range. This requires direct datastore
// shenanigans because adding IPv6 ranges through the API is currently not
// allowed. It's worth doing because we want this code to correctly handle
// IPv6 ranges when they are allowed again.
let by_id = NameOrId::Id(pool.id());
let (authz_pool, db_pool) = nexus
.ip_pool_lookup(&opctx, &by_id)
.expect("should be able to lookup pool we just created")
.fetch_for(authz::Action::CreateChild)
.await
.expect("should be able to fetch pool we just created");
let ipv6_range = Ipv6Range::new(
std::net::Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 0),
std::net::Ipv6Addr::new(
0xfd00, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
),
)
.unwrap();
let big_range = IpRange::V6(ipv6_range);
datastore
.ip_pool_add_range(&opctx, &authz_pool, &db_pool, &big_range)
.await
.expect("could not add range");
// The API reports capacity as f64, so compare against the range length
// converted the same way.
let capacity = ipv6_range.len() as f64;
assert_ip_pool_utilization(client, "p0", 0, capacity).await;
}


db.terminate().await;
logctx.cleanup_successful();
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compare to

// We're breaking out the utilization tests for IPv4 and IPv6 pools, since
// pools only contain one version now.
//
// See https://github.com/oxidecomputer/omicron/issues/8888.
#[tokio::test]
async fn test_ipv4_ip_pool_utilization() {
let logctx = dev::test_setup_log("test_ipv4_ip_pool_utilization");
let db = TestDatabase::new_with_datastore(&logctx.log).await;
let (opctx, datastore) = (db.opctx(), db.datastore());
let authz_silo = opctx.authn.silo_required().unwrap();
// A project is needed as the owner of the floating IP allocated below.
let project = Project::new(
authz_silo.id(),
project::ProjectCreate {
identity: IdentityMetadataCreateParams {
name: "my-project".parse().unwrap(),
description: "".to_string(),
},
},
);
let (.., project) =
datastore.project_create(&opctx, project).await.unwrap();
// create an IP pool for the silo, add a range to it, and link it to the silo
let identity = IdentityMetadataCreateParams {
name: "my-pool".parse().unwrap(),
description: "".to_string(),
};
let pool = datastore
.ip_pool_create(
&opctx,
IpPool::new(
&identity,
IpVersion::V4,
IpPoolReservationType::ExternalSilos,
),
)
.await
.expect("Failed to create IP pool");
let authz_pool = authz::IpPool::new(
authz::FLEET,
pool.id(),
LookupType::ById(pool.id()),
);
// capacity of zero because there are no ranges
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 0);
// Add a 5-address range: 10.0.0.1 through 10.0.0.5 inclusive.
let range = IpRange::V4(
Ipv4Range::new(
Ipv4Addr::new(10, 0, 0, 1),
Ipv4Addr::new(10, 0, 0, 5),
)
.unwrap(),
);
datastore
.ip_pool_add_range(&opctx, &authz_pool, &pool, &range)
.await
.expect("Could not add range");
// now it has a capacity of 5 because we added the range
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 5);
// Link the pool to the silo as its default so the allocation below
// can find it without naming it explicitly.
let link = IncompleteIpPoolResource {
ip_pool_id: pool.id(),
resource_type: IpPoolResourceType::Silo,
resource_id: authz_silo.id(),
is_default: true,
};
datastore
.ip_pool_link_silo(&opctx, link)
.await
.expect("Could not link pool to silo");
// Nothing allocated yet.
let ip_count = datastore
.ip_pool_allocated_count(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(ip_count, 0);
let identity = IdentityMetadataCreateParams {
name: "my-ip".parse().unwrap(),
description: "".to_string(),
};
// Allocate from default pool (no explicit pool or IP version)
let ip = datastore
.allocate_floating_ip(
&opctx,
project.id(),
identity,
FloatingIpAllocation::Auto { pool: None, ip_version: None },
)
.await
.expect("Could not allocate floating IP");
// First address in the range is handed out first.
assert_eq!(ip.ip.to_string(), "10.0.0.1/32");
let ip_count = datastore
.ip_pool_allocated_count(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(ip_count, 1);
// allocating one has nothing to do with total capacity
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 5);
db.terminate().await;
logctx.cleanup_successful();
}
// IPv6 counterpart of test_ipv4_ip_pool_utilization; exercises capacity
// values far beyond i64 range.
#[tokio::test]
async fn test_ipv6_ip_pool_utilization() {
let logctx = dev::test_setup_log("test_ipv6_ip_pool_utilization");
let db = TestDatabase::new_with_datastore(&logctx.log).await;
let (opctx, datastore) = (db.opctx(), db.datastore());
let authz_silo = opctx.authn.silo_required().unwrap();
// A project is needed as the owner of the floating IP allocated below.
let project = Project::new(
authz_silo.id(),
project::ProjectCreate {
identity: IdentityMetadataCreateParams {
name: "my-project".parse().unwrap(),
description: "".to_string(),
},
},
);
let (.., project) =
datastore.project_create(&opctx, project).await.unwrap();
// create an IP pool for the silo, add a range to it, and link it to the silo
let identity = IdentityMetadataCreateParams {
name: "my-pool".parse().unwrap(),
description: "".to_string(),
};
let pool = datastore
.ip_pool_create(
&opctx,
IpPool::new(
&identity,
IpVersion::V6,
IpPoolReservationType::ExternalSilos,
),
)
.await
.expect("Failed to create IP pool");
let authz_pool = authz::IpPool::new(
authz::FLEET,
pool.id(),
LookupType::ById(pool.id()),
);
// Link the pool to the silo as its default so the allocation below
// can find it without naming it explicitly.
let link = IncompleteIpPoolResource {
ip_pool_id: pool.id(),
resource_type: IpPoolResourceType::Silo,
resource_id: authz_silo.id(),
is_default: true,
};
datastore
.ip_pool_link_silo(&opctx, link)
.await
.expect("Could not link pool to silo");
// capacity of zero because there are no ranges
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 0);
// Add an IPv6 range
let ipv6_range = IpRange::V6(
Ipv6Range::new(
Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 10),
Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 1, 20),
)
.unwrap(),
);
datastore
.ip_pool_add_range(&opctx, &authz_pool, &pool, &ipv6_range)
.await
.expect("Could not add range");
// fd00::a through fd00::1:14 inclusive: 11 addresses in the last
// 16-bit group plus a full 65536-address span from the carry.
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 11 + 65536);
// Nothing allocated yet.
let ip_count = datastore
.ip_pool_allocated_count(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(ip_count, 0);
let identity = IdentityMetadataCreateParams {
name: "my-ip".parse().unwrap(),
description: "".to_string(),
};
// Allocate from default pool (no explicit pool or IP version)
let ip = datastore
.allocate_floating_ip(
&opctx,
project.id(),
identity,
FloatingIpAllocation::Auto { pool: None, ip_version: None },
)
.await
.expect("Could not allocate floating IP");
// First address in the range is handed out first.
assert_eq!(ip.ip.to_string(), "fd00::a/128");
let ip_count = datastore
.ip_pool_allocated_count(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(ip_count, 1);
// allocating one has nothing to do with total capacity
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 11 + 65536);
// add a giant range for fun
let ipv6_range = IpRange::V6(
Ipv6Range::new(
Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 1, 21),
Ipv6Addr::new(
0xfd00, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
),
)
.unwrap(),
);
datastore
.ip_pool_add_range(&opctx, &authz_pool, &pool, &ipv6_range)
.await
.expect("Could not add range");
// Total capacity now vastly exceeds i64::MAX, which is exactly why
// capacity is tracked as u128.
let max_ips = datastore
.ip_pool_total_capacity(&opctx, &authz_pool)
.await
.unwrap();
assert_eq!(max_ips, 1208925819614629174706166);
db.terminate().await;
logctx.cleanup_successful();
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Implement subnet pool utilization

1 participant