Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ certs/*.pem
certs/*-key.pem
certs/*.key
*.log
.env
.env

/logs/*
!/logs/.gitkeep
14 changes: 8 additions & 6 deletions blueprints/container_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ def create_container_api():
gpu_list = container_raw.get("GPU_LIST") or container_raw.get("gpu_list") or []
cpu_number = int(container_raw.get("CPU_NUMBER") or container_raw.get("cpu_number") or 0)
memory = int(container_raw.get("MEMORY") or container_raw.get("memory") or 0)
# support swap memory in GB: keys can be SWAP_MEM, swap_memory, or SWAP_MEMORY
swap_memory = int(container_raw.get("SWAP_MEM") or container_raw.get("swap_memory") or container_raw.get("SWAP_MEMORY") or 0)
# support shared memory in GB: accept only SHARED_MEM/shared_memory/SHARED_MEMORY
shared_memory = int(container_raw.get("SHARED_MEM") or container_raw.get("shared_memory") or container_raw.get("SHARED_MEMORY") or 0)
name = container_raw.get("NAME") or container_raw.get("name") or ""
image = container_raw.get("image") or container_raw.get("IMAGE") or ""

# construct Container_info instance expected by service layer
container_obj = Container_info(gpu_list=gpu_list, cpu_number=cpu_number, memory=memory, name=name, image=image, swap_memory=swap_memory)
container_obj = Container_info(gpu_list=gpu_list, cpu_number=cpu_number, memory=memory, name=name, image=image, shared_memory=shared_memory)

except Exception as e:
return jsonify({"success": 0, "message": f"Invalid container payload: {str(e)}", "error_reason": "invalid_payload"}), 400
Expand Down Expand Up @@ -388,7 +388,7 @@ def get_container_detail_information_api():
"machine_ip",
"container_status",
"memory_gb",
"swap_gb",
"shared_gb",
"gpu_number",
"cpu_number",
"port",
Expand Down Expand Up @@ -520,6 +520,7 @@ def list_all_containers_bref_information_api():
{
"token",
"machine_id",
"user_id",
"page_number",
"page_size"
}
Expand Down Expand Up @@ -565,9 +566,10 @@ def list_all_containers_bref_information_api():
try: # 这里其实理论不会报错 但是保留
result = container_service.list_all_container_bref_information(
machine_id=machine_id,
user_id=request_user_id,
request_user_id=request_user_id,
page_number=page_number,
page_size=page_size)
page_size=page_size,
user_id=user_id)
# expect a dict: { containers: [...], total_page: n }
containers_info = result.get('containers', [])
total_page = result.get('total_page', 1)
Expand Down
12 changes: 6 additions & 6 deletions blueprints/machine_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def add_machine_api():
"gpu_number",
"gpu_type",
"memory_size",
"max_swap_gb",
"max_shared_gb",
"max_memory_gb",
"max_gpu_number",
"max_cpu_core_number",
Expand All @@ -55,7 +55,7 @@ def add_machine_api():
gpu_number = data.get("gpu_number", 0)
gpu_type = data.get("gpu_type", "")
memory_size = data.get("memory_size", 0)
max_swap_size = data.get("max_swap_gb", 2) # 默认值为2GB
max_shared_gb = data.get("max_shared_gb", 2) # 默认值为2GB
max_memory_gb = data.get("max_memory_gb", 0)
max_gpu_number = data.get("max_gpu_number", 0)
max_cpu_core_number = data.get("max_cpu_core_number", 0)
Expand All @@ -69,7 +69,7 @@ def add_machine_api():
gpu_number=gpu_number,
gpu_type=gpu_type,
memory_size=memory_size,
max_swap_size=max_swap_size,
max_shared_gb=max_shared_gb,
disk_size=disk_size,
max_memory_gb=max_memory_gb,
max_gpu_number=max_gpu_number,
Expand Down Expand Up @@ -120,7 +120,7 @@ def remove_machine_api():
def update_machine_api():
'''
allowed = {"machine_name", "machine_ip", "machine_type", "machine_status", "cpu_core_number",
"memory_size", "gpu_number", "gpu_type", "disk_size", "machine_description", "max_swap_gb", "max_memory_gb", "max_gpu_number", "max_cpu_core_number"}
"memory_size", "gpu_number", "gpu_type", "disk_size", "machine_description", "max_shared_gb", "max_memory_gb", "max_gpu_number", "max_cpu_core_number"}

通信数据格式:
发送格式:
Expand All @@ -136,7 +136,7 @@ def update_machine_api():
"gpu_type",
"memory_size",
"disk_size",
"max_swap_gb",
"max_shared_gb",
"max_memory_gb",
"max_gpu_number",
"max_cpu_core_number",
Expand Down Expand Up @@ -201,7 +201,7 @@ def get_detail_information_api():
"gpu_number": machine_info.gpu_number,
"gpu_type": machine_info.gpu_type,
"memory_size_gb": machine_info.memory_size_gb,
"max_swap_gb": machine_info.max_swap_gb,
"max_shared_gb": machine_info.max_shared_gb,
"max_memory_gb": machine_info.max_memory_gb,
"max_gpu_number": machine_info.max_gpu_number,
"max_cpu_core_number": machine_info.max_cpu_core_number,
Expand Down
Empty file added logs/.gitkeep
Empty file.
2 changes: 1 addition & 1 deletion models/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Container(db.Model):
port: int = db.Column(db.Integer, nullable=False, index=True)

memory_gb: int = db.Column(db.Integer, nullable=False)
swap_gb: int = db.Column(db.Integer, nullable=False)
shared_gb: int = db.Column(db.Integer, nullable=False)
gpu_number: int = db.Column(db.Integer, nullable=False)
cpu_number: int = db.Column(db.Integer, nullable=False)

Expand Down
2 changes: 1 addition & 1 deletion models/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Machine(db.Model):
memory_size_gb: int = db.Column(db.Integer, nullable=True)
gpu_number: int = db.Column(db.Integer, nullable=True)
gpu_type: str = db.Column(db.String(120), nullable=True)
max_swap_gb: int = db.Column(db.Integer, nullable=False, default=2)
max_shared_gb: int = db.Column(db.Integer, nullable=True)
disk_size_gb: int = db.Column(db.Integer, nullable=True)
machine_description: str = db.Column(db.String(500), nullable=True)
max_memory_gb: int = db.Column(db.Integer, nullable=True)
Expand Down
35 changes: 23 additions & 12 deletions repositories/containers_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..constant import ROLE
from sqlalchemy.exc import IntegrityError
from . import machine_repo
from .machine_repo import get_max_gpu_number, get_max_swap_gb, get_max_cpu_core_number, get_max_memory_gb
from .machine_repo import get_max_gpu_number, get_max_shared_gb, get_max_cpu_core_number, get_max_memory_gb


def get_by_id(container_id: int) -> Container | None:
Expand Down Expand Up @@ -39,8 +39,8 @@ def count_containers(machine_id: int | None = None) -> int:
return q.count()


def create_container(name: str, image: str, machine_id: int, memory_gb: int, swap_gb: int, gpu_number: int, cpu_number: int, port:int,status=None) -> Container:
container = Container(name=name, image=image, machine_id=machine_id, memory_gb=memory_gb, swap_gb=swap_gb, gpu_number=gpu_number, cpu_number=cpu_number, port=port)
def create_container(name: str, image: str, machine_id: int, memory_gb: int, shared_gb: int, gpu_number: int, cpu_number: int, port:int,status=None) -> Container:
container = Container(name=name, image=image, machine_id=machine_id, memory_gb=memory_gb, shared_gb=shared_gb, gpu_number=gpu_number, cpu_number=cpu_number, port=port)
if status is not None:
container.container_status = status
db.session.add(container)
Expand Down Expand Up @@ -165,21 +165,32 @@ def validate_gpu_request(machine: Machine, container: Container_info) -> None:
raise err


def validate_swap_request(machine: Machine, container: Container_info) -> int:
def validate_shared_request(machine: Machine, container: Container_info, requested_memory: int | None = None) -> int:
# Ensure memory validation runs first if caller didn't provide the already-validated memory
if requested_memory is None:
requested_memory = validate_memory_request(machine, container)

try:
requested_swap = int(getattr(container, 'SWAP_MEMORY', getattr(container, 'swap_memory', 0) or 0))
requested_shared = int(getattr(container, 'SHARED_MEMORY', getattr(container, 'shared_memory', 0)) or 0)
except Exception:
err = ValueError(f"swap_memory must be an integer: {getattr(container, 'SWAP_MEMORY', None)}")
err = ValueError(f"shared_memory must be an integer: {getattr(container, 'SHARED_MEMORY', None)}")
setattr(err, 'error_reason', 'invalid_config')
raise err
# 从机器的配置里取 max_swap_gb 字段
machine_max_swap = int(get_max_swap_gb(machine.id) or 0)
if requested_swap < 0 or requested_swap > machine_max_swap:
err = ValueError(f"Requested swap_memory {requested_swap}GB out of allowed range (0-{machine_max_swap} GB)")

# 从机器的配置里取 max_shared_gb 字段
machine_max_shared = int(get_max_shared_gb(machine.id) or 0)
if requested_shared < 0 or requested_shared > machine_max_shared:
err = ValueError(f"Requested shared_memory {requested_shared}GB out of allowed range (0-{machine_max_shared} GB)")
setattr(err, 'error_reason', 'invalid_config')
raise err
return requested_swap

# Shared must not exceed requested memory
if requested_shared > requested_memory:
err = ValueError(f"Requested shared_memory {requested_shared}GB cannot exceed requested memory {requested_memory}GB")
setattr(err, 'error_reason', 'invalid_config')
raise err

return requested_shared


def validate_cpu_request(machine: Machine, container: Container_info) -> int:
Expand Down
14 changes: 7 additions & 7 deletions repositories/machine_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def create_machine(machinename:str,
gpu_number:int,
gpu_type:str,
memory_size:int,
max_swap_size:int,
max_shared_gb:int,
disk_size:int,
max_cpu_core_number:int,
max_gpu_number:int,
Expand All @@ -71,7 +71,7 @@ def create_machine(machinename:str,
gpu_number=gpu_number,
gpu_type=gpu_type,
memory_size_gb=memory_size,
max_swap_gb=max_swap_size,
max_shared_gb=max_shared_gb,
max_cpu_core_number=max_cpu_core_number,
max_gpu_number=max_gpu_number,
max_memory_gb=max_memory_gb,
Expand All @@ -94,14 +94,14 @@ def update_machine(machine_id: int, *, commit: bool = True, **fields) -> bool:
部分更新用户字段。
使用示例:
update_machine(1, machine_name="new_name", cpu_core_number=16)
allowed = {"machine_name", "machine_ip", "machine_type", "machine_status", "cpu_core_number", "memory_size", "gpu_number", "gpu_type", "disk_size", "machine_description", "max_swap_gb", "max_memory_gb", "max_gpu_number", "max_cpu_core_number"}
allowed = {"machine_name", "machine_ip", "machine_type", "machine_status", "cpu_core_number", "memory_size", "gpu_number", "gpu_type", "disk_size", "machine_description", "max_shared_gb", "max_memory_gb", "max_gpu_number", "max_cpu_core_number"}
"""
machine = get_by_id(machine_id)
if not machine:
return None

allowed = {"machine_name", "machine_ip", "machine_type", "machine_status", "cpu_core_number",
"memory_size_gb", "gpu_number", "gpu_type", "disk_size_gb", "machine_description", "swap_size_gb", "max_swap_gb",
"memory_size_gb", "gpu_number", "gpu_type", "disk_size_gb", "machine_description", "shared_size_gb", "max_shared_gb",
"max_memory_gb", "max_gpu_number", "max_cpu_core_number"}
dirty = False
for k, v in fields.items():
Expand Down Expand Up @@ -137,7 +137,7 @@ def get_max_memory_gb(machine_id:int) -> int:
return int(max_val) if max_val is not None else 0


def get_max_swap_gb(machine_id:int) -> int:
"""用于取数据库里的max_swap_gb字段。"""
max_val = db.session.query(Machine.max_swap_gb).filter(Machine.id == machine_id).scalar()
def get_max_shared_gb(machine_id:int) -> int:
"""用于取数据库里的max_shared_gb字段。"""
max_val = db.session.query(Machine.max_shared_gb).filter(Machine.id == machine_id).scalar()
return int(max_val) if max_val is not None else 0
30 changes: 16 additions & 14 deletions services/container_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ class container_detail_information(BaseModel):
machine_ip:str
container_status:str
memory_gb:int
swap_gb:int
shared_gb:int
gpu_number:int
cpu_number:int
port:int
Expand Down Expand Up @@ -398,15 +398,15 @@ def Create_container(owner_name:str,machine_id:int,container:Container_info,publ
# GPU 参数检查
print(f"DEBUG: validating GPU request for machine {machine_id} and container {container.NAME}")
container_repo.validate_gpu_request(machine, container)
# swap 参数检查
print(f"DEBUG: validating swap request for machine {machine_id} and container {container.NAME}")
requested_swap = container_repo.validate_swap_request(machine, container)
# memory 参数检查(必须先于 shared)
print(f"DEBUG: validating memory request for machine {machine_id} and container {container.NAME}")
requested_memory = container_repo.validate_memory_request(machine, container)
# shared 参数检查(要求 shared <= memory)
print(f"DEBUG: validating shared request for machine {machine_id} and container {container.NAME}")
requested_shared = container_repo.validate_shared_request(machine, container, requested_memory)
# cpu 参数检查
print(f"DEBUG: validating CPU request for machine {machine_id} and container {container.NAME}")
requested_cpus = container_repo.validate_cpu_request(machine, container)
# memory 参数检查
print(f"DEBUG: validating memory request for machine {machine_id} and container {container.NAME}")
requested_memory = container_repo.validate_memory_request(machine, container)
# name/image/public_key length and format checks
container_repo.validate_names_and_lengths(container, public_key)
# duplicate name check (may raise IntegrityError)
Expand Down Expand Up @@ -495,7 +495,7 @@ def Create_container(owner_name:str,machine_id:int,container:Container_info,publ
image=container.image,
machine_id=machine_id,
memory_gb=container.MEMORY,
swap_gb=requested_swap,
shared_gb=requested_shared,
gpu_number=gpu_count,
cpu_number=container.CPU_NUMBER,
port=free_port,
Expand Down Expand Up @@ -990,7 +990,7 @@ def get_container_detail_information(container_id:int)->container_detail_informa
"machine_ip": get_machine_ip_by_id(container.machine_id),
"container_status": container.container_status.value,
"memory_gb": container.memory_gb,
"swap_gb": container.swap_gb,
"shared_gb": container.shared_gb,
"gpu_number": container.gpu_number,
"cpu_number": container.cpu_number,
"port": container.port,
Expand All @@ -1009,20 +1009,22 @@ def get_container_detail_information(container_id:int)->container_detail_informa


#返回一页容器的概要信息
def list_all_container_bref_information(machine_id:int, user_id:int, page_number:int, page_size:int)->dict:
def list_all_container_bref_information(machine_id:int, request_user_id:int, page_number:int, page_size:int, user_id:int = None)->dict:
# 非管理员用户必须先通过机器权限表过滤可见机器
if user_id and not _is_operator_user(user_id):
if not _is_operator_user(request_user_id):
allowed = set(machine_permission_repo.list_machine_ids_by_user(user_id))
if machine_id is not None:
if machine_id not in allowed:
containers = []
else:
containers = list_containers(limit=page_size, offset=page_number*page_size, machine_id=machine_id, user_id=None)
containers = list_containers(limit=page_size, offset=page_number*page_size, machine_id=machine_id, user_id=request_user_id)
else:
containers = [c for c in list_containers(limit=99999, offset=0, machine_id=None, user_id=None) if c.machine_id in allowed]
containers = [c for c in list_containers(limit=99999, offset=0, machine_id=None, user_id=request_user_id) if c.machine_id in allowed]
containers = containers[page_number*page_size:page_number*page_size+page_size]
else:
elif user_id is not None:
containers = list_containers(limit=page_size, offset=page_number*page_size, machine_id=machine_id, user_id=user_id)
else:
containers = list_containers(limit=page_size, offset=page_number*page_size, machine_id=machine_id, user_id=None)
res = []
for container in containers:
machine_ip = ""
Expand Down
Loading