Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,15 @@
description="Frequency at which new labeling batches are automatically created for uploaded images. Options: 'never' (all images go to the same batch), 'daily' (new batch each day), 'weekly' (new batch each week), 'monthly' (new batch each month). Batch timestamps are appended to the labeling_batch_prefix to create unique batch names. Automatically organizing uploads into time-based batches simplifies dataset management and makes it easier to track and review collected data over time.",
examples=["never", "daily"],
)
image_name: Optional[Union[str, Selector(kind=[STRING_KIND])]] = Field(
default=None,
description="Optional custom name for the uploaded image. If provided, this name will be used instead of an auto-generated UUID. This is useful when you want to preserve the original filename or use a meaningful identifier (e.g., serial number, timestamp) for the image in the Roboflow dataset. The name should not include file extension. If not provided, a UUID will be generated automatically.",
examples=["serial_12345", "camera1_frame_001", "$inputs.filename"],
)

@classmethod
def get_parameters_accepting_batches(cls) -> List[str]:
return ["images", "predictions"]
return ["images", "predictions", "image_name"]

@classmethod
def describe_outputs(cls) -> List[OutputDefinition]:
Expand Down Expand Up @@ -294,6 +299,7 @@
fire_and_forget: bool,
labeling_batch_prefix: str,
labeling_batches_recreation_frequency: BatchCreationFrequency,
image_name: Optional[Batch[Optional[str]]] = None,
) -> BlockResult:
if self._api_key is None:
raise ValueError(
Expand All @@ -312,7 +318,8 @@
]
result = []
predictions = [None] * len(images) if predictions is None else predictions
for image, prediction in zip(images, predictions):
image_names = [None] * len(images) if image_name is None else image_name
for image, prediction, img_name in zip(images, predictions, image_names):
error_status, message = register_datapoint_at_roboflow(
image=image,
prediction=prediction,
Expand All @@ -332,6 +339,7 @@
background_tasks=self._background_tasks,
thread_pool_executor=self._thread_pool_executor,
api_key=self._api_key,
image_name=img_name,
)
result.append({"error_status": error_status, "message": message})
return result
Expand All @@ -356,6 +364,7 @@
background_tasks: Optional[BackgroundTasks],
thread_pool_executor: Optional[ThreadPoolExecutor],
api_key: str,
image_name: Optional[str] = None,
) -> Tuple[bool, str]:
registration_task = partial(
execute_registration,
Expand All @@ -374,6 +383,7 @@
new_labeling_batch_frequency=new_labeling_batch_frequency,
cache=cache,
api_key=api_key,
image_name=image_name,
)
if fire_and_forget and background_tasks:
background_tasks.add_task(registration_task)
Expand All @@ -400,6 +410,7 @@
new_labeling_batch_frequency: BatchCreationFrequency,
cache: BaseCache,
api_key: str,
image_name: Optional[str] = None,
Comment thread
rafel-roboflow marked this conversation as resolved.
) -> Tuple[bool, str]:
matching_strategies_limits = OrderedDict(
{
Expand Down Expand Up @@ -427,7 +438,7 @@
return False, "Registration skipped due to usage quota exceeded"
credit_to_be_returned = False
try:
local_image_id = str(uuid4())
local_image_id = image_name if image_name else str(uuid4())
encoded_image, scaling_factor = prepare_image_to_registration(
image=image.numpy_image,
desired_size=ImageDimensions(
Expand Down Expand Up @@ -465,7 +476,7 @@
finally:
if credit_to_be_returned:
return_strategy_credit(
cache=cache,

Check failure

Code scanning / CodeQL

Use of a broken or weak cryptographic hashing algorithm on sensitive data High

Sensitive data (password)
is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password)
is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
Sensitive data (password) is used in a hashing algorithm (MD5) that is insecure for password hashing, since it is not a computationally expensive hash function.
workspace=workspace_name,
project=target_project,
strategy_name=strategy_with_spare_credit,
Expand Down Expand Up @@ -575,7 +586,7 @@
tags: List[str],
inference_id: Optional[str],
) -> Optional[str]:
registration_response = register_image_at_roboflow(

Check failure

Code scanning / CodeQL

Clear-text logging of sensitive information High

This expression logs
sensitive data (password)
as clear text.
This expression logs
sensitive data (password)
as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
This expression logs sensitive data (password) as clear text.
api_key=api_key,
dataset_id=target_project,
local_image_id=local_image_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,15 @@ class BlockManifest(WorkflowBlockManifest):
description="Frequency at which new labeling batches are automatically created for uploaded images. Options: 'never' (all images go to the same batch), 'daily' (new batch each day), 'weekly' (new batch each week), 'monthly' (new batch each month). Batch timestamps are appended to the labeling_batch_prefix to create unique batch names. Automatically organizing uploads into time-based batches simplifies dataset management and makes it easier to track and review collected data over time.",
examples=["never", "daily"],
)
image_name: Optional[Union[str, Selector(kind=[STRING_KIND])]] = Field(
default=None,
description="Optional custom name for the uploaded image. This is useful when you want to preserve the original filename or use a meaningful identifier (e.g., serial number, timestamp) for the image in the Roboflow dataset. The name should not include file extension. If not provided, a UUID will be generated automatically.",
examples=["serial_12345", "camera1_frame_001", "$inputs.filename"],
)

@classmethod
def get_parameters_accepting_batches(cls) -> List[str]:
return ["images", "predictions"]
return ["images", "predictions", "image_name"]

@classmethod
def describe_outputs(cls) -> List[OutputDefinition]:
Expand Down Expand Up @@ -279,6 +284,7 @@ def run(
fire_and_forget: bool,
labeling_batch_prefix: str,
labeling_batches_recreation_frequency: BatchCreationFrequency,
image_name: Optional[Batch[Optional[str]]] = None,
) -> BlockResult:
if self._api_key is None:
raise ValueError(
Expand All @@ -297,7 +303,8 @@ def run(
]
result = []
predictions = [None] * len(images) if predictions is None else predictions
for image, prediction in zip(images, predictions):
image_names = [None] * len(images) if image_name is None else image_name
for image, prediction, img_name in zip(images, predictions, image_names):
error_status, message = maybe_register_datapoint_at_roboflow(
image=image,
prediction=prediction,
Expand All @@ -318,6 +325,7 @@ def run(
background_tasks=self._background_tasks,
thread_pool_executor=self._thread_pool_executor,
api_key=self._api_key,
image_name=img_name,
)
result.append({"error_status": error_status, "message": message})
return result
Expand All @@ -343,6 +351,7 @@ def maybe_register_datapoint_at_roboflow(
background_tasks: Optional[BackgroundTasks],
thread_pool_executor: Optional[ThreadPoolExecutor],
api_key: str,
image_name: Optional[str] = None,
) -> Tuple[bool, str]:
normalised_probability = data_percentage / 100
if random.random() < normalised_probability:
Expand All @@ -365,5 +374,6 @@ def maybe_register_datapoint_at_roboflow(
background_tasks=background_tasks,
thread_pool_executor=thread_pool_executor,
api_key=api_key,
image_name=image_name,
)
return False, "Registration skipped due to sampling settings"
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,7 @@ def test_run_sink_when_registration_should_happen_in_foreground_despite_providin
new_labeling_batch_frequency="never",
cache=cache,
api_key="my_api_key",
image_name=None,
)
]
* 3
Expand Down Expand Up @@ -1180,8 +1181,114 @@ def test_run_sink_when_predictions_not_provided(
new_labeling_batch_frequency="never",
cache=cache,
api_key="my_api_key",
image_name=None,
)
]
* 3
)
assert len(background_tasks.tasks) == 0, "Async tasks not to be added"


@mock.patch.object(v1, "return_strategy_credit")
@mock.patch.object(v1, "register_datapoint")
@mock.patch.object(v1, "use_credit_of_matching_strategy")
def test_execute_registration_with_custom_image_name(
    use_credit_of_matching_strategy_mock: MagicMock,
    register_datapoint_mock: MagicMock,
    return_strategy_credit_mock: MagicMock,
) -> None:
    """A provided image_name must be forwarded verbatim as the local_image_id."""
    # given
    api_key = "my_api_key"
    # md5 here only reproduces the production cache-key derivation for the
    # workspace lookup — it is a cache key, not a credential hash.
    api_key_hash = hashlib.md5(api_key.encode("utf-8")).hexdigest()
    expected_cache_key = f"workflows:api_key_to_workspace:{api_key_hash}"
    cache = MemoryCache()
    cache.set(key=expected_cache_key, value="my_workspace")
    use_credit_of_matching_strategy_mock.return_value = "my_strategy"
    image = WorkflowImageData(
        parent_metadata=ImageParentMetadata(parent_id="parent"),
        numpy_image=np.zeros((128, 256, 3), dtype=np.uint8),
    )
    register_datapoint_mock.return_value = "STATUS OK"

    # when
    result = execute_registration(
        image=image,
        prediction=None,
        target_project="my_project",
        usage_quota_name="my_quota",
        persist_predictions=True,
        minutely_usage_limit=10,
        hourly_usage_limit=100,
        daily_usage_limit=1000,
        max_image_size=(128, 64),
        compression_level=75,
        registration_tags=["some"],
        labeling_batch_prefix="my_batch",
        new_labeling_batch_frequency="never",
        cache=cache,
        api_key=api_key,
        image_name="custom_serial_number_123",
    )

    # then
    assert result == (False, "STATUS OK"), "Expected correct status to be marked"
    register_datapoint_mock.assert_called_once()
    # Verify custom image_name was used as local_image_id
    assert (
        register_datapoint_mock.call_args[1]["local_image_id"]
        == "custom_serial_number_123"
    ), "Expected custom image_name to be used as local_image_id"
    return_strategy_credit_mock.assert_not_called()


@mock.patch.object(v1, "return_strategy_credit")
@mock.patch.object(v1, "register_datapoint")
@mock.patch.object(v1, "use_credit_of_matching_strategy")
def test_execute_registration_without_image_name_uses_uuid(
    use_credit_of_matching_strategy_mock: MagicMock,
    register_datapoint_mock: MagicMock,
    return_strategy_credit_mock: MagicMock,
) -> None:
    """When image_name is omitted, a UUID4 must be generated as local_image_id."""
    # given
    api_key = "my_api_key"
    # md5 here only reproduces the production cache-key derivation for the
    # workspace lookup — it is a cache key, not a credential hash.
    api_key_hash = hashlib.md5(api_key.encode("utf-8")).hexdigest()
    expected_cache_key = f"workflows:api_key_to_workspace:{api_key_hash}"
    cache = MemoryCache()
    cache.set(key=expected_cache_key, value="my_workspace")
    use_credit_of_matching_strategy_mock.return_value = "my_strategy"
    image = WorkflowImageData(
        parent_metadata=ImageParentMetadata(parent_id="parent"),
        numpy_image=np.zeros((128, 256, 3), dtype=np.uint8),
    )
    register_datapoint_mock.return_value = "STATUS OK"

    # when
    result = execute_registration(
        image=image,
        prediction=None,
        target_project="my_project",
        usage_quota_name="my_quota",
        persist_predictions=True,
        minutely_usage_limit=10,
        hourly_usage_limit=100,
        daily_usage_limit=1000,
        max_image_size=(128, 64),
        compression_level=75,
        registration_tags=["some"],
        labeling_batch_prefix="my_batch",
        new_labeling_batch_frequency="never",
        cache=cache,
        api_key=api_key,
        # No image_name provided, should fall back to UUID
    )

    # then
    assert result == (False, "STATUS OK"), "Expected correct status to be marked"
    register_datapoint_mock.assert_called_once()
    # Verify UUID format is used when no image_name provided
    local_image_id = register_datapoint_mock.call_args[1]["local_image_id"]
    # UUID4 has format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx (36 chars with hyphens)
    assert (
        len(local_image_id) == 36 and local_image_id.count("-") == 4
    ), "Expected UUID format to be used when image_name not provided"
    return_strategy_credit_mock.assert_not_called()
Original file line number Diff line number Diff line change
Expand Up @@ -425,3 +425,119 @@ def test_run_sink_when_data_sampled(
* 3
), "Expected data registered"
assert register_datapoint_at_roboflow_mock.call_count == 3


@mock.patch.object(v2, "register_datapoint_at_roboflow")
def test_run_sink_with_image_name_parameter(
    register_datapoint_at_roboflow_mock: MagicMock,
) -> None:
    """Per-image names supplied as a batch are forwarded to the sink one-to-one."""
    # given
    tasks = BackgroundTasks()
    memory_cache = MemoryCache()
    block = RoboflowDatasetUploadBlockV2(
        cache=memory_cache,
        api_key="my_api_key",
        background_tasks=tasks,
        thread_pool_executor=None,
    )
    frame = WorkflowImageData(
        parent_metadata=ImageParentMetadata(parent_id="parent"),
        numpy_image=np.zeros((512, 256, 3), dtype=np.uint8),
    )
    register_datapoint_at_roboflow_mock.return_value = False, "OK"
    batch_indices = [(0,), (1,)]

    # when
    result = block.run(
        images=Batch(content=[frame, frame], indices=batch_indices),
        predictions=None,
        target_project="my_project",
        usage_quota_name="my_quota",
        data_percentage=100.0,
        persist_predictions=True,
        minutely_usage_limit=10,
        hourly_usage_limit=100,
        daily_usage_limit=1000,
        max_image_size=(128, 128),
        compression_level=75,
        registration_tags=["some"],
        disable_sink=False,
        fire_and_forget=False,
        labeling_batch_prefix="my_batch",
        labeling_batches_recreation_frequency="never",
        image_name=Batch(content=["serial_001", "serial_002"], indices=batch_indices),
    )

    # then
    expected_entry = {"error_status": False, "message": "OK"}
    assert result == [expected_entry, expected_entry], "Expected data registered"
    assert register_datapoint_at_roboflow_mock.call_count == 2

    # Each registration call should have received the matching per-image name.
    recorded_calls = register_datapoint_at_roboflow_mock.call_args_list
    assert recorded_calls[0].kwargs["image_name"] == "serial_001"
    assert recorded_calls[1].kwargs["image_name"] == "serial_002"


def test_manifest_parsing_with_image_name_field() -> None:
    """The image_name field must accept a workflow input selector and round-trip it."""
    # given
    manifest_definition = {
        "type": "roboflow_core/roboflow_dataset_upload@v2",
        "name": "some",
        "images": "$inputs.image",
        "predictions": None,
        "target_project": "some1",
        "usage_quota_name": "my_quota",
        "data_percentage": 100.0,
        "persist_predictions": True,
        "minutely_usage_limit": 10,
        "hourly_usage_limit": 100,
        "daily_usage_limit": 1000,
        "max_image_size": (100, 200),
        "compression_level": 95,
        "registration_tags": [],
        "disable_sink": False,
        "fire_and_forget": False,
        "labeling_batch_prefix": "my_batch",
        "labeling_batches_recreation_frequency": "never",
    }
    # The field under test: a selector reference rather than a literal name.
    manifest_definition["image_name"] = "$inputs.filename"

    # when
    parsed_manifest = BlockManifest.model_validate(manifest_definition)

    # then
    assert parsed_manifest.image_name == "$inputs.filename"


def test_manifest_parsing_with_static_image_name() -> None:
    """The image_name field must accept a plain literal string and round-trip it."""
    # given
    manifest_definition = {
        "type": "roboflow_core/roboflow_dataset_upload@v2",
        "name": "some",
        "images": "$inputs.image",
        "predictions": None,
        "target_project": "some1",
        "usage_quota_name": "my_quota",
        "data_percentage": 100.0,
        "persist_predictions": True,
        "minutely_usage_limit": 10,
        "hourly_usage_limit": 100,
        "daily_usage_limit": 1000,
        "max_image_size": (100, 200),
        "compression_level": 95,
        "registration_tags": [],
        "disable_sink": False,
        "fire_and_forget": False,
        "labeling_batch_prefix": "my_batch",
        "labeling_batches_recreation_frequency": "never",
    }
    # The field under test: a static value instead of a selector.
    manifest_definition["image_name"] = "my_static_image_name"

    # when
    parsed_manifest = BlockManifest.model_validate(manifest_definition)

    # then
    assert parsed_manifest.image_name == "my_static_image_name"