Skip to content

Commit 493f222

Browse files
Add match parameter to upsert method with prevent_create and prevent_update modes
Co-authored-by: MarcusRisanger <69350948+MarcusRisanger@users.noreply.github.com>
1 parent e6003ad commit 493f222

3 files changed

Lines changed: 133 additions & 3 deletions

File tree

dataverse_api/entity.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -699,17 +699,29 @@ def delete_columns(
699699
raise DataverseModeError(mode, "individual", "batch")
700700

701701
def __upsert_singles(
702-
self, data: Collection[Mapping[str, Any]], keys: Iterable[str], is_primary_id: bool, threading: bool
702+
self,
703+
data: Collection[Mapping[str, Any]],
704+
keys: Iterable[str],
705+
is_primary_id: bool,
706+
threading: bool,
707+
match: Literal["prevent_create", "prevent_update"] | None = None,
703708
) -> list[requests.Response]:
704709
"""
705710
Upsert row by individual requests.
706711
"""
707712
check_altkey_support(keys=keys, data=data)
713+
headers: dict[str, str] | None = None
714+
if match == "prevent_create":
715+
headers = {"If-Match": "*"}
716+
elif match == "prevent_update":
717+
headers = {"If-None-Match": "*"}
718+
708719
calls = [
709720
APICommand(
710721
method=RequestMethod.PATCH,
711722
url=f"{self.entity_set_name}({key})",
712723
json=payload,
724+
headers=headers,
713725
)
714726
for key, payload in transform_upsert_data(data=data, keys=keys, is_primary_id=is_primary_id)
715727
]
@@ -725,6 +737,7 @@ def upsert(
725737
mode: Literal["individual"] = "individual",
726738
altkey_name: str | None = None,
727739
threading: bool = False,
740+
match: Literal["prevent_create", "prevent_update"] | None = None,
728741
) -> list[requests.Response]: ...
729742

730743
@overload
@@ -736,6 +749,7 @@ def upsert(
736749
altkey_name: str | None = None,
737750
threading: bool = False,
738751
batch_size: int | None = None,
752+
match: Literal["prevent_create", "prevent_update"] | None = None,
739753
) -> list[requests.Response]: ...
740754

741755
def upsert(
@@ -746,6 +760,7 @@ def upsert(
746760
altkey_name: str | None = None,
747761
threading: bool = False,
748762
batch_size: int | None = None,
763+
match: Literal["prevent_create", "prevent_update"] | None = None,
749764
) -> list[requests.Response]:
750765
"""
751766
Upsert data into Entity.
@@ -764,6 +779,11 @@ def upsert(
764779
batch_size : int
765780
Optional override if batch mode is specified, useful for tuning workloads
766781
if 429s or timeouts occur.
782+
match : Literal["prevent_create", "prevent_update"] | None
783+
Controls upsert behavior using the If-Match / If-None-Match headers:
784+
- None (default): Standard upsert behavior (create or update)
785+
- "prevent_create": Only update existing records (If-Match: *)
786+
- "prevent_update": Only create new records (If-None-Match: *)
767787
"""
768788
if altkey_name is not None:
769789
try:
@@ -782,7 +802,9 @@ def upsert(
782802

783803
if mode == "individual":
784804
logging.debug("%d rows to upsert. Using individual upserts.", len(data))
785-
return self.__upsert_singles(data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading)
805+
return self.__upsert_singles(
806+
data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading, match=match
807+
)
786808

787809
if mode == "batch":
788810
logging.debug("%d rows to upsert. Using batch upserts.", len(data))
@@ -791,6 +813,7 @@ def upsert(
791813
data=data,
792814
keys=key_columns,
793815
is_primary_id=is_primary_id,
816+
match=match,
794817
)
795818
return self._batch_api_call(
796819
batch_commands=batch_commands,

dataverse_api/utils/batching.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from dataclasses import dataclass, field
55
from enum import StrEnum
66
from textwrap import dedent
7-
from typing import Any, Collection, Generator, Mapping, MutableMapping, TypeVar
7+
from typing import Any, Collection, Generator, Literal, Mapping, MutableMapping, TypeVar
88
from urllib.parse import urljoin
99

1010
from dataverse_api.errors import DataverseError
@@ -209,6 +209,7 @@ def transform_to_batch_for_upsert(
209209
data: Collection[MutableMapping[str, Any]],
210210
keys: Iterable[str],
211211
is_primary_id: bool = False,
212+
match: Literal["prevent_create", "prevent_update"] | None = None,
212213
) -> list[BatchCommand]:
213214
"""
214215
Transform data payload to upsert batch data.
@@ -223,15 +224,27 @@ def transform_to_batch_for_upsert(
223224
The keys used to identify unique rows in the dataset.
224225
is_primary_id : bool
225226
Whether the supplied singular key is the Entity primary ID attribute.
227+
match : Literal["prevent_create", "prevent_update"] | None
228+
Controls upsert behavior using the If-Match / If-None-Match headers:
229+
- None (default): Standard upsert behavior (create or update)
230+
- "prevent_create": Only update existing records (If-Match: *)
231+
- "prevent_update": Only create new records (If-None-Match: *)
226232
"""
227233
check_altkey_support(keys=keys, data=data)
234+
headers: dict[str, str] | None = None
235+
if match == "prevent_create":
236+
headers = {"If-Match": "*"}
237+
elif match == "prevent_update":
238+
headers = {"If-None-Match": "*"}
239+
228240
commands = []
229241
for keys, payload in transform_upsert_data(data, keys, is_primary_id):
230242
commands.append(
231243
BatchCommand(
232244
url=f"{url}({keys})",
233245
method=RequestMethod.PATCH,
234246
data=payload,
247+
headers=headers,
235248
)
236249
)
237250

tests/test_entity.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,100 @@ def test_entity_upsert_bad_altkey(entity: DataverseEntity):
716716
entity.upsert([{"data": 1}], altkey_name="foo")
717717

718718

719+
def test_entity_upsert_individual_prevent_create(
720+
entity: DataverseEntity,
721+
primary_id: str,
722+
mocked_responses: responses.RequestsMock,
723+
):
724+
"""Test upsert with prevent_create (If-Match: *) - only update existing records."""
725+
# Setup
726+
data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)]
727+
728+
for row in data:
729+
id = row[primary_id]
730+
payload = {k: v for k, v in row.items() if k != primary_id}
731+
732+
mocked_responses.patch(
733+
url=f"{entity._endpoint}{entity.entity_set_name}({id})",
734+
match=[json_params_matcher(payload), header_matcher({"If-Match": "*"})],
735+
status=204,
736+
)
737+
738+
resp = entity.upsert(data, mode="individual", match="prevent_create")
739+
740+
for row in resp:
741+
assert row.status_code == 204
742+
743+
744+
def test_entity_upsert_individual_prevent_update(
745+
entity: DataverseEntity,
746+
primary_id: str,
747+
mocked_responses: responses.RequestsMock,
748+
):
749+
"""Test upsert with prevent_update (If-None-Match: *) - only create new records."""
750+
# Setup
751+
data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)]
752+
753+
for row in data:
754+
id = row[primary_id]
755+
payload = {k: v for k, v in row.items() if k != primary_id}
756+
757+
mocked_responses.patch(
758+
url=f"{entity._endpoint}{entity.entity_set_name}({id})",
759+
match=[json_params_matcher(payload), header_matcher({"If-None-Match": "*"})],
760+
status=204,
761+
)
762+
763+
resp = entity.upsert(data, mode="individual", match="prevent_update")
764+
765+
for row in resp:
766+
assert row.status_code == 204
767+
768+
769+
def test_entity_upsert_batch_prevent_create(
770+
entity: DataverseEntity,
771+
primary_id: str,
772+
mocked_responses: responses.RequestsMock,
773+
):
774+
"""Test batch upsert with prevent_create (If-Match: *) - only update existing records."""
775+
# Setup
776+
data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)]
777+
778+
mocked_responses.post(url=f"{entity._endpoint}$batch")
779+
780+
resp = entity.upsert(data=data, mode="batch", match="prevent_create")
781+
782+
assert isinstance(resp[0].request.body, str) # type checking
783+
elements = resp[0].request.body.split("--batch")[1:-1]
784+
785+
for out, expected in zip(elements, data):
786+
assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out
787+
assert "If-Match: *" in out
788+
assert serialize_json(expected) in out
789+
790+
791+
def test_entity_upsert_batch_prevent_update(
792+
entity: DataverseEntity,
793+
primary_id: str,
794+
mocked_responses: responses.RequestsMock,
795+
):
796+
"""Test batch upsert with prevent_update (If-None-Match: *) - only create new records."""
797+
# Setup
798+
data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)]
799+
800+
mocked_responses.post(url=f"{entity._endpoint}$batch")
801+
802+
resp = entity.upsert(data=data, mode="batch", match="prevent_update")
803+
804+
assert isinstance(resp[0].request.body, str) # type checking
805+
elements = resp[0].request.body.split("--batch")[1:-1]
806+
807+
for out, expected in zip(elements, data):
808+
assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out
809+
assert "If-None-Match: *" in out
810+
assert serialize_json(expected) in out
811+
812+
719813
def test_entity_upsert_pandas_dataframe(
720814
entity: DataverseEntity, mocked_responses: responses.RequestsMock, primary_id: str
721815
):

0 commit comments

Comments
 (0)