Skip to content

Commit a1cbc89

Browse files
committed
for gpkg files (added, updated) commit changes from wal file before starting upload transaction
1 parent a8a490b commit a1cbc89

File tree

7 files changed

+81
-30
lines changed

7 files changed

+81
-30
lines changed

mergin/client.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,22 @@ def is_versioned_file(self, file):
134134
f_extension = os.path.splitext(file)[1]
135135
return f_extension in diff_extensions
136136

137+
def is_gpkg_open(self, path):
138+
"""
139+
Check whether geopackage file is open (and wal file exists)
140+
141+
:param path: absolute path of file on disk
142+
:type path: str
143+
:returns: whether file is open
144+
:rtype: bool
145+
"""
146+
f_extension = os.path.splitext(path)[1]
147+
if f_extension != '.gpkg':
148+
return False
149+
if os.path.exists(f'{path}-wal') and os.path.exists(f'{path}-shm'):
150+
return True
151+
return False
152+
137153
def ignore_file(self, file):
138154
"""
139155
Helper function for blacklisting certain types of files.
@@ -165,8 +181,6 @@ def inspect_files(self):
165181
for file in files:
166182
if self.ignore_file(file):
167183
continue
168-
if "gpkg" in file:
169-
do_sqlite_checkpoint(os.path.join(root, file))
170184

171185
abs_path = os.path.abspath(os.path.join(root, file))
172186
rel_path = os.path.relpath(abs_path, start=self.dir)
@@ -225,7 +239,8 @@ def compare_file_sets(self, origin, current):
225239
path = f["path"]
226240
if path not in origin_map:
227241
continue
228-
if f["checksum"] == origin_map[path]["checksum"]:
242+
# with open WAL files we don't know yet, better to mark file as updated
243+
if not self.is_gpkg_open(self.fpath(path)) and f["checksum"] == origin_map[path]["checksum"]:
229244
continue
230245
f["origin_checksum"] = origin_map[path]["checksum"]
231246
updated.append(f)
@@ -301,13 +316,12 @@ def get_push_changes(self):
301316
:rtype: dict
302317
"""
303318
changes = self.compare_file_sets(self.metadata['files'], self.inspect_files())
304-
# do checkpoint to push changes from wal file to gpkg if new file
305-
for file in changes['added']:
306-
if ".gpkg" in file["path"]:
307-
do_sqlite_checkpoint(self.fpath(file["path"]))
308-
file["checksum"] = generate_checksum(self.fpath(file["path"]))
309-
319+
# do checkpoint to push changes from wal file to gpkg
310320
for file in changes['added'] + changes['updated']:
321+
size, checksum = do_sqlite_checkpoint(self.fpath(file["path"]))
322+
if size and checksum:
323+
file["size"] = size
324+
file["checksum"] = checksum
311325
file['chunks'] = [str(uuid.uuid4()) for i in range(math.ceil(file["size"] / UPLOAD_CHUNK_SIZE))]
312326

313327
if not self.geodiff:
@@ -320,8 +334,9 @@ def get_push_changes(self):
320334
if not self.is_versioned_file(path):
321335
continue
322336

337+
# we use geodiff to check if we can push only diff files
323338
current_file = self.fpath(path)
324-
origin_file = self.fpath(path, self.meta_dir)
339+
origin_file = self.fpath_meta(path)
325340
diff_id = str(uuid.uuid4())
326341
diff_name = path + '-diff-' + diff_id
327342
diff_file = self.fpath_meta(diff_name)
@@ -341,11 +356,8 @@ def get_push_changes(self):
341356
else:
342357
not_updated.append(file)
343358
except (pygeodiff.GeoDiffLibError, pygeodiff.GeoDiffLibConflictError) as e:
344-
# do checkpoint to push changes from wal file to gpkg if create changeset failed
345-
do_sqlite_checkpoint(self.fpath(file["path"]))
346-
file["checksum"] = generate_checksum(self.fpath(file["path"]))
347-
file["size"] = os.path.getsize(self.fpath(file["path"]))
348-
file['chunks'] = [str(uuid.uuid4()) for i in range(math.ceil(file["size"] / UPLOAD_CHUNK_SIZE))]
359+
# changes from wal file already committed
360+
pass
349361

350362
changes['updated'] = [f for f in changes['updated'] if f not in not_updated]
351363
return changes
@@ -490,15 +502,16 @@ def apply_push_changes(self, changes):
490502
elif k == 'added':
491503
shutil.copy(self.fpath(path), basefile)
492504
elif k == 'updated':
493-
# in case for geopackage cannot be created diff
505+
# in case for geopackage cannot be created diff (e.g. forced update with committed changes from wal file)
494506
if "diff" not in item:
495-
continue
496-
# better to apply diff to previous basefile to avoid issues with geodiff tmp files
497-
changeset = self.fpath_meta(item['diff']['path'])
498-
patch_error = self.apply_diffs(basefile, [changeset])
499-
if patch_error:
500-
# in case of local sync issues it is safier to remove basefile, next time it will be downloaded from server
501-
os.remove(basefile)
507+
shutil.copy(self.fpath(path), basefile)
508+
else:
509+
# better to apply diff to previous basefile to avoid issues with geodiff tmp files
510+
changeset = self.fpath_meta(item['diff']['path'])
511+
patch_error = self.apply_diffs(basefile, [changeset])
512+
if patch_error:
513+
# in case of local sync issues it is safier to remove basefile, next time it will be downloaded from server
514+
os.remove(basefile)
502515
else:
503516
pass
504517

@@ -1090,6 +1103,7 @@ def _download_file(self, project_path, file, directory, parallel=True, diff_only
10901103
}
10911104
file_dir = os.path.dirname(os.path.normpath(os.path.join(directory, file['path'])))
10921105
basename = os.path.basename(file['diff']['path']) if diff_only else os.path.basename(file['path'])
1106+
expected_size = file['diff']['size'] if diff_only else file['size']
10931107

10941108
if file['size'] == 0:
10951109
os.makedirs(file_dir, exist_ok=True)
@@ -1130,7 +1144,7 @@ def download_file_part(part):
11301144
shutil.copyfileobj(chunk, final)
11311145
os.remove(file_part)
11321146

1133-
if os.path.getsize(os.path.join(file_dir, basename)) != file['size']:
1147+
if os.path.getsize(os.path.join(file_dir, basename)) != expected_size:
11341148
os.remove(os.path.join(file_dir, basename))
11351149
raise ClientError(f'Download of file {basename} failed. Please try it again.')
11361150

-32.2 KB
Binary file not shown.
96 KB
Binary file not shown.
32 KB
Binary file not shown.
8.08 KB
Binary file not shown.

mergin/test/test_client.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,6 @@ def test_push_pull_changes(mc, parallel):
136136
f_updated = 'test3.txt'
137137
with open(os.path.join(project_dir, f_updated), 'w') as f:
138138
f.write('Modified')
139-
src_files = os.listdir(CHANGED_SCHEMA_DIR)
140-
for file_name in src_files:
141-
full_file_name = os.path.join(CHANGED_SCHEMA_DIR, file_name)
142-
if os.path.isfile(full_file_name):
143-
shutil.copy(full_file_name, project_dir)
144139

145140
# check changes before applied
146141
pull_changes, push_changes, _ = mc.project_status(project_dir)
@@ -342,3 +337,34 @@ def test_list_of_push_changes(mc):
342337
mc.project_status(project_dir)
343338

344339

340+
def test_force_gpkg_update(mc):
341+
test_project = 'test_force_update'
342+
project = API_USER + '/' + test_project
343+
project_dir = os.path.join(TMP_DIR, test_project) # primary project dir for updates
344+
345+
cleanup(mc, project, [project_dir])
346+
# create remote project
347+
shutil.copytree(TEST_DATA_DIR, project_dir)
348+
mc.create_project(test_project, project_dir)
349+
350+
# test push changes with force gpkg file upload:
351+
mp = MerginProject(project_dir)
352+
f_updated = 'base.gpkg'
353+
checksum = generate_checksum(mp.fpath(f_updated))
354+
355+
# base.gpkg updated to modified_schema (inserted new column)
356+
shutil.move(mp.fpath(f_updated), mp.fpath_meta(f_updated)) # make local copy for changeset calculation (which will fail)
357+
shutil.copy(os.path.join(CHANGED_SCHEMA_DIR, 'modified_schema.gpkg'), mp.fpath(f_updated))
358+
shutil.copy(os.path.join(CHANGED_SCHEMA_DIR, 'modified_schema.gpkg-wal'), mp.fpath(f_updated + '-wal'))
359+
shutil.copy(os.path.join(CHANGED_SCHEMA_DIR, 'modified_schema.gpkg-shm'), mp.fpath(f_updated + '-shm'))
360+
mc.push_project(project_dir)
361+
# by this point local file has been updated (changes committed from wal)
362+
updated_checksum = generate_checksum(mp.fpath(f_updated))
363+
assert checksum != updated_checksum
364+
365+
# check project after push
366+
project_info = mc.project_info(project)
367+
assert project_info['version'] == 'v2'
368+
f_remote = next((f for f in project_info['files'] if f['path'] == f_updated), None)
369+
assert f_remote['checksum'] == updated_checksum
370+
assert 'diff' not in f_remote

mergin/utils.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,23 @@ def int_version(version):
7171

7272
def do_sqlite_checkpoint(path):
7373
"""
74-
function to do checkpoint over the geopackage file which was not able to do diff file
74+
Function to do checkpoint over the geopackage file which was not able to do diff file.
75+
76+
:param path: file's absolute path on disk
77+
:type path: str
78+
:returns: new size and checksum of file after checkpoint
79+
:rtype: int, str
7580
"""
81+
new_size = None
82+
new_checksum = None
7683
if ".gpkg" in path and os.path.exists(f'{path}-wal') and os.path.exists(f'{path}-shm'):
7784
conn = sqlite3.connect(path)
7885
cursor = conn.cursor()
7986
cursor.execute("PRAGMA wal_checkpoint=FULL")
8087
cursor.execute("VACUUM")
8188
conn.commit()
8289
conn.close()
90+
new_size = os.path.getsize(path)
91+
new_checksum = generate_checksum(path)
92+
93+
return new_size, new_checksum

0 commit comments

Comments
 (0)