-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHDF5.hpp
More file actions
2171 lines (2041 loc) · 74.1 KB
/
HDF5.hpp
File metadata and controls
2171 lines (2041 loc) · 74.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**
* @file HDF5.hpp
*
* @brief Custom HDF5 wrapper.
*
* We encapsulate most HDF5 calls in more intuitive functions and hide all the
* error handling. And we use templates to facilitate some of the data type
* handling.
*
* @author Bert Vandenbroucke (vandenbroucke@strw.leidenuniv.nl)
*/
#ifndef RS_HDF5_HPP
#define RS_HDF5_HPP
#include "Error.hpp"
#include <algorithm>
#include <cstring>
#include <hdf5.h>
#include <sstream>
#include <string>
#include <vector>
/*! @brief In principle, using the HDF5 core driver for file operations should
* make read and write operations faster, since the entire file is stored in
* memory. In practice, using the core driver for large files seems to result
* in weird errors, so it is better to not use this. */
//#define HDF5_USE_CORE_DRIVER
namespace HDF5Datatypes {

/**
 * @brief Map a template typename onto the corresponding native HDF5 datatype.
 *
 * Only the explicitly specialised types below are supported; using any other
 * type results in a link-time error.
 *
 * @return hid_t handle for the corresponding HDF5 data type.
 */
template <typename _datatype_> inline hid_t get_datatype_name();

/**
 * @brief get_datatype_name specialisation for double precision floating point
 * values.
 *
 * @return H5T_NATIVE_DOUBLE.
 */
template <> inline hid_t get_datatype_name<double>() {
  return H5T_NATIVE_DOUBLE;
}

/**
 * @brief get_datatype_name specialisation for single precision floating point
 * values.
 *
 * @return H5T_NATIVE_FLOAT.
 */
template <> inline hid_t get_datatype_name<float>() {
  return H5T_NATIVE_FLOAT;
}

/**
 * @brief get_datatype_name specialisation for 32 bit unsigned integers.
 *
 * @return H5T_NATIVE_UINT32.
 */
template <> inline hid_t get_datatype_name<uint32_t>() {
  return H5T_NATIVE_UINT32;
}

/**
 * @brief get_datatype_name specialisation for 64 bit unsigned integers.
 *
 * @return H5T_NATIVE_UINT64.
 */
template <> inline hid_t get_datatype_name<uint64_t>() {
  return H5T_NATIVE_UINT64;
}

/**
 * @brief get_datatype_name specialisation for 32 bit signed integers.
 *
 * @return H5T_NATIVE_INT32.
 */
template <> inline hid_t get_datatype_name<int32_t>() {
  return H5T_NATIVE_INT32;
}

/**
 * @brief get_datatype_name specialisation for 64 bit signed integers.
 *
 * @return H5T_NATIVE_INT64.
 */
template <> inline hid_t get_datatype_name<int64_t>() {
  return H5T_NATIVE_INT64;
}

} // namespace HDF5Datatypes
namespace HDF5 {

/*! @brief More convenient name for a HDF5 file or group handle. Files and
 * groups share the same hid_t handle type, so functions taking this type
 * accept either. */
typedef hid_t HDF5FileOrGroup;

/*! @brief More convenient name for a HDF5 dataset handle. */
typedef hid_t HDF5Dataset;

/**
 * @brief Possible HDF5 file open modes, used by OpenFile().
 */
enum HDF5FileMode {
  /*! @brief Open the file for reading only. */
  HDF5FileModeRead,
  /*! @brief Open the file for writing, overwriting existing files. */
  HDF5FileModeWrite,
  /*! @brief Open the file for appending: the file is assumed to already exist,
   * but is opened with write permissions. */
  HDF5FileModeAppend
};
/**
* @brief Initialise our HDF5 wrapper.
*
* Should be called before any other HDF5 wrapper function is used.
*
* No harm is done by not calling this function, it simply overwrites the
* default error handler, so that we can display more useful HDF5 error
* messages.
*/
inline void StartHDF5() {
const herr_t hdf5status = H5Eset_auto(H5E_DEFAULT, nullptr, nullptr);
if (hdf5status < 0) {
my_error("Unable to turn off default HDF5 error handling!");
}
}
/**
 * @brief Open an HDF5 file.
 *
 * The file should be closed using CloseFile() when it is no longer needed.
 *
 * @param filename Name of the file.
 * @param mode Mode in which the file is opened.
 * @return File handle that should be used for future operations that involve
 * the file.
 */
inline HDF5FileOrGroup OpenFile(const std::string filename,
                                const HDF5FileMode mode) {
  hid_t file;
#ifdef HDF5_USE_CORE_DRIVER
  // open the file using the HDF5 core driver, with a default memory increment
  // size of 500k bytes
  // NOTE(review): the last argument (1) presumably enables the backing store
  // so changes are written back on close — confirm against the
  // H5Pset_fapl_core documentation
  const hid_t props = H5Pcreate(H5P_FILE_ACCESS);
  herr_t hdf5status = H5Pset_fapl_core(props, 500000, 1);
#else
  // use default file open properties
  const hid_t props = H5P_DEFAULT;
#endif
  switch (mode) {
  case HDF5FileModeRead:
    // open the file for reading
    file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, props);
    break;
  case HDF5FileModeWrite:
    // create a new file, overwriting existing ones
    file = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, props);
    break;
  case HDF5FileModeAppend:
    // open a file with read-write access
    file = H5Fopen(filename.c_str(), H5F_ACC_RDWR, props);
    break;
  default:
    // my_error() presumably aborts; the assignment only exists to silence
    // uninitialised-variable warnings
    my_error("Unknown file mode!");
    file = 0;
    break;
  }
  if (file < 0) {
    my_error("Unable to open file \"%s\"!", filename.c_str());
  }
#ifdef HDF5_USE_CORE_DRIVER
  hdf5status = H5Pclose(props);
  if (hdf5status < 0) {
    my_error("Error closing file access properties!");
  }
#endif
  return file;
}
/**
* @brief Close an HDF5 file.
*
* After calling this function, subsequent operations that use the file handle
* will result in errors.
*
* @param file File handle.
*/
inline void CloseFile(const HDF5FileOrGroup file) {
const herr_t hdf5status = H5Fclose(file);
if (hdf5status < 0) {
my_error("Unable to close file!");
}
}
/**
 * @brief Open a group within an HDF5 file.
 *
 * Close the returned handle with CloseGroup() once it is no longer needed.
 *
 * @param file File handle.
 * @param name Name of the group.
 * @return Group handle to use in subsequent operations on this group.
 */
inline HDF5FileOrGroup OpenGroup(const HDF5FileOrGroup file,
                                 const std::string name) {
  const hid_t group_handle = H5Gopen(file, name.c_str(), H5P_DEFAULT);
  if (group_handle < 0) {
    my_error("Error opening \"%s\" group!", name.c_str());
  }
  return group_handle;
}
/**
 * @brief Create a group within an HDF5 file.
 *
 * Close the returned handle with CloseGroup() once it is no longer needed.
 *
 * @param file File handle.
 * @param name Name of the group.
 * @return Group handle to use in subsequent operations on this group.
 */
inline HDF5FileOrGroup CreateGroup(const HDF5FileOrGroup file,
                                   const std::string name) {
  const hid_t group_handle =
      H5Gcreate(file, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (group_handle < 0) {
    my_error("Error creating \"%s\" group!", name.c_str());
  }
  return group_handle;
}
/**
* @brief Close an HDF5 group.
*
* After this function has been called, subsequent operations involving the
* group handle will result in errors.
*
* @param group Group handle.
*/
inline void CloseGroup(const HDF5FileOrGroup group) {
const herr_t hdf5status = H5Gclose(group);
if (hdf5status < 0) {
my_error("Error closing header group!");
}
}
inline uint64_t GetDatasetSize(const HDF5FileOrGroup file,
const std::string name) {
const hid_t dset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dset < 0) {
my_error("Unable to open dataset \"%s\"!", name.c_str());
}
const hid_t space = H5Dget_space(dset);
if (space < 0) {
my_error("Unable to obtain data space for dataset \"%s\"!", name.c_str());
}
hsize_t dims[2];
const int ndim = H5Sget_simple_extent_dims(space, dims, nullptr);
if (ndim != 1) {
my_error("Expected dataset \"%s\" to be 1D!", name.c_str());
}
herr_t hdf5status = H5Sclose(space);
if (hdf5status < 0) {
my_error("Error closing data space for dataset \"%s\"!", name.c_str());
}
hdf5status = H5Dclose(dset);
if (hdf5status < 0) {
my_error("Error closing dataset \"%s\"!", name.c_str());
}
return dims[0];
}
/**
* @brief Read a single value from an HDF5 dataset.
*
* @tparam _type_ Type of the value that is to be read.
* @param file File or group handle that contains the dataset.
* @param name Name of the dataset.
* @return Data value that was read.
*/
template <typename _type_>
inline _type_ ReadSingleValueDataset(const HDF5FileOrGroup file,
const std::string name) {
// get the appropriate HDF5 type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the dataset
const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
my_error("Unable to open dataset \"%s\"!", name.c_str());
}
// read the (single) value
// we use the () constructor to deal with non-standard types (which will
// likely cause other problems, but hey)
_type_ value(0);
herr_t hdf5status =
H5Dread(dataset, dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value);
if (hdf5status < 0) {
my_error("Error reading dataset \"%s\"!", name.c_str());
}
hdf5status = H5Dclose(dataset);
if (hdf5status < 0) {
my_error("Error closing dataset!");
}
return value;
}
/**
 * @brief Read an entire HDF5 dataset into a 1D vector.
 *
 * Note that this function works regardless of the dimensions of the dataset.
 * The dataset will be flattened into a 1D vector in standard C order, i.e. for
 * a 2D dataset, the output vector will contain all columns of the first row,
 * followed by all columns of the second row and so on. For example, if you
 * read the Coordinates dataset, the resulting vector will contain
 * x1, y1, z1, x2, y2, z2...
 *
 * @tparam _type_ Type of the data values.
 * @param file File or group handle that contains the dataset.
 * @param name Name of the dataset.
 * @param value Pre-allocated vector of at least the right size to hold the
 * data.
 */
template <typename _type_>
inline void ReadEntireDataset(const HDF5FileOrGroup file,
                              const std::string name,
                              std::vector<_type_> &value) {
  // get the HDF5 data type for _type_
  const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
  // open the dataset
  const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
  if (dataset < 0) {
    my_error("Unable to open dataset \"%s\"!", name.c_str());
  }
  // read the data; we assume the given vector is large enough to hold it
  // use data() rather than &value[0]: indexing element 0 of an empty vector
  // is undefined behaviour, while data() is always well-defined
  herr_t hdf5status =
      H5Dread(dataset, dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, value.data());
  if (hdf5status < 0) {
    my_error("Error reading dataset \"%s\"!", name.c_str());
  }
  hdf5status = H5Dclose(dataset);
  if (hdf5status < 0) {
    my_error("Error closing dataset!");
  }
}
/**
* @brief Read part of an HDF5 dataset into a 1D vector.
*
* @tparam _type_ Type of the data values.
* @param file File or group handle that contains the dataset.
* @param name Name of the dataset.
* @param offset Start of the chunk we want to read, as a number of elements
* from the start of the data array.
* @param size Number of elements to read.
* @param value Pre-allocated vector of at least the right size to hold the
* result, i.e. size.
*/
template <typename _type_>
inline void ReadPartialDataset(const HDF5FileOrGroup file,
const std::string name, const size_t offset,
const size_t size, std::vector<_type_> &value) {
// get the HDF5 data type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the dataset
const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
my_error("Unable to open dataset!");
}
// select a hyperslab within the dataset starting from the given offset and
// with the given size
const hid_t filespace = H5Dget_space(dataset);
if (filespace < 0) {
my_error("Could not access file space!");
}
const hsize_t dims[1] = {size};
const hsize_t offs[1] = {offset};
herr_t hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offs,
nullptr, dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
// create a memory space of the right dimensions to process the chunk
const hid_t memspace = H5Screate_simple(1, dims, nullptr);
if (memspace < 0) {
my_error("Failed to allocate memory space!");
}
// now read the dataset
hdf5status =
H5Dread(dataset, dtype, memspace, filespace, H5P_DEFAULT, &value[0]);
if (hdf5status < 0) {
my_error("Error reading partial dataset \"%s\"!", name.c_str());
}
hdf5status = H5Sclose(memspace);
if (hdf5status < 0) {
my_error("Failed to close memory space!");
}
hdf5status = H5Sclose(filespace);
if (hdf5status < 0) {
my_error("Failed to close file space!");
}
hdf5status = H5Dclose(dataset);
if (hdf5status < 0) {
my_error("Error closing dataset!");
}
}
/**
* @brief Read part of a 3D HDF5 dataset into a 1D vector.
*
* 3D in this case means a 2D dataset with 3 columns per row, i.e. of shape
* (N, 3). Examples are the Coordinates and Velocities datasets in a SWIFT
* snapshot.
*
* The resulting array will contain the data values in standard C order, i.e.
* first all columns for the first row, then all columns for the second row and
* so on. For the coordinates for example, this means
* x1, y1, z1, x2, y2, z2...
*
* @tparam _type_ Type of the data values.
* @param file File or group handle that contains the dataset.
* @param name Name of the dataset.
* @param offset Start of the chunk we want to read, as a number of rows from
* the start of the data array.
* @param size Number of rows to read.
* @param value Pre-allocated vector of at least the right size to hold the
* result, i.e. 3 * size.
*/
template <typename _type_>
inline void ReadPartial3DDataset(const HDF5FileOrGroup file,
const std::string name, const size_t offset,
const size_t size,
std::vector<_type_> &value) {
// get the HDF5 data type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the dataset
const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
my_error("Unable to open dataset!");
}
// select the hyperslab within the dataset that we want to read
const hid_t filespace = H5Dget_space(dataset);
if (filespace < 0) {
my_error("Could not access file space!");
}
const hsize_t dims[2] = {size, 3};
const hsize_t offs[2] = {offset, 0};
herr_t hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offs,
nullptr, dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
// create a memory space of the same dimensions to process the data
const hid_t memspace = H5Screate_simple(2, dims, nullptr);
if (memspace < 0) {
my_error("Failed to allocate memory space!");
}
// now read the data
hdf5status =
H5Dread(dataset, dtype, memspace, filespace, H5P_DEFAULT, &value[0]);
if (hdf5status < 0) {
my_error("Error reading partial dataset \"%s\"!", name.c_str());
}
hdf5status = H5Sclose(memspace);
if (hdf5status < 0) {
my_error("Failed to close memory space!");
}
hdf5status = H5Sclose(filespace);
if (hdf5status < 0) {
my_error("Failed to close file space!");
}
hdf5status = H5Dclose(dataset);
if (hdf5status < 0) {
my_error("Error closing dataset!");
}
}
/**
 * @brief Auxiliary struct to facilitate working with chunks in an HDF5
 * dataset.
 *
 * A chunk describes a contiguous range of elements (or rows, for the 3D
 * variants) within a dataset.
 */
struct HDF5Chunk {
  /*! @brief Offset of the chunk: index of its first element/row, counted from
   * the start of the dataset. */
  size_t offset;
  /*! @brief Size of the chunk: number of elements/rows it contains. */
  size_t size;
};
/**
* @brief Read a partial 1D HDF5 dataset based on a list of chunks.
*
* Note that while the chunks can be passed on in arbitrary order, the HDF5
* library will automatically sort the resulting values based on order in the
* file. This makes sense if you think about the chunks as being selection
* regions: you can tell HDF5 to select parts of the dataset in an arbitrary
* order, but the entire selection will still have a pre-defined order and
* that order determines the order of the values that are read.
*
* Since it would be too inefficient to sort the data after reading, the
* caller needs to deal with a possible mismatch between chunk and data order.
*
* @tparam _type_ Type of the data values.
* @param file File or group handle that contains the dataset.
* @param name Name of the dataset.
* @param chunks List of chunks to read (in arbitrary order).
* @param value Pre-allocated vector of at least the right size to hold the
* result, i.e. the sum of all chunk sizes.
*/
template <typename _type_>
inline void ReadPartialDataset(const HDF5FileOrGroup file,
const std::string name,
const std::vector<struct HDF5Chunk> &chunks,
std::vector<_type_> &value) {
// get the HDF5 data type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the dataset
const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
my_error("Unable to open dataset!");
}
// open the data space, i.e. the HDF5 abstraction of the data array
const hid_t filespace = H5Dget_space(dataset);
if (filespace < 0) {
my_error("Could not access file space!");
}
// now select hyperslabs within the data space corresponding to the chunks
// we want to read
// we start by selecting the first chunk (which should always exist)
// we then add the other chunks to the selection using a logical or
hsize_t dims[1] = {chunks[0].size};
hsize_t offs[1] = {chunks[0].offset};
// we need to determine the total size that will be read
hsize_t total_size = chunks[0].size;
herr_t hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offs,
nullptr, dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
for (size_t i = 1; i < chunks.size(); ++i) {
dims[0] = chunks[i].size;
offs[0] = chunks[i].offset;
total_size += chunks[i].size;
hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_OR, offs, nullptr,
dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
}
my_assert(total_size == value.size(), "Size mismatch!");
dims[0] = total_size;
// create a memory space that is large enough to read the data
const hid_t memspace = H5Screate_simple(1, dims, nullptr);
if (memspace < 0) {
my_error("Failed to allocate memory space!");
}
// now read the data
hdf5status =
H5Dread(dataset, dtype, memspace, filespace, H5P_DEFAULT, &value[0]);
if (hdf5status < 0) {
my_error("Error reading partial dataset \"%s\"!", name.c_str());
}
hdf5status = H5Sclose(memspace);
if (hdf5status < 0) {
my_error("Failed to close memory space!");
}
hdf5status = H5Sclose(filespace);
if (hdf5status < 0) {
my_error("Failed to close file space!");
}
hdf5status = H5Dclose(dataset);
if (hdf5status < 0) {
my_error("Error closing dataset!");
}
}
/**
* @brief Read a partial 3D HDF5 dataset based on a list of chunks.
*
* 3D in this case means a 2D dataset with 3 columns per row, i.e. of shape
* (N, 3). Examples are the Coordinates and Velocities datasets in a SWIFT
* snapshot.
*
* The resulting array will contain the data values in standard C order, i.e.
* first all columns for the first row, then all columns for the second row and
* so on. For the coordinates for example, this means
* x1, y1, z1, x2, y2, z2...
*
* Note that while the chunks can be passed on in arbitrary order, the HDF5
* library will automatically sort the resulting values based on order in the
* file. This makes sense if you think about the chunks as being selection
* regions: you can tell HDF5 to select parts of the dataset in an arbitrary
* order, but the entire selection will still have a pre-defined order and
* that order determines the order of the values that are read.
*
* Since it would be too inefficient to sort the data after reading, the
* caller needs to deal with a possible mismatch between chunk and data order.
*
* @tparam _type_ Type of the data values.
* @param file File or group handle that contains the dataset.
* @param name Name of the dataset.
* @param chunks List of chunks to read (in arbitrary order).
* @param value Pre-allocated vector of at least the right size to hold the
* result, i.e. the sum of all chunk sizes.
*/
template <typename _type_>
inline void ReadPartial3DDataset(const HDF5FileOrGroup file,
const std::string name,
const std::vector<struct HDF5Chunk> &chunks,
std::vector<_type_> &value) {
// get the HDF5 data type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the dataset
const hid_t dataset = H5Dopen(file, name.c_str(), H5P_DEFAULT);
if (dataset < 0) {
my_error("Unable to open dataset!");
}
// open the data space
const hid_t filespace = H5Dget_space(dataset);
if (filespace < 0) {
my_error("Could not access file space!");
}
// now select the chunks as hyperslabs, starting from the first chunk (which
// is assumed to always exist)
hsize_t dims[2] = {chunks[0].size, 3};
hsize_t offs[2] = {chunks[0].offset, 0};
// accumulate the total size of all chunks
hsize_t total_size = chunks[0].size;
herr_t hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offs,
nullptr, dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
// add additional chunks using logical or
for (size_t i = 1; i < chunks.size(); ++i) {
dims[0] = chunks[i].size;
offs[0] = chunks[i].offset;
total_size += chunks[i].size;
hdf5status = H5Sselect_hyperslab(filespace, H5S_SELECT_OR, offs, nullptr,
dims, nullptr);
if (hdf5status < 0) {
my_error("Failed to select hyperslab!");
}
}
my_assert(3 * total_size == value.size(), "Size mismatch!");
dims[0] = total_size;
// create a memory space large enough to deal with all chunks
const hid_t memspace = H5Screate_simple(2, dims, nullptr);
if (memspace < 0) {
my_error("Failed to allocate memory space!");
}
// now read the data
hdf5status =
H5Dread(dataset, dtype, memspace, filespace, H5P_DEFAULT, &value[0]);
if (hdf5status < 0) {
my_error("Error reading partial dataset \"%s\"!", name.c_str());
}
hdf5status = H5Sclose(memspace);
if (hdf5status < 0) {
my_error("Failed to close memory space!");
}
hdf5status = H5Sclose(filespace);
if (hdf5status < 0) {
my_error("Failed to close file space!");
}
hdf5status = H5Dclose(dataset);
if (hdf5status < 0) {
my_error("Error closing dataset!");
}
}
/**
* @brief Read an HDF5 attribute.
*
* @tparam _type_ Attribute data type.
* @param group Group or dataset handle that contains the attribute.
* @param name Name of the attribute.
* @param value Pointer to an array that is large enough to store the attribute
* value(s).
*/
template <typename _type_>
inline void ReadArrayAttribute(const HDF5FileOrGroup group,
const std::string name, _type_ *value) {
// determine the HDF5 data type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the attribute
const hid_t attr = H5Aopen(group, name.c_str(), H5P_DEFAULT);
if (attr < 0) {
my_error("Error opening attribute \"%s\"!", name.c_str());
}
// read the attribute
herr_t hdf5status = H5Aread(attr, dtype, value);
if (hdf5status < 0) {
my_error("Error reading attribute \"%s\"!", name.c_str());
}
hdf5status = H5Aclose(attr);
if (hdf5status < 0) {
my_error("Error closing attribute!");
}
}
/**
* @brief Replace an HDF5 attribute with new values.
*
* @tparam _type_ Attribute data type.
* @param group Group or dataset handle that contains the attribute.
* @param name Name of the attribute.
* @param value Pointer to an array with new values, at least large enough to
* have values for all elements in the attribute array.
*/
template <typename _type_>
inline void ReplaceArrayAttribute(const HDF5FileOrGroup group,
const std::string name, const _type_ *value) {
// determine the HDF5 type for _type_
const hid_t dtype = HDF5Datatypes::get_datatype_name<_type_>();
// open the attribute
const hid_t attr = H5Aopen(group, name.c_str(), H5P_DEFAULT);
if (attr < 0) {
my_error("Error opening attribute \"%s\"!", name.c_str());
}
// write new attribute values
herr_t hdf5status = H5Awrite(attr, dtype, value);
if (hdf5status < 0) {
my_error("Error writing attribute \"%s\"!", name.c_str());
}
hdf5status = H5Aclose(attr);
if (hdf5status < 0) {
my_error("Error closing attribute!");
}
}
/**
 * @brief Auxiliary class used in CopyEverythingExcept() and HDF5CopyLink().
 *
 * This class holds information that cannot be passed on to HDF5CopyLink()
 * directly and is instead passed on using a standard C void pointer
 * technique.
 *
 * NOTE(review): the blacklist is stored by reference, so the caller has to
 * keep the vector alive for as long as this object is in use.
 */
class HDF5CopyData {
private:
  /*! @brief File handle of the destination file to which data is being
   * copied. */
  const HDF5FileOrGroup _output_file;
  /*! @brief List of objects that should not be copied over. */
  const std::vector<std::string> &_blacklist;

public:
  /**
   * @brief Constructor.
   *
   * @param output_file Destination file handle.
   * @param blacklist List of objects that should not be copied over.
   */
  inline HDF5CopyData(const HDF5FileOrGroup output_file,
                      const std::vector<std::string> &blacklist)
      : _output_file(output_file), _blacklist(blacklist) {}

  /**
   * @brief Get the destination file handle.
   *
   * @return Destination file handle.
   */
  inline HDF5FileOrGroup get_output_file() const { return _output_file; }

  /**
   * @brief Check if the given object is blacklisted.
   *
   * @param link_name Name of an object in the HDF5 file.
   * @return True if the object is blacklisted and should NOT be copied over.
   */
  inline bool is_blacklisted(const std::string link_name) const {
    return std::count(_blacklist.begin(), _blacklist.end(), link_name) > 0;
  }
};
/**
 * @brief Recursive HDF5 copy function, called using H5Literate() from
 * CopyEverythingExcept().
 *
 * @param group_id File or group handle of the source file we are copying
 * from.
 * @param link_name Name of the object currently being looked at by
 * H5Literate().
 * @param link_info (Mostly useless) additional information about the object
 * currently being looked at.
 * @param extra_data Additional information passed on to the function, i.e. our
 * void* casted HDF5CopyData object.
 * @return H5Literate() compatible return value: 0 if the object was processed
 * correctly and we want to proceed with the next iteration, -1 if there was an
 * error and we want to abort, 1 if we want to stop the iteration early (which
 * we do not use here).
 */
inline herr_t HDF5CopyLink(hid_t group_id, const char *link_name,
                           const H5L_info_t *link_info, void *extra_data) {
  // recover the HDF5CopyData object from the void pointer
  HDF5CopyData *copy_info = reinterpret_cast<HDF5CopyData *>(extra_data);
  // blacklisted objects are skipped entirely
  if (copy_info->is_blacklisted(link_name)) {
    my_statusmessage("Not copying blacklisted group \"%s\".", link_name);
    return 0;
  }
  my_statusmessage("Copying \"%s\"...", link_name);
  herr_t status;
  if (link_info->type == H5L_TYPE_SOFT) {
    // a soft link is copied as a link
    my_statusmessage("Soft link: soft copy.");
    status = H5Lcopy(group_id, link_name, copy_info->get_output_file(),
                     link_name, H5P_DEFAULT, H5P_DEFAULT);
  } else {
    // a hard link points at a real object that needs a real copy
    my_statusmessage("Hard link: real copy.");
    status = H5Ocopy(group_id, link_name, copy_info->get_output_file(),
                     link_name, H5P_DEFAULT, H5P_DEFAULT);
  }
  if (status < 0) {
    my_errormessage("Error copying \"%s\"!", link_name);
    return -1;
  }
  my_statusmessage("Done.");
  return 0;
}
/**
 * @brief Copy all contents of the given input file into the given output file,
 * except for objects with names that are blacklisted.
 *
 * @param input_file Input file handle.
 * @param output_file Output file handle.
 * @param blacklist List of objects that should not be copied over.
 */
inline void CopyEverythingExcept(const HDF5FileOrGroup input_file,
                                 const HDF5FileOrGroup output_file,
                                 const std::vector<std::string> &blacklist) {
  // create an HDF5CopyData object that contains additional arguments to
  // HDF5CopyLink()
  HDF5CopyData copy_data(output_file, blacklist);
  // iterate over the file contents; HDF5CopyLink() copies every
  // non-blacklisted object and receives copy_data through the void pointer
  // argument (this "&copy_data" had been mangled into an HTML entity and did
  // not compile)
  const herr_t hdf5status =
      H5Literate(input_file, H5_INDEX_NAME, H5_ITER_NATIVE, nullptr,
                 HDF5CopyLink, &copy_data);
  if (hdf5status < 0) {
    my_error("Error during file copy iteration!");
  }
}
/**
 * @brief Copy all contents of the given input file into the given output file.
 *
 * (Literally) equivalent to calling CopyEverythingExcept() with an empty
 * blacklist.
 *
 * @param input_file Input file handle.
 * @param output_file Output file handle.
 */
inline void CopyEverything(const HDF5FileOrGroup input_file,
                           const HDF5FileOrGroup output_file) {
  // an empty blacklist means nothing is excluded from the copy
  const std::vector<std::string> no_blacklist;
  CopyEverythingExcept(input_file, output_file, no_blacklist);
}
/**
* @brief Auxiliary object used by HDF5CopyAttribute() and CopyAttributes().
*
* This object acts as the additional data that is passed on to
* HDF5CopyAttribute() by H5Aiterate() using a standard C void pointer cast.
*/
class HDF5AttributeCopyData {
private:
/*! @brief Destination group/dataset handle we are copying to. */
const HDF5FileOrGroup _output_group;
public:
/**
* @brief Constructor.
*
* @param output_group Destination group/dataset handle.
*/
inline HDF5AttributeCopyData(const HDF5FileOrGroup output_group)
: _output_group(output_group) {}
/**
* @brief Get the destination group/dataset handle.
*
* @return Destination group/dataset handle.
*/
inline HDF5FileOrGroup get_output_group() const { return _output_group; }
};
/**
* @brief Recursive attribute copy function, called by H5Aiterate() from
* CopyAttributes().
*
* @param group_id Source group/dataset handle.
* @param name Name of the attribute we are currently looking at.
* @param ainfo Extra information for the current attribute.
* @param extra_data Void* casted HDF5AttributeCopyData object.
* @return H5Aiterate() compatible return value: 0 if the attribute was
* processed correctly and we want to proceed with the next iteration, -1 if
* there was an error and we want to abort, 1 if we want to stop the iteration
* early (which we do not use here).
*/
inline herr_t HDF5CopyAttribute(hid_t group_id, const char *name,
const H5A_info_t *ainfo, void *extra_data) {
my_statusmessage("Copying attribute \"%s\".", name);
// recover the HDF5AttributeCopyData object
HDF5AttributeCopyData *data =
reinterpret_cast<HDF5AttributeCopyData *>(extra_data);
// open the old attribute
const hid_t old_attr = H5Aopen(group_id, name, H5P_DEFAULT);
if (old_attr < 0) {
my_errormessage("Error opening attribute \"%s\"!", name);
return -1;
}
// query the old attribute type
const hid_t atype = H5Aget_type(old_attr);
if (atype < 0) {
my_errormessage("Error getting attribute datatype!");
return -1;
}
// open the old attribute data space
const hid_t old_aspace = H5Aget_space(old_attr);
if (old_aspace < 0) {
my_errormessage("Error getting attribute dataspace!");
return -1;
}
// create a new attribute data space based on the old data space
const hid_t new_aspace = H5Scopy(old_aspace);
if (new_aspace < 0) {
my_errormessage("Error copying attribute dataspace!");
return -1;
}
// create the new attribute
const hid_t new_attr = H5Acreate(data->get_output_group(), name, atype,
new_aspace, H5P_DEFAULT, H5P_DEFAULT);
if (new_attr < 0) {
my_errormessage("Error creating attribute \"%s\"!", name);
return -1;
}
// read the old attribute into a type-less buffer
std::vector<char> buffer(ainfo->data_size);
herr_t hdf5status = H5Aread(old_attr, atype, &buffer[0]);
if (hdf5status < 0) {
my_errormessage("Error reading attribute \"%s\"!", name);
return -1;
}
// write the buffer into the new attribute
hdf5status = H5Awrite(new_attr, atype, &buffer[0]);
if (hdf5status < 0) {
my_errormessage("Error writing attribute \"%s\"!", name);
return -1;
}
hdf5status = H5Sclose(new_aspace);
if (hdf5status < 0) {
my_errormessage("Error closing attribute dataspace!");
return -1;
}
hdf5status = H5Aclose(new_attr);
if (hdf5status < 0) {
my_errormessage("Error closing attribute \"%s\"!", name);
return -1;
}
hdf5status = H5Sclose(old_aspace);
if (hdf5status < 0) {
my_errormessage("Error closing attribute dataspace!");
return -1;
}
hdf5status = H5Tclose(atype);
if (hdf5status < 0) {
my_errormessage("Error closing attribute datatype!");
return -1;
}
hdf5status = H5Aclose(old_attr);
if (hdf5status < 0) {
my_errormessage("Error closing attribute \"%s\"!", name);
return -1;
}