Skip to content

Commit a6cc726

Browse files
committed
Allow single varint encoded ints instead of packed ints
The protobuf buffers documentation says: "Protocol buffer parsers must be able to parse repeated fields that were compiled as packed as if they were not packed, and vice versa." We don't do that, because it would either mean we have to allocate some temporary storage or read the protobuf messages multiple times, making the decoding step much more expensive. And generally it doesn't make sense to use non-packed encoding because the packed encoding uses less space. But it seems that at least one implementation of protobuf, Protobuf‑net, optimizes single-element arrays by using the non-packed form, which does make sense if you are looking at the amount of space it needs. To still have fast decoding, we implement this as a special case here where a single non-packed item is allowed but not multiple ones. This is still not quite okay according to the protobuf documentation, but it solves this problem and that's a good compromise. Fixes #389
1 parent 2e7bd0f commit a6cc726

1 file changed

Lines changed: 113 additions & 6 deletions

File tree

include/osmium/io/detail/pbf_decoder.hpp

Lines changed: 113 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,17 +81,29 @@ namespace osmium {
8181

8282
using protozero::data_view;
8383

84+
template <typename T>
8485
class values_access {
8586

8687
const char* m_data = nullptr;
8788
const char* m_end = nullptr;
89+
T m_value = 0;
90+
int m_num_values = 0;
8891

8992
protected:
9093

9194
std::uint64_t next() {
9295
return protozero::decode_varint(&m_data, m_end);
9396
}
9497

98+
bool has_single_value() const noexcept {
99+
return m_num_values > 0;
100+
}
101+
102+
T get_and_clear_value() noexcept {
103+
m_num_values = 0;
104+
return m_value;
105+
}
106+
95107
public:
96108

97109
values_access() = default;
@@ -101,13 +113,18 @@ namespace osmium {
101113
m_end(data.data() + data.size()) {
102114
}
103115

116+
explicit values_access(T value) :
117+
m_value(value),
118+
m_num_values(1) {
119+
}
120+
104121
bool empty() const noexcept {
105-
return m_data == m_end;
122+
return m_data == m_end && m_num_values == 0;
106123
}
107124

108125
std::size_t size() const noexcept {
109126
if (!m_data) {
110-
return 0;
127+
return m_num_values;
111128
}
112129

113130
// We know that each varint contains exactly one byte with the most
@@ -120,49 +137,61 @@ namespace osmium {
120137

121138
}; // class values_access
122139

123-
class values_access_int32 : public values_access {
140+
class values_access_int32 : public values_access<std::int32_t> {
124141

125142
public:
126143

127144
using values_access::values_access;
128145

129146
int32_t next_int32() {
147+
if (has_single_value()) {
148+
return get_and_clear_value();
149+
}
130150
return static_cast<int32_t>(next());
131151
}
132152

133153
}; // class values_access_int32
134154

135-
class values_access_uint32 : public values_access {
155+
class values_access_uint32 : public values_access<std::uint32_t> {
136156

137157
public:
138158

139159
using values_access::values_access;
140160

141161
uint32_t next_uint32() {
162+
if (has_single_value()) {
163+
return get_and_clear_value();
164+
}
142165
return static_cast<uint32_t>(next());
143166
}
144167

145168
}; // class values_access_uint32
146169

147-
class values_access_sint32 : public values_access {
170+
class values_access_sint32 : public values_access<std::int32_t> {
148171

149172
public:
150173

151174
using values_access::values_access;
152175

153176
int32_t next_sint32() {
177+
if (has_single_value()) {
178+
return get_and_clear_value();
179+
}
154180
return protozero::decode_zigzag32(static_cast<uint32_t>(next()));
155181
}
156182

157183
}; // class values_access_sint32
158184

159-
class values_access_sint64 : public values_access {
185+
class values_access_sint64 : public values_access<std::int64_t> {
160186

161187
public:
162188

163189
using values_access::values_access;
164190

165191
int64_t next_sint64() {
192+
if (has_single_value()) {
193+
return get_and_clear_value();
194+
}
166195
return protozero::decode_zigzag64(next());
167196
}
168197

@@ -375,9 +404,15 @@ namespace osmium {
375404
case protozero::tag_and_type(OSMFormat::Node::packed_uint32_keys, protozero::pbf_wire_type::length_delimited):
376405
keys = values_access_uint32{pbf_node.get_view()};
377406
break;
407+
case protozero::tag_and_type(OSMFormat::Node::packed_uint32_keys, protozero::pbf_wire_type::varint):
408+
keys = values_access_uint32{pbf_node.get_uint32()};
409+
break;
378410
case protozero::tag_and_type(OSMFormat::Node::packed_uint32_vals, protozero::pbf_wire_type::length_delimited):
379411
vals = values_access_uint32{pbf_node.get_view()};
380412
break;
413+
case protozero::tag_and_type(OSMFormat::Node::packed_uint32_vals, protozero::pbf_wire_type::varint):
414+
vals = values_access_uint32{pbf_node.get_uint32()};
415+
break;
381416
case protozero::tag_and_type(OSMFormat::Node::optional_Info_info, protozero::pbf_wire_type::length_delimited):
382417
if (m_read_metadata == osmium::io::read_meta::yes) {
383418
user = decode_info(pbf_node.get_view(), builder.object());
@@ -432,9 +467,15 @@ namespace osmium {
432467
case protozero::tag_and_type(OSMFormat::Way::packed_uint32_keys, protozero::pbf_wire_type::length_delimited):
433468
keys = values_access_uint32{pbf_way.get_view()};
434469
break;
470+
case protozero::tag_and_type(OSMFormat::Way::packed_uint32_keys, protozero::pbf_wire_type::varint):
471+
keys = values_access_uint32{pbf_way.get_uint32()};
472+
break;
435473
case protozero::tag_and_type(OSMFormat::Way::packed_uint32_vals, protozero::pbf_wire_type::length_delimited):
436474
vals = values_access_uint32{pbf_way.get_view()};
437475
break;
476+
case protozero::tag_and_type(OSMFormat::Way::packed_uint32_vals, protozero::pbf_wire_type::varint):
477+
vals = values_access_uint32{pbf_way.get_uint32()};
478+
break;
438479
case protozero::tag_and_type(OSMFormat::Way::optional_Info_info, protozero::pbf_wire_type::length_delimited):
439480
if (m_read_metadata == osmium::io::read_meta::yes) {
440481
user = decode_info(pbf_way.get_view(), builder.object());
@@ -445,12 +486,21 @@ namespace osmium {
445486
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_refs, protozero::pbf_wire_type::length_delimited):
446487
refs = values_access_sint64{pbf_way.get_view()};
447488
break;
489+
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_refs, protozero::pbf_wire_type::varint):
490+
refs = values_access_sint64{pbf_way.get_sint64()};
491+
break;
448492
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lat, protozero::pbf_wire_type::length_delimited):
449493
lats = values_access_sint64{pbf_way.get_view()};
450494
break;
495+
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lat, protozero::pbf_wire_type::varint):
496+
lats = values_access_sint64{pbf_way.get_sint64()};
497+
break;
451498
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lon, protozero::pbf_wire_type::length_delimited):
452499
lons = values_access_sint64{pbf_way.get_view()};
453500
break;
501+
case protozero::tag_and_type(OSMFormat::Way::packed_sint64_lon, protozero::pbf_wire_type::varint):
502+
lons = values_access_sint64{pbf_way.get_sint64()};
503+
break;
454504
default:
455505
pbf_way.skip();
456506
}
@@ -501,9 +551,15 @@ namespace osmium {
501551
case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_keys, protozero::pbf_wire_type::length_delimited):
502552
keys = values_access_uint32{pbf_relation.get_view()};
503553
break;
554+
case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_keys, protozero::pbf_wire_type::varint):
555+
keys = values_access_uint32{pbf_relation.get_uint32()};
556+
break;
504557
case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_vals, protozero::pbf_wire_type::length_delimited):
505558
vals = values_access_uint32{pbf_relation.get_view()};
506559
break;
560+
case protozero::tag_and_type(OSMFormat::Relation::packed_uint32_vals, protozero::pbf_wire_type::varint):
561+
vals = values_access_uint32{pbf_relation.get_uint32()};
562+
break;
507563
case protozero::tag_and_type(OSMFormat::Relation::optional_Info_info, protozero::pbf_wire_type::length_delimited):
508564
if (m_read_metadata == osmium::io::read_meta::yes) {
509565
user = decode_info(pbf_relation.get_view(), builder.object());
@@ -514,12 +570,21 @@ namespace osmium {
514570
case protozero::tag_and_type(OSMFormat::Relation::packed_int32_roles_sid, protozero::pbf_wire_type::length_delimited):
515571
roles = values_access_int32{pbf_relation.get_view()};
516572
break;
573+
case protozero::tag_and_type(OSMFormat::Relation::packed_int32_roles_sid, protozero::pbf_wire_type::varint):
574+
roles = values_access_int32{pbf_relation.get_int32()};
575+
break;
517576
case protozero::tag_and_type(OSMFormat::Relation::packed_sint64_memids, protozero::pbf_wire_type::length_delimited):
518577
refs = values_access_sint64{pbf_relation.get_view()};
519578
break;
579+
case protozero::tag_and_type(OSMFormat::Relation::packed_sint64_memids, protozero::pbf_wire_type::varint):
580+
refs = values_access_sint64{pbf_relation.get_sint64()};
581+
break;
520582
case protozero::tag_and_type(OSMFormat::Relation::packed_MemberType_types, protozero::pbf_wire_type::length_delimited):
521583
types = values_access_int32{pbf_relation.get_view()};
522584
break;
585+
case protozero::tag_and_type(OSMFormat::Relation::packed_MemberType_types, protozero::pbf_wire_type::varint):
586+
types = values_access_int32{pbf_relation.get_int32()};
587+
break;
523588
default:
524589
pbf_relation.skip();
525590
}
@@ -576,15 +641,27 @@ namespace osmium {
576641
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::length_delimited):
577642
ids = values_access_sint64{pbf_dense_nodes.get_view()};
578643
break;
644+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::varint):
645+
ids = values_access_sint64{pbf_dense_nodes.get_sint64()};
646+
break;
579647
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::length_delimited):
580648
lats = values_access_sint64{pbf_dense_nodes.get_view()};
581649
break;
650+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::varint):
651+
lats = values_access_sint64{pbf_dense_nodes.get_sint64()};
652+
break;
582653
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::length_delimited):
583654
lons = values_access_sint64{pbf_dense_nodes.get_view()};
584655
break;
656+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::varint):
657+
lons = values_access_sint64{pbf_dense_nodes.get_sint64()};
658+
break;
585659
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::length_delimited):
586660
tags = values_access_int32{pbf_dense_nodes.get_view()};
587661
break;
662+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::varint):
663+
tags = values_access_int32{pbf_dense_nodes.get_int32()};
664+
break;
588665
default:
589666
pbf_dense_nodes.skip();
590667
}
@@ -643,6 +720,9 @@ namespace osmium {
643720
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::length_delimited):
644721
ids = values_access_sint64{pbf_dense_nodes.get_view()};
645722
break;
723+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_id, protozero::pbf_wire_type::varint):
724+
ids = values_access_sint64{pbf_dense_nodes.get_sint64()};
725+
break;
646726
case protozero::tag_and_type(OSMFormat::DenseNodes::optional_DenseInfo_denseinfo, protozero::pbf_wire_type::length_delimited):
647727
{
648728
has_info = true;
@@ -652,21 +732,39 @@ namespace osmium {
652732
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_int32_version, protozero::pbf_wire_type::length_delimited):
653733
versions = values_access_int32{pbf_dense_info.get_view()};
654734
break;
735+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_int32_version, protozero::pbf_wire_type::varint):
736+
versions = values_access_int32{pbf_dense_info.get_int32()};
737+
break;
655738
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_timestamp, protozero::pbf_wire_type::length_delimited):
656739
timestamps = values_access_sint64{pbf_dense_info.get_view()};
657740
break;
741+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_timestamp, protozero::pbf_wire_type::varint):
742+
timestamps = values_access_sint64{pbf_dense_info.get_sint64()};
743+
break;
658744
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_changeset, protozero::pbf_wire_type::length_delimited):
659745
changesets = values_access_sint64{pbf_dense_info.get_view()};
660746
break;
747+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint64_changeset, protozero::pbf_wire_type::varint):
748+
changesets = values_access_sint64{pbf_dense_info.get_sint64()};
749+
break;
661750
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_uid, protozero::pbf_wire_type::length_delimited):
662751
uids = values_access_sint32{pbf_dense_info.get_view()};
663752
break;
753+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_uid, protozero::pbf_wire_type::varint):
754+
uids = values_access_sint32{pbf_dense_info.get_sint32()};
755+
break;
664756
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_user_sid, protozero::pbf_wire_type::length_delimited):
665757
user_sids = values_access_sint32{pbf_dense_info.get_view()};
666758
break;
759+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_sint32_user_sid, protozero::pbf_wire_type::varint):
760+
user_sids = values_access_sint32{pbf_dense_info.get_sint32()};
761+
break;
667762
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_bool_visible, protozero::pbf_wire_type::length_delimited):
668763
visibles = values_access_int32{pbf_dense_info.get_view()};
669764
break;
765+
case protozero::tag_and_type(OSMFormat::DenseInfo::packed_bool_visible, protozero::pbf_wire_type::varint):
766+
visibles = values_access_int32{pbf_dense_info.get_int32()};
767+
break;
670768
default:
671769
pbf_dense_info.skip();
672770
}
@@ -676,12 +774,21 @@ namespace osmium {
676774
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::length_delimited):
677775
lats = values_access_sint64{pbf_dense_nodes.get_view()};
678776
break;
777+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lat, protozero::pbf_wire_type::varint):
778+
lats = values_access_sint64{pbf_dense_nodes.get_sint64()};
779+
break;
679780
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::length_delimited):
680781
lons = values_access_sint64{pbf_dense_nodes.get_view()};
681782
break;
783+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_sint64_lon, protozero::pbf_wire_type::varint):
784+
lons = values_access_sint64{pbf_dense_nodes.get_sint64()};
785+
break;
682786
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::length_delimited):
683787
tags = values_access_int32{pbf_dense_nodes.get_view()};
684788
break;
789+
case protozero::tag_and_type(OSMFormat::DenseNodes::packed_int32_keys_vals, protozero::pbf_wire_type::varint):
790+
tags = values_access_int32{pbf_dense_nodes.get_int32()};
791+
break;
685792
default:
686793
pbf_dense_nodes.skip();
687794
}

0 commit comments

Comments
 (0)