Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/sca.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,21 @@ jobs:
-i lib/c_glib/test/gen-cpp \
--error-exitcode=1 -j2 lib/c_glib/src lib/c_glib/test test/c_glib/src tutorial/c_glib

# Ruby C extension error checks
# suppress missingIncludeSystem:lib/rb/ext/* -> Ruby and libc headers are not needed for analysis.
# suppress checkersReport -> hide info-only active-checker summary.
# Use -j1 with a build dir: build-dir avoids a large staticFunction false-positive set in serial scans,
# and -j1 avoids the -j2 whole-program path where Init_thrift_native's inline unusedFunction suppression misbinds.
mkdir -p /tmp/cppcheck-rb-ext
cppcheck --force --quiet --inline-suppr --enable=all \
-j1 --cppcheck-build-dir=/tmp/cppcheck-rb-ext \
-I lib/rb/ext \
--library=ruby \
--suppress="missingIncludeSystem:lib/rb/ext/*" \
--suppress="checkersReport" \
--check-level=exhaustive \
--error-exitcode=1 lib/rb/ext

- name: Run flake8
id: flake8
continue-on-error: true
Expand Down
67 changes: 40 additions & 27 deletions lib/rb/ext/compact_protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,33 +124,41 @@ static void write_field_begin_internal(VALUE self, VALUE type, VALUE id_value, V
SET_LAST_ID(self, id_value);
}

static int32_t int_to_zig_zag(int32_t n) {
return (n << 1) ^ (n >> 31);
static uint32_t int_to_zig_zag(int32_t n) {
return (((uint32_t)n) << 1) ^ (0U - (uint32_t)(n < 0));
}

static uint64_t ll_to_zig_zag(int64_t n) {
return (n << 1) ^ (n >> 63);
return (((uint64_t)n) << 1) ^ (0ULL - (uint64_t)(n < 0));
}

static uint32_t message_seqid_to_varint32(int32_t seqid) {
return seqid < 0 ? (uint32_t)((int64_t)seqid + (INT64_C(1) << 32)) : (uint32_t)seqid;
}

static int32_t message_seqid_from_varint32(uint32_t seqid) {
return seqid > INT32_MAX ? (int32_t)((int64_t)seqid - (INT64_C(1) << 32)) : (int32_t)seqid;
}

static void write_varint32(VALUE transport, uint32_t n) {
while (true) {
if ((n & ~0x7F) == 0) {
write_byte_direct(transport, n & 0x7f);
if ((n & ~0x7FU) == 0U) {
write_byte_direct(transport, n & 0x7FU);
break;
} else {
write_byte_direct(transport, (n & 0x7F) | 0x80);
write_byte_direct(transport, (n & 0x7FU) | 0x80U);
n = n >> 7;
}
}
}

static void write_varint64(VALUE transport, uint64_t n) {
while (true) {
if ((n & ~0x7F) == 0) {
write_byte_direct(transport, n & 0x7f);
if ((n & ~0x7FULL) == 0ULL) {
write_byte_direct(transport, n & 0x7FULL);
break;
} else {
write_byte_direct(transport, (n & 0x7F) | 0x80);
write_byte_direct(transport, (n & 0x7FULL) | 0x80ULL);
n = n >> 7;
}
}
Expand Down Expand Up @@ -208,9 +216,10 @@ VALUE rb_thrift_compact_proto_write_set_end(VALUE self) {

VALUE rb_thrift_compact_proto_write_message_begin(VALUE self, VALUE name, VALUE type, VALUE seqid) {
VALUE transport = GET_TRANSPORT(self);
int32_t seqid_value = FIX2INT(seqid);
write_byte_direct(transport, PROTOCOL_ID);
write_byte_direct(transport, (VERSION & VERSION_MASK) | ((FIX2INT(type) << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
write_varint32(transport, FIX2INT(seqid));
write_varint32(transport, message_seqid_to_varint32(seqid_value));
rb_thrift_compact_proto_write_string(self, name);

return Qnil;
Expand Down Expand Up @@ -419,20 +428,20 @@ static char read_byte_direct(VALUE self) {
return (char)(FIX2INT(byte));
}

static int64_t zig_zag_to_ll(int64_t n) {
return (((uint64_t)n) >> 1) ^ -(n & 1);
static int64_t zig_zag_to_ll(uint64_t n) {
return (int64_t)((n >> 1) ^ (0ULL - (n & 1ULL)));
}

static int32_t zig_zag_to_int(int32_t n) {
return (((uint32_t)n) >> 1) ^ -(n & 1);
static int32_t zig_zag_to_int(uint32_t n) {
return (int32_t)((n >> 1) ^ (0U - (n & 1U)));
}

static int64_t read_varint64(VALUE self) {
static uint64_t read_varint64(VALUE self) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we change that to uint64_t? That introduces a lot of implicit things going on. Do we really want that?

Copy link
Copy Markdown
Contributor Author

@kpumuk kpumuk Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read_varint64() should return uint64_t because it reads raw varint bits, not a signed Thrift value yet. For values like the ZigZag encoding of INT64_MIN, the wire value is 0xffffffffffffffff, which is representable as uint64_t but not as a meaningful positive int64_t.

UBSan reports the problem in the 64-bit decode path when read_varint64() returns int64_t and the raw unsigned value is forced through a signed type too early:

runtime error: implicit conversion from type 'uint64_t' ... value 18446744073709551615 to type 'int64_t' ... changed the value to -1
  • read_varint64() -> uint64_t for raw varint bits
  • zig_zag_to_ll(uint64_t) -> int64_t for the actual signed decode

Copy link
Copy Markdown
Contributor Author

@kpumuk kpumuk Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the last amend:

  • All conversions are explicit, including (potentially overkill unnecessary seqid overflow, mirroring logic in Ruby)
  • Separated int32 from int64 path via read_varint32 / read_varint64
  • ZigZag decode uses unsigned ints

Still lacking:

  • Size enforcement (C++ enforces 5 bytes for int32, 10 bytes for int64, binary/name length to INT32_MAX via signed out int32_t and >= 0 comparison). Semantically, we should fail on > INT32_MAX

int shift = 0;
int64_t result = 0;
uint64_t result = 0;
while (true) {
int8_t b = read_byte_direct(self);
result = result | ((uint64_t)(b & 0x7f) << shift);
result |= ((uint64_t)(b & 0x7f) << shift);
if ((b & 0x80) != 0x80) {
break;
}
Expand All @@ -441,8 +450,12 @@ static int64_t read_varint64(VALUE self) {
return result;
}

static uint32_t read_varint32(VALUE self) {
return (uint32_t)read_varint64(self);
}

static int16_t read_i16(VALUE self) {
return zig_zag_to_int((int32_t)read_varint64(self));
return (int16_t)zig_zag_to_int(read_varint32(self));
}

VALUE rb_thrift_compact_proto_read_message_end(VALUE self) {
Expand Down Expand Up @@ -494,7 +507,7 @@ VALUE rb_thrift_compact_proto_read_message_begin(VALUE self) {
}

int8_t type = (version_and_type >> TYPE_SHIFT_AMOUNT) & TYPE_BITS;
int32_t seqid = (int32_t)read_varint64(self);
int32_t seqid = message_seqid_from_varint32(read_varint32(self));
VALUE messageName = rb_thrift_compact_proto_read_string(self);
return rb_ary_new3(3, messageName, INT2FIX(type), INT2NUM(seqid));
}
Expand Down Expand Up @@ -532,19 +545,19 @@ VALUE rb_thrift_compact_proto_read_field_begin(VALUE self) {
}

VALUE rb_thrift_compact_proto_read_map_begin(VALUE self) {
int32_t size = (int32_t)read_varint64(self);
uint32_t size = read_varint32(self);
uint8_t key_and_value_type = size == 0 ? 0 : read_byte_direct(self);
return rb_ary_new3(3, INT2FIX(get_ttype(key_and_value_type >> 4)), INT2FIX(get_ttype(key_and_value_type & 0xf)), INT2FIX(size));
return rb_ary_new3(3, INT2FIX(get_ttype(key_and_value_type >> 4)), INT2FIX(get_ttype(key_and_value_type & 0xf)), UINT2NUM(size));
}

VALUE rb_thrift_compact_proto_read_list_begin(VALUE self) {
uint8_t size_and_type = read_byte_direct(self);
int32_t size = (size_and_type >> 4) & 0x0f;
uint32_t size = (size_and_type >> 4) & 0x0f;
if (size == 15) {
size = (int32_t)read_varint64(self);
size = read_varint32(self);
}
uint8_t type = get_ttype(size_and_type & 0x0f);
return rb_ary_new3(2, INT2FIX(type), INT2FIX(size));
return rb_ary_new3(2, INT2FIX(type), UINT2NUM(size));
}

VALUE rb_thrift_compact_proto_read_set_begin(VALUE self) {
Expand All @@ -570,7 +583,7 @@ VALUE rb_thrift_compact_proto_read_i16(VALUE self) {
}

VALUE rb_thrift_compact_proto_read_i32(VALUE self) {
return INT2NUM(zig_zag_to_int((int32_t)read_varint64(self)));
return INT2NUM(zig_zag_to_int(read_varint32(self)));
}

VALUE rb_thrift_compact_proto_read_i64(VALUE self) {
Expand Down Expand Up @@ -603,8 +616,8 @@ VALUE rb_thrift_compact_proto_read_string(VALUE self) {
}

VALUE rb_thrift_compact_proto_read_binary(VALUE self) {
int64_t size = read_varint64(self);
return READ(self, size);
uint32_t size = read_varint32(self);
return rb_funcall(GET_TRANSPORT(self), read_all_method_id, 1, UINT2NUM(size));
}

VALUE rb_thrift_compact_proto_read_uuid(VALUE self) {
Expand Down
17 changes: 5 additions & 12 deletions lib/rb/ext/struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,6 @@ static void write_container(int ttype, VALUE field_info, VALUE value, VALUE prot

if (ttype == TTYPE_MAP) {
VALUE keys;
VALUE key;
VALUE val;

Check_Type(value, T_HASH);

Expand All @@ -272,8 +270,8 @@ static void write_container(int ttype, VALUE field_info, VALUE value, VALUE prot
default_write_map_begin(protocol, keytype_value, valuetype_value, INT2FIX(sz));

for (i = 0; i < sz; i++) {
key = rb_ary_entry(keys, i);
val = rb_hash_aref(value, key);
VALUE key = rb_ary_entry(keys, i);
VALUE val = rb_hash_aref(value, key);

if (IS_CONTAINER(keytype)) {
write_container(keytype, key_info, key, protocol);
Expand Down Expand Up @@ -489,8 +487,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {
rb_thrift_struct_read(result, protocol);
}
} else if (ttype == TTYPE_MAP) {
int i;

VALUE map_header = default_read_map_begin(protocol);
int key_ttype = FIX2INT(rb_ary_entry(map_header, 0));
int value_ttype = FIX2INT(rb_ary_entry(map_header, 1));
Expand All @@ -511,7 +507,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {
if (num_entries == 0 || (specified_key_type == key_ttype && specified_value_type == value_ttype)) {
result = rb_hash_new();

for (i = 0; i < num_entries; ++i) {
for (int i = 0; i < num_entries; ++i) {
VALUE key, val;

key = read_anything(protocol, key_ttype, key_info);
Expand All @@ -528,8 +524,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {

default_read_map_end(protocol);
} else if (ttype == TTYPE_LIST) {
int i;

VALUE list_header = default_read_list_begin(protocol);
int element_ttype = FIX2INT(rb_ary_entry(list_header, 0));
int num_elements = FIX2INT(rb_ary_entry(list_header, 1));
Expand All @@ -542,7 +536,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {
if (specified_element_type == element_ttype) {
result = new_container_array(num_elements);

for (i = 0; i < num_elements; ++i) {
for (int i = 0; i < num_elements; ++i) {
rb_ary_push(result, read_anything(protocol, element_ttype, rb_hash_aref(field_info, element_sym)));
}
} else {
Expand All @@ -555,7 +549,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {
default_read_list_end(protocol);
} else if (ttype == TTYPE_SET) {
VALUE items;
int i;

VALUE set_header = default_read_set_begin(protocol);
int element_ttype = FIX2INT(rb_ary_entry(set_header, 0));
Expand All @@ -569,7 +562,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) {
if (specified_element_type == element_ttype) {
items = new_container_array(num_elements);

for (i = 0; i < num_elements; ++i) {
for (int i = 0; i < num_elements; ++i) {
rb_ary_push(items, read_anything(protocol, element_ttype, rb_hash_aref(field_info, element_sym)));
}

Expand Down
1 change: 1 addition & 0 deletions lib/rb/ext/thrift_native.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ int PROTOERR_BAD_VERSION;
int PROTOERR_NOT_IMPLEMENTED;
int PROTOERR_DEPTH_LIMIT;

// cppcheck-suppress unusedFunction
RUBY_FUNC_EXPORTED void Init_thrift_native(void) {
// cached classes
thrift_module = rb_const_get(rb_cObject, rb_intern("Thrift"));
Expand Down
50 changes: 50 additions & 0 deletions lib/rb/spec/compact_protocol_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@
require 'spec_helper'

describe Thrift::CompactProtocol do
INTEGER_BOUNDARY_TESTS = {
:i32 => [-(2**31), (2**31) - 1],
:i64 => [-(2**63), (2**63) - 1]
}

INTEGER_MINIMUM_ENCODINGS = {
:i32 => [0xff, 0xff, 0xff, 0xff, 0x0f],
:i64 => [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01]
}

TESTS = {
:byte => (-127..127).to_a,
:i16 => (0..14).map { |shift| [1 << shift, -(1 << shift)] }.flatten.sort,
Expand Down Expand Up @@ -71,6 +81,46 @@
end
end

it "should round-trip signed integer boundaries correctly" do
INTEGER_BOUNDARY_TESTS.each_pair do |primitive_type, test_values|
test_values.each do |value|
trans = Thrift::MemoryBufferTransport.new
proto = Thrift::CompactProtocol.new(trans)

proto.send(writer(primitive_type), value)
expect(proto.send(reader(primitive_type))).to eq(value)
end
end
end

it "should encode signed integer minima with the canonical zigzag varint bytes" do
INTEGER_MINIMUM_ENCODINGS.each_pair do |primitive_type, expected_bytes|
trans = Thrift::MemoryBufferTransport.new
proto = Thrift::CompactProtocol.new(trans)

proto.send(writer(primitive_type), INTEGER_BOUNDARY_TESTS.fetch(primitive_type).first)
expect(trans.read(trans.available).bytes).to eq(expected_bytes)
end
end

it "should decode i32 minima from direct canonical zigzag bytes" do
trans = Thrift::MemoryBufferTransport.new
trans.write(INTEGER_MINIMUM_ENCODINGS[:i32].pack("C*"))

proto = Thrift::CompactProtocol.new(trans)
expect(proto.read_i32).to eq(INTEGER_BOUNDARY_TESTS[:i32].first)
end

it "should read binary values with multi-byte varint32 lengths" do
payload = "x" * 128
trans = Thrift::MemoryBufferTransport.new
trans.write([0x80, 0x01].pack("C*"))
trans.write(payload)

proto = Thrift::CompactProtocol.new(trans)
expect(proto.read_binary).to eq(payload)
end

it "should write a uuid" do
trans = Thrift::MemoryBufferTransport.new
proto = Thrift::CompactProtocol.new(trans)
Expand Down