From 4f496f75d2b83993123e226aaa873165900469d0 Mon Sep 17 00:00:00 2001 From: Dmytro Shteflyuk Date: Sun, 29 Mar 2026 13:04:38 -0400 Subject: [PATCH] THRIFT-5941: Add Ruby ext cppcheck coverage Client: rb Co-Authored-By: OpenAI Codex (GPT-5.4) --- .github/workflows/sca.yml | 15 +++++++ lib/rb/ext/compact_protocol.c | 67 +++++++++++++++++----------- lib/rb/ext/struct.c | 17 +++---- lib/rb/ext/thrift_native.c | 1 + lib/rb/spec/compact_protocol_spec.rb | 50 +++++++++++++++++++++ 5 files changed, 111 insertions(+), 39 deletions(-) diff --git a/.github/workflows/sca.yml b/.github/workflows/sca.yml index ae41f862a1d..88b2144a895 100644 --- a/.github/workflows/sca.yml +++ b/.github/workflows/sca.yml @@ -146,6 +146,21 @@ jobs: -i lib/c_glib/test/gen-cpp \ --error-exitcode=1 -j2 lib/c_glib/src lib/c_glib/test test/c_glib/src tutorial/c_glib + # Ruby C extension error checks + # suppress missingIncludeSystem:lib/rb/ext/* -> Ruby and libc headers are not needed for analysis. + # suppress checkersReport -> hide info-only active-checker summary. + # Use -j1 with a build dir: build-dir avoids a large staticFunction false-positive set in serial scans, + # and -j1 avoids the -j2 whole-program path where Init_thrift_native's inline unusedFunction suppression misbinds. + mkdir -p /tmp/cppcheck-rb-ext + cppcheck --force --quiet --inline-suppr --enable=all \ + -j1 --cppcheck-build-dir=/tmp/cppcheck-rb-ext \ + -I lib/rb/ext \ + --library=ruby \ + --suppress="missingIncludeSystem:lib/rb/ext/*" \ + --suppress="checkersReport" \ + --check-level=exhaustive \ + --error-exitcode=1 lib/rb/ext + - name: Run flake8 id: flake8 continue-on-error: true diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c index 8c864e815f0..7b602a36e31 100644 --- a/lib/rb/ext/compact_protocol.c +++ b/lib/rb/ext/compact_protocol.c @@ -124,21 +124,29 @@ static void write_field_begin_internal(VALUE self, VALUE type, VALUE id_value, V SET_LAST_ID(self, id_value); } -static int32_t int_to_zig_zag(int32_t n) { - return (n << 1) ^ (n >> 31); +static uint32_t int_to_zig_zag(int32_t n) { + return (((uint32_t)n) << 1) ^ (0U - (uint32_t)(n < 0)); } static uint64_t ll_to_zig_zag(int64_t n) { - return (n << 1) ^ (n >> 63); + return (((uint64_t)n) << 1) ^ (0ULL - (uint64_t)(n < 0)); +} + +static uint32_t message_seqid_to_varint32(int32_t seqid) { + return seqid < 0 ? (uint32_t)((int64_t)seqid + (INT64_C(1) << 32)) : (uint32_t)seqid; +} + +static int32_t message_seqid_from_varint32(uint32_t seqid) { + return seqid > INT32_MAX ? (int32_t)((int64_t)seqid - (INT64_C(1) << 32)) : (int32_t)seqid; } static void write_varint32(VALUE transport, uint32_t n) { while (true) { - if ((n & ~0x7F) == 0) { - write_byte_direct(transport, n & 0x7f); + if ((n & ~0x7FU) == 0U) { + write_byte_direct(transport, n & 0x7FU); break; } else { - write_byte_direct(transport, (n & 0x7F) | 0x80); + write_byte_direct(transport, (n & 0x7FU) | 0x80U); n = n >> 7; } } @@ -146,11 +154,11 @@ static void write_varint32(VALUE transport, uint32_t n) { static void write_varint64(VALUE transport, uint64_t n) { while (true) { - if ((n & ~0x7F) == 0) { - write_byte_direct(transport, n & 0x7f); + if ((n & ~0x7FULL) == 0ULL) { + write_byte_direct(transport, n & 0x7FULL); break; } else { - write_byte_direct(transport, (n & 0x7F) | 0x80); + write_byte_direct(transport, (n & 0x7FULL) | 0x80ULL); n = n >> 7; } } @@ -208,9 +216,10 @@ VALUE rb_thrift_compact_proto_write_set_end(VALUE self) { VALUE rb_thrift_compact_proto_write_message_begin(VALUE self, VALUE name, VALUE type, VALUE seqid) { VALUE transport = GET_TRANSPORT(self); + int32_t seqid_value = FIX2INT(seqid); write_byte_direct(transport, PROTOCOL_ID); write_byte_direct(transport, (VERSION & VERSION_MASK) | ((FIX2INT(type) << TYPE_SHIFT_AMOUNT) & TYPE_MASK)); - write_varint32(transport, FIX2INT(seqid)); + write_varint32(transport, message_seqid_to_varint32(seqid_value)); rb_thrift_compact_proto_write_string(self, name); return Qnil; @@ -419,20 +428,20 @@ static char read_byte_direct(VALUE self) { return (char)(FIX2INT(byte)); } -static int64_t zig_zag_to_ll(int64_t n) { - return (((uint64_t)n) >> 1) ^ -(n & 1); +static int64_t zig_zag_to_ll(uint64_t n) { + return (int64_t)((n >> 1) ^ (0ULL - (n & 1ULL))); } -static int32_t zig_zag_to_int(int32_t n) { - return (((uint32_t)n) >> 1) ^ -(n & 1); +static int32_t zig_zag_to_int(uint32_t n) { + return (int32_t)((n >> 1) ^ (0U - (n & 1U))); } -static int64_t read_varint64(VALUE self) { +static uint64_t read_varint64(VALUE self) { int shift = 0; - int64_t result = 0; + uint64_t result = 0; while (true) { int8_t b = read_byte_direct(self); - result = result | ((uint64_t)(b & 0x7f) << shift); + result |= ((uint64_t)(b & 0x7f) << shift); if ((b & 0x80) != 0x80) { break; } @@ -441,8 +450,12 @@ static int64_t read_varint64(VALUE self) { return result; } +static uint32_t read_varint32(VALUE self) { + return (uint32_t)read_varint64(self); +} + static int16_t read_i16(VALUE self) { - return zig_zag_to_int((int32_t)read_varint64(self)); + return (int16_t)zig_zag_to_int(read_varint32(self)); } VALUE rb_thrift_compact_proto_read_message_end(VALUE self) { @@ -494,7 +507,7 @@ VALUE rb_thrift_compact_proto_read_message_begin(VALUE self) { } int8_t type = (version_and_type >> TYPE_SHIFT_AMOUNT) & TYPE_BITS; - int32_t seqid = (int32_t)read_varint64(self); + int32_t seqid = message_seqid_from_varint32(read_varint32(self)); VALUE messageName = rb_thrift_compact_proto_read_string(self); return rb_ary_new3(3, messageName, INT2FIX(type), INT2NUM(seqid)); } @@ -532,19 +545,19 @@ VALUE rb_thrift_compact_proto_read_field_begin(VALUE self) { } VALUE rb_thrift_compact_proto_read_map_begin(VALUE self) { - int32_t size = (int32_t)read_varint64(self); + uint32_t size = read_varint32(self); uint8_t key_and_value_type = size == 0 ? 0 : read_byte_direct(self); - return rb_ary_new3(3, INT2FIX(get_ttype(key_and_value_type >> 4)), INT2FIX(get_ttype(key_and_value_type & 0xf)), INT2FIX(size)); + return rb_ary_new3(3, INT2FIX(get_ttype(key_and_value_type >> 4)), INT2FIX(get_ttype(key_and_value_type & 0xf)), UINT2NUM(size)); } VALUE rb_thrift_compact_proto_read_list_begin(VALUE self) { uint8_t size_and_type = read_byte_direct(self); - int32_t size = (size_and_type >> 4) & 0x0f; + uint32_t size = (size_and_type >> 4) & 0x0f; if (size == 15) { - size = (int32_t)read_varint64(self); + size = read_varint32(self); } uint8_t type = get_ttype(size_and_type & 0x0f); - return rb_ary_new3(2, INT2FIX(type), INT2FIX(size)); + return rb_ary_new3(2, INT2FIX(type), UINT2NUM(size)); } VALUE rb_thrift_compact_proto_read_set_begin(VALUE self) { @@ -570,7 +583,7 @@ VALUE rb_thrift_compact_proto_read_i16(VALUE self) { } VALUE rb_thrift_compact_proto_read_i32(VALUE self) { - return INT2NUM(zig_zag_to_int((int32_t)read_varint64(self))); + return INT2NUM(zig_zag_to_int(read_varint32(self))); } VALUE rb_thrift_compact_proto_read_i64(VALUE self) { @@ -603,8 +616,8 @@ VALUE rb_thrift_compact_proto_read_string(VALUE self) { } VALUE rb_thrift_compact_proto_read_binary(VALUE self) { - int64_t size = read_varint64(self); - return READ(self, size); + uint32_t size = read_varint32(self); + return rb_funcall(GET_TRANSPORT(self), read_all_method_id, 1, UINT2NUM(size)); } VALUE rb_thrift_compact_proto_read_uuid(VALUE self) { diff --git a/lib/rb/ext/struct.c b/lib/rb/ext/struct.c index cdc11db701e..d5fe817a2c8 100644 --- a/lib/rb/ext/struct.c +++ b/lib/rb/ext/struct.c @@ -252,8 +252,6 @@ static void write_container(int ttype, VALUE field_info, VALUE value, VALUE prot if (ttype == TTYPE_MAP) { VALUE keys; - VALUE key; - VALUE val; Check_Type(value, T_HASH); @@ -272,8 +270,8 @@ static void write_container(int ttype, VALUE field_info, VALUE value, VALUE prot default_write_map_begin(protocol, keytype_value, valuetype_value, INT2FIX(sz)); for (i = 0; i < sz; i++) { - key = rb_ary_entry(keys, i); - val = rb_hash_aref(value, key); + VALUE key = rb_ary_entry(keys, i); + VALUE val = rb_hash_aref(value, key); if (IS_CONTAINER(keytype)) { write_container(keytype, key_info, key, protocol); @@ -489,8 +487,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { rb_thrift_struct_read(result, protocol); } } else if (ttype == TTYPE_MAP) { - int i; - VALUE map_header = default_read_map_begin(protocol); int key_ttype = FIX2INT(rb_ary_entry(map_header, 0)); int value_ttype = FIX2INT(rb_ary_entry(map_header, 1)); @@ -511,7 +507,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { if (num_entries == 0 || (specified_key_type == key_ttype && specified_value_type == value_ttype)) { result = rb_hash_new(); - for (i = 0; i < num_entries; ++i) { + for (int i = 0; i < num_entries; ++i) { VALUE key, val; key = read_anything(protocol, key_ttype, key_info); @@ -528,8 +524,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { default_read_map_end(protocol); } else if (ttype == TTYPE_LIST) { - int i; - VALUE list_header = default_read_list_begin(protocol); int element_ttype = FIX2INT(rb_ary_entry(list_header, 0)); int num_elements = FIX2INT(rb_ary_entry(list_header, 1)); @@ -542,7 +536,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { if (specified_element_type == element_ttype) { result = new_container_array(num_elements); - for (i = 0; i < num_elements; ++i) { + for (int i = 0; i < num_elements; ++i) { rb_ary_push(result, read_anything(protocol, element_ttype, rb_hash_aref(field_info, element_sym))); } } else { @@ -555,7 +549,6 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { default_read_list_end(protocol); } else if (ttype == TTYPE_SET) { VALUE items; - int i; VALUE set_header = default_read_set_begin(protocol); int element_ttype = FIX2INT(rb_ary_entry(set_header, 0)); @@ -569,7 +562,7 @@ static VALUE read_anything(VALUE protocol, int ttype, VALUE field_info) { if (specified_element_type == element_ttype) { items = new_container_array(num_elements); - for (i = 0; i < num_elements; ++i) { + for (int i = 0; i < num_elements; ++i) { rb_ary_push(items, read_anything(protocol, element_ttype, rb_hash_aref(field_info, element_sym))); } diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c index cd4faa93e5f..a3252068a24 100644 --- a/lib/rb/ext/thrift_native.c +++ b/lib/rb/ext/thrift_native.c @@ -121,6 +121,7 @@ int PROTOERR_BAD_VERSION; int PROTOERR_NOT_IMPLEMENTED; int PROTOERR_DEPTH_LIMIT; +// cppcheck-suppress unusedFunction RUBY_FUNC_EXPORTED void Init_thrift_native(void) { // cached classes thrift_module = rb_const_get(rb_cObject, rb_intern("Thrift")); diff --git a/lib/rb/spec/compact_protocol_spec.rb b/lib/rb/spec/compact_protocol_spec.rb index e67514bf9d9..67a6e6dc5ea 100644 --- a/lib/rb/spec/compact_protocol_spec.rb +++ b/lib/rb/spec/compact_protocol_spec.rb @@ -21,6 +21,16 @@ require 'spec_helper' describe Thrift::CompactProtocol do + INTEGER_BOUNDARY_TESTS = { + :i32 => [-(2**31), (2**31) - 1], + :i64 => [-(2**63), (2**63) - 1] + } + + INTEGER_MINIMUM_ENCODINGS = { + :i32 => [0xff, 0xff, 0xff, 0xff, 0x0f], + :i64 => [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01] + } + TESTS = { :byte => (-127..127).to_a, :i16 => (0..14).map { |shift| [1 << shift, -(1 << shift)] }.flatten.sort, @@ -71,6 +81,46 @@ end end + it "should round-trip signed integer boundaries correctly" do + INTEGER_BOUNDARY_TESTS.each_pair do |primitive_type, test_values| + test_values.each do |value| + trans = Thrift::MemoryBufferTransport.new + proto = Thrift::CompactProtocol.new(trans) + + proto.send(writer(primitive_type), value) + expect(proto.send(reader(primitive_type))).to eq(value) + end + end + end + + it "should encode signed integer minima with the canonical zigzag varint bytes" do + INTEGER_MINIMUM_ENCODINGS.each_pair do |primitive_type, expected_bytes| + trans = Thrift::MemoryBufferTransport.new + proto = Thrift::CompactProtocol.new(trans) + + proto.send(writer(primitive_type), INTEGER_BOUNDARY_TESTS.fetch(primitive_type).first) + expect(trans.read(trans.available).bytes).to eq(expected_bytes) + end + end + + it "should decode i32 minima from direct canonical zigzag bytes" do + trans = Thrift::MemoryBufferTransport.new + trans.write(INTEGER_MINIMUM_ENCODINGS[:i32].pack("C*")) + + proto = Thrift::CompactProtocol.new(trans) + expect(proto.read_i32).to eq(INTEGER_BOUNDARY_TESTS[:i32].first) + end + + it "should read binary values with multi-byte varint32 lengths" do + payload = "x" * 128 + trans = Thrift::MemoryBufferTransport.new + trans.write([0x80, 0x01].pack("C*")) + trans.write(payload) + + proto = Thrift::CompactProtocol.new(trans) + expect(proto.read_binary).to eq(payload) + end + it "should write a uuid" do trans = Thrift::MemoryBufferTransport.new proto = Thrift::CompactProtocol.new(trans)