From 85c3fcc3fa164f5da71f44bb05bcc0c5e628e5c4 Mon Sep 17 00:00:00 2001
From: Takuro Ashie
Date: Thu, 18 Jun 2026 16:14:40 +0900
Subject: [PATCH 1/2] Make nokogiri optional

Use REXML as runtime fallback.

Signed-off-by: Takuro Ashie
---
 fluent-plugin-parser-winevt_xml.gemspec       |  4 +-
 lib/fluent/plugin/parser_winevt_sax.rb        | 40 ++++++-
 lib/fluent/plugin/parser_winevt_xml.rb        | 82 ++++++++++++++-
 ...ent.rb => winevt_sax_document_nokogiri.rb} |  3 +-
 .../plugin/winevt_sax_document_rexml.rb       | 98 +++++++++++++++++++
 5 files changed, 215 insertions(+), 12 deletions(-)
 rename lib/fluent/plugin/{winevt_sax_document.rb => winevt_sax_document_nokogiri.rb} (95%)
 create mode 100644 lib/fluent/plugin/winevt_sax_document_rexml.rb

diff --git a/fluent-plugin-parser-winevt_xml.gemspec b/fluent-plugin-parser-winevt_xml.gemspec
index 47fdc31..808450a 100644
--- a/fluent-plugin-parser-winevt_xml.gemspec
+++ b/fluent-plugin-parser-winevt_xml.gemspec
@@ -20,8 +20,10 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler"
   spec.add_development_dependency "rake"
   spec.add_development_dependency "test-unit", "~> 3.4.0"
+  spec.add_development_dependency "nokogiri", ">= 1.12.5", "< 1.16"
+
   spec.add_runtime_dependency "fluentd", [">= 0.14.12", "< 2"]
-  spec.add_runtime_dependency "nokogiri", ">= 1.12.5"
+  spec.add_runtime_dependency "rexml", "~> 3.2"
 
   # gems that aren't default gems as of Ruby 3.4
   spec.add_runtime_dependency "base64", "~> 0.2"
diff --git a/lib/fluent/plugin/parser_winevt_sax.rb b/lib/fluent/plugin/parser_winevt_sax.rb
index a50cbbd..ed5623f 100644
--- a/lib/fluent/plugin/parser_winevt_sax.rb
+++ b/lib/fluent/plugin/parser_winevt_sax.rb
@@ -1,12 +1,35 @@
 require 'fluent/plugin/parser'
-require 'fluent/plugin/winevt_sax_document'
-require 'nokogiri'
 
 module Fluent::Plugin
   class WinevtSAXparser < Parser
     Fluent::Plugin.register_parser('winevt_sax', self)
 
     config_param :preserve_qualifiers, :bool, default: true
+    config_param :parser, :enum, list: [:auto, :rexml, :nokogiri], default: :auto
+
+    def initialize
+      super
+      @use_nokogiri = false
+    end
+
+    def configure(conf)
+      super
+      if @parser != :rexml
+        begin
+          require 'nokogiri'
+          require 'fluent/plugin/winevt_sax_document_nokogiri'
+          @use_nokogiri = true
+        rescue LoadError
+          # LoadError is a ScriptError, so a bare `rescue` would never reach this fallback.
+          raise if @parser == :nokogiri
+        end
+      end
+
+      if !@use_nokogiri
+        require 'rexml/parsers/sax2parser'
+        require 'fluent/plugin/winevt_sax_document_rexml'
+      end
+    end
 
     def winevt_xml?
       true
@@ -17,9 +40,16 @@ def preserve_qualifiers?
     end
 
     def parse(text)
-      evtxml = WinevtXMLDocument.new(@preserve_qualifiers)
-      parser = Nokogiri::XML::SAX::Parser.new(evtxml)
-      parser.parse(text)
+      if @use_nokogiri
+        evtxml = WinevtXMLDocumentNokogiri.new(@preserve_qualifiers)
+        parser = Nokogiri::XML::SAX::Parser.new(evtxml)
+        parser.parse(text)
+      else
+        evtxml = WinevtXMLDocumentREXML.new(@preserve_qualifiers)
+        parser = REXML::Parsers::SAX2Parser.new(text)
+        parser.listen(evtxml)
+        parser.parse
+      end
       time = @estimate_current_event ? Fluent::EventTime.now : nil
       yield time, evtxml.result
     end
diff --git a/lib/fluent/plugin/parser_winevt_xml.rb b/lib/fluent/plugin/parser_winevt_xml.rb
index b8a0d58..0469c01 100644
--- a/lib/fluent/plugin/parser_winevt_xml.rb
+++ b/lib/fluent/plugin/parser_winevt_xml.rb
@@ -1,11 +1,33 @@
 require 'fluent/plugin/parser'
-require 'nokogiri'
 
 module Fluent::Plugin
   class WinevtXMLparser < Parser
     Fluent::Plugin.register_parser('winevt_xml', self)
 
     config_param :preserve_qualifiers, :bool, default: true
+    config_param :parser, :enum, list: [:auto, :rexml, :nokogiri], default: :auto
+
+    def initialize
+      super
+      @use_nokogiri = false
+    end
+
+    def configure(conf)
+      super
+      if @parser != :rexml
+        begin
+          require 'nokogiri'
+          @use_nokogiri = true
+        rescue LoadError
+          # LoadError is a ScriptError, so a bare `rescue` would never reach this fallback.
+          raise if @parser == :nokogiri
+        end
+      end
+
+      if !@use_nokogiri
+        require 'rexml/document'
+      end
+    end
 
     def winevt_xml?
       true
@@ -19,7 +41,7 @@ def MAKELONG(low, high)
       (low & 0xffff) | (high & 0xffff) << 16
     end
 
-    def event_id(system_elem)
+    def event_id_nokogiri(system_elem)
       if @preserve_qualifiers
         return ((system_elem/'EventID').text rescue nil)
       end
@@ -34,16 +56,29 @@ def event_id(system_elem)
       end
     end
 
-    def parse(text)
+    def event_id_rexml(system_elem)
+      return system_elem.elements['EventID'].text rescue nil if @preserve_qualifiers
+
+      qualifiers = system_elem.elements['EventID'].attributes['Qualifiers'] rescue nil
+      if qualifiers
+        event_id = system_elem.elements['EventID'].text
+        event_id = MAKELONG(event_id.to_i, qualifiers.to_i)
+        event_id.to_s
+      else
+        system_elem.elements['EventID'].text rescue nil
+      end
+    end
+
+    def parse_nokogiri(text)
       record = {}
       doc = Nokogiri::XML(text)
       system_elem = doc/'Event'/'System'
       record["ProviderName"]      = (system_elem/"Provider").attribute("Name").text rescue nil
       record["ProviderGUID"]      = (system_elem/"Provider").attribute("Guid").text rescue nil
       if @preserve_qualifiers
-        record["Qualifiers"]        = (system_elem/'EventID').attribute("Qualifiers").text rescue nil
+        record["Qualifiers"]      = (system_elem/'EventID').attribute("Qualifiers").text rescue nil
       end
-      record["EventID"]           = event_id(system_elem)
+      record["EventID"]           = event_id_nokogiri(system_elem)
       record["Level"]             = (system_elem/'Level').text rescue nil
       record["Task"]              = (system_elem/'Task').text rescue nil
       record["Opcode"]            = (system_elem/'Opcode').text rescue nil
@@ -59,6 +94,43 @@ def parse(text)
       record["UserID"]            = (system_elem/'Security').attribute("UserID").text rescue nil
       record["Version"]           = (system_elem/'Version').text rescue nil
       time = @estimate_current_event ? Fluent::EventTime.now : nil
+      return time, record
+    end
+
+    def parse_rexml(text)
+      record = {}
+      doc = REXML::Document.new(text)
+      system_elem = doc.root.elements['System'] rescue nil
+      record["ProviderName"]      = system_elem.elements['Provider'].attributes['Name'] rescue nil
+      record["ProviderGUID"]      = system_elem.elements['Provider'].attributes['Guid'] rescue nil
+      if @preserve_qualifiers
+        record["Qualifiers"]      = system_elem.elements['EventID'].attributes['Qualifiers'] rescue nil
+      end
+      record["EventID"]           = event_id_rexml(system_elem)
+      record["Level"]             = system_elem.elements['Level'].text rescue nil
+      record["Task"]              = system_elem.elements['Task'].text rescue nil
+      record["Opcode"]            = system_elem.elements['Opcode'].text rescue nil
+      record["Keywords"]          = system_elem.elements['Keywords'].text rescue nil
+      record["TimeCreated"]       = system_elem.elements['TimeCreated'].attributes['SystemTime'] rescue nil
+      record["EventRecordID"]     = system_elem.elements['EventRecordID'].text rescue nil
+      record["ActivityID"]        = system_elem.elements['Correlation'].attributes['ActivityID'] rescue nil
+      record["RelatedActivityID"] = system_elem.elements['Correlation'].attributes['RelatedActivityID'] rescue nil
+      record["ThreadID"]          = system_elem.elements['Execution'].attributes['ThreadID'] rescue nil
+      record["ProcessID"]         = system_elem.elements['Execution'].attributes['ProcessID'] rescue nil
+      record["Channel"]           = system_elem.elements['Channel'].text rescue nil
+      record["Computer"]          = system_elem.elements['Computer'].text rescue nil
+      record["UserID"]            = system_elem.elements['Security'].attributes['UserID'] rescue nil
+      record["Version"]           = system_elem.elements['Version'].text rescue nil
+      time = @estimate_current_event ? Fluent::EventTime.now : nil
+      return time, record
+    end
+
+    def parse(text)
+      if @use_nokogiri
+        time, record = parse_nokogiri(text)
+      else
+        time, record = parse_rexml(text)
+      end
       yield time, record
     end
   end
diff --git a/lib/fluent/plugin/winevt_sax_document.rb b/lib/fluent/plugin/winevt_sax_document_nokogiri.rb
similarity index 95%
rename from lib/fluent/plugin/winevt_sax_document.rb
rename to lib/fluent/plugin/winevt_sax_document_nokogiri.rb
index bd11942..a27037a 100644
--- a/lib/fluent/plugin/winevt_sax_document.rb
+++ b/lib/fluent/plugin/winevt_sax_document_nokogiri.rb
@@ -1,6 +1,6 @@
 require 'nokogiri'
 
-class WinevtXMLDocument < Nokogiri::XML::SAX::Document
+class WinevtXMLDocumentNokogiri < Nokogiri::XML::SAX::Document
   def initialize(preserve_qualifiers)
     @stack = []
     @result = {}
@@ -66,6 +66,7 @@ def characters(string)
   end
 
   def end_element(name, attributes = [])
+    @stack.pop
   end
 
   def end_document
diff --git a/lib/fluent/plugin/winevt_sax_document_rexml.rb b/lib/fluent/plugin/winevt_sax_document_rexml.rb
new file mode 100644
index 0000000..e7c2749
--- /dev/null
+++ b/lib/fluent/plugin/winevt_sax_document_rexml.rb
@@ -0,0 +1,98 @@
+require 'rexml/parsers/sax2parser'
+
+class WinevtXMLDocumentREXML
+  def initialize(preserve_qualifiers)
+    @stack = []
+    @result = {}
+    @preserve_qualifiers = preserve_qualifiers
+  end
+
+  def MAKELONG(low, high)
+    (low & 0xffff) | (high & 0xffff) << 16
+  end
+
+  def event_id
+    if @result.has_key?("Qualifiers")
+      qualifiers = @result.delete("Qualifiers")
+      event_id = @result['EventID']
+      event_id = MAKELONG(event_id.to_i, qualifiers.to_i)
+      @result['EventID'] = event_id.to_s
+    else
+      @result['EventID']
+    end
+  end
+
+  def result
+    return @result if @preserve_qualifiers
+
+    if @result
+      @result['EventID'] = event_id
+    end
+    @result
+  end
+
+  def start_element(*args)
+    # REXML SAX2 may pass (uri, localname, qname, attributes) or (qname, attributes)
+    name = if args.length >= 3
+             args[1].to_s
+           else
+             args[0].to_s
+           end
+    # normalize namespace/prefix
+    name = name.split('}').last if name.include?('}')
+    name = name.split(':').last if name.include?(':')
+    @stack << name
+
+    attrs = args.last || {}
+
+    # helper to fetch attribute value from different attribute containers
+    get_attr = lambda do |a, k|
+      begin
+        if a.is_a?(Array)
+          pair = a.find { |p| p && p[0] && p[0].to_s == k.to_s }
+          pair && pair[1]
+        elsif a.respond_to?(:[])
+          a[k] || a[k.to_sym]
+        else
+          nil
+        end
+      rescue
+        nil
+      end
+    end
+
+    if name == "Provider"
+      @result["ProviderName"] = get_attr.call(attrs, 'Name')
+      @result["ProviderGUID"] = get_attr.call(attrs, 'Guid')
+    elsif name == "EventID"
+      @result["Qualifiers"] = get_attr.call(attrs, 'Qualifiers')
+    elsif name == "TimeCreated"
+      @result["TimeCreated"] = get_attr.call(attrs, 'SystemTime')
+    elsif name == "Correlation"
+      @result["ActivityID"] = get_attr.call(attrs, 'ActivityID')
+      @result["RelatedActivityID"] = get_attr.call(attrs, 'RelatedActivityID')
+    elsif name == "Execution"
+      @result["ProcessID"] = get_attr.call(attrs, 'ProcessID')
+      @result["ThreadID"] = get_attr.call(attrs, 'ThreadID')
+    elsif name == "Security"
+      @result["UserID"] = get_attr.call(attrs, 'UserID')
+    end
+  end
+
+  def characters(string)
+    element = @stack.last
+    return unless element
+
+    if /^EventID|Level|Task|Opcode|Keywords|EventRecordID|ActivityID|Channel|Computer|Security|Version$/ === element
+      @result[element] = (@result[element] || '') + string
+    end
+  end
+
+  def end_element(*_)
+    @stack.pop
+  end
+
+  def method_missing(name, *args, &block)
+    # Ignore any SAX2 events we don't explicitly handle (e.g., progress)
+  end
+end

From 82c54b2302222366026f9950f8daef1d6b44b2e9 Mon Sep 17 00:00:00 2001
From: Takuro Ashie
Date: Fri, 19 Jun 2026 10:16:23 +0900
Subject: [PATCH 2/2] Add tests for parser (:auto, :nokogiri, and :rexml)

Signed-off-by: Takuro Ashie
---
 test/plugin/test_parser_winevt_sax.rb | 81 ++++++++++++++++----------
 test/plugin/test_parser_winevt_xml.rb | 82 ++++++++++++++++-----------
 2 files changed, 100 insertions(+), 63 deletions(-)

diff --git a/test/plugin/test_parser_winevt_sax.rb b/test/plugin/test_parser_winevt_sax.rb
index b7123c0..7e0597c 100644
--- a/test/plugin/test_parser_winevt_sax.rb
+++ b/test/plugin/test_parser_winevt_sax.rb
@@ -2,43 +2,55 @@
 
 class WinevtSAXparserTest < Test::Unit::TestCase
 
-  def setup
-    Fluent::Test.setup
-  end
-
   CONFIG = %[]
-  XMLLOG = File.open(File.join(__dir__, "..", "data", "eventlog.xml") )
 
   def create_driver(conf = CONFIG)
     Fluent::Test::Driver::Parser.new(Fluent::Plugin::WinevtSAXparser).configure(conf)
   end
 
-  def test_parse
-    d = create_driver
-    xml = XMLLOG
-    expected = {"ProviderName" => "Microsoft-Windows-Security-Auditing",
-                "ProviderGUID" => "{54849625-5478-4994-A5BA-3E3B0328C30D}",
-                "EventID" => "4624",
-                "Qualifiers" => nil,
-                "Level" => "0",
-                "Task" => "12544",
-                "Opcode" => "0",
-                "Keywords" => "0x8020000000000000",
-                "TimeCreated" => "2019-06-13T09:21:23.345889600Z",
-                "EventRecordID" => "80688",
-                "ActivityID" => "{587F0743-1F71-0006-5007-7F58711FD501}",
-                "RelatedActivityID" => nil,
-                "ProcessID" => "912",
-                "ThreadID" => "24708",
-                "Channel" => "Security",
-                "Computer" => "Fluentd-Developing-Windows",
-                "UserID" => nil,
-                "Version" => "2",}
-    d.instance.parse(xml) do |time, record|
-      assert_equal(expected, record)
+  class ParseTest < self
+    def setup
+      Fluent::Test.setup
+      @xml = File.open(File.join(__dir__, "..", "data", "eventlog.xml"))
+    end
+
+    def teardown
+      @xml.close
     end
 
-    assert_true(d.instance.winevt_xml?)
+    data(
+      "auto" => [%[parser auto], true],
+      "nokogiri" => [%[parser nokogiri], true],
+      "rexml" => [%[parser rexml], false]
+    )
+    def test_parse(data)
+      config, expected_use_nokogiri = data
+      d = create_driver(CONFIG + config)
+      expected = {"ProviderName" => "Microsoft-Windows-Security-Auditing",
+                  "ProviderGUID" => "{54849625-5478-4994-A5BA-3E3B0328C30D}",
+                  "EventID" => "4624",
+                  "Qualifiers" => nil,
+                  "Level" => "0",
+                  "Task" => "12544",
+                  "Opcode" => "0",
+                  "Keywords" => "0x8020000000000000",
+                  "TimeCreated" => "2019-06-13T09:21:23.345889600Z",
+                  "EventRecordID" => "80688",
+                  "ActivityID" => "{587F0743-1F71-0006-5007-7F58711FD501}",
+                  "RelatedActivityID" => nil,
+                  "ProcessID" => "912",
+                  "ThreadID" => "24708",
+                  "Channel" => "Security",
+                  "Computer" => "Fluentd-Developing-Windows",
+                  "UserID" => nil,
+                  "Version" => "2",}
+      d.instance.parse(@xml) do |time, record|
+        assert_equal(expected, record)
+      end
+
+      assert_true(d.instance.winevt_xml?)
+      assert_equal(expected_use_nokogiri, d.instance.instance_variable_get(:@use_nokogiri))
+    end
   end
 
   class QualifiersTest < self
@@ -50,8 +62,14 @@ def teardown
       @xml.close
     end
 
-    def test_parse_without_qualifiers
-      d = create_driver CONFIG + %[preserve_qualifiers false]
+    data(
+      "auto" => [%[parser auto], true],
+      "nokogiri" => [%[parser nokogiri], true],
+      "rexml" => [%[parser rexml], false]
+    )
+    def test_parse_without_qualifiers(data)
+      config, expected_use_nokogiri = data
+      d = create_driver(CONFIG + config + %[\npreserve_qualifiers false])
       expected = {"ActivityID" => nil,
                   "Channel" => "Application",
                   "Computer" => "DESKTOP-G457RDR",
@@ -74,6 +92,7 @@ def test_parse_without_qualifiers
       end
 
       assert_true(d.instance.winevt_xml?)
+      assert_equal(expected_use_nokogiri, d.instance.instance_variable_get(:@use_nokogiri))
     end
   end
 end
diff --git a/test/plugin/test_parser_winevt_xml.rb b/test/plugin/test_parser_winevt_xml.rb
index 1f5f9f7..258f947 100644
--- a/test/plugin/test_parser_winevt_xml.rb
+++ b/test/plugin/test_parser_winevt_xml.rb
@@ -2,44 +2,55 @@
 
 class WinevtXMLparserTest < Test::Unit::TestCase
 
-  def setup
-    Fluent::Test.setup
-  end
-
   CONFIG = %[]
-  XMLLOG = File.open(File.join(__dir__, "..", "data", "eventlog.xml"))
 
   def create_driver(conf = CONFIG)
     Fluent::Test::Driver::Parser.new(Fluent::Plugin::WinevtXMLparser).configure(conf)
   end
 
-  def test_parse
-    d = create_driver
-    xml = XMLLOG
-    expected = {"ProviderName" => "Microsoft-Windows-Security-Auditing",
-                "ProviderGUID" => "{54849625-5478-4994-A5BA-3E3B0328C30D}",
-                "EventID" => "4624",
-                "Qualifiers" => nil,
-                "Level" => "0",
-                "Task" => "12544",
-                "Opcode" => "0",
-                "Keywords" => "0x8020000000000000",
-                "TimeCreated" => "2019-06-13T09:21:23.345889600Z",
-                "EventRecordID" => "80688",
-                "ActivityID" => "{587F0743-1F71-0006-5007-7F58711FD501}",
-                "RelatedActivityID" => nil,
-                "ProcessID" => "912",
-                "ThreadID" => "24708",
-                "Channel" => "Security",
-                "Computer" => "Fluentd-Developing-Windows",
-                "UserID" => nil,
-                "Version" => "2",}
-    d.instance.parse(xml) do |time, record|
-      assert_equal(expected, record)
+  class ParseTest < self
+    def setup
+      Fluent::Test.setup
+      @xml = File.open(File.join(__dir__, "..", "data", "eventlog.xml"))
+    end
+
+    def teardown
+      @xml.close
     end
-    xml.close
 
-    assert_true(d.instance.winevt_xml?)
+    data(
+      "auto" => [%[parser auto], true],
+      "nokogiri" => [%[parser nokogiri], true],
+      "rexml" => [%[parser rexml], false]
+    )
+    def test_parse(data)
+      config, expected_use_nokogiri = data
+      d = create_driver(CONFIG + config)
+      expected = {"ProviderName" => "Microsoft-Windows-Security-Auditing",
+                  "ProviderGUID" => "{54849625-5478-4994-A5BA-3E3B0328C30D}",
+                  "EventID" => "4624",
+                  "Qualifiers" => nil,
+                  "Level" => "0",
+                  "Task" => "12544",
+                  "Opcode" => "0",
+                  "Keywords" => "0x8020000000000000",
+                  "TimeCreated" => "2019-06-13T09:21:23.345889600Z",
+                  "EventRecordID" => "80688",
+                  "ActivityID" => "{587F0743-1F71-0006-5007-7F58711FD501}",
+                  "RelatedActivityID" => nil,
+                  "ProcessID" => "912",
+                  "ThreadID" => "24708",
+                  "Channel" => "Security",
+                  "Computer" => "Fluentd-Developing-Windows",
+                  "UserID" => nil,
+                  "Version" => "2",}
+      d.instance.parse(@xml) do |time, record|
+        assert_equal(expected, record)
+      end
+
+      assert_true(d.instance.winevt_xml?)
+      assert_equal(expected_use_nokogiri, d.instance.instance_variable_get(:@use_nokogiri))
+    end
   end
 
   class QualifiersTest < self
@@ -51,8 +62,14 @@ def teardown
       @xml.close
     end
 
-    def test_without_qualifiers
-      d = create_driver CONFIG + %[preserve_qualifiers false]
+    data(
+      "auto" => [%[parser auto], true],
+      "nokogiri" => [%[parser nokogiri], true],
+      "rexml" => [%[parser rexml], false]
+    )
+    def test_parse_without_qualifiers(data)
+      config, expected_use_nokogiri = data
+      d = create_driver(CONFIG + config + %[\npreserve_qualifiers false])
       expected = {"ActivityID" => nil,
                   "Channel" => "Application",
                   "Computer" => "DESKTOP-G457RDR",
@@ -75,6 +92,7 @@ def test_without_qualifiers
       end
 
       assert_true(d.instance.winevt_xml?)
+      assert_equal(expected_use_nokogiri, d.instance.instance_variable_get(:@use_nokogiri))
     end
   end
 end