From 1543951fca84339f76dd5510a6cf2b4e77a599cb Mon Sep 17 00:00:00 2001 From: Michael Kantor <6068672+kantorcodes@users.noreply.github.com> Date: Mon, 23 Feb 2026 17:08:36 -0700 Subject: [PATCH 1/2] feat: add Hashgraph Online docs scraper Signed-off-by: Michael Kantor <6068672+kantorcodes@users.noreply.github.com> --- lib/docs/filters/hol/clean_html.rb | 42 +++++++++++++++++ lib/docs/filters/hol/entries.rb | 71 +++++++++++++++++++++++++++++ lib/docs/scrapers/hol.rb | 39 ++++++++++++++++ public/icons/docs/hol/16.png | Bin 0 -> 2268 bytes public/icons/docs/hol/16@2x.png | Bin 0 -> 3587 bytes public/icons/docs/hol/SOURCE | 1 + 6 files changed, 153 insertions(+) create mode 100644 lib/docs/filters/hol/clean_html.rb create mode 100644 lib/docs/filters/hol/entries.rb create mode 100644 lib/docs/scrapers/hol.rb create mode 100644 public/icons/docs/hol/16.png create mode 100644 public/icons/docs/hol/16@2x.png create mode 100644 public/icons/docs/hol/SOURCE diff --git a/lib/docs/filters/hol/clean_html.rb b/lib/docs/filters/hol/clean_html.rb new file mode 100644 index 0000000000..ddc33c92ce --- /dev/null +++ b/lib/docs/filters/hol/clean_html.rb @@ -0,0 +1,42 @@ +module Docs + class Hol + class CleanHtmlFilter < Filter + def call + @doc = at_css('article .theme-doc-markdown') || at_css('article') + return doc if @doc.nil? + + css( + '.theme-doc-breadcrumbs', + '.theme-doc-toc-mobile', + '.theme-doc-footer', + '.theme-doc-version-badge', + '.pagination-nav', + '.theme-edit-this-page', + '.hash-link', + '.anchor-link', + 'button.copyButtonIcon_Lhsm', + 'button.clean-btn', + ).remove + + css('pre').each do |node| + lines = node.css('.token-line') + node.content = lines.map(&:content).join("\n") if lines.any? + node.remove_attribute('style') + + language = node['class'].to_s[/language-([a-z0-9_-]+)/i, 1] + language ||= node.at_css('code')&.[]('class').to_s[/language-([a-z0-9_-]+)/i, 1] + node['data-language'] = language if language + + wrapper = node.ancestors('.theme-code-block').first + wrapper.replace(node) if wrapper + end + + css('.table-of-contents').remove + css('*[class]').remove_attribute('class') + css('*[style]').remove_attribute('style') + + doc + end + end + end +end diff --git a/lib/docs/filters/hol/entries.rb b/lib/docs/filters/hol/entries.rb new file mode 100644 index 0000000000..22420d5b62 --- /dev/null +++ b/lib/docs/filters/hol/entries.rb @@ -0,0 +1,71 @@ +module Docs + class Hol + class EntriesFilter < Docs::EntriesFilter + def include_default_entry? + !root_page? + end + + def get_name + heading = at_css('h1') + return super if heading.nil? + heading.content.gsub(/\s+/, ' ').strip + end + + def get_type + return standards_sdk_type if standards_sdk_doc? + return registry_broker_type if registry_broker_doc? + nil + end + + def additional_entries + return [] if root_page? + + css('h2[id], h3[id]').each_with_object([]) do |node, entries| + section_name = node.content.gsub(/\s+/, ' ').strip + next if section_name.empty? + next if section_name == name + entries << ["#{name}: #{section_name}", node['id']] + end + end + + private + + def standards_sdk_doc? + slug.start_with?('libraries/standards-sdk/') + end + + def registry_broker_doc? + slug.start_with?('registry-broker/') + end + + def standards_sdk_type + sdk_slug = slug.delete_prefix('libraries/standards-sdk/').split('/').first + return 'Standards SDK' if sdk_slug.nil? || sdk_slug.empty? + return sdk_slug.upcase if sdk_slug.match?(/\Ahcs-\d+\z/) + + case sdk_slug + when 'registry-broker-client' + 'SDK Registry Broker Client' + when 'utils-services' + 'SDK Utilities & Services' + else + "SDK #{sdk_slug.tr('-', ' ').split.map(&:capitalize).join(' ')}" + end + end + + def registry_broker_type + broker_slug = slug.delete_prefix('registry-broker/').split('/').first + return 'Registry Broker' if broker_slug.nil? || broker_slug.empty? + + case broker_slug + when 'api' + 'Registry Broker API' + when 'chat', 'encrypted-chat', 'multi-protocol-chat' + 'Registry Broker Chat' + else + "Registry Broker #{broker_slug.tr('-', ' ').split.map(&:capitalize).join(' ')}" + end + end + end + end +end diff --git a/lib/docs/scrapers/hol.rb b/lib/docs/scrapers/hol.rb new file mode 100644 index 0000000000..d0d75cdc2f --- /dev/null +++ b/lib/docs/scrapers/hol.rb @@ -0,0 +1,39 @@ +module Docs + class Hol < UrlScraper + self.name = 'Hashgraph Online' + self.slug = 'hol' + self.type = 'simple' + self.release = '0.1.161' + self.base_url = 'https://hol.org/docs/' + self.root_path = 'libraries/standards-sdk/' + self.initial_paths = %w( + libraries/standards-sdk/ + registry-broker/ + ) + self.links = { + home: 'https://hol.org/', + code: 'https://github.com/hashgraph-online/standards-sdk' + } + + html_filters.push 'hol/entries', 'hol/clean_html' + + options[:trailing_slash] = true + options[:only_patterns] = [ + %r{\A(?:libraries/standards-sdk|registry-broker)(?:/|$)}, + ] + options[:skip_patterns] = [ + %r{\Aregistry-broker/examples/(?:chat-demo|ping-agent-demo)/?}, + %r{\Aregistry-broker/getting-started/(?:faq|first-registration|installation|quick-start)/?}, + %r{\Aregistry-broker/(?:feature-your-agent|moltbook|partner-program|skills-upload-discovery|updating-agents)/?}, + ] + + options[:attribution] = <<-HTML + Copyright © 2025 Hashgraph Online DAO.
+ Licensed under the Apache License 2.0. + HTML + + def get_latest_version(opts) + get_npm_version('@hashgraphonline/standards-sdk', opts) + end + end +end diff --git a/public/icons/docs/hol/16.png b/public/icons/docs/hol/16.png new file mode 100644 index 0000000000000000000000000000000000000000..7f02caa6f5fda7bdeb216a51d7165c68e2c042db GIT binary patch literal 2268 zcmbVNc~BE)6#s&N995CRfY8>E(hlXwCIJFjLJB6PoXY8dMu(Uz5ZN3i3j_hnfPx^> z$}uP?pbpxpq9741NEMITsZvD;MXMd5Vzna0Y8}bo*l!o)8u+I>^X+^4d++y-@7m(1 zRbMT#aC=92p7Rp>6@pX-E;K%2+_{ToasBiS@8n9`vi2#ytohC->=qH^+t@TS8B+NfUifPs>BE>U9ZGdI#eIXpgd5hlQ6=dQ!YeWAR{3%iY`=ZF*=vY zVzL+lD>|Ld*UFR87?F5_4!;5!sRW@x5hOD+lbPwwRBKZZHjl?cSR91I@q!*+x-1nT z)qAOQpNDkjF+Q`-koE;Yy%F zEhZ(@+Bmfu7f`#3kIaUrW`j?bjmBU)HJ(mRTD(d@2snIXE_gO8VTvZ;aVI#_!P$hw zxd_w2X#{d!&?to-!xKab*sTs0C*b(PG;>;JGlgngt&P=4Wtf0HL74{4_Kw$KTA^A= zPO9L49G!^-)1%DJKy{KhZzt0%)0lD@DjQ!+>Yz;SU+TofVw-28DlL*60$`0Whfw6$Qun zFWnUzkQx_kw|}pny7ex;RZJ8(7BgszO19<2dm0SQtzlLR%)7q5Aa1s_$ahkHkl@&4 zTAPH4Jh}T@Dr}ZUd%7OHyZZIZ*PWwZ@5^fL7U#!a>(pI;Tep{%RDU@ur|{+Nr@0a^ z=8)R-yu9%Grlwy5?wix@TSO1@wnb$a53MSw94fz=)9aMIqeNc(&l?xhRFT5CHVo~3 zT3@$*WrQd1pilM%``q6Knl>*9xhM!1`kiHpI4<_Nh-=n9*Q);Xjjh?yHx9s1x zzV%0|C7&UZI~5yN7mt*5cnxP(cOKbrVu{-^qvTn=XAs|_psS{-SG4n0_VO#%r)>|9 zHCk4-E;}T1ukl{*|ITgqi?N``51kr2GHw0*e>q_4P59O?Z@*%{_tx!8x8ozMjoo2g zjcu+$d1%||Gv$V^t=mT<1eK*VXESWCwwotcXpY;M6!!$Z>8li(Nt5VK{Xdn(*EV)s zJ~iMhzrp^fq3Xxq$2ie)y_=uGY4^R{yq-kIhbBME(N7MX(o~CE?PJ!RxM_SfFu>R} z^7v?op>EWp>Eyj`G|ld)f%k{U={t4jqROtCVLeJ&H{Ai}iRjLSKWqp6YDrh)p|@{O zWi(f3+Zwu-_kLONz@_~#&2B|))rSkM)+vY14Hi9rKK%D^%CN(T!>cu=CM|&;j^zI| Nbmc11@fAt={{a|Y>&O5A literal 0 HcmV?d00001 diff --git a/public/icons/docs/hol/16@2x.png b/public/icons/docs/hol/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..3ce67785b721adb0b6e4e28284267339d7cfe581 GIT binary patch literal 3587 zcmbVPdpwi-AAe?w`oLN2-FUOGZM zt&4Im6sI_ygD!GCDwnL(>Eh0LwwzM0`seq&Uf<{QdETGT`*Zo;Uau$C*~vyhPD2g= z0EHd4L>ExP#6xx|_~srt@*Wh@ein`v03b|UK0{t2Hd1U|90A~n9soo{0l*XpMGON# zkO2U^^acQI1^}qBvnzMuK*KPNw8PiY5!eLkvcOV^1|W_A0f#$8^RvDWq7OhNxD;qa z0pJ@D7vKvqmFUYropFK=NK1SG;1DRqk2g5Pu_Tf}4m?B#m~@IijlrOSsC4piKn9ei z;3&(7EMx%v^QAsVg5^WM=%629;Y!%C1C)FYmWm}>SQ}XryeJrJKP-C70j!Y`25pEj zKy5X|fMOfQ#1OrOhedBSKp9{RwgUYc7VALjGPbQJ7XTC+#RHPISy2+~r#kvr!7wF)ZnXW&N$HpR&!NI|ZU_%6prCg<7$hPc?3N1_hesQMX})XumPueSSR7Y2 znL@*(7AT9LZ@t~PG!B8q6i+Jt|2R4e2^L5BHiM)S&mVTO$g+q@rC=$a7E^LiKJQ=2 ziE*VJ{2CRnzj(_t==lGx5gc*>_!3m?B|*RD3Kd6L-;pmsIl! z1+Npl9JufQyPEKF?p)6x0FbHPLA2OST6V`HjG<+t+SfDvre`uVp{FBOm~~!|9FZMf z6y*$CTO>PBY)iLH$z!e6lQn&RR39E|B_#_AR|k$v z`3P@*+@z1Mq$@ncpaw@FcW7=oBlE=$_c#jLHlrV)V?)B5)gH-38SC%cZAv#CmYK*b zG%Dv*gxcTb&Jpf>c*PPXtuooVe6;V9dfEfEfIG~DfVu6%q~V=9K5Izm5tn0n0)|5I zQM=GarDjj<5z}a9tDuLZul6Xe_F3jHkaspq^zwTH24bS-O0o`=mg3f~jo!<>>#rIz z{N~RI=d-X&{dRfJ3V*%qXNIP<@Rs%PgK$GDc+!{GJmIfrCpbLPGriM&<>~OSf7_wf z5Kr}%Snb2L(}jY0)29|`?>C0qx5QfO;~|y_c-=#5o&PDxSgyXhp|Mi4Qs_TtZr6k8 zbFq2|5uNjkMoD*dT~4A*kB_lX*2NtSmOira{!tam1VUVHY_pD;AwrngAcc^V=bZXU z-li))!n|I#vpMpl+YH~P@)-k>GI?%HhKmudb?V9EG)ue7M?`lKngKxtS|D}WqNu*tE|PP<_|Do=P795bgv zTITAMX}r(k<|DSGTd=e?(* zHR7~R)Ik-6xr&721@0sas#azw#QAEO!b(M4-j%RX@2yKWLW6UoCa&w{bPPZc&R1Hp zAo-++?c9{&@Y+9vgL!q!pQpr6KY(K3P%GWsYV)r0!hOl>_PCPx=eCa^w&DAuA7} z@A;DZtCVCFmN!^4S5Ois8U~|0$4?gf>^@yvuReb^ItSW`nIy+Ny~QibqoH%U96jpn z&y27*7XzA3rX$+YN{w@swc=i3g!(tD_YChzjDQOSwM55|-_~_?j$5L0pZ;}yL-0jv z%WUeM^Lq;ZtggON6JVWG-_>7KvP7r1svKx|cHFOf#qqahE0RV{+g~L%`NSFtShaW~ z{5ZSf=T#y!wR7Nz;C)=?yM!iokB4*W$5^f7qvz)WDhJ1rH*{29m1e%Ueg6XnwO^ol z&`Zr(rd&?`!pCa+oC4p_`zvpG=0@JKWAC#ab;|IPu2uTGpp&6L#YQAO8I!SWU*B=- z-hhGL5}jo~i(dSL%PWjhysgtwqjp=O`b=x(WOrs1O0iI{^ciqxR1W`3v4%}OK6Qgd&*4v;;4Rr2s* zm9%F;)16O6dVeFOUT`h;KSAv?JFBc*va2lJD?Ox>xf=gdNi9R~E_T0E zO(ejk&_l+)?U|R}ybd9rN!?@98$Q%j6+EU_pYN=HzTps!>rpxDZ&UIv#qvbxr~)+8 zJga)Ja(VD}4cVM+)=2jk@bhO?Us_z>3{`St5Iv>}HlgRrQhR^A6M$A)&6E51JQEM_nr z-t@3jb2np6!*tG* z2mII8Xp4u22wmBBv>;bMBl?5Y?wQ7GsIb{Gd{xbWMgt;Mbmc_2`8ieAo|iJ37B^LX zd2C~iHfE7R^i%HP=$%vCO#X});`iorTh6vVKV;f?n6cOI!R8fR^F`h87xrr-E0P}k zcDDHWcnsyQm^(;SzZ;41r)I|H;8kr$h^S1=<`Ye&Mbog6GcSJM{C2jFH!r0`w2CHn S=2nY;J9bz(5e1gsk^cf{O_E;# literal 0 HcmV?d00001 diff --git a/public/icons/docs/hol/SOURCE b/public/icons/docs/hol/SOURCE new file mode 100644 index 0000000000..e70d09455a --- /dev/null +++ b/public/icons/docs/hol/SOURCE @@ -0,0 +1 @@ +https://hol.org/logo.png From adacc21692342868a88e8926879c65600fe4aa41 Mon Sep 17 00:00:00 2001 From: Michael Kantor <6068672+kantorcodes@users.noreply.github.com> Date: Mon, 23 Feb 2026 19:04:13 -0700 Subject: [PATCH 2/2] fix: normalize HOL heading entry names Signed-off-by: Michael Kantor <6068672+kantorcodes@users.noreply.github.com> --- lib/docs/filters/hol/entries.rb | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/docs/filters/hol/entries.rb b/lib/docs/filters/hol/entries.rb index 22420d5b62..8a713403b8 100644 --- a/lib/docs/filters/hol/entries.rb +++ b/lib/docs/filters/hol/entries.rb @@ -8,7 +8,7 @@ def include_default_entry? def get_name heading = at_css('h1') return super if heading.nil? - heading.content.gsub(/\s+/, ' ').strip + normalized_heading_text(heading) end def get_type @@ -21,7 +21,7 @@ def additional_entries return [] if root_page? css('h2[id], h3[id]').each_with_object([]) do |node, entries| - section_name = node.content.gsub(/\s+/, ' ').strip + section_name = normalized_heading_text(node) next if section_name.empty? next if section_name == name entries << ["#{name}: #{section_name}", node['id']] @@ -30,6 +30,12 @@ def additional_entries private + def normalized_heading_text(node) + fragment = node.dup + fragment.css('a').remove + fragment.content.gsub(/\s+/, ' ').strip + end + def standards_sdk_doc? slug.start_with?('libraries/standards-sdk/') end