diff --git a/lib/docs/filters/phpunit/clean_html.rb b/lib/docs/filters/phpunit/clean_html.rb
index dacb15874b..8bbf619a87 100644
--- a/lib/docs/filters/phpunit/clean_html.rb
+++ b/lib/docs/filters/phpunit/clean_html.rb
@@ -9,14 +9,18 @@ def call
           node['data-language'] = 'php'
         end
 
+        # Heading text can contain stray non-ASCII characters; strip them when deriving ids and titles.
+
         if slug.match(/assertion|annotations|configuration/)
-          css('h2').each do |node|
-            node['id'] = node.content
+          # The entries filter reads ids from h3 headings, so give those an id as well.
+          css('h2', 'h3').each do |node|
+            node['id'] = node.content.gsub(/\P{ASCII}/, '')
           end
         end
 
-        css('h1').each do |node|
-          node.content = node.content.gsub(/\d*\./, '').strip
+        # Drop "1. "-style numbering and non-ASCII characters, then trim surrounding whitespace.
+        css('h1', 'h2', 'h3').each do |node|
+          node.content = node.content.gsub(/\d*\. |\P{ASCII}/, '').strip
         end
 
         doc
diff --git a/lib/docs/filters/phpunit/entries.rb b/lib/docs/filters/phpunit/entries.rb
index fd426be6c7..9a4f90527c 100644
--- a/lib/docs/filters/phpunit/entries.rb
+++ b/lib/docs/filters/phpunit/entries.rb
@@ -7,8 +7,10 @@ def get_name
       end
 
       def get_type
+        # Strip non-ASCII characters from the name in place before comparing it below.
+        name.gsub!(/\P{ASCII}/, '')
         if name.in? ['Assertions', 'Annotations', 'The XML Configuration File']
-          name
+          name.sub(/\AThe /, '')
         else
           'Guides'
         end
@@ -17,11 +19,11 @@ def get_type
       def additional_entries
         return [] if type == 'Guides'
 
-        css('h2').map do |node|
-          [node.content, node['id']]
+        # One [name, id] pair per h3 heading, with a leading "The " removed from the name.
+        css('h3').map do |node|
+          [node.content.sub(/\AThe /, ''), node['id']]
         end
       end
-
     end
   end
 end
diff --git a/lib/docs/scrapers/phpunit.rb b/lib/docs/scrapers/phpunit.rb
index 12efbbfcba..5b7830c4f5 100644
--- a/lib/docs/scrapers/phpunit.rb
+++ b/lib/docs/scrapers/phpunit.rb
@@ -24,7 +24,7 @@ class Phpunit < UrlScraper
     FILTERS = %w(phpunit/clean_html phpunit/entries title)
 
     version do
-      self.release = '12.0'
+      self.release = '12.5'
       self.base_url = "https://docs.phpunit.de/en/#{release}/"
 
       html_filters.push FILTERS