diff --git a/lib/docs/filters/phpunit/clean_html.rb b/lib/docs/filters/phpunit/clean_html.rb
index dacb15874b..8bbf619a87 100644
--- a/lib/docs/filters/phpunit/clean_html.rb
+++ b/lib/docs/filters/phpunit/clean_html.rb
@@ -9,14 +9,16 @@ def call
node['data-language'] = 'php'
end
+ # Heading text comes back with stray non-ASCII characters of unknown origin; strip them wherever it is extracted.
+
if slug.match(/assertion|annotations|configuration/)
css('h2').each do |node|
- node['id'] = node.content
+ node['id'] = node.content.gsub(/\P{ASCII}/, '')
end
end
- css('h1').each do |node|
- node.content = node.content.gsub(/\d*\./, '').strip
+ css('h1', 'h2', 'h3').each do |node|
+ node.content = node.content.gsub(/\d*\. |\P{ASCII}/, '')
end
doc
diff --git a/lib/docs/filters/phpunit/entries.rb b/lib/docs/filters/phpunit/entries.rb
index fd426be6c7..9a4f90527c 100644
--- a/lib/docs/filters/phpunit/entries.rb
+++ b/lib/docs/filters/phpunit/entries.rb
@@ -7,8 +7,9 @@ def get_name
end
def get_type
+ name.gsub!(/\P{ASCII}/, '')
if name.in? ['Assertions', 'Annotations', 'The XML Configuration File']
- name
+ name.gsub('The ', '')
else
'Guides'
end
@@ -17,11 +18,10 @@ def get_type
def additional_entries
return [] if type == 'Guides'
- css('h2').map do |node|
- [node.content, node['id']]
+ css('h3').map do |node|
+ [node.content.gsub('The ', ''), node['id']]
end
end
-
end
end
end
diff --git a/lib/docs/scrapers/phpunit.rb b/lib/docs/scrapers/phpunit.rb
index 12efbbfcba..5b7830c4f5 100644
--- a/lib/docs/scrapers/phpunit.rb
+++ b/lib/docs/scrapers/phpunit.rb
@@ -24,7 +24,7 @@ class Phpunit < UrlScraper
FILTERS = %w(phpunit/clean_html phpunit/entries title)
version do
- self.release = '12.0'
+ self.release = '12.5'
self.base_url = "https://docs.phpunit.de/en/#{release}/"
html_filters.push FILTERS