Skip to content

Commit e647e1f

Browse files
committed
Add disallow_uri_protocols() function for disallowing javascript: URIs
1 parent d6aa116 commit e647e1f

2 files changed

Lines changed: 79 additions & 0 deletions

File tree

src/Sanitize.php

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class Sanitize implements RegistryAware
5858
public $allow_data_attr = true;
5959
/** @var bool */
6060
public $allow_aria_attr = true;
61+
/**
 * URI protocols whose links get neutralised while sanitizing.
 *
 * Each entry is handed to strip_uri_protocol() by sanitize(); an empty
 * list skips that step.
 *
 * @var string[]
 */
public $disallowed_uri_protocols = ['javascript'];
6163
/** @var array<string, array<string, string>> */
6264
public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
6365
/** @var bool */
@@ -280,6 +282,14 @@ public function allow_aria_attr(bool $allow = true): void
280282
$this->allow_aria_attr = $allow;
281283
}
282284

285+
/**
 * Replace the list of URI protocols that sanitization neutralises.
 *
 * The given list overwrites the current one. Passing an empty array
 * turns the protocol check off, because sanitize() only runs it for a
 * non-empty list.
 *
 * @param string[] $protocols Protocol names such as `javascript`
 */
public function disallow_uri_protocols(array $protocols = ['javascript']): void
{
    $this->disallowed_uri_protocols = $protocols;
}
292+
283293
/**
284294
* @return void
285295
*/
@@ -541,6 +551,12 @@ public function sanitize(string $data, int $type, string $base = '')
541551
$this->replace_urls($document, $element, $attributes);
542552
}
543553

554+
if ($this->disallowed_uri_protocols) {
555+
foreach ($this->disallowed_uri_protocols as $protocol) {
556+
$this->strip_uri_protocol($xpath, $protocol);
557+
}
558+
}
559+
544560
// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
545561
if ($this->image_handler !== '' && $this->enable_cache) {
546562
$images = $document->getElementsByTagName('img');
@@ -742,6 +758,54 @@ protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_dat
742758
}
743759
}
744760

761+
/**
 * Extract the normalised scheme ("protocol") of a URI for comparison.
 *
 * Takes everything before the first colon, percent-decodes it, strips
 * leading and trailing control characters and spaces, removes any
 * remaining whitespace and lower-cases the result.
 *
 * @param string $uri Raw attribute value
 * @return string Lower-cased scheme, or an empty string when the URI contains no colon
 */
private function extract_protocol(string $uri): string
{
    if (!str_contains($uri, ':')) {
        return '';
    }

    $scheme = rawurldecode(explode(':', $uri, 2)[0]);

    // Strip the whole 0x00-0x20 range from both ends. The previous
    // character list was written with hex escapes (\x10-\x19), which left
    // out 0x00, 0x0A-0x0F and 0x1A-0x1F, so a leading byte such as \x1A
    // made "javascript" look like a different scheme.
    $scheme = trim($scheme, "\x00..\x20");

    // Whitespace inside the scheme (tabs, newlines) is dropped as well.
    return strtolower(preg_replace('/\s/', '', $scheme) ?? '');
}
776+
777+
/**
 * Neutralise links that use a disallowed URI protocol.
 *
 * Queries `<a href>`, `<iframe src>` and any element carrying `href`
 * inside `<math>`, and prefixes every `href`/`src` value whose protocol
 * matches with `unsafe:`.
 *
 * @param \DOMXPath $xpath    XPath handle used to find the candidate elements
 * @param string    $protocol Protocol to disallow; compared case-insensitively,
 *                            surrounding whitespace and colons are ignored
 * @throws \SimplePie\Exception If the XPath query fails
 */
protected function strip_uri_protocol(\DOMXPath $xpath, string $protocol): void
{
    // Bring the configured value into the shape extract_protocol() yields
    // (lower-case, no colon, no whitespace); otherwise "javascript:" or
    // " javascript" could never match and the check would silently do nothing.
    $protocol = strtolower(trim($protocol, ": \t\n\r\0\x0B"));

    // extract_protocol() returns '' for URIs without a colon, so an empty
    // protocol would flag every relative URL. Treat it as "nothing to do".
    if ($protocol === '') {
        return;
    }

    $elements = $xpath->query('.//a[@href]|.//iframe[@src]|.//math//*[@href]');

    if ($elements === false) {
        throw new \SimplePie\Exception(sprintf(
            '%s(): Possibly malformed expression',
            __METHOD__
        ), 1);
    }

    foreach ($elements as $element) {
        if (!($element instanceof \DOMElement)) {
            continue;
        }

        foreach (['href', 'src'] as $attribute) {
            if (!$element->hasAttribute($attribute)) {
                continue;
            }

            $value = $element->getAttribute($attribute);

            if ($this->extract_protocol($value) === $protocol) {
                $element->setAttribute($attribute, 'unsafe:' . $value);
            }
        }
    }
}
808+
745809
/**
746810
* @param int-mask-of<SimplePie::CONSTRUCT_*> $type
747811
* @return void

src/SimplePie.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,13 @@ class SimplePie
692692
*/
693693
public $allow_aria_attr = true;
694694

695+
/**
 * @var string[] URI protocols to disallow
 * @see SimplePie::disallow_uri_protocols()
 * @access private
 *
 * NOTE(review): disallow_uri_protocols() forwards its argument to the
 * sanitizer without writing this property - confirm it is still read.
 */
public $disallowed_uri_protocols = ['javascript'];
701+
695702
/**
696703
* @var bool Should we throw exceptions, or use the old-style error property?
697704
* @access private
@@ -1589,6 +1596,14 @@ public function allow_aria_attr(bool $allow = true): void
15891596
$this->sanitize->allow_aria_attr($allow);
15901597
}
15911598

1599+
/**
 * Set the URI protocols that must be disallowed.
 *
 * The list is forwarded unchanged to the sanitizer.
 *
 * @param string[] $protocols List of protocols to disallow
 */
public function disallow_uri_protocols(array $protocols = ['javascript']): void
{
    $this->sanitize->disallow_uri_protocols($protocols);
}
1606+
15921607
/**
15931608
* @return void
15941609
*/

0 commit comments

Comments
 (0)