@@ -58,6 +58,8 @@ class Sanitize implements RegistryAware
5858 public $ allow_data_attr = true ;
5959 /** @var bool */
6060 public $ allow_aria_attr = true ;
61+ /** @var string[] */
62+ public $ disallowed_uri_protocols = ['javascript ' ];
6163 /** @var array<string, array<string, string>> */
6264 public $ add_attributes = ['audio ' => ['preload ' => 'none ' ], 'iframe ' => ['sandbox ' => 'allow-scripts allow-same-origin ' ], 'video ' => ['preload ' => 'none ' ]];
6365 /** @var bool */
@@ -280,6 +282,14 @@ public function allow_aria_attr(bool $allow = true): void
280282 $ this ->allow_aria_attr = $ allow ;
281283 }
282284
/**
 * Configure which URI protocols are disallowed.
 *
 * The given list replaces the current value of
 * {@see Sanitize::$disallowed_uri_protocols}; it is not merged with it.
 * Calling the method without arguments resets the list to `['javascript']`.
 *
 * @param string[] $protocols List of protocols to disallow
 *
 * @return void
 */
public function disallow_uri_protocols(array $protocols = ['javascript']): void
{
    // Stored as given; no normalisation of the entries happens here.
    $this->disallowed_uri_protocols = $protocols;
}
292+
283293 /**
284294 * @return void
285295 */
@@ -541,6 +551,12 @@ public function sanitize(string $data, int $type, string $base = '')
541551 $ this ->replace_urls ($ document , $ element , $ attributes );
542552 }
543553
554+ if ($ this ->disallowed_uri_protocols ) {
555+ foreach ($ this ->disallowed_uri_protocols as $ protocol ) {
556+ $ this ->strip_uri_protocol ($ xpath , $ protocol );
557+ }
558+ }
559+
544560 // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
545561 if ($ this ->image_handler !== '' && $ this ->enable_cache ) {
546562 $ images = $ document ->getElementsByTagName ('img ' );
@@ -742,6 +758,54 @@ protected function enforce_allowed_html_nodes(\DOMNode $element, bool $allow_dat
742758 }
743759 }
744760
/**
 * Extract the normalised protocol (scheme) of a URI.
 *
 * Everything before the first `:` is percent-decoded, stripped of leading
 * and trailing C0 control characters and spaces, stripped of any remaining
 * whitespace, and lower-cased, so that obfuscated spellings such as
 * " JaVa\tScript:" compare equal to "javascript".
 *
 * @param string $uri URI as found in an attribute value
 *
 * @return string Lower-cased protocol without the colon, or an empty string
 *                when the URI contains no `:` at all
 */
private function extract_protocol(string $uri): string
{
    if (!str_contains($uri, ':')) {
        return '';
    }

    $scheme = rawurldecode(explode(':', $uri)[0]);

    // Strip every byte from NUL up to and including space. The range form
    // is used on purpose: an enumerated list is easy to get wrong and would
    // let a URI with a leading control character (e.g. "\x1F") slip through.
    $scheme = trim($scheme, "\x00..\x20");

    // Remove whitespace embedded inside the scheme (tabs, newlines, ...).
    // preg_replace() returns null on a PCRE error; fall back to ''.
    $scheme = preg_replace('/\s/', '', $scheme) ?? '';

    return strtolower($scheme);
}
776+
/**
 * Neutralise URIs that use a disallowed protocol.
 *
 * Inspects the `href` attribute of `<a>` elements and of any element inside
 * `<math>`, and the `src` attribute of `<iframe>` elements. When the
 * protocol of such a value (as reported by extract_protocol()) equals
 * $protocol, the attribute value is prefixed with `unsafe:`. The original
 * value is kept after the prefix; the attribute is not removed.
 *
 * @param \DOMXPath $xpath    XPath evaluator used to locate the candidate elements
 * @param string    $protocol Protocol to disallow, e.g. `javascript`. Compared
 *                            case-insensitively; surrounding whitespace and a
 *                            trailing `:` are ignored. An empty protocol is a no-op.
 *
 * @return void
 *
 * @throws \SimplePie\Exception If the XPath query cannot be evaluated
 */
protected function strip_uri_protocol(\DOMXPath $xpath, string $protocol): void
{
    // Accept "javascript", "JavaScript", " javascript " and "javascript:" alike.
    $protocol = strtolower(rtrim(trim($protocol), ':'));

    if ($protocol === '') {
        // extract_protocol() returns '' for URIs without a scheme, so an
        // empty protocol would otherwise flag every relative link as unsafe.
        return;
    }

    $elements = $xpath->query('.//a[@href]|.//iframe[@src]|.//math//*[@href]');

    if ($elements === false) {
        throw new \SimplePie\Exception(sprintf(
            '%s(): Possibly malformed expression',
            __METHOD__
        ), 1);
    }

    foreach ($elements as $element) {
        if (!($element instanceof \DOMElement)) {
            continue;
        }

        // Both attributes are checked on every match, whichever branch of
        // the XPath union selected the element.
        foreach (['href', 'src'] as $attribute) {
            if (!$element->hasAttribute($attribute)) {
                continue;
            }

            $value = $element->getAttribute($attribute);

            if ($this->extract_protocol($value) === $protocol) {
                $element->setAttribute($attribute, 'unsafe:' . $value);
            }
        }
    }
}
808+
745809 /**
746810 * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
747811 * @return void
0 commit comments