-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathToUtfConverter.php
More file actions
64 lines (57 loc) · 2 KB
/
ToUtfConverter.php
File metadata and controls
64 lines (57 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
<?php
declare(strict_types=1);
namespace Xparse\Parser\Helper;
/**
 * Converts an HTML document to UTF-8 when it declares a different encoding.
 *
 * The source encoding is read from the charset parameter of the supplied
 * Content-Type value or, when that carries no charset, from a <meta> charset
 * declaration in the markup. The document is returned unchanged when no
 * encoding is declared or when the declared one is not in the list of
 * supported source encodings (UTF-8 itself is deliberately not in that list).
 *
 * @author Ivan Shcherbak <alotofall@gmail.com>
 */
class ToUtfConverter implements EncodingConverterInterface
{
    /**
     * Lower-cased mbstring encoding names that are skipped, together with
     * their aliases, when the list of convertible source encodings is built.
     */
    private const EXCLUDED_ENCODINGS = [
        'utf-8',
        'utf8',
        'base64',
        'uuencode',
        'quoted-printable',
        'html-entities',
    ];

    /**
     * Regex fragment for a charset label. Underscores and short names are
     * allowed so that labels such as "euc_jp" or "gbk" can be recognised;
     * whether a label is actually usable is decided by the supported list.
     */
    private const CHARSET_TOKEN = '[A-Za-z0-9][A-Za-z0-9_.:+-]*';

    /**
     * Lazily built, lower-cased list of encoding names and aliases that may
     * be used as a conversion source; null until first requested.
     *
     * @var null|string[]
     */
    private $supportedEncodings;

    /**
     * Convert $html to UTF-8 if a supported, non-UTF-8 encoding is declared.
     *
     * @param string $html        Raw document bytes.
     * @param string $contentType Content-Type value, e.g. "text/html; charset=windows-1251".
     *
     * @return string The converted document, or $html untouched when no
     *                supported source encoding could be determined.
     */
    public function convert(string $html, string $contentType = ''): string
    {
        $encoding = mb_strtolower($this->detectEncoding($html, $contentType));
        if ($encoding === '' || !in_array($encoding, $this->getSupportedEncodings(), true)) {
            return $html;
        }

        return mb_convert_encoding($html, 'utf-8', $encoding);
    }

    /**
     * Find the declared charset label, preferring the Content-Type value over the markup.
     *
     * @return string The label exactly as written, or '' when none is declared.
     */
    private function detectEncoding(string $html, string $contentType): string
    {
        // Parameter names are case-insensitive, the value may be quoted and
        // further parameters may follow, so the match is not anchored.
        $headerPattern = '/\bcharset\s*=\s*["\']?\s*(' . self::CHARSET_TOKEN . ')/i';
        if (preg_match($headerPattern, $contentType, $match) === 1) {
            return $match[1];
        }

        // [^>]*? keeps the search inside a single <meta ...> tag (which may
        // span several lines) so an unrelated "charset=" further along the
        // same line, e.g. a form's accept-charset, cannot be picked up.
        $metaPattern = '/<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*(' . self::CHARSET_TOKEN . ')/i';
        if (preg_match($metaPattern, $html, $match) === 1) {
            return $match[1];
        }

        return '';
    }

    /**
     * Build (once) and return the lower-cased names and aliases of every
     * mbstring encoding that is not listed in EXCLUDED_ENCODINGS.
     *
     * @return string[]
     */
    private function getSupportedEncodings(): array
    {
        if ($this->supportedEncodings !== null) {
            return $this->supportedEncodings;
        }

        $supported = [];
        $findAliases = function_exists('mb_encoding_aliases');
        foreach (mb_list_encodings() as $name) {
            $name = mb_strtolower($name);
            if (in_array($name, self::EXCLUDED_ENCODINGS, true)) {
                // Skipping here also leaves out the aliases of the excluded encoding.
                continue;
            }

            $supported[] = $name;
            if ($findAliases) {
                foreach (mb_encoding_aliases($name) as $alias) {
                    $supported[] = mb_strtolower($alias);
                }
            }
        }

        return $this->supportedEncodings = $supported;
    }
}