Skip to content

Commit ebaf8d2

Browse files
committed
Improved data sanitisation in agentzero::parse().
Improved `weibo` parsing in `apps::get()`. Improved `Facebook` app parsing in `apps::get()`. Moved captures of device characteristics such as width, height and network type to `devices::get()`, and added new network type captures. Added new `Vivo Browser` capture to `browsers::get()`. Added `Bluesky` crawler to `crawlers::get()`. Added `Roku` platform-specific captures to `platforms::get()`, as they have a fixed format. Added tests.
1 parent 4dd0321 commit ebaf8d2

9 files changed

Lines changed: 270 additions & 41 deletions

File tree

src/agentzero.php

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -195,17 +195,14 @@ protected static function getTokens(string $ua, array $single, array $ignore) :
195195
* @return agentzero|false An agentzero object containing the parsed values of the input UA, or false if it could not be parsed
196196
*/
197197
public static function parse(string $ua, array $hints = [], array $config = []) : agentzero|false {
198-
$ua = \str_replace(' ', ' ', $ua);
198+
$ua = \str_replace(' ', ' ', \trim(\preg_replace( '/[^[:print:]]/', '', $ua)));
199199

200200
// parse client hints
201201
$hinted = $ua;
202202
$browser = hints::parse($hinted, $hints);
203203

204-
// check all printable characters
205-
if (!\ctype_print($ua)) {
206-
207204
// get config
208-
} elseif (($config = config::get($config)) === null) {
205+
if (($config = config::get($config)) === null) {
209206

210207
// get tokens
211208
} elseif (($tokens = self::getTokens(\trim($hinted, ' "\''), $config['single'], $config['ignore'])) !== false) {

src/mappings/apps.php

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class apps {
1212
public static function get() : array {
1313
$fn = [
1414
'appslash' => function (string $value, int $i, array $tokens, string $match) : array {
15-
if (\mb_stripos($value, 'AppleWebKit') === false && !\str_contains($value, '://')) {
15+
if (\mb_stripos($value, 'AppleWebKit') === false && !\str_contains($value, '://') && !\str_starts_with($value, 'appid/')) {
1616
$parts = \explode('/', $value, 4);
1717
$offset = isset($parts[2]) && !\is_numeric($parts[1]) ? 1 : 0;
1818
$app = \str_replace('GooglePlayStore ', '', $parts[0 + $offset]);
@@ -212,6 +212,11 @@ public static function get() : array {
212212
'appname' => 'nu.nl',
213213
'appversion' => \mb_substr($value, 6)
214214
]),
215+
'Sanoma/app' => new props('exact', fn () : array => [
216+
'type' => 'human',
217+
'app' => 'Sanoma',
218+
'appname' => 'Sanoma'
219+
]),
215220
'Google Web Preview' => new props('start', $fn['appslash']),
216221
'MicroMessenger/' => new props('start', $fn['appslash']),
217222
'MicroMessenger Weixin QQ' => new props('start', fn () : array => [
@@ -220,15 +225,26 @@ public static function get() : array {
220225
]),
221226
'weibo' => new props('any', function (string $value) : array {
222227
$data = [
228+
'type' => 'human',
223229
'app' => 'Weibo',
224230
'appname' => 'Weibo'
225231
];
226-
$parts = \explode('_', $value);
227-
foreach ($parts AS $i => $item) {
228-
if (\mb_stripos($item, 'Weibo') !== false) {
229-
$data['appname'] = $item;
230-
$data['appversion'] = $parts[$i + (\strspn($parts[$i + 1] ?? '', '0123456789', 0, 1) === 1 ? 1 : 2)] ?? null;
231-
break;
232+
if (\str_contains($value, '__')) {
233+
$parts = \explode('__', $value);
234+
$data = \array_merge($data, devices::getDevice($parts[0]), [
235+
'appname' => $parts[1],
236+
'appversion' => $parts[2] ?? null,
237+
'platform' => isset($parts[3]) ? platforms::getPlatform($parts[3]) : null,
238+
'platformversion' => isset($parts[4]) ? \substr($parts[4], \strcspn($parts[4], '0123456789.')) : null
239+
]);
240+
} else {
241+
$parts = \explode('_', $value);
242+
foreach ($parts AS $i => $item) {
243+
if (\mb_stripos($item, 'Weibo') !== false) {
244+
$data['appname'] = $item;
245+
$data['appversion'] = $parts[$i + (\strspn($parts[$i + 1] ?? '', '0123456789', 0, 1) === 1 ? 1 : 2)] ?? null;
246+
break;
247+
}
232248
}
233249
}
234250
return $data;
@@ -240,34 +256,40 @@ public static function get() : array {
240256
'FBAN/MessengerLiteForiOS' => [
241257
'type' => 'human',
242258
'app' => 'Facebook Messenger',
259+
'appname' => 'MessengerLiteForiOS',
243260
'platform' => 'iOS'
244261
],
245262
'FBAN/FB4A' => [
246263
'type' => 'human',
247264
'app' => 'Facebook',
265+
'appname' => 'FB4A',
248266
'platform' => 'Android'
249267
],
250268
'FBAN/FBIOS' => [
251269
'type' => 'human',
252270
'app' => 'Facebook',
271+
'appname' => 'FBIOS',
253272
'platform' => 'iOS'
254273
],
255274
'FBAN/FB4FireTV' => [
256275
'type' => 'human',
257276
'category' => 'tv',
258277
'app' => 'Facebook',
278+
'appname' => 'FB4FireTV',
259279
'platform' => 'Android'
260280
],
261281
'FBAN/MessengerDesktop' => [
262282
'type' => 'human',
263283
'category' => 'desktop',
264-
'app' => 'Facebook Messenger'
284+
'app' => 'Facebook Messenger',
285+
'appname' => 'MessengerDesktop'
265286
],
266287
'FacebookCanvasDesktop FBAN/GamesWindowsDesktopApp' => [
267288
'type' => 'human',
268289
'platform' => 'Windows',
269290
'category' => 'desktop',
270-
'app' => 'Facebook Gamesroom'
291+
'app' => 'Facebook Gamesroom',
292+
'appname' => 'GamesWindowsDesktopApp'
271293
]
272294
];
273295
return \array_merge([
@@ -277,21 +299,47 @@ public static function get() : array {
277299
], $map[$value] ?? []);
278300
}),
279301
'FB_IAB/' => new props('start', fn (string $value) : array => [
302+
'type' => 'human',
280303
'app' => 'Facebook',
281304
'appname' => \mb_substr($value, 7)
282305
]),
306+
'FBPN/' => new props('start', fn (string $value) : array => [
307+
'type' => 'human',
308+
'app' => 'Facebook',
309+
'appname' => \mb_substr($value, 5)
310+
]),
283311
'FBAV/' => new props('start', fn (string $value) : array => [
312+
'type' => 'human',
313+
'app' => 'Facebook',
314+
'appname' => 'Facebook',
284315
'appversion' => \mb_substr($value, 5)
285316
]),
286317
'FBMF/' => new props('start', fn (string $value) : array => [
318+
'type' => 'human',
319+
'app' => 'Facebook',
320+
'appname' => 'Facebook',
287321
'vendor' => devices::getVendor(\mb_substr($value, 5))
288322
]),
289-
'FBDV/' => new props('start', fn (string $value) : array => devices::getDevice(\mb_substr($value, 5))),
323+
'FBDV/' => new props('start', fn (string $value) : array => \array_merge(
324+
devices::getDevice(\mb_substr($value, 5))),
325+
[
326+
'type' => 'human',
327+
'app' => 'Facebook',
328+
'appname' => 'Facebook'
329+
]
330+
),
290331
'FBMD/' => new props('start', fn (string $value) : array => [
332+
'type' => 'human',
333+
'app' => 'Facebook',
334+
'appname' => 'Facebook',
291335
'model' => \mb_substr($value, 5)
292336
]),
293337
'FBDM/' => new props('start', function (string $value) : array {
294-
$data = [];
338+
$data = [
339+
'type' => 'human',
340+
'app' => 'Facebook',
341+
'appname' => 'Facebook'
342+
];
295343
foreach (\explode(',', \trim(\mb_substr($value, 5), '{}')) AS $item) {
296344
$parts = \explode('=', $item);
297345
if (!empty($parts[1])) {
@@ -303,19 +351,16 @@ public static function get() : array {
303351
}
304352
return $data;
305353
}),
306-
'width=' => new props('start', fn (string $value) : array => [
307-
'width' => \intval(\mb_substr($value, 6))
308-
]),
309-
'height=' => new props('start', fn (string $value) : array => [
310-
'height' => \intval(\mb_substr($value, 7))
311-
]),
312-
'dpi=' => new props('start', fn (string $value) : array => [
313-
'dpi' => \mb_substr($value, 4)
314-
]),
315354
'FBSN/' => new props('start', fn (string $value) : array => [
355+
'type' => 'human',
356+
'app' => 'Facebook',
357+
'appname' => 'Facebook',
316358
'platform' => \mb_substr($value, 5)
317359
]),
318360
'FBSV' => new props('start', fn (string $value) : array => [
361+
'type' => 'human',
362+
'app' => 'Facebook',
363+
'appname' => 'Facebook',
319364
'platformversion' => \mb_substr($value, 5)
320365
]),
321366
'isDarkMode/' => new props('start', function (string $value) : array {
@@ -324,12 +369,10 @@ public static function get() : array {
324369
'darkmode' => \in_array($mode, ['0', '1'], true) ? \boolval($mode) : null
325370
];
326371
}),
372+
'dark-mode' => new props('exact', ['darkmode' => true]),
327373
'AppTheme/' => new props('start', fn (string $value) : array => [
328374
'darkmode' => \mb_substr($value, 9) === 'dark'
329375
]),
330-
'NetType/' => new props('start', fn (string $value) : array => [
331-
'nettype' => \mb_convert_case(\mb_substr($value, 8), MB_CASE_UPPER)
332-
]),
333376
'Microsoft Office' => new props('start', function (string $value, int $i, array $tokens) : array {
334377
$data = [
335378
'type' => 'human'

src/mappings/browsers.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ protected static function getBrowser(string $value, int $i, array $tokens, strin
3131
'huaweibrowser' => 'Huawei Browser',
3232
'qqbrowser' => 'QQ Browser',
3333
'miuibrowser' => 'MIUI Browser',
34-
'vivobrowser' => 'Vivo Browser'
34+
'vivobrowser' => 'Vivo Browser',
35+
'android-com.vivo.browser' => 'Vivo Browser'
3536
];
3637
$data = ['type' => 'human'];
3738
$browser = \mb_strtolower(\array_shift($parts));
@@ -146,6 +147,7 @@ public static function get() : array {
146147
'CocCoc/' => new props('start', $fn['browserslash']),
147148
'Obigo/' => new props('start', $fn['browserslash']),
148149
'VivoBrowser/' => new props('start', $fn['browserslash']),
150+
'com.vivo.browser/' => new props('any', $fn['browserslash']),
149151
'QQBrowser/' => new props('any', function (string $value, int $i, array $tokens, string $key, array $config = []) use ($fn) : array {
150152
return $fn['browserslash'](\mb_substr($value, \mb_stripos($value, 'QQBrowser/') ?: 0), $i, $tokens, $key, $config); // sometimes missing a space from previous declaration, and MQQBrowser for mobile.
151153
}),

src/mappings/crawlers.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ public static function getApp(string $value, array $data = []) : array {
169169
'p3p validator' => 'P3P Validator',
170170
'w3c-checklink' => 'W3C Checklink',
171171
'w3c_validator' => 'W3C Validator',
172-
'omgili' => 'Webz.io'
172+
'omgili' => 'Webz.io',
173+
'bluesky cardyb' => 'Bluesky'
173174
];
174175

175176
$lower = \mb_strtolower($parts[0]);
@@ -316,6 +317,7 @@ public static function get() : array {
316317
'Bidtellect' => new props('start', $fn['crawler']),
317318
'magpie-crawler/' => new props('start', $fn['crawler']),
318319
'Web Measure/' => new props('start', $fn['crawler']),
320+
'Bluesky Cardyb/' => new props('start', $fn['crawler']),
319321
'PingdomTMS/' => new props('start', $fn['monitor']),
320322
'DynGate' => new props('exact', $fn['monitor']),
321323
'CensysInspect/' => new props('start', $fn['monitor']),

src/mappings/devices.php

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,47 @@ public static function get() : array {
353353
'type' => 'human',
354354
'vendor' => 'Cubot'
355355
]),
356+
'TCL ' => new props('start', fn (string $value) : array => [
357+
'type' => 'human',
358+
'category' => 'tv',
359+
'vendor' => 'TCL',
360+
'model' => \mb_substr($value, 4)
361+
]),
362+
'deviceName/' => new props('start', fn (string $value) : array => self::getDevice(\mb_substr($value, 11))),
363+
'deviceModel/' => new props('start', fn (string $value) : array => [
364+
'model' => \mb_substr($value, 12)
365+
]),
356366
'Model/' => new props('start', fn (string $value) : array => [
357367
'model' => \mb_substr($value, 6)
358368
]),
359369
'Build/' => new props('any', fn (string $value) : array => self::getDevice($value)),
370+
'width=' => new props('start', fn (string $value) : array => [
371+
'width' => \intval(\mb_substr($value, 6))
372+
]),
373+
'height=' => new props('start', fn (string $value) : array => [
374+
'height' => \intval(\mb_substr($value, 7))
375+
]),
376+
'dpi=' => new props('start', fn (string $value) : array => [
377+
'dpi' => \mb_substr($value, 4)
378+
]),
379+
'NetType/' => new props('start', function (string $value) : array {
380+
$type = \mb_convert_case(\mb_substr($value, 8), MB_CASE_UPPER);
381+
return [
382+
'nettype' => \in_array($type, ['WF', 'WIFI'], true) ? 'WiFi' : $type
383+
];
384+
}),
385+
'netWorkType/' => new props('start', function (string $value) : array {
386+
$type = \mb_convert_case(\mb_substr($value, 12), MB_CASE_UPPER);
387+
return [
388+
'nettype' => \in_array($type, ['WF', 'WIFI'], true) ? 'WiFi' : $type
389+
];
390+
}),
391+
'2G' => new props('exact', ['nettype' => '2g']),
392+
'3G' => new props('exact', ['nettype' => '3g']),
393+
'4G' => new props('exact', ['nettype' => '4g']),
394+
'4.5G' => new props('exact', ['nettype' => '4g']),
395+
'4.5G+' => new props('exact', ['nettype' => '4g']),
396+
'5G' => new props('exact', ['nettype' => '5g']),
360397
'x' => new props('any', function (string $value) : ?array {
361398
if (\str_contains($value, '@')) {
362399
$dpi = \explode('@', $value);
@@ -427,7 +464,11 @@ public static function getDevice(string $value) : array {
427464
'Poco' => 'Poco',
428465
'Cubot' => 'Cubot',
429466
'Kingkong' => 'Cubot',
430-
'Nokia' => 'Nokia'
467+
'Nokia' => 'Nokia',
468+
'WR' => 'Westinghouse',
469+
'HKP' => 'HKPro',
470+
'Roku' => 'Roku',
471+
'TCL' => 'TCL'
431472
];
432473

433474
// find vendor

src/mappings/platforms.php

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,14 @@ public static function get() : array {
197197
'platform' => 'iOS',
198198
'platformversion' => $tokens[$i+1] ?? null
199199
]),
200+
'iPadOS' => new props('exact', fn (string $value, int $i, array $tokens) : array => [
201+
'type' => 'human',
202+
'category' => 'tablet',
203+
'vendor' => 'Apple',
204+
'device' => 'iPad',
205+
'platform' => 'iOS',
206+
'platformversion' => $tokens[$i+1] ?? null
207+
]),
200208
'CrOS' => new props('start', function (string $value) : array {
201209
$parts = \explode(' ', $value);
202210
return [
@@ -289,6 +297,21 @@ public static function get() : array {
289297
'platform' => 'WebOS',
290298
'platformversion' => \mb_substr($value, 5)
291299
]),
300+
'Roku ' => new props('start', function (string $value, int $i, array $tokens) : array {
301+
$app = \str_contains($tokens[$i - 1] ?? '', '/') ? \explode('/', $tokens[$i - 1], 2) : null;
302+
return \array_merge(
303+
[
304+
'type' => 'human',
305+
'category' => 'tv',
306+
'platform' => 'Roku',
307+
'platformversion' => \mb_substr($value, 5),
308+
'app' => $app[0] ?? null,
309+
'appname' => $app[0] ?? null,
310+
'appversion' => $app[1] ?? null,
311+
],
312+
isset($tokens[$i + 2]) ? devices::getDevice($tokens[$i + 2]) : []
313+
);
314+
}),
292315
'SunOS' => new props('start', [
293316
'type' => 'human',
294317
'category' => 'desktop',
@@ -361,6 +384,7 @@ public static function get() : array {
361384
'platformversion' => \explode('/', $value, 3)[1] ?: null
362385
]),
363386
'Android ' => new props('start', $fn['android']),
387+
'Android-' => new props('start', $fn['android']),
364388
'Linux' => new props('any', function (string $value, int $i, array $tokens) : array {
365389
return [
366390
'kernel' => 'Linux',
@@ -383,7 +407,10 @@ public static function get() : array {
383407
]),
384408
'Version/' => new props('start', fn (string $value) : array => [
385409
'platformversion' => \mb_substr($value, 8)
386-
])
410+
]),
411+
'platformVersion/' => new props('start', fn (string $value) : array => [
412+
'platformversion' => \mb_substr($value, 16)
413+
]),
387414
];
388415
}
389416

@@ -397,7 +424,8 @@ public static function getPlatform(string $value) : string {
397424
'freebsd' => 'FreeBSD',
398425
'openbsd' => 'OpenBSD',
399426
'netbsd' => 'NetBSD',
400-
'opensuse' => 'OpenSUSE'
427+
'opensuse' => 'OpenSUSE',
428+
'iphone' => 'iOS'
401429
];
402430
$value = \mb_strtolower($value);
403431
return $map[$value] ?? \mb_convert_case($value, MB_CASE_TITLE);

0 commit comments

Comments
 (0)