Skip to content

Commit e49bfca

Browse files
gh-142224: unicodedata: support bidi classes for unassigned code points (GH-144815)
1 parent 7a7521b commit e49bfca

File tree

4 files changed

+2319
-2134
lines changed

4 files changed

+2319
-2134
lines changed

Lib/test/test_unicodedata.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def test_category(self):
319319
self.assertRaises(TypeError, self.db.category, 'xx')
320320

321321
def test_bidirectional(self):
322-
self.assertEqual(self.db.bidirectional('\uFFFE'), '')
322+
self.assertEqual(self.db.bidirectional('\uFFFE'), 'BN')
323323
self.assertEqual(self.db.bidirectional(' '), 'WS')
324324
self.assertEqual(self.db.bidirectional('A'), 'L')
325325
self.assertEqual(self.db.bidirectional('\U00020000'), 'L')
@@ -347,6 +347,17 @@ def test_bidirectional(self):
347347
self.assertRaises(TypeError, self.db.bidirectional)
348348
self.assertRaises(TypeError, self.db.bidirectional, 'xx')
349349

350+
def test_bidirectional_unassigned(self):
351+
if self.old:
352+
return
353+
self.assertEqual(self.db.bidirectional('\u0378'), 'L')
354+
self.assertEqual(self.db.bidirectional('\u077F'), 'AL')
355+
self.assertEqual(self.db.bidirectional('\u20CF'), 'ET')
356+
self.assertEqual(self.db.bidirectional('\u0590'), 'R')
357+
self.assertEqual(self.db.bidirectional('\uFFFF'), 'BN')
358+
self.assertEqual(self.db.bidirectional('\U0001FFFE'), 'BN')
359+
self.assertEqual(self.db.bidirectional('\U00010D01'), 'AL')
360+
350361
def test_decomposition(self):
351362
self.assertEqual(self.db.decomposition('\uFFFE'),'')
352363
self.assertEqual(self.db.decomposition('\u00bc'), '<fraction> 0031 2044 0034')
@@ -676,9 +687,9 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
676687

677688
# Update this if the database changes. Make sure to do a full rebuild
678689
# (e.g. 'make distclean && make') to get the correct checksum.
679-
expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349'
690+
expectedchecksum = ('668dbbea1136e69d4f00677a5988b23bc78aefc6'
680691
if quicktest else
681-
'180bdc91143d8aa2eb9dd6726e66d37606205942')
692+
'b869af769bd8fe352c04622ab90533dc54df5cf3')
682693

683694
@requires_resource('network')
684695
def test_all_names(self):
@@ -966,9 +977,9 @@ def graphemes(*args):
966977
class Unicode_3_2_0_FunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
967978
db = unicodedata.ucd_3_2_0
968979
old = True
969-
expectedchecksum = ('4154d8d1232837e255edf3cdcbb5ab184d71f4a4'
980+
expectedchecksum = ('2164a66700e03cba9c9f5ed9e9a8d594d2da136a'
970981
if quicktest else
971-
'3aabaf66823b21b3d305dad804a62f6f6387c93e')
982+
'a8276cec9b6991779c5bdaa46c1ae7cc50bc2403')
972983

973984

974985
class UnicodeMiscTest(unittest.TestCase):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:func:`unicodedata.bidirectional` now returns the correct default bidi class
2+
for unassigned code points.

0 commit comments

Comments
 (0)