Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,13 @@ def most_common(self, n=None):
if n is None:
return sorted(self.items(), key=_itemgetter(1), reverse=True)

if n == 1:
# Optimization: use max() instead of heapq for single element
# max() raises ValueError on empty sequence, so check first
Comment on lines +638 to +639
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the second comment is particularly helpful:

Suggested change
# Optimization: use max() instead of heapq for single element
# max() raises ValueError on empty sequence, so check first
# Optimization: use max() instead of heapq for single element

if not self:
return []
return [max(self.items(), key=_itemgetter(1))]

# Lazy import to speedup Python startup time
global heapq
if heapq is None:
Expand Down
18 changes: 18 additions & 0 deletions Lib/test/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -2279,6 +2279,24 @@ def test_invariant_for_the_in_operator(self):
self.assertTrue(elem in c)
self.assertIn(elem, c)

def test_most_common(self):
c = Counter(a=5, b=3, c=5, d=2, e=0, f=-1)
Comment on lines +2282 to +2283
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a particular reason for checking most_common again here, separately from the basic tests above?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, there's not really anything to test against in this PR.

If this does increase test coverage, we can accept it in a separate PR, since this one won't be backported.


self.assertEqual(c.most_common(), [('a', 5), ('c', 5), ('b', 3), ('d', 2), ('e', 0), ('f', -1)])
self.assertEqual(c.most_common(3), [('a', 5), ('c', 5), ('b', 3)])
self.assertEqual(c.most_common(0), [])
self.assertEqual(c.most_common(-2), [])
self.assertEqual(c.most_common(1), [('a', 5)])

# Test empty counter
empty_c = Counter()

self.assertEqual(empty_c.most_common(), [])
self.assertEqual(empty_c.most_common(3), [])
self.assertEqual(empty_c.most_common(0), [])
self.assertEqual(empty_c.most_common(-2), [])
self.assertEqual(empty_c.most_common(1), [])

def test_multiset_operations(self):
# Verify that adding a zero counter will strip zeros and negatives
c = Counter(a=10, b=-2, c=0) + Counter()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize :meth:`collections.Counter.most_common` for ``n=1`` to use
:func:`max` instead of :func:`heapq.nlargest`, improving performance for
the common case of finding the single most common element.
Comment on lines +1 to +3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note the % increase in performance somewhere here.

Loading