-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbyte_frequency_ranking.py
More file actions
18 lines (14 loc) · 1.13 KB
/
byte_frequency_ranking.py
File metadata and controls
18 lines (14 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import sys
from collections import defaultdict
# Byte values ordered from most to least frequent, as measured on the
# complete works of Shakespeare.  The literal has the same form as the
# string this script prints for a corpus (presumably it was produced by
# running the script on that text -- confirm before regenerating).
# NOTE(review): the long descending runs (\xff..\x80 and the control bytes)
# look like bytes that never occurred in the corpus, ordered only by the
# tie-break on byte value -- verify against the corpus if the exact order
# of that tail matters.
ranking = ' etoahnsrild\numy,.wfcgIbpAETSvO\'kRNLCH;WMDBUPFG?Y!-KxVjq[]J:Qz91)(XZ"<>23045_6*78|&@/}~`=%#\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0\xef\xee\xed\xec\xeb\xea\xe9\xe8\xe7\xe6\xe5\xe4\xe3\xe2\xe1\xe0\xdf\xde\xdd\xdc\xdb\xda\xd9\xd8\xd7\xd6\xd5\xd4\xd3\xd2\xd1\xd0\xcf\xce\xcd\xcc\xcb\xca\xc9\xc8\xc7\xc6\xc5\xc4\xc3\xc2\xc1\xc0\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0\xaf\xae\xad\xac\xab\xaa\xa9\xa8\xa7\xa6\xa5\xa4\xa3\xa2\xa1\xa0\x9f\x9e\x9d\x9c\x9b\x9a\x99\x98\x97\x96\x95\x94\x93\x92\x91\x90\x8f\x8e\x8d\x8c\x8b\x8a\x89\x88\x87\x86\x85\x84\x83\x82\x81\x80\x7f{^\\+$\x1f\x1e\x1d\x1c\x1b\x1a\x19\x18\x17\x16\x15\x14\x13\x12\x11\x10\x0f\x0e\r\x0c\x0b\t\x08\x07\x06\x05\x04\x03\x02\x01\x00'
def rank_bytes(data):
    """Return all 256 byte values ordered from most to least frequent.

    Args:
        data: The corpus as a byte string (``str`` on Python 2, ``bytes``
            on Python 3), or any other object ``bytearray()`` accepts.

    Returns:
        A 256-byte string containing every byte value exactly once,
        sorted by descending occurrence count in ``data``.  Values with
        the same count (including values that never occur) are ordered by
        descending byte value.
    """
    counts = [0] * 256
    # bytearray yields integers on both Python 2 and Python 3, so no
    # ord() call is needed and the loop behaves the same on either.
    for value in bytearray(data):
        counts[value] += 1
    # Sorting on (count, value) in reverse breaks ties toward the higher
    # byte value, which is the order the original zip/sort produced.
    order = sorted(range(256), key=lambda v: (counts[v], v), reverse=True)
    return bytes(bytearray(order))


def main(argv):
    """Print the byte-frequency ranking of the file named in ``argv[1]``.

    Args:
        argv: Command-line argument list; ``argv[0]`` is the program name
            and ``argv[1]`` the path of the corpus file.

    Returns:
        Process exit status: 0 on success, 1 when the argument count is
        wrong (a usage message is written to stderr in that case).
    """
    if len(argv) != 2:
        # Usage goes to stderr so stdout only ever carries the ranking.
        sys.stderr.write('usage: %s  <corpus>\n' % argv[0])
        return 1
    with open(argv[1], 'rb') as f:
        text = f.read()
    shown = repr(rank_bytes(text))
    # Python 3 prefixes a bytes repr with "b"; drop it so the output is the
    # same plain quoted literal that Python 2 prints.
    if shown.startswith('b'):
        shown = shown[1:]
    print(shown)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))