88import json
99from concurrent .futures import ProcessPoolExecutor
1010
11- DEFAULT_LIMIT = 50000
11+ DEFAULT_LIMIT = 45000
1212
1313CLASS_DEF = re .compile (r'^\.class\b.*?\s+(L[^;\s]+;)' )
1414METHOD_DEF = re .compile (r'^\.method\b.*?\s+([^\s(]+)\(([^)]*)\)(\S+)' )
@@ -110,12 +110,7 @@ def parse_smali(path):
110110 for line in f :
111111 line = line .strip ()
112112
113- if (
114- line
115- and not line .startswith ("." )
116- and not line .startswith (":" )
117- and not line .startswith ("#" )
118- ):
113+ if line and not line .startswith (('.' , ':' , '#' )):
119114 instruction_count += 1
120115
121116 if line .startswith ("invoke-" ):
@@ -128,12 +123,14 @@ def parse_smali(path):
128123 if m :
129124 current_class = m .group (1 )
130125 types .add (current_class )
126+ strings .add (current_class )
131127 continue
132128
133129 m = FIELD_DEF .match (line )
134130 if m and current_class :
135131 name , ftype = m .groups ()
136132 fields .add (f"{ current_class } ->{ name } :{ ftype } " )
133+ strings .add (name )
137134 for t in extract_types (ftype ):
138135 types .add (t )
139136 continue
@@ -145,27 +142,28 @@ def parse_smali(path):
145142
146143 methods .add (f"{ current_class } ->{ name } { proto } " )
147144 protos .add (proto )
145+ strings .add (name )
148146
149147 for t in extract_types (params ):
150148 types .add (t )
151149 for t in extract_types (ret ):
152150 types .add (t )
153151 continue
154152
155- for s in STRING_PATTERN .findall (line ):
156- strings .add (s )
153+ if line .startswith ("const-string" ):
154+ m = STRING_PATTERN .search (line )
155+ if m :
156+ strings .add (m .group (1 ))
157157
158158 except :
159159 pass
160160
161- weight = (
162- len (methods ) * 5 +
163- len (fields ) * 2 +
164- len (types ) +
165- len (protos ) * 2 +
166- len (strings ) * 2 +
167- instruction_count // 10 +
168- invoke_count * 2
161+ weight = max (
162+ len (methods ),
163+ len (fields ),
164+ len (types ),
165+ len (protos ),
166+ len (strings )
169167 )
170168
171169 return path , methods , fields , types , protos , strings , weight
@@ -180,8 +178,8 @@ def collect_dex_data(base, mode):
180178 file_cache = {}
181179
182180 index = 1
183- cpu = max (1 , multiprocessing .cpu_count () - 1 )
184-
181+ cpu = min ( 4 , max (1 , multiprocessing .cpu_count () - 1 ) )
182+
185183 while True :
186184 dex_dir = get_dex_dir (base , mode , index )
187185 if not os .path .isdir (dex_dir ):
@@ -342,7 +340,9 @@ def create_new_dex():
342340
343341 for index in sorted (list (dex_data .keys ())):
344342
345- while get_count (dex_data [index ]) > limit :
343+ while any (len (dex_data [index ][k ]) > limit
344+ for k in ["methods" , "fields" , "types" , "protos" , "strings" ]
345+ ):
346346
347347 files_sorted = sorted (
348348 dex_data [index ]["files" ],
0 commit comments