@@ -49,8 +49,9 @@ def sample_ngram(lookups, n, prefix="", k=1):
4949 return random .choices (data ["keys" ], weights = data ["freqs" ], k = k )
5050
5151
52- def generate_word (N ,n ) -> str : # N = max letters, n = context window (as in, n-gram)
53- lookups = read_multingram_csv (LETTERS_PATH )
52+ def generate_word (lookups , N ,n , printing = 0 ) -> str : # N = max letters, n = context window (as in, n-gram)
53+ """Generate a random word using n-gram model up to N letters."""
54+ #lookups = read_multingram_csv(LETTERS_PATH)
5455 N_max = N
5556 samples = {}
5657 samples [1 ] = sample_ngram (lookups , n = 1 , prefix = "" , k = 1 )[0 ]
@@ -77,33 +78,75 @@ def generate_word(N,n) -> str: # N = max letters, n = context window (as in, n-g
7778
7879 return samples [N_max ]
7980
def generate_single_letter(lookups, n, prefix="", top_k=5) -> list:
    """Return the top_k most probable next letters for a given prefix.

    Args:
        lookups: mapping of n -> {prefix: {"keys": [...], "freqs": [...]}}
            (as produced by read_multingram_csv elsewhere in this file).
        n: context window size (the n of the n-gram).
        prefix: letters preceding the one to predict; auto-trimmed to the
            last n-1 characters if longer.
        top_k: number of (key, probability) pairs to return (default 5,
            preserving the original hard-coded cutoff).

    Returns:
        List of (key, probability) pairs sorted by descending probability,
        or [] when no data exists for this prefix and n.
    """
    # An n-gram prefix holds at most n-1 context characters.
    expected_prefix_len = max(0, n - 1)
    if len(prefix) > expected_prefix_len:
        prefix = prefix[-expected_prefix_len:]  # keep last n-1 chars

    # NOTE(review): removed two leftover debug print() calls here that
    # wrote the prefix and membership test to stdout on every call.
    if prefix not in lookups.get(n, {}):
        return []

    data = lookups[n][prefix]
    freqs = data["freqs"]
    keys = data["keys"]

    # Normalise raw frequencies into probabilities.
    total = sum(freqs)
    probs = [f / total for f in freqs]

    # Rank candidate letters by probability, highest first.
    pairs = sorted(zip(keys, probs), key=lambda x: x[1], reverse=True)
    return pairs[:top_k]
100+
80101def run (response , answer , params :Params ) -> Result :
81- output = []
102+ mode = params .get ("mode" , "production" )
103+ context_window = params .get ("context_window" , 3 )
104+ printing = params .get ("printing" , 0 )
105+
106+ if printing :
107+ print ("#### Reading n-gram data ####" )
108+ lookups = read_multingram_csv (LETTERS_PATH )
109+
110+ result = Result (True )
82111
112+ # === SINGLE MODE ===
113+ if mode == "single" :
114+ prefix = params .get ("context" , "he" ).upper ()
115+ top5 = generate_single_letter (lookups , context_window , prefix )
116+ if not top5 :
117+ feedback = f"No data found for prefix '{ prefix } ' and n={ context_window } ."
118+ else :
119+ feedback_lines = []
120+ for k , p in top5 :
121+ feedback_lines .append (f"{ k [:- 1 ]} | { k [- 1 ]} - { p :.0%} " )
122+ feedback = "<br>" .join (feedback_lines )
123+
124+ result .add_feedback ("general" , feedback )
125+ return result
126+
127+ # === PRODUCTION MODE ===
83128 print ("#### Getting data ####" )
84129 data = csv_to_lists (WORD_LENGTHS_PATH )
85130
86131 print ("#### Generating word lengths ####" )
87- word_lengths = {}
88- word_lengths ["tokens" ] = [row [0 ] for row in data ]
89- word_lengths ["weights" ] = [row [1 ] for row in data ]
132+ word_lengths = {
133+ "tokens" : [row [0 ] for row in data ],
134+ "weights" : [row [1 ] for row in data ],
135+ }
90136
91- print ("#### Getting context window ####" )
92137 word_count = params .get ("word_count" , 10 )
93138 response_used = isinstance (response , int ) and response > 1
94- context_window = response if response_used else params .get ("context_window" , 3 )
95139
96140 if word_count == "random" :
97141 word_count = random .randint (3 ,15 )
98142
99143 print ("#### Getting output ####" )
100- for i in range (word_count ):
144+ output = []
145+ for _ in range (word_count ):
101146 k = int (random .choices (word_lengths ["tokens" ],weights = word_lengths ["weights" ],k = 1 )[0 ])
102- output .append (generate_word (k ,context_window ))
147+ output .append (generate_word (lookups , k ,context_window ))
103148
104149 print ("#### Generating Feedback ####" )
105-
106- result = Result (True )
107150 preface = 'Context window: ' + str (context_window )+ ', Word count: ' + str (word_count )+ '. Output: <br>'
108151 result .add_feedback ("general" , preface + ' ' .join (output ))
109152 if response_used :
0 commit comments