Skip to content

Commit 60f8236

Browse files
Peter Johnson
authored and committed
single mode added to shannon letters
1 parent 0449e43 commit 60f8236

7 files changed

Lines changed: 85 additions & 3335 deletions

File tree

.DS_Store

0 Bytes
Binary file not shown.

educational_material/.DS_Store

0 Bytes
Binary file not shown.

educational_material/main.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,26 @@ def plot_letter_histogram(show_plots: bool=False, media_dir: Path=None):
2525
else:
2626
print(f"Plot saved to {out_path}.")
2727

28+
def plot_wordlength_histogram(show_plots: bool=False, media_dir: Path=None):
    """Plot a bar chart of word-length frequencies from norvig_word_frequencies.csv.

    The CSV is read from ``evaluation_function/models/storage`` relative to the
    parent of this file's directory, and must provide the columns
    ``wordLength`` and ``Percent``. The chart is always written to
    ``<media_dir>/word_histogram.png``.

    Args:
        show_plots: When true, also display the figure interactively after
            saving it.
        media_dir: Directory that receives the PNG. When ``None`` (the
            default), falls back to the ``media`` directory next to this file
            instead of failing on ``None / "word_histogram.png"``.
    """
    csv_path = Path(__file__).parent.parent / "evaluation_function" / "models" / "storage" / "norvig_word_frequencies.csv"
    df = pd.read_csv(csv_path)

    # Kept from the original: order rows by descending share before plotting.
    df = df.sort_values(by="Percent", ascending=False)

    # The signature advertises media_dir=None, so make that default usable.
    if media_dir is None:
        media_dir = Path(__file__).parent / "media"
    media_dir = Path(media_dir)
    media_dir.mkdir(parents=True, exist_ok=True)

    # Start from a fresh figure so bars from a previously drawn plot in the
    # same process cannot leak into this image.
    fig = plt.figure()
    plt.bar(df["wordLength"], df["Percent"], color="skyblue", edgecolor="black")
    plt.xlabel("Word length")
    plt.ylabel("Frequency")
    plt.tight_layout()

    out_path = media_dir / "word_histogram.png"
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    if show_plots:
        print(f"Plot saved to {out_path}, displaying plot now.")
        plt.show()
    else:
        print(f"Plot saved to {out_path}.")

    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
47+
2848
def plot_neural_network_results(show_plots: bool=False, media_dir: Path=None):
2949
"""Plot the results of a neural network model against the data.
3050
@@ -70,5 +90,7 @@ def plot_neural_network_results(show_plots: bool=False, media_dir: Path=None):
7090
args = parser.parse_args()
7191
media_dir = Path(__file__).parent / "media"
7292
media_dir.mkdir(exist_ok=True)
93+
#plot_letter_histogram(show_plots=args.show_plots, media_dir=media_dir)
94+
plot_wordlength_histogram(show_plots=args.show_plots, media_dir=media_dir)
7395
#plot_neural_network_results(show_plots=args.show_plots, media_dir=media_dir)
74-
plot_letter_histogram(show_plots=args.show_plots, media_dir=media_dir)
96+

evaluation_function/dev.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
"response": 6,
1717
"answer": 800,
1818
"model": "shannon_letters_ngram",
19+
"mode": "single",
1920
"word_count": "random",
20-
"context_window": 5
21+
"context": "ate",
22+
"context_window": 3
2123
},
2224
"shannon_words_build": {
2325
"response": "the general sweetness",
@@ -30,7 +32,7 @@
3032
"response": "the general sweetness",
3133
"answer": 2,
3234
"model": "shannon_words_ngram",
33-
"word_count": 12,
35+
"word_count": 10,
3436
"context_window": 3,
3537
"dev": true
3638
},

evaluation_function/models/shannon_letters_ngram.py

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ def sample_ngram(lookups, n, prefix="", k=1):
4949
return random.choices(data["keys"], weights=data["freqs"], k=k)
5050

5151

52-
def generate_word(N,n) -> str: # N = max letters, n = context window (as in, n-gram)
53-
lookups = read_multingram_csv(LETTERS_PATH)
52+
def generate_word(lookups, N,n, printing=0) -> str: # N = max letters, n = context window (as in, n-gram)
53+
"""Generate a random word using n-gram model up to N letters."""
54+
#lookups = read_multingram_csv(LETTERS_PATH)
5455
N_max=N
5556
samples = {}
5657
samples[1] = sample_ngram(lookups, n=1, prefix="", k=1)[0]
@@ -77,33 +78,75 @@ def generate_word(N,n) -> str: # N = max letters, n = context window (as in, n-g
7778

7879
return samples[N_max]
7980

81+
def generate_single_letter(lookups, n, prefix="", top_k=5) -> list:
    """Return the most probable n-gram continuations for a given prefix.

    Args:
        lookups: Nested mapping where ``lookups[n][prefix]`` is a dict with
            parallel lists ``"keys"`` (candidate entries) and ``"freqs"``
            (their numeric frequencies).
        n: Order of the n-gram table to consult; the context used is the last
            ``n - 1`` characters of ``prefix``.
        prefix: Context string. If it is longer than ``n - 1`` characters it
            is trimmed to its last ``n - 1`` characters (to ``""`` for
            ``n <= 1``).
        top_k: Maximum number of candidates to return (default 5).

    Returns:
        A list of ``(key, probability)`` tuples sorted by descending
        probability, at most ``top_k`` long. Returns an empty list when the
        table has no entry for the (trimmed) prefix or when all frequencies
        sum to zero.
    """
    context_len = max(0, n - 1)
    if len(prefix) > context_len:
        # ``prefix[-0:]`` is the whole string, not the empty string, so the
        # zero-length context (unigram) case must be handled explicitly.
        prefix = prefix[-context_len:] if context_len else ""

    table = lookups.get(n, {})
    if prefix not in table:
        return []

    entry = table[prefix]
    keys = entry["keys"]
    freqs = entry["freqs"]

    total = sum(freqs)
    if not total:
        # No probability mass to normalise; treat as "no data" rather than
        # raising ZeroDivisionError.
        return []

    ranked = sorted(
        zip(keys, (freq / total for freq in freqs)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:top_k]
100+
80101
def run(response, answer, params:Params) -> Result:
81-
output=[]
102+
mode = params.get("mode", "production")
103+
context_window = params.get("context_window", 3)
104+
printing = params.get("printing", 0)
105+
106+
if printing:
107+
print("#### Reading n-gram data ####")
108+
lookups = read_multingram_csv(LETTERS_PATH)
109+
110+
result = Result(True)
82111

112+
# === SINGLE MODE ===
113+
if mode == "single":
114+
prefix = params.get("context", "he").upper()
115+
top5 = generate_single_letter(lookups, context_window, prefix)
116+
if not top5:
117+
feedback = f"No data found for prefix '{prefix}' and n={context_window}."
118+
else:
119+
feedback_lines = []
120+
for k, p in top5:
121+
feedback_lines.append(f"{k[:-1]} | {k[-1]} - {p:.0%}")
122+
feedback = "<br>".join(feedback_lines)
123+
124+
result.add_feedback("general", feedback)
125+
return result
126+
127+
# === PRODUCTION MODE ===
83128
print("#### Getting data ####")
84129
data = csv_to_lists(WORD_LENGTHS_PATH)
85130

86131
print("#### Generating word lengths ####")
87-
word_lengths = {}
88-
word_lengths["tokens"] = [row[0] for row in data]
89-
word_lengths["weights"] = [row[1] for row in data]
132+
word_lengths = {
133+
"tokens": [row[0] for row in data],
134+
"weights": [row[1] for row in data],
135+
}
90136

91-
print("#### Getting context window ####")
92137
word_count = params.get("word_count", 10)
93138
response_used = isinstance(response, int) and response > 1
94-
context_window = response if response_used else params.get("context_window", 3)
95139

96140
if word_count == "random":
97141
word_count = random.randint(3,15)
98142

99143
print("#### Getting output ####")
100-
for i in range(word_count):
144+
output=[]
145+
for _ in range(word_count):
101146
k=int(random.choices(word_lengths["tokens"],weights=word_lengths["weights"],k=1)[0])
102-
output.append(generate_word(k,context_window))
147+
output.append(generate_word(lookups,k,context_window))
103148

104149
print("#### Generating Feedback ####")
105-
106-
result = Result(True)
107150
preface = 'Context window: '+str(context_window)+', Word count: '+str(word_count)+'. Output: <br>'
108151
result.add_feedback("general", preface + ' '.join(output))
109152
if response_used:

0 commit comments

Comments
 (0)