forked from MaartenCVonk/LLM-IPD-ARXIV
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_match_history.py
More file actions
179 lines (138 loc) · 7.49 KB
/
test_match_history.py
File metadata and controls
179 lines (138 loc) · 7.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
"""
Test script for match history implementation
"""
import sys
import os
# Add the parent directory to the path so we can import ipd_suite
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from ipd_suite.agents import LLMAgent, TitForTat, Random
from ipd_suite.tournament import Tournament, MatchHistoryManager
class TestLLMAgent(LLMAgent):
"""Test LLM agent for match history validation"""
def __init__(self, name: str, temperature: float = 0.7, match_history=None):
super().__init__(name, "test-model", temperature, 0.1, match_history=match_history)
self.mock_response = "I think carefully about the game situation.\n\nAfter analyzing the history and considering my strategy, I choose: C"
def _call_api(self, prompt: str) -> str:
"""Mock API call that tracks prompt content"""
self.api_calls += 1
self.total_tokens += 150
self.input_tokens += 100
self.output_tokens += 50
# Store the prompt to verify it contains match history
self.last_prompt = prompt
return self.mock_response
def _call_api_with_caching(self, static_content: str, dynamic_content: str) -> str:
"""Mock API call with caching that tracks prompt content"""
self.api_calls += 1
self.total_tokens += 150
self.input_tokens += 100
self.output_tokens += 50
# Store the prompt components to verify match history inclusion
self.last_static_content = static_content
self.last_dynamic_content = dynamic_content
return self.mock_response
def test_match_history_implementation():
"""Test comprehensive match history functionality"""
print("="*60)
print("MATCH HISTORY IMPLEMENTATION TEST")
print("="*60)
# Test 1: Create history manager and verify basic functionality
print("\n🧪 Test 1: Basic MatchHistoryManager functionality")
history_manager = MatchHistoryManager()
# Create test agents
agent1 = TestLLMAgent("TestAgent_T07", temperature=0.7)
agent2 = TitForTat("TitForTat")
agent3 = Random("Random")
print(f"✓ Created agents: {agent1.name}, {agent2.name}, {agent3.name}")
# Test agent identifier generation
agent1_id = history_manager.get_agent_identifier(agent1)
expected_id = "testmodeltemperature07" # Expected identifier format
print(f"✓ Agent identifier: {agent1_id}")
# Test 2: Run matches and verify history recording
print("\n🧪 Test 2: Match history recording")
tournament = Tournament([agent1, agent2, agent3], termination_prob=0.5, max_rounds=5, history_manager=history_manager)
# Run a single match and verify history recording
match_result = tournament.run_match(agent1, agent2)
print(f"✓ Match completed: {match_result.agent1_name} vs {match_result.agent2_name}")
print(f" Rounds played: {match_result.rounds_played}")
print(f" Moves: {list(zip(match_result.agent1_moves, match_result.agent2_moves))}")
# Check if history was recorded for LLM agent
agent1_history = history_manager.get_history_for_agent(agent1)
print(f"✓ Agent1 history recorded: {len(agent1_history)} matches")
# Non-LLM agents should not have history recorded
agent2_history = history_manager.get_history_for_agent(agent2)
print(f"✓ Agent2 (non-LLM) history: {len(agent2_history)} matches (expected 0)")
# Verify match history structure
if agent1_history:
match_record = agent1_history[0]
print(f"✓ Match record structure: opponent='{match_record.get('opponent')}', rounds={len(match_record.get('rounds', []))}")
if match_record.get('rounds'):
first_round = match_record['rounds'][0]
print(f"✓ Round structure: your_move='{first_round.get('your_move')}', opponent_move='{first_round.get('opponent_move')}'")
# Test 3: Create agent with match history and verify prompt inclusion
print("\n🧪 Test 3: Match history inclusion in prompts")
# Create a new agent with the recorded history
agent1_with_history = TestLLMAgent("TestAgent_T07", temperature=0.7, match_history=agent1_history)
print(f"✓ Created agent with history: {len(agent1_with_history.match_history)} matches")
# Make a move to trigger prompt generation
agent1_with_history.make_move(['C'], ['D'])
# Verify that match history is included in the prompt
if hasattr(agent1_with_history, 'last_static_content'):
static_content = agent1_with_history.last_static_content
if "complete match history from previous phases" in static_content:
print("✓ Match history section found in static content")
# Check for specific match content
if "vs TitForTat" in static_content:
print("✓ Opponent name found in match history")
# Check for move pairs
if "C,D" in static_content or "(C,D)" in static_content:
print("✓ Move pairs found in match history")
else:
print("⚠️ Match history section not found in static content")
print(f"Static content preview: {static_content[:200]}...")
else:
print("⚠️ No static content captured")
# Test 4: History serialization and loading
print("\n🧪 Test 4: History serialization and loading")
# Save histories to file
test_history_file = "test_match_history.json"
history_manager.save_histories(test_history_file)
print(f"✓ Histories saved to {test_history_file}")
# Create new manager and load histories
new_history_manager = MatchHistoryManager()
new_history_manager.load_histories(test_history_file)
print(f"✓ Histories loaded from {test_history_file}")
# Verify loaded histories match original
loaded_agent1_history = new_history_manager.get_history_for_agent(agent1)
if len(loaded_agent1_history) == len(agent1_history):
print("✓ Loaded history length matches original")
if loaded_agent1_history and agent1_history:
if loaded_agent1_history[0]['opponent'] == agent1_history[0]['opponent']:
print("✓ Loaded history content matches original")
# Clean up test file
if os.path.exists(test_history_file):
os.remove(test_history_file)
print("✓ Test file cleaned up")
# Test 5: Multiple phases simulation
print("\n🧪 Test 5: Multi-phase history accumulation")
# Simulate phase 2 - create agent with history from phase 1
agent1_phase2 = TestLLMAgent("TestAgent_T07", temperature=0.7, match_history=agent1_history)
# Run another match (simulating phase 2)
match_result_2 = tournament.run_match(agent1_phase2, agent3)
print(f"✓ Phase 2 match completed: {match_result_2.rounds_played} rounds")
# Verify cumulative history
updated_history = history_manager.get_history_for_agent(agent1_phase2)
print(f"✓ Cumulative history: {len(updated_history)} matches total")
if len(updated_history) >= 2:
print(f" Match 1 opponent: {updated_history[0]['opponent']}")
print(f" Match 2 opponent: {updated_history[1]['opponent']}")
print("\n" + "="*60)
print("✅ ALL MATCH HISTORY TESTS PASSED!")
print("✅ Match history implementation working correctly")
print("="*60)
return True
if __name__ == "__main__":
success = test_match_history_implementation()
exit_code = 0 if success else 1
exit(exit_code)