#!/usr/bin/env python3
"""
Real Neural Chatbot using PyTorch
Sequence-to-Sequence with proper training
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pickle

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
VOCAB_SIZE = 256
EMBED_DIM = 100
HIDDEN_DIM = 200
BATCH_SIZE = 8
LEARNING_RATE = 0.001
EPOCHS = 100
MAX_LEN = 50
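
# A 256-entry character vocabulary covers every byte-sized code point;
# MAX_LEN caps both questions and answers at 50 characters.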

class Vocabulary:
    """Character-level tokenization"""
    def __init__(self):
        self.char2idx = {chr(i): i for i in range(VOCAB_SIZE)}
        self.idx2char = {i: chr(i) for i in range(VOCAB_SIZE)}

    def encode(self, text):
        return [self.char2idx.get(c, 0) for c in text[:MAX_LEN]]

    def decode(self, tokens):
        return ''.join([self.idx2char.get(t, '?') for t in tokens if t != 0]).strip()
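
# Round-trip sketch: Vocabulary().decode(Vocabulary().encode("hi")) == "hi";
# characters outside the 256-entry table encode to 0 and are dropped on decode.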

class ChatDataset(Dataset):
    """Conversation pairs dataset"""
    def __init__(self, conversations, vocab):
        self.vocab = vocab
        self.pairs = []
        for q, a in conversations:
            q_tokens = self.vocab.encode(q)
            a_tokens = self.vocab.encode(a)
            # Pad both sides to a fixed length with token 0
            q_padded = q_tokens + [0] * (MAX_LEN - len(q_tokens))
            a_padded = a_tokens + [0] * (MAX_LEN - len(a_tokens))
            self.pairs.append((q_padded[:MAX_LEN], a_padded[:MAX_LEN]))

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        q, a = self.pairs[idx]
        return torch.tensor(q, dtype=torch.long), torch.tensor(a, dtype=torch.long)

class EncoderDecoder(nn.Module):
    """Simple LSTM-based encoder-decoder"""
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        # Shared embedding for encoder and decoder inputs
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        # Note: nn.LSTM only applies dropout between stacked layers, so it
        # is omitted here for these single-layer LSTMs (PyTorch would warn
        # and silently ignore it otherwise)
        self.encoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.decoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        # Output layer
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, questions, answers):
        # Encode questions; the final (hidden, cell) summarizes the input
        q_embed = self.embed(questions)
        _, (hidden, cell) = self.encoder(q_embed)
        # Teacher forcing: shift the decoder input one step right so the
        # model predicts token t from tokens < t (token 0 doubles as the
        # start token), rather than learning to copy its own input
        decoder_input = torch.cat(
            [torch.zeros_like(answers[:, :1]), answers[:, :-1]], dim=1
        )
        a_embed = self.embed(decoder_input)
        decoder_out, _ = self.decoder(a_embed, (hidden, cell))
        # Predictions
        logits = self.fc(decoder_out)  # (batch, seq_len, vocab_size)
        return logits
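
# Shape sketch: questions and answers each arrive as (batch, MAX_LEN) long
# tensors; the model returns (batch, MAX_LEN, VOCAB_SIZE) logits, one
# distribution over characters per decoding step.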

def get_conversations():
    """Training conversations"""
    return [
        ("hello", "Hi there! How can I help?"),
        ("hi", "Hey! What can I do for you?"),
        ("hey", "Hello! Nice to meet you"),
        ("good morning", "Good morning! Have a great day!"),
        ("good evening", "Good evening! How's it going?"),
        ("how are you", "I'm doing well, thank you!"),
        ("how are you doing", "I'm great, thanks for asking!"),
        ("what's up", "Not much! Just here to chat"),
        ("how's it going", "Pretty good! How about you?"),
        ("what is your name", "I'm a neural AI chatbot!"),
        ("who are you", "I'm an AI assistant here to help!"),
        ("what are you", "I'm an artificial intelligence"),
        ("are you human", "No, I'm an AI!"),
        ("thank you", "You're welcome!"),
        ("thanks", "Happy to help!"),
        ("thanks a lot", "Anytime!"),
        ("bye", "Goodbye! Take care!"),
        ("goodbye", "See you soon!"),
        ("see you", "Catch you later!"),
        ("good night", "Sleep well! Good night!"),
        ("help", "I'm here to help! What do you need?"),
        ("can you help", "Of course! What's the question?"),
        ("what can you do", "I can answer questions and chat!"),
        ("i'm happy", "That's wonderful! Keep smiling!"),
        ("i'm sad", "I'm sorry to hear that. Cheer up!"),
        ("i'm tired", "You should get some rest!"),
        ("do you like me", "Of course I do!"),
        ("are you smart", "I try my best!"),
        ("you are great", "Thank you so much!"),
        ("i like you", "I like you too!"),
        ("you're cool", "Thanks! You're cool too!"),
        ("tell me a joke", "Why did the AI go to school?"),
        ("how old are you", "I'm brand new!"),
        ("where are you from", "I'm from the digital world!"),
        ("what do you think", "I think you're interesting!"),
        ("can you help me", "I'll do my best!"),
        ("what should i do", "Think about your options!"),
        ("much appreciated", "Always glad to assist!"),
        ("i need help", "I'm here for you! Tell me more"),
        ("i'm confused", "Let me try to explain it better!"),
        ("what time is it", "I don't track time, sorry!"),
        ("make me laugh", "I tried my best!"),
        ("tell me something", "AI is everywhere nowadays!"),
    ]

def train():
    print("=" * 60)
    print("Training Real Neural Chatbot")
    print("=" * 60)

    # Data
    print("\n1. Preparing data...")
    conversations = get_conversations()
    vocab = Vocabulary()
    dataset = ChatDataset(conversations, vocab)
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    print(f" Training samples: {len(conversations)}")

    # Model
    print("\n2. Creating model...")
    model = EncoderDecoder(VOCAB_SIZE, EMBED_DIM, HIDDEN_DIM).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # ignore_index=0 keeps padded positions out of the loss
    criterion = nn.CrossEntropyLoss(ignore_index=0)
    print(f" Device: {DEVICE}")

    # Training
    print("\n3. Training...")
    model.train()
    for epoch in range(EPOCHS):
        total_loss = 0
        for questions, answers in loader:
            questions = questions.to(DEVICE)
            answers = answers.to(DEVICE)
            # Forward
            logits = model(questions, answers)
            # Loss over the flattened (batch * seq_len) predictions
            loss = criterion(
                logits.view(-1, VOCAB_SIZE),
                answers.view(-1)
            )
            # Backward
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            total_loss += loss.item()
        if (epoch + 1) % 10 == 0:
            avg_loss = total_loss / len(loader)
            print(f" Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.4f}")

    # Save
    print("\n4. Saving model...")
    torch.save({
        'model': model.state_dict(),
        'config': {
            'vocab_size': VOCAB_SIZE,
            'embed_dim': EMBED_DIM,
            'hidden_dim': HIDDEN_DIM,
        }
    }, 'neural_model.pt')
    with open('vocab.pkl', 'wb') as f:
        pickle.dump(vocab, f)
    print(" Saved to neural_model.pt and vocab.pkl")
    print("=" * 60)

if __name__ == "__main__":
    train()