# assistant_chatbot/vicky_app.py
import uvicorn
from fastapi import FastAPI, Request, Form, File, UploadFile, HTTPException, Query, Body
from fastapi.responses import HTMLResponse, RedirectResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import urllib
import os
from pathlib import Path
import shutil
from datetime import datetime
import sys
from typing import Dict, Any, List, Optional
import logging
import re
import base64
import tempfile
from collections import defaultdict
import requests
import time
import threading
import ipaddress
import requests
import tempfile
import subprocess
import google.generativeai as genai
from gtts import gTTS
import uuid
import os
import textwrap
import json
import numpy as np

# Utility function for Discord notifications
async def send_discord_notification(ip_address: str, user_agent: str = None):
    """Send a "new visitor" embed to the Discord webhook.

    The webhook URL is read from the ``DISCORD_WEBHOOK`` environment
    variable; when it is unset a warning is logged and nothing is sent.
    Every failure is logged and swallowed, so callers never see an exception.

    Args:
        ip_address: Visitor IP shown in the embed ("Unknown" when empty).
        user_agent: Visitor User-Agent; values longer than 100 characters
            are truncated and suffixed with "...".
    """
    try:
        discord_webhook = os.environ.get("DISCORD_WEBHOOK")
        if not discord_webhook:
            logger.warning("DISCORD_WEBHOOK not found in environment variables")
            return

        import asyncio
        import requests
        from datetime import datetime, timezone

        # The embed labels the time as UTC, so use an aware UTC timestamp
        # instead of the server's local clock.
        now_utc = datetime.now(timezone.utc)

        if user_agent and len(user_agent) > 100:
            agent_text = user_agent[:100] + "..."
        else:
            agent_text = user_agent or "Unknown"

        # Create Discord embed
        embed = {
            "title": "🌐 New Website Visitor",
            "color": 0x4c2882,  # Purple color
            "fields": [
                {
                    "name": "IP Address",
                    "value": ip_address or "Unknown",
                    "inline": True
                },
                {
                    "name": "Time",
                    "value": now_utc.strftime("%Y-%m-%d %H:%M:%S UTC"),
                    "inline": True
                },
                {
                    "name": "User Agent",
                    "value": agent_text,
                    "inline": False
                }
            ],
            "footer": {
                "text": "TDS - Tools for Data Science"
            },
            "timestamp": now_utc.isoformat()
        }

        payload = {
            "embeds": [embed]
        }

        # requests is a blocking client; run it in the default executor so
        # the event loop keeps serving other requests while we wait.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(discord_webhook, json=payload, timeout=5),
        )
        # Any non-error status counts as delivered (the original accepted 204 only)
        if response.ok:
            logger.info(f"Discord notification sent for visitor: {ip_address}")
        else:
            logger.warning(f"Discord webhook failed with status: {response.status_code}")

    except Exception as e:
        logger.error(f"Error sending Discord notification: {e}")

# Pydantic models for API endpoints
class SimilarityRequest(BaseModel):
    # Request schema: a list of document strings plus one query string.
    # NOTE(review): the endpoint consuming this model is not visible in this
    # part of the file; presumably docs are ranked against query — confirm.
    docs: List[str]
    query: str

class SimilarityResponse(BaseModel):
    # Response schema: a list of strings.
    # NOTE(review): presumably the best-matching docs from the request — confirm.
    matches: List[str]

# Mock embedding function for similarity search
def get_mock_embedding(text: str) -> List[float]:
    """Build a deterministic 10-dimensional stand-in embedding for *text*.

    The MD5 digest of the encoded text is read as one big integer; component
    ``k`` is that integer shifted right by ``3 * k`` bits, reduced modulo
    1000 and scaled into ``[0, 1)``. The values carry no semantic meaning —
    they are only stable for identical input.
    """
    import hashlib

    digest_value = int(hashlib.md5(text.encode()).hexdigest(), 16)
    return [(digest_value >> (3 * k)) % 1000 / 1000.0 for k in range(10)]

def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """Return the cosine of the angle between *vec1* and *vec2*.

    Components are paired with ``zip``, so extra trailing components of the
    longer vector do not contribute to the dot product. If either vector has
    zero magnitude the result is 0.
    """
    import math

    def _norm(vector):
        # Euclidean length of the vector
        return math.sqrt(sum(component * component for component in vector))

    norm1 = _norm(vec1)
    norm2 = _norm(vec2)

    # A zero-length vector has no direction, so report no similarity
    if norm1 == 0 or norm2 == 0:
        return 0

    dot = sum(x * y for x, y in zip(vec1, vec2))
    return dot / (norm1 * norm2)
import re
from dotenv import load_dotenv
import json
from pydantic import BaseModel, Field

# Set up CORS for API access from any domain

# Configure logging: INFO level, timestamped records, one shared logger
# ("tds_app") used by every function in this module.
logging.basicConfig(level=logging.INFO,
                   format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("tds_app")

# Import the question-answering backend. The app cannot do anything useful
# without it, so a failed import terminates the process with an error message.
try:
    from vicky_server import answer_question
    logger.info("Successfully imported answer_question from vicky_server")
except ImportError as e:
    logger.error(f"Failed to import from vicky_server: {e}")
    sys.exit("Error: Could not import answer_question from vicky_server. Make sure the file exists in the same directory.")

# FastAPI application object; all routes below are registered on it.
app = FastAPI(title="TDS - Tools for Data Science",
              description="Interactive assistant for data science questions")
# CORS: allow API access from any domain.
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site make credentialed cross-origin requests — confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)
# Working directories (relative to the current working directory); they are
# created further down in this module if they don't exist.
TEMPLATES_DIR = Path("templates")
STATIC_DIR = Path("static")
UPLOADS_DIR = Path("uploads")

# JSON file that log_ip_address() appends access records to
IP_LOGS_FILE = Path("ip_logs.json")

def log_ip_address(request: Request, endpoint: str, query: str = None):
    """Append one access record for *request* to the JSON log file.

    The record holds a timestamp, the client IP, the endpoint name, the
    User-Agent header and the first 100 characters of *query*. The whole log
    (``IP_LOGS_FILE``) is read, extended and rewritten on every call. Any
    failure is logged and swallowed so that logging never breaks a request.

    Args:
        request: Incoming request; its client address and headers are read.
        endpoint: Label of the endpoint being accessed.
        query: Optional user query; truncated to 100 characters.
    """
    try:
        # request.client can be None when the server cannot determine the
        # peer; fall back to an empty value that fails validation below
        # instead of losing the whole log entry to an AttributeError.
        ip = request.client.host if request.client else ""
        forwarded = request.headers.get("X-Forwarded-For")

        if forwarded:
            # First entry of the chain is the originating client.
            # NOTE(review): this header is client-controlled unless a trusted
            # proxy overwrites it, so the logged IP can be spoofed.
            ip = forwarded.split(',')[0].strip()

        # Validate that this is a real IP address
        try:
            ipaddress.ip_address(ip)
        except ValueError:
            ip = "Invalid IP"

        # Create log entry
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "ip_address": ip,
            "endpoint": endpoint,
            "user_agent": request.headers.get("User-Agent", "Unknown"),
            "query": query[:100] if query else None  # Limit query length
        }

        # Load existing logs; a corrupt or unexpected file starts a fresh log
        logs = []
        if IP_LOGS_FILE.exists():
            try:
                with open(IP_LOGS_FILE, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                loaded = []
            # Only a JSON array can be appended to; anything else is discarded
            if isinstance(loaded, list):
                logs = loaded

        # Add new entry and save
        logs.append(log_entry)
        with open(IP_LOGS_FILE, "w", encoding="utf-8") as f:
            json.dump(logs, f, indent=2)

        logger.info(f"Logged IP {ip} accessing {endpoint}")

    except Exception as e:
        logger.error(f"Error logging IP address: {e}")

# Load environment variables from a .env file (webhook URLs, tokens, intervals)
load_dotenv()

# API access notification state, shared between request handlers and the
# background sender thread. Mutations happen while holding API_NOTIF_LOCK.
API_NOTIF_COUNT = 0  # requests recorded since the last summary was sent
API_NOTIF_LAST_TIME = time.time()  # when a sender thread was last started
API_NOTIF_LOCK = threading.Lock()  # guards the counter, buffer and thread handle
API_NOTIF_BUFFER = []  # pending notification dicts awaiting the next summary
API_NOTIF_THREAD = None  # most recently started sender thread, if any

def send_api_notification(request, question):
    """Record an API access and, when due, start the summary sender thread.

    Each call appends a notification (timestamp, client IP, User-Agent and
    the first 100 characters of the question) to ``API_NOTIF_BUFFER``. A
    daemon thread running ``_send_buffered_notifications`` is started only
    when at least ``NOTIF_INTERVAL_SECONDS`` (default 300) have passed since
    the previous one was started and no sender thread is still alive.

    Args:
        request: Incoming request; its client address and headers are read.
        question: The user's question; ``None`` is treated as empty.
    """
    global API_NOTIF_COUNT, API_NOTIF_LAST_TIME, API_NOTIF_BUFFER, API_NOTIF_THREAD

    # Minimum number of seconds between two summaries; a malformed value
    # falls back to the default instead of failing the request.
    try:
        min_notification_interval = int(os.environ.get("NOTIF_INTERVAL_SECONDS", "300"))
    except ValueError:
        logger.warning("Invalid NOTIF_INTERVAL_SECONDS; falling back to 300 seconds")
        min_notification_interval = 300

    # Extract client info; request.client can be None when the peer is unknown
    ip = request.client.host if request.client else "Unknown"
    forwarded = request.headers.get("X-Forwarded-For")
    if forwarded:
        ip = forwarded.split(',')[0].strip()

    user_agent = request.headers.get("User-Agent", "Unknown")

    question = "" if question is None else str(question)

    # Create notification message
    notification = {
        "timestamp": datetime.now().isoformat(),
        "ip": ip,
        "user_agent": user_agent,
        "question": question[:100] + ("..." if len(question) > 100 else "")
    }

    with API_NOTIF_LOCK:
        # Add to buffer
        API_NOTIF_BUFFER.append(notification)
        API_NOTIF_COUNT += 1

        # Only send if enough time has passed since the last notification
        current_time = time.time()
        if current_time - API_NOTIF_LAST_TIME < min_notification_interval:
            return

        # Never run two senders at once; the buffered entries stay queued
        # and go out with the next summary.
        if API_NOTIF_THREAD is not None and API_NOTIF_THREAD.is_alive():
            return

        API_NOTIF_THREAD = threading.Thread(target=_send_buffered_notifications)
        API_NOTIF_THREAD.daemon = True
        API_NOTIF_THREAD.start()
        API_NOTIF_LAST_TIME = current_time

def _send_buffered_notifications():
    """Drain the notification buffer and post one summary to each channel.

    Runs in the background thread started by ``send_api_notification``. The
    buffer and counter are swapped out under ``API_NOTIF_LOCK``; the summary
    (total requests, per-IP counts, last five questions) is then posted to
    every configured channel: ``DISCORD_WEBHOOK``, ``TELEGRAM_BOT_TOKEN`` +
    ``TELEGRAM_CHAT_ID``, ``SLACK_WEBHOOK``. Channels are delivered
    independently, so a failure in one does not prevent the others.
    """
    global API_NOTIF_BUFFER, API_NOTIF_COUNT

    # Take ownership of the pending entries and reset the shared state
    with API_NOTIF_LOCK:
        notifications = API_NOTIF_BUFFER
        total_requests = API_NOTIF_COUNT
        API_NOTIF_BUFFER = []
        API_NOTIF_COUNT = 0

    if not notifications:
        return

    # Get notification URLs from environment
    discord_webhook = os.environ.get("DISCORD_WEBHOOK")
    telegram_bot_token = os.environ.get("TELEGRAM_BOT_TOKEN")
    telegram_chat_id = os.environ.get("TELEGRAM_CHAT_ID")
    slack_webhook = os.environ.get("SLACK_WEBHOOK")

    # Group by IP address
    ip_counts = {}
    for notif in notifications:
        ip_counts[notif["ip"]] = ip_counts.get(notif["ip"], 0) + 1

    # Create summary message
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    lines = [
        f"**API Access Summary** ({timestamp})",
        f"- Total requests: {total_requests}",
        "- IP addresses:",
    ]
    lines.extend(f"  - {ip}: {ip_total} requests" for ip, ip_total in ip_counts.items())
    lines.append("- Recent queries:")
    # Show last 5 questions
    lines.extend(f"  - {notif['question']}" for notif in notifications[-5:])
    summary = "\n".join(lines) + "\n"

    def _deliver(channel, url, payload):
        # One channel per try block: an outage in one service must not
        # swallow the notifications for the remaining ones.
        try:
            response = requests.post(url, json=payload, timeout=5)
            response.raise_for_status()
            logger.info(f"Sent API access notification to {channel}")
        except Exception as e:
            logger.error(f"Error sending API access notification to {channel}: {e}")

    # Send to Discord
    if discord_webhook:
        _deliver("Discord", discord_webhook, {"content": summary})

    # Send to Telegram
    if telegram_bot_token and telegram_chat_id:
        telegram_api = f"https://api.telegram.org/bot{telegram_bot_token}/sendMessage"
        # NOTE(review): the summary embeds user-supplied text while
        # parse_mode is Markdown; unbalanced markup may make Telegram reject
        # the message — confirm whether escaping is needed.
        _deliver("Telegram", telegram_api, {
            "chat_id": telegram_chat_id,
            "text": summary,
            "parse_mode": "Markdown"
        })

    # Send to Slack
    if slack_webhook:
        _deliver("Slack", slack_webhook, {"text": summary})
# Imports used by the API status monitor below
import asyncio
import aiohttp
from datetime import datetime, timedelta

# Shared state for the API status monitor (see monitor_api_status)
API_STATUS = "unknown"  # "up", "down", or "unknown"
API_LAST_CHECK = None  # datetime of the most recent status check, None before the first
API_MONITOR_RUNNING = False  # loop flag: set True by the monitor, False on shutdown
API_CHECK_INTERVAL = int(os.environ.get("API_CHECK_INTERVAL_SECONDS", "300"))  # 5 minutes by default
API_STATUS_LOCK = threading.Lock()  # held while API_STATUS / API_LAST_CHECK are updated

async def monitor_api_status():
    """Background task that polls the API and reports status changes.

    Loops while ``API_MONITOR_RUNNING`` is true. Each round calls
    ``check_api_status``, stores the result in ``API_STATUS`` /
    ``API_LAST_CHECK`` and sends a notification when the status changed.
    While the API stays "up", an "all clear" report is sent at most once per
    window in the first five minutes of hour 0 and hour 12 (local time).
    The loop sleeps ``API_CHECK_INTERVAL`` seconds between rounds and 60
    seconds after an unexpected error.
    """
    global API_STATUS, API_LAST_CHECK, API_MONITOR_RUNNING

    API_MONITOR_RUNNING = True
    logger.info("Starting API status monitoring")

    loop = asyncio.get_running_loop()
    # (date, hour) of the last periodic report, so a short check interval
    # cannot send several reports inside the same five-minute window.
    last_report_slot = None

    while API_MONITOR_RUNNING:
        try:
            # Check API status
            new_status = await check_api_status()

            with API_STATUS_LOCK:
                last_status = API_STATUS
                API_STATUS = new_status
                API_LAST_CHECK = datetime.now()

            now = datetime.now()

            # send_status_notification uses blocking HTTP calls, so it runs
            # in the default executor to keep the event loop responsive.
            if new_status != last_status:
                await loop.run_in_executor(
                    None, send_status_notification, new_status, True
                )
            elif last_status == "up" and now.hour % 12 == 0 and now.minute < 5:
                # Send "all clear" notification once every 12 hours (around midnight and noon)
                report_slot = (now.date(), now.hour)
                if report_slot != last_report_slot:
                    last_report_slot = report_slot
                    await loop.run_in_executor(
                        None, send_status_notification, new_status, False
                    )

            # Wait before next check
            await asyncio.sleep(API_CHECK_INTERVAL)

        except Exception as e:
            logger.error(f"Error in API monitoring: {e}")
            await asyncio.sleep(60)  # Wait a minute before retry after error

    logger.info("API status monitoring stopped")

async def check_api_status():
    """Probe the API and return "up" or "down".

    When the ``RENDER`` environment variable is set, only the local
    ``/health`` endpoint (``localhost:$PORT``, default 8000) is checked.
    Otherwise ``https://app.algsoch.tech/health`` is checked and then a
    ``health_check`` question is posted to ``/api``. Any non-200 response,
    timeout or connection error yields "down".

    Returns:
        "up" when every probe answered with HTTP 200, otherwise "down".
    """
    try:
        on_render = bool(os.getenv('RENDER'))

        # Determine the base URL based on environment
        if on_render:
            # On Render, check local health endpoint
            base_url = f"http://localhost:{os.getenv('PORT', '8000')}"
        else:
            # For production, check external endpoint
            base_url = "https://app.algsoch.tech"

        # One session serves both probes; timeouts are given as explicit
        # ClientTimeout objects rather than bare numbers.
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{base_url}/health",
                timeout=aiohttp.ClientTimeout(total=10),
            ) as response:
                if response.status != 200:
                    logger.warning(f"Health check failed with status {response.status}")
                    return "down"

            # Then check the main API endpoint (only for external checks)
            if not on_render:
                # NOTE(review): this posts a real question ("health_check")
                # to /api — confirm the endpoint treats it as a no-op.
                async with session.post(
                    f"{base_url}/api",
                    data={"question": "health_check"},
                    timeout=aiohttp.ClientTimeout(total=15),
                ) as response:
                    if response.status != 200:
                        logger.warning(f"API check failed with status {response.status}")
                        return "down"

        return "up"
    except Exception as e:
        logger.error(f"Error checking API status: {e}")
        return "down"

def send_status_notification(status, is_change=True):
    """Send an API status message to the configured webhooks.

    Posts to Discord (``DISCORD_WEBHOOK``) and Slack (``SLACK_WEBHOOK``)
    when configured. Each channel is delivered independently, so a failure
    in one does not prevent the other. Telegram delivery is not implemented
    for status notifications.

    Args:
        status: "up" for a healthy API; every other value is reported as down.
        is_change: True for a status-change alert, False for the periodic
            "operating normally" report (only distinguishes the "up" wording).
    """
    # Get notification URLs from environment
    discord_webhook = os.environ.get("DISCORD_WEBHOOK")
    slack_webhook = os.environ.get("SLACK_WEBHOOK")

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    is_up = status == "up"
    # str() keeps an unexpected non-string status from aborting the send
    status_label = str(status).upper()

    # Create appropriate message based on status
    if is_up and is_change:
        emoji = "🟢"
        title = "API is now UP"
        message = f"{emoji} **API Status Alert** - {timestamp}\nThe API at app.algsoch.tech is now UP and responding correctly."
    elif is_up:
        emoji = "✅"
        title = "API Status Report"
        message = f"{emoji} **API Status Report** - {timestamp}\nThe API at app.algsoch.tech is operating normally."
    else:
        emoji = "🔴"
        title = "API is DOWN"
        message = f"{emoji} **ALERT: API Status** - {timestamp}\nThe API at app.algsoch.tech is currently DOWN or experiencing issues."

    def _deliver(channel, url, payload):
        # One channel per try block so that one failing webhook does not
        # suppress the notification on the other channel.
        try:
            response = requests.post(url, json=payload, timeout=5)
            response.raise_for_status()
            logger.info(f"Sent API status notification to {channel}: {status}")
        except Exception as e:
            logger.error(f"Error sending API status notification to {channel}: {e}")

    # Send to Discord
    if discord_webhook:
        _deliver("Discord", discord_webhook, {
            "content": message,
            "embeds": [{
                "title": title,
                "color": 0x00ff00 if is_up else 0xff0000,
                "description": f"Status: **{status_label}**\nLast checked: {timestamp}"
            }]
        })

    # Send to Slack
    if slack_webhook:
        status_icon = "✅" if is_up else "❌"
        _deliver("Slack", slack_webhook, {
            "text": message,
            "attachments": [
                {
                    "color": "#36a64f" if is_up else "#ff0000",
                    "fields": [
                        {
                            "title": "Status",
                            "value": f"{status_icon} {status_label}",
                            "short": True
                        },
                        {
                            "title": "Last Checked",
                            "value": timestamp,
                            "short": True
                        }
                    ]
                }
            ]
        })

# Video transcription endpoint: YouTube transcript lookup with optional
# Gemini text correction, Hindi translation and gTTS audio generation
@app.post("/transcribe-video")
async def transcribe_video(
    request: Request,
    video_url: str = Form(None),
    start_time: float = Form(0),
    end_time: float = Form(None),
    translate_to_hindi: bool = Form(False),
    correct_text: bool = Form(False)
):
    """Transcribe a time range of a YouTube video via the YouTube Transcript API.

    Form fields:
        video_url: ``youtu.be/<id>`` or ``youtube.com/...?v=<id>`` URL.
        start_time / end_time: Range in seconds; ``end_time`` None means
            "until the end".
        translate_to_hindi: Also produce a Hindi translation with Gemini.
        correct_text: Ask Gemini to fix punctuation, casing and grammar.

    Returns:
        A dict with ``success``. On success it also carries the raw,
        corrected and Hindi transcripts (the latter two may be None), the
        ids of the generated English/Hindi audio files (None when generation
        failed) and the requested time range. On failure it carries
        ``error``. Correction, translation and audio are best-effort: their
        errors are logged and leave the corresponding field as None.

    NOTE(review): the transcript fetch, Gemini calls and gTTS synthesis are
    blocking calls made inside this coroutine — consider moving them to a
    worker thread.
    """
    try:
        if not video_url:
            return {"success": False, "error": "Video URL is required"}

        video_id = _transcribe_extract_video_id(video_url)
        if not video_id:
            return {"success": False, "error": "Could not extract video ID from URL"}

        # Get the transcript
        # NOTE(review): relies on the static get_transcript() entry point —
        # confirm the installed youtube_transcript_api version provides it.
        from youtube_transcript_api import YouTubeTranscriptApi
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        filtered_transcript = _transcribe_select_entries(transcript, start_time, end_time)
        if not filtered_transcript:
            return {"success": False, "error": "No transcript found for the specified time range"}

        # Combine the text from all matched entries
        transcript_text = " ".join(entry['text'] for entry in filtered_transcript)

        # Initialize result objects
        corrected_transcript = None
        hindi_transcript = None
        audio_english_id = None
        audio_hindi_id = None

        # Correct text with Gemini if requested
        if correct_text:
            try:
                model = _transcribe_gemini_model()

                # Prompt for text correction
                prompt = f"""
                Correct the following text by adding proper punctuation, capitalization, and grammar fixes.
                Return ONLY the corrected text, no additional commentary:

                {transcript_text}
                """

                # Generate corrected text
                response = model.generate_content(prompt)
                corrected_transcript = response.text.strip()
            except Exception as e:
                logger.error(f"Error correcting transcript: {str(e)}")

        # Translate to Hindi if requested
        if translate_to_hindi:
            try:
                # Build the model here as well: translation must work even
                # when correction was not requested or failed.
                model = _transcribe_gemini_model()

                # Use corrected text if available, otherwise use original
                text_to_translate = corrected_transcript if corrected_transcript else transcript_text

                # Generate Hindi translation
                translation_prompt = f"Translate this text to Hindi: {text_to_translate}"
                response = model.generate_content(translation_prompt)
                hindi_transcript = response.text.strip()
            except Exception as e:
                logger.error(f"Error translating to Hindi: {str(e)}")

        # Generate audio files for speaking; an id is only reported once the
        # file was actually written.
        text_for_audio = corrected_transcript if corrected_transcript else transcript_text
        if text_for_audio:
            audio_english_id = _transcribe_save_audio(text_for_audio, 'en')
        if hindi_transcript:
            audio_hindi_id = _transcribe_save_audio(hindi_transcript, 'hi')

        return {
            "success": True,
            "video_id": video_id,
            "transcript": transcript_text,
            "corrected_transcript": corrected_transcript,
            "hindi_transcript": hindi_transcript,
            "audio_english_id": audio_english_id,
            "audio_hindi_id": audio_hindi_id,
            "time_range": {
                "start": start_time,
                "end": end_time
            }
        }
    except Exception as e:
        logger.error(f"Error transcribing video: {str(e)}")
        return {"success": False, "error": str(e)}


def _transcribe_extract_video_id(video_url):
    """Return the YouTube video id found in *video_url*, or None."""
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import parse_qs, urlparse

    # Short-link format: https://youtu.be/<id>
    if 'youtu.be' in video_url:
        match = re.search(r'youtu\.be/([^?&]+)', video_url)
        return match.group(1) if match else None

    # Watch-page format: https://www.youtube.com/watch?v=<id>
    if 'youtube.com' in video_url:
        query_params = parse_qs(urlparse(video_url).query)
        if 'v' in query_params:
            return query_params['v'][0]

    return None


def _transcribe_select_entries(transcript, start_time, end_time):
    """Return the transcript entries for the time range, sorted by start."""
    # First attempt: only entries that start within the range
    selected = [
        entry for entry in transcript
        if start_time <= entry['start']
        and (end_time is None or entry['start'] < end_time)
    ]

    # Fallback: entries that merely overlap the range
    if not selected and end_time is not None:
        selected = [
            entry for entry in transcript
            if entry['start'] + entry['duration'] > start_time
            and entry['start'] < end_time
        ]

    selected.sort(key=lambda entry: entry['start'])
    return selected


def _transcribe_gemini_model():
    """Configure Gemini from GEMINI_API_KEY and return the model object."""
    import google.generativeai as genai

    # The key must come from the environment; secrets are never embedded
    # in source code.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("GEMINI_API_KEY is not set")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-1.5-flash')


def _transcribe_save_audio(text, lang):
    """Synthesize *text* into static/audio/<id>.mp3; return the id or None."""
    try:
        from gtts import gTTS
        import uuid

        # Create audio directory if it doesn't exist
        audio_dir = Path("static/audio")
        audio_dir.mkdir(parents=True, exist_ok=True)

        audio_id = str(uuid.uuid4())
        gTTS(text=text, lang=lang, slow=False).save(str(audio_dir / f"{audio_id}.mp3"))
        return audio_id
    except Exception as e:
        logger.error(f"Error generating audio: {str(e)}")
        return None

@app.get("/audio/{audio_id}")
async def get_audio(audio_id: str):
    """Serve a generated MP3 from ``static/audio``.

    Args:
        audio_id: Identifier of the audio file (file name without ".mp3").

    Raises:
        HTTPException: 404 when the id is malformed or no such file exists.
    """
    # audio_id comes straight from the URL and is placed into a filesystem
    # path, so accept only plain tokens (letters, digits, "-" and "_") and
    # reject anything containing dots or path separators.
    if not re.fullmatch(r"[A-Za-z0-9_-]+", audio_id):
        raise HTTPException(status_code=404, detail="Audio file not found")

    audio_path = f"static/audio/{audio_id}.mp3"
    if not os.path.isfile(audio_path):
        raise HTTPException(status_code=404, detail="Audio file not found")

    return FileResponse(audio_path, media_type="audio/mpeg")
# Strong reference to the monitor task: the event loop itself keeps only a
# weak reference, so an unreferenced task may be garbage-collected mid-run.
API_MONITOR_TASK = None


@app.on_event("startup")
async def start_background_tasks():
    """Start the API status monitor when the application starts."""
    global API_MONITOR_TASK
    # Only start health monitoring if not on Render (causes connection issues with Gunicorn workers)
    if not os.getenv('RENDER'):
        API_MONITOR_TASK = asyncio.create_task(monitor_api_status())


@app.on_event("shutdown")
async def stop_background_tasks():
    """Stop the API status monitor when the application shuts down."""
    global API_MONITOR_RUNNING, API_MONITOR_TASK
    API_MONITOR_RUNNING = False
    # The monitor may be sleeping between checks; cancel it so shutdown does
    # not depend on the sleep finishing.
    if API_MONITOR_TASK is not None:
        API_MONITOR_TASK.cancel()
        API_MONITOR_TASK = None
def load_file_based_questions():
    """Load questions from vickys.json grouped by file.

    The JSON file is looked up in a fixed list of candidate locations and the
    first one that exists is used. Each entry that is a mapping with a
    ``"question"`` key is filed under the base name of its ``"file"`` value;
    entries whose ``"file"`` is missing or not a string go under
    ``"General Questions"``. Malformed entries are skipped instead of
    aborting the whole load.

    Returns:
        A mapping of file name to a list of
        ``{"id", "file", "question"}`` dicts, where ``id`` is
        ``"file-q-<index of the entry in the JSON array>"``. An empty dict is
        returned when the file cannot be found, read or parsed.
    """
    default_group = "General Questions"
    candidate_paths = (
        Path("vickys.json"),
        Path("main/grok/vickys.json"),
        Path("e:/data science tool/main/grok/vickys.json"),
    )

    try:
        json_path = next((path for path in candidate_paths if path.exists()), None)
        if json_path is None:
            logger.warning("vickys.json file not found")
            return {}

        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Group questions by file
        questions_by_file = defaultdict(list)
        loaded_count = 0
        for i, item in enumerate(data):
            # Skip anything that is not a question record rather than letting
            # one bad entry discard every other question.
            if not isinstance(item, dict) or "question" not in item:
                continue

            source_file = item.get("file")
            if isinstance(source_file, str):
                # Extract just the filename if it's a path (either separator).
                file_name = source_file.replace("\\", "/").split("/")[-1]
            else:
                # Missing or null "file": same result as an absent key.
                source_file = ""
                file_name = default_group

            questions_by_file[file_name].append({
                "id": f"file-q-{i}",
                "file": source_file,
                "question": item["question"],
            })
            loaded_count += 1

        # Report the number of questions actually kept, not the raw entry count.
        logger.info(f"Loaded {loaded_count} questions from vickys.json, grouped into {len(questions_by_file)} files")
        return questions_by_file

    except Exception as e:
        # Best-effort loader: any read/parse failure degrades to "no questions".
        logger.error(f"Error loading questions from vickys.json: {e}")
        return {}
# Make sure the directories the app writes to exist before anything uses them;
# the process exits if one of them cannot be created.
for directory in [TEMPLATES_DIR, STATIC_DIR, UPLOADS_DIR]:
    try:
        # parents=True so a nested location is created together with any
        # missing ancestors (matches how the audio directory is created).
        directory.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Failed to create directory {directory}: {e}")
        sys.exit(f"Error: Could not create directory {directory}")
    else:
        logger.info(f"Directory {directory} is ready")

# Write the HTML template for the main page to TEMPLATES_DIR/index.html.
# The file is opened in "w" mode, so any existing copy is overwritten.
with open(TEMPLATES_DIR / "index.html", "w", encoding="utf-8") as f:
    f.write("""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TDS - Tools for Data Science</title>
    <link rel="icon" type="image/png" href="/static/logo.png">
    <link rel="shortcut icon" type="image/png" href="/static/logo.png">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
    <style>
        :root {
            --primary-color: #4c2882;
            --primary-light: #6b3eb6;
            --secondary-color: #37bb9c;
            --dark-color: #2c2c2c;
            --light-color: #f5f5f5;
            --success-color: #4CAF50;
            --error-color: #f44336;
            --warning-color: #ff9800;
            --text-color: #333;
            --border-radius: 8px;
            --shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
            --transition: all 0.3s ease;
        }

        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }

        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: var(--light-color);
            color: var(--text-color);
            line-height: 1.6;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
        }

        header {
            background: linear-gradient(135deg, var(--primary-color), var(--primary-light));
            color: white;
            padding: 25px;
            margin-bottom: 20px;
            border-radius: var(--border-radius);
            box-shadow: var(--shadow);
            position: relative;
            overflow: hidden;
        }

        header::after {
            content: '';
            position: absolute;
            top: 0;
            right: 0;
            bottom: 0;
            left: 0;
            background: radial-gradient(circle at top right, rgba(255,255,255,0.2), transparent);
            pointer-events: none;
        }

        h1 {
            margin: 0;
            font-size: 32px;
            text-shadow: 1px 1px 3px rgba(0,0,0,0.2);
        }

        .subtitle {
            font-style: italic;
            opacity: 0.9;
            margin-top: 10px;
            font-weight: 300;
        }

        .header-buttons {
            position: absolute;
            top: 20px;
            right: 20px;
            display: flex;
            gap: 10px;
        }

        .header-button {
            background-color: rgba(255,255,255,0.2);
            color: white;
            border: none;
            padding: 8px 15px;
            border-radius: 20px;
            cursor: pointer;
            font-size: 14px;
            font-weight: 500;
            transition: var(--transition);
            display: flex;
            align-items: center;
            gap: 5px;
        }

        .header-button:hover {
            background-color: rgba(255,255,255,0.3);
        }

        .main-section {
            display: grid;
            grid-template-columns: 1fr 300px;
            gap: 20px;
            margin-bottom: 20px;
        }

        .chat-container {
            background-color: white;
            border-radius: var(--border-radius);
            box-shadow: var(--shadow);
            overflow: hidden;
            display: flex;
            flex-direction: column;
            height: 600px;
        }

        .chat-box {
            flex-grow: 1;
            overflow-y: auto;
            padding: 20px;
            background-color: white;
        }

        .message {
            padding: 12px 18px;
            border-radius: 18px;
            margin-bottom: 15px;
            max-width: 85%;
            word-wrap: break-word;
            position: relative;
            animation: fadeIn 0.3s ease;
            box-shadow: 0 1px 2px rgba(0,0,0,0.1);
        }

        @keyframes fadeIn {
            0% { opacity: 0; transform: translateY(10px); }
            100% { opacity: 1; transform: translateY(0); }
        }

        .user-message {
            background-color: #e3f2fd;
            margin-left: auto;
            border-top-right-radius: 4px;
            text-align: right;
        }

        .bot-message {
            background-color: #f5f5f5;
            margin-right: auto;
            border-top-left-radius: 4px;
            white-space: pre-wrap;
        }

        .bot-message.loading {
            background-color: #f0f0f0;
            color: #666;
        }

        .bot-message.loading::after {
            content: '⏳';
            margin-left: 5px;
            animation: pulse 1.5s infinite;
        }

        @keyframes pulse {
            0% { opacity: 0.5; }
            50% { opacity: 1; }
            100% { opacity: 0.5; }
        }

        .input-area {
            padding: 15px;
            background-color: #f9f9f9;
            border-top: 1px solid #eee;
        }

        .input-form {
            display: flex;
            gap: 10px;
            align-items: center;
        }
                .base64-decoder-section {
            background-color: white;
            border-radius: var(--border-radius);
            box-shadow: var(--shadow);
            margin-bottom: 20px;
            overflow: hidden;
        }

        .decoder-header {
            padding: 15px;
            background-color: var(--primary-color);
            color: white;
            display: flex;
            align-items: center;
            gap: 8px;
            font-weight: bold;
            position: relative;
        }

        .decoder-content {
            padding: 15px;
        }

        .decoder-textarea {
            width: 100%;
            min-height: 70px;
            padding: 10px;
            border: 1px solid #ddd;
            border-radius: var(--border-radius);
            font-family: monospace;
            font-size: 13px;
            resize: vertical;
        }

        .image-preview {
            margin-top: 15px;
            border: 1px solid #eee;
            border-radius: var(--border-radius);
            padding: 15px;
        }

        .preview-container {
            margin: 10px 0;
            text-align: center;
            background-color: #f5f5f5;
            padding: 10px;
            border-radius: var(--border-radius);
        }

        #previewImage {
            max-width: 100%;
            max-height: 300px;
            border: 1px solid #ddd;
        }

        .image-actions {
            display: flex;
            gap: 10px;
            margin-top: 10px;
            flex-wrap: wrap;
        }

        .action-btn {
            background-color: var(--primary-color);
            color: white;
            border: none;
            padding: 8px 12px;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
            transition: var(--transition);
        }

        .action-btn:hover {
            background-color: var(--primary-light);
        }

        .action-btn.secondary {
            background-color: var(--secondary-color);
        }

        .action-btn.secondary:hover {
            background-color: #2ea58a;
        }

        .action-btn.clear {
            background-color: #6c757d;
        }

        .action-btn.clear:hover {
            background-color: #5a6268;
        }

        .encoder-section {
            margin-top: 15px;
            border-top: 1px solid #eee;
            padding-top: 15px;