Feat: Se agrega script para validar y corregir links rotos

pixelead0 · pixelead0 · commit 63ff0e09064f · 2025-06-29T08:17:29.000-06:00
diff --git a/.markdownlint.json b/.markdownlint.json
@@ -0,0 +1,7 @@
+{
+  "MD033": false,
+  "MD041": false,
+  "MD013": false,
+  "MD025": false,
+  "MD026": false
+}
diff --git a/scripts/check_links.py b/scripts/check_links.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
+Simple script to check internal links with HTTP requests.
+"""
+
+import json
+import re
+from pathlib import Path
+from urllib.parse import urljoin
+
+import requests
+
+
+def find_internal_links(content):
+    """Find all internal links in markdown content."""
+    links = []
+    pattern = r"\[([^\]]+)\]\(([^)]+)\)"
+
+    for match in re.finditer(pattern, content):
+        text = match.group(1)
+        url = match.group(2)
+
+        # Skip external links
+        if url.startswith(("http://", "https://", "mailto:", "tel:")):
+            continue
+
+        links.append((text, url))
+
+    return links
+
+
+def resolve_link_url(base_url, md_file, link_url):
+    """Resolve the real URL as a browser would from the markdown file location."""
+    # If link is absolute (starts with /), join with base_url
+    if link_url.startswith("/"):
+        return urljoin(base_url, link_url)
+    # If link is relative, join with the file's directory path
+    else:
+        # Get the directory of the markdown file relative to docs/
+        md_dir = Path(md_file).parent
+        # Build the relative path as it would be in the site
+        rel_path = (md_dir / link_url).as_posix()
+        # Remove any leading './' for clean URLs
+        if rel_path.startswith("./"):
+            rel_path = rel_path[2:]
+        return urljoin(base_url + "/", rel_path)
+
+
+def check_link(base_url, link_url, current_file):
+    """Check if a link returns 200 or 404."""
+    try:
+        # Handle anchor links - they should resolve from current page
+        if link_url.startswith("#"):
+            # Build URL from current file path
+            file_path = current_file.replace(".md", "/")
+            if not file_path.startswith("/"):
+                file_path = "/" + file_path
+            full_url = urljoin(base_url, file_path + link_url)
+        else:
+            # For relative links, resolve from current file's directory
+            if not link_url.startswith("/"):
+                # Get current file's directory
+                current_dir = str(Path(current_file).parent)
+                if current_dir != ".":
+                    # Resolve relative to current directory
+                    resolved_path = str(Path(current_dir) / link_url)
+                else:
+                    resolved_path = link_url
+
+                # Convert to URL format
+                if not resolved_path.startswith("/"):
+                    resolved_path = "/" + resolved_path
+                full_url = urljoin(base_url, resolved_path)
+            else:
+                # Absolute path from site root
+                full_url = urljoin(base_url, link_url)
+
+        # Make request
+        response = requests.get(full_url, timeout=5)
+
+        if response.status_code == 200:
+            return True, "200 OK"
+        elif response.status_code == 404:
+            return False, "404 Not Found"
+        else:
+            return False, f"HTTP {response.status_code}"
+
+    except requests.RequestException as e:
+        return False, f"Error: {e}"
+
+
+def main():
+    base_url = "http://127.0.0.1:8000"
+    docs_dir = Path("docs")
+
+    print(f"🔍 Checking internal links against {base_url}")
+    print("=" * 50)
+
+    broken_links = []
+    working_links = []
+
+    # Find all markdown files
+    for md_file in docs_dir.rglob("*.md"):
+        try:
+            with open(md_file, "r", encoding="utf-8") as f:
+                content = f.read()
+
+            links = find_internal_links(content)
+
+            for text, url in links:
+                is_working, status = check_link(
+                    base_url, url, str(md_file.relative_to(docs_dir))
+                )
+
+                result = {
+                    "file": str(md_file.relative_to(docs_dir)),
+                    "text": text,
+                    "url": url,
+                    "full_url": (
+                        urljoin(base_url, url)
+                        if not url.startswith("#")
+                        else urljoin(
+                            base_url,
+                            str(md_file.relative_to(docs_dir)).replace(".md", "/")
+                            + url,
+                        )
+                    ),
+                    "status": status,
+                    "line": content[: content.find(f"[{text}]({url})")].count("\n") + 1,
+                }
+
+                if is_working:
+                    working_links.append(result)
+                else:
+                    broken_links.append(result)
+
+        except Exception as e:
+            print(f"❌ Error reading {md_file}: {e}")
+
+    # Print summary
+    print(f"✅ Working links: {len(working_links)}")
+    print(f"❌ Broken links: {len(broken_links)}")
+
+    # Save results to JSON
+    results = {
+        "summary": {
+            "total_files_scanned": len(list(docs_dir.rglob("*.md"))),
+            "working_links": len(working_links),
+            "broken_links": len(broken_links),
+            "base_url": base_url,
+        },
+        "broken_links": broken_links,
+        "working_links": working_links,
+    }
+
+    # Save to JSON file
+    output_file = "broken_links.json"
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+
+    print(f"\n📄 Results saved to: {output_file}")
+
+    # Show some broken links in console
+    if broken_links:
+        print(f"\n🔴 BROKEN LINKS (showing first 10):")
+        print("-" * 50)
+        for link in broken_links[:10]:
+            print(f"📄 {link['file']}:{link['line']}")
+            print(f"   Text: {link['text']}")
+            print(f"   URL: {link['url']}")
+            print(f"   Full URL: {link['full_url']}")
+            print(f"   Status: {link['status']}")
+            print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/fix_broken_links.py b/scripts/fix_broken_links.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""
+Script to fix broken links based on broken_links.json
+"""
+
+import json
+import re
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+def load_broken_links(json_file: str = "broken_links.json") -> Dict:
+    """Load broken links from JSON file."""
+    try:
+        with open(json_file, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        print(f"❌ File {json_file} not found. Run check_links.py first.")
+        return {}
+    except json.JSONDecodeError as e:
+        print(f"❌ Error parsing JSON: {e}")
+        return {}
+
+
+def suggest_fix(url: str) -> str:
+    """Suggest a fix for a broken URL."""
+    # Primero: /index.md o index.md
+    if url.endswith("/index.md"):
+        return url[:-9] + "/"
+    if url.endswith("index.md"):
+        return url[:-8] + "/"
+    # Después: cualquier otro .md
+    if url.endswith(".md"):
+        return url[:-3] + "/"
+
+    # Handle meetup individual links - add trailing slash
+    if re.match(r"^\d{6}-[a-z]+$", url):
+        return url + "/"
+
+    # Handle meetup individual links with .md - remove .md and add /
+    if re.match(r"^\d{6}-[a-z]+\.md$", url):
+        return url[:-3] + "/"
+
+    # Handle /index/ links - remove the /index/ part
+    if url.endswith("/index/"):
+        return url[:-7] + "/"
+
+    # Add trailing slash for directory-like URLs that don't have it
+    if not url.endswith("/") and "." not in url.split("/")[-1]:
+        return url + "/"
+
+    # Handle specific patterns for comunidad links
+    if url.startswith("/comunidad/") and not url.endswith("/"):
+        return url + "/"
+
+    if url.startswith("comunidad/") and not url.endswith("/"):
+        return url + "/"
+
+    # Handle meetup directory links
+    if "meetups/" in url and url.endswith(".md"):
+        return url[:-3] + "/"
+
+    return url
+
+
+def fix_file_links(file_path: str, broken_links: List[Dict]) -> Tuple[bool, List[Dict]]:
+    """Fix broken links in a specific file."""
+    file_links = [link for link in broken_links if link["file"] == file_path]
+
+    if not file_links:
+        return False, []
+
+    try:
+        with open(f"docs/{file_path}", "r", encoding="utf-8") as f:
+            content = f.read()
+
+        original_content = content
+        fixes_applied = []
+
+        for link in file_links:
+            old_url = link["url"]
+            new_url = suggest_fix(old_url)
+
+            if new_url != old_url:
+                # Create the pattern to find and replace
+                pattern = f'\\[{re.escape(link["text"])}\\]\\({re.escape(old_url)}\\)'
+                replacement = f'[{link["text"]}]({new_url})'
+
+                # Apply the fix
+                new_content = re.sub(pattern, replacement, content)
+
+                if new_content != content:
+                    content = new_content
+                    fixes_applied.append(
+                        {
+                            "line": link["line"],
+                            "text": link["text"],
+                            "old_url": old_url,
+                            "new_url": new_url,
+                        }
+                    )
+
+        # Write the fixed content back
+        if fixes_applied:
+            with open(f"docs/{file_path}", "w", encoding="utf-8") as f:
+                f.write(content)
+            return True, fixes_applied
+
+        return False, []
+
+    except Exception as e:
+        print(f"❌ Error fixing {file_path}: {e}")
+        return False, []
+
+
+def main():
+    """Main function to fix broken links."""
+    print("🔧 Fixing broken links...")
+    print("=" * 50)
+
+    # Load broken links
+    data = load_broken_links()
+    if not data:
+        return
+
+    broken_links = data.get("broken_links", [])
+    if not broken_links:
+        print("✅ No broken links to fix!")
+        return
+
+    print(f"📄 Found {len(broken_links)} broken links")
+
+    # Group links by file
+    files_to_fix = {}
+    for link in broken_links:
+        file_path = link["file"]
+        if file_path not in files_to_fix:
+            files_to_fix[file_path] = []
+        files_to_fix[file_path].append(link)
+
+    print(f"📁 Files to fix: {len(files_to_fix)}")
+
+    # Fix each file
+    total_fixes = 0
+    files_fixed = 0
+
+    for file_path, links in files_to_fix.items():
+        print(f"\n🔧 Fixing {file_path}...")
+
+        was_fixed, fixes = fix_file_links(file_path, links)
+
+        if was_fixed:
+            files_fixed += 1
+            total_fixes += len(fixes)
+
+            print(f"   ✅ Fixed {len(fixes)} links:")
+            for fix in fixes:
+                print(f"      Line {fix['line']}: {fix['old_url']} → {fix['new_url']}")
+        else:
+            print(f"   ⚠️  No fixes applied")
+
+    # Summary
+    print("\n" + "=" * 50)
+    print("📊 FIX SUMMARY")
+    print("=" * 50)
+    print(f"📁 Files processed: {len(files_to_fix)}")
+    print(f"🔧 Files fixed: {files_fixed}")
+    print(f"✅ Total fixes applied: {total_fixes}")
+
+    if total_fixes > 0:
+        print(f"\n💡 Run 'python scripts/check_links.py' again to verify fixes!")
+    else:
+        print(f"\n⚠️  No automatic fixes could be applied.")
+        print(f"   Some links may need manual correction.")
+
+
+if __name__ == "__main__":
+    main()