Skip to content

Commit 63ff0e0

Browse files
author
pixelead0
committed
Feat: Se agrega script para validar y corregir links rotos
1 parent b00e417 commit 63ff0e0

File tree

3 files changed

+362
-0
lines changed

3 files changed

+362
-0
lines changed

.markdownlint.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"MD033": false,
3+
"MD041": false,
4+
"MD013": false,
5+
"MD025": false,
6+
"MD026": false
7+
}

scripts/check_links.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Simple script to check internal links with HTTP requests.
4+
"""
5+
6+
import json
7+
import re
8+
from pathlib import Path
9+
from urllib.parse import urljoin
10+
11+
import requests
12+
13+
14+
def find_internal_links(content):
    """Collect (text, url) pairs for every non-external markdown link in *content*.

    External targets (http(s), mailto, tel) are skipped; anchors and
    relative/absolute site paths are kept for checking.
    """
    external_prefixes = ("http://", "https://", "mailto:", "tel:")
    link_re = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
    return [
        (m.group(1), m.group(2))
        for m in link_re.finditer(content)
        if not m.group(2).startswith(external_prefixes)
    ]
30+
31+
32+
def resolve_link_url(base_url, md_file, link_url):
    """Resolve *link_url* found in *md_file* into a full URL under *base_url*.

    Site-absolute links (leading ``/``) join straight onto the base URL;
    relative links are anchored at the markdown file's directory first,
    mirroring how a browser would resolve them.
    """
    if not link_url.startswith("/"):
        # Relative link: build the site path from the file's directory.
        site_path = (Path(md_file).parent / link_url).as_posix()
        # Drop a leading "./" for clean URLs (pathlib usually normalizes
        # this already; kept for parity with hand-built paths).
        if site_path.startswith("./"):
            site_path = site_path[2:]
        return urljoin(base_url + "/", site_path)
    # Absolute link: resolve against the site root.
    return urljoin(base_url, link_url)
47+
48+
49+
def check_link(base_url, link_url, current_file):
    """Issue a GET for *link_url* (as seen from *current_file*) and report status.

    Returns ``(ok, status_message)`` where *ok* is True only for HTTP 200.
    Network failures surface as ``(False, "Error: ...")``.
    """
    try:
        if link_url.startswith("#"):
            # Pure anchor: the target is the current page itself
            # (served as a directory URL, so ".md" becomes "/").
            page = current_file.replace(".md", "/")
            if not page.startswith("/"):
                page = "/" + page
            full_url = urljoin(base_url, page + link_url)
        elif link_url.startswith("/"):
            # Site-absolute path: resolve from the site root.
            full_url = urljoin(base_url, link_url)
        else:
            # Relative path: resolve against the current file's directory.
            parent = str(Path(current_file).parent)
            if parent == ".":
                resolved = link_url
            else:
                resolved = str(Path(parent) / link_url)
            if not resolved.startswith("/"):
                resolved = "/" + resolved
            full_url = urljoin(base_url, resolved)

        # Fetch the resolved URL against the running docs server.
        response = requests.get(full_url, timeout=5)

    except requests.RequestException as e:
        return False, f"Error: {e}"

    if response.status_code == 200:
        return True, "200 OK"
    if response.status_code == 404:
        return False, "404 Not Found"
    return False, f"HTTP {response.status_code}"
90+
91+
92+
def main():
    """Scan every markdown file under docs/ and check its internal links.

    Requires a docs server already running at *base_url* (presumably a
    local ``mkdocs serve`` — TODO confirm).  Prints a console summary and
    writes the full report to ``broken_links.json``.
    """
    # Local server to validate against; not configurable from the CLI.
    base_url = "http://127.0.0.1:8000"
    docs_dir = Path("docs")

    print(f"🔍 Checking internal links against {base_url}")
    print("=" * 50)

    broken_links = []
    working_links = []

    # Find all markdown files
    for md_file in docs_dir.rglob("*.md"):
        try:
            with open(md_file, "r", encoding="utf-8") as f:
                content = f.read()

            links = find_internal_links(content)

            for text, url in links:
                # check_link needs the file path relative to docs/ to
                # resolve relative links and anchors.
                is_working, status = check_link(
                    base_url, url, str(md_file.relative_to(docs_dir))
                )

                result = {
                    "file": str(md_file.relative_to(docs_dir)),
                    "text": text,
                    "url": url,
                    # For anchors, rebuild the page URL from the file path
                    # (".md" -> "/"); otherwise join the URL onto the base.
                    "full_url": (
                        urljoin(base_url, url)
                        if not url.startswith("#")
                        else urljoin(
                            base_url,
                            str(md_file.relative_to(docs_dir)).replace(".md", "/")
                            + url,
                        )
                    ),
                    "status": status,
                    # 1-based line of the FIRST occurrence of this exact
                    # link text in the file (duplicates map to the first).
                    "line": content[: content.find(f"[{text}]({url})")].count("\n") + 1,
                }

                if is_working:
                    working_links.append(result)
                else:
                    broken_links.append(result)

        # Broad catch: a single unreadable file should not abort the scan.
        except Exception as e:
            print(f"❌ Error reading {md_file}: {e}")

    # Print summary
    print(f"✅ Working links: {len(working_links)}")
    print(f"❌ Broken links: {len(broken_links)}")

    # Save results to JSON
    results = {
        "summary": {
            "total_files_scanned": len(list(docs_dir.rglob("*.md"))),
            "working_links": len(working_links),
            "broken_links": len(broken_links),
            "base_url": base_url,
        },
        "broken_links": broken_links,
        "working_links": working_links,
    }

    # Save to JSON file (consumed by scripts/fix_broken_links.py).
    output_file = "broken_links.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n📄 Results saved to: {output_file}")

    # Show some broken links in console
    if broken_links:
        print(f"\n🔴 BROKEN LINKS (showing first 10):")
        print("-" * 50)
        for link in broken_links[:10]:
            print(f"📄 {link['file']}:{link['line']}")
            print(f"   Text: {link['text']}")
            print(f"   URL: {link['url']}")
            print(f"   Full URL: {link['full_url']}")
            print(f"   Status: {link['status']}")
            print()
174+
175+
176+
# Allow running directly: python scripts/check_links.py
if __name__ == "__main__":
    main()

scripts/fix_broken_links.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to fix broken links based on broken_links.json
4+
"""
5+
6+
import json
7+
import re
8+
from pathlib import Path
9+
from typing import Dict, List, Tuple
10+
11+
12+
def load_broken_links(json_file: str = "broken_links.json") -> Dict:
    """Read the report produced by check_links.py.

    Returns the parsed dict, or an empty dict (after printing a hint)
    when the file is missing or contains invalid JSON.
    """
    try:
        with open(json_file, "r", encoding="utf-8") as handle:
            data = json.load(handle)
    except FileNotFoundError:
        print(f"❌ File {json_file} not found. Run check_links.py first.")
        data = {}
    except json.JSONDecodeError as e:
        print(f"❌ Error parsing JSON: {e}")
        data = {}
    return data
23+
24+
25+
def suggest_fix(url: str) -> str:
    """Suggest a corrected form of a broken internal URL.

    The rules mirror how MkDocs-style sites publish markdown sources:
    ``dir/index.md`` becomes ``dir/``, any other ``page.md`` becomes
    ``page/``, and extension-less directory-like paths gain a trailing
    slash.  URLs matching no rule are returned unchanged.

    Note: the original version carried several unreachable branches
    (meetup-specific ``*.md`` regexes and a ``meetups/*.md`` suffix rule,
    all shadowed by the generic ``.md`` rule below); they were removed
    with no change in behavior.
    """
    # Index pages collapse to their directory URL.
    if url.endswith("/index.md"):
        return url[:-9] + "/"
    if url.endswith("index.md"):
        return url[:-8] + "/"
    # Any other markdown source maps to <name>/ (this also covers meetup
    # pages such as 202401-enero.md).
    if url.endswith(".md"):
        return url[:-3] + "/"
    # Published ".../index/" URLs collapse to the parent directory.
    if url.endswith("/index/"):
        return url[:-7] + "/"
    # Extension-less paths look like directories: add the trailing slash.
    # (Subsumes the old ^\d{6}-[a-z]+$ meetup rule, which produced the
    # same result.)
    if not url.endswith("/") and "." not in url.rsplit("/", 1)[-1]:
        return url + "/"
    # Community pages get a trailing slash even when the last segment
    # contains a dot — kept for parity with the original rules.
    if url.startswith(("/comunidad/", "comunidad/")) and not url.endswith("/"):
        return url + "/"
    return url
64+
65+
66+
def fix_file_links(file_path: str, broken_links: List[Dict]) -> Tuple[bool, List[Dict]]:
    """Rewrite the broken links recorded for *file_path* inside ``docs/<file_path>``.

    Only links whose URL ``suggest_fix()`` can improve are touched.
    Returns ``(changed, fixes)`` where *fixes* lists each applied
    replacement (line, text, old_url, new_url).
    """
    relevant = [entry for entry in broken_links if entry["file"] == file_path]
    if not relevant:
        return False, []

    doc_path = f"docs/{file_path}"
    try:
        with open(doc_path, "r", encoding="utf-8") as handle:
            text = handle.read()

        applied = []
        for entry in relevant:
            old_url = entry["url"]
            new_url = suggest_fix(old_url)
            if new_url == old_url:
                continue
            # Match the exact markdown link [text](old_url), verbatim.
            pattern = f'\\[{re.escape(entry["text"])}\\]\\({re.escape(old_url)}\\)'
            replacement = f'[{entry["text"]}]({new_url})'
            updated = re.sub(pattern, replacement, text)
            if updated == text:
                continue
            text = updated
            applied.append(
                {
                    "line": entry["line"],
                    "text": entry["text"],
                    "old_url": old_url,
                    "new_url": new_url,
                }
            )

        if not applied:
            return False, []

        # Persist the rewritten file only when something changed.
        with open(doc_path, "w", encoding="utf-8") as handle:
            handle.write(text)
        return True, applied

    # Broad catch so one unfixable file does not abort the whole run.
    except Exception as e:
        print(f"❌ Error fixing {file_path}: {e}")
        return False, []
114+
115+
116+
def main():
    """Fix the broken links reported in broken_links.json.

    Workflow: load the report produced by check_links.py, group the
    broken entries by source file, apply automatic URL fixes per file via
    fix_file_links(), then print a summary of what changed.
    """
    print("🔧 Fixing broken links...")
    print("=" * 50)

    # Empty dict means the report was missing/invalid (already reported).
    data = load_broken_links()
    if not data:
        return

    broken_links = data.get("broken_links", [])
    if not broken_links:
        print("✅ No broken links to fix!")
        return

    print(f"📄 Found {len(broken_links)} broken links")

    # Group links by source file so each file is rewritten at most once.
    files_to_fix = {}
    for link in broken_links:
        files_to_fix.setdefault(link["file"], []).append(link)

    print(f"📁 Files to fix: {len(files_to_fix)}")

    # Fix each file
    total_fixes = 0
    files_fixed = 0

    for file_path, links in files_to_fix.items():
        print(f"\n🔧 Fixing {file_path}...")

        was_fixed, fixes = fix_file_links(file_path, links)

        if was_fixed:
            files_fixed += 1
            total_fixes += len(fixes)

            print(f"   ✅ Fixed {len(fixes)} links:")
            for fix in fixes:
                # Fix: the old and new URL were previously printed with no
                # separator between them.
                print(f"      Line {fix['line']}: {fix['old_url']} -> {fix['new_url']}")
        else:
            print("   ⚠️  No fixes applied")

    # Summary
    print("\n" + "=" * 50)
    print("📊 FIX SUMMARY")
    print("=" * 50)
    print(f"📁 Files processed: {len(files_to_fix)}")
    print(f"🔧 Files fixed: {files_fixed}")
    print(f"✅ Total fixes applied: {total_fixes}")

    if total_fixes > 0:
        print("\n💡 Run 'python scripts/check_links.py' again to verify fixes!")
    else:
        print("\n⚠️  No automatic fixes could be applied.")
        print("   Some links may need manual correction.")
175+
176+
177+
# Allow running directly: python scripts/fix_broken_links.py
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)