|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import sys, json, argparse, requests |
| 3 | +from ddgs import DDGS |
| 4 | +from markdownify import markdownify as md |
| 5 | + |
def search(q, max_results=10):
    """Run a text search through the ``ddgs`` client and return the hits.

    Args:
        q: Query string handed to ``DDGS().text``.
        max_results: Forwarded unchanged as the ``max_results`` keyword.

    Returns:
        A list materialized from whatever ``DDGS().text`` produces.
    """
    client = DDGS()
    hits = client.text(q, max_results=max_results)
    return list(hits)
| 8 | + |
def html2md(url, timeout=10):
    """Download *url* and convert the response body to Markdown.

    Args:
        url: Address passed to ``requests.get``.
        timeout: Request timeout forwarded to ``requests.get``.

    Returns:
        The result of running ``markdownify`` over the response text.

    Note:
        The HTTP status code is not inspected, so the body of an error
        response is converted just like a successful page.
    """
    # A browser-like User-Agent is sent with every request.
    request_headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=request_headers, timeout=timeout)
    return md(response.text)
| 11 | + |
def fmt_results(results, fmt="text"):
    """Render search results as a single string.

    Args:
        results: Iterable of mappings; the ``title``, ``href`` and ``body``
            keys are read for the "table" and "text" formats.
        fmt: ``"json"`` dumps *results* as indented JSON; ``"table"`` emits
            one numbered, truncated line per result; any other value emits
            title/href/body blocks separated by blank lines.

    Returns:
        The formatted text (an empty string for empty "table"/"text" input).
    """
    if fmt == "json":
        return json.dumps(results, indent=2, ensure_ascii=False)

    # Treat missing or None fields as empty strings so that one incomplete
    # result cannot abort the whole listing with KeyError/TypeError.
    rows = [
        (r.get("title") or "", r.get("href") or "", r.get("body") or "")
        for r in results
    ]

    if fmt == "table":
        # Column widths: title 60, href 50, body 80 characters.
        return "\n".join(
            f"{i}. {title[:60]} | {href[:50]} | {body[:80]}..."
            for i, (title, href, body) in enumerate(rows, start=1)
        )

    return "\n\n".join(f"{title}\n{href}\n{body}" for title, href, body in rows)
| 19 | + |
def _build_parser():
    """Create the argument parser describing the command-line interface."""
    p = argparse.ArgumentParser()
    p.add_argument("query", nargs="?", help="Search query or URL")
    p.add_argument("--search", "-s", action="store_true", help="Perform web search")
    p.add_argument("--html2md", "-m", action="store_true", help="Convert HTML to Markdown")
    p.add_argument("--max_results", "-n", type=int, default=10, help="Max search results (default: 10)")
    p.add_argument("--format", "-f", choices=["text", "json", "table"], default="text", help="Output format")
    p.add_argument("--filter", "-F", help="Filter results by keyword in title/body")
    p.add_argument("--sort", "-S", choices=["relevance", "title"], default="relevance", help="Sort results")
    p.add_argument("--timeout", type=int, default=10, help="Request timeout")
    p.add_argument("--output", "-o", help="Output to file")
    return p


def _refine(results, keyword=None, sort="relevance"):
    """Keep results matching *keyword* and optionally order them by title."""
    if keyword:
        # Lower-case the keyword once instead of twice per result.
        needle = keyword.lower()
        results = [
            r for r in results
            if needle in (r.get("title") or "").lower()
            or needle in (r.get("body") or "").lower()
        ]
    if sort == "title":
        # "relevance" keeps the order in which the results arrived.
        results = sorted(results, key=lambda r: r.get("title") or "")
    return results


def main():
    """Command-line entry point: search the web or convert a page to Markdown.

    Searching is the default action. The positional argument is treated as a
    URL to convert only when ``--html2md`` is given without ``--search``;
    if both flags are present, the search takes precedence.

    The output is written to ``--output`` when given, otherwise printed.
    Without a query the help text is printed and the process exits with
    status 1. Any exception raised while fetching, formatting or writing is
    reported as ``Error: <message>`` via ``sys.exit``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if not args.query:
        parser.print_help()
        sys.exit(1)

    try:
        if args.html2md and not args.search:
            out = html2md(args.query, args.timeout)
        else:
            results = search(args.query, max_results=args.max_results)
            out = fmt_results(_refine(results, args.filter, args.sort), args.format)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(out)
        else:
            print(out)
    except Exception as e:
        # Top-level boundary: show a short message instead of a traceback.
        sys.exit(f"Error: {e}")
| 55 | + |
# Run the CLI only when this file is executed as a script, so importing the
# module does not trigger argument parsing.
if __name__=="__main__":
    main()
0 commit comments