Skip to content

Commit fce13e0

Browse files
committed
Hub: Inquire information from https://llmtxt.dev/hub
1 parent 419a85c commit fce13e0

3 files changed

Lines changed: 70 additions & 0 deletions

File tree

src/cratedb_about/cli.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pueblo.util.cli import boot_click
77

88
from cratedb_about.bundle.llmstxt import LllmsTxtBuilder
9+
from cratedb_about.hub.model import LLMsTxtHub
910
from cratedb_about.outline import CrateDbKnowledgeOutline
1011
from cratedb_about.query.core import CrateDbKnowledgeConversation
1112
from cratedb_about.query.model import Example
@@ -99,6 +100,16 @@ def bundle(ctx: click.Context, url: str, format_: str, outdir: Path) -> None:
99100
logger.info("Ready.")
100101

101102

103+
@cli.command()
@click.pass_context
def hub(ctx: click.Context) -> None:
    """
    Inquire information from https://llmtxt.dev/hub.
    """
    # Fetch and parse the hub index; the instance is discarded afterwards.
    hub_index = LLMsTxtHub()
    hub_index.fetch()
    logger.info("Ready.")
111+
112+
102113
@cli.command()
103114
@click.argument("question", type=str, required=False)
104115
@click.option("--backend", type=click.Choice(["openai", "claude"]), default="openai")

src/cratedb_about/hub/__init__.py

Whitespace-only changes.

src/cratedb_about/hub/model.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import dataclasses
2+
import typing as t
3+
4+
from bs4 import BeautifulSoup
5+
6+
from cratedb_about.util import get_cache_client
7+
8+
9+
@dataclasses.dataclass
class LLMsTxtHubItem:
    """
    A single website entry parsed from the llmtxt.dev hub index page.
    """

    # Display name of the listed website.
    name: str
    # URL of the website itself.
    website: str
    # Short blurb describing the website.
    description: str
    # Tag labels attached to the entry on the hub page.
    tags: t.List[str] = dataclasses.field(default_factory=list)
    # Hyperlink targets (e.g. llms.txt documents) advertised by the entry.
    resources: t.List[str] = dataclasses.field(default_factory=list)
    # TODO: Parse and add logo.
17+
18+
19+
20+
class LLMsTxtHub:
    """
    Scrape the website index from https://llmtxt.dev/hub.

    ``fetch()`` downloads the hub index page, parses each "website-card"
    element into an ``LLMsTxtHubItem``, and collects them on ``self.items``.
    """

    # Location of the hub index page.
    url: str = "https://llmtxt.dev/hub"

    def __init__(self):
        # Parsed entries; populated by `fetch()`.
        self.items: t.List[LLMsTxtHubItem] = []
        # HTTP client with response caching.
        self.client = get_cache_client()

    def fetch(self) -> "LLMsTxtHub":
        """
        Download and parse the hub index page, populating ``self.items``.

        Returns ``self`` to allow call chaining.
        """
        index_html = self.client.get(self.url)
        soup = BeautifulSoup(index_html, "html.parser")
        cards = soup.find_all(attrs={"class": "website-card"})
        items: t.List[LLMsTxtHubItem] = []
        for card in cards:
            # The card's first <div> holds the headline: <h3> name and <p> website.
            header = card.find(name="div")
            name = header.find(name="h3").text
            website = header.find(name="p").text
            # The following sibling <div> carries the description paragraph.
            description = header.find_next("div").find(name="p").text
            # Collect all anchor targets as resource links.
            hrefs = [link.get("href") for link in header.find_all("a")]
            items.append(
                LLMsTxtHubItem(
                    name=name,
                    website=website,
                    description=description,
                    resources=hrefs,
                )
            )
        # Fix: original built a local list, never stored it on the instance,
        # and its item construction ended in a dangling `re)` (SyntaxError);
        # intended keyword was evidently `resources=hrefs`.
        self.items = items
        return self

0 commit comments

Comments
 (0)