Skip to content

Commit 8e181e1

Browse files
authored
Merge pull request #55 from epsilla-cloud/dev
Support Epsilla Cloud integration in LangChain
2 parents 08519ff + 2d4be11 commit 8e181e1

File tree

4 files changed

+164
-13
lines changed

4 files changed

+164
-13
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Question Answering Pipeline with LangChain and a self-hosted Epsilla
# vector database. The script loads a document from the web, splits it,
# embeds it with OpenAI, stores the vectors in Epsilla, and answers a
# query through a RetrievalQA chain.

# Step1. Install the required packages
"""
pip install langchain
pip install openai
pip install tiktoken
pip install pyepsilla
pip install -U langchain-community
pip install -U langchain-openai
"""

# Step2. Configure the OpenAI API Key
import os

os.environ["OPENAI_API_KEY"] = "Your-OpenAI-API-Key"

# Step3. Load and split the documents, and prepare the embedding model.
# NOTE: WebBaseLoader moved to langchain_community; importing it from
# `langchain.document_loaders` is deprecated. The script already installs
# langchain-community and imports Epsilla from it, so use the same package.
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

loader = WebBaseLoader("https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt")
documents = loader.load()
# chunk_overlap=0: chunks are disjoint; each is at most ~1000 characters.
documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)
embeddings = OpenAIEmbeddings()

# Step4. Connect to the Epsilla server and build the vector store.
from langchain_community.vectorstores import Epsilla
from pyepsilla import vectordb

db_client = vectordb.Client(protocol="https", host="demo.epsilla.com", port="443")

status_code, response = db_client.load_db("MyDB", "/data/MyDB")
print(status_code, response)

vector_store = Epsilla.from_documents(
    documents,
    embeddings,
    db_client,
    db_path="/data/MyDB",
    db_name="MyDB",
    collection_name="MyCollection",
)

# Step5. Create the QA chain for retrieval and ask a question.
# (The original labeled this "Step4" twice.)
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=vector_store.as_retriever()
)
query = "What did the president say about Ketanji Brown Jackson"
resp = qa.invoke(query)
print("resp:", resp)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Question Answering Pipeline with LangChain and Epsilla Cloud. The script
# loads a document from the web, splits it, embeds it with OpenAI, stores
# the vectors in a cloud-hosted Epsilla database, and answers a query
# through a RetrievalQA chain.

# Step1. Install the required packages
"""
pip install langchain
pip install openai
pip install tiktoken
pip install pyepsilla
pip install -U langchain-openai
pip install -U langchain-community
"""

# Step2. Configure the OpenAI API key and the Epsilla Cloud credentials.
import os

os.environ["OPENAI_API_KEY"] = "Your-OpenAI-API-Key"
epsilla_api_key = os.getenv("EPSILLA_API_KEY", "Your-Epsilla-API-Key")
project_id = os.getenv("EPSILLA_PROJECT_ID", "Your-Project-ID")
db_id = os.getenv("EPSILLA_DB_ID", "Your-DB-ID")
# Use a string default so the type matches what os.getenv returns when the
# variable IS set (the original mixed an int 0 default with str values).
db_sharding_id = os.getenv("EPSILLA_DB_SHARDING_ID", "0")

# Step3. Load and split the documents, and prepare the embedding model.
# NOTE: WebBaseLoader moved to langchain_community; importing it from
# `langchain.document_loaders` is deprecated. The script already installs
# langchain-community and imports Epsilla from it, so use the same package.
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

loader = WebBaseLoader("https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt")
documents = loader.load()
# chunk_overlap=0: chunks are disjoint; each is at most ~1000 characters.
documents = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(documents)
embeddings = OpenAIEmbeddings()

# Step4. Connect to Epsilla Cloud and build the vector store.
from langchain_community.vectorstores import Epsilla
from pyepsilla import cloud, vectordb

# Epsilla Cloud derives the on-disk db name and path from the project/db ids:
# dashes in the db id become underscores in the db name.
db_name = f"db_{db_id.replace('-', '_')}"
db_path = f"/data/{project_id}/{db_name}/s{db_sharding_id}"
table_name = "MyCollection"

# Connect to Epsilla Cloud
cloud_client = cloud.Client(
    project_id=project_id,
    api_key=epsilla_api_key,
)

# Connect to the vector database inside the project
db_client = cloud_client.vectordb(db_id)

vector_store = Epsilla.from_documents(
    documents,
    embeddings,
    db_client,
    db_path=db_path,
    db_name=db_name,
    collection_name=table_name,
)

# Step5. Create the QA chain for retrieval and ask a question.
# (The original labeled this "Step4" twice.)
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=vector_store.as_retriever()
)
query = "What did the president say about Ketanji Brown Jackson"
resp = qa.invoke(query)
print("resp:", resp)

pyepsilla/cloud/client.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ def __init__(self, project_id: str, api_key: str, headers: dict = None):
2727
}
2828
if headers is not None:
2929
self._header.update(headers)
30+
self._db_id = None
3031

3132
def validate(self):
3233
resp = requests.get(
33-
url=self._baseurl + "/vectordb/list",
34+
url=f"{self._baseurl}/vectordb/list",
3435
data=None,
3536
headers=self._header,
3637
verify=False,
@@ -42,7 +43,7 @@ def validate(self):
4243

4344
def get_db_list(self):
4445
db_list = []
45-
req_url = "{}/vectordb/list".format(self._baseurl)
46+
req_url = f"{self._baseurl}/vectordb/list"
4647
resp = requests.get(url=req_url, data=None, headers=self._header, verify=False)
4748
status_code = resp.status_code
4849
body = resp.json()
@@ -52,8 +53,22 @@ def get_db_list(self):
5253
del resp
5354
return db_list
5455

56+
def load_db(self, db_name: str, db_path: str):
    """Ask Epsilla Cloud to load the database identified by *db_name*.

    The cloud db id is derived from the name: the literal ``db_`` prefix is
    removed and underscores become dashes (the inverse of how the examples
    build ``db_name`` from a cloud db id).

    db_path is accepted for interface compatibility with the local
    vectordb client's ``load_db(db_name, db_path)`` but is unused here —
    the cloud service resolves storage itself.

    Returns (status_code, response_body_dict).
    """
    # str.removeprefix strips the literal "db_" prefix. The original used
    # lstrip("db_"), which strips any run of the characters 'd', 'b', '_'
    # and corrupts names like "db_data" (-> "ata" instead of "data").
    db_id = db_name.removeprefix("db_").replace("_", "-")
    req_url = f"{self._baseurl}/vectordb/{db_id}/load"
    resp = requests.post(url=req_url, data=None, headers=self._header, verify=False)
    status_code = resp.status_code
    body = resp.json()
    resp.close()
    return status_code, body
65+
66+
def use_db(self, db_name: str):
    """Select the active database by name, without a server round-trip.

    Derives the cloud db id from *db_name* (drop the literal ``db_``
    prefix, map underscores to dashes) and stores it on the client.
    Returns a synthetic (status_code, body) pair shaped like the
    HTTP-backed methods so callers can treat it uniformly.
    """
    # str.removeprefix strips the literal "db_" prefix. The original used
    # lstrip("db_"), which strips any run of the characters 'd', 'b', '_'
    # and corrupts names like "db_data" (-> "ata" instead of "data").
    self._db_id = db_name.removeprefix("db_").replace("_", "-")
    return 200, {"statusCode": 200, "message": "", "result": {}}
69+
5570
def get_db_info(self, db_id: str):
56-
req_url = "{}/vectordb/{}".format(self._baseurl, db_id)
71+
req_url = f"{self._baseurl}/vectordb/{db_id}"
5772
resp = requests.get(url=req_url, data=None, headers=self._header, verify=False)
5873
status_code = resp.status_code
5974
body = resp.json()
@@ -62,7 +77,7 @@ def get_db_info(self, db_id: str):
6277
return status_code, body
6378

6479
def get_db_statistics(self, db_id: str):
65-
req_url = "{}/vectordb/{}/statistics".format(self._baseurl, db_id)
80+
req_url = f"{self._baseurl}/vectordb/{db_id}/statistics"
6681
req_data = None
6782
resp = requests.get(
6883
url=req_url, data=json.dumps(req_data), headers=self._header, verify=False
@@ -121,7 +136,7 @@ def __init__(
121136
def list_tables(self):
122137
if self._db_id is None:
123138
raise Exception("[ERROR] db_id is None!")
124-
req_url = "{}/table/list".format(self._baseurl)
139+
req_url = f"{self._baseurl}/table/list"
125140
resp = requests.get(url=req_url, headers=self._header, verify=False)
126141
status_code = resp.status_code
127142
body = resp.json()
@@ -140,7 +155,7 @@ def create_table(
140155
raise Exception("[ERROR] db_id is None!")
141156
if table_fields is None:
142157
table_fields = []
143-
req_url = "{}/table/create".format(self._baseurl)
158+
req_url = f"{self._baseurl}/table/create"
144159
req_data = {"name": table_name, "fields": table_fields}
145160
if indices is not None:
146161
req_data["indices"] = indices
@@ -157,7 +172,7 @@ def create_table(
157172
def drop_table(self, table_name: str):
158173
if self._db_id is None:
159174
raise Exception("[ERROR] db_id is None!")
160-
req_url = "{}/table/delete?table_name={}".format(self._baseurl, table_name)
175+
req_url = f"{self._baseurl}/table/delete?table_name={table_name}"
161176
req_data = {}
162177
resp = requests.delete(
163178
url=req_url, data=json.dumps(req_data), headers=self._header, verify=False
@@ -170,7 +185,7 @@ def drop_table(self, table_name: str):
170185

171186
# Insert data into table
172187
def insert(self, table_name: str, records: list[dict]):
173-
req_url = "{}/data/insert".format(self._baseurl)
188+
req_url = f"{self._baseurl}/data/insert"
174189
req_data = {"table": table_name, "data": records}
175190
resp = requests.post(
176191
url=req_url, data=json.dumps(req_data), headers=self._header, verify=False
@@ -182,7 +197,7 @@ def insert(self, table_name: str, records: list[dict]):
182197
return status_code, body
183198

184199
def upsert(self, table_name: str, records: list[dict]):
185-
req_url = "{}/data/insert".format(self._baseurl)
200+
req_url = f"{self._baseurl}/data/insert"
186201
req_data = {"table": table_name, "data": records, "upsert": True}
187202
resp = requests.post(
188203
url=req_url, data=json.dumps(req_data), headers=self._header, verify=False
@@ -207,7 +222,7 @@ def query(
207222
with_distance: Optional[bool] = False,
208223
facets: Optional[list[dict]] = None,
209224
):
210-
req_url = "{}/data/query".format(self._baseurl)
225+
req_url = f"{self._baseurl}/data/query"
211226
req_data = {"table": table_name, "limit": limit}
212227

213228
if response_fields is None:
@@ -272,7 +287,7 @@ def delete(
272287
"[WARN] Both primary_keys and ids are prvoided, will use primary keys by default!"
273288
)
274289

275-
req_url = "{}/data/delete".format(self._baseurl)
290+
req_url = f"{self._baseurl}/data/delete"
276291
req_data = {"table": table_name}
277292
if primary_keys is not None:
278293
req_data["primaryKeys"] = primary_keys
@@ -335,7 +350,7 @@ def get(
335350
else:
336351
req_data["facets"] = facets
337352

338-
req_url = "{}/data/get".format(self._baseurl)
353+
req_url = f"{self._baseurl}/data/get"
339354
resp = requests.post(
340355
url=req_url, data=json.dumps(req_data), headers=self._header, verify=False
341356
)

pyepsilla/vectordb/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.3.8"
1+
__version__ = "0.3.9"

0 commit comments

Comments
 (0)