-
Notifications
You must be signed in to change notification settings - Fork 961
Expand file tree
/
Copy pathpostgres_models.py
More file actions
68 lines (54 loc) · 2.42 KB
/
postgres_models.py
File metadata and controls
68 lines (54 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from __future__ import annotations
from pgvector.sqlalchemy import Vector
from sqlalchemy import Index
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
# Define the models
class Base(DeclarativeBase):
    """Declarative base class; all ORM models in this module inherit from it."""

    pass
class Item(Base):
    """A catalog item row, with optional vector embeddings for similarity search."""

    __tablename__ = "items"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    type: Mapped[str] = mapped_column()
    brand: Mapped[str] = mapped_column()
    name: Mapped[str] = mapped_column()
    description: Mapped[str] = mapped_column()
    price: Mapped[float] = mapped_column()

    # Embeddings for different models:
    embedding_ada002: Mapped[Vector] = mapped_column(Vector(1536), nullable=True)  # ada-002
    embedding_nomic: Mapped[Vector] = mapped_column(Vector(768), nullable=True)  # nomic-embed-text

    def to_dict(self, include_embedding: bool = False):
        """Return this row as a plain dict of column name -> value.

        Embedding columns are stripped unless include_embedding is True.
        """
        data = {col.name: getattr(self, col.name) for col in self.__table__.columns}
        if include_embedding:
            # Column keys are always present; get() keeps the stored value
            # (falling back to [] only if a key were somehow missing).
            data["embedding_ada002"] = data.get("embedding_ada002", [])
            data["embedding_nomic"] = data.get("embedding_nomic", [])
        else:
            # pop without a default raises KeyError just like `del` did.
            data.pop("embedding_ada002")
            data.pop("embedding_nomic")
        return data

    def to_str_for_rag(self):
        """Render the full item as one line of text for RAG prompt context."""
        return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}"

    def to_str_for_embedding(self):
        """Render only the fields used to compute this item's embedding text."""
        return f"Name: {self.name} Description: {self.description} Type: {self.type}"
"""
**Define HNSW index to support vector similarity search**
We use the vector_cosine_ops access method (cosine distance)
since it works for both normalized and non-normalized vector embeddings
If you know your embeddings are normalized,
you can switch to inner product for potentially better performance.
The index operator should match the operator used in queries.
"""
# Table the HNSW indexes below attach to.
table_name = Item.__tablename__

# HNSW index for cosine-distance search over the ada-002 embeddings.
# BUG FIX: the index-name string was missing its f-prefix, so the index
# would have been created with the literal name
# "hnsw_index_for_cosine_{table_name}_embedding_ada002" (braces included)
# instead of interpolating the table name like the nomic index below.
index_ada002 = Index(
    f"hnsw_index_for_cosine_{table_name}_embedding_ada002",
    Item.embedding_ada002,
    postgresql_using="hnsw",
    postgresql_with={"m": 16, "ef_construction": 64},
    postgresql_ops={"embedding_ada002": "vector_cosine_ops"},
)

# HNSW index for cosine-distance search over the nomic-embed-text embeddings.
index_nomic = Index(
    f"hnsw_index_for_cosine_{table_name}_embedding_nomic",
    Item.embedding_nomic,
    postgresql_using="hnsw",
    postgresql_with={"m": 16, "ef_construction": 64},
    postgresql_ops={"embedding_nomic": "vector_cosine_ops"},
)