Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions syncmaster/db/migrations/versions/2025-08-10_0012_update_ts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# SPDX-FileCopyrightText: 2023-2024 MTS PJSC
# SPDX-License-Identifier: Apache-2.0
"""Update text search

Revision ID: 0012
Revises: 0011
Create Date: 2025-08-10 20:03:02.105470

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "0012"
down_revision = "0011"
branch_labels = None
depends_on = None


def upgrade() -> None:
op.drop_index(op.f("idx_connection_search_vector"), table_name="connection", postgresql_using="gin")
op.drop_column("connection", "search_vector")
op.drop_column("group", "search_vector")
op.drop_index(op.f("idx_transfer_search_vector"), table_name="transfer", postgresql_using="gin")
op.drop_column("transfer", "search_vector")
op.drop_column("queue", "search_vector")

op.add_column(
"connection",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"\n to_tsvector('russian', coalesce(name, ''))\n || to_tsvector('simple', coalesce(name, '')) \n || to_tsvector('simple', coalesce(data->>'host', ''))\n || to_tsvector(\n 'simple',\n translate(\n coalesce(data->>'host', ''),\n './-_:\\', ' '\n )\n )\n ",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use the same formatting here as in the models, since the \n-escaped string is hard to read

persisted=True,
),
nullable=False,
),
)
op.create_index(
"idx_connection_search_vector",
"connection",
["search_vector"],
unique=False,
postgresql_using="gin",
)
op.add_column(
"group",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"\n to_tsvector('russian', coalesce(name, ''))\n || to_tsvector('simple', coalesce(name, '')) \n ",
persisted=True,
),
nullable=False,
),
)
op.add_column(
"transfer",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"\n to_tsvector('russian', coalesce(name, ''))\n\n || to_tsvector('simple', coalesce(name, ''))\n || to_tsvector('simple', coalesce(source_params->>'table_name', ''))\n || to_tsvector('simple', coalesce(target_params->>'table_name', ''))\n || to_tsvector('simple', coalesce(source_params->>'directory_path', ''))\n || to_tsvector('simple', coalesce(target_params->>'directory_path', ''))\n\n || to_tsvector('simple',\n translate(coalesce(source_params->>'table_name', ''), './-_:\\', ' ')\n )\n || to_tsvector('simple',\n translate(coalesce(target_params->>'table_name', ''), './-_:\\', ' ')\n )\n || to_tsvector('simple',\n translate(coalesce(source_params->>'directory_path', ''), './-_:\\', ' ')\n )\n || to_tsvector('simple',\n translate(coalesce(target_params->>'directory_path', ''), './-_:\\', ' ')\n )\n ",
persisted=True,
),
nullable=False,
),
)
op.create_index("idx_transfer_search_vector", "transfer", ["search_vector"], unique=False, postgresql_using="gin")
op.add_column(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the index for queue missing?

"queue",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"\n to_tsvector('russian', coalesce(name, ''))\n || to_tsvector('simple', coalesce(name, ''))\n ",
persisted=True,
),
nullable=False,
),
)


def downgrade() -> None:
op.drop_index("idx_transfer_search_vector", table_name="transfer", postgresql_using="gin")
op.drop_column("transfer", "search_vector")
op.drop_column("group", "search_vector")
op.drop_index("idx_connection_search_vector", table_name="connection", postgresql_using="gin")
op.drop_column("connection", "search_vector")
op.drop_column("queue", "search_vector")

op.add_column(
"transfer",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"to_tsvector('english'::regconfig, (((((((((((((((((((name)::text || ' '::text) || COALESCE(json_extract_path_text(source_params, VARIADIC ARRAY['table_name'::text]), ''::text)) || ' '::text) || COALESCE(json_extract_path_text(target_params, VARIADIC ARRAY['table_name'::text]), ''::text)) || ' '::text) || COALESCE(json_extract_path_text(source_params, VARIADIC ARRAY['directory_path'::text]), ''::text)) || ' '::text) || COALESCE(json_extract_path_text(target_params, VARIADIC ARRAY['directory_path'::text]), ''::text)) || ' '::text) || translate((name)::text, './'::text, ' '::text)) || ' '::text) || COALESCE(translate(json_extract_path_text(source_params, VARIADIC ARRAY['table_name'::text]), './'::text, ' '::text), ''::text)) || ' '::text) || COALESCE(translate(json_extract_path_text(target_params, VARIADIC ARRAY['table_name'::text]), './'::text, ' '::text), ''::text)) || ' '::text) || COALESCE(translate(json_extract_path_text(source_params, VARIADIC ARRAY['directory_path'::text]), './'::text, ' '::text), ''::text)) || ' '::text) || COALESCE(translate(json_extract_path_text(target_params, VARIADIC ARRAY['directory_path'::text]), './'::text, ' '::text), ''::text)))",
persisted=True,
),
autoincrement=False,
nullable=False,
),
)
op.create_index(
op.f("idx_transfer_search_vector"),
"transfer",
["search_vector"],
unique=False,
postgresql_using="gin",
)
op.add_column(
"group",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed("to_tsvector('english'::regconfig, (name)::text)", persisted=True),
autoincrement=False,
nullable=False,
),
)
op.add_column(
"connection",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed(
"to_tsvector('english'::regconfig, (((((name)::text || ' '::text) || COALESCE(json_extract_path_text(data, VARIADIC ARRAY['host'::text]), ''::text)) || ' '::text) || COALESCE(translate(json_extract_path_text(data, VARIADIC ARRAY['host'::text]), '.'::text, ' '::text), ''::text)))",
persisted=True,
),
autoincrement=False,
nullable=False,
),
)
op.create_index(
op.f("idx_connection_search_vector"),
"connection",
["search_vector"],
unique=False,
postgresql_using="gin",
)
op.add_column(
"queue",
sa.Column(
"search_vector",
postgresql.TSVECTOR(),
sa.Computed("to_tsvector('english'::regconfig, (name)::text)", persisted=True),
Comment thread
dolfinus marked this conversation as resolved.
autoincrement=False,
nullable=False,
),
)
14 changes: 9 additions & 5 deletions syncmaster/db/models/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,15 @@ class Connection(Base, ResourceMixin, TimestampMixin):
TSVECTOR,
Computed(
"""
to_tsvector(
'english'::regconfig,
name || ' ' ||
COALESCE(json_extract_path_text(data, 'host'), '') || ' ' ||
COALESCE(translate(json_extract_path_text(data, 'host'), '.', ' '), '')
to_tsvector('russian', coalesce(name, ''))
|| to_tsvector('simple', coalesce(name, ''))
|| to_tsvector('simple', coalesce(data->>'host', ''))
|| to_tsvector(
'simple',
translate(
coalesce(data->>'host', ''),
'./-_:\\', ' '
)
)
""",
persisted=True,
Expand Down
10 changes: 7 additions & 3 deletions syncmaster/db/models/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,17 @@ class Group(Base, TimestampMixin):

owner: Mapped[User] = relationship(User)
queue: Mapped[Queue] = relationship(back_populates="group", cascade="all, delete-orphan")

search_vector: Mapped[str] = mapped_column(
TSVECTOR,
Computed("to_tsvector('english'::regconfig, name)", persisted=True),
Computed(
"""
to_tsvector('russian', coalesce(name, ''))
|| to_tsvector('simple', coalesce(name, ''))
""",
persisted=True,
),
nullable=False,
deferred=True,
doc="Full-text search vector",
)

def __repr__(self) -> str:
Expand Down
9 changes: 7 additions & 2 deletions syncmaster/db/models/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,15 @@ class Queue(Base, ResourceMixin, TimestampMixin):

search_vector: Mapped[str] = mapped_column(
TSVECTOR,
Computed("to_tsvector('english'::regconfig, name)", persisted=True),
Computed(
"""
to_tsvector('russian', coalesce(name, ''))
|| to_tsvector('simple', coalesce(name, ''))
""",
persisted=True,
),
nullable=False,
deferred=True,
doc="Full-text search vector",
)

def __repr__(self):
Expand Down
31 changes: 19 additions & 12 deletions syncmaster/db/models/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,25 @@ class Transfer(
TSVECTOR,
Computed(
"""
to_tsvector(
'english'::regconfig,
name || ' ' ||
COALESCE(json_extract_path_text(source_params, 'table_name'), '') || ' ' ||
COALESCE(json_extract_path_text(target_params, 'table_name'), '') || ' ' ||
COALESCE(json_extract_path_text(source_params, 'directory_path'), '') || ' ' ||
COALESCE(json_extract_path_text(target_params, 'directory_path'), '') || ' ' ||
translate(name, './', ' ') || ' ' ||
COALESCE(translate(json_extract_path_text(source_params, 'table_name'), './', ' '), '') || ' ' ||
COALESCE(translate(json_extract_path_text(target_params, 'table_name'), './', ' '), '') || ' ' ||
COALESCE(translate(json_extract_path_text(source_params, 'directory_path'), './', ' '), '') || ' ' ||
COALESCE(translate(json_extract_path_text(target_params, 'directory_path'), './', ' '), '')
to_tsvector('russian', coalesce(name, ''))

|| to_tsvector('simple', coalesce(name, ''))
Copy link
Copy Markdown
Member

@dolfinus dolfinus Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add translate(name, punctuation, spaces) to all vectors, in case someone uses a hostname in a connection/transfer name

|| to_tsvector('simple', coalesce(source_params->>'table_name', ''))
|| to_tsvector('simple', coalesce(target_params->>'table_name', ''))
|| to_tsvector('simple', coalesce(source_params->>'directory_path', ''))
|| to_tsvector('simple', coalesce(target_params->>'directory_path', ''))

|| to_tsvector('simple',
translate(coalesce(source_params->>'table_name', ''), './-_:\\', ' ')
)
|| to_tsvector('simple',
translate(coalesce(target_params->>'table_name', ''), './-_:\\', ' ')
)
|| to_tsvector('simple',
translate(coalesce(source_params->>'directory_path', ''), './-_:\\', ' ')
)
|| to_tsvector('simple',
translate(coalesce(target_params->>'directory_path', ''), './-_:\\', ' ')
)
""",
persisted=True,
Expand Down
14 changes: 11 additions & 3 deletions syncmaster/db/repositories/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@
from abc import ABC
from typing import Any, Generic, TypeVar

from sqlalchemy import ScalarResult, Select, delete, func, insert, select, update
from sqlalchemy import (
ColumnElement,
ScalarResult,
Select,
delete,
func,
insert,
select,
update,
)
from sqlalchemy.exc import NoResultFound
from sqlalchemy.ext.asyncio import AsyncSession

Expand Down Expand Up @@ -98,8 +107,7 @@ async def _paginate_scalar_result(self, query: Select, page: int, page_size: int
page_size=page_size,
)

def _construct_vector_search(self, query: Select, search_query: str) -> Select:
ts_query = func.plainto_tsquery("english", search_query)
def _construct_vector_search(self, query: Select, ts_query: ColumnElement) -> Select:
query = (
query.where(self._model.search_vector.op("@@")(ts_query))
.add_columns(func.ts_rank(self._model.search_vector, ts_query).label("rank"))
Expand Down
6 changes: 3 additions & 3 deletions syncmaster/db/repositories/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from syncmaster.db.models import Connection
from syncmaster.db.repositories.repository_with_owner import RepositoryWithOwner
from syncmaster.db.repositories.search import make_tsquery
from syncmaster.db.utils import Pagination
from syncmaster.exceptions import EntityNotFoundError, SyncmasterError
from syncmaster.exceptions.connection import (
Expand Down Expand Up @@ -35,9 +36,8 @@ async def paginate(
Connection.group_id == group_id,
)
if search_query:
processed_query = search_query.replace(".", " ")
combined_query = f"{search_query} {processed_query}"
stmt = self._construct_vector_search(stmt, combined_query)
ts_query = make_tsquery(search_query)
stmt = self._construct_vector_search(stmt, ts_query)

if connection_type is not None:
stmt = stmt.where(Connection.type.in_(connection_type))
Expand Down
10 changes: 7 additions & 3 deletions syncmaster/db/repositories/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from syncmaster.db.models import Group, GroupMemberRole, User, UserGroup
from syncmaster.db.repositories.base import Repository
from syncmaster.db.repositories.search import make_tsquery
from syncmaster.db.utils import Pagination, Permission
from syncmaster.exceptions import EntityNotFoundError, SyncmasterError
from syncmaster.exceptions.group import (
Expand All @@ -33,7 +34,8 @@ async def paginate_all(
) -> Pagination:
stmt = select(Group)
if search_query:
stmt = self._construct_vector_search(stmt, search_query)
ts_query = make_tsquery(search_query)
stmt = self._construct_vector_search(stmt, ts_query)

paginated_result = await self._paginate_scalar_result(
query=stmt.order_by(Group.name),
Expand Down Expand Up @@ -78,7 +80,8 @@ async def paginate_for_user(

# apply search filtering if a search query is provided
if search_query:
owned_groups_stmt = self._construct_vector_search(owned_groups_stmt, search_query)
ts_query = make_tsquery(search_query)
owned_groups_stmt = self._construct_vector_search(owned_groups_stmt, ts_query)

# get total count of owned groups
total_owned_groups = (
Expand Down Expand Up @@ -114,7 +117,8 @@ async def paginate_for_user(

# apply search filtering if a search query is provided
if search_query:
user_groups_stmt = self._construct_vector_search(user_groups_stmt, search_query)
ts_query = make_tsquery(search_query)
user_groups_stmt = self._construct_vector_search(user_groups_stmt, ts_query)

# get total count of user groups
total_user_groups = (
Expand Down
4 changes: 3 additions & 1 deletion syncmaster/db/repositories/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from syncmaster.db.models import Group, GroupMemberRole, Queue, User, UserGroup
from syncmaster.db.repositories.repository_with_owner import RepositoryWithOwner
from syncmaster.db.repositories.search import make_tsquery
from syncmaster.db.utils import Permission
from syncmaster.exceptions import EntityNotFoundError, SyncmasterError
from syncmaster.exceptions.group import GroupNotFoundError
Expand Down Expand Up @@ -59,7 +60,8 @@ async def paginate(
Queue.group_id == group_id,
)
if search_query:
stmt = self._construct_vector_search(stmt, search_query)
ts_query = make_tsquery(search_query)
stmt = self._construct_vector_search(stmt, ts_query)

return await self._paginate_scalar_result(
query=stmt.order_by(Queue.id),
Expand Down
Loading
Loading