diff --git a/app/api/v1/endpoints/video_catalog.py b/app/api/v1/endpoints/video_catalog.py
index c620681..e79ff7c 100644
--- a/app/api/v1/endpoints/video_catalog.py
+++ b/app/api/v1/endpoints/video_catalog.py
@@ -363,9 +363,6 @@ async def get_related_videos_for_video(
     ] = 5,
 ):
     """Return a list of videos related to the given video.
-
-    The underlying implementation is currently stubbed out and will return the
-    latest videos (excluding the source video) with a random relevance score.
     """
 
     related_items = await recommendation_service.get_related_videos(
diff --git a/app/services/recommendation_service.py b/app/services/recommendation_service.py
index 2e01aed..fca130d 100644
--- a/app/services/recommendation_service.py
+++ b/app/services/recommendation_service.py
@@ -16,13 +16,13 @@
 async def get_related_videos(
     video_id: VideoID, limit: int = 10
 ) -> List[RecommendationItem]:
-    """Return a stubbed *related videos* list.
-
-    In a future iteration this will call into a real recommendation engine that
-    analyses the content of the referenced video to find similar items. For the
-    moment we simply return the latest videos (excluding the reference video)
-    and assign each a random relevance score.
-    """
+    """Return up to *limit* videos related to *video_id*.
+
+    Candidates come from ``video_service.get_recommended_videos``, queried
+    with the reference video's ``content_features``.  The reference video
+    itself and rows whose title was already added are skipped.  An empty list
+    is returned when the reference video cannot be found.
+    """
 
     from opentelemetry import trace
     import time
@@ -42,18 +42,28 @@ async def get_related_videos(
     if target_video is None:
         return []
 
-    latest_summaries, _total = await video_service.list_latest_videos(
-        page=1, page_size=limit + 5
+    latest_summaries, _total = await video_service.get_recommended_videos(
+        query_vector=target_video.content_features, page=1, page_size=limit + 5
     )
 
     related_items: List[RecommendationItem] = []
+    seen_titles = set()
 
     for summary in latest_summaries:
         if summary.videoId == video_id:
             # Skip the source video itself
             continue
+
+        if summary.title in seen_titles:
+            # Skip titles we've already added to the list
+            # ...sometimes we get duplicate rows
+            continue
+
         if len(related_items) >= limit:
             break
+
+        seen_titles.add(summary.title)
+
         related_items.append(
             RecommendationItem(
                 videoId=summary.videoId,
diff --git a/app/services/video_service.py b/app/services/video_service.py
index d0d6f51..937425d 100644
--- a/app/services/video_service.py
+++ b/app/services/video_service.py
@@ -556,6 +556,31 @@ async def list_latest_videos(
         source_table_name=VIDEOS_TABLE_NAME,
     )
 
+
+async def get_recommended_videos(
+    query_vector: Optional[List[float]],
+    page: int,
+    page_size: int,
+    db_table: Optional[AstraDBCollection] = None,
+) -> Tuple[List[VideoSummary], int]:
+    """Return one page of videos ranked against *query_vector*.
+
+    ``query_vector`` is forwarded to ``list_videos_with_query`` as the sort
+    value for ``content_features``.  When it is ``None`` no query is issued
+    and ``([], 0)`` is returned.
+    """
+    if query_vector is None:
+        return [], 0
+
+    return await list_videos_with_query(
+        {},
+        page,
+        page_size=page_size,
+        sort_options={"content_features": query_vector},
+        db_table=db_table,
+        source_table_name=VIDEOS_TABLE_NAME,
+    )
+
 
 async def list_videos_by_tag(
     tag: str,