2025-Graduation-Design
diff --git a/‎app/diary/router.py‎
Lines changed: 215 additions & 109 deletions b/‎app/diary/router.py‎
Lines changed: 215 additions & 109 deletions
@@ -7,7 +7,8 @@
 from app.statistics.models import EmotionStatistics
 from app.user.auth import get_current_user
 from app.diary.models import Diary, RecommendedSong
-from app.diary.schemas import DiaryCreateRequest, DiaryUpdateRequest, DiaryResponse, DiaryCountResponse, SongResponse
+from app.diary.schemas import DiaryCreateRequest, DiaryUpdateRequest, DiaryResponse, DiaryCountResponse, SongResponse, \
+    DiaryPreviewResponse
 from app.user.models import User
 from app.embedding.models import kobert, save_diary_embedding, split_sentences, get_user_preferred_genres, \
     get_songs_by_genre, get_song_embeddings, calculate_similarity
@@ -25,112 +26,28 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-@router.post("", response_model=DiaryResponse, status_code=201, summary="일기 작성 & 노래 추천",
-             description="일기를 작성하면 자동으로 임베딩을 진행하고, 사용자의 선호 장르 내에서 가장 유사한 노래를 추천합니다.")
-async def create_diary(
-        diary_request: DiaryCreateRequest,
-        current_user: User = Depends(get_current_user),
-        db: Session = Depends(get_db),
-        mongodb=Depends(get_mongodb)
-):
+
+def get_recently_recommended_song_ids(session: Session, user_id: int, limit: int = 5) -> List[int]:
     """
-    1. 새로운 일기를 DB에 저장
-    2. Kiwi를 이용해 문장 분리 후 KoBERT로 임베딩
-    3. DiaryEmbedding 테이블에 저장
-    4. 유저의 선호 장르 기반으로 MongoDB에서 노래 리스트 가져오기
-    5. 가사와 일기 텍스트 임베딩 값 비교 후 가장 유사한 노래 추천
+    Return the distinct song IDs recommended for the user's most recent diaries.
+
+    Args:
+        session: SQLAlchemy session used to run the query (required, no default).
+        user_id: ID of the user whose diaries are inspected.
+        limit: Number of most recent diaries (by ``created_at``, newest first)
+            to look at. This bounds the diaries, not the number of song IDs
+            returned.
+
+    Returns:
+        De-duplicated ``RecommendedSong.song_id`` values attached to those
+        diaries. The outer query applies no ordering.
     """
+    # IDs of the user's `limit` newest diaries, used as an IN (...) filter below.
+    subquery = (
+        session.query(Diary.id)
+        .filter(Diary.user_id == user_id)
+        .order_by(Diary.created_at.desc())
+        .limit(limit)
+        .subquery()
+    )
 
-    with transactional_session(db) as session:
-        sentences = split_sentences(diary_request.content)
-        logger.info(f"[일기 문장 분리] - 원본: {diary_request.content}")
-        for idx, sentence in enumerate(sentences):
-            logger.info(f"    ▶ 문장 {idx + 1}: {sentence}")
-
-        embeddings = [kobert.get_embedding(sentence) for sentence in sentences if sentence.strip()]
-        if not embeddings:
-            logger.warning("KoBERT 임베딩 결과가 없음")
-            return {"message": "임베딩할 문장이 없습니다."}
-
-        logger.info(f"[KoBERT 임베딩 완료] - {len(embeddings)}개 문장 처리 완료")
-
-        # 2) 유저 선호 장르 가져오기
-        user_id = current_user.id
-        genre_names = get_user_preferred_genres(session, user_id)
-        if not genre_names:
-            logger.warning(f"유저 {user_id}의 선호 장르가 설정되지 않음")
-            return {"message": "유저의 선호 장르가 설정되지 않았습니다."}
-
-        logger.info(f"🎵 [유저 선호 장르] - {genre_names}")
-
-        # 3) MongoDB에서 해당 장르의 노래 가져오기
-        songs = await get_songs_by_genre(mongodb, genre_names)
-        if not songs:
-            logger.warning("해당 장르에 노래가 없음")
-            return {"message": "해당 장르에 노래가 없습니다."}
-
-        song_ids = [song["id"] for song in songs]
-        logger.info(f"🎼 [가져온 노래 개수] - {len(songs)}")
-
-        # 4) 노래 가사 임베딩 불러오기 및 유사도 계산
-        song_embeddings = get_song_embeddings(session, song_ids)
-        best_match = calculate_similarity(embeddings[0], song_embeddings)  # 첫 번째 문장만 비교
-
-        if not best_match:
-            logger.warning("유사한 가사를 찾을 수 없음")
-            return {"message": "유사한 가사를 찾을 수 없습니다."}
-
-        song_id, best_idx, similarity_score = best_match
-        matching_song = next((song for song in songs if song["id"] == str(song_id)), None)
-
-        if matching_song is None:
-            logger.error(f"추천된 song_id {song_id}가 MongoDB에서 찾을 수 없음")
-            return {"message": "추천된 노래를 찾을 수 없습니다."}
-
-        # best_idx가 가사 범위를 벗어나지 않는지 확인
-        if best_idx >= len(matching_song["lyrics"]):
-            logger.error(f"best_idx {best_idx}가 가사 범위를 초과함 (가사 개수: {len(matching_song['lyrics'])})")
-            return {"message": "유사한 가사를 찾을 수 없습니다."}
-
-        start = max(0, best_idx - 1)
-        end = min(len(matching_song["lyrics"]), best_idx + 2)
-
-        context_lyrics = matching_song["lyrics"][start:end]
-        best_lyric = " ".join(context_lyrics)
-
-        # 5) 모든 과정 완료 후 일기 저장 (트랜잭션 보장)
-        new_diary = Diary(
-            user_id=current_user.id,
-            content=diary_request.content
-        )
-        session.add(new_diary)
-        session.commit()
-        session.refresh(new_diary)
-
-        logger.info(f"[📖 일기 저장 완료] - {new_diary.content}")
-
-        save_diary_embedding(session, new_diary.id, embeddings)
-
-        response_data = {
-            "id": new_diary.id,
-            "user_id": new_diary.user_id,
-            "content": new_diary.content,
-            "created_at": new_diary.created_at,
-            "updated_at": new_diary.updated_at,
-            "recommended_song": {
-                "song_id": song_id,
-                "song_name": matching_song.get("song_name", "제목 없음"),
-                "best_lyric": best_lyric,
-                "similarity_score": round(float(similarity_score), 4),
-                "album_image": matching_song.get("album_image", "이미지 없음"),
-                "artist": matching_song.get("artist_name_basket", ["아티스트 없음"]),
-                "genre": matching_song.get("genre", "장르 없음")
-            }
-        }
-
-        logger.info(f" [응답 데이터] - {json.dumps(response_data, ensure_ascii=False, indent=4, default=str)}")
+    song_ids = (
+        session.query(RecommendedSong.song_id)
+        .filter(RecommendedSong.diary_id.in_(subquery))
+        .distinct()
+        .all()
+    )
 
-        return response_data
+    # Rows come back as one-element tuples, e.g. [(song_id1,), (song_id2,), ...]; flatten them.
+    return [sid[0] for sid in song_ids]
 
 @router.post("/main", response_model=DiaryResponse, status_code=201,
              summary="일기 작성 & Top-3 유사 가사 기반 노래 추천",
@@ -191,9 +108,19 @@ async def create_diary_with_music_recommend_top3(
 
         logger.info(f"    ▶ 최종 전체 감정 ID: {emotion_id_full}, 확신도 총합: {confidence_full:.4f}")
 
-        # 4) 가장 감정이 강한 문장 선택
-        best_sentence, best_emotion_id, best_confidence = max(sentence_confidences, key=lambda x: x[2])
-        logger.info(f"[감정이 가장 강한 문장 선택] {best_sentence} (감정 ID={best_emotion_id}, 확신도={best_confidence:.4f})")
+        # 4) Top-1 감정과 일치하는 문장 중 가장 확신도 높은 문장 선택
+        top1_emotion_id = emotion_id_full  # 모델 기준 감정 ID
+        filtered_sentences = [
+            (sentence, emo_id, conf)
+            for sentence, emo_id, conf in sentence_confidences
+            if emo_id == top1_emotion_id
+        ]
+
+        if not filtered_sentences:
+            raise HTTPException(status_code=500, detail="Top 감정에 해당하는 문장이 없습니다.")
+
+        best_sentence, best_emotion_id, best_confidence = max(filtered_sentences, key=lambda x: x[2])
+        logger.info(f"[Top 감정에서 가장 강한 문장 선택] {best_sentence} (감정 ID={best_emotion_id}, 확신도={best_confidence:.4f})")
 
         # 5) best_sentence를 KoBERT 임베딩
         combined_embedding = kobert.get_embedding(best_sentence)
@@ -276,12 +203,23 @@ async def create_diary_with_music_recommend_top3(
         # 이후 raw_top, top_3, recommended_songs 생성은 기존 코드 그대로 유지
         raw_top = heapq.nlargest(10, heap, key=lambda x: (x[0], x[1]))
 
+        recent_song_ids = get_recently_recommended_song_ids(user_id=current_user.id, limit=5)
+
         seen_song_ids = set()
         top_3 = []
         for sim, _, match in raw_top:
-            if match["song_id"] not in seen_song_ids:
-                top_3.append((sim, match))
-                seen_song_ids.add(match["song_id"])
+            song_id = match["song_id"]
+
+            if song_id in seen_song_ids:
+                continue
+
+            if song_id in recent_song_ids:
+                logger.info(f"최근 추천된 곡 {song_id} 제외")
+                continue
+
+            top_3.append((sim, match))
+            seen_song_ids.add(song_id)
+
             if len(top_3) >= 3:
                 break
 
@@ -370,6 +308,174 @@ async def create_diary_with_music_recommend_top3(
         logger.info("추천 결과: %s", json.dumps(response_data, indent=2, ensure_ascii=False, default=str))
         return response_data
 
+@router.post("/preview", summary="일기 감정 분석 + 추천 미리보기", response_model=DiaryPreviewResponse)
+async def preview_diary_with_music_recommend_top3(
+    diary_request: DiaryCreateRequest,
+    current_user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+    mongodb = Depends(get_mongodb),
+    redis = Depends(get_redis)
+):
+    # NOTE: documented with comments instead of a docstring on purpose. FastAPI
+    # publishes a handler docstring as the endpoint description, which would
+    # change the generated API docs.
+    #
+    # Analyze a diary draft and return the emotion analysis plus up to three
+    # song recommendations. No Diary row is added here (the response carries
+    # the placeholder id -1); the only writes are lyric-embedding entries put
+    # into the Redis cache.
+    #
+    # Steps:
+    #   1. Split the content into sentences and predict an emotion for each.
+    #   2. Sum each sentence's top-k probabilities (>= 0.05) per emotion and
+    #      pick the emotion with the largest total.
+    #   3. Among sentences predicted as that emotion, take the most confident
+    #      one and embed it with KoBERT.
+    #   4. Score every lyric line of the user's preferred-genre songs by cosine
+    #      similarity and keep up to three distinct songs from the 10 best lines.
+    #
+    # Raises HTTPException:
+    #   400 - no sentences to analyze, or no preferred genres configured.
+    #   404 - no songs for the genres, or no song could be scored.
+    #   500 - no emotion votes, or no sentence matches the top emotion.
+    sentences = split_sentences(diary_request.content)
+    if not sentences:
+        raise HTTPException(status_code=400, detail="분석할 문장이 없습니다.")
+
+    sentence_emotions = []
+    sentence_confidences = []
+    emotion_vote_counter = {}
+
+    for sentence in sentences:
+        emotion_id, probabilities = predict_emotion(sentence)
+        confidence = max(probabilities)
+
+        # torch.topk raises when k exceeds the number of classes, so cap k.
+        topk = torch.topk(torch.tensor(probabilities), k=min(3, len(probabilities)))
+        top_pairs = list(zip(topk.indices.tolist(), topk.values.tolist()))
+
+        sentence_confidences.append((sentence, emotion_id, confidence))
+        sentence_emotions.append({
+            "sentence": sentence,
+            "predicted_emotion_id": emotion_id,
+            "confidence": round(confidence, 4),
+            # Scores below 0.01 are dropped from the per-sentence report.
+            "top3": [
+                {"emotion_id": emo_id, "score": round(score, 4)}
+                for emo_id, score in top_pairs if score >= 0.01
+            ]
+        })
+
+        # Weighted vote: only scores of at least 0.05 count towards the diary-level emotion.
+        for emo_id, score in top_pairs:
+            if score < 0.05:
+                continue
+            emotion_vote_counter[emo_id] = emotion_vote_counter.get(emo_id, 0) + score
+
+    if not emotion_vote_counter:
+        raise HTTPException(status_code=500, detail="감정 분석 실패")
+
+    top1_emotion_id = max(emotion_vote_counter, key=emotion_vote_counter.get)
+    confidence_full = emotion_vote_counter[top1_emotion_id]
+    emotion_id_db = model_index_to_db_emotion_id[top1_emotion_id]
+
+    # Strongest sentence among those predicted as the top emotion.
+    # NOTE(review): the voted top emotion need not be any sentence's predicted
+    # emotion, in which case this returns 500; kept as-is to match /main.
+    filtered_sentences = [
+        (s, eid, c) for (s, eid, c) in sentence_confidences if eid == top1_emotion_id
+    ]
+    if not filtered_sentences:
+        raise HTTPException(status_code=500, detail="Top 감정 문장 없음")
+    best_sentence, best_emotion_id, best_confidence = max(filtered_sentences, key=lambda x: x[2])
+
+    combined_embedding = kobert.get_embedding(best_sentence)
+
+    genre_names = get_user_preferred_genres(db, current_user.id)
+    if not genre_names:
+        raise HTTPException(status_code=400, detail="선호 장르가 설정되지 않았습니다.")
+
+    songs = await get_songs_by_genre(mongodb, genre_names)
+    if not songs:
+        raise HTTPException(status_code=404, detail="해당 장르에 노래가 없습니다.")
+
+    song_id_map = {int(song["id"]): song for song in songs}
+    song_ids = list(song_id_map)
+    cached_values = await redis.mget([f"lyrics_emb:{song_id}" for song_id in song_ids])
+
+    combined_np = np.array(combined_embedding)
+    # The query norm does not depend on the song, so compute it once.
+    norm_query = np.linalg.norm(combined_np)
+
+    # One (similarity, insertion_index, match) entry per lyric line. The
+    # insertion index breaks ties so the match dicts are never compared.
+    candidates = []
+    for song_id, cached in zip(song_ids, cached_values):
+        try:
+            if cached:
+                lyrics_embedding = np.array(json.loads(cached))
+            else:
+                result = db.execute(
+                    text("SELECT embedding FROM songLyricsEmbedding WHERE song_id = :song_id"),
+                    {"song_id": song_id}
+                ).fetchone()
+                if not result:
+                    continue
+                lyrics_embedding = np.array(json.loads(result[0]))
+                # Cache the embedding for 30 days.
+                await redis.set(f"lyrics_emb:{song_id}", json.dumps(lyrics_embedding.tolist()), ex=60*60*24*30)
+
+            # Skip anything that is not a 2-D array.
+            if len(lyrics_embedding.shape) != 2:
+                continue
+
+            song = song_id_map[song_id]
+            lyrics = song.get("lyrics", [])
+            # Skip songs whose embedding rows do not line up with their lyric lines.
+            if len(lyrics) < 1 or len(lyrics_embedding) != len(lyrics):
+                continue
+
+            # Cosine similarity of each row against the query; the epsilon guards against zero norms.
+            dot = np.dot(lyrics_embedding, combined_np)
+            norm_block = np.linalg.norm(lyrics_embedding, axis=1)
+            similarities = dot / (norm_block * norm_query + 1e-8)
+
+            for idx, similarity in enumerate(similarities):
+                candidates.append((
+                    similarity,
+                    len(candidates),
+                    {
+                        "song_id": song_id,
+                        "lyric_chunk": [lyrics[idx]],
+                        "similarity": similarity,
+                        "metadata": {
+                            "song_name": song.get("song_name"),
+                            "album_image": song.get("album_image"),
+                            "artist": song.get("artist_name_basket", []),
+                            "genre": song.get("genre")
+                        }
+                    }
+                ))
+        except Exception as e:
+            # Best effort: one bad song must not fail the whole preview.
+            logger.error(f"[preview] 노래 유사도 처리 오류: {e}")
+            continue
+
+    # Take the 10 best lyric lines, then keep the first three distinct songs among them.
+    raw_top = heapq.nlargest(10, candidates, key=lambda x: (x[0], x[1]))
+    seen_song_ids = set()
+    top_3 = []
+    for sim, _, match in raw_top:
+        if match["song_id"] not in seen_song_ids:
+            top_3.append((sim, match))
+            seen_song_ids.add(match["song_id"])
+        if len(top_3) >= 3:
+            break
+
+    if not top_3:
+        raise HTTPException(status_code=404, detail="적합한 노래를 찾을 수 없습니다.")
+
+    recommended_songs = [
+        {
+            "song_id": match["song_id"],
+            "song_name": match["metadata"]["song_name"],
+            "best_lyric": " ".join(match["lyric_chunk"]),
+            "similarity_score": round(float(sim), 4),
+            "album_image": match["metadata"]["album_image"],
+            "artist": match["metadata"]["artist"],
+            "genre": match["metadata"]["genre"]
+        }
+        for sim, match in top_3
+    ]
+
+    # Single timestamp so created_at and updated_at are always equal in a preview.
+    now = datetime.utcnow()
+
+    return {
+        "id": -1,
+        "user_id": current_user.id,
+        "content": diary_request.content,
+        "emotiontype_id": emotion_id_db,
+        "confidence": confidence_full,
+        "created_at": now,
+        "updated_at": now,
+        "recommended_songs": recommended_songs,
+        "top_emotions": [
+            {"emotion_id": emo_id, "score": round(score, 4)}
+            for emo_id, score in sorted(emotion_vote_counter.items(), key=lambda x: -x[1])[:3]
+        ],
+        "best_sentence": {
+            "sentence": best_sentence,
+            "predicted_emotion_id": best_emotion_id,
+            "confidence": round(best_confidence, 4)
+        },
+        "sentence_emotions": sentence_emotions
+    }
 
 @router.get("/{diary_id}", response_model=DiaryResponse,
             summary="일기 조회",