메모리 기반의 collaborative filtering. 하용호님의 이 슬라이드를 참고하여 구현하였습니다. 본 구현은 파이썬으로 구현되어 있습니다.
- Redis
- python3
- snappy
brew install snappy
apt install libsnappy-dev
pip install tiny-elephant
프로젝트를 클론하시고 다음 코드를 실행해보세요.
from collections import Counter
from in_memory_cluster import InMemoryCluster
# ๋ฐ์ดํฐ๋ ๋ค์๊ณผ ๊ฐ์ด ๋์
๋๋ฆฌ๋ก ๊ตฌ์ฑํด์ค๋๋ค.
data = {
"user1": ['airplane', 'banana', 'cat', 'dog', 'elephant', 'fruit', 'google', 'hobby', 'internet', 'jogging'],
"user2": ['cat', 'dog', 'elephant', 'fruit', 'google', 'jogging', 'kotlin'],
"user3": ['java', 'rx', 'yahoo', 'zoo'],
"user4": ['apple', 'banana'],
"user5": ['airplane'],
"user6": ['bobby', 'dog'],
"user7": ['train', 'cat', 'exercise', 'healthy'],
"user8": ['healthy', 'dog', 'exercise', 'banana', 'youtube'],
"user9": ['java', 'javascript', 'rx', 'zoo', 'yahoo', 'google', 'github'],
"user10": ['cook', 'bobby', 'dog', 'youtube'],
"user11": ['dance', 'airplane', 'trip', 'elephant', 'fruit', 'google']
}
# ์ธ์คํด์ค ์ด๊ธฐํ
imc = InMemoryCluster(
minhash_host='localhost:6379',
secondary_index_host='localhost:6379',
minhash_db=1,
secondary_index_db=2,
seed=1
)
# MinHash DB์ Secondary Index DB๋ฅผ ๋ ๋ฆฝ๋๋ค.
# ์์ ์์๋ ๊ฐ๊ฐ 1๋ฒ DB์ 2๋ฒ DB๋ฅผ ์ฌ์ฉํฉ๋๋ค.
imc.flush_all()
# init_cluster ๋ฉ์๋๋ ๋ค์๊ณผ ๊ฐ์ด ๋์ํฉ๋๋ค.
# 1. ๋ฐ์ดํฐ๋ฅผ ๋๊ฒจ ๋ฐ์ Minhash ๋ฅผ ์์ฑํ๊ณ ์ ์ฅํฉ๋๋ค. (db 1).
# 2. MinHash๋ฅผ ๋ถ๋ฌ์ Secondary Index๋ฅผ ์์ฑํฉ๋๋ค.
# 3. ๊ฐ๊ฐ์ Secondary Index์ ํค๋ฅผ ๋ฃ์ด์ค๋๋ค.(db 2)
imc.init_cluster(data)
users = data.keys()
for user in users:
# ๋น์ทํ ์ ์ 10๋ช
์ ๋ฝ์ต๋๋ค.
print(user, imc.most_common(user, count=10))
# ์ค๋ณต์ ๋ํ ๊ฑฑ์ ์ ํ์ค ํ์๊ฐ ์์ด์.
update_data = {'user1': ['airplane', 'banana', 'cat'], 'user5': ['hobby', 'internet', 'jogging', 'banana', 'cat', 'dog']}
# init_cluster ์ดํ์๋ update_cluster๋ฅผ ์ฌ์ฉํด ์ฃผ์
์ผ ํฉ๋๋ค.
# init_cluster ๋ update_cluster ๋ณด๋ค ๋น ๋ฆ
๋๋ค.
# ๊ทธ๋ฌ๋ ์ฒ์ ๋ฐ์ดํฐ๋ฅผ ์ง์ด๋ฃ์ผ์ค ๋๋ init_cluster๋ฅผ ์ฌ์ฉํด์ฃผ์ธ์.
imc.update_cluster(update_data)
print('======== UPDATED!! =========')
# user5๋ ์ ๋ณด๋ค ๋ ๋น์ทํด ์ก๋ค๋ ๊ฑธ ํ์ธํ์ค ์ ์์ด์.
for user in users:
print(user, imc.most_common(user, count=10))๊ทธ๋ฆฌ๊ณ ๊ฒฐ๊ณผ๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค.
user1 [(b'user2', 60), (b'user11', 45), (b'user5', 16), (b'user8', 13), (b'user7', 12), (b'user4', 11), (b'user6', 8), (b'user10', 5), (b'user9', 5)]
user2 [(b'user1', 60), (b'user11', 36), (b'user6', 14), (b'user10', 12), (b'user7', 12), (b'user9', 9), (b'user8', 9)]
user3 [(b'user9', 75)]
user4 [(b'user8', 23), (b'user1', 11)]
user5 [(b'user11', 25), (b'user1', 16)]
user6 [(b'user10', 67), (b'user8', 17), (b'user2', 14), (b'user1', 8)]
user7 [(b'user8', 42), (b'user1', 12), (b'user2', 12)]
user8 [(b'user7', 42), (b'user10', 37), (b'user4', 23), (b'user6', 17), (b'user1', 13), (b'user2', 9)]
user9 [(b'user3', 75), (b'user2', 9), (b'user11', 7), (b'user1', 5)]
user10 [(b'user6', 67), (b'user8', 37), (b'user2', 12), (b'user1', 5)]
user11 [(b'user1', 45), (b'user2', 36), (b'user5', 25), (b'user9', 7)]
======== UPDATED!! =========
user1 [(b'user5', 93), (b'user2', 60), (b'user11', 45), (b'user8', 13), (b'user7', 12), (b'user4', 11), (b'user6', 8), (b'user10', 5), (b'user9', 5)]
user2 [(b'user1', 60), (b'user11', 36), (b'user5', 28), (b'user6', 14), (b'user10', 12), (b'user7', 12), (b'user9', 9), (b'user8', 9)]
user3 [(b'user9', 75)]
user4 [(b'user8', 23), (b'user5', 15), (b'user1', 11)]
user5 [(b'user1', 93), (b'user2', 28), (b'user8', 20), (b'user6', 15), (b'user7', 15), (b'user4', 15), (b'user11', 13), (b'user10', 10)]
user6 [(b'user10', 67), (b'user8', 17), (b'user5', 15), (b'user2', 14), (b'user1', 8)]
user7 [(b'user8', 42), (b'user5', 15), (b'user1', 12), (b'user2', 12)]
user8 [(b'user7', 42), (b'user10', 37), (b'user4', 23), (b'user5', 20), (b'user6', 17), (b'user1', 13), (b'user2', 9)]
user9 [(b'user3', 75), (b'user2', 9), (b'user11', 7), (b'user1', 5)]
user10 [(b'user6', 67), (b'user8', 37), (b'user2', 12), (b'user5', 10), (b'user1', 5)]
user11 [(b'user1', 45), (b'user2', 36), (b'user5', 13), (b'user9', 7)]
- 성능향상
- 다른언어로 구현하기