-
Notifications
You must be signed in to change notification settings - Fork 1k
. #48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
. #48
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,11 +30,11 @@ | |
| from ..utils import preprocess_nxgraph | ||
|
|
||
|
|
||
| def line_loss(y_true, y_pred): | ||
| def line_loss(y_true, y_pred): ##no problem | ||
| return -K.mean(K.log(K.sigmoid(y_true*y_pred))) | ||
|
|
||
|
|
||
| def create_model(numNodes, embedding_size, order='second'): | ||
| def create_model(numNodes, embedding_size, order='second'): ##no problem | ||
|
|
||
| v_i = Input(shape=(1,)) | ||
| v_j = Input(shape=(1,)) | ||
|
|
@@ -48,12 +48,18 @@ def create_model(numNodes, embedding_size, order='second'): | |
|
|
||
| v_i_emb_second = second_emb(v_i) | ||
| v_j_context_emb = context_emb(v_j) | ||
|
|
||
| first = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb]) | ||
| second = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb]) | ||
|
|
||
| try: | ||
| first = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keepdims=False), name='first_order')([v_i_emb, v_j_emb]) | ||
| except(TypeError): | ||
| first = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keep_dims=False), name='first_order')([v_i_emb, v_j_emb]) | ||
| try: | ||
| second = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keepdims=False), name='second_order')([v_i_emb_second, v_j_context_emb]) | ||
| except(TypeError): | ||
| second = Lambda(lambda x: tf.reduce_sum( | ||
| x[0]*x[1], axis=-1, keep_dims=False), name='second_order')([v_i_emb_second, v_j_context_emb]) | ||
| if order == 'first': | ||
| output_list = [first] | ||
| elif order == 'second': | ||
|
|
@@ -205,9 +211,13 @@ def get_embeddings(self,): | |
|
|
||
| return self._embeddings | ||
|
|
||
| def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1): | ||
| def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1, times=1,workers=tf.data.experimental.AUTOTUNE,use_multiprocessing=True): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这里的修改是为什么呀?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `tf.data.experimental.AUTOTUNE` 可以让程序自动地选择最优的线程并行个数
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 当然用户也可以自己选择 `workers` 的数量,这里就是作为默认的设定 |
||
| self.reset_training_config(batch_size, times) | ||
| hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch, | ||
| verbose=verbose) | ||
| try: | ||
| hist = self.model.fit_generator(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch, | ||
| verbose=verbose,workers=workers,use_multiprocessing=use_multiprocessing) | ||
| except: | ||
| hist = self.model.fit(self.batch_it, epochs=epochs, initial_epoch=initial_epoch, steps_per_epoch=self.steps_per_epoch, | ||
| verbose=verbose,workers=workers,use_multiprocessing=use_multiprocessing) | ||
|
|
||
| return hist | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,70 +1,82 @@ | ||
| # -*- coding:utf-8 -*- | ||
|
|
||
| """ | ||
|
|
||
|
|
||
|
|
||
| Author: | ||
|
|
||
| Weichen Shen,wcshen1994@163.com | ||
|
|
||
|
|
||
|
|
||
| Reference: | ||
|
|
||
| [1] Grover A, Leskovec J. node2vec: Scalable feature learning for networks[C]//Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining. ACM, 2016: 855-864.(https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf) | ||
|
|
||
|
|
||
|
|
||
| """ | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这一大块为啥删除了?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 修改的时候直接复制进来,给替换掉了…… |
||
|
|
||
| from gensim.models import Word2Vec | ||
| import pandas as pd | ||
| import networkx as nx | ||
| import csrgraph as cg | ||
|
|
||
| from ..walker import RandomWalker | ||
|
|
||
|
|
||
| class Node2Vec: | ||
| import gc | ||
| import numba | ||
| import time | ||
| import numpy as np | ||
| import pandas as pd | ||
| from gensim.models import word2vec | ||
|
|
||
| def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0): | ||
|
|
||
| self.graph = graph | ||
| self._embeddings = {} | ||
| self.walker = RandomWalker( | ||
| graph, p=p, q=q, use_rejection_sampling=use_rejection_sampling) | ||
|
|
||
| print("Preprocess transition probs...") | ||
| self.walker.preprocess_transition_probs() | ||
|
|
||
| self.sentences = self.walker.simulate_walks( | ||
| num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1) | ||
| class Node2Vec: | ||
|
|
||
| def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): | ||
| def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0,threads=1): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 新的函数参数比旧的少了……
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0):` 部分的参数移动到 `train` 的部分了,`use_rejection_sampling` 这个没有实现
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `use_rejection_sampling` 如果需要增加这个的 numba 实现我可以写一下
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 那相当于把原来有的功能给删除了,这里还是要保持一致的 |
||
|
|
||
| if type(threads) is not int: | ||
| raise ValueError('Threads must be int!') | ||
| if walk_length<1: | ||
| raise ValueError('Walk lengh must be >1') | ||
| if num_walks<1: | ||
| raise ValueError('num_walks must be >1') | ||
| if type(walk_length) is not int or type(num_walks) is not int: | ||
| raise ValueError('Walk length or num_walks must be int') | ||
|
|
||
| self.walk_length=walk_length | ||
| self.num_walks=num_walks | ||
| self.p=p | ||
| self.q=q | ||
| self.threads=threads | ||
| # todo numba-based use_rejection_samplling | ||
|
|
||
| if not isinstance(graph, cg.csrgraph): | ||
| self.graph = cg.csrgraph(graph, threads=self.threads) | ||
| if self.graph.threads != self.threads: | ||
| self.graph.set_threads(self.threads) | ||
| self.node_names = self.graph.names | ||
| if type(self.node_names[0]) not in [int, str, np.int32, np.uint32, | ||
| np.int64, np.uint64]: | ||
| raise ValueError("Graph node names must be int or str!") | ||
|
|
||
|
|
||
|
|
||
| def train(self, embed_size=128, window_size=5, workers=3, iters=5 **kwargs): | ||
| print('Start making random walks...') | ||
| start=time.time() | ||
| self.sentences=self.graph.random_walks(walklen=self.walk_length,epochs=self.num_walks, \ | ||
| return_weight=self.p,neighbor_weight=self.q).astype(str).tolist() # It seems gensim word2vec only accept list and string types data | ||
| end=time.time() | ||
| print('Random walks uses '+str(end-start)+' seconds') | ||
|
|
||
|
|
||
|
|
||
| kwargs["sentences"] = self.sentences | ||
| kwargs["min_count"] = kwargs.get("min_count", 0) | ||
| kwargs["size"] = embed_size | ||
| kwargs["sg"] = 1 | ||
| kwargs["hs"] = 0 # node2vec not use Hierarchical Softmax | ||
| kwargs["hs"] = 0 # node2vec don't need to use Hierarchical Softmax | ||
| kwargs["workers"] = workers | ||
| kwargs["window"] = window_size | ||
| kwargs["iter"] = iter | ||
|
|
||
| kwargs["iter"] = iters | ||
| print("Learning embedding vectors...") | ||
| model = Word2Vec(**kwargs) | ||
| model = word2vec.Word2Vec(sentences=self.sentences,**kwargs) ##Avoid to copy self.sentences in order to save the memory | ||
| print("Learning embedding vectors done!") | ||
|
|
||
| self.w2v_model = model | ||
| self.node_dict = dict(zip(np.arange(len(self.node_names)).astype(str),self.node_names)) # map the node_names to the original node names | ||
|
|
||
| return model | ||
|
|
||
| def get_embeddings(self,): | ||
| if self.w2v_model is None: | ||
| print("model not train") | ||
| return {} | ||
|
|
||
| self._embeddings = {} | ||
| for word in self.graph.nodes(): | ||
| self._embeddings[word] = self.w2v_model.wv[word] | ||
| for word in self.node_dict.keys(): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 为什么用 `self.node_dict` 替换 `self.graph`?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. csrgraph 是以 scipy 形式存储图的,所以节点的名字变成了 0,1,2,3……这样的形式,`node_dict` 是 networkx 和 csrgraph 之间的节点名字的对应关系,比如原来节点叫“XXX”,对应的新节点名可能是 1 这样 |
||
| self._embeddings[self.node_dict[word]] = self.w2v_model.wv[self.node_dict[word]] | ||
|
|
||
| return self._embeddings | ||
Uh oh!
There was an error while loading. Please reload this page.