# Quick example: train a Word2Vec model on `common_texts` (imported from
# gensim.test.utils in the snippet further down in these notes).
model = Word2Vec(
    common_texts,
    size=100,
    window=5,
    min_count=1,
    workers=4,
)
方法一:model.save(path)
- 保存: model.save(path)
- 加载:Word2Vec.load(path)
- 模型可继续训练
训练是流式的,这意味着句子可以是一个生成器,动态地从磁盘读取输入数据,而无需将整个语料库加载到 RAM 中。这也意味着我们可以在以后继续训练模型。
使用以下命令初始化模型
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec

# Build a path for "word2vec.model" with gensim's get_tmpfile helper.
# NOTE: `path` is not used below; save()/load() use the literal file name
# "word2vec.model" instead.
path = get_tmpfile("word2vec.model")

# Train on the `common_texts` corpus imported above.
# NOTE: `size` is the gensim 3.x keyword; gensim >= 4.0 renamed it to
# `vector_size`.
model = Word2Vec(common_texts, size=100, window=5, min_count=1, workers=4)
model.save("word2vec.model")

# Load the full model back (a model saved this way can keep training).
model = Word2Vec.load("word2vec.model")

# Continue training. train() expects an iterable of tokenised sentences,
# i.e. a list of lists of words -- a flat list of strings would be read as
# one "sentence" per string, split into single characters.
model.train([["hello", "world"]], total_examples=1, epochs=1)

# Query the model. Word vectors are accessed through `model.wv`; indexing
# the model object directly is deprecated in gensim 3.x and removed in 4.0.
vector = model.wv['computer']  # get numpy vector of a word
sims = model.wv.most_similar("computer", topn=10)  # get other similar words

# Once training is finished (no more updates, only queries), keep just the
# KeyedVectors instance and drop the full model.
word_vectors = model.wv
del model
方法二: model.wv.save(path)
- 保存:model.wv.save(path)
- 加载:wv = KeyedVectors.load(path, mmap='r')
- 模型不可继续训练
from gensim.models import KeyedVectors

# Store just the words + their trained embeddings.
# NOTE: `model` must still be in scope here; the earlier snippet in these
# notes ends with `del model`, so re-create or reload it before running this.
word_vectors = model.wv
word_vectors.save("word2vec.wordvectors")

# Load back with memory-mapping = read-only, shared across processes.
wv = KeyedVectors.load("word2vec.wordvectors", mmap='r')

# Get numpy vector of a word.
vector = wv['computer']
link guide: https://radimrehurek.com/gensim/models/word2vec.html