最近工作中用到了依存句法分析的相关知识,特此记录一个简单示例:
使用哈工大的pyltp工具包进行依存句法分析:
1.分词;2.词性标注;3.依存句法分析。
from pyltp import *
import os
# Minimal pyltp pipeline: word segmentation -> POS tagging -> dependency parsing.
LTP_DATA_DIR = r"path"  # directory that holds the LTP model files

# Model file locations. The names are spelled "*_path" consistently; the
# earlier "*_apth" spelling did not match the names used by the load() calls
# below and caused a NameError.
cws_model_path = os.path.join(LTP_DATA_DIR, "cws.model")     # segmentation model
pos_model_path = os.path.join(LTP_DATA_DIR, "pos.model")     # POS tagging model
par_model_path = os.path.join(LTP_DATA_DIR, "parser.model")  # dependency parsing model

# Step 1: word segmentation.
segmentor = Segmentor()
segmentor.load(cws_model_path)
try:
    words = segmentor.segment("江苏苏州")
finally:
    segmentor.release()  # free the model even if segmentation fails
print('\t'.join(words))
# Expected output: 江苏	苏州

# Step 2: part-of-speech tagging.
postagger = Postagger()
postagger.load(pos_model_path)
try:
    postags = postagger.postag(words)
finally:
    postagger.release()
print('\t'.join(postags))
# Expected output: ns	ns

# Step 3: dependency parsing.
parser = Parser()
parser.load(par_model_path)
try:
    arcs = parser.parse(words, postags)
    # Copy what we need out of the arcs before the parser is released.
    rely_id = [arc.head for arc in arcs]
    relation = [arc.relation for arc in arcs]
finally:
    parser.release()  # the parser model was previously never released

# A head index of 0 denotes the virtual root; otherwise it is the 1-based
# position of the head word in `words`.
heads = ['root' if head_id == 0 else words[head_id - 1] for head_id in rely_id]
for word, rel, head in zip(words, relation, heads):
    print('{}({},{})'.format(rel, word, head))
# Expected output:
# ATT(江苏,苏州)
# HED(苏州,root)
依存句法关系类型如下: