词典匹配
备份下这份代码
# Dictionary-based slot extraction using reverse maximum matching.
class DictSlot:
    """Extract slots from a query by matching substrings against a dictionary.

    The dictionary file holds one entry per line in the form
    ``word<TAB>slot_name+++slot_value``. Lines that do not split into exactly
    two tab-separated fields, or whose second field does not split into
    exactly two ``+++``-separated parts, are skipped. A word may appear on
    several lines; every (slot_name, slot_value) pair is kept.

    Attributes:
        slot_dict: Maps each dictionary word to a list of
            ``{"slot_name": ..., "slot_value": ...}`` dicts, in file order.
    """

    def __init__(self, dict_path):
        """Load the slot dictionary from *dict_path*.

        Args:
            dict_path: Path of the UTF-8 encoded dictionary file.

        Raises:
            OSError: If the file cannot be opened.
        """
        slot_dict = {}
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise stay glued to the first word and make that
        # entry impossible to match.
        with open(dict_path, encoding="utf-8-sig") as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                slot_info = fields[1].split("+++")
                if len(slot_info) != 2:
                    continue
                slot_name, slot_value = slot_info
                slot_dict.setdefault(fields[0], []).append(
                    {"slot_name": slot_name, "slot_value": slot_value}
                )
        self.slot_dict = slot_dict

    def predict(self, query):
        """Return the dictionary matches found in *query*, right to left.

        For the current right boundary, candidate substrings are tried from
        the longest (starting at index 0) to the shortest. On a hit the
        boundary jumps to the start of the matched word, so reported matches
        never overlap; without a hit the boundary moves one character left.

        Args:
            query: Text to search. ``None`` or an empty string yields ``[]``.

        Returns:
            A list of dicts with the keys ``slot_word``, ``slot_name``,
            ``slot_value``, ``slot_start`` (inclusive) and ``slot_end``
            (exclusive), ordered from the rightmost match to the leftmost.
            A word with several dictionary entries contributes one dict per
            entry.
        """
        if not query:
            return []
        slot_get = []
        end = len(query)
        while end > 0:
            for start in range(end):
                word = query[start:end]
                if word in self.slot_dict:
                    slot_get.extend(
                        {
                            "slot_word": word,
                            "slot_name": item["slot_name"],
                            "slot_value": item["slot_value"],
                            "slot_start": start,
                            "slot_end": end,
                        }
                        for item in self.slot_dict[word]
                    )
                    # Continue the scan to the left of the matched word.
                    end = start
                    break
            else:
                # Nothing ends at this boundary; shrink it by one character.
                end -= 1
        return slot_get
if __name__ == "__main__":
    # Demo: load the dictionary from the working directory, run two sample
    # queries through the matcher, then dump the loaded dictionary.
    dict_sloter = DictSlot("slot_dict.txt")
    for sample_query in ("我想去南京的南京博物馆", "我想看斗罗大陆"):
        print(dict_sloter.predict(sample_query))
    print(dict_sloter.slot_dict)
参考:https://mp.weixin.qq.com/s/i77FjD5a7s4kr3Cmw4HdXQ
Last updated
Was this helpful?