词典匹配

备份下这份代码

# 使用逆向最大匹配算法进行槽位提取

class DictSlot:
    """Dictionary-based slot extractor using reverse maximum matching.

    The dictionary maps a surface word to a list of
    ``{"slot_name": ..., "slot_value": ...}`` entries and is exposed as
    ``self.slot_dict``.
    """

    def __init__(self, dict_path):
        """Load the slot dictionary from the text file at ``dict_path``.

        Each usable line has the form ``word<TAB>slot_name+++slot_value``.
        Lines that do not split into exactly two tab-separated fields, or
        whose second field does not split into exactly two ``+++``-separated
        parts, are skipped. A word may occur on several lines; all of its
        entries are kept in file order.
        """
        slot_dict = {}
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM.
        # With plain "utf8" the BOM stays attached to the first word
        # (str.strip() does not remove U+FEFF), so that entry never matches.
        with open(dict_path, encoding="utf-8-sig") as f:
            for line in f:
                fields = line.strip().split("\t")
                if len(fields) != 2:
                    continue
                word, slot_spec = fields
                slot_info = slot_spec.split("+++")
                if len(slot_info) != 2:
                    continue
                slot_name, slot_value = slot_info
                slot_dict.setdefault(word, []).append(
                    {"slot_name": slot_name, "slot_value": slot_value}
                )
        self.slot_dict = slot_dict

    def predict(self, query):
        """Extract slots from ``query`` by reverse maximum matching.

        Starting at the end of ``query``, the longest substring ending at the
        current position that is a dictionary word is taken as a match. After
        a match the end position jumps to the start of the matched word, so
        matches never overlap; without a match it moves back one character.

        Returns a list of dicts with keys ``slot_word``, ``slot_name``,
        ``slot_value``, ``slot_start`` and ``slot_end``, where
        ``query[slot_start:slot_end] == slot_word``. One dict is produced per
        dictionary entry of a matched word. Matches are ordered from the end
        of the query towards its beginning.
        """
        slot_get = []
        end = len(query)
        while end > 0:
            match_start = None
            # Increasing start => decreasing length, so the first hit is the
            # longest dictionary word ending at ``end``.
            for start in range(end):
                word = query[start:end]
                entries = self.slot_dict.get(word)
                if not entries:
                    continue
                for item in entries:
                    slot_get.append({
                        "slot_word": word,
                        "slot_name": item["slot_name"],
                        "slot_value": item["slot_value"],
                        "slot_start": start,
                        "slot_end": end,
                    })
                match_start = start
                break
            # Continue to the left of the match, or shrink the window by one.
            end = end - 1 if match_start is None else match_start
        return slot_get

if __name__ == "__main__":
    # Demo: load the dictionary from the working directory, run the
    # extractor on two sample queries, then dump the loaded dictionary.
    dict_sloter = DictSlot("slot_dict.txt")
    for sample_query in ("我想去南京的南京博物馆", "我想看斗罗大陆"):
        print(dict_sloter.predict(sample_query))
    print(dict_sloter.slot_dict)

参考:https://mp.weixin.qq.com/s/i77FjD5a7s4kr3Cmw4HdXQ

Last updated

Was this helpful?