-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexactdecode.py
56 lines (47 loc) · 1.86 KB
/
exactdecode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Exact match entity linking.
Takes an input entity and returns the knowledge base candidate that is an exact string match to the input. Optionally includes exact match to an intermediate pivot language, using Wikipedia language links to obtain the English KB candidate.
Author: Shruti Rijhwani ([email protected])
Last update: 2019-04-15
"""
from collections import defaultdict
import codecs
import numpy as np
import sys
import argparse
import time
from collections import OrderedDict
from utils.constants import DELIM,MAX_SCORE,ID_IDX,SOURCE_IDX,TARGET_IDX
class ExactDataLoader():
    """Load the lookup tables used for exact-match entity linking.

    Both tables are read from UTF-8 text files whose fields are separated
    by DELIM; the field positions come from ID_IDX, SOURCE_IDX and
    TARGET_IDX.
    """

    def __init__(self, kb_filename, links_filename=None):
        """Read the KB file and, when one is given, the links file.

        Args:
            kb_filename: Path of the knowledge base file.
            links_filename: Optional path of the links file. When omitted
                (or otherwise falsy), no links table is loaded.
        """
        # Dict produced by load_kb().
        self.kb = self.load_kb(kb_filename)
        # Dict produced by load_links(), or None when no links file is given.
        self.links = self.load_links(links_filename) if links_filename else None

    def load_kb(self, filename):
        """Parse the knowledge base file into a dict.

        Each line is stripped and split on DELIM. Lines that do not split
        into exactly two fields are skipped.

        Args:
            filename: Path of the UTF-8 encoded knowledge base file.

        Returns:
            Dict mapping the ID_IDX field of each line to the SOURCE_IDX
            field converted to int. A later line overwrites an earlier one
            with the same key.

        Raises:
            ValueError: If the SOURCE_IDX field of a line is not an integer.
        """
        # NOTE(review): ExactDecode looks input strings up among these keys,
        # so the ID_IDX field is presumably the entity string here - confirm
        # against utils.constants and the KB file format.
        entries = {}
        with codecs.open(filename, 'r', 'utf8') as f:
            for line in f:
                fields = line.strip().split(DELIM)
                if len(fields) != 2:
                    continue
                entries[fields[ID_IDX]] = int(fields[SOURCE_IDX])
        return entries

    def load_links(self, filename):
        """Parse the links file into a dict.

        Each line is stripped and split on DELIM. Lines with too few fields
        to contain both the ID_IDX and TARGET_IDX positions (for example a
        blank trailing line) are skipped, mirroring how load_kb() ignores
        malformed lines, instead of aborting the load with an IndexError.

        Args:
            filename: Path of the UTF-8 encoded links file.

        Returns:
            Dict mapping the TARGET_IDX field of each line to the ID_IDX
            field converted to int. A later line overwrites an earlier one
            with the same key.

        Raises:
            ValueError: If the ID_IDX field of a line is not an integer.
        """
        entries = {}
        with codecs.open(filename, 'r', 'utf8') as f:
            for line in f:
                fields = line.strip().split(DELIM)
                # Not enough fields to index safely: treat as malformed.
                if len(fields) <= max(ID_IDX, TARGET_IDX):
                    continue
                entries[fields[TARGET_IDX]] = int(fields[ID_IDX])
        return entries
class ExactDecode():
    """Exact string-match lookup over the tables held by a data loader."""

    def __init__(self, data_loader):
        """Keep a reference to the loader that provides the lookup tables.

        Args:
            data_loader: Object exposing a ``kb`` mapping and a ``links``
                attribute (a mapping, or None when no links were loaded).
        """
        self.data_loader = data_loader

    def decode(self, input_string, pivot=True):
        """Look up *input_string* by exact match.

        The ``kb`` table is consulted first. If it has no entry and *pivot*
        is true, the ``links`` table is consulted next.

        Args:
            input_string: The string to look up.
            pivot: Whether to fall back to the ``links`` table when the
                string is not a key of ``kb``.

        Returns:
            A ``(value, MAX_SCORE)`` tuple for the first table containing
            the string, or ``False`` when no table contains it.

        Raises:
            AssertionError: If the fallback is needed (*pivot* is true and
                the string is not in ``kb``) but the loader has no links
                table.
        """
        kb = self.data_loader.kb
        if input_string in kb:
            return (kb[input_string], MAX_SCORE)

        if not pivot:
            return False

        links = self.data_loader.links
        # Explicit raise rather than an ``assert`` statement so the check
        # still runs under ``python -O``; the exception type is unchanged.
        if links is None:
            raise AssertionError(
                "pivot lookup requested but the data loader has no links table"
            )
        if input_string in links:
            return (links[input_string], MAX_SCORE)
        return False