"""
LexTermEval
Fine-grained automatic evaluation of legal terminology in MT output
The expected test set file is a tab-separated file with the following columns:
sentence_id, source (detokenised), reference (detokenised), hypothesis (detokenised), lemmatised_source, lemmatised_reference, lemmatised_hypothesis
"""
from spacy import load as spacy_load
from spacy.lang.it import Italian
from spacy.lang.de import German
from spacy.matcher import PhraseMatcher
from spacy.util import filter_spans
from charsplit import Splitter
import treetaggerwrapper
from Levenshtein import distance
import operator
import csv
from collections import Counter
import pickle
from hlepor import single_hlepor_score, hlepor_score
# set filepaths
termListMatchRef = r"path\to\TB_m_lemmatised.pkl" # termlist with AA lemmatised terms only, for matching purposes
termListMatchHyp = r"path\to\TB_full_lemmatised.pkl"
termListEval = r"path\to\TB_full.pkl" # full termbase with all variants and tags
testSet = r"path\to\testset"
output = r"path\for\output\tsv\file"
# instantiating compound splitter and TreeTagger
splitter = Splitter()
tagger = treetaggerwrapper.TreeTagger(TAGLANG="de")
# (black)list of prepositions and adverbs, used to prevent the compound splitter from splitting preposition+noun combinations
prep_de = [
"an", "auf", "bis", "in", "neben", "vor", "zu", "nach", "aus", "mit", "gegen", "her", "hin", "ein", "ab"
]
def lemmatise(text):
'''
TreeTagger lemmatisation (applied to German compounds after splitting)
'''
tags = tagger.tag_text(text)
mytags = treetaggerwrapper.make_tags(tags)
lemma_list = []
for tag in mytags:
try:
lemma_list.append(tag.lemma)
except AttributeError:
# if NoTag, ignore
continue
return " ".join(lemma_list)
def check_overlap(matches_de, spanSet, n):
'''
    Checks whether the n-th span in SpaCy's matches_de (output of the PhraseMatcher) has already been evaluated
    (i.e., whether it is in spanSet).
    spanSet is a set of (matched_term, span) tuples to which annotated/evaluated terms are added.
    This is done both on the German reference sentence and on the German MT hypothesis sentence, in order to
    prevent double annotation of the same term.
    n: the position (0, 1, 2, ...) in matches_de to check
    returns: a Bool (True: the match has not been evaluated yet; False: it has already been evaluated) and n
'''
span = matches_de[n]
start = span.start
end = span.end
span_de = "%s-%s" % (start, end)
    matched_term_de = span # the span itself is the matched term (valid for both reference and hypothesis docs)
if (str(matched_term_de), span_de) in spanSet:
return False, n
else:
return True, n
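# Illustrative example for check_overlap() (hypothetical values): if spanSet already contains ("Gemeinde", "3-4"),
# checking the match that covers tokens 3-4 returns (False, n), so the caller skips that span and
# considers the next candidate match instead.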
def split_compounds(sent, thr, lemma=True):
"""
Compound splitter for German sentences (https://github.com/dtuggener/CharSplit)
It splits compounds according to a given threshold (thr)
    A lower threshold (0.5/0.6) allows for higher recall without hurting precision (we are only looking for the few
    German terms corresponding to the entry of the matched Italian term, so the risk of introducing noise
    is extremely low)
sent = a sentence in Doc format
thr = a float number between 0 and 1 (0.6 high recall, 0.9 high precision)
    lemma = whether to lemmatise the elements of the split compound (slow, but necessary). Defaults to True.
returns: split sentence in Doc format
"""
split_text = [] # the "new" sentence that will be searched for matches after compound splitting
for token in sent:
split_token = splitter.split_compound(token.text) # output is a list of tuples (score, word, word)
hyp_split = split_token[0] # considering only the first splitting hypothesis
# (i.e., the one with highest score)
if hyp_split[0] > thr and hyp_split[1].lower() not in prep_de:
            # if the confidence score exceeds the threshold and the first element of the split is not a preposition
if lemma:
split_text.append(lemmatise(hyp_split[1])) # lemmatising and appending first word of compound
split_text.append(lemmatise(hyp_split[2])) # lemmatising and appending second element of compound
else: # if lemma == False
split_text.append(hyp_split[1]) # appending first word of compound (faster, no lemmatisation)
split_text.append(hyp_split[2]) # appending second element of compound (faster, no lemmatisation)
#print("Compound has been split: ", hyp_split)
else: # if score is under threshold, i.e., word is not a compound, append original unsplit token
split_text.append(token.text)
split_text = " ".join(split_text) # sentence as string
split_doc = nlp_de.make_doc(split_text)
return split_doc
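# Illustrative example for split_compounds() (hypothetical scores): splitter.split_compound("Herdenimmunität")
# returns a list of (score, part1, part2) tuples, e.g. something like [(0.7, "Herden", "Immunität"), ...];
# with thr=0.6 such a token would be split and both parts lemmatised, while a token scoring below the
# threshold (or whose first part is in prep_de) is kept unsplit.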
print("Loading data...")
with open(termListMatchRef, "rb") as termlistmatch:
id_terms = pickle.load(termlistmatch) # {id:([termsIT], [termsAA])}
with open(termListMatchHyp, "rb") as termlistmatch_full:
id_terms_full = pickle.load(termlistmatch_full) # {id:([termsIT], [termsAA|DE|AT...])}
with open(termListEval, "rb") as termlisteval:
referenceTB = pickle.load(termlisteval) # in the following format...
'''
{
'28629': ({'immunità di gregge': ('NA', 'NA'),
'immunità di comunità': ('NA', 'NA'),
'immunità di gruppo': ('NA', 'NA')},
{'Herdenimmunität': (['Südtirol', 'AT', 'DE', 'CH'], 'CNS', 'NA'),
'Herdenschutz': (['Südtirol', 'AT', 'DE', 'CH'], 'CNS', 'NA')})
}
'''
# test set contains tab-separated (id, source, reference, hypothesis, lemmatised source, lemmatised reference,
# lemmatised hypothesis)
with open(testSet, "r", encoding="utf-8") as testset:
test = testset.read().splitlines()
test_ = []
for test_ref in test:
test_.append(tuple(test_ref.split("\t")))
# list of tuples -> (id, src, ref, hyp, src_lemma, ref_lemma, hyp_lemma)
# loading/instantiating Spacy models and instantiating the PhraseMatchers for each language
print("Loading SpaCy...\n")
# nlp_de = spacy_load("de_core_news_lg")
# nlp_it = spacy_load("it_core_news_lg")
nlp_de = German()
nlp_it = Italian()
matcher_it = PhraseMatcher(nlp_it.vocab, attr="LOWER")
matcher_de = PhraseMatcher(nlp_de.vocab, attr="LOWER")
print("Starting term matching and evaluation...\n")
# for testing purposes (todo: remove after testing?)
identified_terms = []
matched_after_split = [] # see which additional matches have been found thanks to compound-splitting
# FINAL ANNOTATION DATA
# list of tuples (sentenceID, src, ref, hyp, src_l, ref_l, hyp_l, conceptID, terminology_entry, src_term, hyp_term,
# C/W, legal_system, tag, hLEPOR)
annotated_data = []
# adding ALL Italian terms to the Italian PhraseMatcher
# (with respective entry_ID from bistro to allow retrieval of German terms)
# concept-oriented: terms and variants are "grouped" in the same search pattern
# and are associated to the entry ID from bistro
for id, (it, de) in id_terms.items():
pattern_it = [nlp_it.make_doc(term) for term in it] # converting Str to Doc (needed by PhraseMatcher)
matcher_it.add(str(id), pattern_it) # adding all Italian terms to PhraseMatcher, with respective conceptID
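# Illustrative example (hypothetical values, adapted from the sample entry shown above): for id '28629' with
# it = ['immunità di gregge', 'immunità di comunità', 'immunità di gruppo'], all three variants are added
# under the same match label '28629', so matching any of them points back to the same concept entry.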
# starting automatic terminology evaluation
counter = 0
lenTestSet = len(test_)
for (id, src, ref, hyp, src_lemma, ref_lemma, hyp_lemma) in test_: # iterating over each sentence-tuple in the test-set
counter += 1
# for each sentence in test set, creating a set of tuples (matched_term, span)
# to avoid duplicate annotations in sentences with more than one term from the same concept
match_spans_de_ref = set()
match_spans_de_hyp = set()
#print("Next sentence pair...")
doc_it = nlp_it.make_doc(src_lemma) # Str to Doc (lemmatised)
doc_it_or = nlp_it.make_doc(src) # Str to Doc (original); used to retrieve the original form of the matched term
matches_it = matcher_it(doc_it, as_spans=True)
# matching terms in the source sentence;
    # as_spans=True is needed by filter_spans, which removes overlapping matches by greedily keeping the longest ones.
# to allow overlapping matches, set as_spans=False, remove filter_spans and change the following for loop
matches_it = filter_spans(matches_it)
# filtering out overlapping matches, greedily.
# "When spans overlap, the (first) longest span is preferred over shorter spans."
# https://spacy.io/api/top-level#util.filter_spans
# Same approach used by Farajian et al. 2018 (cfr. e-mail exchange with Farajian)
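    # Illustrative example (hypothetical terms): if both "imposta" and "imposta di bollo" match overlapping
    # token spans in the same source sentence, filter_spans keeps only the longer match ("imposta di bollo").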
# for match_id, start, end in matches_it: # if as_spans=False and filter_spans() is not applied
for span in matches_it: # iterating over each single term match in the Italian source sentence
start_it = span.start
end_it = span.end
match_id = span.label
span_it = "%s-%s" % (start_it, end_it)
concept_id = nlp_it.vocab.strings[match_id] # getting the concept ID of the Italian matched term
#print(concept_id)
matched_term_it = doc_it[start_it:end_it] # getting the Italian matched term by slicing the doc
matched_term_it_or = doc_it_or[start_it:end_it] # matched term in the original Italian sentence
#print(matched_term_it, "\t", matched_term_it_or)
# now checking if the correspondent German term is in the reference sentence
Ita, De = id_terms[concept_id] # getting the DE equivalents using the concept ID of the IT matched term
pattern_de = [nlp_de.make_doc(term) for term in De] # converting DE term(s) from str to Doc
matcher_de.add(concept_id, pattern_de) # adding DE term(s) to the German PhraseMatcher (NB: only AA terms)
doc_de = nlp_de.make_doc(ref_lemma) # Str to Doc
doc_de_or = nlp_de.make_doc(ref) # Str to Doc
matches_de = matcher_de(doc_de, as_spans=True) # searching for term matches in the German reference sentence
matches_de = filter_spans(matches_de) # filtering overlapping matches (greedy)
if not matches_de: # if no match is found in the German reference sentence, I retry after splitting compounds
doc_de = split_compounds(doc_de, 0.3, True) # splitting compounds and overwriting existing non-split doc_de
# TODO: CHECK THE FOLLOWING: by doing so, I am overwriting the actual original non-lemmatised DE sentence,
# and, as a consequence, I am not annotating the actual term from the original sentence, but the lemmatised
# form. It's actually not a problem, but still, it is a small inconsistency. It is necessary if using the
# original form of the matched term to point to the reference termbase (not doing it would imply wrong
# spans and wrong (original) matched terms
doc_de_or = doc_de # todo: check: if useless, remove.
# now re-trying to match on the German reference sentence with split compounds
matches_de = matcher_de(doc_de, as_spans=True) # checking for term matches in the German reference sentence
matches_de = filter_spans(matches_de) # filtering overlapping matches (greedy)
if not matches_de:
# if still no matches, the sentence is not annotated and therefore discarded from the test set
#print("[NO MATCH] German equivalent term absent in reference.")
#print(id_terms[concept_id])
#print(src, "\n", ref, "\n")
pass
else: # if matches_de (meaning a match has been found but ONLY AFTER COMPOUND SPLITTING); for testing only
# todo: remove "else" after testing
for span in matches_de: # iterating over each single term match on the sentence
start = span.start
end = span.end
match_id = span.label
matched_after_split.append((id_terms[concept_id], src, ref))
# hereafter, matches_de is/are the German match(es) in the reference sentence from either the first search
# or the search after compound splitting
if not matches_de: # if no matches in reference sentence
matcher_de.remove(concept_id)
            continue # go to the next Italian match, thereby discarding this sentence from the final test set
elif len(matches_de) == 1: # if only one match in reference, no "disambiguation" needed
span = matches_de[0]
start = span.start
end = span.end
match_id = span.label
span_de = "%s-%s" % (start, end)
matched_term_de = doc_de[start:end] # get the matched term by slicing the doc
elif len(matches_de) > 1: # if more than one match in German reference (for one match in IT)
            # here I am avoiding double annotation of the same term
for i in range(len(matches_de)):
# checking if term is not already annotated (see "check_overlap" function defined above)
whichMatch, num = check_overlap(matches_de, match_spans_de_ref, i)
if whichMatch: # meaning the term has not yet been considered and annotated
span = matches_de[num]
start = span.start
end = span.end
match_id = span.label
span_de = "%s-%s" % (start, end)
matched_term_de = doc_de[start:end] # get the matched term by slicing the doc
# add to set of already annotated terms, in order to avoid re-annotating in following iterations
match_spans_de_ref.add((str(matched_term_de), span_de))
break
#print("[MATCH] German equivalent term found in reference.")
concept_id = nlp_it.vocab.strings[match_id] # getting the concept ID
matched_term_de = doc_de[start:end] # get the matched term by slicing the doc
matched_term_de_or = doc_de_or[start:end] # todo: check. if useless, remove
#print(matched_term_de, "\t", matched_term_de_or)
identified_terms.append([concept_id, id_terms[concept_id]]) # todo: remove after testing?
#print(concept_id, "\t", id_terms[concept_id])
#print(src, "\n", ref, "\n")
# now checking for matches in the German hypothesis sentence from the machine translation system
# here I'm using the full termbase (with German terms from all legal systems, instead of only South Tyrol)
matcher_de = PhraseMatcher(nlp_de.vocab, attr="LOWER")
# RE-INSTANTIATING AN EMPTY GERMAN MATCHER INSTEAD OF REMOVING THE PATTERN FROM THE EXISTING MATCHER
# because the following --> (matcher_de.remove(concept_id)) <-- raised an anomalous error
# --> "Process finished with exit code -1073741819 (0xC0000005)"
# probably due to a bug in SpaCy, cfr. https://github.com/explosion/spaCy/issues/6148
# getting DE terms from the full lemmatised reference TB (containing all terms, not only South Tyrol)
itTerms, deTerms = id_terms_full[concept_id] # getting TB entry through concept ID
# getting German terms to be added to the PhraseMatcher and converting to Doc
terms_matcher = [nlp_de.make_doc(term) for term in deTerms]
matcher_de.add(concept_id, terms_matcher) # adding all German term(s) to the German PhraseMatcher (full TB)
doc_hyp = nlp_de.make_doc(hyp_lemma) # Str to Doc
doc_hyp_or = nlp_de.make_doc(hyp) # Str to Doc
matches_de = matcher_de(doc_hyp, as_spans=True) # checking if DE term matches in German hyp sentence
matches_de = filter_spans(matches_de) # filtering overlapping matches (greedy)
if not matches_de:
# if no match is found in the German sentence, I retry after splitting compounds
            doc_hyp = split_compounds(doc_hyp, 0.3, True) # splitting compounds and overwriting the existing non-split doc_hyp
#print(doc_hyp)
# TODO: NB: I'm overwriting (for compound-split sentences) the original non-lemmatised hyp sentence with
# the split, lemmatised version, in order to retrieve the matched term using spans. (SEE ABOVE)
doc_hyp_or = doc_hyp
# now retrying to match on German hypothesis sentence with split compounds
matches_de = matcher_de(doc_hyp, as_spans=True) # checking for DE term matches in German hyp sentence
matches_de = filter_spans(matches_de) # filtering overlapping matches (greedy)
if not matches_de:
# if still no matches... append to annotated data as wrong/omitted, NEO: Non-equivalent term/omitted.
# to assign NEO-S and NEO-NS, check Termstatus tags of German terms in entry
                # (sentenceID, src, ref, hyp, src_l, ref_l, hyp_l, conceptID,
                # terminology_entry, src_term, hyp_term, C/W, legal_system, tag, hLEPOR)
German_tags = set() # set of Termstatus tags in German terms of the entry
itTerms, deTerms = referenceTB[concept_id] # getting German terms from full termbase using ID
for deTerm, (spr, status, statusOLD) in deTerms.items():
German_tags.add(status)
# assigning NEO-S and NEO-NS according to the Termstatus tags of the German terms in the entry
                # If the entry contains standardised/recommended German terms, assign the NEO-S tag
                # (non-equivalent/omitted term given a standardised/recommended German term),
                # else assign NEO-NS (non-equivalent/omitted term without a standardised/recommended German term).
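                # Illustrative example (based on the sample referenceTB entry above): for concept '28629' both
                # German terms are tagged 'CNS', so German_tags would be {'CNS'}, "CS" is absent and NEO-NS is assigned.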
if "CS" in German_tags: # if standardised/recommended German terms in entry
tag = "NEO-S"
else: # if no standardised/recommended German terms in entry
tag = "NEO-NS"
# computing hLEPOR score
hlepor = str(float("{:.3f}".format(single_hlepor_score(ref, hyp))))
hlepor = hlepor.replace(".", ",") # localizing decimal separator for Excel
annotated_tuple = (id, src, ref, hyp, src_lemma, ref_lemma, hyp_lemma, concept_id,
id_terms_full[concept_id], str(matched_term_it), "NA", "W", "NA", tag, hlepor)
annotated_data.append(annotated_tuple)
print("\r", "%s/%s test set sentences evaluated." % (counter, lenTestSet), end="")
#print("[NO MATCH] No German equivalent found in hypothesis.")
#print(matched_term_it)
#print(doc_de)
#print(terms_matcher)
#print()
# hereafter, matches_de is/are the German match(es) in the hypothesis sentence from either the first search
# or the search after compound splitting
# if no matches_de, it has already been handled above and annotated as wrong/omitted
if not matches_de:
matcher_de.remove(concept_id)
continue
elif len(matches_de) == 1: # if only one match in hypothesis, no "disambiguation" problem
span = matches_de[0]
start = span.start
end = span.end
match_id = span.label
span_de = "%s-%s" % (start, end)
            matched_term_de = doc_hyp[start:end] # get the matched term by slicing the hypothesis doc
        elif len(matches_de) > 1: # if more than one match in the German hypothesis (for one match in IT)
for i in range(len(matches_de)):
whichMatch, num = check_overlap(matches_de, match_spans_de_hyp, i)
if whichMatch: # meaning the term has not yet been considered and annotated
span = matches_de[num]
start = span.start
end = span.end
match_id = span.label
span_de = "%s-%s" % (start, end)
                    matched_term_de = doc_hyp[start:end] # get the matched term by slicing the hypothesis doc
# add to set of already annotated terms, in order to avoid re-annotating in following iterations
match_spans_de_hyp.add((str(matched_term_de), span_de))
break
#print("[MATCH] German equivalent found in hypothesis.")
concept_id = nlp_it.vocab.strings[match_id] # getting the concept ID
matched_term_de = doc_hyp[start:end] # get the matched term by slicing the doc
matched_term_de_or = doc_hyp_or[start:end]
# which term has been matched?
        # computing Levenshtein edit distance to match the matched term with the closest term within the entry in the full TB
lev_dist_IT = []
lev_dist_DE = []
# TODO: HERE THE COMPARISON MUST BE DONE WITH THE REFERENCE TERMBASE
itTerms, deTerms = referenceTB[concept_id] # getting German terms from full termbase using ID
# Italian term
for itTerm, (status, statusOLD) in itTerms.items():
edit_distance = distance(str(matched_term_it_or), itTerm) # todo: move conversion to string up?
lev_dist_IT.append((edit_distance, itTerm))
lev_dist_IT.sort(key=operator.itemgetter(0)) # sorting by lowest Levenshtein distance
match_it = lev_dist_IT[0] # first tuple, lowest Levenshtein distance. tuples -> (levenshtein_dist, term)
matchIT = match_it[1] # ITALIAN MATCHED TERM
# German term
for deTerm, (spr, status, statusOLD) in deTerms.items():
edit_distance = distance(str(matched_term_de_or), deTerm) # todo: move conversion to string up?
lev_dist_DE.append((edit_distance, deTerm))
lev_dist_DE.sort(key=operator.itemgetter(0)) # sorting by lowest Levenshtein distance
match_de = lev_dist_DE[0] # first tuple, lowest Levenshtein distance. tuples -> (levenshtein_dist, term)
matchDE = match_de[1] # GERMAN MATCHED TERM
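        # Illustrative example (hypothetical values): if matched_term_de_or is "Herdenimmunitäten" and the entry
        # contains "Herdenimmunität" (edit distance 2) and "Herdenschutz" (a much larger distance),
        # "Herdenimmunität" is selected as matchDE.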
# getting tags from full reference termbase and annotating
        # first, I handle exceptions (tags that have to be assigned according to a combination of other tags)
# ex: genormt; OLD;
# then, the remaining cases can be simply annotated with the tag in Termstatus
# metadata for matched terms
(status_it, old_it) = itTerms[matchIT]
(spr, status_de, old_de) = deTerms[matchDE]
tag = status_de
CW = "C"
        # assigning W tag (wrong) to terms with NST-S and NST-NS status tags
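        # (NST = non-South-Tyrol-specific term; -S: the entry contains a standardised/recommended term, -NS: it does not)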
if tag == "NST-S" or tag == "NST-NS":
CW = "W"
# handling terms with "OLD" tag
if old_de == "OLD" and spr == "Südtirol": # defensive; all OLD terms should already be from South Tyrol
if old_it != "OLD": # if German term is OLD and Italian was not
tag = "OLD" # assign OLD
CW = "W"
else: # if Italian is OLD, too
pass # keep the tag already assigned
# computing hlepor score
hlepor = str(float("{:.3f}".format(single_hlepor_score(ref, hyp))))
hlepor = hlepor.replace(".", ",") # localizing decimal number separator for Excel
# writing annotation row
"""it, de = id_terms[concept_id]
concept_terms = ", ".join(it) + " = " + ", ".join(de)"""
annotated_tuple = (id, src, ref, hyp, src_lemma, ref_lemma, hyp_lemma, concept_id, id_terms_full[concept_id],
str(matched_term_it), str(matched_term_de_or), CW, "|".join(spr), tag, hlepor)
annotated_data.append(annotated_tuple)
print("\r", "%s/%s test set sentences evaluated." % (counter, lenTestSet), end="")
matcher_de.remove(concept_id)
# removing current terminology patterns from German PhraseMatcher for next iterations
# should errors be raised, re-instantiate an empty matcher_de here (as done above)
# print(identified_terms) # todo: to convert to a dict to do counting with Counter
#print(matched_after_split)
#print(len(matched_after_split))
'''# printing matches after compound splitting nicely (for testing purposes)
for ((ITTERMS, DETERMS), ITSENT, DESENT) in matched_after_split:
print(ITTERMS, "\t", DETERMS)
print(ITSENT)
print(DESENT)
print()'''
print("Evaluation completed. Exporting tab-separated annotation file...")
# counting each tag to compute term accuracy score
total = len(annotated_data)
counter_correct = 0
counter_wrong = 0
counter_NEO_S = 0
counter_NEO_NS = 0
counter_NST_S = 0
counter_NST_NS = 0
counter_OLD = 0
counter_CS = 0
counter_CNS = 0
counter_ANS = 0
for (id, src, ref, hyp, src_lemma, ref_lemma, hyp_lemma, concept_id, concept_terms,
matched_term_it, matched_term_de_or, CW, spr, tag, hlepor) in annotated_data:
if CW == "W":
counter_wrong += 1
elif CW == "C":
counter_correct += 1
if tag == "NEO-S":
counter_NEO_S += 1
if tag == "NEO-NS":
counter_NEO_NS += 1
elif tag == "NST-S":
counter_NST_S += 1
elif tag == "NST-NS":
counter_NST_NS += 1
elif tag == "OLD":
counter_OLD += 1
elif tag == "CS":
counter_CS += 1
elif tag == "CNS":
counter_CNS += 1
elif tag == "ANS":
counter_ANS += 1
# adding id to each row
final = []
idEval = 1
for (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) in annotated_data:
final.append((idEval, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o))
idEval += 1
print()
print("===============================================================================================================")
print("Evaluated terms: ", total)
print("_______________________________________________________________________________________________________________")
print("Correct terms: ", counter_correct)
print("\tCorrect standardised/recommended terms: ", counter_CS)
print("\tCorrect non-standardised/non-recommended terms: ", counter_CNS)
print("\tAcceptable variant terms given a standardised/recommended term: ", counter_ANS)
print("_______________________________________________________________________________________________________________")
print("Wrong/omitted terms: ", counter_wrong)
print("\tNon-equivalent/omitted terms: ", counter_NEO_S + counter_NEO_NS)
print("\t\tNon-equivalent/omitted terms (given a standardised or recommended term): ", counter_NEO_S)
print("\t\tNon-equivalent/omitted terms (without a standardised or recommended term): ", counter_NEO_NS)
print("\tNon-South-Tyrol-specific terms: ", counter_NST_S + counter_NST_NS)
print("\t\tNon-South-Tyrol-specific terms (given a standardised or recommended term): ", counter_NST_S)
print("\t\tNon-South-Tyrol-specific terms (without a standardised or recommended term): ", counter_NST_NS)
print("\tOutdated terms: ", counter_OLD)
print("_______________________________________________________________________________________________________________")
print("LexTermEval score: ", (counter_correct / total) * 100)
print("===============================================================================================================")
# exporting as TSV file
with open(output, "w", encoding="utf-8") as out:
tsv_writer = csv.writer(out, delimiter='\t', lineterminator='\n')
tsv_writer.writerow(
["ID", "sentenceID", "source", "reference", "hypothesis", "source_lemmatised", "reference_lemmatised",
"hypothesis_lemmatised", "conceptID", "terms", "matched_term_source",
"matched_term_hypothesis", "C/W", "Sprachgebrauch", "tag", "hLEPOR"])
for (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) in final:
tsv_writer.writerow([a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p])