-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple-pipeline.lisp
65 lines (58 loc) · 3.08 KB
/
simple-pipeline.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
;; Everything below is read and defined in the APRNLP package.
(in-package aprnlp)
(defclass simple-pipeline ()
  ((name
    :initform "unnamed-pipeline"
    :initarg :name
    :documentation "Label identifying this pipeline.")
   (tagger
    :initform *loaded-pos-tagger*
    :initarg :tagger
    :documentation "Processor run first on every sentence.  When no :TAGGER
initarg is given, the value of *LOADED-POS-TAGGER* at instance creation
time is used.")
   (parser
    :initform *loaded-dep-parser*
    :initarg :parser
    :documentation "Processor run after the tagger on every sentence.  When
no :PARSER initarg is given, the value of *LOADED-DEP-PARSER* at instance
creation time is used."))
  (:documentation "A pipeline that bundles a tagger and a parser under a name.
The slots have no accessors; they are reached with WITH-SLOTS / SLOT-VALUE."))
(defgeneric pipeline-process (pipeline result-type text)
  (:documentation "Tokenize TEXT with SIMPLE-TOKENIZE and run the stages of
PIPELINE selected by RESULT-TYPE over the resulting vector of sentences.

RESULT-TYPE is one of:
  :TOKEN -- tokenize only;
  :POS   -- tokenize, then apply the tagger to each sentence;
  :DEP   -- tokenize, then apply the tagger and the parser to each sentence.
Any other RESULT-TYPE is handled exactly like :DEP.

Returns whatever SIMPLE-TOKENIZE returned; for :POS and :DEP the processors
have been called on each of its elements beforehand.")
  ;; Tokenization only: no processor is involved.
  (:method ((pipeline simple-pipeline) (result-type (eql :token)) text)
    (simple-tokenize text))
  ;; Tagging only.
  (:method ((pipeline simple-pipeline) (result-type (eql :pos)) text)
    (let ((tokenized (simple-tokenize text)))
      (with-slots (tagger) pipeline
        (loop for sent across tokenized
              do (process tagger sent)))
      tokenized))
  ;; Tagging followed by parsing, one sentence at a time.
  (:method ((pipeline simple-pipeline) (result-type (eql :dep)) text)
    (let ((tokenized (simple-tokenize text)))
      (with-slots (tagger parser) pipeline
        (loop for sent across tokenized
              do (process tagger sent)
                 (process parser sent)))
      tokenized))
  ;; Fallback: an unrecognised RESULT-TYPE gets the full :DEP treatment.
  (:method ((pipeline simple-pipeline) result-type text)
    (declare (ignorable result-type))
    (pipeline-process pipeline :dep text)))
(defgeneric train-pipeline (class sentences &key name cycles save-dir)
  (:documentation "Train a fresh pipeline of type CLASS on SENTENCES.

Keyword arguments:
  NAME     -- name of the pipeline; also the name of the sub-directory
              created beneath SAVE-DIR (default \"unnamed-pipeline\").
  CYCLES   -- passed as :CYCLES to TRAIN for both processors (default 5).
  SAVE-DIR -- base directory; a designator with or without a trailing
              slash is accepted (default: the APRNLP system source
              directory).

Returns two values: the new pipeline instance and the pathname of the
pipeline directory SAVE-DIR/NAME/.")
  (:method ((class (eql 'simple-pipeline)) sentences
            &key (name "unnamed-pipeline")
                 (cycles 5)
                 (save-dir (asdf:system-source-directory :aprnlp)))
    (let* ((tagger (make-instance 'pos-tagger :name "tagger"))
           (parser (make-instance 'dep-parser :name "parser"))
           ;; Treat SAVE-DIR as a directory even when it lacks a trailing
           ;; slash; otherwise "/tmp/models" would parse with "models" as a
           ;; file name and that component would be dropped below.
           (base-dir (uiop:ensure-directory-pathname save-dir))
           (pipeline-dir
             (ensure-directories-exist
              (make-pathname
               ;; A base with no directory component would yield a list
               ;; that lacks the leading :ABSOLUTE/:RELATIVE marker.
               :directory (append (or (pathname-directory base-dir)
                                      (list :relative))
                                  (list name))
               :name nil :type nil :defaults base-dir))))
      ;; NOTE(review): TRAIN presumably saves each processor into
      ;; PIPELINE-DIR under its name (LOAD-PIPELINE reads "tagger.fasl" and
      ;; "parser.fasl") -- confirm against TRAIN.
      (train tagger sentences :cycles cycles :save-dir pipeline-dir)
      ;; The parser is trained on a copy that the freshly trained tagger has
      ;; processed, leaving the caller's SENTENCES untouched by this step.
      (let ((new-sentences (copy-sentences sentences)))
        (iter (for sentence :in-vector new-sentences)
          (process tagger sentence))
        (train parser new-sentences :cycles cycles :save-dir pipeline-dir))
      (values (make-instance 'simple-pipeline
                             :name name :tagger tagger :parser parser)
              pipeline-dir))))
(defgeneric load-pipeline (class directory)
  (:documentation "Load a pipeline of type CLASS from DIRECTORY.

DIRECTORY is a pathname designator for the pipeline directory, with or
without a trailing slash.  The tagger is loaded from \"tagger.fasl\" and the
parser from \"parser.fasl\" inside it.  The pipeline is named after the last
component of the directory.  Returns the new pipeline instance.")
  (:method ((class (eql 'simple-pipeline)) directory)
    ;; Without normalisation, "/models/english" would parse with "english"
    ;; as a file name: the .fasl files would be looked up in "/models/" and
    ;; the pipeline would be named "models".
    (let* ((dir (uiop:ensure-directory-pathname directory))
           (tagger (load-processor 'pos-tagger
                                   (merge-pathnames "tagger.fasl" dir)))
           (parser (load-processor 'dep-parser
                                   (merge-pathnames "parser.fasl" dir))))
      (make-instance 'simple-pipeline
                     :name (car (last (pathname-directory dir)))
                     :tagger tagger :parser parser))))
(defmethod test ((pipeline simple-pipeline) sentences)
  "Test both processors of PIPELINE on SENTENCES.

The tagger is tested on SENTENCES as given.  The parser is then tested on a
copy of SENTENCES that the tagger has processed, mirroring how
TRAIN-PIPELINE prepares the parser's training data.  Returns the value(s) of
the parser's TEST call."
  (with-slots (tagger parser) pipeline
    (test tagger sentences)
    (let ((new-sentences (copy-sentences sentences)))
      (iter (for sentence :in-vector new-sentences)
        (process tagger sentence))
      ;; Use the tagged copy: previously it was built and then discarded,
      ;; and the parser was tested on the original SENTENCES instead.
      (test parser new-sentences))))
(defmethod test-training ((class (eql 'simple-pipeline)))
  "Train a SIMPLE-PIPELINE on the English GUM :TRAIN treebank file with the
default TRAIN-PIPELINE settings, then run TEST on it with the :TEST file.
Returns the value(s) of that TEST call."
  ;; LET* keeps the original order: read training data, train, and only
  ;; then read the test data.
  (let* ((training-set (read-conllu-file (treebank-file :english "GUM" :train)))
         (trained (train-pipeline 'simple-pipeline training-set))
         (held-out (read-conllu-file (treebank-file :english "GUM" :test))))
    (test trained held-out)))
;; Export the pipeline entry points, the SIMPLE-PIPELINE class name and its
;; slot names (NAME, TAGGER, PARSER) from the current package.
(export '(pipeline-process train-pipeline load-pipeline simple-pipeline name tagger parser))