Skip to content

Commit ee4f146

Browse files
committed
Add the capacity to negate attributes in a node, rather than requiring negative lookahead regex
1 parent 81290ba commit ee4f146

File tree

8 files changed

+154
-80
lines changed

8 files changed

+154
-80
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package edu.stanford.nlp.semgraph.semgrex;
2+
3+
public class Attribute {
4+
final String key;
5+
final Object cased;
6+
final Object caseless;
7+
final boolean negated;
8+
9+
Attribute(String key, Object cased, Object caseless, boolean negated) {
10+
this.key = key;
11+
this.cased = cased;
12+
this.caseless = caseless;
13+
this.negated = negated;
14+
}
15+
}

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

+17-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
package edu.stanford.nlp.semgraph.semgrex;
22

3-
import java.util.LinkedHashMap;
4-
import java.util.Map;
3+
import java.util.ArrayList;
4+
import java.util.HashSet;
5+
import java.util.List;
6+
import java.util.Set;
7+
8+
import edu.stanford.nlp.util.Triple;
59

610
/**
711
* Stores attributes for a Semgrex NodePattern.
@@ -18,12 +22,14 @@
1822
public class NodeAttributes {
1923
private boolean root;
2024
private boolean empty;
21-
private Map<String, String> attributes;
25+
private List<Triple<String, String, Boolean>> attributes;
26+
private Set<String> positiveAttributes;
2227

2328
/**
 * Creates an attribute holder that is neither root nor empty and has no
 * attributes recorded yet.
 */
public NodeAttributes() {
  root = false;
  empty = false;
  // A list, not a map: the same key may now appear more than once
  // (e.g. several negated constraints), and insertion order is kept.
  attributes = new ArrayList<>();
  // Keys that already carry a non-negated constraint.
  positiveAttributes = new HashSet<>();
}
2834

2935
public void setRoot(boolean root) {
@@ -42,14 +48,17 @@ public boolean empty() {
4248
return empty;
4349
}
4450

45-
public void setAttribute(String key, String value) {
46-
if (attributes.containsKey(key)) {
51+
public void setAttribute(String key, String value, boolean negated) {
52+
if (positiveAttributes.contains(key)) {
4753
throw new SemgrexParseException("Duplicate attribute " + key + " found in semgrex expression");
4854
}
49-
attributes.put(key, value);
55+
if (!negated) {
56+
positiveAttributes.add(key);
57+
}
58+
attributes.add(new Triple(key, value, negated));
5059
}
5160

52-
public Map<String, String> attributes() {
61+
public List<Triple<String, String, Boolean>> attributes() {
5362
return attributes;
5463
}
5564
}

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

+28-34
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import java.util.ArrayList;
44
import java.util.Collections;
55
import java.util.Iterator;
6-
import java.util.LinkedHashMap;
76
import java.util.List;
87
import java.util.Map;
98
import java.util.regex.Matcher;
@@ -13,6 +12,7 @@
1312
import edu.stanford.nlp.semgraph.SemanticGraph;
1413
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
1514
import edu.stanford.nlp.util.Pair;
15+
import edu.stanford.nlp.util.Triple;
1616
import edu.stanford.nlp.util.logging.Redwood;
1717

1818
public class NodePattern extends SemgrexPattern {
@@ -31,7 +31,7 @@ public class NodePattern extends SemgrexPattern {
3131
* value.
3232
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3333
*/
34-
private final Map<String, Pair<Object, Object>> attributes;
34+
private final List<Attribute> attributes;
3535
private final boolean isRoot;
3636
private final boolean isLink;
3737
private final boolean isEmpty;
@@ -43,33 +43,34 @@ public class NodePattern extends SemgrexPattern {
4343
private List<Pair<Integer, String>> variableGroups;
4444

4545
/**
 * Builds a node pattern without any variable groups.
 *
 * <p>Delegates to the constructor that takes variable groups, passing a
 * new empty list.
 *
 * @param r relation passed through to the full constructor
 * @param negDesc passed through to the full constructor
 * @param attrs attribute constraints as (key, value, negated) triples
 * @param root passed through to the full constructor
 * @param empty passed through to the full constructor
 * @param isLink passed through to the full constructor
 * @param name passed through to the full constructor
 */
public NodePattern(GraphRelation r, boolean negDesc,
                   List<Triple<String, String, Boolean>> attrs,
                   boolean root, boolean empty, boolean isLink, String name) {
  this(r, negDesc, attrs, root, empty, isLink, name,
       new ArrayList<>(0));
}
5151

5252
// TODO: there is no capacity for named variable groups in the parser right now
5353
public NodePattern(GraphRelation r, boolean negDesc,
54-
Map<String, String> attrs,
54+
List<Triple<String, String, Boolean>> attrs,
5555
boolean root, boolean empty, boolean isLink, String name,
5656
List<Pair<Integer, String>> variableGroups) {
5757
this.reln = r;
5858
this.negDesc = negDesc;
5959
this.isLink = isLink;
6060
// order the attributes so that the pattern stays the same when
6161
// printing a compiled pattern
62-
attributes = new LinkedHashMap<>();
62+
attributes = new ArrayList<>();
6363
descString = "{";
64-
for (Map.Entry<String, String> entry : attrs.entrySet()) {
64+
for (Triple<String, String, Boolean> entry : attrs) {
6565
if (!descString.equals("{"))
6666
descString += ";";
67-
String key = entry.getKey();
68-
String value = entry.getValue();
67+
String key = entry.first();
68+
String value = entry.second();
69+
boolean negated = entry.third();
6970

7071
// Add the attributes for this key
7172
if (value.equals("__")) {
72-
attributes.put(key, Pair.makePair(true, true));
73+
attributes.add(new Attribute(key, true, true, negated));
7374
} else if (value.matches("/.*/")) {
7475
boolean isRegexp = false;
7576
for (int i = 1; i < value.length() - 1; ++i) {
@@ -81,34 +82,24 @@ public NodePattern(GraphRelation r, boolean negDesc,
8182
}
8283
String patternContent = value.substring(1, value.length() - 1);
8384
if (isRegexp) {
84-
attributes.put(key, Pair.makePair(
85-
Pattern.compile(patternContent),
86-
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE))
87-
);
85+
attributes.add(new Attribute(key,
86+
Pattern.compile(patternContent),
87+
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
88+
negated));
8889
} else {
89-
attributes.put(key, Pair.makePair(patternContent, patternContent));
90+
attributes.add(new Attribute(key, patternContent, patternContent, negated));
9091
}
9192
} else { // raw description
92-
attributes.put(key, Pair.makePair(value, value));
93+
attributes.add(new Attribute(key, value, value, negated));
9394
}
9495

95-
96-
97-
// if (value.equals("__")) {
98-
// attributes.put(key, Pair.makePair(Pattern.compile(".*"), Pattern.compile(".*", Pattern.CASE_INSENSITIVE)));
99-
// } else if (value.matches("/.*/")) {
100-
// attributes.put(key, Pair.makePair(
101-
// Pattern.compile(value.substring(1, value.length() - 1)),
102-
// Pattern.compile(value.substring(1, value.length() - 1), Pattern.CASE_INSENSITIVE))
103-
// );
104-
// } else { // raw description
105-
// attributes.put(key, Pair.makePair(
106-
// Pattern.compile("^(" + value + ")$"),
107-
// Pattern.compile("^(" + value + ")$", Pattern.CASE_INSENSITIVE))
108-
// );
109-
// }
110-
descString += (key + ':' + value);
96+
if (negated) {
97+
descString += (key + "!:" + value);
98+
} else {
99+
descString += (key + ':' + value);
100+
}
111101
}
102+
112103
if (root) {
113104
if (!descString.equals("{"))
114105
descString += ";";
@@ -145,8 +136,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
145136
return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD));
146137

147138
// log.info("Attributes are: " + attributes);
148-
for (Map.Entry<String, Pair<Object, Object>> attr : attributes.entrySet()) {
149-
String key = attr.getKey();
139+
for (Attribute attr : attributes) {
140+
String key = attr.key;
150141
// System.out.println(key);
151142
String nodeValue;
152143
// if (key.equals("idx"))
@@ -167,7 +158,7 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
167158
return negDesc;
168159

169160
// Get the node pattern
170-
Object toMatch = ignoreCase ? attr.getValue().second : attr.getValue().first;
161+
Object toMatch = ignoreCase ? attr.caseless : attr.cased;
171162
boolean matches;
172163
if (toMatch instanceof Boolean) {
173164
matches = ((Boolean) toMatch);
@@ -182,6 +173,9 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
182173
} else {
183174
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
184175
}
176+
if (attr.negated) {
177+
matches = !matches;
178+
}
185179

186180
if (!matches) {
187181
// System.out.println("doesn't match");

0 commit comments

Comments
 (0)