Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for regex named capture groups #659

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/regex.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ This returns the following JSONata object (JSON, but also with a function proper
"start": 2,
"end": 4,
"groups": [],
"namedGroups": {},
"next": "<native function>#0"
}
```
Expand All @@ -68,6 +69,7 @@ This contains information of the first matching substring within this famous pal
- `start` - the starting position (zero offset) of the matching substring within the original string
- `end` - the ending position of the matching substring within the original string
- `groups` - if capturing groups are used in the regex, then this array contains a string for the text captured by each group
- `namedGroups` - if named capture groups are used in the regex, then this object maps each group name to the text captured by that group; otherwise it is an empty object
- `next()` - when invoked, will return details of the second occurrence of any matching substring (and so on).

In this example, invoking `next()` will return:
Expand All @@ -78,6 +80,7 @@ In this example, invoking `next()` will return:
"start": 17,
"end": 22,
"groups": [],
"namedGroups": {},
"next": "<native function>#0"
}
```
Expand Down
3 changes: 2 additions & 1 deletion src/functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ const functions = (() => {
result.push({
match: matches.match,
index: matches.start,
groups: matches.groups
groups: matches.groups,
namedGroups: matches.namedGroups || {}
});
matches = await evaluateMatcher(matches.next);
count++;
Expand Down
3 changes: 2 additions & 1 deletion src/jsonata.js
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,8 @@ var jsonata = (function() {
match: match[0],
start: match.index,
end: match.index + match[0].length,
groups: []
groups: [],
namedGroups: match.groups || {}
};
if(match.length > 1) {
for(var i = 1; i < match.length; i++) {
Expand Down
41 changes: 22 additions & 19 deletions test/implementation-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -470,12 +470,13 @@ describe("Tests that bind Javascript functions", () => {
var expr = jsonata("$match('LLANFAIRPWLLGWYNGYLLGOGERYCHWYRNDROBWLLLLANTYSILIOGOGOGOCH', $repeatingLetters('L', 2))");
expr.registerFunction("repeatingLetters", repeatingLetters);
var result = await expr.evaluate();
console.log(result)
var expected = [
{"match": "LL", "index": 0, "groups": []},
{"match": "LL", "index": 10, "groups": []},
{"match": "LL", "index": 18, "groups": []},
{"match": "LL", "index": 37, "groups": []},
{"match": "LL", "index": 39, "groups": []}
{"match": "LL", "index": 0, "groups": [], "namedGroups": {}},
{"match": "LL", "index": 10, "groups": [], "namedGroups": {}},
{"match": "LL", "index": 18, "groups": [], "namedGroups": {}},
{"match": "LL", "index": 37, "groups": [], "namedGroups": {}},
{"match": "LL", "index": 39, "groups": [], "namedGroups": {}}
];
expect(result).to.deep.equal(expected);
});
Expand Down Expand Up @@ -659,7 +660,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/ab/ ("ab")');
var result = await expr.evaluate();
var expected = { match: "ab", start: 0, end: 2, groups: [] };
var expected = { match: "ab", start: 0, end: 2, groups: [], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand All @@ -677,7 +678,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/ab+/ ("ababbabbcc")');
var result = await expr.evaluate();
var expected = { match: "ab", start: 0, end: 2, groups: [] };
var expected = { match: "ab", start: 0, end: 2, groups: [], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand All @@ -686,7 +687,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/a(b+)/ ("ababbabbcc")');
var result = await expr.evaluate();
var expected = { match: "ab", start: 0, end: 2, groups: ["b"] };
var expected = { match: "ab", start: 0, end: 2, groups: ["b"], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand All @@ -695,7 +696,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/a(b+)/ ("ababbabbcc").next()');
var result = await expr.evaluate();
var expected = { match: "abb", start: 2, end: 5, groups: ["bb"] };
var expected = { match: "abb", start: 2, end: 5, groups: ["bb"], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand All @@ -704,7 +705,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/a(b+)/ ("ababbabbcc").next().next()');
var result = await expr.evaluate();
var expected = { match: "abb", start: 5, end: 8, groups: ["bb"] };
var expected = { match: "abb", start: 5, end: 8, groups: ["bb"], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand All @@ -722,7 +723,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('/a(b+)/i ("Ababbabbcc")');
var result = await expr.evaluate();
var expected = { match: "Ab", start: 0, end: 2, groups: ["b"] };
var expected = { match: "Ab", start: 0, end: 2, groups: ["b"], namedGroups: {} };
expect(JSON.stringify(result)).to.equal(JSON.stringify(expected));
});
});
Expand Down Expand Up @@ -776,7 +777,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should find \\", async function() {
var expr = jsonata('$match("test escape \\\\", /\\\\/)');
var result = await expr.evaluate();
var expected = { match: "\\", index: 12, groups: []};
var expected = { match: "\\", index: 12, groups: [], namedGroups: {} };
expect(result).to.deep.equal(expected);
});
});
Expand All @@ -786,13 +787,14 @@ describe("Tests that are specific to a Javascript runtime", () => {
var expr = jsonata('$match("ababbabbcc",/ab/)');
var result = await expr.evaluate();
var expected = [
{ match: "ab", index: 0, groups: [] },
{ match: "ab", index: 0, groups: [], namedGroups: {} },
{
match: "ab",
index: 2,
groups: []
groups: [],
namedGroups: {}
},
{ match: "ab", index: 5, groups: [] }
{ match: "ab", index: 5, groups: [], namedGroups: {} }
];
expect(result).to.deep.equal(expected);
});
Expand All @@ -803,13 +805,14 @@ describe("Tests that are specific to a Javascript runtime", () => {
var expr = jsonata('$match("ababbabbcc",/a(b+)/)');
var result = await expr.evaluate();
var expected = [
{ match: "ab", index: 0, groups: ["b"] },
{ match: "ab", index: 0, groups: ["b"], namedGroups: {} },
{
match: "abb",
index: 2,
groups: ["bb"]
groups: ["bb"],
namedGroups: {}
},
{ match: "abb", index: 5, groups: ["bb"] }
{ match: "abb", index: 5, groups: ["bb"], namedGroups: {} }
];
expect(result).to.deep.equal(expected);
});
Expand All @@ -819,7 +822,7 @@ describe("Tests that are specific to a Javascript runtime", () => {
it("should return result object", async function() {
var expr = jsonata('$match("ababbabbcc",/a(b+)/, 1)');
var result = await expr.evaluate();
var expected = { match: "ab", index: 0, groups: ["b"] };
var expected = { match: "ab", index: 0, groups: ["b"], namedGroups: {} };
expect(result).to.deep.equal(expected);
});
});
Expand Down
15 changes: 10 additions & 5 deletions test/test-suite/groups/matchers/case000.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,32 @@
{
"match": "a",
"index": 0,
"groups": []
"groups": [],
"namedGroups": {}
},
{
"match": "a",
"index": 3,
"groups": []
"groups": [],
"namedGroups": {}
},
{
"match": "a",
"index": 5,
"groups": []
"groups": [],
"namedGroups": {}
},
{
"match": "a",
"index": 7,
"groups": []
"groups": [],
"namedGroups": {}
},
{
"match": "a",
"index": 10,
"groups": []
"groups": [],
"namedGroups": {}
}
]
}
30 changes: 30 additions & 0 deletions test/test-suite/groups/regex/case039.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"expr": "$map($, function($i){ $match($i, /^(?<first>[^ ]*)\\s*(?<last>.*)/) }).namedGroups",
"data": [
"Felicia Saunders",
"Jimmy Schultz",
"Dolores Figueroa",
"Craig Moreno",
"Lindsey Hall",
"Bonnie Russell",
"Kristin Stewart",
"Owen Reid",
"Brenda Sherman",
"Dwayne Baldwin",
"Joy Smith Carmichael"
],
"bindings": {},
"result": [
{ "first": "Felicia", "last": "Saunders" },
{ "first": "Jimmy", "last": "Schultz" },
{ "first": "Dolores", "last": "Figueroa" },
{ "first": "Craig", "last": "Moreno" },
{ "first": "Lindsey", "last": "Hall" },
{ "first": "Bonnie", "last": "Russell" },
{ "first": "Kristin", "last": "Stewart" },
{ "first": "Owen", "last": "Reid" },
{ "first": "Brenda", "last": "Sherman" },
{ "first": "Dwayne", "last": "Baldwin" },
{ "first": "Joy", "last": "Smith Carmichael" }
]
}