Skip to content

Commit fda9ae7

Browse files
committed
Merge branch 'UnitTests' of https://github.com/jeanbern/portent into UnitTests
2 parents 8101343 + 1b80f49 commit fda9ae7

File tree

5 files changed

+501247
-0
lines changed

5 files changed

+501247
-0
lines changed

portent.Test/DawgTests/DawgHelper.cs

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#nullable enable
2+
using System;
3+
using System.IO;
4+
using System.Linq;
5+
6+
namespace Portent.Test.DawgTests
7+
{
8+
/// <summary>
/// Test-only helpers that build <see cref="Dawg"/> instances through a
/// <see cref="PartitionedGraphBuilder"/> and load query terms from disk.
/// </summary>
internal static class DawgHelper
{
    /// <summary>Extension given to the scratch files the compressed graph is saved to.</summary>
    private const string TempAugExtension = ".aug";

    /// <summary>Number of query terms <see cref="BuildQuery1K"/> is required to produce.</summary>
    private const int ExpectedQueryCount = 1000;

    /// <summary>
    /// Builds a <see cref="Dawg"/> containing the given words, each inserted with a count of 0.
    /// </summary>
    /// <param name="words">Words to insert; they are sorted before insertion.</param>
    /// <returns>A <see cref="Dawg"/> read back from the saved compressed graph.</returns>
    public static Dawg Create(params string[] words)
    {
        var builder = new PartitionedGraphBuilder();

        // NOTE(review): OrderBy uses the default (culture-sensitive) string comparer.
        // Confirm whether the builder expects ordinal ordering instead.
        foreach (var word in words.OrderBy(x => x))
        {
            builder.Insert(word, 0);
        }

        return SaveAndLoad(builder);
    }

    /// <summary>
    /// Builds a <see cref="Dawg"/> from a corpus file whose lines have the form
    /// "word count", separated by a single space.
    /// </summary>
    /// <param name="corpusLocation">Path of the corpus file to read.</param>
    /// <returns>A <see cref="Dawg"/> read back from the saved compressed graph.</returns>
    /// <remarks>
    /// Lines that do not split into exactly two tokens, or whose second token does not
    /// parse as a <see cref="ulong"/>, are skipped. Words are inserted in file order.
    /// </remarks>
    public static Dawg CreateFromCorpus(string corpusLocation)
    {
        var builder = new PartitionedGraphBuilder();

        foreach (var line in File.ReadLines(corpusLocation))
        {
            var lineTokens = line.Split(' ');
            if (lineTokens.Length != 2)
            {
                continue;
            }

            if (!ulong.TryParse(lineTokens[1], out var count))
            {
                continue;
            }

            builder.Insert(lineTokens[0], count);
        }

        return SaveAndLoad(builder);
    }

    /// <summary>
    /// Reads exactly 1000 query terms from a file. A line contributes its first token
    /// when splitting it on whitespace yields exactly three parts.
    /// </summary>
    /// <param name="queryLocation">Path of the query file to read.</param>
    /// <returns>An array of 1000 query terms, in file order.</returns>
    /// <exception cref="InvalidOperationException">
    /// The file does not contain exactly 1000 qualifying lines (fewer or more).
    /// </exception>
    public static string[] BuildQuery1K(string queryLocation)
    {
        // Collect every qualifying line first so a file with too many entries is
        // reported through the count check below instead of overrunning a fixed array.
        var testList = File.ReadLines(queryLocation)
            .Select(line => line.Split(default(char[]), StringSplitOptions.None))
            .Where(lineParts => lineParts.Length == 3)
            .Select(lineParts => lineParts[0])
            .ToArray();

        if (testList.Length != ExpectedQueryCount)
        {
            // ReSharper disable once RedundantToStringCallForValueType - would box value type?
            throw new InvalidOperationException("Unexpected number of query inputs: " + testList.Length.ToString());
        }

        return testList;
    }

    /// <summary>
    /// Saves the builder's compressed graph to a scratch file and reads a
    /// <see cref="Dawg"/> back from it.
    /// </summary>
    /// <param name="builder">Builder holding the words to compile.</param>
    /// <returns>The <see cref="Dawg"/> constructed from the saved file.</returns>
    private static Dawg SaveAndLoad(PartitionedGraphBuilder builder)
    {
        // A unique path per call keeps concurrently running test classes from
        // overwriting or locking each other's scratch file.
        var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + TempAugExtension);
        try
        {
            using var compressed = builder.AsCompressedSparseRows();
            compressed.Save(tempPath);

            using var read = File.OpenRead(tempPath);
            return new Dawg(read);
        }
        finally
        {
            // Both using declarations above are disposed when the try block exits,
            // so the file is no longer open here. Deleting a missing file is a no-op.
            File.Delete(tempPath);
        }
    }
}
83+
}
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
using Xunit;
4+
5+
namespace Portent.Test.DawgTests
6+
{
7+
/// <summary>
/// Checks total lookup result counts against a large corpus for increasing edit distances.
/// </summary>
public class LargeCorpusTests
{
    /// <summary>
    /// Builds a dawg from <paramref name="corpusLocation"/>, loads the query terms from
    /// <paramref name="queryLocation"/>, and asserts the summed number of lookup results
    /// for maximum edit distances 0 through 3, in that order.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the method name ends in "EmptyDictionary", yet the dawg is built from
    /// the supplied corpus file - consider renaming.
    /// </remarks>
    [Theory]
    [InlineData(
        "TestData/frequency_dictionary_en_500_000.txt",
        "TestData/noisy_query_en_1000.txt",
        497,
        34814,
        869864,
        8775261)]
    public void Lookup_CountMatchesExpected_EmptyDictionary(string corpusLocation, string queryLocation, int matchCount0Errors, int matchCount1Errors, int matchCount2Errors, int matchCount3Errors)
    {
        using var dawg = DawgHelper.CreateFromCorpus(corpusLocation);
        var terms = DawgHelper.BuildQuery1K(queryLocation);

        // The index into this array is the maximum edit distance being checked.
        var expectedTotals = new[] { matchCount0Errors, matchCount1Errors, matchCount2Errors, matchCount3Errors };
        for (var distance = 0; distance < expectedTotals.Length; distance++)
        {
            Assert.Equal(expectedTotals[distance], ResultTotal(dawg, terms, (uint)distance));
        }
    }

    /// <summary>
    /// Sums the number of results returned by looking up every search term with the
    /// given maximum edit distance.
    /// </summary>
    private static int ResultTotal(Dawg dawg, IEnumerable<string> searchTerms, uint maxEdits)
        => searchTerms
            .Select(term => dawg.Lookup(term, maxEdits).Count())
            .Sum();
}
32+
}
+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
using System.Linq;
2+
using Xunit;
3+
4+
namespace Portent.Test.DawgTests
5+
{
6+
/// <summary>
/// Looks up a modified word in a single-word dawg, covering each kind of single edit
/// (transposition, insertion, deletion, substitution).
/// </summary>
public class SingleEditTests
{
    private const uint NoEdits = 0u;
    private const uint OneEdit = 1u;
    private const uint TwoEdits = 2u;

    /// <summary>With a maximum edit distance of 0, a word one edit away yields no results.</summary>
    [Theory]
    [InlineData("ab--", "ba--")]
    [InlineData("-ab-", "-ba-")]
    [InlineData("--ab", "--ba")]
    [InlineData("--", "a--")]
    [InlineData("--", "-a-")]
    [InlineData("--", "--a")]
    [InlineData("abc", "bc")]
    [InlineData("abc", "ac")]
    [InlineData("abc", "ab")]
    [InlineData("abc", "xbc")]
    [InlineData("abc", "axc")]
    [InlineData("abc", "abx")]
    public void Lookup_SingleEdit_IsRejected(string word, string modifiedWord)
        => AssertNoSuggestions(word, modifiedWord, NoEdits);

    /// <summary>Two adjacent characters swapped, looked up with a maximum edit distance of 1.</summary>
    [Theory]
    [InlineData("ab--", "ba--")]
    [InlineData("-ab-", "-ba-")]
    [InlineData("--ab", "--ba")]
    public void Lookup_SingleTransposition_IsAccepted(string word, string modifiedWord)
        => AssertSoleSuggestion(word, modifiedWord, OneEdit);

    /// <summary>One extra character in the query, looked up with a maximum edit distance of 1.</summary>
    [Theory]
    [InlineData("--", "a--")]
    [InlineData("--", "-a-")]
    [InlineData("--", "--a")]
    public void Lookup_SingleInsertion_IsAccepted(string word, string modifiedWord)
        => AssertSoleSuggestion(word, modifiedWord, OneEdit);

    /// <summary>One character missing from the query, looked up with a maximum edit distance of 1.</summary>
    [Theory]
    [InlineData("abc", "bc")]
    [InlineData("abc", "ac")]
    [InlineData("abc", "ab")]
    public void Lookup_SingleDeletion_IsAccepted(string word, string modifiedWord)
        => AssertSoleSuggestion(word, modifiedWord, OneEdit);

    /// <summary>One character replaced in the query, looked up with a maximum edit distance of 1.</summary>
    [Theory]
    [InlineData("abc", "xbc")]
    [InlineData("abc", "axc")]
    [InlineData("abc", "abx")]
    public void Lookup_SingleSubstitution_IsAccepted(string word, string modifiedWord)
        => AssertSoleSuggestion(word, modifiedWord, OneEdit);

    /// <summary>
    /// An insertion between the transposed characters plus the transposition itself,
    /// looked up with a maximum edit distance of 2, yields no results.
    /// </summary>
    /// <remarks>
    /// TODO: Get the proper definitions for the statement below.
    /// This isn't full Damerau-Levenshtein, it's the optimal string alignment variant instead.
    /// </remarks>
    [Theory]
    [InlineData("ab--", "bxa--")]
    [InlineData("-ab-", "-bxa-")]
    [InlineData("--ab", "--bxa")]
    public void Lookup_InterruptedTransposition_IsNotAccepted(string word, string modifiedWord)
        => AssertNoSuggestions(word, modifiedWord, TwoEdits);

    /// <summary>
    /// Builds a dawg holding only <paramref name="dictionaryWord"/> and asserts that looking up
    /// <paramref name="query"/> within <paramref name="maxEdits"/> returns nothing.
    /// </summary>
    private static void AssertNoSuggestions(string dictionaryWord, string query, uint maxEdits)
    {
        using var dawg = DawgHelper.Create(dictionaryWord);

        var suggestions = dawg.Lookup(query, maxEdits).ToList();

        Assert.Empty(suggestions);
    }

    /// <summary>
    /// Builds a dawg holding only <paramref name="dictionaryWord"/> and asserts that looking up
    /// <paramref name="query"/> within <paramref name="maxEdits"/> returns exactly one result
    /// whose term is <paramref name="dictionaryWord"/>.
    /// </summary>
    private static void AssertSoleSuggestion(string dictionaryWord, string query, uint maxEdits)
    {
        using var dawg = DawgHelper.Create(dictionaryWord);

        var suggestions = dawg.Lookup(query, maxEdits).ToList();

        Assert.Single(suggestions);
        Assert.Equal(dictionaryWord, suggestions[0].Term);
    }
}
108+
}

0 commit comments

Comments
 (0)