Skip to content

Commit 33690aa

Browse files
committed
Separate SRM from Automata
Code has been cleaned up and reorganized, with functionality that is not essential to the core task of matching regular expressions stripped out.
0 parents  commit 33690aa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+36281
-0
lines changed

.gitignore

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# IDE directories
2+
.vs/
3+
.vscode/
4+
5+
# Build directories
6+
bin/
7+
obj/

nuget.config

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<configuration>
3+
<packageSources>
4+
<!--To inherit the global NuGet package sources remove the <clear/> line below -->
5+
<clear />
6+
<add key="nuget" value="https://api.nuget.org/v3/index.json" />
7+
</packageSources>
8+
</configuration>

srm.sln

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio 15
4+
VisualStudioVersion = 15.0.26124.0
5+
MinimumVisualStudioVersion = 15.0.26124.0
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "srm", "srm\srm.csproj", "{69ED8C3B-1140-441B-8FEB-AA05855C84F5}"
7+
EndProject
8+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "tests", "tests\tests.csproj", "{70878658-B583-496F-A113-BE95FDF2E4EF}"
9+
EndProject
10+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "unicode_table_gen", "unicode_table_gen\unicode_table_gen.csproj", "{548048A4-FC83-41E1-A070-BDA5B814C254}"
11+
EndProject
12+
Global
13+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
14+
Debug|Any CPU = Debug|Any CPU
15+
Debug|x64 = Debug|x64
16+
Debug|x86 = Debug|x86
17+
Release|Any CPU = Release|Any CPU
18+
Release|x64 = Release|x64
19+
Release|x86 = Release|x86
20+
EndGlobalSection
21+
GlobalSection(SolutionProperties) = preSolution
22+
HideSolutionNode = FALSE
23+
EndGlobalSection
24+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
25+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
26+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|Any CPU.Build.0 = Debug|Any CPU
27+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x64.ActiveCfg = Debug|Any CPU
28+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x64.Build.0 = Debug|Any CPU
29+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x86.ActiveCfg = Debug|Any CPU
30+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x86.Build.0 = Debug|Any CPU
31+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|Any CPU.ActiveCfg = Release|Any CPU
32+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|Any CPU.Build.0 = Release|Any CPU
33+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x64.ActiveCfg = Release|Any CPU
34+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x64.Build.0 = Release|Any CPU
35+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x86.ActiveCfg = Release|Any CPU
36+
{69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x86.Build.0 = Release|Any CPU
37+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
38+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|Any CPU.Build.0 = Debug|Any CPU
39+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x64.ActiveCfg = Debug|Any CPU
40+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x64.Build.0 = Debug|Any CPU
41+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x86.ActiveCfg = Debug|Any CPU
42+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x86.Build.0 = Debug|Any CPU
43+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|Any CPU.ActiveCfg = Release|Any CPU
44+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|Any CPU.Build.0 = Release|Any CPU
45+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x64.ActiveCfg = Release|Any CPU
46+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x64.Build.0 = Release|Any CPU
47+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x86.ActiveCfg = Release|Any CPU
48+
{70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x86.Build.0 = Release|Any CPU
49+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
50+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|Any CPU.Build.0 = Debug|Any CPU
51+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x64.ActiveCfg = Debug|Any CPU
52+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x64.Build.0 = Debug|Any CPU
53+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x86.ActiveCfg = Debug|Any CPU
54+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x86.Build.0 = Debug|Any CPU
55+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|Any CPU.ActiveCfg = Release|Any CPU
56+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|Any CPU.Build.0 = Release|Any CPU
57+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x64.ActiveCfg = Release|Any CPU
58+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x64.Build.0 = Release|Any CPU
59+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x86.ActiveCfg = Release|Any CPU
60+
{548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x86.Build.0 = Release|Any CPU
61+
EndGlobalSection
62+
EndGlobal

srm/AutomataException.cs

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace Microsoft.Automata
6+
{
7+
/// <summary>
/// Exception thrown by the automata constructions.
/// </summary>
public class AutomataException : Exception
{
    /// <summary>Message used for <see cref="AutomataExceptionKind.UnrecognizedRegex"/>.</summary>
    public const string UnrecognizedRegex = "Unrecognized regex construct";

    /// <summary>Message used for <see cref="AutomataExceptionKind.CharSetMustBeNonempty"/>.</summary>
    public const string CharSetMustBeNonempty = "Set must be nonempty";

    /// <summary>Message used for <see cref="AutomataExceptionKind.CharacterEncodingIsUnspecified"/>.</summary>
    public const string CharacterEncodingIsUnspecified = "Character encoding is unspecified";

    /// <summary>Message used for <see cref="AutomataExceptionKind.InternalError"/>.</summary>
    public const string InternalError = "Internal error";

    /// <summary>
    /// The kind of this exception. It is <see cref="AutomataExceptionKind.Unspecified"/>
    /// whenever the exception was created from a free-form message.
    /// </summary>
    public readonly AutomataExceptionKind kind;

    /// <summary>
    /// Creates an exception of unspecified kind with the given message.
    /// </summary>
    /// <param name="message">text describing the failure</param>
    public AutomataException(string message)
        : base(message)
    {
        this.kind = AutomataExceptionKind.Unspecified;
    }

    /// <summary>
    /// Creates an exception of unspecified kind with the given message and inner exception.
    /// </summary>
    /// <param name="message">text describing the failure</param>
    /// <param name="innerException">exception that caused this one</param>
    public AutomataException(string message, Exception innerException)
        : base(message, innerException)
    {
        this.kind = AutomataExceptionKind.Unspecified;
    }

    /// <summary>
    /// Creates an exception of the given kind; the message is derived from the kind.
    /// </summary>
    /// <param name="kind">the kind of failure</param>
    public AutomataException(AutomataExceptionKind kind)
        : base(GetMessage(kind))
    {
        this.kind = kind;
    }

    /// <summary>
    /// Creates an exception of the given kind with an inner exception;
    /// the message is derived from the kind.
    /// </summary>
    /// <param name="kind">the kind of failure</param>
    /// <param name="innerException">exception that caused this one</param>
    public AutomataException(AutomataExceptionKind kind, Exception innerException)
        : base(GetMessage(kind), innerException)
    {
        this.kind = kind;
    }

    /// <summary>
    /// Maps a kind to its message: four kinds have a dedicated message constant,
    /// every other kind falls back to the name of the enum member.
    /// </summary>
    private static string GetMessage(AutomataExceptionKind kind)
    {
        if (kind == AutomataExceptionKind.CharacterEncodingIsUnspecified)
        {
            return CharacterEncodingIsUnspecified;
        }

        if (kind == AutomataExceptionKind.CharSetMustBeNonempty)
        {
            return CharSetMustBeNonempty;
        }

        if (kind == AutomataExceptionKind.UnrecognizedRegex)
        {
            return UnrecognizedRegex;
        }

        if (kind == AutomataExceptionKind.InternalError)
        {
            return InternalError;
        }

        return kind.ToString();
    }
}
79+
80+
81+
/// <summary>
/// Kinds of exceptions that may be thrown by the Automata library operations.
/// </summary>
/// <remarks>
/// The explicit values equal the zero-based declaration order of the members.
/// AutomataException maps the first four kinds to dedicated message constants;
/// every other kind uses its own member name as the message.
/// </remarks>
public enum AutomataExceptionKind
{
    // Kinds with a dedicated message constant in AutomataException.
    UnrecognizedRegex = 0,
    CharSetMustBeNonempty = 1,
    CharacterEncodingIsUnspecified = 2,
    InternalError = 3,

    // Kind assigned when an exception is constructed from a free-form message.
    Unspecified = 4,

    // Remaining kinds; their message is the member name itself.
    InvalidArguments = 5,
    CharSetMustBeNontrivial = 6,
    CompactSerializationNodeLimitViolation = 7,
    CompactSerializationBitLimitViolation = 8,
    CompactDeserializationError = 9,
    SetIsEmpty = 10,
    InvalidArgument = 11,
    IncompatibleAlgebras = 12,
    NotSupported = 13,
    BooleanAlgebraIsNotAtomic = 14,
    OrdinalIsTooLarge = 15,
    UnexpectedMTBDDTerminal = 16,
    AlgebraMustBeCharSetSolver = 17,
    MTBDDsNotSupportedForThisOperation = 18,
    BDDSerializationNodeLimitViolation = 19,
    BDDSerializationBitLimitViolation = 20,
    BDDDeserializationError = 21,
    BitOutOfRange = 22,
    InternalError_SymbolicRegex = 23,
    MustNotAcceptEmptyString = 24,
    NrOfMintermsCanBeAtMost64 = 25,
}
113+
}

srm/Match.cs

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
namespace Microsoft.Automata
2+
{
3+
/// <summary>
/// A match found in an input string, identified by its start index and its length.
/// Two matches are equal when both their index and their length are equal.
/// </summary>
public readonly struct Match : System.IEquatable<Match>
{
    /// <summary>Start index of the match, as supplied to the constructor.</summary>
    public int Index { get; }

    /// <summary>Length of the match, as supplied to the constructor.</summary>
    public int Length { get; }

    /// <summary>
    /// Creates a match with the given start index and length.
    /// </summary>
    /// <param name="index">start index of the match</param>
    /// <param name="length">length of the match</param>
    public Match(int index, int length)
    {
        Index = index;
        Length = length;
    }

    /// <summary>Returns true iff both matches have the same index and length.</summary>
    public static bool operator ==(Match left, Match right)
        => left.Index == right.Index && left.Length == right.Length;

    /// <summary>Returns true iff the matches differ in index or length.</summary>
    public static bool operator !=(Match left, Match right) => !(left == right);

    /// <summary>
    /// Strongly typed equality; lets generic collections and comparers
    /// compare matches without boxing.
    /// </summary>
    public bool Equals(Match other) => this == other;

    /// <summary>Returns true iff <paramref name="obj"/> is a <see cref="Match"/> equal to this one.</summary>
    public override bool Equals(object obj) => obj is Match other && Equals(other);

    /// <summary>Hash code combining <see cref="Index"/> and <see cref="Length"/>, consistent with equality.</summary>
    public override int GetHashCode() => System.HashCode.Combine(Index, Length);
}
23+
}

srm/Regex.cs

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Runtime.Serialization;
5+
using System.Runtime.Serialization.Formatters.Binary;
6+
7+
namespace Microsoft.Automata
8+
{
9+
/// <summary>
/// Regular expression matcher backed by a symbolic regex. The pattern is converted
/// once in the constructor; the resulting matcher can be serialized and deserialized.
/// </summary>
[Serializable]
public class Regex
{
    // Shared by all instances; both are created once in the static constructor.
    private static readonly CharSetSolver solver;
    private static readonly RegexToAutomatonConverter<BDD> converter;

    static Regex()
    {
        solver = new CharSetSolver();
        converter = new RegexToAutomatonConverter<BDD>(solver);
    }

    // The only instance state; all matching calls are forwarded to it.
    private IMatcher matcher;

    /// <summary>
    /// Constructs a matcher for the given pattern using <see cref="RegexOptions.None"/>.
    /// </summary>
    /// <param name="pattern">the regex pattern</param>
    public Regex(string pattern) : this(pattern, RegexOptions.None) { }

    /// <summary>
    /// Constructs a matcher for the given pattern and options.
    /// </summary>
    /// <param name="pattern">the regex pattern</param>
    /// <param name="options">regex options passed to the converter</param>
    public Regex(string pattern, RegexOptions options)
    {
        var root = converter.ConvertToSymbolicRegex(pattern, options, keepAnchors: true);
        var partition = root.ComputeMinterms();
        if (partition.Length > 64)
        {
            // More than 64 bits are needed to represent a set of minterms.
            matcher = new SymbolicRegexBV(root, solver, converter.srBuilder, partition);
        }
        else
        {
            // 64 bits are enough to represent a set of minterms.
            matcher = new SymbolicRegexUInt64(root, solver, converter.srBuilder, partition);
        }
    }

    /// <summary>
    /// Returns true iff the input string matches.
    /// </summary>
    /// <param name="input">given input string</param>
    /// <param name="startat">start position in the input</param>
    /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
    public bool IsMatch(string input, int startat = 0, int endat = -1)
        => matcher.IsMatch(input, startat, endat);

    /// <summary>
    /// Returns all matches as pairs (startindex, length) in the input string.
    /// </summary>
    /// <param name="input">given input string</param>
    /// <param name="limit">as soon as this many matches have been found the search terminates, 0 or negative value means that there is no bound, default is 0</param>
    /// <param name="startat">start position in the input, default is 0</param>
    /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
    public List<Match> Matches(string input, int limit = 0, int startat = 0, int endat = -1)
        => matcher.Matches(input, limit, startat, endat);

    /// <summary>
    /// Serialize this symbolic regex matcher to the given file.
    /// If formatter is null then an instance of
    /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
    /// </summary>
    /// <param name="file">file where the serialization is stored</param>
    /// <param name="formatter">given formatter</param>
    public void Serialize(string file, IFormatter formatter = null)
    {
        // 'using' guarantees the file handle is released even if serialization throws.
        using (var stream = new FileStream(file, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            Serialize(stream, formatter);
        }
    }

    /// <summary>
    /// Serialize this symbolic regex matcher to the given stream.
    /// If formatter is null then an instance of
    /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
    /// The stream is not closed by this method.
    /// </summary>
    /// <param name="stream">stream where the serialization is stored</param>
    /// <param name="formatter">given formatter</param>
    public void Serialize(Stream stream, IFormatter formatter = null)
    {
        IFormatter effectiveFormatter = formatter ?? new BinaryFormatter();
        effectiveFormatter.Serialize(stream, this);
    }

    /// <summary>
    /// Deserialize the matcher of a symbolic regex from the given file using the given formatter.
    /// If formatter is null then an instance of
    /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
    /// </summary>
    /// <param name="file">source file of the serialized matcher</param>
    /// <param name="formatter">given formatter</param>
    /// <returns>the deserialized regex</returns>
    public static Regex Deserialize(string file, IFormatter formatter = null)
    {
        // 'using' guarantees the file handle is released even if deserialization throws.
        using (Stream stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.None))
        {
            return Deserialize(stream, formatter);
        }
    }

    /// <summary>
    /// Deserialize the matcher of a symbolic regex from the given stream using the given formatter.
    /// If formatter is null then an instance of
    /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
    /// The stream is not closed by this method.
    /// </summary>
    /// <param name="stream">source stream of the serialized matcher</param>
    /// <param name="formatter">given formatter</param>
    /// <returns>the deserialized regex</returns>
    public static Regex Deserialize(Stream stream, IFormatter formatter = null)
    {
        // SECURITY NOTE(review): BinaryFormatter deserialization is unsafe on untrusted
        // data; only feed this method streams produced by a trusted Serialize call,
        // or pass a safer formatter.
        IFormatter effectiveFormatter = formatter ?? new BinaryFormatter();
        return (Regex)effectiveFormatter.Deserialize(stream);
    }
}
119+
}

0 commit comments

Comments
 (0)