Add several string types.

lmb · lmb · commit 2ddb514f5737 · 2013-12-07T10:24:40.000Z
Namely VISIBLESTRING and T61STRING. Both string encodings are supported only in a very limited fashion;
see docs/T61String.md for a discussion of what full support would require. Safe comparison, etc. has not
been implemented yet.
diff --git a/docs/T61String.md b/docs/T61String.md
@@ -0,0 +1,145 @@
+This file is Copyright (c) 2003, 2006 Lev Walkin <vlm@lionet.info>. All rights
+reserved. Redistribution and modifications are permitted subject to BSD license.
+
+Originally part of the asn1c source code, file TeletexString.c. -- Lorenz Bauer
+
+Here is a formal attempt at creating a mapping from TeletexString
+(T61String) of the latest ASN.1 standard (X.680:2002) into the Unicode
+character set. -- Lev Walkin <vlm@lionet.info>
+
+The first thing to keep in mind is that TeletexString (T61String)
+is defined in ASN.1, and is not really a T.61 string.
+The T.61 standard is withdrawn by ITU-T and is no longer an authoritative
+reference. See http://www.itu.int/rec/T-REC-T.61
+
+The X.680 specifies TeletexString (T61String) as a combination of the
+character sets specified by the registration numbers listed in
+ISO International Register of Coded Character Sets to be used with
+Escape Sequences (ISO-2375):
+6, 87, 102, 103, 106, 107, 126, 144, 150, 153, 156, 164, 165, 168,
+plus SPACE and DELETE characters.
+In addition to that, the X.680 Table 6 NOTE 2 allows using register entries
+6 and 156 instead of 102 and 103.
+
+The ISO Register itself is available at http://www.itscj.ipsj.or.jp/ISO-IR/
+
+#6 is ASCII. http://www.itscj.ipsj.or.jp/ISO-IR/006.pdf
+        Escapes into:
+                G0: ESC 2/8 4/2 ("(B")
+                G1: ESC 2/9 4/2 (")B")
+        The range is [0x21 .. 0x7e]. Conversion into Unicode
+       is simple, because it has a one-to-one correspondence.
+#87 is a "Japanese Graphic Character Set for Information Interchange".
+        Is a multiple-byte set of 6877 characters.
+        The character set is JIS X 0208-1983 (originally JIS C 6226-1983).
+        Escapes into:
+                G0: ESC 2/4 4/2 ("$B")
+                G1: ESC 2/4 2/9 4/2 ("$)B")
+                G2: ESC 2/4 2/10 4/2 ("$*B")
+                G3: ESC 2/4 2/11 4/2 ("$+B")
+#102 is "Teletex Primary Set of Graphic Characters" and is almost ASCII.
+        Escapes into:
+                G0: ESC 2/8 7/5 ("(u")
+                G1: ESC 2/9 7/5 (")u")
+                G2: ESC 2/10 7/5 ("*u")
+                G3: ESC 2/11 7/5 ("+u")
+       It is almost identical to ASCII, except that the ASCII position for '$'
+        (DOLLAR SIGN) is filled with '¤' (CURRENCY SIGN), which is U+00A4.
+        Also, ASCII positions for '`', '\', '^', '{', '}', '~' are marked
+        as "should not be used".
+#103 is a supplementary set of characters used in combination with #102.
+        Escapes into:
+                G0: ESC 2/8 7/6 ("(v")
+                G1: ESC 2/9 7/6 (")v")
+                G2: ESC 2/10 7/6 ("*v")
+                G3: ESC 2/11 7/6 ("+v")
+        Some characters in that character set are combining characters,
+        which can only be restrictively used with certain basic Latin letters.
+        It can be thought of as a subset of #156 with the exception of 4/12
+        which is UNDERLINE in #103 and absent in #156.
+#106 is a primary set of control functions, used in combination with #107.
+        Escapes into:
+                C0: ESC 2/1 4/5 ("!E")
+        This set is so short I can list it here:
+                0x08        BS        BACKSPACE        -- same as Unicode
+                0x0a        LF        LINE FEED        -- same as Unicode
+                0x0c        FF        FORM FEED        -- same as Unicode
+                0x0d        CR        CARRIAGE RETURN        -- same as Unicode
+                0x0e        LS1        LOCKING SHIFT ONE
+                0x0f        LS0        LOCKING SHIFT ZERO
+                0x19        SS2        SINGLE SHIFT TWO
+                0x1a        SUB        SUBSTITUTE CHARACTER
+                0x1b        ESC        ESCAPE                -- same as Unicode
+                0x1d        SS3        SINGLE SHIFT THREE
+       The LS1 and LS0 are two magical functions which, respectively, invoke
+        the currently designated G1 or G0 set into positions 2/1 to 7/14.
+        The SS2 and SS3, respectively, invoke one character of the
+        currently designated set G2 and G3.
+        The SUB is wholly equivalent to U+001a (SUBSTITUTE)
+#107 is a supplementary set of control functions, used with #106.
+        Escapes into:
+                C1: ESC 2/2 4/8 ('"H')
+        This set contains three special control codes:
+                0x8b        PLD        PARTIAL LINE DOWN        -- similar to <SUB>
+                0x8c        PLU        PARTIAL LINE UP                -- similar to <SUP>
+                0x9b        CSI        CONTROL SEQUENCE INTRODUCER
+        This set is so out of world we can probably safely ignore it.
+#126 is a "Right-hand Part of the Latin/Greek Alphabet".
+        Comprises 90 characters, including accented letters.
+        Escapes into:
+                G1: ESC 2/13 4/6 ("-F")
+                G2: ESC 2/14 4/6 (".F")
+                G3: ESC 2/15 4/6 ("/F")
+        Note: This Registration is a subset of ISO-IR 227.
+#144 is a "Cyrillic part of the Latin/Cyrillic Alphabet".
+        Comprises 95 characters.
+        Escapes into:
+                G1: ESC 2/13 4/12 ("-L")
+                G2: ESC 2/14 4/12 (".L")
+                G3: ESC 2/15 4/12 ("/L")
+#150 is a "Greek Primary Set of Graphic Characters".
+        Comprises 94 characters.
+        Escapes into:
+                G0: ESC 2/8 2/1 4/0 ("(!@")
+                G1: ESC 2/9 2/1 4/0 (")!@")
+                G2: ESC 2/10 2/1 4/0 ("*!@")
+                G3: ESC 2/11 2/1 4/0 ("+!@")
+#153 is a "Basic Cyrillic Character Set for 8-bit codes".
+        Comprises 68 characters.
+        Escapes into:
+                G1: ESC 2/13 4/15 ("-O")
+                G2: ESC 2/14 4/15 (".O")
+                G3: ESC 2/15 4/15 ("/O")
+#156 is a "Supplementary Set of ISO/IEC 6937:1992" for use with #6
+        Comprises 87 characters.
+        Escapes into:
+                G1: ESC 2/13 5/2 ("-R")
+                G2: ESC 2/14 5/2 (".R")
+                G3: ESC 2/15 5/2 ("/R")
+#164 is a "Hebrew Supplementary Set of Graphic Characters"
+        Comprises 27 characters.
+        Escapes into:
+                G1: ESC 2/13 5/3 ("-S")
+                G2: ESC 2/14 5/3 (".S")
+                G3: ESC 2/15 5/3 ("/S")
+#165 is a set of "Codes of the Chinese graphic character set"
+        Is a multiple-byte set of 8446 characters.
+        Escapes into:
+                G0: ESC 2/4 2/8 4/5 ("$(E")
+                G1: ESC 2/4 2/9 4/5 ("$)E")
+                G2: ESC 2/4 2/10 4/5 ("$*E")
+                G3: ESC 2/4 2/11 4/5 ("$+E")
+#168 is a "Japanese Graphic Character Set for Information Interchange"
+        A multiple-byte set of 6879 characters updated from #87.
+        Escapes into:
+                G0: ESC 2/6 4/0 ESC 2/4 4/2 ("&@" "$B")
+                G1: ESC 2/6 4/0 ESC 2/4 2/9 4/2 ("&@" "$)B")
+                G2: ESC 2/6 4/0 ESC 2/4 2/10 4/2 ("&@" "$*B")
+                G3: ESC 2/6 4/0 ESC 2/4 2/11 4/2 ("&@" "$+B")
+
+The different registers reside at the following byte values:
+- C0: 0x00 - 0x1f
+- G0: 0x20 - 0x7f
+- C1: 0x80 - 0x9f
+- G2: 0xa0 - 0xff
+- G2 and G3: ???
diff --git a/include/asinine/asn1.h b/include/asinine/asn1.h
@@ -53,9 +53,11 @@ typedef enum asn1_universal_type {
 	ASN1_TYPE_SEQUENCE        = 16,
 	ASN1_TYPE_SET             = 17,
 	ASN1_TYPE_PRINTABLESTRING = 19,
+	ASN1_TYPE_T61STRING       = 20,
 	ASN1_TYPE_IA5STRING       = 22,
 	ASN1_TYPE_UTCTIME         = 23,
-	ASN1_TYPE_GENERALIZEDTIME = 24
+	ASN1_TYPE_GENERALIZEDTIME = 24,
+	ASN1_TYPE_VISIBLESTRING   = 26
 } asn1_universal_type_t;
 
 typedef unsigned int asn1_type_t;
diff --git a/include/asinine/x509.h b/include/asinine/x509.h
@@ -34,10 +34,8 @@ typedef enum x509_algorithm {
 } x509_algorithm_t;
 
 typedef struct {
-	asn1_token_t common_name;
-	asn1_token_t country_name;
-	asn1_token_t organization;
-	asn1_token_t organization_unit;
+	asn1_token_t root;
+	size_t num_rdns;
 } x509_name_t;
 
 struct x509_cert {
@@ -55,6 +53,7 @@ void x509_cert_init(x509_cert_t *cert);
 x509_err_t x509_parse(x509_cert_t *cert, const uint8_t *data, size_t num);
 x509_err_t x509_validate(const x509_cert_t *cert);
 
+bool x509_name_eq(const x509_name_t *a, const x509_name_t *b);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/asn1-types.c b/src/asn1-types.c
@@ -20,28 +20,48 @@ validate_string(const asn1_token_t *token)
 {
 	const uint8_t *data;
 
-	if (asn1_is(token, ASN1_CLASS_UNIVERSAL, ASN1_TYPE_PRINTABLESTRING)) {
+	if (token == NULL || token->class != ASN1_CLASS_UNIVERSAL) {
+		return false;
+	}
+
+	switch (token->type) {
+	case ASN1_TYPE_PRINTABLESTRING:
 		for (data = token->data; data < token->data + token->length; data++) {
-			if (*data == ' ') {
+			// Space
+			if (*data == 0x20) {
 				continue;
 			}
 
-			if (*data < '\'' || *data > 'z') {
+			// ' and z
+			if (*data < 0x27 || *data > 0x7a) {
 				return false;
 			}
 
-			if (*data == '*' || *data == ';' || *data == '<' || *data == '>' ||
-				*data == '@') {
+			// Illegal characters: *, ;, <, >, @
+			if (*data == 0x2a || *data == 0x3b || *data == 0x3c || *data == 0x3e
+				|| *data == 0x40) {
 				return false;
 			}
 		}
-	} else if (asn1_is(token, ASN1_CLASS_UNIVERSAL, ASN1_TYPE_IA5STRING)) {
+		break;
+
+	case ASN1_TYPE_IA5STRING:
+	case ASN1_TYPE_VISIBLESTRING:
+	case ASN1_TYPE_T61STRING:
 		for (data = token->data; data < token->data + token->length; data++) {
-			if (*data < 0 || *data > 127) {
+			/* Strictly speaking, control codes are allowed for IA5STRING, but
+			 * since we don't have a way of dealing with code-page switching we
+			 * restrict the type. This is non-conformant to the spec.
+			 * Same goes for T61String, which can switch code pages mid-stream.
+			 * We assume that the initial code-page is #6 (ASCII), and flag
+			 * switching as an error. */
+			if (*data < 0x20 || *data > 0x7f) {
 				return false;
 			}
 		}
-	} else if (asn1_is(token, ASN1_CLASS_UNIVERSAL, ASN1_TYPE_UTF8STRING)) {
+		break;
+
+	case ASN1_TYPE_UTF8STRING: {
 		enum {
 			LEADING,
 			CONTINUATION
@@ -89,7 +109,10 @@ validate_string(const asn1_token_t *token)
 				}
 			}
 		}
-	} else {
+		break;
+	}
+
+	default:
 		return false;
 	}
 
@@ -196,7 +219,6 @@ asn1_time(const asn1_token_t *token, asn1_time_t *time)
 		    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
 	};
 
-	const char * const end = ((char *)token->data) + token->length;
 	const char *data = (char *)token->data;
 
 	union {
@@ -230,7 +252,7 @@ asn1_time(const asn1_token_t *token, asn1_time_t *time)
 
 	if (*data != 'Z') {
 		// Try to decode seconds
-		if (data + 2 >= end) {
+		if (data + 2 >= (char*)token->end) {
 			// Need at least another char for seconds, plus 'Z' or timezone
 			return ASN1_ERROR_INVALID;
 		}
@@ -416,5 +438,7 @@ asn1_is_string(const asn1_token_t *token)
 		(token->class == ASN1_CLASS_UNIVERSAL) &&
 		(token->type == ASN1_TYPE_PRINTABLESTRING ||
 			token->type == ASN1_TYPE_IA5STRING ||
-			token->type == ASN1_TYPE_UTF8STRING);
+			token->type == ASN1_TYPE_UTF8STRING ||
+			token->type == ASN1_TYPE_VISIBLESTRING ||
+			token->type == ASN1_TYPE_T61STRING);
 }
diff --git a/src/tests/asn1-tests.c b/src/tests/asn1-tests.c
@@ -98,7 +98,7 @@ test_asn1_oid_to_string(void)
 	const asn1_oid_t oid = ASN1_OID(1,2,3);
 	const asn1_oid_t invalid_oid = ASN1_OID(1);
 
-	check(asn1_oid_to_string(&oid, oid_str, sizeof(oid_str)) == ASN1_OK);
+	check(asn1_oid_to_string(&oid, oid_str, sizeof(oid_str)));
 	check(strncmp("1.2.3", oid_str, 5) == 0);
 
 	check(!asn1_oid_to_string(&invalid_oid, oid_str, sizeof(oid_str)));
@@ -327,6 +327,8 @@ test_asn1_all(int *tests_run)
 {
 	declare_set;
 
+	printf("sizeof asn1_token_t: %lu\n", sizeof(asn1_token_t));
+
 	run_test(test_asn1_oid_decode);
 	run_test(test_asn1_oid_decode_invalid);
 	run_test(test_asn1_oid_to_string);
diff --git a/src/tests/x509-tests.c b/src/tests/x509-tests.c
@@ -9,20 +9,33 @@
 #include "asinine/tests/certs.h"
 
 static char*
-test_x509_parse(void)
+test_x509_certs(void)
 {
 	x509_cert_t cert;
 	size_t i;
 	bool errors;
 
 	for (errors = false, i = 0; i < x509_certs_num; i++) {
+		const char * const host = x509_certs[i].host;
 		const uint8_t * const data = x509_certs[i].data;
 		const size_t length = x509_certs[i].length;
 
-		if (x509_parse(&cert, data, length) != X509_OK) {
-			errors = true;
-
-			printf("> %s (#%lu) failed to parse\n", x509_certs[i].host, i);
+		switch (x509_parse(&cert, data, length)) {
+			case X509_OK: {
+				continue;
+			}
+
+			case X509_ERROR_UNSUPPORTED: {
+				printf("> %s (#%lu) uses unsupported features\n", host, i);
+				errors = true;
+				break;
+			}
+
+			default: {
+				printf("> %s (#%lu) failed to parse\n", host, i);
+				errors = true;
+				break;
+			}
 		}
 	}
 
@@ -36,7 +49,9 @@ test_x509_all(int *tests_run)
 {
 	declare_set;
 
-	run_test(test_x509_parse);
+	printf("sizeof x509_cert_t: %lu\n", sizeof(x509_cert_t));
+
+	run_test(test_x509_certs);
 
 	end_set;
 }
diff --git a/src/x509.c b/src/x509.c