/**
 * Lexer for the tcenal grammar DSL.
 *
 * Turns DSL source text into a flat array of `Token`s. Three kinds of token
 * are produced:
 *
 * $(UL
 *   $(LI punctuation/operators, built as `Token(text)`;)
 *   $(LI identifiers, built as `Token(text, "identifier")`;)
 *   $(LI double-quoted string literals, built as
 *        `Token(contents, "stringLiteral")` with the quotes stripped.)
 * )
 *
 * ASCII whitespace separates tokens and is otherwise discarded.
 */
module tcenal.dsl.lexer;

import std.array : Appender;
import std.ascii : isAlpha, isAlphaNum, isWhite;
import std.range.primitives : empty;
import std.algorithm : startsWith;
import std.meta : AliasSeq;

import tcenal.parser_combinator.token : Token;

import compile_time_unittest : enableCompileTimeUnittest;

mixin enableCompileTimeUnittest;


/**
 * Tokenizes a piece of DSL source.
 *
 * Params:
 *   src = the DSL source text
 *
 * Returns: the tokens of `src` in source order (empty when `src` contains
 *          only whitespace or nothing at all).
 *
 * Throws: `Exception` when `src` contains a character that starts no known
 *         token, or a string literal without a closing double quote.
 */
Token[] lex(string src)
{
    return root(src);
}
unittest
{
    assert(
        lex(q{foo <- @foo_bar baz* <- "baz"}) ==
        [
            Token("foo", "identifier"),
            Token("<-"),
            Token("@"),
            Token("foo_bar", "identifier"),
            Token("baz", "identifier"),
            Token("*"),
            Token("<-"),
            Token("baz", "stringLiteral"),
        ]
    );

    // An identifier that runs up to the very end of the input.
    assert(lex(q{foo}) == [Token("foo", "identifier")]);
}

private:

/**
 * Main lexing loop: repeatedly strips one token (or one whitespace
 * character) off the front of `src` until nothing is left.
 *
 * Params:
 *   src = the DSL source text
 *
 * Returns: the tokens collected, in source order.
 *
 * Throws: `Exception` on input that starts no known token, and whatever
 *         `stringLiteral` throws.
 */
Token[] root(string src)
{
    // Order matters: "<-" must be tried before "<" so that the longer
    // operator wins.
    alias untypedTokens = AliasSeq!("<-", "@", "/", "*", "+", "?", "&", "!", "(", ")", "<", ">");

    Appender!(Token[]) tokenAppender;

    loop:
    while (!src.empty)
    {
        if (src[0].isWhite())
        {
            src = src[1 .. $];
            continue;
        }

        // Compile-time sequence: this loop is unrolled, one check per operator.
        foreach (untypedToken; untypedTokens)
        {
            if (src.startsWith(untypedToken))
            {
                tokenAppender.put(Token(untypedToken));
                src = src[untypedToken.length .. $];

                continue loop;
            }
        }

        if (src[0] == '"')
        {
            // stringLiteral advances src past the closing quote.
            tokenAppender.put(stringLiteral(src));
            continue;
        }

        if (src[0].isAlpha() || src[0] == '_')
        {
            // identifier advances src past the identifier.
            tokenAppender.put(identifier(src));
            continue;
        }

        throw new Exception("tcenal DSL lexer: unexpected input starting at: " ~ src);
    }

    return tokenAppender.data;
}

/**
 * Lexes a double-quoted string literal from the front of `src`.
 *
 * The caller must ensure `src` starts with the opening double quote. The
 * literal ends at the next double quote; no escape sequences are recognised.
 *
 * Params:
 *   src = remaining source; on success it is advanced past the closing quote
 *
 * Returns: `Token(contents, "stringLiteral")`, where `contents` is the text
 *          between the quotes (possibly empty).
 *
 * Throws: `Exception` when there is no closing double quote.
 */
Token stringLiteral(ref string src)
{
    // Index 0 holds the opening quote, so the search starts at 1.
    for (size_t i = 1; i < src.length; ++i)
    {
        if (src[i] == '"')
        {
            Token token = Token(src[1 .. i], "stringLiteral");
            src = src[i + 1 .. $];

            return token;
        }
    }

    throw new Exception("tcenal DSL lexer: unterminated string literal: " ~ src);
}

/**
 * Lexes an identifier from the front of `src`.
 *
 * Consumes the longest prefix made of ASCII letters, ASCII digits and
 * underscores. The caller is expected to have checked that `src` starts with
 * a letter or an underscore; if it does not, the resulting token text is
 * empty and `src` is left untouched.
 *
 * Params:
 *   src = remaining source; advanced past the identifier
 *
 * Returns: `Token(text, "identifier")`.
 */
Token identifier(ref string src)
{
    // One past the last identifier character; reaches src.length when the
    // identifier runs to the end of the input.
    size_t end = 0;
    while (end < src.length && (src[end].isAlphaNum() || src[end] == '_'))
    {
        ++end;
    }

    Token token = Token(src[0 .. end], "identifier");
    src = src[end .. $];

    return token;
}