1 module tcenal.dsl.lexer;
2 
3 import std.array : Appender;
4 import std.ascii : isAlpha, isAlphaNum, isWhite;
5 import std.range.primitives : empty;
6 import std.algorithm : startsWith;
7 import std.meta : AliasSeq;
8 
9 import tcenal.parser_combinator.token : Token;
10 
11 import compile_time_unittest : enableCompileTimeUnittest;
12 
13 mixin enableCompileTimeUnittest;
14 
15 
/**
 * Public entry point of the lexer: turns DSL source text into tokens.
 *
 * Params:
 *     src = the source text to tokenize
 *
 * Returns: the tokens produced by the module-private `root` function.
 */
Token[] lex(string src)
{
    return src.root();
}
unittest
{
    // One sample covering identifiers, operators, and a string literal.
    enum source = q{foo <- @foo_bar baz* <- "baz"};

    Token[] expected = [
        Token("foo", "identifier"),
        Token("<-"),
        Token("@"),
        Token("foo_bar", "identifier"),
        Token("baz", "identifier"),
        Token("*"),
        Token("<-"),
        Token("baz", "stringLiteral"),
    ];

    assert(lex(source) == expected);
}
36 
37 private:
38 
/**
 * Scans `src` from left to right and collects every token found.
 *
 * ASCII whitespace is skipped. At each position the lexer tries, in order:
 * the fixed operator/punctuation tokens, a double-quoted string literal,
 * and an identifier (starting with an ASCII letter or '_').
 *
 * Params:
 *     src = the source text to tokenize
 *
 * Returns: the tokens in the order they appear in `src`.
 *
 * Throws: Exception if a character cannot start any known token, or if
 *         `stringLiteral` fails on an unterminated literal.
 */
Token[] root(string src) {
    import std.conv : text;

    Appender!(Token[]) tokenAppender;

    // Remember the full length so errors can report where lexing stopped.
    immutable totalLength = src.length;

    // Order matters: "<-" must be tried before "<" so the longer operator
    // wins when both are a prefix of the remaining input.
    alias untypedTokens = AliasSeq!("<-", "@", "/", "*", "+", "?", "&", "!", "(", ")", "<", ">");

    loop:
    while (!src.empty) {
        if (src[0].isWhite()) {
            src = src[1..$];
            continue;
        }

        foreach (untypedToken; untypedTokens)
        {
            if (src.startsWith(untypedToken)) {
                tokenAppender.put(Token(untypedToken));
                src = src[untypedToken.length..$];

                continue loop;
            }
        }

        if (src[0] == '"')
        {
            // stringLiteral advances src past the closing quote.
            tokenAppender.put(stringLiteral(src));
            continue;
        }

        if (src[0].isAlpha() || src[0] == '_')
        {
            // identifier advances src past the identifier text.
            tokenAppender.put(identifier(src));
            continue;
        }

        throw new Exception(text(
            "lexer: unexpected character at byte offset ",
            totalLength - src.length
        ));
    }

    return tokenAppender.data;
}
77 
/**
 * Lexes a double-quoted string literal from the front of `src`.
 *
 * The character at index 0 is taken to be the opening quote; the literal
 * ends at the next '"'. No escape sequences are interpreted, so a literal
 * cannot contain a double quote.
 *
 * Params:
 *     src = remaining input; on success it is advanced past the closing quote
 *
 * Returns: a "stringLiteral" token holding the text between the quotes.
 *
 * Throws: Exception if no closing double quote is found.
 */
Token stringLiteral(ref string src) {
    // 0 doubles as "not found": index 0 is the opening quote and is skipped.
    size_t closingDoubleQuoteIndex;

    foreach (i, c; src) {
        if (i == 0) continue; // starting double quote

        if (c == '"')
        {
            closingDoubleQuoteIndex = i;
            break;
        }
    }

    if (closingDoubleQuoteIndex == 0)
        throw new Exception("lexer: unterminated string literal (missing closing '\"')");

    Token token = Token(src[1..closingDoubleQuoteIndex], "stringLiteral");
    src = src[(closingDoubleQuoteIndex + 1)..$];

    return token;
}
98 
/**
 * Lexes an identifier from the front of `src`.
 *
 * The identifier is the longest prefix made of ASCII letters, ASCII digits
 * and '_'. The caller is expected to have checked that `src` starts with a
 * valid identifier character; otherwise the token text is empty and `src`
 * is left unchanged.
 *
 * Params:
 *     src = remaining input; advanced past the identifier on return
 *
 * Returns: an "identifier" token holding the consumed prefix.
 */
Token identifier(ref string src) {
    // Default to "runs to end of input". Using src.length rather than 0 as
    // the default keeps a terminator at index 0 distinct from "none found".
    size_t endIndex = src.length;

    foreach (i, c; src) {
        if (!c.isAlphaNum() && c != '_')
        {
            endIndex = i;
            break;
        }
    }

    Token token = Token(src[0..endIndex], "identifier");
    src = src[endIndex..$];

    return token;
}