//Part 1: the lexer
//
// Turns a source string into a stream of tokens. Each call to Lex()
// returns the kind of the next token (one of the TOK_* values below) or 0
// when the input is exhausted. The token's payload is left in the CLexer:
//   TOK_NUMBER -> l->number
//   TOK_STRING -> l->name (the text between the quotes)
//   TOK_NAME   -> l->name (an identifier OR an operator/punctuation string)

//A U8 character can only hold 0x00-0xFF, so token kinds start at 0x100
//to stay clear of every single-character value.
#define TOK_NUMBER 0x100
#define TOK_STRING 0x101
#define TOK_NAME 0x102

//Sometimes we read one character too far while deciding where a token
//ends. Setting this flag makes the next LexGetNextChar() hand that
//character out again instead of consuming new input.
#define LEXF_USE_LAST_CHAR 1

class CLexer {
  U8 *src;            //Lexer's own copy of the source text (see LexerNew)
  I64 line,col,pos;   //Line/column counters (start at 1) and offset of the next unread char
  I64 last_tok,flags; //Kind returned by the latest Lex() call; LEXF_* bits
  F64 number;         //Payload of the latest TOK_NUMBER
  U8 name[STR_LEN];   //Payload of the latest TOK_NAME/TOK_STRING (NUL terminated)
  U8 last_ch;         //Latest value returned by LexGetNextChar (0 at end of input)
};

//Returns the next character of the source, or 0 when there is no more
//data. Keeps l->line and l->col up to date for error reporting.
//
//If LEXF_USE_LAST_CHAR is set, the flag is cleared and the previously
//returned character is returned again without consuming input.
U8 LexGetNextChar(CLexer *l) {
  U8 ch;
  if(l->flags&LEXF_USE_LAST_CHAR) {
    l->flags&=~LEXF_USE_LAST_CHAR;
    return l->last_ch;
  }
  ch=l->src[l->pos];
  if(!ch)
    //Record the end-of-input in last_ch too. Otherwise a push-back
    //requested right at the end of the source would replay the previous
    //character forever instead of reporting end of input.
    return l->last_ch=0;
  l->pos++;
  if(ch=='\n') { //New line: bump the line counter and restart the column
    l->line++;
    l->col=1;
  } else
    l->col++;
  return l->last_ch=ch;
}

//Creates a lexer positioned at the start of src_code (line 1, column 1).
//The lexer works on its own StrNew() copy of the text. All other fields
//start out zeroed by CAlloc.
CLexer *LexerNew(U8 *src_code) {
  CLexer *l=CAlloc(sizeof(CLexer));
  l->src=StrNew(src_code);
  l->line=1;
  l->pos=0;
  l->col=1;
  return l;
}

extern U0 LexError(CLexer *l,U8*,...);

//Helper for Lex(): lexes an operator that may be followed by '='
//(">" / ">=", "<" / "<=", "=" / "==", "!" / "!=").
//first is the operator character that Lex() has already consumed.
//Stores the operator text in l->name and returns TOK_NAME.
I64 LexOpMaybeEq(CLexer *l,U8 first) {
  l->name[0]=first;
  if(LexGetNextChar(l)=='=') {
    l->name[1]='=';
    l->name[2]=0;
  } else {
    l->name[1]=0;
    //The character we peeked at is not ours: put it back on the stream
    l->flags|=LEXF_USE_LAST_CHAR;
  }
  return l->last_tok=TOK_NAME;
}

//Reads the next token from l.
//
//Returns 0 at end of input, otherwise TOK_NUMBER, TOK_STRING or TOK_NAME.
//The same value is stored in l->last_tok and the payload is stored in
//l->number or l->name (see the top of the file).
//
//Calls LexError() (which throws) on an unterminated string, on a
//character that cannot start any token, and on a name or string that
//does not fit in l->name.
I64 Lex(CLexer *l) {
  U8 c,terminator;
  I64 advance_by,len;
  U8 *src_ptr;

  //Skip whitespace
  while(TRUE) {
    c=LexGetNextChar(l);
    if(!c) return l->last_tok=0;
    if(!Bt(char_bmp_white_space,c)) break;
  }

  switch(c) {
    case '0'...'9':
      //The first digit is already consumed, so the number starts one
      //character before l->pos.
      src_ptr=l->src+l->pos-1;
      //Str2F64 is relied on to leave src_ptr just past the parsed text
      l->number=Str2F64(src_ptr,&src_ptr);
      //Consume the remaining characters through LexGetNextChar so the
      //line/column bookkeeping stays correct.
      advance_by=src_ptr-(l->src+l->pos);
      while(--advance_by>=0)
        LexGetNextChar(l);
      return l->last_tok=TOK_NUMBER;

    case '\'':
    case '\"':
      //Eat characters until the matching quote is found
      terminator=c;
      len=0;
      while(c=LexGetNextChar(l)) {
        if(c==terminator) {
          l->name[len]=0;
          return l->last_tok=TOK_STRING;
        }
        if(len>=STR_LEN-1) //Keep room for the terminating 0
          LexError(l,"String is too long.");
        l->name[len++]=c;
      }
      //Ran out of input before the closing quote
      LexError(l,"Untemrinated string.");
      break;

    //Two-character operators work like a tiny state machine:
    //State 1: we have seen the first character of the operator.
    //State 2: peek at the next character; '=' completes the two-character
    //         form, anything else is pushed back for the next token.
    case '>':
    case '<':
    case '=':
    case '!':
      return LexOpMaybeEq(l,c);

    //Single-character operators and punctuation
    case ',':
    case ';':
    case '(':
    case ')':
    case '-':
    case '+':
    case '*':
    case '/':
    case '%':
    case '~':
      l->name[0]=c;
      l->name[1]=0;
      return l->last_tok=TOK_NAME;

    default:
      //Anything else must be a name made of alpha-numeric characters
      len=0;
      while(Bt(char_bmp_alpha_numeric,c)) {
        if(len>=STR_LEN-1) //Keep room for the terminating 0
          LexError(l,"Name is too long.");
        l->name[len++]=c;
        c=LexGetNextChar(l);
      }
      if(!len) //Nothing found: c cannot start any token
        LexError(l,"Expected a name.");
      l->name[len]=0;
      //We read one character past the name, so put it back
      l->flags|=LEXF_USE_LAST_CHAR;
      return l->last_tok=TOK_NAME;
  }
  //Only reachable if LexError() returns, which it does not (it throws)
  return l->last_tok=0;
}

//
// Filler
//
//Reports a lexing error and aborts by throwing 'Compiler'.
//msg is a print-style format string; the extra arguments are formatted
//into it with StrPrintJoin. The message is printed prefixed with the
//lexer's current line and column. Because the lexer may already have read
//ahead, the reported position can be slightly past the offending text.
U0 LexError(CLexer *l,U8 *msg,...) {
  msg=StrPrintJoin(NULL,msg,argc,argv);
  "At %d:%d %s",l->line,l->col,msg;
  Free(msg);
  throw('Compiler');
}

#if __CMD_LINE__
//
// Test: lex a small snippet and print each token
//
CLexer *l=LexerNew("a == 123 + 456; ");
I64 token;
while(token=Lex(l)) {
  switch(token) {
    case TOK_NAME:
      "$BLUE$NAME(%s)$FD$\n",l->name;
      break;
    case TOK_STRING:
      "$PURPLE$STRING(%s)$FD$\n",l->name;
      break;
    case TOK_NUMBER:
      "$RED$NUMBER(%n)$FD$\n",l->number;
      break;
  }
}
#endif