// Token kinds returned by Lex(). They sit above the 8-bit range so they stay
// distinct from single-character tokens, which are returned as the character
// code itself. Operators are returned as their characters packed into an I64.
#define TOK_NUM 0x100
#define TOK_NAME 0x101
#define TOK_STR 0x102
#define TOK_CHR 0x103
// Bit index in CLexer.flags: when set, the next LexGetChar() hands back
// last_chr again instead of consuming input (one-character pushback).
#define LEXf_USE_LAST_CHAR 1

// Lexer state.
//   filename  - name used in error messages (may be empty)
//   str       - text of the most recent TOK_NAME / TOK_STR
//   body      - private copy of the input text
//   flags     - LEXf_* bits
//   last_chr  - last character returned by LexGetChar()
//   ln, col   - 1-based line / column of the next unread character
//   pos       - offset of the next unread character in body
//   tok       - most recent token
//   chr       - character value of the most recent TOK_CHR
//   num       - value of the most recent TOK_NUM
class CLexer {
  U8 filename[STR_LEN],str[STR_LEN];
  U8 *body;
  I32 flags,last_chr;
  I64 col,ln,pos,tok,chr;
  F64 num;
};

// Allocates a lexer over a private copy of buf. filename is optional and is
// only used when reporting errors. Release the result with LexerDel().
CLexer *LexerNew(U8 *buf,U8 *filename=NULL) {
  CLexer *l=CAlloc(sizeof(CLexer));
  l->ln=1;
  l->col=1;
  if(filename)
    StrCpy(l->filename,filename);
  l->body=StrNew(buf);
  return l;
}

// Frees a lexer created by LexerNew(), including its copy of the input.
U0 LexerDel(CLexer *l) {
  Free(l->body);
  Free(l);
}

// Returns the next input character, or 0 at end of input. Honors a pending
// pushback (LEXf_USE_LAST_CHAR) by returning last_chr without consuming.
U8 LexGetChar(CLexer *l) {
  U8 ret;
  if(Btr(&l->flags,LEXf_USE_LAST_CHAR))
    return l->last_chr;
  ret=l->body[l->pos];
  if(ret) {
    // Never step past the terminating NUL: repeated reads at end of input
    // keep returning 0 instead of walking off the buffer.
    l->pos++;
    if(ret=='\n') {
      l->ln++;
      l->col=1;
    } else
      l->col++;
  }
  return l->last_chr=ret;
}

// Reads an unsigned integer in the given radix (digits 0-9 then A-Z,
// case-insensitive). Stops at the first character that is not a digit of that
// radix and pushes it back. Returns 0 when no digit was read.
I64 LexInt(CLexer *l,I64 radix=10) {
  I64 r=0,ch,digit;
  while(ch=LexGetChar(l)) {
    ch=ToUpper(ch);
    if('0'<=ch<='9') {
      digit=ch-'0';
    } else if('A'<=ch<='Z') {
      digit=ch-'A'+10;
    } else
      break;
    if(digit>=radix)
      break;
    r*=radix;
    r+=digit;
  }
  Bts(&l->flags,LEXf_USE_LAST_CHAR);
  return r;
}

// Reads the decimal digits that follow a '.' and returns their value as a
// fraction in [0,1). Digits are counted rather than inferred from the value so
// leading zeros keep their weight ("05" -> 0.05). The terminating character is
// pushed back.
F64 LexFrac(CLexer *l) {
  I64 ch,digits=0;
  F64 r=0;
  while(ch=LexGetChar(l)) {
    if(!('0'<=ch<='9'))
      break;
    r=r*10+(ch-'0');
    digits++;
  }
  Bts(&l->flags,LEXf_USE_LAST_CHAR);
  return r*Pow10(-digits);
}

// Prints "(file:line,col) message" via PrintErr and throws 'Shlong'.
// fmt and the variadic arguments are Print()-style.
U0 LexExcept(CLexer *l,U8 *fmt,...) {
  // StrPrintJoin is the argc/argv-taking form; passing argc/argv to the
  // variadic StrPrint would format them instead of the caller's arguments.
  U8 *msg=StrPrintJoin(NULL,fmt,argc,argv);
  PrintErr("(%s:%d,%d) %s\n",l->filename,l->ln,l->col,msg);
  Free(msg);
  throw('Shlong');
}

// TRUE if ch can be part of an operator token.
I64 IsOperatorChr(I64 ch) {
  switch(ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '&':
    case '=':
    case '>':
    case '<':
    case '~':
    case '!':
      return TRUE;
  }
  return FALSE;
}

// Reads the next token, stores it in l->tok and returns it.
//   TOK_NUM   - value in l->num (decimal, <radix>r<digits>, fraction, exponent,
//               optional leading '-')
//   TOK_STR   - text between single quotes in l->str
//   TOK_CHR   - character after '$' in l->chr
//   TOK_NAME  - identifier in l->str
//   operators - up to 7 operator characters packed into the I64 token
//   otherwise - the punctuation character itself; 0 at end of input
// Throws through LexExcept() on malformed input.
I64 Lex(CLexer *l) {
  I64 radix=10;
  I64 idx;
  F64 num=0,mul=1.;
  U8 ch;
again:
  ch=LexGetChar(l);
  if(Bt(char_bmp_white_space,ch))
    goto again;
  if(Bt(char_bmp_dec_numeric,ch)) {
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    //Radix?
n:
    // The leading integer is either the whole number or, when followed by
    // 'r', the radix of the digits that come next.
    radix=LexInt(l);
    ch=LexGetChar(l);
    if(ch=='R'||ch=='r') {
      num=LexInt(l,radix);
    } else if(ch=='.') {
      num=radix;
dot:
      num+=LexFrac(l);
exp:
      ch=LexGetChar(l);
      if(ch=='e'||ch=='E') {
        num*=Pow10(LexInt(l));
      } else
        Bts(&l->flags,LEXf_USE_LAST_CHAR);
    } else {
      num=radix;
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      goto exp;
    }
    l->num=num*mul;
    return l->tok=TOK_NUM;
  } else if(ch=='\'') {
    idx=0;
    do {
      ch=LexGetChar(l);
      if(ch=='\'') break;
      if(!ch)
        LexExcept(l,"Unterminated string");
      if(idx+1>=STR_LEN)
        LexExcept(l,"String is too big");
      l->str[idx++]=ch;
    } while(TRUE);
    l->str[idx]=0;
    return l->tok=TOK_STR;
  } else if(IsOperatorChr(ch)) {
    idx=0;
    l->tok=0;
    if(ch=='-') {
      l->tok.u8[idx++]=ch;
      ch=LexGetChar(l);
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      if(Bt(char_bmp_dec_numeric,ch)) {
        // '-' directly followed by a digit is a negative literal.
        mul=-1.;
        goto n;
      }
    }
    // Re-read the current character so the loop below packs it too.
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    do {
      ch=LexGetChar(l);
      if(!IsOperatorChr(ch)) break;
      // The token is an I64: room for 7 characters plus a zero byte.
      if(idx+1>=8)
        LexExcept(l,"Invalid operator '%c'",ch);
      l->tok.u8[idx++]=ch;
    } while(TRUE);
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    return l->tok;
  } else if(ch=='$') {
    l->chr=LexGetChar(l);
    return l->tok=TOK_CHR;
  }
  switch(ch) {
    case '[':
    case ']':
    case '(':
    case ')':
    case ':':
    case '_':
    case '|':
    case ';':
    case '^':
    case '#':
    case 0:
      return l->tok=ch;
    case '.':
      // ".5" style literal: hand over to the fraction parser with num==0.
      if(Bt(char_bmp_dec_numeric,LexGetChar(l))) {
        Bts(&l->flags,LEXf_USE_LAST_CHAR);
        goto dot;
      }
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      return l->tok='.';
  }
  if(Bt(char_bmp_alpha_numeric,ch)) {
    idx=0;
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    do {
      ch=LexGetChar(l);
      if(!Bt(char_bmp_alpha_numeric,ch)) break;
      if(idx+1>=STR_LEN)
        LexExcept(l,"Name is too big");
      l->str[idx++]=ch;
    } while(TRUE);
    l->str[idx]=0;
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    return l->tok=TOK_NAME;
  }
  LexExcept(l,"Unexpected character '%c'.",ch);
}

// Demo: lex a Smalltalk sample and print one line per token.
U8 *test="exampleWithNumber: x"
"    | y |"
"    true & false not & (nil isNil) ifFalse: [self halt]."
"    y := self size + super size."
"    #($a #a 'a' 1 1.0 .1 .1e10)"
"        do: [ :each |"
"            Transcript show: (each class name);"
"                       show: ' ']."
"    ^x < y";
CLexer *l=LexerNew(test);
while(Lex(l)) {
  switch(l->tok) {
    case TOK_CHR:
      "CHR:%c\n",l->chr;
      break;
    case TOK_NAME:
      "NAME:%s\n",l->str;
      break;
    case TOK_STR:
      "STR:%s\n",l->str;
      break;
    case TOK_NUM:
      "NUM:%n\n",l->num;
      break;
    default:
      "%c\n",l->tok;
      break;
  }
}
LexerDel(l);