// Token codes returned by Lex(). They start above 0xFF so they cannot be
// confused with a single character, which Lex() returns as its own code.
#define TOK_NUM   0x100 // numeric literal, value in CLexer.num
#define TOK_NAME  0x101 // identifier, text in CLexer.str
#define TOK_STR   0x102 // quoted string, text in CLexer.str
#define TOK_CHR   0x103 // '$' character literal, value in CLexer.chr
// Bit index (used with Bt/Bts/Btr on CLexer.flags): when set, the next
// LexGetChar() call returns CLexer.last_chr again instead of reading on.
#define LEXf_USE_LAST_CHAR 1
// State of one lexer instance, created by LexerNew() and released by
// LexerDel().
class CLexer {
  U8 filename[STR_LEN]; // name shown in error messages (may be empty)
  U8 str[STR_LEN];      // text of the last TOK_NAME / TOK_STR token
  U8 *body;             // private copy of the source text being lexed
  I32 flags;            // bit field, see LEXf_USE_LAST_CHAR
  I32 last_chr;         // character most recently returned by LexGetChar()
  I64 col;              // column counter, maintained by LexGetChar()
  I64 ln;               // current line number, starts at 1
  I64 pos;              // read offset into body
  I64 tok;              // code of the last token produced by Lex()
  I64 chr;              // value of the last TOK_CHR token
  F64 num;              // value of the last TOK_NUM token
};
// Create a lexer over a private copy of buf.
//   buf      - source text to lex; it is duplicated, so the caller keeps
//              ownership of the original.
//   filename - optional name used in error messages; names longer than
//              STR_LEN-1 bytes are truncated instead of overflowing the
//              fixed-size buffer.
// Returns a new CLexer that must be released with LexerDel().
CLexer *LexerNew(U8 *buf,U8 *filename=NULL) {
  CLexer *l=CAlloc(sizeof(CLexer));
  I64 len;
  l->ln=1;
  l->col=1;
  if(filename) {
    len=StrLen(filename);
    if(len>=STR_LEN)
      len=STR_LEN-1;
    //The struct was zero-filled by CAlloc, so the copy stays terminated.
    MemCpy(l->filename,filename,len);
  }
  l->body=StrNew(buf);
  return l;
}
// Release a lexer created by LexerNew(), including its copy of the source
// text. Passing NULL is a no-op.
U0 LexerDel(CLexer *l) {
  if(!l)
    return;
  Free(l->body);
  Free(l);
}
// Return the next character of the source text, or 0 at end of input.
// If LEXf_USE_LAST_CHAR is set, the flag is cleared and the previously
// returned character is handed out again (one-character push-back).
// Line and column counters are updated only when a character is really
// consumed from the buffer.
U8 LexGetChar(CLexer *l) {
  U8 ret;
  if(Btr(&l->flags,LEXf_USE_LAST_CHAR))
    return l->last_chr;
  ret=l->body[l->pos];
  if(!ret) {
    //Stay on the terminator so repeated calls never read past the buffer.
    return l->last_chr=0;
  }
  l->pos++;
  if(ret=='\n') {
    l->ln++;
    l->col=1;
  } else
    l->col++;
  return l->last_chr=ret;
}
// Read an unsigned integer in the given radix from the lexer.
// Digits are 0-9 followed by letters (case-insensitive, A=10, B=11, ...).
// Reading stops at end of input, at the first character that is not a
// digit or letter, or at a digit that is not below radix. The stopping
// character is pushed back for the next LexGetChar() call.
// Returns the accumulated value (0 when no digit was read).
I64 LexInt(CLexer *l,I64 radix=10) {
  I64 acc=0,c,d;
  for(c=LexGetChar(l);c;c=LexGetChar(l)) {
    c=ToUpper(c);
    d=-1;
    //HolyC evaluates a<=b<=c as a range test.
    if('0'<=c<='9')
      d=c-'0';
    else if('A'<=c<='Z')
      d=c-'A'+10;
    if(d<0||d>=radix)
      break;
    acc=acc*radix+d;
  }
  Bts(&l->flags,LEXf_USE_LAST_CHAR);
  return acc;
}
// Report a lexing error and abort by throwing 'Shlong'.
//   fmt,... - printf-style message and its arguments.
// The message is prefixed with "(filename:line,offset)", where offset is
// the lexer's current read position in the source text.
U0 LexExcept(CLexer *l,U8 *fmt,...) {
  //StrPrintJoin takes the argc/argv pair; the variadic StrPrint would
  //format argc and argv themselves as the arguments.
  U8 *msg=StrPrintJoin(NULL,fmt,argc,argv);
  PrintErr("(%s:%d,%d)  %s\n",l->filename,l->ln,l->pos,msg);
  Free(msg);
  throw('Shlong');
}
// Return TRUE when ch is one of the characters that may form an operator
// token: + - * / & = > < ~ !   Otherwise return FALSE.
I64 IsOperatorChr(I64 ch) {
  if(ch=='+'||ch=='-'||ch=='*'||ch=='/')
    return TRUE;
  if(ch=='&'||ch=='='||ch=='>'||ch=='<')
    return TRUE;
  if(ch=='~'||ch=='!')
    return TRUE;
  return FALSE;
}
// Read the next token from the lexer and return its code (also stored in
// l->tok). Leading whitespace is skipped.
//   TOK_NUM  - number; value in l->num. Accepts integers, "<radix>r<digits>",
//              fractions ("1.05", ".5"), an optional exponent ("e10",
//              "e-3") and a leading '-' directly followed by a digit.
//   TOK_STR  - text between single quotes; contents in l->str.
//   TOK_CHR  - '$' followed by one character; value in l->chr.
//   TOK_NAME - run of alphanumeric characters; text in l->str.
//   operator - run of up to 7 operator characters packed into the bytes of
//              the returned I64 (first character in the lowest byte).
//   other    - one of [ ] ( ) : _ | ; ^ # . returned as its character code.
//   0        - end of input.
// Errors are reported through LexExcept(), which throws.
I64 Lex(CLexer *l) {
  I64 radix=10;
  I64 idx,frac_digits=0,exp_sign=1;
  F64 num=0,frac=0,mul=1.;
  U8 ch;
again:
  ch=LexGetChar(l);
  if(Bt(char_bmp_white_space,ch))
    goto again;
  if(Bt(char_bmp_dec_numeric,ch)) {
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
n:
    //The leading integer is either the value itself or a radix prefix.
    radix=LexInt(l);
    ch=LexGetChar(l);
    if(ch=='R'||ch=='r') {
      num=LexInt(l,radix);
    } else {
      num=radix;
      //A '.' only starts a fraction when a digit follows; otherwise it is
      //left in place as a separate '.' token (same rule as the '.' case
      //below). body[pos] is the character right after the '.'.
      if(ch=='.'&&Bt(char_bmp_dec_numeric,l->body[l->pos])) {
dot:
        //Count digits so leading zeros ("1.05") keep their weight.
        do {
          ch=LexGetChar(l);
          if(!Bt(char_bmp_dec_numeric,ch)) break;
          frac=frac*10+(ch-'0');
          frac_digits++;
        } while(TRUE);
        num+=frac*Pow10(-frac_digits);
      }
      //Push back the character that ended the mantissa and look at it
      //again as a possible exponent marker.
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      ch=LexGetChar(l);
      if(ch=='e'||ch=='E') {
        ch=LexGetChar(l);
        if(ch=='-')
          exp_sign=-1;
        else if(ch!='+')
          Bts(&l->flags,LEXf_USE_LAST_CHAR);
        num*=Pow10(exp_sign*LexInt(l));
      } else
        Bts(&l->flags,LEXf_USE_LAST_CHAR);
    }
    l->num=num*mul;
    return l->tok=TOK_NUM;
  } else if(ch=='\'') {
    idx=0;
    do {
      ch=LexGetChar(l);
      if(ch=='\'') break;
      if(!ch)
        LexExcept(l,"Unterminated string");
      if(idx+1>=STR_LEN)
        LexExcept(l,"String is too big");
      l->str[idx++]=ch;
    } while(TRUE);
    l->str[idx]=0;
    return l->tok=TOK_STR;
  } else if(IsOperatorChr(ch)) {
    idx=0;
    l->tok=0;
    if(ch=='-') {
      l->tok.u8[idx++]=ch;
      ch=LexGetChar(l);
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      if(Bt(char_bmp_dec_numeric,ch)) {
        //'-' directly followed by a digit is a negative number.
        mul=-1.;
        goto n;
      }
    }
    //For a non-'-' start this pushes the first operator character back so
    //the loop below stores it; after '-' the flag is already set.
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    do {
      ch=LexGetChar(l);
      if(!IsOperatorChr(ch)) break;
      //Keep the top byte of the I64 token free.
      if(idx+1>=8)
        LexExcept(l,"Invalid operator '%c'",ch);
      l->tok.u8[idx++]=ch;
    } while(TRUE);
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    return l->tok;
  } else if(ch=='$') {
    l->chr=LexGetChar(l);
    return l->tok=TOK_CHR;
  }
  switch(ch) {
    case '[':
    case ']':
    case '(':
    case ')':
    case ':':
    case '_':
    case '|':
    case ';':
    case '^':
    case '#':
    case 0:
    return l->tok=ch;
    case '.':
    //".5" style number: num, frac and frac_digits are still zero here.
    if(Bt(char_bmp_dec_numeric,LexGetChar(l))) {
      Bts(&l->flags,LEXf_USE_LAST_CHAR);
      goto dot;
    }
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    return l->tok='.';
  }
  if(Bt(char_bmp_alpha_numeric,ch)) {
    idx=0;
    //Re-read the first character inside the loop.
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    do {
      ch=LexGetChar(l);
      if(!Bt(char_bmp_alpha_numeric,ch)) break;
      if(idx+1>=STR_LEN)
        LexExcept(l,"Name is too big");
      l->str[idx++]=ch;
    } while(TRUE);
    l->str[idx]=0;
    Bts(&l->flags,LEXf_USE_LAST_CHAR);
    return l->tok=TOK_NAME;
  }
  LexExcept(l,"Unexpected character '%c'.",ch);
}
// Demo: lex a small Smalltalk-style snippet and print one line per token.
// Adjacent string literals are joined into a single source string.
U8 *test="exampleWithNumber: x"
"    | y |"
"    true & false not & (nil isNil) ifFalse: [self halt]."
"    y := self size + super size."
"    #($a #a 'a' 1 1.0 .1 .1e10)"
"        do: [ :each |"
"            Transcript show: (each class name);"
"                       show: ' ']."
"    ^x < y";
CLexer *l=LexerNew(test);
//Lex() returns 0 at end of input.
while(Lex(l)) {
  switch(l->tok) {
    case TOK_CHR:
      "CHR:%c\n",l->chr;
      break;
    case TOK_NAME:
      "NAME:%s\n",l->str;
      break;
    case TOK_STR:
      "STR:%s\n",l->str;
      break;
    case TOK_NUM:
      "NUM:%n\n",l->num;
      break;
    default:
      //Single characters and packed operator tokens.
      "%c\n",l->tok;
      break;
  }
}
//Release the lexer and its copy of the source text.
LexerDel(l);