001 //I must deposit a turd to the porcelain throne
002 Cd(__DIR__);
003 #include "Latin.HC";
004 #include "Generator.HC";
//AST node type codes. DumpAST switches on AST.act_as_type using these values.
005 #define AST_NOUN 1
006 #define AST_CONJUNCTION 2
007 #define AST_VERB 3
008 #define AST_OF 4
009 #define AST_WHERE 5
010 #define AST_ADJECTIVE 6
011 #define AST_INFINITIVE 7
012 //Example of a measure: The boy is 5 feet tall
013 #define AST_MEASURE 8
014 #define AST_ADVERB 9
015 //Example of an indirect clause: He was [eating pie]
016 #define AST_INDIRECT_CLAUSE 10
//(sic) The spelling is part of the identifier; callers depend on it.
017 #define AST_COMPARSION 11
018 #define AST_QUESTION 12
019 extern class AST;
020 extern class CDisplayNode;
//Task passed to CAlloc/StrNew/MAllocIdent for the allocations made in this file.
//DumpAST also prints to DocPut(mem_task). Initialised to Fs.
021 CTask *mem_task=Fs;
//Not referenced in the part of the file visible here; presumably the task
//that receives debug output -- confirm at its use sites.
022 CTask *debug_log_task=Fs;
//Growable list of AST pointers. It is allocated as one block of
//sizeof(ASTSet)+8*cnt bytes; a NULL ASTSet* is used as the empty set.
023 class ASTSet {
024   I64 cnt; //Number of valid entries in body
025   AST *body[0]; //Trailing array, cnt entries long
026 };
//Growable list of I64 values with the same layout idea as ASTSet:
//a count followed by a trailing array. A NULL I64Set* is the empty set.
027 class I64Set {
028   I64 cnt; //Number of valid entries in body
029   I64 body[0]; //Trailing array, cnt entries long
030 };
//Tells whether the value a is stored in the set dst.
//A NULL dst is treated as an empty set and yields FALSE.
Bool I64SetHasItem(I64Set *dst,I64 a) {
  I64 idx=0;
  if(!dst)
    return FALSE;
  while(idx!=dst->cnt) {
    if(dst->body[idx]==a)
      return TRUE;
    idx++;
  }
  return FALSE;
}
//Appends a to the set dst and returns the set to use from now on.
//  dst     - existing set, or NULL for an empty set.
//  a       - value to add.
//  repeats - when FALSE (default) a value already present is not added again
//            and dst is returned unchanged.
//When the set grows, a new block is allocated from mem_task and the old dst
//is freed, so callers must always continue with the returned pointer.
I64Set *I64SetAdd(I64Set *dst,I64 a,Bool repeats=FALSE) {
  I64Set *tmp;
  I64 i;
  if(!dst) {
//Size the header from I64Set (the original used ASTSet here)
    dst=CAlloc(sizeof(I64Set)+8,mem_task);
    dst->cnt=1;
    dst->body[0]=a;
    return dst;
  }
  if(!repeats) {
    for(i=0;i!=dst->cnt;i++) {
      if(dst->body[i]==a)
        return dst;
    }
  }
//Grow by one slot: copy the old entries, append a, release the old block
  tmp=CAlloc(sizeof(I64Set)+8*(dst->cnt+1),mem_task);
  tmp->cnt=dst->cnt+1;
  MemCpy(tmp->body,dst->body,dst->cnt*8);
  tmp->body[tmp->cnt-1]=a;
  Free(dst);
  return tmp;
}
//One node of the parse tree. Child nodes hang off the per-role ASTSet
//members below; a NULL set means the node has no children in that role.
060 class AST {
061   F64 prob,final_prob; //ParseSentenceJoin picks the candidate with the highest prob
062   I64 raw_type,act_as_type; //DumpAST compares act_as_type against the AST_* codes
063   I64 flags; //From Latin.HC
064   ASTSet *nomitive;
065   ASTSet *accusative;
066   ASTSet *dative;
067   ASTSet *ablative;
068   ASTSet *genitive;
069   ASTSet *adjective;
070   ASTSet *preposition;
071   ASTSet *conjunction;
072   ASTSet *measurement;
073   ASTSet *adverb;
074   ASTSet *question;
//NOTE(review): ASTDel frees args and ASTClone duplicates it with MAllocIdent,
//which conflicts with the original note below -- confirm the ownership rule.
075   I64Set *args; //Original note: Dont free
076   CTrie *word; //May be NULL
077   I64 word_idx; //ASTGetTrieWords stores word at words[word_idx]
078   CDisplayNode *disp_node;
079 //Private
//Written by GeneratorNewC from *st->en and written back to *st->en by Repeater
080   I64 end;
081 };
082 extern U0 DumpAST(AST*a);
083 extern ASTSet *ASTSetClone(ASTSet *orig);
//Returns a deep copy of orig, allocated from mem_task.
//Every child ASTSet is cloned recursively through ASTSetClone and args is
//duplicated with MAllocIdent. The remaining fields, including the word and
//disp_node pointers, are copied bit-for-bit and therefore shared with orig.
//Returns NULL when orig is NULL.
AST *ASTClone(AST *orig) {
  AST *ret;
  if(!orig) return NULL;
  ret=CAlloc(sizeof(AST),mem_task);
//Start from a flat copy, then replace each owned pointer with its own clone
  MemCpy(ret,orig,sizeof AST);
  ret->nomitive=ASTSetClone(ret->nomitive);
  ret->accusative=ASTSetClone(ret->accusative);
  ret->dative=ASTSetClone(ret->dative);
  ret->ablative=ASTSetClone(ret->ablative);
  ret->genitive=ASTSetClone(ret->genitive);
  ret->adjective=ASTSetClone(ret->adjective);
  ret->preposition=ASTSetClone(ret->preposition);
  ret->conjunction=ASTSetClone(ret->conjunction);
  ret->measurement=ASTSetClone(ret->measurement);
  ret->adverb=ASTSetClone(ret->adverb);
  ret->question=ASTSetClone(ret->question);
  if(ret->args) ret->args=MAllocIdent(ret->args,mem_task);
  return ret;
}
//Deep-copies an ASTSet: the returned set (allocated from mem_task) holds an
//ASTClone of every entry of orig. A NULL orig yields NULL.
ASTSet *ASTSetClone(ASTSet *orig) {
  ASTSet *copy;
  I64 idx;
  if(!orig)
    return NULL;
  idx=orig->cnt;
  copy=CAlloc(sizeof(ASTSet)+8*idx,mem_task);
//Copies the header (cnt); the entries are filled in below
  MemCpy(copy,orig,sizeof ASTSet);
  while(idx--)
    copy->body[idx]=ASTClone(orig->body[idx]);
  return copy;
}
//Tells whether the value a is stored in the set dst.
//A NULL dst is treated as an empty set and yields FALSE.
//NOTE(review): a function with this name and body is also defined earlier in
//this file; one of the two definitions can probably be dropped.
Bool I64SetHasItem(I64Set *dst,I64 a) {
  I64 idx=0;
  if(!dst)
    return FALSE;
  while(idx!=dst->cnt) {
    if(dst->body[idx]==a)
      return TRUE;
    idx++;
  }
  return FALSE;
}
//Appends a to the set dst and returns the set to use from now on.
//  dst     - existing set, or NULL for an empty set.
//  a       - value to add.
//  repeats - when FALSE (default) a value already present is not added again
//            and dst is returned unchanged.
//When the set grows, a new block is allocated from mem_task and the old dst
//is freed, so callers must always continue with the returned pointer.
//NOTE(review): a function with this name is also defined earlier in this
//file; one of the two definitions can probably be dropped.
I64Set *I64SetAdd(I64Set *dst,I64 a,Bool repeats=FALSE) {
  I64Set *tmp;
  I64 i;
  if(!dst) {
//Size the header from I64Set (the original used ASTSet here)
    dst=CAlloc(sizeof(I64Set)+8,mem_task);
    dst->cnt=1;
    dst->body[0]=a;
    return dst;
  }
  if(!repeats) {
    for(i=0;i!=dst->cnt;i++) {
      if(dst->body[i]==a)
        return dst;
    }
  }
//Grow by one slot: copy the old entries, append a, release the old block
  tmp=CAlloc(sizeof(I64Set)+8*(dst->cnt+1),mem_task);
  tmp->cnt=dst->cnt+1;
  MemCpy(tmp->body,dst->body,dst->cnt*8);
  tmp->body[tmp->cnt-1]=a;
  Free(dst);
  return tmp;
}
//Appends the node a to the set dst and returns the set to keep using.
//A NULL a leaves dst untouched. A NULL dst stands for the empty set.
//The grown set is a fresh mem_task allocation and the old dst block is freed,
//so the caller must replace its pointer with the return value.
ASTSet *ASTSetAdd(ASTSet *dst,AST *a) {
  ASTSet *grown;
  I64 old_cnt=0;
  if(!a)
    return dst;
  if(dst)
    old_cnt=dst->cnt;
  grown=CAlloc(sizeof(ASTSet)+8*(old_cnt+1),mem_task);
  grown->cnt=old_cnt+1;
  if(dst) {
    MemCpy(grown->body,dst->body,old_cnt*8);
    Free(dst);
  }
  grown->body[old_cnt]=a;
  return grown;
}
157 extern U0 ASTSetDel(ASTSet *s);
158 extern class CRule;
//Input handed to a rule generator (see GeneratorNewC, Repeater and
//ParseSentenceJoin for how the fields are used).
159 class CGrammarState {
//woff and cnt select the cache bucket ast_cache[woff][woff+cnt];
//en, when not NULL, is an out-slot that Repeater fills with the AST's end
160   I64 woff,cnt,*en;
//Rule name string on entry. GeneratorNewC overwrites it with a
//CCacheNugget pointer before passing the state to Repeater.
161   U8 *rule;
162   U8 **words; //ParseSentenceJoin stores argv here
163   Bool is_conj2; //See RuleSet
164 };
165 extern U0 ASTDel(AST *a);
//Not used in the part of the file visible here; presumably pairs a rule
//function with the state it should run on -- confirm at its use sites.
166 class CSubGenPair {
167   CGrammarState *input_data;
168   U8 *fptr;
169 };
//Frees the node a together with every child set it holds.
//A NULL a is ignored. The word and disp_node pointers are not released here.
U0 ASTDel(AST *a) {
  if(!a) return;
  ASTSetDel(a->nomitive);
  ASTSetDel(a->adjective);
  ASTSetDel(a->conjunction);
  ASTSetDel(a->accusative);
  ASTSetDel(a->dative);
  ASTSetDel(a->ablative);
  ASTSetDel(a->genitive);
  ASTSetDel(a->preposition);
  ASTSetDel(a->measurement);
  ASTSetDel(a->adverb);
//ASTClone deep-copies question like the other sets, so it must be released too
  ASTSetDel(a->question);
  Free(a->args); //Present in other things
//Poison the node before freeing so stale pointers are easy to spot
  MemSet(a,0xbe,sizeof AST);
  Free(a);
}
//Destroys a set: every entry is passed to ASTDel, then the set block itself
//is freed. A NULL set is ignored.
U0 ASTSetDel(ASTSet *s) {
  I64 idx;
  if(s) {
    idx=s->cnt;
    while(idx--) {
      ASTDel(s->body[idx]);
    }
    Free(s);
  }
}
193 
//Dumps every node of the set s with DumpAST, in index order.
//A NULL set (the empty set) prints nothing.
U0 DumpASTSet(ASTSet *s) {
  I64 i;
  if(!s) return;
  for(i=0;i!=s->cnt;i++)
    DumpAST(s->body[i]);
}
//Prints the tree rooted at a to the document of mem_task.
//Each node is wrapped in an $ID,4$ / $ID,-4$ pair and prints, in order:
//its word (if any), its act_as_type name, then every non-empty child set
//under a heading. A NULL a prints nothing.
U0 DumpAST(AST *a) {
  if(!a) return;
  CDoc *doc=DocPut(mem_task);
  DocPrint(doc,"$ID,4$\n");
  if(a->word)
    DocPrint(doc,"(WORD%s,%d):",a->word->str,a->word_idx);
  switch(a->act_as_type) {
    case AST_VERB: DocPrint(doc,"VERB\n"); break;
    case AST_ADJECTIVE: DocPrint(doc,"ADJECTIVE\n"); break;
    case AST_NOUN: DocPrint(doc,"NOUN\n"); break;
    case AST_OF: DocPrint(doc,"OF\n"); break;
    case AST_CONJUNCTION: DocPrint(doc,"CONJUNCTION\n"); break;
    case AST_WHERE: DocPrint(doc,"WHERE\n"); break;
    case AST_INFINITIVE: DocPrint(doc,"INFINITIVE\n"); break;
    case AST_ADVERB: DocPrint(doc,"ADVERB\n"); break;
//The remaining type codes used to print nothing at all
    case AST_MEASURE: DocPrint(doc,"MEASURE\n"); break;
    case AST_INDIRECT_CLAUSE: DocPrint(doc,"INDIRECT_CLAUSE\n"); break;
    case AST_COMPARSION: DocPrint(doc,"COMPARISON\n"); break;
    case AST_QUESTION: DocPrint(doc,"QUESTION\n"); break;
  }
  if(a->nomitive) {
    DocPrint(doc,"NOMITIVE:\n");
    DumpASTSet(a->nomitive);
  }
  if(a->accusative) {
    DocPrint(doc,"ACCUSATIVE:\n");
    DumpASTSet(a->accusative);
  }
  if(a->dative) {
    DocPrint(doc,"DATIVE:\n");
    DumpASTSet(a->dative);
  }
  if(a->ablative) {
    DocPrint(doc,"ABLATIVE:\n");
    DumpASTSet(a->ablative);
  }
  if(a->genitive) {
    DocPrint(doc,"GENITIVE:\n");
    DumpASTSet(a->genitive);
  }
  if(a->preposition) {
    DocPrint(doc,"PREPOSITION\n");
    DumpASTSet(a->preposition);
  }
  if(a->question) {
    DocPrint(doc,"QUESTION\n");
    DumpASTSet(a->question);
  }
  if(a->adjective) {
    DocPrint(doc,"ADJECTIVE\n");
    DumpASTSet(a->adjective);
  }
  if(a->conjunction) {
    DocPrint(doc,"CONJUNCTION\n");
    DumpASTSet(a->conjunction);
  }
  if(a->measurement) {
    DocPrint(doc,"MEASURE\n");
    DumpASTSet(a->measurement);
  }
  if(a->adverb) {
    DocPrint(doc,"ADVERB:\n");
    DumpASTSet(a->adverb);
  }
  DocPrint(doc,"$ID,-4$\n");
}
//One cached result list, linked into a bucket of ast_cache.
//GeneratorNewC creates these; FlushCache releases rule and results.
261 class CCacheNugget:CQue {
262   U8 *fun; //Rule function the results came from (compared by pointer)
//start/end are set from st->woff and st->woff+st->cnt; hits counts cache hits
263   I64 start,end,hits;
264   U8 *rule; //StrNew copy of the rule name, compared with StrCmp
265   ASTSet *results; //All ASTs the generator produced; NULL when it produced none
266 //Still being generated
267   Bool in_progress;
268 };
269 
//Cache buckets. GeneratorNewC indexes this as [st->woff][st->woff+st->cnt];
//each element is the CQue head of a list of CCacheNuggets.
270 CQue ast_cache[128][128];
//Reset to 0 by InitCache; no other write is visible in this part of the file.
271 I64 cache_cnt=0;
//Resets cache_cnt and makes every bucket of ast_cache an empty queue.
//Nothing is freed here; FlushCache releases the old contents first.
U0 InitCache() {
  I64 row,col;
  cache_cnt=0;
  for(row=0;row<128;row++) {
    for(col=0;col<128;col++) {
      QueInit(&ast_cache[row][col]);
    }
  }
}
//Top-level call: set up the cache bucket heads when this file is executed
279 InitCache;
//Empties the whole result cache.
//For every bucket, the rule string and result set of each nugget are
//released, QueDel is called on the bucket head, and finally InitCache puts
//all bucket heads back into the empty state.
U0 FlushCache() {
  I64 row,col;
  CCacheNugget *bucket,*cur;
  for(row=0;row<128;row++) {
    for(col=0;col<128;col++) {
      bucket=&ast_cache[row][col];
      cur=bucket->next;
      while(cur!=bucket) {
        Free(cur->rule);
        ASTSetDel(cur->results);
        cur=cur->next;
      }
      QueDel(bucket);
    }
  }
  InitCache;
}
//Generator body that replays cached results.
//st->rule is expected to hold a CCacheNugget pointer (GeneratorNewC stores it
//there). Yields a fresh ASTClone of every cached AST, last entry first, and
//writes each AST's end into *st->en when st->en is not NULL.
//Yields nothing while the nugget is in progress or has no results.
U0 Repeater(CGrammarState *st) {
  CCacheNugget *cached=st->rule;
  AST *copy;
  I64 i;
  if(cached->in_progress||!cached->results)
    return;
  for(i=cached->results->cnt-1;i>=0;i--) {
    copy=ASTClone(cached->results->body[i]);
    if(st->en)
      *st->en=copy->end;
    GeneratorYield(copy);
  }
}
//Cached variant of GeneratorNew for the rule function fptr run on state st.
//The bucket ast_cache[st->woff][st->woff+st->cnt] is searched for a nugget
//with the same function pointer and rule name.
//  Hit:  hits is bumped, st->rule is REPLACED by the nugget pointer and a
//        Repeater generator that replays the cached ASTs is returned.
//  Miss: fptr is run to completion once, every AST it yields is stored in a
//        new nugget (results stays NULL when nothing was yielded), and the
//        lookup is repeated so the caller again receives a Repeater.
//NOTE(review): the bucket indices are not range-checked against the 128x128
//cache, and st->rule no longer points at the rule name after this returns.
CGenerator *GeneratorNewC(U8 *fptr,CGrammarState *st) {
  CCacheNugget *nug;
  CGenerator *g;
  AST *have;
  Bool ran_already=FALSE;
  ASTSet *s=NULL;
enter:;
  CQue *head=&ast_cache[st->woff][st->woff+st->cnt];
  for(nug=head->next;nug!=head;nug=nug->next) {
    if(nug->fun==fptr)
      if(!StrCmp(nug->rule,st->rule)) {
        nug->hits++;
        st->rule=nug;
        return GeneratorNew(&Repeater,st);
      }
  }
  if(!ran_already) {
    g=GeneratorNew(fptr,st);
//NOTE(review): this statement has no effect visible from here -- confirm against Generator.HC
    g->maximum;
    nug=CAlloc(sizeof(CCacheNugget),mem_task);
    nug->fun=fptr;
    nug->start=st->woff;
    nug->end=nug->start+st->cnt;
    nug->rule=StrNew(st->rule,mem_task);
    nug->in_progress=TRUE;
    while(GeneratorGet(g,&have)) {
//st->en is optional (Repeater checks it too), so do not dereference NULL
      if(st->en) have->end=*st->en;
      s=ASTSetAdd(s,have);
    }
    nug->in_progress=FALSE;
    nug->results=s;
    ran_already=TRUE;
//"Empty" nuggets are inserted too; they signify that nothing of interest is in this cache spot (s==NULL if empty)
    QueIns(nug,head);
    goto enter;
  }
//Only reached if the nugget inserted above was not found on the second pass
  return GeneratorNew(NULL,NULL);
}
345 extern U0 RunRule(CGrammarState*);
//Parses the argc words in argv with the rule "Sent".
//The result cache is flushed first. Every AST the generator yields is kept
//as a candidate only when its end position equals argc (the whole input was
//consumed); the others are deleted. Returns a clone of the candidate with
//the highest prob (the first one wins a tie), or NULL when no candidate
//covers the whole input. The returned AST is a fresh copy; the candidates
//themselves are deleted here.
//NOTE(review): the cache is 128x128 and is indexed by word offsets, so very
//long inputs may index outside it -- confirm the limit RunRule relies on.
AST *ParseSentenceJoin(I64 argc,U8 **argv) {
  I64 i;
//Initialised so the comparison below never reads garbage if the generator
//yields without writing *st->en
  I64 en=0;
  AST *r,*best=NULL;
  ASTSet *canidates=NULL;
  CGrammarState *st;
  CGenerator *gen;
  FlushCache;
  st=CAlloc(sizeof(CGrammarState),mem_task);
  st->words=argv;
  st->cnt=argc;
  st->en=&en;
  st->rule="Sent";
  gen=GeneratorNew(&RunRule,st);
  while(GeneratorGet(gen,&r)) {
    if(en==argc)
      canidates=ASTSetAdd(canidates,r);
    else
      ASTDel(r);
  }
  if(canidates) {
    best=canidates->body[0];
//Strict < keeps the earliest candidate on ties
    for(i=1;i<canidates->cnt;i++) {
      if(best->prob<canidates->body[i]->prob)
        best=canidates->body[i];
    }
//Clone before the candidate set (which owns best) is destroyed
    best=ASTClone(best);
  }
  ASTSetDel(canidates);
  Free(st);
  return best;
}
//Variadic convenience wrapper: forwards its arguments (argc/argv) to
//ParseSentenceJoin and hands back whatever that returns.
AST *ParseSentence(...) {
  AST *parsed=ParseSentenceJoin(argc,argv);
  return parsed;
}
//Walks the tree rooted at a and returns the largest word_idx found in it.
//When words is not NULL, each visited node also stores its word pointer at
//words[word_idx]; the caller must supply an array large enough for that.
//NOTE(review): nodes whose word is NULL still write NULL into their slot.
I64 ASTGetTrieWords(AST *a,CTrie **words) {
  ASTSet *kids[11],*cur;
  I64 slot,k,top;
  if(words)
    words[a->word_idx]=a->word;
  top=a->word_idx;
//Child sets in the same order the original visited them
  kids[0]=a->nomitive;
  kids[1]=a->accusative;
  kids[2]=a->dative;
  kids[3]=a->ablative;
  kids[4]=a->genitive;
  kids[5]=a->adjective;
  kids[6]=a->preposition;
  kids[7]=a->conjunction;
  kids[8]=a->measurement;
  kids[9]=a->adverb;
  kids[10]=a->question;
  for(slot=0;slot<11;slot++) {
    cur=kids[slot];
    if(cur) {
      for(k=0;k!=cur->cnt;k++)
        top=MaxI64(top,ASTGetTrieWords(cur->body[k],words));
    }
  }
  return top;
}