//Sentence parser front end.
//Declares the AST node/set types, helpers to grow, clone, delete and dump
//them, a cache of rule results keyed by word span, and the ParseSentence
//entry points.
Cd(__DIR__);
#include "Latin.HC";
#include "Generator.HC";

//Values stored in AST.raw_type / AST.act_as_type.
#define AST_NOUN 1
#define AST_CONJUNCTION 2
#define AST_VERB 3
#define AST_OF 4
#define AST_WHERE 5
#define AST_ADJECTIVE 6
#define AST_INFINITIVE 7
//The boy is 5 feet tall
#define AST_MEASURE 8
#define AST_ADVERB 9
//He was [eating pie]
#define AST_INDIRECT_CLAUSE 10
#define AST_COMPARSION 11
#define AST_QUESTION 12

//Side length of the square ast_cache table.
#define AST_CACHE_DIM 128

extern class AST;
extern class CDisplayNode;

//Task whose heap receives every allocation made in this file.
CTask *mem_task=Fs;
CTask *debug_log_task=Fs;

//Counted array of AST pointers; body is sized at allocation time.
class ASTSet {
  I64 cnt;
  AST *body[0];
};

//Counted array of I64 values; body is sized at allocation time.
class I64Set {
  I64 cnt;
  I64 body[0];
};

//Returns TRUE when a is stored in dst. A NULL set is treated as empty.
Bool I64SetHasItem(I64Set *dst,I64 a) {
  I64 i;
  if(!dst) return FALSE;
  for(i=0;i<dst->cnt;i++)
    if(dst->body[i]==a)
      return TRUE;
  return FALSE;
}

//Appends a to dst and returns the set to use from now on.
//dst may be NULL, in which case a one-element set is allocated.
//Unless repeats is TRUE, a value already present is not added again and dst
//is returned unchanged. When the set grows, the old dst is freed, so callers
//must keep only the returned pointer.
I64Set *I64SetAdd(I64Set *dst,I64 a,Bool repeats=FALSE) {
  I64Set *grown;
  if(!dst) {
    dst=CAlloc(sizeof(I64Set)+8,mem_task);
    dst->cnt=1;
    dst->body[0]=a;
    return dst;
  }
  if(!repeats&&I64SetHasItem(dst,a))
    return dst;
  grown=CAlloc(sizeof(I64Set)+8*(dst->cnt+1),mem_task);
  grown->cnt=dst->cnt+1;
  MemCpy(grown->body,dst->body,dst->cnt*8);
  grown->body[grown->cnt-1]=a;
  Free(dst);
  return grown;
}

//One node of a parsed sentence. Each ASTSet member holds child nodes and is
//deep-copied by ASTClone and released by ASTDel.
class AST {
  F64 prob,final_prob;
  I64 raw_type,act_as_type;
  I64 flags; //From Latin.HC
  ASTSet *nomitive;
  ASTSet *accusative;
  ASTSet *dative;
  ASTSet *ablative;
  ASTSet *genitive;
  ASTSet *adjective;
  ASTSet *preposition;
  ASTSet *conjunction;
  ASTSet *measurement;
  ASTSet *adverb;
  ASTSet *question;
  //Originally marked "Dont free", yet ASTClone duplicates it and ASTDel frees it.
  //NOTE(review): confirm which ownership rule is intended.
  I64Set *args;
  CTrie *word; //May be NULL
  I64 word_idx;
  CDisplayNode *disp_node; //Copied by pointer in ASTClone, never freed here
  //Private
  I64 end; //Written by GeneratorNewC, read back by Repeater
};

extern U0 DumpAST(AST*a);
extern ASTSet *ASTSetClone(ASTSet *orig);

//Returns a deep copy of orig allocated from mem_task, or NULL for NULL.
//Scalar fields, word and disp_node are copied as-is; every child set is
//cloned recursively and args is duplicated with MAllocIdent.
AST *ASTClone(AST *orig) {
  AST *ret;
  if(!orig) return NULL;
  ret=CAlloc(sizeof(AST),mem_task);
  MemCpy(ret,orig,sizeof(AST));
  ret->nomitive=ASTSetClone(ret->nomitive);
  ret->accusative=ASTSetClone(ret->accusative);
  ret->dative=ASTSetClone(ret->dative);
  ret->ablative=ASTSetClone(ret->ablative);
  ret->genitive=ASTSetClone(ret->genitive);
  ret->adjective=ASTSetClone(ret->adjective);
  ret->preposition=ASTSetClone(ret->preposition);
  ret->conjunction=ASTSetClone(ret->conjunction);
  ret->measurement=ASTSetClone(ret->measurement);
  ret->adverb=ASTSetClone(ret->adverb);
  ret->question=ASTSetClone(ret->question);
  if(ret->args)
    ret->args=MAllocIdent(ret->args,mem_task);
  return ret;
}

//Returns a deep copy of orig (each member goes through ASTClone), or NULL
//when orig is NULL.
ASTSet *ASTSetClone(ASTSet *orig) {
  ASTSet *ret;
  I64 i;
  if(!orig) return NULL;
  ret=CAlloc(sizeof(ASTSet)+8*orig->cnt,mem_task);
  ret->cnt=orig->cnt;
  for(i=0;i<orig->cnt;i++)
    ret->body[i]=ASTClone(orig->body[i]);
  return ret;
}

//Appends a to dst and returns the set to use from now on.
//A NULL a is ignored. dst may be NULL. When the set grows, the old dst is
//freed, so callers must keep only the returned pointer. The node itself is
//stored by pointer, not copied.
ASTSet *ASTSetAdd(ASTSet *dst,AST *a) {
  ASTSet *grown;
  if(!a) return dst;
  if(!dst) {
    dst=CAlloc(sizeof(ASTSet)+8,mem_task);
    dst->cnt=1;
    dst->body[0]=a;
    return dst;
  }
  grown=CAlloc(sizeof(ASTSet)+8*(dst->cnt+1),mem_task);
  grown->cnt=dst->cnt+1;
  MemCpy(grown->body,dst->body,dst->cnt*8);
  grown->body[grown->cnt-1]=a;
  Free(dst);
  return grown;
}

extern U0 ASTSetDel(ASTSet *s);
extern class CRule;

//Arguments handed to a grammar rule generator.
class CGrammarState {
  //woff and cnt select the ast_cache slot in GeneratorNewC;
  //*en (when en is not NULL) receives the end value of each yielded AST.
  I64 woff,cnt,*en;
  //Rule name string; GeneratorNewC swaps in a CCacheNugget* on a cache hit.
  U8 *rule;
  U8 **words;
  Bool is_conj2; //See RuleSet
};

extern U0 ASTDel(AST *a);

class CSubGenPair {
  CGrammarState *input_data;
  U8 *fptr;
};

//Frees a, all of its child sets (recursively) and its args. NULL is ignored.
//The node is filled with 0xbe before being freed so that a stale pointer
//reads recognisable garbage.
U0 ASTDel(AST *a) {
  if(!a) return;
  ASTSetDel(a->nomitive);
  ASTSetDel(a->adjective);
  ASTSetDel(a->conjunction);
  ASTSetDel(a->accusative);
  ASTSetDel(a->dative);
  ASTSetDel(a->ablative);
  ASTSetDel(a->genitive);
  ASTSetDel(a->preposition);
  ASTSetDel(a->measurement);
  ASTSetDel(a->adverb);
  //question is deep-copied by ASTClone, so it has to be released here too.
  ASTSetDel(a->question);
  Free(a->args);
  MemSet(a,0xbe,sizeof(AST));
  Free(a);
}

//Frees s together with every AST it holds. NULL is ignored.
U0 ASTSetDel(ASTSet *s) {
  I64 i;
  if(!s) return;
  for(i=s->cnt-1;i>=0;i--)
    ASTDel(s->body[i]);
  Free(s);
}

//Dumps every member of s with DumpAST. NULL is ignored.
U0 DumpASTSet(ASTSet *s) {
  I64 i;
  if(!s) return;
  for(i=0;i<s->cnt;i++)
    DumpAST(s->body[i]);
}

//Prints a to the document returned by DocPut(mem_task): the word (if any),
//the act_as_type label for the types listed in the switch, then each
//non-empty child set under its own heading. The output is bracketed by
//"$ID,4$" / "$ID,-4$". NULL is ignored.
U0 DumpAST(AST *a) {
  if(!a) return;
  DocPrint(DocPut(mem_task),"$ID,4$\n");
  if(a->word)
    DocPrint(DocPut(mem_task),"(WORD%s,%d):",a->word->str,a->word_idx);
  switch(a->act_as_type) {
    case AST_VERB:
      DocPrint(DocPut(mem_task),"VERB\n");
      break;
    case AST_ADJECTIVE:
      DocPrint(DocPut(mem_task),"ADJECTIVE\n");
      break;
    case AST_NOUN:
      DocPrint(DocPut(mem_task),"NOUN\n");
      break;
    case AST_OF:
      DocPrint(DocPut(mem_task),"OF\n");
      break;
    case AST_CONJUNCTION:
      DocPrint(DocPut(mem_task),"CONUJUNCTION\n");
      break;
    case AST_WHERE:
      DocPrint(DocPut(mem_task),"WHERE\n");
      break;
    case AST_INFINITIVE:
      DocPrint(DocPut(mem_task),"INFINITIVE\n");
      break;
    case AST_ADVERB:
      DocPrint(DocPut(mem_task),"ADVERB\n");
      break;
  }
  if(a->nomitive) {
    DocPrint(DocPut(mem_task),"NOMITIVE:\n");
    DumpASTSet(a->nomitive);
  }
  if(a->accusative) {
    DocPrint(DocPut(mem_task),"ACCUSATIVE:\n");
    DumpASTSet(a->accusative);
  }
  if(a->dative) {
    DocPrint(DocPut(mem_task),"DATIVE:\n");
    DumpASTSet(a->dative);
  }
  if(a->ablative) {
    DocPrint(DocPut(mem_task),"ABLATIVE:\n");
    DumpASTSet(a->ablative);
  }
  if(a->genitive) {
    DocPrint(DocPut(mem_task),"GENITIVE:\n");
    DumpASTSet(a->genitive);
  }
  if(a->preposition) {
    DocPrint(DocPut(mem_task),"PREPOSITION\n");
    DumpASTSet(a->preposition);
  }
  if(a->question) {
    DocPrint(DocPut(mem_task),"QUESTION\n");
    DumpASTSet(a->question);
  }
  if(a->adjective) {
    DocPrint(DocPut(mem_task),"ADJECTIVE\n");
    DumpASTSet(a->adjective);
  }
  if(a->conjunction) {
    DocPrint(DocPut(mem_task),"CONJUNCTION\n");
    DumpASTSet(a->conjunction);
  }
  if(a->measurement) {
    DocPrint(DocPut(mem_task),"MEASURE\n");
    DumpASTSet(a->measurement);
  }
  if(a->adverb) {
    DocPrint(DocPut(mem_task),"ADVERB:\n");
    DumpASTSet(a->adverb);
  }
  DocPrint(DocPut(mem_task),"$ID,-4$\n");
}

//One cached result: everything generator function fun produced for rule
//over the slot it is queued in.
class CCacheNugget:CQue {
  U8 *fun;
  I64 start,end,hits;
  U8 *rule;
  ASTSet *results; //NULL when the rule produced nothing
  //Still being generated
  Bool in_progress;
};

//ast_cache[woff][woff+cnt] is the queue of nuggets for that slot.
CQue ast_cache[AST_CACHE_DIM][AST_CACHE_DIM];
I64 cache_cnt=0;

//Resets cache_cnt and makes every cache slot an empty queue.
//Does not free anything; see FlushCache.
U0 InitCache() {
  I64 i,j;
  cache_cnt=0;
  for(i=0;i<AST_CACHE_DIM;i++)
    for(j=0;j<AST_CACHE_DIM;j++)
      QueInit(&ast_cache[i][j]);
}
InitCache;

//Releases every cached nugget (rule string, result ASTs, then the queue
//entries via QueDel) and re-initialises the table.
U0 FlushCache() {
  CCacheNugget *nug,*head;
  I64 i,j;
  for(i=0;i<AST_CACHE_DIM;i++)
    for(j=0;j<AST_CACHE_DIM;j++) {
      head=&ast_cache[i][j];
      for(nug=head->next;nug!=head;nug=nug->next) {
        Free(nug->rule);
        ASTSetDel(nug->results);
      }
      QueDel(head);
    }
  InitCache;
}

//Generator body that replays a cached nugget. st->rule must already point
//at the CCacheNugget (GeneratorNewC arranges this). Yields a fresh clone of
//each cached AST, last to first, storing its end in *st->en when en is set.
//Yields nothing while the nugget is in progress or has no results.
U0 Repeater(CGrammarState *st) {
  AST *copy;
  CCacheNugget *nug=st->rule;
  I64 idx;
  if(nug->in_progress||!nug->results)
    return;
  for(idx=nug->results->cnt-1;idx>=0;idx--) {
    copy=ASTClone(nug->results->body[idx]);
    if(st->en) *st->en=copy->end;
    GeneratorYield(copy);
  }
}

//Cached wrapper around GeneratorNew(fptr,st).
//On a hit (same fptr, same rule string, same slot) st->rule is replaced by
//the nugget and a Repeater generator is returned. On a miss the real
//generator is drained into a new nugget, the nugget is queued, and the
//lookup is retried so the caller still receives a Repeater generator.
//Spans that do not fit in ast_cache bypass the cache entirely.
CGenerator *GeneratorNewC(U8 *fptr,CGrammarState *st) {
  CCacheNugget *nug;
  CGenerator *g;
  AST *have;
  Bool ran_already=FALSE;
  ASTSet *s=NULL;
  CQue *head;
  I64 lo,hi;
enter:;
  lo=st->woff;
  hi=st->woff+st->cnt;
  //Outside the table there is no slot to use, so run the rule uncached.
  //NOTE(review): assumes callers accept the ASTs of the raw generator the
  //same way they accept Repeater clones - confirm against Generator.HC users.
  if(lo<0||hi<0||lo>=AST_CACHE_DIM||hi>=AST_CACHE_DIM)
    return GeneratorNew(fptr,st);
  head=&ast_cache[lo][hi];
  for(nug=head->next;nug!=head;nug=nug->next) {
    if(nug->fun==fptr)
      if(!StrCmp(nug->rule,st->rule)) {
        nug->hits++;
        st->rule=nug;
        return GeneratorNew(&Repeater,st);
      }
  }
  if(!ran_already) {
    g=GeneratorNew(fptr,st);
    //NOTE(review): kept from the original; its effect is not visible here.
    g->maximum;
    nug=CAlloc(sizeof(CCacheNugget),mem_task);
    nug->fun=fptr;
    nug->start=st->woff;
    nug->end=nug->start+st->cnt;
    nug->rule=StrNew(st->rule,mem_task);
    nug->in_progress=TRUE;
    while(GeneratorGet(g,&have)) {
      //Same NULL guard on en that Repeater applies.
      if(st->en) have->end=*st->en;
      s=ASTSetAdd(s,have);
    }
    //NOTE(review): g is not released here; confirm whether GeneratorGet
    //disposes of an exhausted generator.
    nug->in_progress=FALSE;
    nug->results=s;
    ran_already=TRUE;
    //An "empty" nugget (results==NULL) is queued as well, recording that
    //this rule has nothing of interest at this slot.
    QueIns(nug,head);
    goto enter;
  }
  return GeneratorNew(NULL,NULL);
}

extern U0 RunRule(CGrammarState*);

//Parses the argc words in argv with the "Sent" rule.
//Flushes the cache, collects every AST whose reported end equals argc, and
//returns a clone of the one with the highest prob (the earliest wins a
//tie). All other results are freed. Returns NULL when nothing spans the
//whole input. The caller owns the returned AST.
AST *ParseSentenceJoin(I64 argc,U8 **argv) {
  FlushCache;
  I64 i,en=0;
  AST *r,*best=NULL;
  ASTSet *canidates=NULL;
  CGrammarState *st=CAlloc(sizeof(CGrammarState),mem_task);
  CGenerator *gen;
  st->words=argv;
  st->cnt=argc;
  st->en=&en;
  st->rule="Sent";
  gen=GeneratorNew(&RunRule,st);
  while(GeneratorGet(gen,&r)) {
    if(en==argc)
      canidates=ASTSetAdd(canidates,r);
    else
      ASTDel(r);
  }
  if(canidates) {
    best=canidates->body[0];
    for(i=1;i<canidates->cnt;i++)
      if(best->prob<canidates->body[i]->prob)
        best=canidates->body[i];
    //Clone the winner because ASTSetDel below frees every candidate.
    best=ASTClone(best);
  }
  ASTSetDel(canidates);
  Free(st);
  return best;
}

//Variadic convenience wrapper: ParseSentence("a","b",...) forwards the
//implicit argc/argv to ParseSentenceJoin.
AST *ParseSentence(...) {
  return ParseSentenceJoin(argc,argv);
}

//Walks a and all of its child sets. When words is not NULL, stores each
//node's word at words[word_idx]. Returns the largest word_idx encountered,
//or 0 when a is NULL.
I64 ASTGetTrieWords(AST *a,CTrie **words) {
  I64 run,max,i;
  ASTSet *s;
  if(!a) return 0;
  if(words) words[a->word_idx]=a->word;
  max=a->word_idx;
  for(run=0;run<=10;run++) {
    s=NULL;
    switch(run) {
      case 0: s=a->nomitive; break;
      case 1: s=a->accusative; break;
      case 2: s=a->dative; break;
      case 3: s=a->ablative; break;
      case 4: s=a->genitive; break;
      case 5: s=a->adjective; break;
      case 6: s=a->preposition; break;
      case 7: s=a->conjunction; break;
      case 8: s=a->measurement; break;
      case 9: s=a->adverb; break;
      case 10: s=a->question; break;
    }
    if(s)
      for(i=0;i<s->cnt;i++)
        max=MaxI64(max,ASTGetTrieWords(s->body[i],words));
  }
  return max;
}