//Returns TRUE when w is one of the hard-coded filler words that must
//never become a motif. StrICmp makes the comparison case-insensitive.
Bool IsStopWord(U8 *w) {
  if(!StrICmp("A",w)) return TRUE;
  if(!StrICmp("MY",w)) return TRUE;
  if(!StrICmp("MINE",w)) return TRUE;
  if(!StrICmp("no",w)) return TRUE;
  if(!StrICmp("never",w)) return TRUE;
  if(!StrICmp("though",w)) return TRUE;
  if(!StrICmp("because",w)) return TRUE;
  if(!StrICmp("do",w)) return TRUE;
  if(!StrICmp("aN",w)) return TRUE;
  if(!StrICmp("be",w)) return TRUE;
  if(!StrICmp("THE",w)) return TRUE;
  if(!StrICmp("AND",w)) return TRUE;
  if(!StrICmp("but",w)) return TRUE;
  if(!StrICmp("or",w)) return TRUE;
  if(!StrICmp("in",w)) return TRUE;
  if(!StrICmp("on",w)) return TRUE;
  if(!StrICmp("at",w)) return TRUE;
  if(!StrICmp("for",w)) return TRUE;
  if(!StrICmp("as",w)) return TRUE;
  if(!StrICmp("of",w)) return TRUE;
  if(!StrICmp("i",w)) return TRUE;
  if(!StrICmp("that",w)) return TRUE;
  if(!StrICmp("this",w)) return TRUE;
  if(!StrICmp("these",w)) return TRUE;
  if(!StrICmp("their",w)) return TRUE;
  if(!StrICmp("which",w)) return TRUE;
  if(!StrICmp("whose",w)) return TRUE; //was misspelled "whoose"
  if(!StrICmp("not",w)) return TRUE;
  if(!StrICmp("is",w)) return TRUE;
  if(!StrICmp("are",w)) return TRUE;
  if(!StrICmp("here",w)) return TRUE;
  if(!StrICmp("its",w)) return TRUE;
  if(!StrICmp("it",w)) return TRUE;
  if(!StrICmp("dont",w)) return TRUE;
  if(!StrICmp("there",w)) return TRUE;
  if(!StrICmp("who",w)) return TRUE;
  if(!StrICmp("you",w)) return TRUE;
  if(!StrICmp("if",w)) return TRUE;
  if(!StrICmp("to",w)) return TRUE;
  if(!StrICmp("so",w)) return TRUE;
  if(!StrICmp("what",w)) return TRUE;
  return FALSE;
}

//Extracts the next token starting at *_ptr and advances *_ptr past it.
//
//Leading characters that are neither alphabetic nor one of '.<>$ are
//skipped. A "$...$" pair is skipped as a whole. A lone character from
//<>.'$ is returned as a one-character token. Otherwise the token is a
//run of alphabetic characters and apostrophes, upper-cased.
//
//Returns a newly allocated string (caller frees); it is "" when the end
//of the input was reached. Tokens longer than STR_LEN-1 characters are
//truncated (the whole run is still consumed) instead of overrunning buf.
U8 *GetWord(U8 **_ptr) {
  U8 *ptr=*_ptr,*endd;
  U8 buf[STR_LEN];
  I64 len=0;
  buf[0]=0;
again:;
  while(*ptr&&!(Bt(char_bmp_alpha,*ptr)||StrOcc("'.<>$",*ptr)))
    ++ptr;
  if(*ptr=='$') {
    if(endd=StrFirstOcc(ptr+1,"$")) {
      ptr=endd+1;
      goto again;
    }
  }
  if(StrOcc("<>.'$",*ptr))
    buf[0](U16)=*ptr++; //U16 store writes the character and its terminator
  else {
    while(Bt(char_bmp_alpha,*ptr)||StrOcc("'",*ptr)) {
      if(len<STR_LEN-1)
        buf[len++]=ToUpper(*ptr);
      ++ptr;
    }
    buf[len]=0;
  }
  *_ptr=ptr;
  return StrNew(buf);
}
#include "ConceptRank";
#include "KneserNey";
#define MOTIF_LEN 100
#define TOKENS_AT_ONCE 2
//Returns a newly allocated copy of the tail of text: leading
//space-separated tokens are dropped while at least `many` spaces remain
//ahead. Returns NULL if a space that was counted cannot be found.
U8 *LastNGrams(U8 *text,I64 many=TOKENS_AT_ONCE) {
  U8 *ptr=text;
  I64 occ=StrOcc(text,' ');
  while(occ>=many) {
    ptr=StrFirstOcc(ptr," ");
    if(!ptr)
      return NULL;
    --occ;
    ptr++;
  }
  return StrNew(ptr);
}
//Returns a newly allocated copy of text cut after its first `many`
//space-separated tokens (the space following a token is kept when the
//cut happens right after it). Returns NULL when text holds fewer than
//`many` tokens. Caller frees the result.
U8 *FirstNGrams(U8 *text,I64 many=TOKENS_AT_ONCE) {
  U8 *ptr=StrNew(text),*tmp=ptr;
  while(--many>=0) {
    ptr=StrFirstOcc(ptr," ");
    if(!ptr) {
      if(many==0) {
        //Last requested token runs to the end of the string
        ptr=tmp+StrLen(tmp);
        break;
      }
      Free(tmp); //the copy used to be leaked on this path
      return NULL;
    }
    ptr++;
  }
  *ptr=0;
  return tmp;
}

//Scores word against the motif list. The scan stops at the first NULL
//motif slot. Every motif contributes KneserNey0(...)*attention, plus
//.1*attention when the motif equals word (case-insensitive).
F64 WordMotifScore0(U8 *word,U8 **motifs,F64 *attention) {
  F64 ret=0.;
  I64 idx;
  for(idx=0;idx!=MOTIF_LEN;++idx) {
    if(!motifs[idx])
      break;
    if(!StrICmp(word,motifs[idx]))
      ret+=.1*attention[idx];
    ret+=KneserNey0(Fs->hash_table,motifs[idx],word)*attention[idx];
  }
  return ret;
}
//Sum of WordMotifScore0 over the space-separated tokens of pairs.
//Works on a private copy because StrFirstRem consumes its input.
F64 WordPairMotifScore(U8 *pairs,U8 **motifs,F64 *attention) {
  pairs=StrNew(pairs);
  U8 buf[STR_LEN];
  F64 ret=0.;
  while(StrFirstRem(pairs," ",buf)) {
    ret+=WordMotifScore0(buf,motifs,attention);
    if(!*pairs)
      break;
  }
  Free(pairs);
  return ret;
}
//Randomly picks a successor for the key last_word.
//
//Every HTT_FRAME_PTR entry stored under last_word in Fs->hash_table is
//weighted with Exp(1+motif score+KneserNey(...,.5)); the weights are
//normalised and one entry is drawn with a Rand coin.
//
//Returns the chosen entry's user_data0 (owned by the table, do not
//free), or the literal "." when nothing was selected.
U8 *PredictWord(U8 *last_word,U8 **motifs,F64 *attention) {
  I64 idx,idx2;
  CHashGeneric *gen;
  F64 *probs;
  F64 sum=0,coin;
  U8 *best_word=".";
  //Count candidates; idx ends one past the last instance found
  for(idx=1;gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx);++idx)
    ;
  probs=CAlloc(8*idx);
  for(idx=1;gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx);++idx) {
    probs[idx-1]=Exp(1+WordMotifScore0(gen->user_data0,motifs,attention)+KneserNey(Fs->hash_table,last_word,gen->user_data0,.5));
    sum+=probs[idx-1];
  }
  if(!sum) {
    //Degenerate weights: fall back to a uniform distribution
    for(idx2=1;idx2<idx;++idx2) {
      probs[idx2-1]=.001;
      sum+=.001;
    }
  }
  for(idx2=1;idx2<idx;++idx2)
    probs[idx2-1]/=sum;
  coin=Rand;
  sum=0.;
  for(idx2=1;idx2<idx;++idx2) {
    sum+=probs[idx2-1];
    if(sum>=coin) {
      gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx2);
      best_word=gen->user_data0;
      break;
    }
  }
  Free(probs);
  return best_word;
}

//Reverse counterpart of PredictWord: looks up the key "!<last_word>"
//(the reverse links created by MarkovLoadTable) and draws one entry.
//Weights are Exp(1+WordPairMotifScore+KneserNey0(candidate,last_word)).
//Returns the chosen entry's user_data0 (table-owned) or the literal ".".
U8 *PredictWordRev(U8 *last_word,U8 **motifs,F64 *attention) {
  I64 idx,idx2;
  CHashGeneric *gen;
  U8 buf[STR_LEN];
  F64 *probs;
  F64 sum=0,coin;
  U8 *best_word=".";
  StrPrint(buf,"!%s",last_word);
  for(idx=1;gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx);++idx)
    ;
  probs=CAlloc(8*idx);
  for(idx=1;gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx);++idx) {
    probs[idx-1]=Exp(1+1.*(WordPairMotifScore(gen->user_data0,motifs,attention)+KneserNey0(Fs->hash_table,gen->user_data0,last_word)));
    sum+=probs[idx-1];
  }
  if(!sum) {
    for(idx2=1;idx2<idx;++idx2) {
      probs[idx2-1]=.001;
      sum+=.001;
    }
  }
  for(idx2=1;idx2<idx;++idx2)
    probs[idx2-1]/=sum;
  coin=Rand;
  sum=0.;
  for(idx2=1;idx2<idx;++idx2) {
    sum+=probs[idx2-1];
    if(sum>=coin) {
      gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx2);
      best_word=gen->user_data0;
      break;
    }
  }
  Free(probs);
  return best_word;
}


//Slides the token window held in words: drops the first space-separated
//token, appends str, and trims surrounding whitespace. Edits words in
//place; the caller's buffer must be large enough for the result.
U0 AddStringToWordsBuf(U8 *words,U8 *str) {
  StrFirstRem(words," ");
  StrUtil(words,SUF_REM_TRAILING);
  CatPrint(words," %s",str);
  StrUtil(words,SUF_REM_TRAILING|SUF_REM_LEADING);
}


//Predicts look_ahead successive tokens after the window `words`.
//
//The chain is sampled `attempts` times and the attempt whose tokens have
//the highest summed WordMotifScore0 is copied into to[0..look_ahead-1].
//The stored pointers come from PredictWord (table-owned or ".").
//
//NOTE(review): the return value is always 0; the original returned a
//"best strength" local that was never updated. No caller in this file
//reads it.
F64 PredictWords(U8 **to,U8 *words,U8 **motifs,F64 *attention,I64 look_ahead,I64 attempts=3) {
  U8 **best_words=CAlloc(8*look_ahead);
  F64 score,best_score=-1.;
  I64 attempt,idx;
  U8 cur[STR_LEN*2];

  for(attempt=0;attempt!=attempts;++attempt) {
    for(idx=0;idx!=look_ahead;++idx) {
      if(!idx)
        StrCpy(cur,words); //every attempt restarts from the caller's window
      to[idx]=PredictWord(cur,motifs,attention);
      AddStringToWordsBuf(cur,to[idx]);
    }
    score=0;
    for(idx=0;idx!=look_ahead;++idx)
      score+=WordMotifScore0(to[idx],motifs,attention);
    if(score>best_score) {
      MemCpy(best_words,to,8*look_ahead);
      best_score=score;
    }
  }
  MemCpy(to,best_words,8*look_ahead);
  Free(best_words);
  return 0.;
}
//Predicts text that precedes cur_word by walking the reverse links.
//
//Each attempt keeps drawing PredictWordRev results until they cover at
//least look_ahead tokens. An attempt replaces the current best when it
//scores higher (summed WordPairMotifScore) or produced more pieces.
//
//Returns a newly allocated string holding the best attempt's pieces in
//reverse draw order, each followed by a space. Caller frees.
U8 *PredictWordsRev(U8 *cur_word,U8 **motifs,F64 *attention,I64 look_ahead,I64 attempts=3) {
  U8 **best_words=CAlloc(8*look_ahead);
  U8 **to=CAlloc(8*look_ahead);
  F64 score,best_score=-1.;
  I64 attempt,idx,idx2,best_cnt=0,len;
  U8 last[STR_LEN],*words,*ret,*tmp;
  for(attempt=0;attempt!=attempts;++attempt) {
    for(idx2=idx=0;idx<look_ahead;) {
      if(!idx)
        StrCpy(last,cur_word);
      words=PredictWordRev(last,motifs,attention);
      if(!words)
        break;
      idx+=StrOcc(words,' ')+1; //tokens covered by this piece
      to[idx2++]=words;

      //New key: predicted piece followed by the first token of the old key
      tmp=FirstNGrams(last,TOKENS_AT_ONCE-1);
      StrUtil(tmp,SUF_REM_LEADING|SUF_REM_TRAILING);
      StrUtil(words,SUF_REM_LEADING|SUF_REM_TRAILING);
      StrPrint(last,"%s %s",words,tmp);
      Free(tmp);
    }
    score=0;
    for(idx=0;idx!=idx2;++idx)
      score+=WordPairMotifScore(to[idx],motifs,attention);
    if(score>best_score||idx2>best_cnt) {
      best_cnt=idx2;
      MemCpy(best_words,to,8*look_ahead);
      best_score=score;
    }
  }

  len=0;
  MemCpy(to,best_words,8*look_ahead);
  for(idx=best_cnt-1;idx>=0;--idx)
    len+=StrLen(to[idx])+1;
  ret=CAlloc(len+1);
  for(idx=best_cnt-1;idx>=0;--idx)
    CatPrint(ret,"%s ",to[idx]);
  Free(best_words),Free(to);
  return ret;
}


//Writes every entry of table to the document file `to`, one record per
//line, terminated by ';':
//  keys containing "Relat."            -> "key","user_data0",user_data1;
//  keys containing "Freq."/"Relats."   -> "key",user_data0;
//  everything else                     -> "key","user_data0",user_data1,'user_data2';
//Keys starting with '!' (reverse n-grams) are not saved.
U0 MarkovSaveTable(U8 *to="Markov.DD",CHashTable *table) {
  CDoc *doc=DocNew(to);
  I64 bucket=table->mask+1;
  CHashGeneric *gen;

  while(--bucket>=0) {
    for(gen=table->body[bucket];gen;gen=gen->next) {
      //See ConceptRank.HC
      if(gen->str[0]=='!')
        ; //Dont save reverse n-grams
      else if(StrIMatch("Relat.",gen->str)) {
        DocPrint(doc,"\"%q\",\"%q\",%d;\n",gen->str,gen->user_data0,gen->user_data1);
      } else if(StrIMatch("Freq.",gen->str)||StrIMatch("Relats.",gen->str)) {
        DocPrint(doc,"\"%q\",%d;\n",gen->str,gen->user_data0);
      } else
        DocPrint(doc,"\"%q\",\"%q\",%d,'%c';\n",gen->str,gen->user_data0,gen->user_data1,gen->user_data2);

    }
  }
  DocWrite(doc);
  DocDel(doc);
}
//Reads a file written by MarkovSaveTable into a new hash table.
//
//Each record is a string key followed by up to three values (integer,
//string or character constant) stored in user_data0..2. "Relats." and
//"Freq." records are also added to the counters total_relats and total,
//which are declared outside the code visible here. For every ordinary
//n-gram a reverse entry keyed "!<tail of key> <successor>" is added whose
//user_data0 is the first token of the key.
//
//Returns the new table; the caller owns it.
CHashTable *MarkovLoadTable(U8 *from="Markov.DD") {
  CHashTable *tab=HashTableNew(0x10000);
  CCmpCtrl *cc=CmpCtrlNew(FileRead(from),,from);
  CHashGeneric *gen,*gen2;
  I64 which,val;
  U8 *tmp;
  total_relats=0;
  while(Lex(cc)) {
    if(cc->token!=TK_STR)
      LexExcept(cc,"Expected string at: ");
    gen=CAlloc(sizeof CHashGeneric );
    gen->str=StrNew(cc->cur_str);
    gen->type=HTT_FRAME_PTR;
    val=which=0;
    while(Lex(cc)!=';') {
      if(cc->token==',') {
      } else if(cc->token==TK_I64) {
        val=cc->cur_i64;
add_value:
        //Values fill user_data0, user_data1, user_data2 in order
        switch(which++) {
          case 0:
            gen->user_data0=val;
            break;
          case 1:
            gen->user_data1=val;
            break;
          case 2:
            gen->user_data2=val;
            break;
          default:
            LexExcept(cc,"Too many values at: ");
            break;
        }
      } else if(cc->token==TK_STR) {
        val=StrNew(cc->cur_str);
        goto add_value;
      } else if(cc->token==TK_CHAR_CONST) {
        val=cc->cur_i64;
        goto add_value;
      } else
        LexExcept(cc,"Expected ';' at: ");
    }
    if(StrIMatch("Relats.",gen->str))
      total_relats+=gen->user_data0; //Relationship cnt
    else if(StrIMatch("Freq.",gen->str))
      total+=gen->user_data0;
    else if(StrIMatch("Relat.",gen->str))
      ;//Do nothing
    else {
      //make a reverse connection
      gen2=CAlloc(sizeof CHashGeneric);
      gen2->type=HTT_FRAME_PTR;
      tmp=LastNGrams(gen->str,TOKENS_AT_ONCE-1);
      gen2->str=MStrPrint("!%s %s",tmp,gen->user_data0);
      Free(tmp);
      gen2->user_data0=FirstNGrams(gen->str,1);
      HashAdd(gen2,tab);
    }
    HashAdd(gen,tab);
  }
  CmpCtrlDel(cc);
  return tab;
}
//Reads cnt tokens with GetWord and returns them joined by single spaces
//(passed through Upperify). *ptr is advanced past the FIRST token only,
//so consecutive calls yield overlapping windows.
//The result is built on the heap, so long tokens cannot overflow a
//fixed-size buffer. Caller frees the result.
U8 *GetWords(U8 **ptr,I64 cnt=TOKENS_AT_ONCE) {
  U8 *joined=StrNew(""),*grown,*word;
  U8 *resume=*ptr;
  Bool first=TRUE;
  while(--cnt>=0) {
    word=GetWord(ptr);
    if(first) {
      first=FALSE;
      resume=*ptr;
    }
    grown=MStrPrint("%s%s ",joined,word);
    Free(joined);
    Free(word);
    joined=grown;
  }
  StrUtil(joined,SUF_REM_TRAILING);
  *ptr=resume;
  return Upperify(joined);
}
//Picks a random n-gram key from Fs->hash_table.
//
//Keys containing "Relat.", "Freq." or "Relats." and keys starting with
//'!' are ignored. When start_word is given, only keys that begin with it
//(case-insensitive) followed by a space or the end of the key qualify.
//
//Called with the default who=-1 the function only counts the candidates
//and then calls itself with a random 1-based index; that second pass
//returns the who-th candidate. If start_word matches nothing, the search
//is repeated without a start word.
//
//Returns a table-owned key, or NULL when the table has no candidate at
//all (the original recursed without end in that case).
U8 *PickRandomWordPair(U8 *start_word,I64 who=-1) {
  CHashTable *t=Fs->hash_table;
  I64 mask=t->mask;
  CHashGeneric *gen;
  I64 l,cnt=0;
  while(mask>=0) {
    for(gen=t->body[mask];gen;gen=gen->next) {
      if(StrMatch("Relat.",gen->str)||StrMatch("Freq.",gen->str)
            ||StrMatch("Relats.",gen->str)||gen->str[0]=='!')
        goto skip;
      if(start_word) {
        l=StrLen(start_word);
        if(StrNICmp(start_word,gen->str,l))
          goto skip;
        if(gen->str[l]!=0&&gen->str[l]!=' ')
          goto skip;
      }
      ++cnt;
      if(!--who)
        return gen->str;
skip:;
    }
    --mask;
  }
  if(!cnt) {
    if(!start_word)
      return NULL; //nothing to pick from
    //none found pick random
    return PickRandomWordPair(NULL);
  }
  return PickRandomWordPair(start_word,RandU64%cnt+1);
}
//Picks a starting n-gram: a random key that begins with a randomly
//chosen motif, or any random key when there are no motifs.
//start_word and attention are accepted for interface compatibility but
//not used; the scoring search that followed the unconditional return in
//the original was unreachable and has been removed.
U8 *PickGoodWordPair(U8 *start_word,U8 **motifs,F64 *attention) {
  I64 idx;
  //Count the motifs in use (the list ends at the first NULL slot)
  for(idx=0;idx<MOTIF_LEN&&motifs[idx];++idx)
    ;
  if(!idx)
    return PickRandomWordPair(NULL);
  return PickRandomWordPair(motifs[RandU64%idx]);
}


//Trains the model file `to` with the text in `file`.
//
//Does nothing if `file` does not exist. An existing model is loaded and
//extended, otherwise a new table is started. The text is read as sliding
//windows of TOKENS_AT_ONCE+1 tokens; the last token of each window is
//recorded as a successor of the preceding tokens (count in user_data1).
//The table is made the task's hash table while training and written
//back with MarkovSaveTable at the end.
//stop_at_lt stops at '<'
U0 MarkovGenerateModel(U8 *to="Markov.DD",U8 *file,Bool stop_at_lt=FALSE) {
  if(!FileFind(file))
    return;
  U8 buf2[STR_LEN*2],*a,*b;
  CHashTable *t;
  CHashGeneric *gen;
  I64 idx;
  U8 *optr=file=FileRead(file);
  if(!FileFind(to)) {
    t=HashTableNew(0x8000);
  } else {
    t=MarkovLoadTable(to);
  }
  t->next=Fs->hash_table;
  Fs->hash_table=t;
  a=GetWords(&file,TOKENS_AT_ONCE+1);
  ConceptRank(file);

  while(*file) {
    if(stop_at_lt) {
      if(StrOcc(a,'<'))
        break;
    }
    b=StrLastRem(a," ",buf2); //We did +1


    if(!b)
      break;
    //Bump the count if this successor is already known for the key
    for(idx=1;gen=HashSingleTableFind(a,t,HTT_FRAME_PTR,idx);++idx) {
      if(!StrCmp(gen->user_data0,buf2)) {
        ++gen->user_data1;
        goto skip;
      }
    }
    HashGenericAdd(a,HTT_FRAME_PTR,StrNew(buf2),1);
skip:;
    Free(a);
    a=GetWords(&file,TOKENS_AT_ONCE+1);
  }
  Free(a);
  MarkovSaveTable(to,t);
  Fs->hash_table=t->next;
  HashTableDel(t);
  Free(optr);
}

//Adds word `a` to the motif list with the given weight.
//
//Nothing happens when a is a stop word or when RelationshipsWithWord
//reports no relation to any of the argc words in argv. If a is already
//a motif its attention is divided by 100 instead. Otherwise a goes into
//the first empty slot, or replaces an existing motif when the list is
//full.
//
//Ownership: the list stores a private copy of a and frees the entry it
//evicts, so motifs[] must only ever be filled through this function and
//the owner has to Free() every slot when done. The original stored the
//caller's pointer, which dangled once AddMotifs freed its scratch
//buffer.
//
//NOTE(review): the score inside the slot scan does not depend on the
//slot being examined, so for a full list the victim is always slot 0.
//Kept as is; confirm the intended eviction rule.
U0 AddMotif0(U8 *a,U8 **motifs,F64 *attention,F64 weight=1.,I64 argc,U8 **argv) {
  I64 idx2,worst;
  F64 cur_w,tmp;
  Bool relevant;
  if(IsStopWord(a))
    return;
  //Isolate worst motif(or empty motif slot)
  cur_w=U16_MAX;
  worst=0;
  for(idx2=worst;idx2!=MOTIF_LEN;++idx2) {
    if(motifs[idx2]) {
      tmp=WordMotifScore0(a,motifs,attention);
      if(tmp<cur_w) {
        worst=idx2;
        cur_w=tmp;
      }
    } else {
      worst=idx2;
      break;
    }
  }
  relevant=FALSE;
  while(--argc>=0) {
    if(RelationshipsWithWord(a,argv[argc]))
      relevant=TRUE;
  }
  if(!relevant)
    return;
  for(idx2=0;idx2!=MOTIF_LEN;++idx2) {
    if(!motifs[idx2])
      break;
    if(!StrICmp(motifs[idx2],a)) {
      attention[idx2]/=100.;
      return;
    }
  }
  Free(motifs[worst]); //NULL for an empty slot
  motifs[worst]=StrNew(a);
  attention[worst]=weight;
}

//Feeds every space-separated token of str to AddMotif0 with the default
//weight. Works on a private copy of str.
U0 AddMotifs(U8 *str,U8 **motifs,F64 *attention,I64 argc,I64 *argv) {
  str=StrNew(str);
  U8 *ptr;
  while(ptr=StrFirstOcc(str," ")) {
    *ptr=0;
    AddMotif0(str,motifs,attention,,argc,argv);
    StrCpy(str,ptr+1); //shift the remaining tokens to the front
  }
  AddMotif0(str,motifs,attention,,argc,argv);
  Free(str);
}

//Generates text from the model file `model`.
//Argv is motifs
//
//The model is loaded and made the task's hash table for the duration of
//the call. A starting n-gram is picked, then tokens are predicted
//forward until cnt tokens were emitted or a "<", ">" or "." token shows
//up. Afterwards text is predicted backwards from the starting n-gram
//and inserted at the top of the document until a piece containing one
//of <>. is produced or cnt rounds have run.
//
//Returns the document contents as produced by DocSave; caller frees.
//An empty document is returned when the model has no usable n-gram.
U8 *MarkovGenerateText0(U8 *model,I64 cnt=100,U8 *start_word="THE",I64 argc,I64 *argv) {
  I64 ocnt=cnt;
  CHashTable *t=MarkovLoadTable(model);
  CDoc *ret=DocNew;
  U8 words[STR_LEN],tmpbuf[STR_LEN];
  U8 first_words[STR_LEN];
  I64 idx;
  U8 *a,*last_word;
  t->next=Fs->hash_table;
  Fs->hash_table=t;
  U8 *motifs[MOTIF_LEN];
  F64 attention[MOTIF_LEN];
  U8 *predicted[TOKENS_AT_ONCE*3];
  MemSetU64(motifs,NULL,MOTIF_LEN);
  for(idx=0;idx<argc;++idx)
    AddMotif0(argv[idx],motifs,attention,1.,argc,argv);

  a=PickGoodWordPair(start_word,motifs,attention);
  if(!a)
    goto done; //empty model, nothing to start from
  StrCpy(words,a);
  StrCpy(first_words,a);
  DocPrint(ret,"%s ",first_words);
  if(StrFirstOcc(first_words,"<>.")) {
    goto fin;
  }
  while(cnt>=0) {
    PredictWords(predicted,&words,motifs,attention,TOKENS_AT_ONCE,3);
    for(idx=0;idx!=TOKENS_AT_ONCE;++idx) {
      last_word=a;
      a=predicted[idx];
      if(!StrICmp(a,"<"))
        goto fin;
      if(!StrICmp(a,">"))
        goto fin;
      if(StrICmp(a,last_word)) {
        DocPrint(ret,"%s ",a);
        if(!StrICmp(a,"."))
          goto fin;
        AddStringToWordsBuf(&words,a);
        AddMotifs(a,motifs,attention,argc,argv);
        --cnt;
      } else
        break; //same token twice in a row: predict again

    }
  }
fin:
  for(idx=0;idx<argc;++idx) {
    AddMotif0(argv[idx],motifs,attention,1.,argc,argv);
  }
  AddMotif0(".",motifs,attention,1/10.,argc,argv);
  if(a=StrFirstOcc(first_words,"<>")) {
    goto flush;
  }

  while(TRUE&&--ocnt>=0) {
    StrUtil(first_words,SUF_REM_LEADING|SUF_REM_TRAILING);
    a=PredictWordsRev(first_words,motifs,attention,TOKENS_AT_ONCE,3);
    StrCpy(tmpbuf,a);
    Free(a);
    a=tmpbuf;
    StrUtil(a,SUF_REM_LEADING|SUF_REM_TRAILING);
    if(StrLastOcc(a,"<>.")) {
flush:
      //Keep only what follows the last boundary character
      a=StrLastOcc(a,"<>.");
      DocTop(ret);
      DocPrint(ret,"%s ",a+1);
      break;
    }
    DocTop(ret);
    DocPrint(ret,"%s ",a);
    AddMotifs(a,motifs,attention,argc,argv);
    a=FirstNGrams(a,TOKENS_AT_ONCE);
    if(!a)
      break;
    StrCpy(first_words,a);
    Free(a);
  }
done:
  //The motif list owns its strings (see AddMotif0)
  for(idx=0;idx!=MOTIF_LEN;++idx)
    Free(motifs[idx]);
  Fs->hash_table=t->next;
  HashTableDel(t);
  a=DocSave(ret);
  DocDel(ret);
  return a;
}
//Del("Markov.DD");
//Build the model from the site's documents the first time this runs
if(!FileFind("Markov.DD")) {
  MarkovGenerateModel("Markov.DD","/WWW/index.DD");
  MarkovGenerateModel("Markov.DD","/WWW/SP2024.DD");
  MarkovGenerateModel("Markov.DD","/WWW/oldshitpit.DD");
}
///MarkovGenerateModel("Markov.DD","timecube.DD");
//Variadic front end for MarkovGenerateText0: the extra arguments are
//passed on as the motif words.
U8 *MarkovGenerateText(U8 *model,I64 cnt=100,U8 *start_word="THE",...) {
  return MarkovGenerateText0(model,cnt,start_word,argc,argv);
}
DocMax;
//MarkovGenerateModel("Markov.DD",BIBLE_FILENAME);
//Interactive chat loop.
//
//Each round reads a line at the "ME: " prompt, takes up to MOTIF_LEN of
//its tokens that are not stop words as motifs, prints a generated reply
//of about 15 tokens after "CPU: ", then trains the model on the
//exchange (".<input><<previous reply>.") through the temp file and
//copies the model to "Model2.DD".
U0 MarkovBot(U8 *model="Markov.DD") {
  U8 *str,*ptr,*word,*train,*result,*last=NULL;
  U8 *motifs[MOTIF_LEN];
  I64 idx,cnt;
  while(str=GetStr("ME: ",NULL)) {
    ptr=str;
    cnt=0;
    for(idx=0;idx!=MOTIF_LEN;++idx) {
      word=GetWord(&ptr);
      //GetWord yields "" once only non-word characters remain; such an
      //empty token used to be registered as a motif
      if(*word&&!IsStopWord(word)) {
        motifs[cnt++]=word;
      } else {
        Free(word);
      }
      if(!*ptr) break;
      if(cnt>=MOTIF_LEN)
        break;
    }
    "CPU: ";
    result=MarkovGenerateText0(model,15,NULL,cnt,motifs);
    "%s.",result;
    train=MStrPrint(".%s<%s.",str,last);
    Free(last);
    last=StrNew(result);
    FileWrite(blkdev.tmp_filename,train,StrLen(train));
    MarkovGenerateModel(model,blkdev.tmp_filename,TRUE);
    Free(result),Free(train);
    "\n";
    Free(str);
    while(--cnt>=0)
      Free(motifs[cnt]);
    Copy(model,"Model2.DD");
  }
  Free(last);
}
MarkovBot;