001 Bool IsStopWord(U8 *w) {
002   if(!StrICmp("A",w)) return TRUE;
003   if(!StrICmp("MY",w)) return TRUE;
004   if(!StrICmp("MINE",w)) return TRUE;
005   if(!StrICmp("no",w)) return TRUE;
006   if(!StrICmp("never",w)) return TRUE;
007   if(!StrICmp("though",w)) return TRUE;
008   if(!StrICmp("because",w)) return TRUE;
009   if(!StrICmp("do",w)) return TRUE;
010   if(!StrICmp("aN",w)) return TRUE;
011   if(!StrICmp("be",w)) return TRUE;
012   if(!StrICmp("THE",w)) return TRUE;
013   if(!StrICmp("AND",w)) return TRUE;
014   if(!StrICmp("but",w)) return TRUE;
015   if(!StrICmp("or",w)) return TRUE;
016   if(!StrICmp("in",w)) return TRUE;
017   if(!StrICmp("on",w)) return TRUE;
018   if(!StrICmp("at",w)) return TRUE;
019   if(!StrICmp("for",w)) return TRUE;
020   if(!StrICmp("as",w)) return TRUE;
021   if(!StrICmp("of",w)) return TRUE;
022   if(!StrICmp("i",w)) return TRUE;
023   if(!StrICmp("that",w)) return TRUE;
024   if(!StrICmp("this",w)) return TRUE;
025   if(!StrICmp("these",w)) return TRUE;
026   if(!StrICmp("their",w)) return TRUE;
027   if(!StrICmp("which",w)) return TRUE;
028   if(!StrICmp("whoose",w)) return TRUE;
029   if(!StrICmp("not",w)) return TRUE;
030   if(!StrICmp("is",w)) return TRUE;
031   if(!StrICmp("are",w)) return TRUE;
032   if(!StrICmp("here",w)) return TRUE;
033   if(!StrICmp("its",w)) return TRUE;
034   if(!StrICmp("it",w)) return TRUE;
035   if(!StrICmp("dont",w)) return TRUE;
036   if(!StrICmp("there",w)) return TRUE;
037   if(!StrICmp("who",w)) return TRUE;
038   if(!StrICmp("you",w)) return TRUE;
039   if(!StrICmp("if",w)) return TRUE;
040   if(!StrICmp("to",w)) return TRUE;
041   if(!StrICmp("so",w)) return TRUE;
042   if(!StrICmp("what",w)) return TRUE;
043   return FALSE;
044 }
045 
046 U8 *GetWord(U8 **_ptr) {
047   U8 *ptr=*_ptr,*endd;
048   U8 buf[STR_LEN];
049   buf[0]=0;
050 again:;
051   while(*ptr&&!(Bt(char_bmp_alpha,*ptr)||StrOcc("'.<>$",*ptr)))
052     ++ptr;
053   if(*ptr=='$') {
054     if(endd=StrFirstOcc(ptr+1,"$")) {
055       ptr=endd+1;
056       goto again;
057     }
058   }
059   if(StrOcc("<>.'$",*ptr))
060     buf[0](U16)=*ptr++;
061   else while(Bt(char_bmp_alpha,*ptr)||StrOcc("'",*ptr))
062    CatPrint(buf,"%c",ToUpper(*ptr++));
063   *_ptr=ptr;
064   return StrNew(buf);
065 }
066 #include "ConceptRank";
067 #include "KneserNey";
068 #define MOTIF_LEN 100
069 #define TOKENS_AT_ONCE 2
070 U8 *LastNGrams(U8 *text,I64 many=TOKENS_AT_ONCE) {
071   U8 *ptr=text,*tmp;
072   I64 occ=StrOcc(text,' ');
073   while(occ-many>=0) {
074    ptr=StrFirstOcc(ptr," ");
075    if(!ptr)
076      return NULL;
077    --occ;
078    ptr++;
079   }
080   return StrNew(ptr);
081 }
082 U8 *FirstNGrams(U8 *text,I64 many=TOKENS_AT_ONCE) {
083   U8 *ptr=StrNew(text),*tmp=ptr;
084   while(--many>=0) {
085    ptr=StrFirstOcc(ptr," ");
086    if(!ptr) {
087      if(many==0) {
088         ptr=tmp+StrLen(tmp);
089         break;
090      }
091      return NULL;
092    }
093    ptr++;
094   }
095   *ptr=0;
096   return tmp;
097 }
098 
099 F64 WordMotifScore0(U8 *word,U8 **motifs,F64 *attention) {
100   F64 ret=0.;
101   I64 idx;
102   for(idx=0;idx!=MOTIF_LEN;++idx) {
103     if(!motifs[idx])
104         break;
105     if(!StrICmp(word,motifs[idx]))
106         ret+=.1*attention[idx];
107     ret+=KneserNey0(Fs->hash_table,motifs[idx],word)*attention[idx];
108   }
109   return ret;
110 }
111 F64 WordPairMotifScore(U8 *pairs,U8 **motifs,F64 *attention) {
112   pairs=StrNew(pairs);
113   U8 buf[STR_LEN];
114   F64 ret=0.;
115   while(StrFirstRem(pairs," ",buf)) {
116     ret+=WordMotifScore0(buf,motifs,attention);
117     if(!*pairs)
118         break;
119   }
120   Free(pairs);
121   return ret;
122 }
123 U8 *PredictWord(U8 *last_word,U8 **motifs,F64 *attention) {
124   U8 *lw2=last_word;
125   if(StrOcc(lw2,' '))
126     lw2=StrLastOcc(lw2," ")+1;
127   I64 idx,idx2;
128   CHashGeneric *gen;
129   for(idx=1;gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx);++idx)
130     ;
131   F64 *probs=CAlloc(8*idx);
132   F64 sum=0,avg;
133   F64 sum2=0,coin,ahead_strength,best_strength=0;
134   F64 strength2,removed;
135   U8 *best_word=".";
136   for(idx=1;gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx);++idx) {
137     probs[idx-1]=Exp(1+WordMotifScore0(gen->user_data0,motifs,attention)+KneserNey(Fs->hash_table,last_word,gen->user_data0,.5));
138     sum+=probs[idx-1];
139   }
140   if(!sum) {
141     for(idx2=1;idx2<idx;++idx2) {
142       probs[idx2-1]=.001;
143       sum+=.001;
144     }   
145   }
146   for(idx2=1;idx2<idx;++idx2) {
147     probs[idx2-1]/=sum;
148   }
149   sum2=0.;
150 again:;
151   coin=Rand;
152   sum=0.;
153   for(idx2=1;idx2<idx;++idx2) {
154     sum+=probs[idx2-1];
155     if(sum>=coin) {
156       gen=HashSingleTableFind(last_word,Fs->hash_table,HTT_FRAME_PTR,idx2);
157 next_word:;
158       best_word=gen->user_data0;
159       goto en;
160     }
161   }
162 en:;
163   Free(probs);
164   return best_word;
165 }
166 
167 U8 *PredictWordRev(U8 *last_word,U8 **motifs,F64 *attention) {
168   I64 idx,idx2;
169   CHashGeneric *gen;
170   U8 buf[STR_LEN];
171   StrPrint(buf,"!%s",last_word);
172   for(idx=1;gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx);++idx)
173     ;
174   F64 *probs=CAlloc(8*idx);
175   F64 sum=0,avg;
176   F64 sum2=0,coin,ahead_strength,best_strength=0;
177   F64 strength2,removed;
178   U8 *best_word=".";
179   for(idx=1;gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx);++idx) {
180     probs[idx-1]=Exp(1+1.*(WordPairMotifScore(gen->user_data0,motifs,attention)+KneserNey0(Fs->hash_table,gen->user_data0,last_word)));
181     sum+=probs[idx-1];
182   }
183   if(!sum) {
184     for(idx2=1;idx2<idx;++idx2) {
185       probs[idx2-1]=.001;
186       sum+=.001;
187     }   
188   }
189   for(idx2=1;idx2<idx;++idx2) {
190     probs[idx2-1]/=sum;
191   }
192   sum2=0.;
193 again:;
194   coin=Rand;
195   sum=0.;
196   for(idx2=1;idx2<idx;++idx2) {
197     sum+=probs[idx2-1];
198     if(sum>=coin) {
199       gen=HashSingleTableFind(buf,Fs->hash_table,HTT_FRAME_PTR,idx2);
200 next_word:;
201       best_word=gen->user_data0;
202       goto en;
203     }
204   }
205 en:;
206   Free(probs);
207   return best_word;
208 }
209 
210 
211 U0 AddStringToWordsBuf(U8 *words,U8 *str) {
212   StrFirstRem(words," ");
213   StrUtil(words,SUF_REM_TRAILING);
214   CatPrint(words," %s",str);
215   StrUtil(words,SUF_REM_TRAILING|SUF_REM_LEADING);
216 }
217 
218 
219 F64 PredictWords(U8 **to,U8 *words,U8 **motifs,F64 *attention,I64 look_ahead,I64 attempts=3) {
220   F64 strength,best_strength=0;
221   U8 **best_words=CAlloc(8*look_ahead);
222   F64 score,best_score=-1.;
223   I64 attempt,idx;
224   U8 cur[STR_LEN*2];
225   
226   for(attempt=0;attempt!=attempts;++attempt) {
227     for(idx=0;idx!=look_ahead;++idx) {
228       if(!idx) {
229         StrCpy(cur,words);
230         to[idx]=PredictWord(cur,motifs,attention);
231       } else {
232         to[idx]=PredictWord(cur,motifs,attention);
233       }
234       AddStringToWordsBuf(cur,to[idx]);
235     }
236     score=0;
237     for(idx=0;idx!=look_ahead;++idx) {
238        score+=WordMotifScore0(to[idx],motifs,attention);
239     }
240     if(score>best_score) {
241       MemCpy(best_words,to,8*look_ahead);
242       best_score=score;
243     }
244   }
245   MemCpy(to,best_words,8*look_ahead);
246   Free(best_words);
247   return best_strength;
248 }
249 U8 *PredictWordsRev(U8 *cur_word,U8 **motifs,F64 *attention,I64 look_ahead,I64 attempts=3) {
250   CHashGeneric *gen;
251   F64 strength,best_strength=0;
252   U8 **best_words=CAlloc(8*look_ahead);
253   U8 **to=CAlloc(8*look_ahead);
254   F64 score,best_score=-1.;
255   I64 attempt,idx,who,idx2,best_cnt=0,len;
256   U8 last[STR_LEN],*words,*ret,*tmp;
257   for(attempt=0;attempt!=attempts;++attempt) {
258     for(idx2=idx=0;idx<look_ahead;) {
259       if(!idx) {
260         StrCpy(last,cur_word);
261         words=PredictWordRev(cur_word,motifs,attention);
262       } else {
263         words=PredictWordRev(last,motifs,attention);
264       }
265       if(!words)
266         break;
267       idx+=StrOcc(words,' ')+1;
268       to[idx2++]=words;
269       
270       tmp=FirstNGrams(last,TOKENS_AT_ONCE-1);
271       StrUtil(tmp,SUF_REM_LEADING|SUF_REM_TRAILING);
272       StrUtil(words,SUF_REM_LEADING|SUF_REM_TRAILING);
273       StrPrint(last,"%s %s",words,tmp);
274       Free(tmp);
275     }
276     score=0;
277     for(idx=0;idx!=idx2;++idx) {
278       score+=WordPairMotifScore(to[idx],motifs,attention);
279     }
280     if(score>best_score||idx2>best_cnt) {
281       best_cnt=idx2;
282       MemCpy(best_words,to,8*look_ahead);
283       best_score=score;
284     }
285   }
286 
287   len=0;
288   MemCpy(to,best_words,8*look_ahead);
289   for(idx=best_cnt-1;idx>=0;--idx) {
290     len+=StrLen(to[idx])+1;
291   }
292   ret=CAlloc(len+1);
293   for(idx=best_cnt-1;idx>=0;--idx) {
294     CatPrint(ret,"%s ",to[idx]);
295   }
296   Free(best_words),Free(to);
297   return ret;
298 }
299 
300 
301 U0 MarkovSaveTable(U8 *to="Markov.DD",CHashTable *table) {
302   CDoc *doc=DocNew(to);
303   I64 bucket=table->mask+1;
304   CHashGeneric *gen;
305 
306   while(--bucket>=0) {
307     for(gen=table->body[bucket];gen;gen=gen->next) {
308 //See ConceptRank.HC
309       if(gen->str[0]=='!')
310         ; //Dont save reverse n-grams
311       else if(StrIMatch("Relat.",gen->str)) {
312         DocPrint(doc,"\"%q\",\"%q\",%d;\n",gen->str,gen->user_data0,gen->user_data1);
313       } else if(StrIMatch("Freq.",gen->str)||StrIMatch("Relats.",gen->str)) {
314         DocPrint(doc,"\"%q\",%d;\n",gen->str,gen->user_data0);
315       } else
316         DocPrint(doc,"\"%q\",\"%q\",%d,'%c';\n",gen->str,gen->user_data0,gen->user_data1,gen->user_data2);
317     
318     }
319   }
320   DocWrite(doc);
321   DocDel(doc);
322 }
323 CHashTable *MarkovLoadTable(U8 *from="Markov.DD") {
324   CHashTable *tab=HashTableNew(0x10000);
325   CCmpCtrl *cc=CmpCtrlNew(FileRead(from),,from);
326   CHashGeneric *gen,*gen2;
327   I64 which,val;
328   U8 *tmp;
329   total_relats=0;
330   while(Lex(cc)) {
331     if(cc->token!=TK_STR)
332       LexExcept(cc,"Expected string at: ");
333     gen=CAlloc(sizeof CHashGeneric );
334     gen->str=StrNew(cc->cur_str);
335     gen->type=HTT_FRAME_PTR;
336     val=which=0;
337     while(Lex(cc)!=';') {
338       if(cc->token==',') {
339       } else if(cc->token==TK_I64) {
340         val=cc->cur_i64;
341 add_value:
342         switch(which++) {
343           case 0:
344           gen->user_data0=val;
345           break;
346           case 1:
347           gen->user_data1=val;
348           break;
349           case 2:
350           gen->user_data2=val;
351           break;
352           default:
353           LexExcept(cc,"Too many valus at: ");
354           break;
355         }
356       } else if(cc->token==TK_STR) {
357         val=StrNew(cc->cur_str);
358         goto add_value;
359       } else if(cc->token==TK_CHAR_CONST) {
360         val=cc->cur_i64;
361         goto add_value;
362       } else
363         LexExcept(cc,"Expected at ';' at: ");
364     }
365     if(StrIMatch("Relats.",gen->str))
366       total_relats+=gen->user_data0; //Relationship cnt
367     else if(StrIMatch("Freq.",gen->str))
368        total+=gen->user_data0;
369     else if(StrIMatch("Relat.",gen->str))
370        ;//Do nothing
371     else  {
372 //make a reverse connection
373       gen2=CAlloc(sizeof CHashGeneric);
374       gen2->type=HTT_FRAME_PTR;
375       tmp=LastNGrams(gen->str,TOKENS_AT_ONCE-1);
376       gen2->str=MStrPrint("!%s %s",tmp,gen->user_data0);
377       Free(tmp);
378       gen2->user_data0=FirstNGrams(gen->str,1);
379       HashAdd(gen2,tab);
380     }
381     HashAdd(gen,tab);
382   }
383   CmpCtrlDel(cc);
384   return tab;
385 }
386 U8 *GetWords(U8 **ptr,I64 cnt=TOKENS_AT_ONCE) {
387   U8 buf[STR_LEN*2];
388   U8 *word;
389   U8 *tmp=*ptr;
390   Bool first=TRUE;
391   buf[0]=0;
392   while(--cnt>=0) {
393     word=GetWord(ptr);
394     if(first) {
395         first=FALSE;
396         tmp=*ptr;
397     }
398     CatPrint(buf,"%s ",word);
399     Free(word);
400   }
401   StrUtil(buf,SUF_REM_TRAILING);
402   if(ptr) *ptr=tmp;
403   return Upperify(StrNew(buf));
404 }
405 U8 *PickRandomWordPair(U8 *start_word,I64 who=-1) {
406   CHashTable *t=Fs->hash_table;
407   I64 mask=t->mask;
408   CHashGeneric *gen;
409   I64 l,cnt=0;
410   while(mask>=0) {
411     for(gen=t->body[mask];gen;gen=gen->next) {
412       if(!StrMatch("Relat.",gen->str)&&!StrMatch("Freq.",gen->str)
413             &&!StrMatch("Relats.",gen->str)&&gen->str[0]!='!')
414         ; //All is good
415         else
416       goto skip;
417       if(!start_word) 
418         goto pass;
419       l=StrLen(start_word);
420       if(!StrNICmp(start_word,gen->str,l)) {
421         if(gen->str[l]==0||gen->str[l]==' ') {
422 pass:;
423           ++cnt;
424           if(!--who)
425             return gen->str;
426         }
427       }
428 skip:;
429     }
430     --mask;
431   }
432   if(!cnt)
433     return PickRandomWordPair(NULL);
434 //none found pick random
435   return PickRandomWordPair(start_word,RandU64%cnt+1);
436 }
437 U8 *PickGoodWordPair(U8 *start_word,U8 **motifs,F64 *attention) {
438   CHashTable *t=Fs->hash_table;
439   I64 idx,idx2,mask;
440   CHashGeneric *gen;
441   I64 l=0,cnt=0;
442   F64 tmp,best_score;
443   U8 *best=NULL,*m1,*m2;
444   U8 buf1[STR_LEN],buf2[STR_LEN];
445   for(idx=0;idx<MOTIF_LEN&&(m1=motifs[idx]);++idx)
446     ;
447   if(!idx)
448     return PickRandomWordPair(NULL);
449   return PickRandomWordPair(motifs[RandU64%idx]);
450 again:;
451   best_score=0;
452   start_word=best;
453   mask=t->mask;
454   best=NULL;
455   while(mask>=0) {
456     for(gen=t->body[mask];gen;gen=gen->next) {
457       if(!StrMatch("Relat.",gen->str)&&!StrMatch("Freq.",gen->str)
458             &&!StrMatch("Relats.",gen->str)&&gen->str[0]!='!')
459         for(idx=0;m1=motifs[idx];++idx) {
460           if(StrIMatch(m1,gen->str)) {
461 pass:;
462             tmp=WordPairMotifScore(gen->str,motifs,attention);
463             if(tmp>best_score) {
464               best_score=tmp;
465               best=gen->str;
466               ++cnt;
467             }
468             break;
469           }
470         }
471 skip:;
472     }
473     --mask;
474   }
475   if(best)
476     return best;
477   return PickRandomWordPair(NULL);
478 }
479 
480 
481 //Recurses to predict ahead
482 //stop_at_lt stops at '<'
483 U0 MarkovGenerateModel(U8 *to="Markov.DD",U8 *file,Bool stop_at_lt=FALSE) {
484   if(!FileFind(file))
485     return;
486   U8 buf[STR_LEN*2],buf2[STR_LEN*2],*a,*b;
487   F64 *motif_vals,*sorted_probs,*last_word;
488   I64 motif_cnt;
489   CHashTable *t;
490   CHashGeneric *gen;
491   I64 idx,idx2;
492   U64 anta,antb;
493   F64 *probs,sum,coin,sum2;
494   U8 *optr=file=FileRead(file),*fptr2;
495   if(!FileFind(to)) {
496    t=HashTableNew(0x8000);
497   } else {
498     t=MarkovLoadTable(to);
499   }
500   file=optr;
501   t->next=Fs->hash_table;
502   Fs->hash_table=t;
503   a=GetWords(&file,TOKENS_AT_ONCE+1);
504   ConceptRank(file);
505   
506   while(*file) {
507     if(stop_at_lt) {
508       if(StrOcc(a,'<'))
509         break;
510     }
511     b=StrLastRem(a," ",buf2); //We did +1
512 
513 
514     if(!b)
515         break;
516     for(idx=1;gen=HashSingleTableFind(a,t,HTT_FRAME_PTR,idx);++idx) {
517       if(!StrCmp(gen->user_data0,buf2)) {
518         ++gen->user_data1;
519         goto skip;
520       }
521     }
522     HashGenericAdd(a,HTT_FRAME_PTR,StrNew(buf2),1);
523 skip:;
524     Free(a);
525     a=GetWords(&file,TOKENS_AT_ONCE+1);
526   }
527   Free(a);
528   MarkovSaveTable(to,t);
529   Fs->hash_table=t->next;
530   HashTableDel(t);
531   Free(optr);
532 }
533 
534 U0 AddMotif0(U8 *a,U8 **motifs,F64 *attention,F64 weight=1.,I64 argc,U8 **argv) {
535   I64 idx2,idx3,worst;
536   CHashGeneric *gen;
537   F64 cur_w,lowest_w,best_w,tmp;
538   U8 *last_word;
539   U8 buf[STR_LEN];
540   Bool relevant;
541   if(IsStopWord(a))
542     return;
543 //Isolate worst motif(or empty motif slot)
544   cur_w=lowest_w=U16_MAX;
545   worst=0;
546   for(idx2=worst;idx2!=MOTIF_LEN;++idx2) {
547     if(motifs[idx2]) {
548       tmp=WordMotifScore0(a,motifs,attention);
549       if(tmp<cur_w) {
550         worst=idx2;
551         cur_w=tmp;
552       }
553     } else {
554       worst=idx2;
555       break;
556     }
557   }
558   relevant=FALSE;
559   while(--argc>=0) {
560     if(RelationshipsWithWord(a,argv[argc]))
561       relevant=TRUE;
562   }
563   if(!relevant)
564     return;
565   last_word=a;
566   for(idx2=0;idx2!=MOTIF_LEN;++idx2) {
567     if(!motifs[idx2])
568       break;
569     if(!StrICmp(motifs[idx2],last_word)) {
570       attention[idx2]/=100.;
571       return;
572     }
573   }
574   motifs[worst]=last_word;
575   attention[worst]=weight;
576 }
577 
578 U0 AddMotifs(U8 *str,U8 **motifs,F64 *attention,I64 argc,I64 *argv) {
579   str=StrNew(str);
580   U8 *ptr;
581   while(ptr=StrFirstOcc(str," ")) {
582     *ptr=0;
583     AddMotif0(str,motifs,attention,,argc,argv);
584     StrCpy(str,ptr+1);
585   }
586   AddMotif0(str,motifs,attention,,argc,argv);
587   Free(str);
588 }
589 
590 //Argv is motifs
591 U8 *MarkovGenerateText0(U8 *model,I64 cnt=100,U8 *start_word="THE",I64 argc,I64 *argv) {
592   I64 ocnt=cnt;
593   CHashTable *t=MarkovLoadTable(model);
594   CDoc *ret=DocNew;
595   I64 motif_cnt=0;
596   CHashGeneric *gen;
597   U8 words[STR_LEN],tmpbuf[STR_LEN];
598   U8 first_words[STR_LEN];
599   I64 idx,idx2,idx3,worst;
600   U8 *a,*last_word;
601   t->next=Fs->hash_table;
602   Fs->hash_table=t;
603   U8 *motifs[MOTIF_LEN];
604   F64 attention[MOTIF_LEN];
605   U8 *predicted[TOKENS_AT_ONCE*3];
606   F64 lowest_w,cur_w,tmp,best_w;
607   MemSetU64(motifs,NULL,MOTIF_LEN);
608   for(idx=0;idx<argc;++idx)
609     AddMotif0(argv[idx],motifs,attention,1.,argc,argv);
610 
611   a=PickGoodWordPair(start_word,motifs,attention); //
612   StrCpy(words,a);
613   StrCpy(first_words,a);
614   DocPrint(ret,"%s ",first_words);
615   if(StrFirstOcc(first_words,"<>.")) {
616     goto fin;
617   }
618   while(cnt>=0) {
619     PredictWords(predicted,&words,motifs,attention,TOKENS_AT_ONCE,3);
620     for(idx=0;idx!=TOKENS_AT_ONCE;++idx) {
621       last_word=a;
622       a=predicted[idx];
623       if(!StrICmp(a,"<"))
624         goto fin;
625       if(!StrICmp(a,">"))
626         goto fin;
627       if(StrICmp(a,last_word)) {
628         DocPrint(ret,"%s ",a);
629         if(!StrICmp(a,"."))
630           goto fin;
631         AddStringToWordsBuf(&words,a);
632         AddMotifs(a,motifs,attention,argc,argv);
633 skip:;
634         --cnt;
635       } else
636         break;
637 
638     }
639   }
640 fin:
641   for(idx=0;idx<argc;++idx) {
642      AddMotif0(argv[idx],motifs,attention,1.,argc,argv);
643   }
644   AddMotif0(".",motifs,attention,1/10.,argc,argv);
645   if(a=StrFirstOcc(first_words,"<>")) {
646     goto flush;
647   }
648 
649   while(TRUE&&--ocnt>=0) {
650     a=first_words;
651     StrUtil(first_words,SUF_REM_LEADING|SUF_REM_TRAILING);
652     a=first_words;
653     a=PredictWordsRev(first_words,motifs,attention,TOKENS_AT_ONCE,3);
654     StrCpy(tmpbuf,a);
655     Free(a);
656     a=tmpbuf;
657     StrUtil(a,SUF_REM_LEADING|SUF_REM_TRAILING);
658     if(StrLastOcc(a,"<>.")) {
659 flush:
660       a=StrLastOcc(a,"<>.");
661       DocTop(ret);
662       DocPrint(ret,"%s ",a+1);
663       break;
664     }
665     DocTop(ret);
666     DocPrint(ret,"%s ",a);
667     if(!a)
668       break;
669     AddMotifs(a,motifs,attention,argc,argv);
670     a=FirstNGrams(a,TOKENS_AT_ONCE);
671     if(!a)
672       break;
673     StrCpy(first_words,a);
674     Free(a);
675   }
676   Fs->hash_table=t->next;
677   HashTableDel(t);
678   a=DocSave(ret);
679   DocDel(ret);
680   return a;
681 }
682 //Del("Markov.DD");
683 if(!FileFind("Markov.DD")) {
684   MarkovGenerateModel("Markov.DD","/WWW/index.DD");
685   MarkovGenerateModel("Markov.DD","/WWW/SP2024.DD");
686   MarkovGenerateModel("Markov.DD","/WWW/oldshitpit.DD");
687 }
688 ///MarkovGenerateModel("Markov.DD","timecube.DD");
689 U8 *MarkovGenerateText(U8 *model,I64 cnt=100,U8 *start_word="THE",...) {
690   return MarkovGenerateText0(model,cnt,start_word,argc,argv);
691 }
692 DocMax;
693 //MarkovGenerateModel("Markov.DD",BIBLE_FILENAME);
694 U0 MarkovBot(U8 *model="Markov.DD") {
695   U8 *str,*ptr,*word,*train,*result,*last=NULL;
696   U8 *motifs[MOTIF_LEN];
697   I64 idx,cnt;
698   while(str=GetStr("ME:  ",NULL)) {
699     ptr=str;
700     cnt=0;
701     for(idx=0;idx!=MOTIF_LEN;++idx) {
702       word=GetWord(&ptr);
703       if(!IsStopWord(word)) {
704         motifs[cnt++]=word;
705       } else {
706         Free(word);
707       }
708       if(!*ptr) break;
709       if(cnt>=MOTIF_LEN)
710         break;
711     }   
712     "CPU:  ";
713     result=MarkovGenerateText0(model,15,NULL,cnt,motifs);
714     "%s.",result;
715     train=MStrPrint(".%s<%s.",str,last);
716     Free(last);
717     last=StrNew(result);
718     FileWrite(blkdev.tmp_filename,train,StrLen(train));
719     MarkovGenerateModel(model,blkdev.tmp_filename,TRUE);
720     Free(result),Free(train);
721     "\n";
722     Free(str);
723     while(--cnt>=0) 
724       Free(motifs[cnt]);
725     Copy(model,"Model2.DD");
726   }
727 }
728 MarkovBot;