// Case-insensitive suffix test.
//   a - string to inspect
//   e - suffix to look for
// Returns TRUE when the last StrLen(e) characters of a compare equal to e
// under StrICmp, FALSE otherwise (including when e is longer than a).
Bool EndsWith(U8 *a,U8 *e) {
  I64 a_len=StrLen(a),e_len=StrLen(e);
  if(e_len<=a_len)
    return StrICmp(a+a_len-e_len,e)==0;
  return FALSE;
}
// Returns a freshly allocated upper-case copy of str.
// Takes ownership of str: the argument is passed to Free() and must not be
// used by the caller afterwards. The caller frees the returned string.
U8 *Upperify(U8 *str) {
  U8 *dup=StrNew(str),*cur;
  Free(str);
  for(cur=dup;*cur;++cur)
    *cur=ToUpper(*cur);
  return dup;
}
// Fetches the who-th part-of-speech string of a dictionary word.
//   word - word handed to ACDDefsGet()
//   who  - 1-based index of the ACD_POS_CHAR record wanted
// The buffer returned by ACDDefsGet() is walked as a series of records,
// each one tag byte followed by a NUL-terminated string; a zero tag byte
// ends the walk. Only ACD_POS_CHAR records are counted.
// Returns a newly allocated copy of the record's text (caller frees), or
// NULL when the word has no such record.
U8 *WordForm0(U8 *word,I64 who=1) {
  U8 *defs=ACDDefsGet(word),*cur=defs,*found=NULL,tag;
  if(cur) {
    while(tag=*cur) {
      ++cur; //step over the tag byte onto the record text
      if(tag==ACD_POS_CHAR&&!--who) {
        found=StrNew(cur);
        break;
      }
      cur+=StrLen(cur)+1; //skip text and its terminator
    }
  }
  Free(defs);
  return found;
}
// Classifies a word by dictionary part of speech, trying simple suffix
// stripping when the word itself has no usable entry.
//   word    - word to classify
//   copy_to - optional buffer that receives the base form that was looked
//             up last (assumed to hold at least STR_LEN bytes, like the
//             local scratch buffer)
//   who     - 1-based index of the part-of-speech entry to start from
// Returns one of 'Gerund','Pronoun','Adv','Prep','Adj','Verb','Noun', or 0
// when nothing could be classified.
//
// Fixes relative to the previous revision:
//  * "prep." entries now yield 'Prep' (they yielded 'Adv', so a test for
//    'Prep' could never succeed).
//  * The "ies"->"Y" retry passes who through; it used to always fetch the
//    first entry, so callers that step who never saw a 0 result.
//  * After a failed "-ing" strip the scratch buffer is restored, so the
//    unstripped word is actually looked up.
//  * Words that do not fit the scratch buffer are rejected instead of
//    overflowing it.
U64 WordForm1(U8 *word,U8 *copy_to=NULL,I64 who=1) {
  U8 *wf,buf[STR_LEN];
  U64 ret=0;
  if(StrLen(word)>=STR_LEN) {
    //Too long for buf; hand back a truncated copy and give up.
    if(copy_to) {
      MemCpy(copy_to,word,STR_LEN-1);
      copy_to[STR_LEN-1]=0;
    }
    return 0;
  }
  StrCpy(buf,word);
  if(EndsWith(word,"ing")) {
    buf[StrLen(buf)-3]=0;
    if(wf=WordForm0(buf,who)) {
      Free(wf);
      if(copy_to) StrCpy(copy_to,buf);
      return 'Gerund';
    }
    StrCpy(buf,word); //stem unknown; fall back to the full word
  }
again:;
  if(wf=WordForm0(buf,who)) {
use_wf:;
    //Order matters: the longer abbreviations are tested before "a."/"n.".
    if(StrIMatch("pron.",wf))
      ret='Pronoun';
    else if(StrIMatch("adv.",wf))
      ret='Adv';
    else if(StrIMatch("prep.",wf))
      ret='Prep';
    else if(StrIMatch("a.",wf))
      ret='Adj';
    else if(StrIMatch("v.",wf))
      ret='Verb';
    else if(StrIMatch("n.",wf))
      ret='Noun';
    Free(wf);
    if(!ret) {
      //Unrecognised part of speech; try the next entry of the same base.
      ++who;
      goto again;
    }
    if(copy_to) StrCpy(copy_to,buf);
    return ret;
  }
  if(EndsWith(word,"ed")) {
    StrCpy(buf,word);
    buf[StrLen(buf)-2]=0;
    if(wf=WordForm0(buf,who))
      goto use_wf;
  }
  if(EndsWith(word,"s")) {
    StrCpy(buf,word);
    buf[StrLen(buf)-1]=0;
    if(wf=WordForm0(buf,who))
      goto use_wf;
  }
  if(EndsWith(word,"ies")) {
    StrCpy(buf,word);
//good[ies]
    buf[StrLen(buf)-3]=0;
    if(wf=WordForm0(buf,who))
      goto use_wf;
    CatPrint(buf,"Y");
//Stud[ies]->study
    if(wf=WordForm0(buf,who))
      goto use_wf;
  }
  if(EndsWith(word,"d")) {
    StrCpy(buf,word);
//use[d]
    buf[StrLen(buf)-1]=0;
    if(wf=WordForm0(buf,who))
      goto use_wf;
  }
  //Every candidate in buf has already been looked up with this who, so
  //no further lookup is needed. buf holds the last candidate tried.
  if(copy_to) StrCpy(copy_to,buf);
  return ret;
}
108 
109 
// Returns the upper-cased base form of str as a newly allocated string.
// str is not modified or freed; the caller frees the result.
// The base form is whatever WordForm1() writes to its copy_to buffer.
U8 *BaseWordify(U8 *str) {
  U8 base[STR_LEN],*upper=Upperify(StrNew(str));
  WordForm1(upper,base);
  Free(upper);
  return StrNew(base);
}
// Looks up the stored relationship count between word and other.
// Entries live in the current task's hash table as HTT_FRAME_PTR records
// named "Relat.<word>", with user_data0 holding the other word and
// user_data1 the count. Instances are probed one by one from 1.
// Returns the count of the first entry whose user_data0 matches other
// (case-insensitive), or 0 when there is none.
I64 RelationshipsWithWord(U8 *word,U8 *other) {
  U8 key[STR_LEN];
  CHashGeneric *ent;
  I64 inst=1;
  StrPrint(key,"Relat.%s",word);
  while(ent=HashSingleTableFind(key,Fs->hash_table,HTT_FRAME_PTR,inst)) {
    if(!StrICmp(ent->user_data0,other))
      return ent->user_data1;
    ++inst;
  }
  return 0;
}
130 
// Adds amt to the relationship count between word and other.
// Every existing "Relat.<word>" entry in the current task's hash table
// whose user_data0 matches other (case-insensitive) is incremented. When
// no entry matches, a new HTT_FRAME_PTR entry is created holding a copy of
// other in user_data0 and amt in user_data1.
U0 AddRelationshipWithWord(U8 *word,U8 *other,I64 amt=1) {
  U8 key[STR_LEN];
  CHashGeneric *ent;
  Bool seen=FALSE;
  I64 inst=1;
  StrPrint(key,"Relat.%s",word);
  while(ent=HashSingleTableFind(key,Fs->hash_table,HTT_FRAME_PTR,inst)) {
    if(!StrICmp(ent->user_data0,other)) {
      ent->user_data1+=amt;
      seen=TRUE;
    }
    ++inst;
  }
  if(seen)
    return;
  ent=CAlloc(sizeof(CHashGeneric));
  ent->str=StrNew(key);
  ent->type=HTT_FRAME_PTR;
  ent->user_data0=StrNew(other);
  ent->user_data1=amt;
  HashAdd(ent,Fs->hash_table);
}
151 
152 
// TRUE when word is a subject pronoun (I, you, he, she, it, we, they).
// The comparison ignores case and requires a whole-word match.
// Previously the word pointer was passed as LstMatch()'s flags argument
// and any prefix or ambiguous (-2) result counted as a hit.
Bool PronSubj(U8 *word) {
  return LstMatch(word,"I\0YOU\0HE\0SHE\0IT\0WE\0THEY\0",
        LMF_IGNORE_CASE|LMF_EXACT)>=0;
}
// TRUE when word is an object pronoun or a reflexive pronoun.
// The comparison ignores case and requires a whole-word match; without
// LMF_EXACT a prefix such as "HE" matched "HER", and an ambiguous result
// (-2) was also accepted. "YOUSELVES" was a typo for "YOURSELVES".
Bool PronObj(U8 *word) {
  I64 flags=LMF_IGNORE_CASE|LMF_EXACT;
  if(LstMatch(word,"ME\0YOU\0HIM\0HER\0IT\0US\0THEM\0",flags)>=0)
    return TRUE;
//Reflexive's act as objects(?)
  if(LstMatch(word,"MYSELF\0YOURSELF\0HIMSELF\0HERSELF\0ITSELF\0OURSELVES\0YOURSELVES\0THEMSELVES\0",flags)>=0)
    return TRUE;
  return FALSE;
}
// TRUE when word is a demonstrative (this, that, those, these).
// The comparison ignores case and requires a whole-word match; without
// LMF_EXACT "THE" matched as a prefix of "THESE" and the ambiguous
// result (-2) for "TH" was also accepted.
Bool PronDemo(U8 *word) {
  return LstMatch(word,"THIS\0THAT\0THOSE\0THESE\0",
        LMF_IGNORE_CASE|LMF_EXACT)>=0;
}
// TRUE when word is a possessive determiner (my, your, her, his, its,
// our, their).
// The comparison ignores case and requires a whole-word match; without
// LMF_EXACT words such as "I" (prefix of "ITS") or "HE" (prefix of "HER")
// were reported as possessives.
Bool DetPoss(U8 *word) {
  return LstMatch(word,"MY\0YOUR\0HER\0HIS\0ITS\0OUR\0THEIR\0",
        LMF_IGNORE_CASE|LMF_EXACT)>=0;
}
// TRUE when word is a possessive pronoun (mine, yours, his, hers, its,
// ours, theirs).
// The comparison ignores case and requires a whole-word match; without
// LMF_EXACT any prefix of a list entry (for example "I" or "HER") was
// reported as a possessive pronoun.
Bool PronPoss(U8 *word) {
  return LstMatch(word,"MINE\0YOURS\0HIS\0HERS\0ITS\0OURS\0THEIRS\0",
        LMF_IGNORE_CASE|LMF_EXACT)>=0;
}
212 
213 
// Grammatical person of a pronoun or possessive.
// Returns 1, 2 or 3 for first, second or third person, and 0 when word is
// not in any of the lists. The comparison ignores case and requires a
// whole-word match.
// Fixes: a stray empty entry ("OUR\0\0OURS") ended the first-person plural
// list before OURS/OURSELVES; "ITESELF" was a typo for "ITSELF"; "HIS" was
// missing from the third-person list; prefix and ambiguous matches are no
// longer accepted.
I64 PronPerson(U8 *word) {
  I64 flags=LMF_IGNORE_CASE|LMF_EXACT;
  if(LstMatch(word,"I\0ME\0MY\0MINE\0MYSELF\0",flags)>=0)
    return 1;
  if(LstMatch(word,"WE\0US\0OUR\0OURS\0OURSELVES\0",flags)>=0)
    return 1;

  if(LstMatch(word,"YOU\0YOUR\0YOURS\0YOURSELF\0YOURSELVES\0",flags)>=0)
    return 2;

  if(LstMatch(word,"HE\0SHE\0IT\0HIM\0HER\0HIS\0ITS\0HERS\0HERSELF\0HIMSELF\0ITSELF\0",flags)>=0)
    return 3;
  if(LstMatch(word,"THEY\0THEM\0THEIR\0THEIRS\0THEMSELVES\0",flags)>=0)
    return 3;

  return 0;
}
238 
// TRUE when word is a plural pronoun, possessive or demonstrative.
// The comparison ignores case and requires a whole-word match.
// Fixes: a stray empty entry ("OUR\0\0OURS") ended the first list before
// OURS/OURSELVES; prefix and ambiguous matches are no longer accepted.
Bool PronPlural(U8 *word) {
  I64 flags=LMF_IGNORE_CASE|LMF_EXACT;
  if(LstMatch(word,"WE\0US\0OUR\0OURS\0OURSELVES\0",flags)>=0)
    return TRUE;
  if(LstMatch(word,"YOURSELVES\0",flags)>=0)
    return TRUE;

  if(LstMatch(word,"THESE\0THOSE\0",flags)>=0)
    return TRUE;

  if(LstMatch(word,"THEY\0THEM\0THEIR\0THEIRS\0THEMSELVES\0",flags)>=0)
    return TRUE;

  return FALSE;
}
// Gender tag of a pronoun.
// Returns 'y' for the first/second-person words in the first list, 'm'
// for HE/HIS/HIM/HIMSELF, 'f' for HER/HERS/SHE/HERSELF, and 'i' for
// anything else. (The meaning of 'y' and 'i' is not spelled out in this
// file; 'm'/'f' are presumably masculine/feminine.)
// Fixes: the LstMatch() result was tested for truthiness, but it returns
// -1 (true) on a miss and 0 (false) for the first entry, so nearly every
// word came back as 'y'. The 'm' and 'f' lists also lacked the empty entry
// that terminates a list. Matching is now whole-word and ignores case.
U8 PronGender(U8 *word) {
  I64 flags=LMF_IGNORE_CASE|LMF_EXACT;
  if(LstMatch(word,"I\0ME\0MINE\0MY\0MYSELF\0YOU\0YOURS\0YOURSELF\0WE\0US\0OUR\0OURS\0OURSELVES\0YOURSELVES\0",flags)>=0)
    return 'y';
  if(LstMatch(word,"HE\0HIS\0HIM\0HIMSELF\0",flags)>=0)
    return 'm';
  if(LstMatch(word,"HER\0HERS\0SHE\0HERSELF\0",flags)>=0)
    return 'f';
  return 'i';
}
// Encodes how a determiner or pronoun refers back to something, packed
// into a U64 as up to eight characters.
//   "the"      -> 'D'
//   "a"/"an"   -> 'I'
//   otherwise  -> 0 when word is not a personal/possessive/demonstrative
//                 form, else the characters, in order:
//                   'A', person digit (when non-zero), '+' plural or '='
//                   singular, 'P' possessive determiner, 'O' possessive
//                   pronoun, 'D' demonstrative, 'o' object or 's' subject,
//                   and finally the PronGender() character.
U64 GetAntecedantCh(U8 *word) {
  U8 code[STR_LEN],gender;
  I64 person,is_det,is_poss,is_demo;
  if(!StrICmp("the",word))
    return 'D';
  if(!StrICmp("a",word)||!StrICmp("an",word))
    return 'I';
  person=PronPerson(word);
  is_det=DetPoss(word);
  is_poss=PronPoss(word);
  is_demo=PronDemo(word);
  gender=PronGender(word);
  if(!(person||is_det||is_poss||is_demo))
    return 0;
  //Writes 'A' plus seven zero bytes so the packed result is NUL padded.
  code[0](U64)='A';
  if(person)
    CatPrint(code,"%d",person);
  if(PronPlural(word))
    CatPrint(code,"+");
  else
    CatPrint(code,"=");
  if(is_det)
    CatPrint(code,"P");
  if(is_poss)
    CatPrint(code,"O");
  if(is_demo)
    CatPrint(code,"D");
  if(PronObj(word))
    CatPrint(code,"o");
  else if(PronSubj(word))
    CatPrint(code,"s");
  CatPrint(code,"%c",gender);
  return code[0](U64);
}
309 
// Classifies a word, giving articles and pronouns fixed categories ahead
// of the dictionary's own part of speech.
//   word    - word to classify
//   copy_to - optional buffer receiving the base form (see WordForm1)
//   who     - 1-based index of the dictionary entry to consult
// Returns 0 when WordForm1() finds nothing for this who (this is what
// ends callers' loops over who); otherwise 'Art' for a/an, 'DefArt' for
// the, 'Pron' for the listed demonstratives, possessives and personal
// pronouns, or the WordForm1() result.
// Fixes: "whoose" was a misspelling of "whose" and could never match; the
// WordForm1() result is now reused instead of being computed twice.
U64 WordForm(U8 *word,U8 *copy_to=NULL,I64 who=1) {
  I64 flags=LMF_IGNORE_CASE|LMF_EXACT;
  U64 form;
  if(copy_to) StrCpy(copy_to,word);
  form=WordForm1(word,copy_to,who);
  if(!form)
    return 0;
  if(LstMatch(word,"AN\0A\0",flags)>=0)
    return 'Art';
  if(!StrICmp(word,"the"))
    return 'DefArt';
  //Demonstratives
  if(LstMatch(word,"THIS\0THAT\0THESE\0THOSE\0",flags)>=0)
    return 'Pron';
  //Possessives
  if(LstMatch(word,"MY\0YOUR\0YOURS\0HIS\0HERS\0HER\0ITS\0THEIR\0THEIRS\0OUR\0OURS\0WHOSE\0",flags)>=0)
    return 'Pron';
  //Personal and reflexive pronouns
  if(LstMatch(word,"I\0YOU\0HE\0SHE\0IT\0ME\0HER\0HIM\0WE\0THEY\0US\0THEM\0MYSELF\0YOURSELF\0HIMSELF\0HERSELF\0ITSELF\0OURSELVES\0THEMSELVES\0YOURSELVES\0",flags)>=0)
    return 'Pron';
  return form;
}
// TRUE when any dictionary sense of word classifies as 'Verb'.
// Senses are tried from 1 upward until WordForm() returns 0.
Bool IsVerb(U8 *word) {
  I64 sense=1;
  U64 form;
  while(form=WordForm(word,NULL,sense)) {
    if(form=='Verb')
      return TRUE;
    ++sense;
  }
  return FALSE;
}
// TRUE when any dictionary sense of word classifies as 'Noun'.
// Senses are tried from 1 upward until WordForm() returns 0.
Bool IsNoun(U8 *word) {
  I64 sense=1;
  U64 form;
  while(form=WordForm(word,NULL,sense)) {
    if(form=='Noun')
      return TRUE;
    ++sense;
  }
  return FALSE;
}
384 
// TRUE when any dictionary sense of word classifies as 'Adj'.
// Senses are tried from 1 upward until WordForm() returns 0.
Bool IsAdj(U8 *word) {
  I64 sense=1;
  U64 form;
  while(form=WordForm(word,NULL,sense)) {
    if(form=='Adj')
      return TRUE;
    ++sense;
  }
  return FALSE;
}
// TRUE when word classifies as a noun in some sense and looks plural.
// A noun sense counts as plural when word ends in "ES", or when word ends
// in "S" while the base form reported by WordForm() does not.
// NOTE(review): the earlier revision also listed stem-specific rules for
// ES/IES/ZES endings, but they all required an "ES" ending and so could
// never be reached after the plain "ES" test; they are omitted here
// without changing the result.
Bool IsPluralNoun(U8 *word) {
  U8 base[STR_LEN];
  I64 sense=1;
  U64 form;
  while(form=WordForm(word,base,sense)) {
    if(form=='Noun'&&
          (EndsWith(word,"ES")||
          (EndsWith(word,"S")&&!EndsWith(base,"S"))))
      return TRUE;
    ++sense;
  }
  return FALSE;
}
// TRUE when any dictionary sense of word classifies as 'Prep'.
// Senses are tried from 1 upward until WordForm() returns 0.
Bool IsPrep(U8 *word) {
  I64 sense=1;
  U64 form;
  while(form=WordForm(word,NULL,sense)) {
    if(form=='Prep')
      return TRUE;
    ++sense;
  }
  return FALSE;
}
//Corpus counters shared by the ranking code.
I64 total=0;        //set to 2 when ConceptRank() starts
I64 total_relats=0; //bumped once per relationship ConceptRank() records
I64 unique=0;       //bumped each time a new "Freq.<word>" entry is made
// Returns how often the base form of a has been counted, read from the
// "Freq.<BASE>" frame pointer (0 when FramePtr() reports nothing).
// a is not modified or freed.
// Fix: BaseWordify() does not take ownership of its argument, so the
// StrNew() copy that used to be passed in was leaked on every call. The
// key buffer is also sized to hold the "Freq." prefix plus a full-length
// base word.
F64 WordOccurances(U8 *a) {
  U8 key[STR_LEN+8],*base=BaseWordify(a);
  I64 r=FramePtr(StrPrint(key,"Freq.%s",base));
  Free(base);
  return r;
}
//Floating-point constant; not referenced in the visible part of this file.
#define FIXED 25.0
// TRUE when a '\n' occurs in the half-open byte range [from,to).
// Fix: the function was declared U0 (no return value) even though every
// path returns TRUE or FALSE; it is now declared Bool.
Bool NewlineBetween(U8 *from,U8 *to) {
  for(;from<to;++from)
    if(*from=='\n')
      return TRUE;
  return FALSE;
}
// Records one sentence's statistics, then frees its words.
//   words - cnt heap-allocated base-form words
//   cnt   - number of entries in words
// For every ordered pair (i,j) in which neither word is a stop word:
//   * when i==j, the word's "Freq.<word>" counter is bumped (created at 1
//     and counted in unique on first sight);
//   * in all cases, including i==j, the relationship words[i]->words[j] is
//     added, total_relats is bumped and the "Relats.<word>" counter for
//     words[i] is bumped.
U0 ConceptRankSentence(U8 **words,I64 cnt) {
  I64 i,j;
  U8 key[STR_LEN+16];
  CHashGeneric *gen;
  for(i=0;i!=cnt;++i)
    for(j=0;j!=cnt;++j)
      if(!IsStopWord(words[i])&&!IsStopWord(words[j])) { //TODO stop words
        if(i==j) {
          StrPrint(key,"Freq.%s",words[i]);
          if(gen=HashSingleTableFind(key,Fs->hash_table,HTT_FRAME_PTR)) {
            ++gen->user_data0;
          } else {
            FramePtrAdd(key,1);
            ++unique;
          }
        }
        //Also runs for i==j (self relation)
        AddRelationshipWithWord(words[i],words[j]);
        ++total_relats;
        StrPrint(key,"Relats.%s",words[i]);
        if(!FramePtr(key))
          FramePtrAdd(key,1);
        else
          FramePtrSet(key,1+FramePtr(key));
      }
  while(--cnt>=0)
    Free(words[cnt]);
}

// Scans the text in file word by word (via GetWord) and accumulates
// word-frequency and co-occurrence statistics sentence by sentence. A
// token starting with '.' ends the current sentence; whatever is pending
// when the text runs out is processed as a final sentence.
//
// Fixes relative to the previous revision:
//  * Strings returned by GetWord() are freed after use. Only the "."
//    token was freed before, so every other word leaked.
//  * The sentence buffer is flushed when it fills up instead of being
//    written past its 0x1000 entries.
//  * Unused locals and the goto into the loop body are gone; the pair
//    processing lives in ConceptRankSentence().
// NOTE(review): total is set to 2 here and not advanced for later words;
// confirm whether it is meant to count every word.
U0 ConceptRank(U8 *file) {
  U8 *a=GetWord(&file),*b=GetWord(&file);
  U8 *sentence_words[0x1000];
  I64 sent_len=2;
  sentence_words[0]=BaseWordify(a);
  sentence_words[1]=BaseWordify(b);
  Free(a);
  Free(b);
  total=2;
  while(*file) {
    a=GetWord(&file);
    if(*a!='.') {
      if(sent_len==0x1000) {
        //Buffer full: treat what we have as a finished sentence.
        ConceptRankSentence(sentence_words,sent_len);
        sent_len=0;
      }
      DbgPrint("%s",a);
      sentence_words[sent_len++]=BaseWordify(a);
      DbgPrint(",%s\n",sentence_words[sent_len-1]);
    } else {
      ConceptRankSentence(sentence_words,sent_len);
      sent_len=0;
    }
    Free(a);
  }
//End of passage(paragraph)
  ConceptRankSentence(sentence_words,sent_len);
}