File tree Expand file tree Collapse file tree 1 file changed +9
-13
lines changed Expand file tree Collapse file tree 1 file changed +9
-13
lines changed Original file line number Diff line number Diff line change 1919
2020
2121def worker (path : str , outdir : str , sorting : bool = False ) -> Tuple [str , int ]:
22- # if sorting:
23- # if i18nsorting:
24- # collator = Collator.createInstance(Locale("pl_PL.UTF-8"))
25- # print("I18nN sorting not available")
26-
27- separator = re .compile ("[\W\d]+" )
22+ if sorting and i18nsorting :
23+ collator = Collator .createInstance (Locale ("pl_PL.UTF-8" ))
24+
25+ separator = re .compile ("[\W\d]+" ) # also ignore Strong numbers (Python has no \p{L} pattern)
2826 filepath = path .replace (".yml" , ".txt" )
2927 filesize = os .path .getsize (filepath )
3028 with open (filepath ) as file :
31- text = file .read ().lower ().rstrip ()
32- words = set (re .split (separator , text ))
33- try :
34- words .remove ('' )
35- except KeyError :
36- pass
37- words = list (words )
29+ words = []
30+ for line in file .readlines ():
31+ _line = ' ' .join (line .strip ().lower ().split (' ' )[2 :- 1 ]) # without book reference
32+ words += [w for w in set (re .split (separator , _line )) if w and len (w ) > 1 ]
33+ words = list (set (words ))
3834 with open (path ) as file :
3935 meta = yaml .safe_load (file )
4036 with open (f"{ outdir } /{ meta ['lang' ]} -{ meta ['code' ]} .txt" , "w" ) as file :
You can’t perform that action at this time.
0 commit comments