1+ import argparse
12import glob
23from typing import Tuple
3- import os
44
55try :
66 from icu import Collator , Locale
1919
2020
def worker(path: str, outdir: str, sorting: bool = False) -> Tuple[str, int]:
    """Write the unique words of one text file to a per-language word list.

    The text file is found by replacing ``.yml`` with ``.txt`` in *path*.
    Its content is lower-cased and split on runs of non-word characters and
    digits; the distinct non-empty pieces are written one per line to
    ``<outdir>/<lang>-<code>.txt``, where ``lang`` and ``code`` are read
    from the YAML file at *path*.

    Args:
        path: Path of the YAML metadata file describing the text.
        outdir: Directory that receives the word list (must already exist).
        sorting: When true, the words are written in sorted order. The
            module-level ``i18nsorting`` flag selects ICU collation
            (``pl_PL.UTF-8``) over plain ``str`` ordering.
            NOTE(review): ``i18nsorting`` is presumably set where the
            ``icu`` import is attempted - confirm.

    Returns:
        A tuple of *path* and the size in bytes of the source text file.
    """
    # Raw string: "\W" and "\d" are not valid escapes in a normal literal.
    separator = re.compile(r"[\W\d]+")
    filepath = path.replace(".yml", ".txt")
    filesize = os.path.getsize(filepath)
    with open(filepath) as file:
        text = file.read().lower().rstrip()

    words = set(separator.split(text))
    # Splitting yields '' when the text starts with a separator (or is empty).
    words.discard("")

    with open(path) as file:
        meta = yaml.safe_load(file)

    if sorting:
        if i18nsorting:
            # Created here so each worker process owns its collator; the
            # sort below would otherwise reference an undefined name.
            collator = Collator.createInstance(Locale("pl_PL.UTF-8"))
            ordered = sorted(words, key=collator.getSortKey)
        else:
            ordered = sorted(words)
    else:
        ordered = list(words)

    with open(f"{outdir}/{meta['lang']}-{meta['code']}.txt", "w") as file:
        file.write("\n".join(ordered))
    return path, filesize
4048
4149
if __name__ == "__main__":
    # Command-line interface: number of worker processes and optional sorting.
    program_name = os.path.basename(__file__)
    cores = mp.cpu_count()
    parser = argparse.ArgumentParser(f'python {program_name}')
    parser.add_argument('-n', type=int, help=f'Number of cores to run (default: {cores})', default=cores)
    parser.add_argument('-s', '--sort', action='store_true', help='Sort results')
    args = parser.parse_args()
    if not 1 <= args.n <= cores:
        # Out-of-range request: fall back to the documented default rather
        # than a fixed number that may exceed the machine's core count.
        args.n = cores

    cpu_cores = args.n
    sorting = args.sort

    # Start of the wall-clock measurement reported at the end of the run.
    t = time.time()

    outdir = "words"
@@ -49,20 +69,25 @@ def worker(path: str, outdir: str, sorting: bool = False) -> Tuple[str, int]:
4969 shutil .rmtree (outdir )
5070 os .makedirs (outdir )
5171
# Collect the input files; an empty match means the source pattern is wrong.
paths = glob.glob(src_path, recursive=True)
if not paths:
    raise Exception(f"WRONG PATH {src_path}")

# Build the complete status line before printing so that it always ends
# with a newline, including when sorting is disabled.
mode = ""
if sorting:
    mode = " with sorting using collations" if i18nsorting else " with sorting"
print(f"Running using {cpu_cores} processes{mode}")

# Queue one asynchronous worker call per input file; the handles are kept
# in submission order.
results: list = []
pool = mp.Pool(cpu_cores)
for path in paths:
    res = pool.apply_async(
        worker,
        kwds=dict(
            path=path,
            outdir=outdir,
            sorting=sorting,
        ),
    )
    results.append(res)
@@ -71,9 +96,8 @@ def worker(path: str, outdir: str, sorting: bool = False) -> Tuple[str, int]:
7196 for i , res in enumerate (results ):
7297 path , size = res .get ()
7398 total_size += size
74- print (f"[{ i + 1 } /{ items_count } ] { path } " )
99+ print (f"[{ i + 1 } /{ items_count } ] { path } " )
75100 print (f"Total files: { items_count } " )
76101 print (f"Total size: { round ((total_size / 1024 / 1024 ))} MB" )
77102 t = time .time () - t
78103 print (f"Total time: { t :.4f} s" )
79-
0 commit comments