@@ -2,14 +2,15 @@ module words_extractor_jl
22
33using Distributed
44using YAML
5+ using Glob
56
# Directory the extracted word lists are written into; `main` deletes and
# recreates it on every run.
const outdir = "words"
78
"""
    worker(yaml_path, i, count)

Process a single YAML source file: compute its output path with
`get_filepath`, fetch its words with `get_words`, write them to the output
file joined by newlines, and print a progress line `[  i/count] path`
(`i` is left-padded to three characters).
"""
function worker(yaml_path, i, count)
    target = get_filepath(yaml_path)
    body = join(get_words(yaml_path), "\n")
    write(target, body)
    progress = lpad(i, 3, ' ')
    println("[$progress/$count] $target")
end
1415
1516function get_words (yaml_path)
2021
"""
    get_filepath(path)

Return the output file path for the YAML file at `path`.

The file is loaded with `YAML.load_file` and its `"lang"` and `"code"`
entries are combined into `./<outdir>/<lang>-<code>.txt`.
"""
function get_filepath(path)
    meta = YAML.load_file(path)
    lang = meta["lang"]
    code = meta["code"]
    return string("./", outdir, "/", lang, "-", code, ".txt")
end
2526
"""
    rdir(dir, pat)

Walk `dir` recursively and return a `Vector{String}` with every file path
(`joinpath(root, file)`) that matches the glob pattern `pat`.

`pat` may be a `Glob.FilenameMatch` or a plain string, which is converted
with `Glob.FilenameMatch(pat)`. The pattern is tested against the joined
path, not the bare file name.
"""
function rdir(dir::AbstractString, pat::Glob.FilenameMatch)
    matches = String[]
    for (root, _, files) in walkdir(dir)
        for name in files
            candidate = joinpath(root, name)
            occursin(pat, candidate) && push!(matches, candidate)
        end
    end
    return matches
end

function rdir(dir::AbstractString, pat::AbstractString)
    return rdir(dir, Glob.FilenameMatch(pat))
end
37+
"""
    main()

Recreate the output directory `outdir`, collect every `../data/??/*.yml`
file with `rdir`, and run `worker` on each of them in a threaded loop.
"""
function main()
    # Start from an empty output directory on every run.
    if ispath(outdir)
        rm(outdir, recursive=true)
    end
    mkdir(outdir)

    paths = rdir("../data", fn"../data/??/*.yml")
    total = length(paths)

    # Each iteration gets its own index instead of incrementing a shared
    # counter: `i += 1` on a captured variable inside `Threads.@threads` is an
    # unsynchronized read-modify-write, so progress numbers could repeat or
    # skip. The number shown is now the file's position in `paths`.
    Threads.@threads for i in eachindex(paths)
        worker(paths[i], i, total)
    end
end
4952
# Launch local worker processes and report how many are available.
# NOTE(review): the visible code parallelizes with `Threads.@threads` only;
# nothing shown here dispatches work to these processes — confirm they are needed.
addprocs()
println("Workers ", nworkers())
println("Processing... using ", Threads.nthreads(), " threads")
# Run the extraction and report elapsed time and allocations.
@time main()
5457end # module
0 commit comments