@@ -9,25 +9,32 @@ module FastWordsCr
# Entry point: builds one word-list file per YAML metadata file found under
# `../data/pl`.
#
# *outpath* is the folder the word lists are written to (default `"words"`).
# It is first handed to `prepare_folder` together with the `"*.txt"` pattern
# (presumably to create/clean the output folder - confirm against its
# definition), then every matched `.yml` path is processed by `worker`.
#
# NOTE: the files are processed sequentially. A fiber-based variant
# (wrapping each `worker` call in `spawn do ... end` and calling
# `Fiber.yield` after the loop) was tried and is currently disabled.
def self.main(outpath = "words")
  prepare_folder(outpath, "*.txt")

  yaml_paths = Dir.glob("../data/pl/**/*.yml")
  yaml_paths.each { |yaml_path| worker(yaml_path, outpath) }
end

# Extracts the distinct words of one text file and writes them, one word per
# line, into *outpath*.
#
# *path* is the path of a `.yml` metadata file. The text itself is read from
# the sibling file that has the same name with a `.txt` extension. The output
# file is named `słowa - <label>.txt`, where `<label>` is the `label` entry of
# the parsed YAML document. The output path is also printed to stdout.
def self.worker(path, outpath)
  # Swap only the trailing extension. A plain gsub(".yml", ".txt") would also
  # rewrite any other ".yml" occurring earlier in the path (e.g. a directory
  # named "a.yml.d"), pointing at a file that does not exist.
  text_path = path.rchop(".yml") + ".txt"
  text = File.read(text_path).gsub("\n", " ").downcase

  # Alternatives that were benchmarked (timings as noted by the author):
  #
  # 35sec
  # words_json = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_json.downcase
  # words = Array(String).from_json(words_json).sort { |x, y| self.word_cmp(x, y) }
  #
  # 35s
  # words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a.sort do |x, y|
  #   self.word_cmp(x, y)
  # end

  # 7s (no sort)
  # Split on runs of non-letter characters, deduplicate through a Set and
  # drop the empty string that a leading separator produces.
  words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a

  meta = File.open(path) { |file| YAML.parse(file) }
  filepath = %Q(#{outpath}/słowa - #{meta["label"]}.txt)
  puts filepath
  File.write(filepath, words.join("\n"))
end
3239
3340 def self.prepare_folder (folder : String , pattern : String )
0 commit comments