@@ -10,7 +10,7 @@ import (
1010 "sync"
1111 "time"
1212
13- "github.com/bmatcuk/doublestar/v2 "
13+ "github.com/bmatcuk/doublestar"
1414 "github.com/thoas/go-funk"
1515 "github.com/tidwall/collate"
1616)
@@ -20,92 +20,56 @@ type Pair struct {
2020 Size int64
2121}
2222
// Package-level settings shared by main and the parsing helpers.
var (
	// srcPath is the glob pattern used to locate the YAML metadata files.
	// A previously used alternative pattern was "../data/**/*.yml".
	srcPath = "./bibles/??/**/*.yml"

	// outdir is the directory that receives the generated word lists.
	outdir = "words"
)
26+
2327func main () {
24- with_channels ()
25- without_channels ()
26- }
28+ paths , _ := doublestar .Glob (srcPath )
2729
28- func without_channels () {
29- var wg sync. WaitGroup
30+ clearResults ()
31+ runWithChannels ( paths )
3032
31- t := time .Now ()
32- defer timeTrack (t )
33+ clearResults ()
34+ runWithWaitGroups (paths )
35+ }
3336
34- outdir := "words"
37+ func clearResults () {
3538 os .RemoveAll (outdir )
3639 os .Mkdir (outdir , 0777 )
37-
38- fmt .Println ("Parsing without channels..." )
39-
40- paths , _ := doublestar .Glob ("../data/**/*.yml" )
41-
42- items_count := len (paths )
43- wg .Add (items_count )
44- for _ , path := range paths {
45- go processFile (& wg , outdir , path , false )
46- }
47- wg .Wait ()
48-
49- fmt .Printf ("Total items: %d\n " , items_count )
50-
5140}
5241
53- func with_channels ( ) {
54- queue : = make (chan string )
42+ func runWithChannels ( paths [] string ) {
43+ var ch = make (chan string )
5544
5645 t := time .Now ()
5746 defer timeTrack (t )
5847
59- outdir := "words"
60- os .RemoveAll (outdir )
61- os .Mkdir (outdir , 0777 )
62-
63- fmt .Println ("Parsing with channels..." )
64-
65- paths , _ := doublestar .Glob ("../data/**/*.yml" )
66-
67- // total_size := int64(0)
68- items_count := len (paths )
6948 for _ , path := range paths {
70- go processFileWithChannels (queue , outdir , path , false )
71- path := <- queue
72- fmt .Println (path )
73- // total_size += res.Size
74- // fmt.Printf("[%d/%d] %s\n", i+1, items_count, res.Path)
49+ go func (yamlPath string ) {
50+ ch <- parseFile (yamlPath , false )
51+ }(path )
52+ }
53+ for range paths {
54+ <- ch
7555 }
76- fmt .Printf ("Total items: %d\n " , items_count )
77- // fmt.Printf("Total size: %d MB\n", total_size/(1024*1024))
7856}
7957
80- func processFileWithChannels (queue chan string , outdir string , path string , sorting bool ) {
81- meta := GetYAML (path )
82- // load text file
83- filepath := strings .Replace (path , ".yml" , ".txt" , - 1 )
84- // info, err := os.Stat(filepath)
85- // if err != nil {
86- // panic(err)
87- // }
88- content , err := ioutil .ReadFile (filepath )
89- if err != nil {
90- panic (err )
91- }
92- // extract and sort unique words
93- words := extractUniqueWords (content )
94- if sorting {
95- words = sortWords (words , "POLISH_CI" )
96- }
97- text := strings .Join (words , "\n " )
98- outpath := fmt .Sprintf ("%s/%s-%s.txt" , outdir , meta .Lang , meta .Code )
99- for err := ioutil .WriteFile (outpath , []byte (text ), 0644 ); err != nil ; {
100- panic (err )
58+ func runWithWaitGroups (paths []string ) {
59+ var wg sync.WaitGroup
60+ t := time .Now ()
61+ defer timeTrack (t )
62+ for _ , path := range paths {
63+ wg .Add (1 )
64+ go func (yamlPath string ) {
65+ parseFile (yamlPath , false )
66+ wg .Done ()
67+ }(path )
10168 }
102- // queue <- Pair{path, info.Size()}
103- queue <- path
104-
69+ wg .Wait ()
10570}
10671
107- func processFile (wg * sync.WaitGroup , outdir string , path string , sorting bool ) {
108- defer wg .Done ()
72+ func parseFile (path string , sorting bool ) string {
10973 // load YAML file
11074 meta := GetYAML (path )
11175 outfilepath := fmt .Sprintf ("%s/extracted-words-for-%s.txt" , outdir , meta .Code )
@@ -128,7 +92,7 @@ func processFile(wg *sync.WaitGroup, outdir string, path string, sorting bool) {
12892 for err := ioutil .WriteFile (outfilepath , []byte (text ), 0644 ); err != nil ; {
12993 panic (err )
13094 }
131- fmt . Println ( path )
95+ return outfilepath
13296}
13397
13498func timeTrack (start time.Time ) {
0 commit comments