Skip to content

Commit e3840c2

Browse files
authored
Update main.go
improve Go implementation
1 parent 0b804cd commit e3840c2

File tree

1 file changed

+33
-69
lines changed

1 file changed

+33
-69
lines changed

words_extractor_go/main.go

Lines changed: 33 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import (
1010
"sync"
1111
"time"
1212

13-
"github.com/bmatcuk/doublestar/v2"
13+
"github.com/bmatcuk/doublestar"
1414
"github.com/thoas/go-funk"
1515
"github.com/tidwall/collate"
1616
)
@@ -20,92 +20,56 @@ type Pair struct {
2020
Size int64
2121
}
2222

23+
// var srcPath = "../data/**/*.yml"
24+
var srcPath = "./bibles/??/**/*.yml"
25+
var outdir = "words"
26+
2327
func main() {
24-
with_channels()
25-
without_channels()
26-
}
28+
paths, _ := doublestar.Glob(srcPath)
2729

28-
func without_channels() {
29-
var wg sync.WaitGroup
30+
clearResults()
31+
runWithChannels(paths)
3032

31-
t := time.Now()
32-
defer timeTrack(t)
33+
clearResults()
34+
runWithWaitGroups(paths)
35+
}
3336

34-
outdir := "words"
37+
func clearResults() {
3538
os.RemoveAll(outdir)
3639
os.Mkdir(outdir, 0777)
37-
38-
fmt.Println("Parsing without channels...")
39-
40-
paths, _ := doublestar.Glob("../data/**/*.yml")
41-
42-
items_count := len(paths)
43-
wg.Add(items_count)
44-
for _, path := range paths {
45-
go processFile(&wg, outdir, path, false)
46-
}
47-
wg.Wait()
48-
49-
fmt.Printf("Total items: %d\n", items_count)
50-
5140
}
5241

53-
func with_channels() {
54-
queue := make(chan string)
42+
func runWithChannels(paths []string) {
43+
var ch = make(chan string)
5544

5645
t := time.Now()
5746
defer timeTrack(t)
5847

59-
outdir := "words"
60-
os.RemoveAll(outdir)
61-
os.Mkdir(outdir, 0777)
62-
63-
fmt.Println("Parsing with channels...")
64-
65-
paths, _ := doublestar.Glob("../data/**/*.yml")
66-
67-
// total_size := int64(0)
68-
items_count := len(paths)
6948
for _, path := range paths {
70-
go processFileWithChannels(queue, outdir, path, false)
71-
path := <-queue
72-
fmt.Println(path)
73-
// total_size += res.Size
74-
// fmt.Printf("[%d/%d] %s\n", i+1, items_count, res.Path)
49+
go func(yamlPath string) {
50+
ch <- parseFile(yamlPath, false)
51+
}(path)
52+
}
53+
for range paths {
54+
<-ch
7555
}
76-
fmt.Printf("Total items: %d\n", items_count)
77-
// fmt.Printf("Total size: %d MB\n", total_size/(1024*1024))
7856
}
7957

80-
func processFileWithChannels(queue chan string, outdir string, path string, sorting bool) {
81-
meta := GetYAML(path)
82-
// load text file
83-
filepath := strings.Replace(path, ".yml", ".txt", -1)
84-
// info, err := os.Stat(filepath)
85-
// if err != nil {
86-
// panic(err)
87-
// }
88-
content, err := ioutil.ReadFile(filepath)
89-
if err != nil {
90-
panic(err)
91-
}
92-
// extract and sort unique words
93-
words := extractUniqueWords(content)
94-
if sorting {
95-
words = sortWords(words, "POLISH_CI")
96-
}
97-
text := strings.Join(words, "\n")
98-
outpath := fmt.Sprintf("%s/%s-%s.txt", outdir, meta.Lang, meta.Code)
99-
for err := ioutil.WriteFile(outpath, []byte(text), 0644); err != nil; {
100-
panic(err)
58+
func runWithWaitGroups(paths []string) {
59+
var wg sync.WaitGroup
60+
t := time.Now()
61+
defer timeTrack(t)
62+
for _, path := range paths {
63+
wg.Add(1)
64+
go func(yamlPath string) {
65+
parseFile(yamlPath, false)
66+
wg.Done()
67+
}(path)
10168
}
102-
// queue <- Pair{path, info.Size()}
103-
queue <- path
104-
69+
wg.Wait()
10570
}
10671

107-
func processFile(wg *sync.WaitGroup, outdir string, path string, sorting bool) {
108-
defer wg.Done()
72+
func parseFile(path string, sorting bool) string {
10973
// load YAML file
11074
meta := GetYAML(path)
11175
outfilepath := fmt.Sprintf("%s/extracted-words-for-%s.txt", outdir, meta.Code)
@@ -128,7 +92,7 @@ func processFile(wg *sync.WaitGroup, outdir string, path string, sorting bool) {
12892
for err := ioutil.WriteFile(outfilepath, []byte(text), 0644); err != nil; {
12993
panic(err)
13094
}
131-
fmt.Println(path)
95+
return outfilepath
13296
}
13397

13498
func timeTrack(start time.Time) {

0 commit comments

Comments
 (0)