Skip to content

Commit 47806fd

Browse files
authored
Update main.go
1 parent d1c19a0 commit 47806fd

File tree

1 file changed

+81
-10
lines changed

1 file changed

+81
-10
lines changed

words_extractor_go/main.go

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,17 @@ import (
1515
"github.com/tidwall/collate"
1616
)
1717

18+
// Pair couples a file path with a size value.
//
// NOTE(review): the only visible usage is commented out
// (`queue <- Pair{path, info.Size()}`), which suggests Size is meant to hold
// the byte size reported by os.Stat for the file at Path — confirm before
// relying on it.
type Pair struct {
	// Path is the path of the file the entry refers to.
	Path string
	// Size is the size value associated with Path.
	Size int64
}
22+
1823
func main() {
24+
with_channels()
25+
without_channels()
26+
}
27+
28+
func without_channels() {
1929
var wg sync.WaitGroup
2030

2131
t := time.Now()
@@ -25,17 +35,77 @@ func main() {
2535
os.RemoveAll(outdir)
2636
os.Mkdir(outdir, 0777)
2737

28-
fmt.Println("Parsing...")
38+
fmt.Println("Parsing without channels...")
39+
40+
paths, _ := doublestar.Glob("../data/**/*.yml")
41+
42+
items_count := len(paths)
2943

30-
paths, _ := doublestar.Glob("../data/pl/**/*.yml")
31-
for i, path := range paths {
44+
for _, path := range paths {
3245
wg.Add(1)
33-
go worker(i, &wg, path, outdir, true)
46+
go processFile(&wg, outdir, path, false)
3447
}
3548
wg.Wait()
49+
50+
fmt.Printf("Total items: %d\n", items_count)
51+
52+
}
53+
54+
func with_channels() {
55+
queue := make(chan string)
56+
57+
t := time.Now()
58+
defer timeTrack(t)
59+
60+
outdir := "words"
61+
os.RemoveAll(outdir)
62+
os.Mkdir(outdir, 0777)
63+
64+
fmt.Println("Parsing with channels...")
65+
66+
paths, _ := doublestar.Glob("../data/**/*.yml")
67+
68+
// total_size := int64(0)
69+
items_count := len(paths)
70+
for _, path := range paths {
71+
go processFileWithChannels(queue, outdir, path, false)
72+
path := <-queue
73+
fmt.Println(path)
74+
// total_size += res.Size
75+
// fmt.Printf("[%d/%d] %s\n", i+1, items_count, res.Path)
76+
}
77+
fmt.Printf("Total items: %d\n", items_count)
78+
// fmt.Printf("Total size: %d MB\n", total_size/(1024*1024))
3679
}
3780

38-
func worker(id int, wg *sync.WaitGroup, path, outdir string, verbose bool) {
81+
func processFileWithChannels(queue chan string, outdir string, path string, sorting bool) {
82+
meta := GetYAML(path)
83+
// load text file
84+
filepath := strings.Replace(path, ".yml", ".txt", -1)
85+
// info, err := os.Stat(filepath)
86+
// if err != nil {
87+
// panic(err)
88+
// }
89+
content, err := ioutil.ReadFile(filepath)
90+
if err != nil {
91+
panic(err)
92+
}
93+
// extract and sort unique words
94+
words := extractUniqueWords(content)
95+
if sorting {
96+
words = sortWords(words, "POLISH_CI")
97+
}
98+
text := strings.Join(words, "\n")
99+
outpath := fmt.Sprintf("%s/%s-%s.txt", outdir, meta.Lang, meta.Code)
100+
for err := ioutil.WriteFile(outpath, []byte(text), 0644); err != nil; {
101+
panic(err)
102+
}
103+
// queue <- Pair{path, info.Size()}
104+
queue <- path
105+
106+
}
107+
108+
func processFile(wg *sync.WaitGroup, outdir string, path string, sorting bool) {
39109
defer wg.Done()
40110
// load YAML file
41111
meta := GetYAML(path)
@@ -48,17 +118,18 @@ func worker(id int, wg *sync.WaitGroup, path, outdir string, verbose bool) {
48118
panic(err)
49119
}
50120

51-
// extract and sort unique words
52121
words := extractUniqueWords(content)
53-
words = sortWords(words, "POLISH_CI")
122+
123+
// sort unique words
124+
if sorting {
125+
words = sortWords(words, "POLISH_CI")
126+
}
54127

55128
text := strings.Join(words, "\n")
56129
for err := ioutil.WriteFile(outfilepath, []byte(text), 0644); err != nil; {
57130
panic(err)
58131
}
59-
if verbose {
60-
fmt.Println("Saved ", path)
61-
}
132+
fmt.Println(path)
62133
}
63134

64135
func timeTrack(start time.Time) {

0 commit comments

Comments
 (0)