@@ -15,7 +15,17 @@ import (
1515 "github.com/tidwall/collate"
1616)
1717
// Pair associates a file path with a size.
//
// NOTE(review): no live code uses Pair yet; it appears only in commented-out
// drafts. Presumably Size is the file size in bytes — confirm when wiring it in.
type Pair struct {
	Path string // path of the file the pair describes
	Size int64  // size reported for Path
}
22+
1823func main () {
24+ with_channels ()
25+ without_channels ()
26+ }
27+
28+ func without_channels () {
1929 var wg sync.WaitGroup
2030
2131 t := time .Now ()
@@ -25,17 +35,77 @@ func main() {
2535 os .RemoveAll (outdir )
2636 os .Mkdir (outdir , 0777 )
2737
28- fmt .Println ("Parsing..." )
38+ fmt .Println ("Parsing without channels..." )
39+
40+ paths , _ := doublestar .Glob ("../data/**/*.yml" )
41+
42+ items_count := len (paths )
2943
30- paths , _ := doublestar .Glob ("../data/pl/**/*.yml" )
31- for i , path := range paths {
44+ for _ , path := range paths {
3245 wg .Add (1 )
33- go worker ( i , & wg , path , outdir , true )
46+ go processFile ( & wg , outdir , path , false )
3447 }
3548 wg .Wait ()
49+
50+ fmt .Printf ("Total items: %d\n " , items_count )
51+
52+ }
53+
54+ func with_channels () {
55+ queue := make (chan string )
56+
57+ t := time .Now ()
58+ defer timeTrack (t )
59+
60+ outdir := "words"
61+ os .RemoveAll (outdir )
62+ os .Mkdir (outdir , 0777 )
63+
64+ fmt .Println ("Parsing with channels..." )
65+
66+ paths , _ := doublestar .Glob ("../data/**/*.yml" )
67+
68+ // total_size := int64(0)
69+ items_count := len (paths )
70+ for _ , path := range paths {
71+ go processFileWithChannels (queue , outdir , path , false )
72+ path := <- queue
73+ fmt .Println (path )
74+ // total_size += res.Size
75+ // fmt.Printf("[%d/%d] %s\n", i+1, items_count, res.Path)
76+ }
77+ fmt .Printf ("Total items: %d\n " , items_count )
78+ // fmt.Printf("Total size: %d MB\n", total_size/(1024*1024))
3679}
3780
38- func worker (id int , wg * sync.WaitGroup , path , outdir string , verbose bool ) {
81+ func processFileWithChannels (queue chan string , outdir string , path string , sorting bool ) {
82+ meta := GetYAML (path )
83+ // load text file
84+ filepath := strings .Replace (path , ".yml" , ".txt" , - 1 )
85+ // info, err := os.Stat(filepath)
86+ // if err != nil {
87+ // panic(err)
88+ // }
89+ content , err := ioutil .ReadFile (filepath )
90+ if err != nil {
91+ panic (err )
92+ }
93+ // extract and sort unique words
94+ words := extractUniqueWords (content )
95+ if sorting {
96+ words = sortWords (words , "POLISH_CI" )
97+ }
98+ text := strings .Join (words , "\n " )
99+ outpath := fmt .Sprintf ("%s/%s-%s.txt" , outdir , meta .Lang , meta .Code )
100+ for err := ioutil .WriteFile (outpath , []byte (text ), 0644 ); err != nil ; {
101+ panic (err )
102+ }
103+ // queue <- Pair{path, info.Size()}
104+ queue <- path
105+
106+ }
107+
108+ func processFile (wg * sync.WaitGroup , outdir string , path string , sorting bool ) {
39109 defer wg .Done ()
40110 // load YAML file
41111 meta := GetYAML (path )
@@ -48,17 +118,18 @@ func worker(id int, wg *sync.WaitGroup, path, outdir string, verbose bool) {
48118 panic (err )
49119 }
50120
51- // extract and sort unique words
52121 words := extractUniqueWords (content )
53- words = sortWords (words , "POLISH_CI" )
122+
123+ // sort unique words
124+ if sorting {
125+ words = sortWords (words , "POLISH_CI" )
126+ }
54127
55128 text := strings .Join (words , "\n " )
56129 for err := ioutil .WriteFile (outfilepath , []byte (text ), 0644 ); err != nil ; {
57130 panic (err )
58131 }
59- if verbose {
60- fmt .Println ("Saved " , path )
61- }
132+ fmt .Println (path )
62133}
63134
64135func timeTrack (start time.Time ) {
0 commit comments