Skip to content

Commit 9506e14

Browse files
authored
Merge pull request #2 from hipertracker/revert-1-main
Revert "Golang improvements."
2 parents 394294d + a7d68d8 commit 9506e14

File tree

9 files changed

+43
-195
lines changed

9 files changed

+43
-195
lines changed

words_extractor_go/.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,3 @@ words
33
.history
44
/fast_words
55
/.DS_Store
6-
sort-me-out

words_extractor_go/Makefile

Lines changed: 0 additions & 10 deletions
This file was deleted.

words_extractor_go/content.go

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,24 @@
11
package main
22

33
import (
4-
"bufio"
54
"bytes"
6-
"os"
5+
"io/ioutil"
76
"regexp"
87
"strconv"
98
"strings"
109
)
1110

1211
func getRows(metaPath string) ListOfStrings {
1312
path := strings.Replace(metaPath, ".yml", ".txt", -1)
14-
data, _ := os.Open(path)
15-
defer data.Close()
16-
17-
scanner := bufio.NewScanner(data)
18-
scanner.Split(bufio.ScanLines)
19-
var txtlines []string
20-
for scanner.Scan() {
21-
txtlines = append(txtlines, scanner.Text())
13+
data, err := ioutil.ReadFile(path)
14+
if err != nil {
15+
panic(err)
16+
}
17+
rows := strings.Split(string(data), "\n")
18+
if rows[len(rows)-1] == "" {
19+
rows = rows[:len(rows)-1]
2220
}
23-
return txtlines
21+
return rows
2422
}
2523

2624
func (arr ListOfStrings) toString() string {

words_extractor_go/go.mod

Lines changed: 0 additions & 13 deletions
This file was deleted.

words_extractor_go/go.sum

Lines changed: 0 additions & 29 deletions
This file was deleted.

words_extractor_go/main.go

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -3,28 +3,12 @@ package main
33
import (
44
"fmt"
55
"io/ioutil"
6-
"strings"
7-
"time"
6+
"sort"
87

9-
"github.com/jfcg/sorty"
10-
"github.com/thoas/go-funk"
8+
"github.com/tidwall/collate"
119
)
1210

13-
type resultsArray struct {
14-
Results []string
15-
}
16-
17-
var (
18-
res resultsArray
19-
)
20-
21-
func timeTrack(start time.Time) {
22-
fmt.Println("Total timing: ", time.Since(start))
23-
}
24-
2511
func main() {
26-
t1 := time.Now()
27-
defer timeTrack(t1)
2812
folder := "./words"
2913
prepareFolder(folder, "*.txt")
3014

@@ -33,13 +17,34 @@ func main() {
3317
filename := "słowa - " + meta.Label + ".txt"
3418
fmt.Println("Parsing...", filename)
3519

36-
res.extractWords(getRows(path).toString())
37-
res.Results = funk.UniqString(res.Results)
38-
sorty.SortS(res.Results)
39-
data := strings.Join(res.Results, "\n")
20+
// set: extracted unique words normalized to lowercase
21+
set := make(map[string]void)
22+
extractWords(getRows(path).toString(), set)
23+
delete(set, "")
4024

41-
for err := ioutil.WriteFile(folder+"/"+filename, []byte(data), 0644); err != nil; {
25+
// convert map[string]void to []string
26+
var words []string
27+
for word := range set {
28+
words = append(words, word)
29+
}
30+
31+
// sortArray(words, "POLISH_CI")
32+
33+
var data []byte
34+
for _, word := range words {
35+
bytes := []byte(word + "\n")
36+
data = append(data, bytes...)
37+
}
38+
39+
for err := ioutil.WriteFile(folder+"/"+filename, data, 0644); err != nil; {
4240
panic(err)
4341
}
4442
}
4543
}
44+
45+
func sortArray(arr []string, lang string) {
46+
less := collate.IndexString(lang)
47+
sort.SliceStable(arr, func(i, j int) bool {
48+
return less(arr[i], arr[j])
49+
})
50+
}

words_extractor_go/meta.go

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
package main
22

33
import (
4-
"io/ioutil"
5-
64
"gopkg.in/yaml.v3"
5+
"io/ioutil"
76
)
87

98
type metaConfig struct {

words_extractor_go/utils.go

Lines changed: 5 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ package main
33
import (
44
"os"
55
"path/filepath"
6+
"regexp"
67
"strings"
78
)
89

@@ -25,19 +26,10 @@ func getYamlFilepaths(root string) []string {
2526
return result
2627
}
2728

28-
func removeCharacters(input string, characters string) string {
29-
filter := func(r rune) rune {
30-
if strings.IndexRune(characters, r) < 0 {
31-
return r
32-
}
33-
return -1
34-
}
35-
return strings.Map(filter, input)
36-
}
37-
38-
func (r *resultsArray) extractWords(s string) {
39-
for _, word := range strings.Fields(s) {
40-
r.Results = append(r.Results, strings.ToLower(removeCharacters(word, ".:,;()!?'-_")))
29+
func extractWords(s string, set map[string]void) {
30+
re := regexp.MustCompile("[^\\p{L}]+")
31+
for _, word := range re.Split(s, -1) {
32+
set[strings.ToLower(word)] = member
4133
}
4234
}
4335

words_extractor_go/utils_test.go

Lines changed: 0 additions & 93 deletions
This file was deleted.

0 commit comments

Comments
 (0)