Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion words_extractor_go/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@ words
.history
/fast_words
/.DS_Store
sort-me-out
10 changes: 0 additions & 10 deletions words_extractor_go/Makefile

This file was deleted.

20 changes: 9 additions & 11 deletions words_extractor_go/content.go
Original file line number Diff line number Diff line change
@@ -1,26 +1,24 @@
package main

import (
"bufio"
"bytes"
"os"
"io/ioutil"
"regexp"
"strconv"
"strings"
)

// getRows derives a text file path from metaPath by replacing ".yml" with
// ".txt" and returns the contents of that file as a list of lines.
//
// NOTE(review): this span appears to come from a rendered diff whose +/-
// markers were stripped, so a bufio.Scanner-based body and an
// ioutil.ReadFile-based body are interleaved below. Read them as two
// alternative implementations rather than one function body.
func getRows(metaPath string) ListOfStrings {
// n = -1 replaces every ".yml" occurrence, not only a trailing extension.
path := strings.Replace(metaPath, ".yml", ".txt", -1)
// Scanner variant: the error returned by os.Open is discarded here.
data, _ := os.Open(path)
defer data.Close()

scanner := bufio.NewScanner(data)
scanner.Split(bufio.ScanLines)
var txtlines []string
for scanner.Scan() {
txtlines = append(txtlines, scanner.Text())
// ReadFile variant: the whole file is read at once; a read error panics.
data, err := ioutil.ReadFile(path)
if err != nil {
panic(err)
}
rows := strings.Split(string(data), "\n")
// Drop the empty last element that Split yields when the data ends with "\n".
if rows[len(rows)-1] == "" {
rows = rows[:len(rows)-1]
}
return txtlines
return rows
}

func (arr ListOfStrings) toString() string {
Expand Down
13 changes: 0 additions & 13 deletions words_extractor_go/go.mod

This file was deleted.

29 changes: 0 additions & 29 deletions words_extractor_go/go.sum

This file was deleted.

51 changes: 28 additions & 23 deletions words_extractor_go/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,12 @@ package main
import (
"fmt"
"io/ioutil"
"strings"
"time"
"sort"

"github.com/jfcg/sorty"
"github.com/thoas/go-funk"
"github.com/tidwall/collate"
)

// resultsArray wraps the slice of words that the extractWords method appends to.
type resultsArray struct {
// Results holds the collected strings; main de-duplicates and sorts it
// before writing the output file.
Results []string
}

// Package-level state used by main.
var (
// res accumulates the words extracted while main processes the input files.
res resultsArray
)

// timeTrack prints the time elapsed since start to standard output, prefixed
// with "Total timing: ". main defers it so the total run time is reported on
// exit.
func timeTrack(start time.Time) {
	elapsed := time.Since(start)
	fmt.Println("Total timing: ", elapsed)
}

func main() {
t1 := time.Now()
defer timeTrack(t1)
folder := "./words"
prepareFolder(folder, "*.txt")

Expand All @@ -33,13 +17,34 @@ func main() {
filename := "słowa - " + meta.Label + ".txt"
fmt.Println("Parsing...", filename)

res.extractWords(getRows(path).toString())
res.Results = funk.UniqString(res.Results)
sorty.SortS(res.Results)
data := strings.Join(res.Results, "\n")
// set: extracted unique words normalized to lowercase
set := make(map[string]void)
extractWords(getRows(path).toString(), set)
delete(set, "")

for err := ioutil.WriteFile(folder+"/"+filename, []byte(data), 0644); err != nil; {
// convert map[string]void to []string
var words []string
for word := range set {
words = append(words, word)
}

// sortArray(words, "POLISH_CI")

var data []byte
for _, word := range words {
bytes := []byte(word + "\n")
data = append(data, bytes...)
}

for err := ioutil.WriteFile(folder+"/"+filename, data, 0644); err != nil; {
panic(err)
}
}
}

// sortArray stably sorts arr in place, ordering elements with the comparison
// function that collate.IndexString returns for lang (the commented-out call
// in main passes "POLISH_CI").
func sortArray(arr []string, lang string) {
	before := collate.IndexString(lang)
	byCollation := func(a, b int) bool {
		return before(arr[a], arr[b])
	}
	sort.SliceStable(arr, byCollation)
}
3 changes: 1 addition & 2 deletions words_extractor_go/meta.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
package main

import (
"io/ioutil"

"gopkg.in/yaml.v3"
"io/ioutil"
)

type metaConfig struct {
Expand Down
18 changes: 5 additions & 13 deletions words_extractor_go/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"os"
"path/filepath"
"regexp"
"strings"
)

Expand All @@ -25,19 +26,10 @@ func getYamlFilepaths(root string) []string {
return result
}

// removeCharacters returns a copy of input with every rune that occurs in
// characters removed. Runes not listed in characters are kept unchanged and
// in their original order.
func removeCharacters(input string, characters string) string {
	return strings.Map(func(r rune) rune {
		if strings.ContainsRune(characters, r) {
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return r
	}, input)
}

func (r *resultsArray) extractWords(s string) {
for _, word := range strings.Fields(s) {
r.Results = append(r.Results, strings.ToLower(removeCharacters(word, ".:,;()!?'-_")))
// nonLetterRun matches one or more consecutive characters that are not
// Unicode letters. It is compiled once at package scope so that extractWords
// does not recompile the pattern on every call.
var nonLetterRun = regexp.MustCompile(`[^\p{L}]+`)

// extractWords splits s on runs of non-letter characters and records the
// lowercased form of every resulting piece as a key of set.
//
// When s is empty, or begins or ends with a non-letter, the split yields an
// empty piece, so the key "" may be added; main removes it afterwards with
// delete(set, "").
func extractWords(s string, set map[string]void) {
	for _, word := range nonLetterRun.Split(s, -1) {
		set[strings.ToLower(word)] = member
	}
}

Expand Down
93 changes: 0 additions & 93 deletions words_extractor_go/utils_test.go

This file was deleted.