Skip to content

Commit 27d420a

Browse files
committed
add sorting in 23 languages for Golang example
1 parent 735539d commit 27d420a

File tree

5 files changed

+29
-9
lines changed

5 files changed

+29
-9
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ The following results are for 123 unique utf-8 Bible text files in 23 languages
1515

1616
<pre>
1717
1. Rust 1.58 = 1.14s (with sorting: 1.59s) with tokio (previous: 1.34s, with sorting: 1.79s)
18-
2. Golang 1.17.6 = 1.34s (with sorting: 6.55s)
18+
2. Golang 1.17.6 = 1.34s (with sorting: 4.56s)
1919
3. Python 3.10.2 = 2.80s (with multiprocessing)
2020
4. Julia 1.7.1 = 4.522s
2121
5. Crystal 1.3.2 = 5.72s

example-golang/app/app.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ func Run(srcDir, outDir string, numWorkers int, sortResults bool) error {
4646
dst := filepath.Join(outDir, spec.Lang+"-"+spec.Code+".txt")
4747

4848
wg.Add(1)
49-
// TODO: add more collations
50-
go extract(src, dst, "POLISH_CI", sortResults, sem, &wg)
49+
go extract(src, dst, langMap[spec.Lang], sortResults, sem, &wg)
5150
}
5251

5352
wg.Wait()
@@ -66,3 +65,29 @@ func clearOutput(outDir string) error {
6665

6766
return nil
6867
}
68+
69+
var langMap = map[string]string{
70+
"en": "ENGLISH_CI", // The first language is used as fallback.
71+
"la": "ENGLISH_CI", // Latin
72+
"eo": "ENGLISH_CI", // Esperanto
73+
"ar": "ARABIC_CI",
74+
"cz": "CZECH_CI",
75+
"da": "DANISH_CI", // ?
76+
"de": "GERMAN_CI",
77+
"el": "GREEK_CI",
78+
"es": "SPANISH_CI",
79+
"fi": "FINNISH_CI",
80+
"fr": "FRENCH_CI",
81+
"he": "HEBREW_CI",
82+
"hr": "CROATIAN_CI",
83+
"hu": "HUNGARIAN_CI",
84+
"it": "ITALIAN_CI",
85+
"lt": "LITHUANIAN_CI",
86+
"nl": "DUTCH_CI",
87+
"pl": "POLISH_CI",
88+
"pt": "PORTUGUESE_CI",
89+
"ru": "RUSSIAN_CI",
90+
"sk": "SLOVAK_CI",
91+
"sv": "SWEDISH_CI",
92+
"uk": "UKRAINIAN_CI",
93+
}

example-golang/app/extract.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ const (
2727
// correctly.
2828
//
2929
// Rust and Python versions split text according to "[\W\d]+" - anything that is
30-
// not a word or a digit. WTF?
30+
// not a word or a digit. TODO: confirm whether some words contain digits
3131
func splitWordsUnicode(data []byte, atEOF bool) (advance int, token []byte, err error) {
3232
var start int
3333
var r rune

example-golang/go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ go 1.17
55
require (
66
github.com/bmatcuk/doublestar v1.3.4
77
github.com/stretchr/testify v1.7.0
8-
github.com/thoas/go-funk v0.9.1
98
github.com/tidwall/collate v1.0.0
109
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
1110
)

example-golang/go.sum

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
55
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
66
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
77
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
8-
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
98
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
109
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
11-
github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M=
12-
github.com/thoas/go-funk v0.9.1/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q=
1310
github.com/tidwall/collate v1.0.0 h1:xgvwO2UunUoXx3NS3UqHBX63l248ZApqo7mUe3NHy6I=
1411
github.com/tidwall/collate v1.0.0/go.mod h1:S56qxEr2ALVCaGY41npreOJ5lBIILSrxYLgEpxoHVIk=
1512
github.com/tidwall/gjson v1.3.4 h1:On5waDnyKKk3SWE4EthbjjirAWXp43xx5cKCUZY1eZw=
@@ -23,7 +20,6 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
2320
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
2421
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
2522
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
26-
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
2723
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
2824
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
2925
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

0 commit comments

Comments
 (0)