Skip to content

Commit 3e6eed0

Browse files
committed
improve code build
1 parent 06ba78b commit 3e6eed0

File tree

12 files changed

+112
-26
lines changed

12 files changed

+112
-26
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44

55
/words_extractor_jl/
66
/words_extractor_pypy/
7+
/.elixir_ls/

README.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ Example of text file parsing in Python, Golang, Elixir, Rust and Crystal
55
Text source: 79.4MB in 30 files
66

77
- Python 3.9.5 (parallel) with sorting: 3.42s, without sorting: 2.47s
8+
- Rust 1.51.0 with sorting: 7s, without sorting: 5s (no parallelism)
89
- Go 1.16.4 (parallel) with sorting: 7.32s, without sorting: 4.06s
9-
- Python 3.9.5 with sorting: 10s, without sorting 8.32s
10-
- Go 1.16.4 with sorting: 21s, without sorting: 11s
11-
- Elixir 1.12 (parallel) with sorting: 33s
12-
- Rust 1.51.0 with sorting: 1m31s, without sorting: 1m10s
13-
- Crystal 1.0.0 with sorting: 2m55s, without sorting: 27s
10+
- Python 3.9.5 with sorting: 10s, without sorting: 8.32s (no multiprocessing)
11+
- Go 1.16.4 with sorting: 21s, without sorting: 11s (no parallelism)
12+
- Crystal 1.0.0 with sorting: 35s, without sorting: 7s (non-optimized sort, no parallelism)
13+
- Elixir 1.12 (parallel) with sorting: 33s (without release build)
14+
1415

1516
macOS 11.3.1, MacBook Pro (Retina, 15-inch, Late 2013)
1617

@@ -19,13 +20,16 @@ cd words_extractor_py
1920
python words.py
2021

2122
cd words_extractor_rs
22-
cargo run
23+
cargo build --release
24+
target/release/words_extractor_rs
2325

2426
cd words_extractor_go
25-
make run
27+
make build
28+
./main
2629

2730
cd words_extractor_cr
28-
crystal run src/fast_words_cr.cr
31+
crystal build --release src/fast_words_cr.cr -o main
32+
./main
2933

3034
cd words_extractor_ex
3135
mix run "WordsExtractor.run"

words_extractor_cr/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
/fast_words_cr
99
/.idea/
1010
/.DS_Store
11+
/main

words_extractor_cr/src/fast_words_cr.cr

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,32 @@ module FastWordsCr
99
def self.main(outpath = "words")
1010
prepare_folder(outpath, "*.txt")
1111
Dir.glob("../data/pl/**/*.yml").each do |path|
12-
text = File.read(path.gsub(".yml", ".txt")).gsub("\n", " ").downcase
12+
# spawn do
13+
worker(path, outpath)
14+
# end
15+
end
16+
# Fiber.yield
17+
end
1318

14-
# 2m13s or 2m58 with verbose .downcase in self.word_cmp
15-
words_json = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_json.downcase
16-
words = Array(String).from_json(words_json).sort { |x, y| self.word_cmp(x, y) }
19+
def self.worker(path, outpath)
20+
text = File.read(path.gsub(".yml", ".txt")).gsub("\n", " ").downcase
1721

18-
# 3m8s
19-
# words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a.sort do |x, y|
20-
# self.word_cmp(x, y)
21-
# end
22+
# 35s
23+
# words_json = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_json.downcase
24+
# words = Array(String).from_json(words_json).sort { |x, y| self.word_cmp(x, y) }
2225

23-
# 27s (no sort)
24-
# words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a
26+
# 35s
27+
# words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a.sort do |x, y|
28+
# self.word_cmp(x, y)
29+
# end
2530

26-
meta = File.open(path) { |file| YAML.parse(file) }
27-
filepath = %Q(#{outpath}/słowa - #{meta["label"]}.txt)
28-
puts filepath
29-
File.write(filepath, words.join("\n"))
30-
end
31+
# 7s (no sort)
32+
words = (text.split(/[^\p{L}]+/).to_set - Set{""}).to_a
33+
34+
meta = File.open(path) { |file| YAML.parse(file) }
35+
filepath = %Q(#{outpath}/słowa - #{meta["label"]}.txt)
36+
puts filepath
37+
File.write(filepath, words.join("\n"))
3138
end
3239

3340
def self.prepare_folder(folder : String, pattern : String)

words_extractor_go/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ words
55
/.DS_Store
66
/sort-me-out
77
/coverage.out
8+
/main

words_extractor_go/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
BINARY_NAME=sort-me-out
1+
BINARY_NAME=main
22

33
all: clean test build
44

55
build:
6-
@go build -o ${BINARY_NAME} *.go
6+
@go build -o ${BINARY_NAME} -ldflags "-s -w" *.go
77

88
run: build
99
./${BINARY_NAME}

words_extractor_go/go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ require (
99
github.com/stretchr/testify v1.7.0
1010
github.com/thoas/go-funk v0.8.0
1111
github.com/tidwall/collate v1.0.0
12+
github.com/tidwall/gjson v1.8.0 // indirect
13+
github.com/tidwall/pretty v1.1.1 // indirect
14+
golang.org/x/text v0.3.6 // indirect
1215
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
1316
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
1417
)

words_extractor_go/go.sum

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,21 @@ github.com/tidwall/collate v1.0.0 h1:xgvwO2UunUoXx3NS3UqHBX63l248ZApqo7mUe3NHy6I
2020
github.com/tidwall/collate v1.0.0/go.mod h1:S56qxEr2ALVCaGY41npreOJ5lBIILSrxYLgEpxoHVIk=
2121
github.com/tidwall/gjson v1.3.4 h1:On5waDnyKKk3SWE4EthbjjirAWXp43xx5cKCUZY1eZw=
2222
github.com/tidwall/gjson v1.3.4/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls=
23+
github.com/tidwall/gjson v1.8.0 h1:Qt+orfosKn0rbNTZqHYDqBrmm3UDA4KRkv70fDzG+PQ=
24+
github.com/tidwall/gjson v1.8.0/go.mod h1:5/xDoumyyDNerp2U36lyolv46b3uF/9Bu6OfyQ9GImk=
2325
github.com/tidwall/match v1.0.1 h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc=
2426
github.com/tidwall/match v1.0.1/go.mod h1:LujAq0jyVjBy028G1WhWfIzbpQfMO8bBZ6Tyb0+pL9E=
27+
github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE=
28+
github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
2529
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
2630
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
31+
github.com/tidwall/pretty v1.1.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
32+
github.com/tidwall/pretty v1.1.1 h1:nt6/Ot5LtZnJCWwEFlelOixPo0xhPFsuZlKyOL3Xfnc=
33+
github.com/tidwall/pretty v1.1.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
2734
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
2835
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
36+
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
37+
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
2938
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
3039
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
3140
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=

words_extractor_rs/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
/target/
22
/words/
3+
/.history/
4+
/.ipynb_checkpoints/

words_extractor_rs/Cargo.lock

Lines changed: 56 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)