Skip to content

Commit 0ba4560

Browse files
committed
add Elixir version
1 parent 543afe2 commit 0ba4560

File tree

10 files changed

+153
-4
lines changed

10 files changed

+153
-4
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
/.vscode/
33
/.DS_Store
44

5+
/words_extractor_jl/
6+
/words_extractor_pypy/

README.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@ Example of words extracting in Go, Crystal, Rust and Python
44

55
Text source: 79.4MB in 30 files
66

7-
- Python 3.9.5 (parallel) with sorting: 4.01s, without sorting: 2.47
7+
- Python 3.9.5 (parallel) with sorting: 3.42s, without sorting: 2.47s
88
- Go 1.16.4 (parallel) with sorting: 7.32s, without sorting: 4.06s
99
- Python 3.9.5 with sorting: 10s, without sorting: 8.32s
1010
- Go 1.16.4 with sorting: 21s, without sorting: 11s
11+
- Elixir 1.12 (parallel) with sorting: 33s
1112
- Rust 1.51.0 with sorting: 1m31s, without sorting: 1m10s
1213
- Crystal 1.0.0 with sorting: 2m55s, without sorting: 27s
1314

1415
macOS 11.3.1, MacBook Pro (Retina, 15-inch, Late 2013)
1516

16-
```
17+
```bash
1718
cd words_extractor_py
1819
python words.py
1920

@@ -25,14 +26,17 @@ make run
2526

2627
cd words_extractor_cr
2728
crystal run src/fast_words_cr.cr
29+
30+
cd words_extractor_ex
31+
mix run -e "WordsExtractor.run"
2832
```
2933

3034
## Running Python
3135

3236
1. Install the latest Python 3.9.5
3337
2. Create venv and dependencies
3438

35-
```
39+
```bash
3640
cd words_extractor_py
3741
python -m venv venv
3842
source venv/bin/activate
@@ -41,6 +45,6 @@ pip install -r requirements.txt
4145

4246
3. Run the code
4347

44-
```
48+
```bash
4549
python words_parallel.py
4650
```

words_extractor_ex/.formatter.exs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Used by "mix format"
2+
[
3+
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
4+
]

words_extractor_ex/.gitignore

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# The directory Mix will write compiled artifacts to.
2+
/_build/
3+
4+
# If you run "mix test --cover", coverage assets end up here.
5+
/cover/
6+
7+
# The directory Mix downloads your dependencies sources to.
8+
/deps/
9+
10+
# Where third-party dependencies like ExDoc output generated docs.
11+
/doc/
12+
13+
# Ignore .fetch files in case you like to edit your project deps locally.
14+
/.fetch
15+
16+
# If the VM crashes, it generates a dump, let's ignore it too.
17+
erl_crash.dump
18+
19+
# Also ignore archive artifacts (built via "mix archive.build").
20+
*.ez
21+
22+
# Ignore package tarball (built via "mix hex.build").
23+
words_extractor_ex-*.tar
24+
25+
# Temporary files, for example, from tests.
26+
/tmp/
27+
/.elixir_ls/
28+
/.history/
29+
/.ipynb_checkpoints/

words_extractor_ex/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# WordsExtractor
2+
3+
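Elixir version of the words extractor. For every `.yml` file under `../data/pl/` it reads the `code` key, loads the sibling `.txt` file, and writes that text's sorted unique lowercase words to `words/extracted-<code>.txt`. Files are processed in parallel with `Task.async_stream`.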
4+
5+
## Installation
6+
7+
If [available in Hex](https://hex.pm/docs/publish), the package can be installed
8+
by adding `words_extractor_ex` to your list of dependencies in `mix.exs`:
9+
10+
```elixir
11+
def deps do
12+
[
13+
{:words_extractor_ex, "~> 0.1.0"}
14+
]
15+
end
16+
```
17+
18+
Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
19+
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
20+
be found at [https://hexdocs.pm/words_extractor_ex](https://hexdocs.pm/words_extractor_ex).
21+
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
defmodule WordsExtractor do
  @moduledoc """
  Extracts the unique words of every text found under a data directory.

  Each source text is described by a `.yml` file that carries a `"code"` key
  and sits next to a `.txt` file with the same base name. For every such pair
  the sorted unique lowercase words are written to
  `<outdir>/extracted-<code>.txt`, one word per line. Files are processed
  concurrently with `Task.async_stream/5`.
  """

  @default_source_dir "../data/pl/"
  @default_outdir "words"

  # Anchored on purpose: :filelib.fold_files/5 treats the pattern as a regular
  # expression, so a bare ".yml" would also match names such as "x.yml.bak".
  @yaml_pattern "\\.yml$"

  @doc """
  Runs the extraction for every `.yml` file found recursively in `source_dir`.

  `outdir` is deleted and recreated first, so it only ever contains the result
  of the latest run. Returns the list of task results produced by
  `Task.async_stream/5` (one `{:ok, :ok}` per processed file, in completion
  order).
  """
  @spec run(String.t(), String.t()) :: list()
  def run(source_dir \\ @default_source_dir, outdir \\ @default_outdir) do
    clean_dir(outdir)

    source_dir
    |> walk(@yaml_pattern)
    |> Task.async_stream(__MODULE__, :worker, [outdir], ordered: false, timeout: :infinity)
    |> Enum.to_list()
  end

  @doc """
  Removes `path` with everything inside it and creates it again, empty.

  Raises `File.Error` when the directory cannot be removed or created.
  """
  @spec clean_dir(String.t()) :: :ok
  def clean_dir(path) do
    File.rm_rf!(path)
    # mkdir_p! so that a nested output path works without its parents existing.
    File.mkdir_p!(path)
  end

  @doc """
  Processes a single `.yml` descriptor at `path`.

  Reads the `"code"` key from the YAML file, extracts the words of the sibling
  `.txt` file, writes them to `<outdir>/extracted-<code>.txt` and prints `path`
  to standard output. Raises when the YAML has no `"code"` key or when a file
  cannot be read or written.
  """
  @spec worker(String.t(), String.t()) :: :ok
  def worker(path, outdir) do
    %{"code" => code} = YamlElixir.read_from_file!(path)

    words =
      path
      # Only the trailing extension is swapped; a ".yml" elsewhere in the path
      # (for example in a directory name) is left untouched.
      |> Path.rootname(".yml")
      |> Kernel.<>(".txt")
      |> File.read!()
      |> extract_words()

    outdir
    |> Path.join("extracted-#{code}.txt")
    # iodata avoids building one large intermediate binary before writing.
    |> File.write!(Enum.intersperse(words, "\n"))

    IO.puts(path)
  end

  @doc """
  Returns the sorted unique lowercase words of `text`.

  Words are separated by any run of non-word characters or digits. Empty
  fragments (produced when the text starts or ends with a separator) are
  dropped.

  ## Examples

      iex> WordsExtractor.extract_words("Hello, hello world 42!")
      ["hello", "world"]

  """
  @spec extract_words(String.t()) :: [String.t()]
  def extract_words(text) do
    text
    |> String.downcase()
    |> then(&Regex.split(~r/[\W\d]+/u, &1, trim: true))
    |> MapSet.new()
    # Plain term ordering: this sort does not respect language collation.
    |> Enum.sort()
  end

  @doc """
  Lists, recursively, the files under `path` whose name matches `pattern`.

  `pattern` is a regular expression (as a string) handed to
  `:filelib.fold_files/5`. The result is a list of paths as strings.
  """
  @spec walk(String.t(), String.t()) :: [String.t()]
  def walk(path, pattern) do
    dir = String.to_charlist(path)
    regexp = String.to_charlist(pattern)

    dir
    |> :filelib.fold_files(regexp, true, &[&1 | &2], [])
    |> Enum.map(&to_string/1)
  end
end

words_extractor_ex/mix.exs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
defmodule WordsExtractor.MixProject do
  @moduledoc false

  use Mix.Project

  @app :words_extractor_ex
  @version "0.1.0"

  # Project definition read by Mix: application name, version, required
  # Elixir version and the dependency list.
  def project do
    [
      app: @app,
      version: @version,
      elixir: "~> 1.12",
      start_permanent: Mix.env() == :prod,
      deps: deps()
    ]
  end

  # OTP application settings; see "mix help compile.app".
  def application, do: [extra_applications: [:logger]]

  # Hex dependencies; see "mix help deps".
  defp deps, do: [{:yaml_elixir, "~> 2.7"}]
end

words_extractor_ex/mix.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
%{
2+
"yamerl": {:hex, :yamerl, "0.8.1", "07da13ffa1d8e13948943789665c62ccd679dfa7b324a4a2ed3149df17f453a4", [:rebar3], [], "hexpm", "96cb30f9d64344fed0ef8a92e9f16f207de6c04dfff4f366752ca79f5bceb23f"},
3+
"yaml_elixir": {:hex, :yaml_elixir, "2.7.0", "6f731d622a3b28769e26e634f82aa171d1d01cae0e8820ce9e93e559c83c23c8", [:mix], [{:yamerl, "~> 0.8", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "f2eb39e0fb23625777f74e0c821f42f033dc4eaa5402c34beaa665944d85f3ea"},
4+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Starts ExUnit so that the test modules can be run.
ExUnit.start()
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
defmodule WordsExtractorTest do
  use ExUnit.Case
  doctest WordsExtractor

  # Every test gets its own scratch directory, removed again when it finishes.
  setup do
    dir =
      Path.join(
        System.tmp_dir!(),
        "words_extractor_test_#{System.unique_integer([:positive])}"
      )

    File.mkdir_p!(dir)
    on_exit(fn -> File.rm_rf!(dir) end)

    {:ok, dir: dir}
  end

  describe "clean_dir/1" do
    test "recreates the directory empty", %{dir: dir} do
      File.write!(Path.join(dir, "stale.txt"), "old")

      WordsExtractor.clean_dir(dir)

      assert File.dir?(dir)
      assert File.ls!(dir) == []
    end
  end

  describe "walk/2" do
    test "returns matching files from nested directories as strings", %{dir: dir} do
      nested = Path.join(dir, "nested")
      File.mkdir_p!(nested)
      File.write!(Path.join(dir, "a.yml"), "")
      File.write!(Path.join(nested, "b.yml"), "")
      File.write!(Path.join(dir, "a.txt"), "")

      expected = Enum.sort([Path.join(dir, "a.yml"), Path.join(nested, "b.yml")])

      assert Enum.sort(WordsExtractor.walk(dir, ".yml")) == expected
    end
  end
end

0 commit comments

Comments
 (0)