Skip to content

Commit 1cf0b9a

Browse files
committed
improved Julia example
1 parent e0e4c26 commit 1cf0b9a

File tree

2 files changed

+58
-13
lines changed

2 files changed

+58
-13
lines changed

example-julia/README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,19 @@
22

33
## setup and run
44

5+
Syntax
6+
7+
```
8+
❯ julia src/words.jl -h
9+
usage: words.jl [-s] [-h]
10+
11+
optional arguments:
12+
-s Sort results
13+
-h, --help show this help message and exit
14+
```
15+
16+
Run
17+
518
```
619
julia -t10 src/words.jl
720
```
8-

example-julia/src/words.jl

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,49 @@
11
module words_extractor_jl
22

3+
using ArgParse
34
using Distributed
45
using YAML
56
using Glob
67

78
const outdir = "words"
89

9-
function worker(yaml_path, i, count)
10+
"""
    parse_commandline()

Parse the program's command-line arguments and return whatever `parse_args` produces.

The only option declared here is the `-s` flag ("Sort results"), stored as `true`
when present. The caller looks the flag up under the key `"s"`.
"""
function parse_commandline()
    s = ArgParseSettings()
    # `@add_arg_table!` is the ArgParse 1.x spelling; the bang-less macro is deprecated.
    @add_arg_table! s begin
        "-s"
        help = "Sort results"
        action = :store_true
    end
    return parse_args(s)
end
19+
20+
"""
    worker(yaml_path, sorting, i, count)

Extract the words belonging to `yaml_path`, write them one per line to the output
file chosen by `get_filepath`, and print a progress line showing `i` out of `count`.

`sorting` is forwarded unchanged to `get_words`.
"""
function worker(yaml_path, sorting, i, count)
    path = get_filepath(yaml_path)
    # Build the whole payload first so the output file is only touched once it is ready.
    contents = join(get_words(yaml_path, sorting), "\n")
    open(io -> print(io, contents), path, "w")
    println("[$(lpad(i, 3, ' '))/$count] $path")
    return nothing
end
1526

16-
function get_words(yaml_path)
17-
text_path = replace(yaml_path, ".yml" => ".txt")
18-
text = read(text_path, String) |> lowercase
19-
split(text, r"[\W\d]+") |> Set |> collect
27+
"""
    get_words(yaml_path, sorting = false)

Return the distinct lowercase words of the text file that accompanies `yaml_path`.

The text file path is obtained by replacing `.yml` with `.txt` in `yaml_path`.
For every line, the first two space-separated fields (the leading book reference)
are skipped; the remainder is lowercased and split on runs of non-word characters
and digits. Tokens of one character or less are dropped.

# Returns
- an unordered `Set{String}` when `sorting` is `false` (the default);
- a sorted `Vector{String}` when `sorting` is `true`.
"""
function get_words(yaml_path, sorting = false)
    text_path = replace(yaml_path, ".yml" => ".txt")
    unique_words = Set{String}()
    # `eachline` streams the file, and inserting straight into a typed set avoids
    # buffering every duplicate token in an untyped vector first.
    for line in eachline(text_path)
        # Exclude the leading book reference (first two space-separated fields).
        fields = split(line, ' '; limit = 3)
        length(fields) < 3 && continue
        for token in split(lowercase(fields[3]), r"[\W\d]+")
            length(token) > 1 && push!(unique_words, String(token))
        end
    end
    return sorting ? sort!(collect(unique_words)) : unique_words
end
2148

2249
function get_filepath(path)
@@ -26,16 +53,25 @@ end
2653

2754
"""
    rdir(dir, pat)

Walk `dir` recursively and return a `Vector{String}` with the joined path of every
file for which `occursin(pat, path)` holds.

`pat` may be a `Glob.FilenameMatch` or a string; a string is wrapped in
`Glob.FilenameMatch` before matching.
"""
function rdir(dir::AbstractString, pat::Glob.FilenameMatch)
    matches = String[]
    for (root, _, files) in walkdir(dir)
        for name in files
            candidate = joinpath(root, name)
            occursin(pat, candidate) && push!(matches, candidate)
        end
    end
    return matches
end

function rdir(dir::AbstractString, pat::AbstractString)
    return rdir(dir, Glob.FilenameMatch(pat))
end
3764

3865
function main()
66+
parsed_args = parse_commandline()
67+
sorting = parsed_args["s"]
68+
69+
addprocs()
70+
println(string("Workers ", nworkers()))
71+
println(string("Processing... using ", Threads.nthreads(), " threads"))
72+
if sorting
73+
println("with sorting")
74+
end
3975
if ispath(outdir)
4076
rm(outdir, recursive = true)
4177
end
@@ -45,13 +81,10 @@ function main()
4581
i = 1
4682
Threads.@threads for path in paths
4783
# println("Spawn $path")
48-
worker(path, i, count)
84+
worker(path, sorting, i, count)
4985
i += 1
5086
end
5187
end
5288

53-
addprocs()
54-
println(string("Workers ", nworkers()))
55-
println(string("Processing... using ", Threads.nthreads(), " threads"))
5689
@time main()
5790
end # module

0 commit comments

Comments
 (0)