Skip to content

Commit 5e3a022

Browse files
committed
update Ruby 3.3.0
1 parent 1a322a3 commit 5e3a022

File tree

5 files changed

+83
-43
lines changed

5 files changed

+83
-43
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ The following results are for 123 unique utf-8 Bible text files in 23 languages
1414
* Machine: MacBook Pro 16" 64GB 2TB M1Max 10 cores.
1515

1616
<pre>
17-
1. Rust 1.63 = 1.15s, with sorting: 1.64s
18-
2. Golang 1.19.1 = 1.38s, with sorting: 1.71s
17+
1. Rust 1.63 = 1.15s, sorting: 1.64s
18+
2. Golang 1.22.0 = 1.40s, sorting with collations: 1.71s
1919
3. Python 3.12.2 = 5.69s, sorting with collations: 6.04s
2020
4. Crystal 1.5.1 = 5.61s
2121
5. Elixir 1.14.0 = 7.34s
22-
6. Julia 1.8.1 = 12.13s, with sorting: 12.22s
23-
7. Ruby 3.3.0 = 12.63s, sorting with collations: 12.87s
22+
6. Julia 1.8.1 = 12.13s, sorting: 12.22s
23+
7. Ruby 3.3.0 = 12.63s, sorting with collations: 22.00s
2424
</pre>
2525

2626
### Conclusion
@@ -52,6 +52,8 @@ The new optimized Golang code version is very fast, slower than Rust but faster
5252
2024-03-02
5353

5454
* Updated Python version to 3.12.2, added poetry, solved missing icu4 collations for M1 processors, added a fancy progress bar
55+
* Updated Golang version to 1.22.0
56+
* Updated Ruby version to 3.3.0, added sorting with collations for many languages (this slowed the code down by almost 2x)
5557

5658
2022-09-17
5759

example-ruby/.tool-versions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ruby 3.2.0-preview2
1+
ruby 3.3.0

example-ruby/Gemfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
source 'https://rubygems.org'
22
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
33

4-
ruby '3.2.0.preview2'
4+
ruby '3.3.0'
55

6+
gem 'bigdecimal', require: false
7+
gem 'ffi-icu'
68
gem 'parallel'
79
gem 'rubocop', require: false
810
# gem 'irbtools', require: 'irbtools/binding'
9-
gem 'pry'
10-
gem 'pry-doc'
11-
gem 'pry-gem', '~> 1.0'
11+
# gem 'pry'
12+
# gem 'pry-doc'
13+
# gem 'pry-gem'

example-ruby/Gemfile.lock

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,53 +2,47 @@ GEM
22
remote: https://rubygems.org/
33
specs:
44
ast (2.4.2)
5-
coderay (1.1.3)
6-
json (2.6.2)
7-
method_source (1.0.0)
8-
parallel (1.22.1)
9-
parser (3.1.2.1)
5+
bigdecimal (3.1.6)
6+
ffi (1.16.3)
7+
ffi-icu (0.5.3)
8+
ffi (~> 1.0, >= 1.0.9)
9+
json (2.7.1)
10+
language_server-protocol (3.17.0.3)
11+
parallel (1.24.0)
12+
parser (3.3.0.5)
1013
ast (~> 2.4.1)
11-
pry (0.14.1)
12-
coderay (~> 1.1)
13-
method_source (~> 1.0)
14-
pry-doc (1.3.0)
15-
pry (~> 0.11)
16-
yard (~> 0.9.11)
17-
pry-gem (1.0.0)
18-
pry (~> 0.12)
14+
racc
15+
racc (1.7.3)
1916
rainbow (3.1.1)
20-
regexp_parser (2.5.0)
21-
rexml (3.2.5)
22-
rubocop (1.36.0)
17+
regexp_parser (2.9.0)
18+
rexml (3.2.6)
19+
rubocop (1.61.0)
2320
json (~> 2.3)
21+
language_server-protocol (>= 3.17.0)
2422
parallel (~> 1.10)
25-
parser (>= 3.1.2.1)
23+
parser (>= 3.3.0.2)
2624
rainbow (>= 2.2.2, < 4.0)
2725
regexp_parser (>= 1.8, < 3.0)
2826
rexml (>= 3.2.5, < 4.0)
29-
rubocop-ast (>= 1.20.1, < 2.0)
27+
rubocop-ast (>= 1.30.0, < 2.0)
3028
ruby-progressbar (~> 1.7)
31-
unicode-display_width (>= 1.4.0, < 3.0)
32-
rubocop-ast (1.21.0)
33-
parser (>= 3.1.1.0)
34-
ruby-progressbar (1.11.0)
35-
unicode-display_width (2.3.0)
36-
webrick (1.7.0)
37-
yard (0.9.28)
38-
webrick (~> 1.7.0)
29+
unicode-display_width (>= 2.4.0, < 3.0)
30+
rubocop-ast (1.31.1)
31+
parser (>= 3.3.0.4)
32+
ruby-progressbar (1.13.0)
33+
unicode-display_width (2.5.0)
3934

4035
PLATFORMS
4136
ruby
4237

4338
DEPENDENCIES
39+
bigdecimal
40+
ffi-icu
4441
parallel
45-
pry
46-
pry-doc
47-
pry-gem (~> 1.0)
4842
rubocop
4943

5044
RUBY VERSION
51-
ruby 3.2.0.preview2
45+
ruby 3.3.0p0
5246

5347
BUNDLED WITH
54-
2.4.0.dev
48+
2.5.6

example-ruby/words.rb

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
require 'etc'
44
require 'fileutils'
55
require 'optparse'
6+
require 'uri'
7+
require 'ffi-icu'
68

79
class WordsExtractor
8-
def initialize(cores: Etc.nprocessors, sorting: false, outdir: 'words', source: '../data/??/**/*.yml')
10+
def initialize(outdir:, source:, cores: Etc.nprocessors, sorting: false)
911
@cores = cores
1012
@sorting = sorting
1113
@outdir = outdir
@@ -17,6 +19,35 @@ def clear_output
1719
Dir.mkdir(@outdir)
1820
end
1921

22+
# Maps a two-letter language code to the locale identifier used to build the
# ICU collator for that language. Built once and frozen so the table is not
# re-allocated on every lookup.
COLLATION_LOCALES = {
  'ar' => 'ar_SA', # Arabic, Saudi Arabia
  'cs' => 'cs_CZ', # Czech, Czech Republic
  'da' => 'da_DK', # Danish, Denmark
  'de' => 'de_DE', # German, Germany
  'el' => 'el_GR', # Greek, Greece
  'en' => 'en_US', # English ('en_EN' is not a valid locale: EN is not a region code)
  'eo' => 'eo',    # Esperanto, not country-specific
  'es' => 'es_ES', # Spanish, Spain
  'fi' => 'fi_FI', # Finnish, Finland
  'fr' => 'fr_FR', # French, France
  'he' => 'he_IL', # Hebrew, Israel
  'hr' => 'hr_HR', # Croatian, Croatia
  'hu' => 'hu_HU', # Hungarian, Hungary
  'it' => 'it_IT', # Italian, Italy
  'lt' => 'lt_LT', # Lithuanian, Lithuania
  'la' => 'en_US', # Latin is sorted with the English locale
  'nl' => 'nl_NL', # Dutch, Netherlands
  'pl' => 'pl_PL', # Polish, Poland
  'pt' => 'pt_PT', # Portuguese, Portugal
  'ru' => 'ru_RU', # Russian, Russia
  'sk' => 'sk_SK', # Slovak, Slovakia
  'sv' => 'sv_SE', # Swedish, Sweden
  'uk' => 'uk_UA'  # Ukrainian, Ukraine
}.freeze

# Returns the collation locale for a language code.
#
# @param lang [String] two-letter language code, e.g. 'de'
# @return [String, nil] locale identifier such as 'de_DE', or nil when the
#   language has no entry in the table
#
# NOTE(review): the result is handed straight to the ICU collator constructor;
# what ICU does with nil (presumably falls back to a default locale) should be
# confirmed against the ICU documentation.
def get_collation(lang)
  COLLATION_LOCALES[lang]
end
50+
2051
def get_words(filepath)
2152
IO.readlines(filepath).map do |line|
2253
line.strip.downcase.split(' ')[2...-1].join(' ').split(/[^\p{L}]/).uniq.select { |s| s.size > 1 }
@@ -34,15 +65,21 @@ def run
3465
print ' with sorting' if @sorting
3566
puts '...'
3667
clear_output
68+
3769
start = Time.now
3870
paths = Dir[@source]
3971
count = paths.count
72+
4073
sizes = Parallel.map_with_index(paths, in_processes: @cores) do |yaml_path, i|
4174
meta = YAML.load_file(yaml_path)
4275
filepath = yaml_path.gsub('.yml', '.txt')
4376
words = get_words(filepath)
44-
words.sort! if @sorting
77+
if @sorting
78+
collator = ICU::Collation::Collator.new(get_collation(meta['lang']))
79+
words = words.sort { |a, b| collator.compare(a, b) }
80+
end
4581
save_words(words:, meta:, yaml_path:, count:, i:)
82+
4683
File.size(filepath)
4784
end
4885
puts "Total size: #{(sizes.sum / 1024.0 / 1024).round} MB"
@@ -64,5 +101,10 @@ def run
64101
end
65102
opts.on('-s', 'Sort results') { |v| options[:s] = v }
66103
end.parse!
67-
WordsExtractor.new(cores: options[:n], sorting: options[:s]).run
104+
WordsExtractor.new(
105+
cores: options[:n],
106+
sorting: options[:s],
107+
outdir: 'words',
108+
source: '../data/??/**/*.yml'
109+
).run
68110
end

0 commit comments

Comments
 (0)