Skip to content

Commit 894313f

Browse files
committed
fix Rust warnings
1 parent 0112b35 commit 894313f

File tree

1 file changed

+54
-83
lines changed

1 file changed

+54
-83
lines changed

example-rust/src/main.rs

Lines changed: 54 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -1,91 +1,62 @@
1+
use crossbeam_utils::sync::WaitGroup;
12
use glob::glob;
2-
use itertools::Itertools;
33
use lexical_sort::{natural_lexical_cmp, StringSort};
4-
use once_cell::sync::Lazy;
54
use regex::Regex;
6-
use std::path::Path;
7-
use tokio::fs;
5+
use std::collections::HashSet;
6+
use std::fs;
7+
use std::thread;
8+
use time::Instant;
89
use yaml_rust::YamlLoader;
910

10-
const SORT: bool = false;
11-
const OUTDIR: &str = "words_new";
12-
const FILE_DIR: &str = "../data/??/**/*.yml";
13-
static SEPARATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\W\d]+").unwrap());
14-
15-
async fn create_outdir() -> tokio::io::Result<()> {
16-
fs::create_dir_all(OUTDIR).await
17-
}
18-
19-
async fn read_file(path: &Path) -> String {
20-
let raw = fs::read_to_string(path).await.unwrap();
21-
raw.to_lowercase().replace('\n', " ")
22-
}
23-
24-
fn get_unique_token(src: &str) -> Vec<&str> {
25-
let mut data = SEPARATOR_REGEX.split(src).unique().collect::<Vec<_>>();
26-
27-
if SORT {
28-
data.string_sort_unstable(natural_lexical_cmp);
11+
fn main() -> std::io::Result<()> {
12+
let start = Instant::now();
13+
let with_sorting = false;
14+
let outdir = "words";
15+
fs::create_dir_all(outdir)?;
16+
let wg = WaitGroup::new();
17+
let path = "../data/??/**/*.yml";
18+
for entry in glob(path).expect("Failed to read glob pattern") {
19+
match entry {
20+
Ok(path) => {
21+
// let separator = Regex::new(r"[^\p{L}]+").unwrap();
22+
let separator = Regex::new(r"[\W\d]+").unwrap();
23+
let wg = wg.clone();
24+
thread::spawn(move || {
25+
let filepath = path.to_str().unwrap().replace(".yml", ".txt");
26+
// println!("{:?}", filepath);
27+
let text = fs::read_to_string(&filepath)
28+
.unwrap()
29+
.to_lowercase()
30+
.replace("\n", " ");
31+
let tokens: Vec<&str> = separator.split(&text).collect();
32+
let unique_tokens: HashSet<&str> = tokens.into_iter().collect();
33+
let mut words: Vec<&str>;
34+
if with_sorting {
35+
words = unique_tokens.into_iter().collect();
36+
words.string_sort_unstable(natural_lexical_cmp);
37+
} else {
38+
words = unique_tokens.into_iter().collect();
39+
}
40+
let yaml = fs::read_to_string(&path).unwrap();
41+
let docs = YamlLoader::load_from_str(&yaml).unwrap();
42+
let meta = &docs[0];
43+
let out = format!(
44+
"{}/{}-{}.txt",
45+
outdir,
46+
meta["lang"].as_str().unwrap(),
47+
meta["code"].as_str().unwrap()
48+
);
49+
if let Err(e) = fs::write(out, words.join("\n")) {
50+
println!("Writing error: {}", e.to_string());
51+
}
52+
drop(wg);
53+
});
54+
}
55+
Err(e) => println!("{:?}", e),
56+
}
2957
}
30-
31-
data
32-
}
33-
34-
async fn get_filename_from_meta(path: &Path) -> anyhow::Result<String> {
35-
let yaml = fs::read_to_string(path).await?;
36-
let docs = YamlLoader::load_from_str(&yaml)?;
37-
let meta = &docs[0];
38-
39-
let label = meta["label"]
40-
.as_str()
41-
.ok_or_else(|| anyhow::anyhow!("label not found"))?;
42-
43-
Ok(format!("{}/extracted-words-for-{}.txt", OUTDIR, label))
44-
}
45-
46-
#[tokio::main]
47-
async fn main() -> std::io::Result<()> {
48-
let start = std::time::Instant::now();
49-
let path = glob(FILE_DIR).expect("failed to read glob pattern");
50-
51-
let submissions = path.map(|entry| {
52-
tokio::spawn(async {
53-
let yaml_path = entry.expect("should be existed");
54-
let txt_path = yaml_path.with_extension("txt");
55-
56-
let outdir_submission =
57-
tokio::spawn(async { create_outdir().await.expect("unable to create outdir") });
58-
59-
let read_text_file_submission = tokio::spawn(async move {
60-
let data = read_file(&txt_path).await;
61-
let tokens = get_unique_token(&data);
62-
63-
tokens.join("\n")
64-
});
65-
66-
let filename_submission = tokio::spawn(async move {
67-
get_filename_from_meta(&yaml_path)
68-
.await
69-
.expect("should be existed")
70-
});
71-
72-
let (tokens, filename, _) = tokio::join!(
73-
read_text_file_submission,
74-
filename_submission,
75-
outdir_submission,
76-
);
77-
78-
fs::write(
79-
filename.expect("failed to run filename"),
80-
tokens.expect("failed to get tokens"),
81-
)
82-
.await
83-
.expect("failed to write");
84-
})
85-
});
86-
87-
futures::future::join_all(submissions).await;
88-
89-
println!("{:?}", start.elapsed());
58+
wg.wait();
59+
let end = Instant::now();
60+
println!("{:?} seconds.", end - start);
9061
Ok(())
9162
}

0 commit comments

Comments
 (0)