diff --git a/example-rust/Cargo.lock b/example-rust/Cargo.lock index 1860bc0..71b7221 100644 --- a/example-rust/Cargo.lock +++ b/example-rust/Cargo.lock @@ -18,22 +18,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70033777eb8b5124a81a1889416543dddef2de240019b674c81285a2635a7e1e" [[package]] -name = "autocfg" -version = "1.0.1" +name = "anyhow" +version = "1.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "94a45b455c14666b85fc40a019e8ab9eb75e3a124e05494f5397122bc9eb06e0" [[package]] -name = "base-x" -version = "0.2.8" +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4521f3e3d031370679b3b140beb36dfe4801b09ac77e30c61941f97df3ef28b" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "bumpalo" -version = "3.7.0" +name = "bytes" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" [[package]] name = "cfg-if" @@ -42,91 +42,99 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "collate" -version = "0.1.10" +name = "either" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "969719a0e6f949876ac05ba79bb84c8933afb007303a18ca8a021711db98f84a" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] -name = "const_fn" -version = "0.4.8" +name = "futures" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f92cfa0fd5690b3cf8c1ef2cabbd9b7ef22fa53cf5e1f92b05103f6d5d1cf6e7" +checksum = 
"28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] [[package]] -name = "crossbeam" -version = "0.8.0" +name = "futures-channel" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd01a6eb3daaafa260f6fc94c3a6c36390abc2080e38e3e34ced87393fb77d80" +checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b" dependencies = [ - "cfg-if", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", + "futures-core", + "futures-sink", ] [[package]] -name = "crossbeam-channel" -version = "0.5.1" +name = "futures-core" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] +checksum = "d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7" [[package]] -name = "crossbeam-deque" -version = "0.8.0" +name = "futures-executor" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a" dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", + "futures-core", + "futures-task", + "futures-util", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.4" +name = "futures-io" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52fb27eab85b17fbb9f6fd667089e07d6a2eb8743d02639ee7f6a7a7729c9c94" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] +checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2" [[package]] -name = 
"crossbeam-queue" -version = "0.3.1" +name = "futures-macro" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756" +checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c" dependencies = [ - "cfg-if", - "crossbeam-utils", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "crossbeam-utils" -version = "0.8.4" +name = "futures-sink" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4feb231f0d4d6af81aed15928e58ecf5816aa62a2393e2c82f46973e92a9a278" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] +checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508" [[package]] -name = "discard" -version = "1.0.4" +name = "futures-task" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0" +checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72" + +[[package]] +name = "futures-util" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] [[package]] name = "glob" @@ -136,24 +144,30 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "hermit-abi" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] -name = "itoa" -version = "0.4.7" +name = "instant" 
+version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "itertools" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] [[package]] name = "lexical-sort" @@ -166,9 +180,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.95" +version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36" +checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" [[package]] name = "linked-hash-map" @@ -176,6 +190,15 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" +[[package]] +name = "lock_api" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.14" @@ -187,203 +210,170 @@ dependencies = [ [[package]] name = "memchr" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] -name = "memoffset" -version = "0.6.4" +name = "mio" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" +checksum = "8067b404fe97c70829f082dec8bcf4f71225d7eaea1d8645349cb76fa06205cc" dependencies = [ - "autocfg", + "libc", + "log", + "miow", + "ntapi", + "winapi", ] [[package]] -name = "num_cpus" -version = "1.13.0" +name = "miow" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" dependencies = [ - "hermit-abi", - "libc", + "winapi", ] [[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro2" -version = "1.0.27" +name = "ntapi" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" dependencies = [ - "unicode-xid", + "winapi", ] [[package]] -name = "quote" -version = "1.0.9" +name = "num_cpus" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ - "proc-macro2", + "hermit-abi", + "libc", ] [[package]] -name = "regex" -version = "1.5.4" +name = "once_cell" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" [[package]] -name = "regex-syntax" -version = "0.6.25" +name = "parking_lot" +version = 
"0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] [[package]] -name = "rustc_version" -version = "0.2.3" +name = "parking_lot_core" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" dependencies = [ - "semver", + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", ] [[package]] -name = "ryu" -version = "1.0.5" +name = "pin-project-lite" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" [[package]] -name = "scopeguard" -version = "1.1.0" +name = "pin-utils" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "semver" -version = "0.9.0" +name = "proc-macro2" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" dependencies = [ - "semver-parser", + "unicode-xid", ] [[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - -[[package]] -name = "serde" -version = "1.0.126" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" - -[[package]] -name = "serde_derive" -version = "1.0.126" +name = "quote" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" dependencies = [ "proc-macro2", - "quote", - "syn", ] [[package]] -name = "serde_json" -version = "1.0.64" +name = "redox_syscall" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" dependencies = [ - "itoa", - "ryu", - "serde", + "bitflags", ] [[package]] -name = "sha1" -version = "0.6.0" +name = "regex" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] -name = "standback" -version = "0.2.17" +name = "regex-syntax" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff" -dependencies = [ - "version_check", -] +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] -name = "stdweb" -version = "0.4.20" +name = "scopeguard" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5" -dependencies = [ - "discard", - "rustc_version", - "stdweb-derive", - "stdweb-internal-macros", - "stdweb-internal-runtime", - "wasm-bindgen", 
-] +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] -name = "stdweb-derive" -version = "0.5.3" +name = "signal-hook-registry" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" dependencies = [ - "proc-macro2", - "quote", - "serde", - "serde_derive", - "syn", + "libc", ] [[package]] -name = "stdweb-internal-macros" -version = "0.2.9" +name = "slab" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11" -dependencies = [ - "base-x", - "proc-macro2", - "quote", - "serde", - "serde_derive", - "serde_json", - "sha1", - "syn", -] +checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" [[package]] -name = "stdweb-internal-runtime" -version = "0.1.5" +name = "smallvec" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "syn" -version = "1.0.72" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" dependencies = [ "proc-macro2", "quote", @@ -391,40 +381,32 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.2.26" +name = "tokio" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a8cbfbf47955132d0202d1662f49b2423ae35862aee471f3ba4b133358f372" +checksum = "0c27a64b625de6d309e8c57716ba93021dccf1b3b5c97edd6d3dd2d2135afc0a" dependencies = [ - "const_fn", + "bytes", 
"libc", - "standback", - "stdweb", - "time-macros", - "version_check", + "memchr", + "mio", + "num_cpus", + "once_cell", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "tokio-macros", "winapi", ] [[package]] -name = "time-macros" -version = "0.1.1" +name = "tokio-macros" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1" +checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" dependencies = [ - "proc-macro-hack", - "time-macros-impl", -] - -[[package]] -name = "time-macros-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5c3be1edfad6027c69f5491cf4cb310d1a71ecd6af742788c6ff8bced86b8fa" -dependencies = [ - "proc-macro-hack", "proc-macro2", "quote", - "standback", "syn", ] @@ -434,66 +416,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" -[[package]] -name = "version_check" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" - -[[package]] -name = "wasm-bindgen" -version = "0.2.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.74" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" - [[package]] name = "winapi" version = "0.3.9" @@ -520,14 +442,15 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" name = "words_extractor_rs" version = "0.2.0" dependencies = [ - "collate", - "crossbeam", - "crossbeam-utils", + "anyhow", + "futures", "glob", + "itertools", "lexical-sort", "num_cpus", + "once_cell", "regex", - "time", + "tokio", "yaml-rust", ] diff --git a/example-rust/Cargo.toml b/example-rust/Cargo.toml index d8a1523..7242bd5 100644 --- a/example-rust/Cargo.toml +++ b/example-rust/Cargo.toml @@ -7,12 +7,16 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -collate = "0.1.10" -crossbeam = "0.8.0" -crossbeam-utils = "0.8.4" +anyhow = "1.0.53" +futures = "0.3.19" glob = "0.3.0" +itertools = "0.10.3" lexical-sort = "0.3.1" num_cpus = "1.0" +once_cell = "1.9.0" regex = "1.5.4" -time = "*" -yaml-rust = "0.4.5" \ No newline at end of file +tokio = { version = "1.16.1", features = ["full"] } +yaml-rust = "0.4.5" + +[profile.release] +debug = 1 diff --git a/example-rust/src/main.rs b/example-rust/src/main.rs index 40f4b8a..f26e557 100644 --- a/example-rust/src/main.rs +++ b/example-rust/src/main.rs @@ -1,62 +1,91 @@ -use crossbeam_utils::sync::WaitGroup; use glob::glob; +use 
itertools::Itertools; use lexical_sort::{natural_lexical_cmp, StringSort}; +use once_cell::sync::Lazy; use regex::Regex; -use std::collections::HashSet; -use std::fs; -use std::thread; -use time::Instant; +use std::path::Path; +use tokio::fs; use yaml_rust::YamlLoader; -fn main() -> std::io::Result<()> { - let start = Instant::now(); - let with_sorting = false; - let outdir = "words"; - fs::create_dir_all(outdir)?; - let wg = WaitGroup::new(); - let path = "../data/??/**/*.yml"; - for entry in glob(path).expect("Failed to read glob pattern") { - match entry { - Ok(path) => { - // let separator = Regex::new(r"[^\p{L}]+").unwrap(); - let separator = Regex::new(r"[\W\d]+").unwrap(); - let wg = wg.clone(); - thread::spawn(move || { - let filepath = path.to_str().unwrap().replace(".yml", ".txt"); - // println!("{:?}", filepath); - let text = fs::read_to_string(&filepath) - .unwrap() - .to_lowercase() - .replace("\n", " "); - let tokens: Vec<&str> = separator.split(&text).collect(); - let unique_tokens: HashSet<&str> = tokens.into_iter().collect(); - let mut words: Vec<&str>; - if with_sorting { - words = unique_tokens.into_iter().collect(); - words.string_sort_unstable(natural_lexical_cmp); - } else { - words = unique_tokens.into_iter().collect(); - } - let yaml = fs::read_to_string(&path).unwrap(); - let docs = YamlLoader::load_from_str(&yaml).unwrap(); - let meta = &docs[0]; - let out = format!( - "{}/{}-{}.txt", - outdir, - meta["lang"].as_str().unwrap(), - meta["code"].as_str().unwrap() - ); - if let Err(e) = fs::write(out, words.join("\n")) { - println!("Writing error: {}", e.to_string()); - } - drop(wg); - }); - } - Err(e) => println!("{:?}", e), - } +const SORT: bool = false; +const OUTDIR: &str = "words_new"; +const FILE_DIR: &str = "../data/??/**/*.yml"; +static SEPARATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\W\d]+").unwrap()); + +async fn create_outdir() -> tokio::io::Result<()> { + fs::create_dir_all(OUTDIR).await +} + +async fn read_file(path: &Path) -> 
String { + let raw = fs::read_to_string(path).await.unwrap(); + raw.to_lowercase().replace('\n', " ") +} + +fn get_unique_token(src: &str) -> Vec<&str> { + let mut data = SEPARATOR_REGEX.split(src).unique().collect::<Vec<_>>(); + + if SORT { + data.string_sort_unstable(natural_lexical_cmp); } - wg.wait(); - let end = Instant::now(); - println!("{:?} seconds.", end - start); + + data +} + +async fn get_filename_from_meta(path: &Path) -> anyhow::Result<String> { + let yaml = fs::read_to_string(path).await?; + let docs = YamlLoader::load_from_str(&yaml)?; + let meta = &docs[0]; + + let label = meta["label"] + .as_str() + .ok_or_else(|| anyhow::anyhow!("label not found"))?; + + Ok(format!("{}/extracted-words-for-{}.txt", OUTDIR, label)) +} + +#[tokio::main] +async fn main() -> std::io::Result<()> { + let start = std::time::Instant::now(); + let path = glob(FILE_DIR).expect("failed to read glob pattern"); + + let submissions = path.map(|entry| { + tokio::spawn(async { + let yaml_path = entry.expect("should be existed"); + let txt_path = yaml_path.with_extension("txt"); + + let outdir_submission = + tokio::spawn(async { create_outdir().await.expect("unable to create outdir") }); + + let read_text_file_submission = tokio::spawn(async move { + let data = read_file(&txt_path).await; + let tokens = get_unique_token(&data); + + tokens.join("\n") + }); + + let filename_submission = tokio::spawn(async move { + get_filename_from_meta(&yaml_path) + .await + .expect("should be existed") + }); + + let (tokens, filename, _) = tokio::join!( + read_text_file_submission, + filename_submission, + outdir_submission, + ); + + fs::write( + filename.expect("failed to run filename"), + tokens.expect("failed to get tokens"), + ) + .await + .expect("failed to write"); + }) + }); + + futures::future::join_all(submissions).await; + + println!("{:?}", start.elapsed()); Ok(()) }