jpn_wordbook

This commit is contained in:
ゆめ 2023-09-16 22:48:41 -05:00
parent 8142e8161e
commit 2a714d029a
22 changed files with 2103 additions and 19 deletions

511
Cargo.lock generated
View file

@ -52,6 +52,18 @@ dependencies = [
"subtle",
]
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
@ -226,7 +238,7 @@ dependencies = [
"async-trait",
"axum-core",
"bitflags 1.3.2",
"bytes",
"bytes 1.4.0",
"futures-util",
"http",
"http-body",
@ -256,7 +268,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
dependencies = [
"async-trait",
"bytes",
"bytes 1.4.0",
"futures-util",
"http",
"http-body",
@ -273,7 +285,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "447f28c85900215cc1bea282f32d4a2f22d55c5a300afdfbc661c8d6a632e063"
dependencies = [
"arc-swap",
"bytes",
"bytes 1.4.0",
"futures-util",
"http",
"http-body",
@ -370,12 +382,45 @@ version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c"
dependencies = [
"byteorder",
"iovec",
]
[[package]]
name = "bytes"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
[[package]]
name = "bzip2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [
"bzip2-sys",
"futures 0.1.31",
"libc",
"tokio-io",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cc"
version = "1.0.79"
@ -492,6 +537,23 @@ dependencies = [
"version_check",
]
[[package]]
name = "cookie_store"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d606d0fba62e13cf04db20536c05cb7f13673c161cb47a47a82b9b9e7d3f1daa"
dependencies = [
"cookie",
"idna 0.2.3",
"log",
"publicsuffix",
"serde",
"serde_derive",
"serde_json",
"time 0.3.23",
"url",
]
[[package]]
name = "core-foundation"
version = "0.9.3"
@ -537,6 +599,50 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.2",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn 2.0.26",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "ctr"
version = "0.9.2"
@ -560,6 +666,17 @@ dependencies = [
"serde",
]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "diesel"
version = "2.1.0"
@ -615,6 +732,27 @@ dependencies = [
"subtle",
]
[[package]]
name = "dtoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653"
[[package]]
name = "dtoa-short"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74"
dependencies = [
"dtoa",
]
[[package]]
name = "ego-tree"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]]
name = "email-encoding"
version = "0.2.0"
@ -744,6 +882,22 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678"
[[package]]
name = "futures"
version = "0.3.28"
@ -833,6 +987,15 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -843,6 +1006,15 @@ dependencies = [
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.10"
@ -876,7 +1048,7 @@ version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
dependencies = [
"bytes",
"bytes 1.4.0",
"fnv",
"futures-core",
"futures-sink",
@ -924,13 +1096,27 @@ dependencies = [
"winapi",
]
[[package]]
name = "html5ever"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "http"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
dependencies = [
"bytes",
"bytes 1.4.0",
"fnv",
"itoa",
]
@ -941,7 +1127,7 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
dependencies = [
"bytes",
"bytes 1.4.0",
"http",
"pin-project-lite",
]
@ -979,7 +1165,7 @@ version = "0.14.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468"
dependencies = [
"bytes",
"bytes 1.4.0",
"futures-channel",
"futures-core",
"futures-util",
@ -1032,7 +1218,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
dependencies = [
"bytes",
"bytes 1.4.0",
"hyper",
"native-tls",
"tokio",
@ -1062,6 +1248,17 @@ dependencies = [
"cc",
]
[[package]]
name = "idna"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "idna"
version = "0.3.0"
@ -1120,6 +1317,15 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "iovec"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
dependencies = [
"libc",
]
[[package]]
name = "ipnet"
version = "2.8.0"
@ -1225,12 +1431,38 @@ version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
dependencies = [
"log",
"phf 0.10.1",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "match_cfg"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
[[package]]
name = "matches"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
[[package]]
name = "matchit"
version = "0.7.0"
@ -1324,6 +1556,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "nom"
version = "7.1.3"
@ -1476,6 +1714,86 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared 0.10.0",
]
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_macros",
"phf_shared 0.11.2",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand",
]
[[package]]
name = "phf_generator"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared 0.11.2",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
"phf_generator 0.11.2",
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.26",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project"
version = "1.1.2"
@ -1532,6 +1850,12 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -1565,6 +1889,22 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "psl-types"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac"
[[package]]
name = "publicsuffix"
version = "2.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96a8c1bda5ae1af7f99a2962e49df150414a43d62404644d98dd5c3a93d07457"
dependencies = [
"idna 0.3.0",
"psl-types",
]
[[package]]
name = "quote"
version = "1.0.31"
@ -1655,7 +1995,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
dependencies = [
"base64 0.21.2",
"bytes",
"bytes 1.4.0",
"cookie",
"cookie_store",
"encoding_rs",
"futures-core",
"futures-util",
@ -1852,6 +2194,23 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c95a930e03325234c18c7071fd2b60118307e025d6fff3e12745ffbf63a3d29c"
dependencies = [
"ahash",
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"once_cell",
"selectors",
"smallvec",
"tendril",
]
[[package]]
name = "sct"
version = "0.7.0"
@ -1885,6 +2244,25 @@ dependencies = [
"libc",
]
[[package]]
name = "selectors"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
dependencies = [
"bitflags 2.3.3",
"cssparser",
"derive_more",
"fxhash",
"log",
"new_debug_unreachable",
"phf 0.10.1",
"phf_codegen",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "serde"
version = "1.0.173"
@ -1992,12 +2370,12 @@ dependencies = [
"async-tungstenite",
"base64 0.13.1",
"bitflags 1.3.2",
"bytes",
"bytes 1.4.0",
"cfg-if",
"chrono",
"dashmap",
"flate2",
"futures",
"futures 0.3.28",
"mime",
"mime_guess",
"parking_lot",
@ -2013,6 +2391,15 @@ dependencies = [
"url",
]
[[package]]
name = "servo_arc"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha-1"
version = "0.10.1"
@ -2047,6 +2434,12 @@ dependencies = [
"windows-sys 0.42.0",
]
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "slab"
version = "0.4.8"
@ -2078,6 +2471,38 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "string_cache"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared 0.10.0",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
"proc-macro2",
"quote",
]
[[package]]
name = "stringmatch"
version = "0.4.0"
@ -2140,6 +2565,17 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "thirtyfour"
version = "0.31.0"
@ -2151,7 +2587,7 @@ dependencies = [
"chrono",
"cookie",
"fantoccini",
"futures",
"futures 0.3.28",
"http",
"log",
"parking_lot",
@ -2261,7 +2697,7 @@ checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da"
dependencies = [
"autocfg",
"backtrace",
"bytes",
"bytes 1.4.0",
"libc",
"mio",
"num_cpus",
@ -2271,6 +2707,17 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "tokio-io"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674"
dependencies = [
"bytes 0.4.12",
"futures 0.1.31",
"log",
]
[[package]]
name = "tokio-macros"
version = "2.1.0"
@ -2313,13 +2760,24 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-stream"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842"
dependencies = [
"futures-core",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d"
dependencies = [
"bytes",
"bytes 1.4.0",
"futures-core",
"futures-sink",
"pin-project-lite",
@ -2385,7 +2843,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55ae70283aba8d2a8b411c695c437fe25b8b5e44e23e780662002fc72fb47a82"
dependencies = [
"bitflags 2.3.3",
"bytes",
"bytes 1.4.0",
"futures-core",
"futures-util",
"http",
@ -2456,7 +2914,7 @@ checksum = "e27992fd6a8c29ee7eef28fc78349aa244134e10ad447ce3b9f0ac0ed0fa4ce0"
dependencies = [
"base64 0.13.1",
"byteorder",
"bytes",
"bytes 1.4.0",
"http",
"httparse",
"log",
@ -2517,6 +2975,12 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "universal-hash"
version = "0.5.1"
@ -2551,6 +3015,12 @@ dependencies = [
"serde",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "urlparse"
version = "0.7.3"
@ -2717,7 +3187,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9973cb72c8587d5ad5efdb91e663d36177dc37725e6c90ca86c626b0cc45c93f"
dependencies = [
"base64 0.13.1",
"bytes",
"bytes 1.4.0",
"cookie",
"http",
"log",
@ -2941,12 +3411,15 @@ dependencies = [
"axum",
"axum-server",
"base64 0.21.2",
"bzip2",
"chrono",
"chrono-humanize",
"clap",
"csv",
"diesel",
"diesel_migrations",
"flate2",
"futures 0.3.28",
"hyper",
"lazy_static",
"lettre",
@ -2956,6 +3429,7 @@ dependencies = [
"reqwest",
"rust-embed",
"rustls-pemfile",
"scraper",
"serde",
"serde_json",
"serde_yaml",
@ -2965,7 +3439,10 @@ dependencies = [
"thirtyfour",
"tokio",
"tokio-rustls 0.24.1",
"tokio-stream",
"tower",
"tower-http",
"url",
"urlencoding",
"uuid",
]

View file

@ -17,21 +17,25 @@ async-trait = "0.1.68"
axum = { version = "0.6.18", features = ["http2"] }
axum-server = { version = "0.5.1", features = ["rustls", "rustls-pemfile", "tls-rustls"] }
base64 = "0.21.2"
bzip2 = { version = "0.4.4", features = ["tokio"] }
chrono = { version = "0.4.26", features = ["serde"] }
chrono-humanize = "0.2.2"
clap = { version = "4.3.2", features = ["derive"] }
csv = "1.2.2"
diesel = { version = "2.1.0", features = ["sqlite", "chrono"] }
diesel_migrations = { version = "2.1.0", features = ["sqlite"] }
flate2 = "1.0.26"
futures = "0.3.28"
hyper = "0.14.26"
lazy_static = "1.4.0"
lettre = "0.10.4"
log = "0.4.18"
rand_core = { version = "0.6.4", features = ["getrandom"] }
regex = "1.8.4"
reqwest = { version = "0.11.18", features = ["json"] }
reqwest = { version = "0.11.18", features = ["json", "cookies"] }
rust-embed = "6.7.0"
rustls-pemfile = "1.0.2"
scraper = "0.17.1"
serde = { version = "1.0.163", features = ["derive"] }
serde_json = "1.0.96"
serde_yaml = "0.9.21"
@ -41,13 +45,16 @@ tempfile = "3.6.0"
thirtyfour = "0.31.0"
tokio = { version = "1.28.2", features = ["macros", "rt-multi-thread", "time"] }
tokio-rustls = "0.24.0"
tokio-stream = "0.1.14"
tower = { version = "0.4.13", features = ["limit", "timeout", "buffer"] }
tower-http = { version = "0.4.0", features = ["timeout", "limit", "cors"] }
url = "2.4.0"
urlencoding = "2.1.3"
uuid = { version = "1.3.3", features = ["v4"] }
[profile.release]
lto = true
lto = "thin"
[[bin]]
name = "yoake_server"

View file

@ -3,6 +3,7 @@ listen:
addr: 127.0.0.1:3000
db:
data_dir: data
url: sqlite://data/development.db
canvas_lms:

View file

@ -0,0 +1,2 @@
-- Reverts `up.sql` for this migration by removing the `jpn_wordbook` table.
drop table jpn_wordbook;

View file

@ -0,0 +1,15 @@
-- Creates the `jpn_wordbook` table: one row per stored wordbook entry,
-- keyed by `uuid`. Every column is mandatory (`not null`).
-- NOTE(review): the short column names look like abbreviations
-- (fu = furigana, en = English, ex = examples, src = source?) — confirm
-- against the model definition.
create table jpn_wordbook (
uuid text primary key not null,
ja text not null,
altn text not null,
jm text not null,
fu text not null,
en text not null,
ex text not null,
src text not null,
-- Row timestamps.
created datetime not null,
updated datetime not null
)

View file

@ -0,0 +1,161 @@
use std::{path::PathBuf, sync::Arc};
use async_trait::async_trait;
use axum::{routing::get, Extension, Router};
use lazy_static::lazy_static;
use reqwest::redirect::Policy;
use tokio::sync::Mutex;
use crate::{
comm::{Message, MessageDigestor},
config::Config,
AppState,
};
use self::{
routes_sources::{
route_combo_search, route_combo_search_top, route_goo_search, route_jisho_search,
route_jisho_search_top, route_tatoeba_search,
},
routes_wordbook::{route_get_wordbook, route_get_wordbook_csv, route_store_wordbook},
sources::Lookup,
};
use super::App;
mod routes_sources;
mod routes_wordbook;
pub mod sources;
/// Application object for the Japanese wordbook feature.
///
/// `state` is `None` when the value is created by `new` and is replaced with
/// `Some(..)` by `App::initialize`. It sits behind an async mutex because it
/// is accessed through a shared `Arc<JpnWordBookApp>`.
pub struct JpnWordBookApp {
state: Mutex<Option<JpnWordBookAppState>>,
}
/// Everything the wordbook app needs once it has been initialized.
struct JpnWordBookAppState {
// Shared, application-wide state handed to `App::initialize`.
global_app_state: Arc<Mutex<AppState>>,
// Lookup client for the jisho source.
jisho: sources::jisho::Client,
// Lookup client for the goo source.
goo: sources::goo::Client,
// Lookup client for the tatoeba source.
tatoeba: sources::tatoeba::Client,
}
impl JpnWordBookApp {
    /// Creates the app in its uninitialized form (`state` is `None`).
    ///
    /// The state is populated later by `App::initialize`.
    pub fn new() -> Self {
        Self {
            state: Mutex::new(None),
        }
    }
}

/// `Default` mirrors `new()`, so the type can be used wherever a default
/// constructor is expected.
impl Default for JpnWordBookApp {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl MessageDigestor for Arc<JpnWordBookApp> {
    /// Answers messages whose body has the form `jisho <query>` with the
    /// results of a Jisho lookup for `<query>`.
    ///
    /// Returns `Ok(None)` when the body does not match that form, and
    /// `Ok(Some(reply))` with one numbered section per result otherwise.
    ///
    /// # Errors
    ///
    /// Fails when the app has not been initialized yet or when the Jisho
    /// lookup itself fails.
    async fn digest(&self, message: &Message) -> anyhow::Result<Option<Message>> {
        lazy_static! {
            static ref REGEX_WORDBOOK_QUERY: regex::Regex =
                regex::Regex::new(r"^jisho (.*)$").unwrap();
        }

        // One `captures` call both tests the pattern and extracts the query,
        // instead of running the regex twice.
        let captures = match REGEX_WORDBOOK_QUERY.captures(message.body.as_str()) {
            Some(captures) => captures,
            None => return Ok(None),
        };
        // Group 1 always participates when this pattern matches.
        let query = &captures[1];

        let state = self.state.lock().await;
        // `state` is `None` until `App::initialize` has run; surface that as
        // an error instead of panicking.
        let state = state
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("jpn_wordbook app is not initialized"))?;

        let results = state
            .jisho
            .lookup(query)
            .await
            .map_err(|e| anyhow::anyhow!("Failed to lookup word: {}", e))?;

        let mut msg = Message::default();
        msg.subject = "Jisho search successful".to_string();

        for (i, result) in results.iter().enumerate() {
            // Borrow the optional fields rather than cloning them; the
            // fallback strings are only allocated when actually needed.
            let furigana = result.fu.as_deref().unwrap_or("(no furigana)");
            let english = result
                .en
                .as_ref()
                .map(|lines| lines.join("\n"))
                .unwrap_or_else(|| "(no english)".to_string());
            let japanese = result
                .jm
                .as_ref()
                .map(|lines| lines.join("\n"))
                .unwrap_or_else(|| "(解説を見つからない)".to_string());

            msg.body.push_str(&format!(
                "\n{}. {}\n{}\n{}\n{}\n",
                i + 1,
                result.ja,
                furigana,
                english,
                japanese
            ));
        }

        Ok(Some(msg))
    }
}
#[async_trait]
impl App for JpnWordBookApp {
    /// Builds the jisho, goo and tatoeba clients and stores them, together
    /// with the shared application state, as this app's state.
    ///
    /// # Panics
    ///
    /// Panics if the goo HTTP client cannot be built or if the tatoeba
    /// client fails to initialize.
    async fn initialize(self: Arc<Self>, config: &'static Config, app_state: Arc<Mutex<AppState>>) {
        // The lock is taken before the clients are built, so anything that
        // locks `state` during initialization waits for it to finish rather
        // than observing `None`.
        let mut state = self.state.lock().await;

        // The goo client does not follow redirects, sends a browser
        // user agent and keeps cookies between requests.
        let goo_http = reqwest::ClientBuilder::new()
            .redirect(Policy::none())
            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0")
            .cookie_store(true)
            .build()
            .expect("Failed to build goo HTTP client");

        let tatoeba = sources::tatoeba::Client::new(
            reqwest::Client::new(),
            PathBuf::from(config.db.data_dir.clone()),
        )
        .await
        .expect("Failed to initialize tatoeba client");

        *state = Some(JpnWordBookAppState {
            global_app_state: app_state,
            jisho: sources::jisho::Client::new(reqwest::Client::new()),
            goo: sources::goo::Client::new(goo_http),
            tatoeba,
        });
    }

    /// Returns the router for the dictionary-source search endpoints and the
    /// wordbook endpoints, with this app attached as an `Extension`.
    fn api_routes(self: Arc<Self>) -> Router {
        Router::new()
            .route(
                "/jpn_wordbook/sources/combo/search",
                get(route_combo_search),
            )
            .route(
                "/jpn_wordbook/sources/combo/search_top",
                get(route_combo_search_top),
            )
            .route(
                "/jpn_wordbook/sources/tatoeba/search",
                get(route_tatoeba_search),
            )
            .route("/jpn_wordbook/sources/goo/search", get(route_goo_search))
            .route(
                "/jpn_wordbook/sources/jisho/search",
                get(route_jisho_search),
            )
            .route(
                "/jpn_wordbook/sources/jisho/search_top",
                get(route_jisho_search_top),
            )
            .route(
                "/jpn_wordbook/wordbook",
                get(route_get_wordbook).post(route_store_wordbook),
            )
            .route(
                "/jpn_wordbook/wordbook/csv_export",
                get(route_get_wordbook_csv),
            )
            // `self` is already an owned `Arc`, so it can be moved into the
            // extension without an extra clone.
            .layer(Extension(self))
    }

    /// Registers this app as its own (single) message digestor.
    fn message_digestors(self: Arc<Self>) -> Vec<Box<dyn MessageDigestor + Send + Sync>> {
        vec![Box::new(self)]
    }
}

View file

@ -0,0 +1,196 @@
use std::{collections::HashMap, sync::Arc};
use axum::{extract::Query, Extension};
use futures::StreamExt;
use log::{error, info};
use serde::Deserialize;
use crate::http::{ApiResponse, ApiResult};
use super::{
sources::{Lookup, LookupResult},
JpnWordBookApp,
};
/// Query-string parameters shared by all source search endpoints.
#[derive(Deserialize)]
pub struct SearchQuery {
// The text to look up; passed unchanged to the source clients.
pub query: String,
}
pub async fn route_jisho_search(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<Vec<LookupResult>> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let results = state.jisho.lookup(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
Ok(ApiResponse::ok(
"Jisho search successful".to_string(),
Some(results),
))
}
pub async fn route_jisho_search_top(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<LookupResult> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let result = state.jisho.lookup_top(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
Ok(ApiResponse::ok(
"Jisho search successful".to_string(),
Some(result),
))
}
pub async fn route_goo_search(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<Vec<LookupResult>> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let results = state.goo.lookup(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
Ok(ApiResponse::ok(
"Goo search successful".to_string(),
Some(results),
))
}
pub async fn route_tatoeba_search(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<Vec<LookupResult>> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let results = state.tatoeba.lookup(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
Ok(ApiResponse::ok(
"Tatoeba search successful".to_string(),
Some(results),
))
}
pub async fn route_combo_search(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<Vec<LookupResult>> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let start = std::time::Instant::now();
let results_jisho = state.jisho.lookup(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
let time = start.elapsed();
info!("Jisho lookup took {}ms", time.as_millis());
let start = std::time::Instant::now();
let results_goo = state.goo.lookup(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
let time = start.elapsed();
info!("Goo lookup took {}ms", time.as_millis());
let mut combined_results = HashMap::new();
for result in results_jisho {
combined_results.insert(result.ja.clone(), result);
}
for result in results_goo {
if let Some(existing_result) = combined_results.get_mut(&result.ja) {
existing_result.merge(result);
} else {
combined_results.insert(result.ja.clone(), result);
}
}
let combined_results = combined_results
.into_iter()
.map(|(_, v)| v)
.collect::<Vec<_>>();
let start = std::time::Instant::now();
let combined_results_stream = tokio_stream::iter(combined_results.into_iter());
let mut combined_results = combined_results_stream
.map(|mut r| async move {
r.merge(state.tatoeba.lookup_top(&r.ja).await.unwrap());
r.clone()
})
.buffer_unordered(10)
.collect::<Vec<_>>()
.await;
let time = start.elapsed();
info!("Tatoeba lookup took {}ms", time.as_millis());
combined_results.sort_by(|a, b| {
b.match_score(&query.query)
.cmp(&a.match_score(&query.query))
});
Ok(ApiResponse::ok(
"Combo search successful".to_string(),
Some(combined_results),
))
}
pub async fn route_combo_search_top(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(query): Query<SearchQuery>,
) -> ApiResult<LookupResult> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let results_jisho = state.jisho.lookup_top(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
let results_goo = state.goo.lookup_top(&query.query).await.map_err(|e| {
error!("Failed to lookup word: {}", e);
ApiResponse::error("Failed to lookup word".to_string(), 500, None)
})?;
let mut combined_result = if results_jisho.ja != results_goo.ja {
if results_jisho.match_score(&query.query) > results_goo.match_score(&query.query) {
results_jisho
} else {
results_goo
}
} else {
let mut combined_result = results_jisho;
combined_result.merge(results_goo);
combined_result
};
combined_result.merge(state.tatoeba.lookup_top(&combined_result.ja).await.unwrap());
Ok(ApiResponse::ok(
"Combo search successful".to_string(),
Some(combined_result),
))
}

View file

@ -0,0 +1,183 @@
use std::sync::Arc;
use axum::{extract::Query, Extension, Json};
use chrono::NaiveDateTime;
use hyper::body::Bytes;
use log::error;
use serde::Deserialize;
use crate::{
apps::auth::{middleware::AuthInfo, Role},
http::{ApiResponse, ApiResult},
models::jpn_wordbook::{JpnWordbook, JpnWordbookExternal},
};
use super::{sources::LookupResult, JpnWordBookApp};
/// Query-string parameters for listing wordbook entries.
#[derive(Deserialize)]
pub struct GetWordbookOptions {
// When present, only entries created strictly before this time are returned.
until: Option<NaiveDateTime>,
// Row limit applied to the query; not optional.
limit: i64,
}
/// Query-string parameters for the CSV export.
#[derive(Deserialize)]
pub struct GetCsvOptions {
// Whether to emit a header row; the export handler treats a missing value as `true`.
header: Option<bool>,
}
pub async fn route_get_wordbook_csv(
Extension(app): Extension<Arc<JpnWordBookApp>>,
Query(GetCsvOptions { header }): Query<GetCsvOptions>,
) -> Result<Bytes, ApiResponse<()>> {
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let mut global_app_state = state.global_app_state.lock().await;
let results = {
use crate::schema::jpn_wordbook::dsl::*;
use diesel::prelude::*;
jpn_wordbook
.order_by(created.desc())
.load::<JpnWordbook>(&mut global_app_state.db)
.map_err(|e| {
error!("Failed to get wordbook: {}", e);
ApiResponse::error("Failed to get wordbook".to_string(), 500, None)
})?
};
let mut csv_file = Vec::new();
let mut csv_writer = csv::Writer::from_writer(&mut csv_file);
if header.unwrap_or(true) {
csv_writer
.write_record(&["uuid", "ja", "altn", "jm", "fu", "en", "ex", "src"])
.map_err(|e| {
error!("Failed to write csv header: {}", e);
ApiResponse::error("Failed to write csv header".to_string(), 500, None)
})?;
}
for result in results {
csv_writer
.write_record(&[
&result.uuid,
&result.ja,
&result.altn,
&result.jm,
&result.fu,
&result.en,
&result.ex,
&result.src,
])
.map_err(|e| {
error!("Failed to write csv record: {}", e);
ApiResponse::error("Failed to write csv record".to_string(), 500, None)
})?;
}
csv_writer.flush().map_err(|e| {
error!("Failed to flush csv writer: {}", e);
ApiResponse::error("Failed to flush csv writer".to_string(), 500, None)
})?;
drop(csv_writer);
Ok(Bytes::from(csv_file))
}
/// Returns one page of wordbook rows, newest first.
///
/// `limit` bounds the page size; when `until` is given only rows created
/// strictly before it are returned. Rows are converted to their external
/// (list-valued) representation before being sent.
pub async fn route_get_wordbook(
    Extension(app): Extension<Arc<JpnWordBookApp>>,
    Query(GetWordbookOptions { until, limit }): Query<GetWordbookOptions>,
) -> ApiResult<Vec<JpnWordbookExternal>> {
    let app_guard = app.state.lock().await;
    let app_state = app_guard.as_ref().unwrap();
    let mut shared = app_state.global_app_state.lock().await;

    let rows = {
        use crate::schema::jpn_wordbook::dsl::*;
        use diesel::prelude::*;
        // Both variants share limit + ordering; only the cutoff filter differs.
        let page = jpn_wordbook.limit(limit).order_by(created.desc());
        let loaded = match until {
            Some(cutoff) => page
                .filter(created.lt(cutoff))
                .load::<JpnWordbook>(&mut shared.db),
            None => page.load::<JpnWordbook>(&mut shared.db),
        };
        loaded.map_err(|e| {
            error!("Failed to get wordbook: {}", e);
            ApiResponse::error("Failed to get wordbook".to_string(), 500, None)
        })?
    };

    let payload: Vec<JpnWordbookExternal> = rows.into_iter().map(Into::into).collect();
    Ok(ApiResponse::ok(
        "Word lookup successful".to_string(),
        Some(payload),
    ))
}
pub async fn route_store_wordbook(
auth: AuthInfo,
Extension(app): Extension<Arc<JpnWordBookApp>>,
Json(word): Json<LookupResult>,
) -> ApiResult<()> {
auth.check_for_any_role(&[Role::Admin])?;
let state = app.state.lock().await;
let state = state.as_ref().unwrap();
let mut global_app_state = state.global_app_state.lock().await;
let existing_uuid = {
use crate::schema::jpn_wordbook::dsl::*;
use diesel::prelude::*;
jpn_wordbook
.select(uuid)
.filter(ja.eq(&word.ja))
.first::<String>(&mut global_app_state.db)
.optional()
.map_err(|e| {
error!("Failed to get word: {}", e);
ApiResponse::error("Failed to get word".to_string(), 500, None)
})?
};
{
use crate::schema::jpn_wordbook::dsl::*;
use diesel::prelude::*;
diesel::delete(jpn_wordbook)
.filter(ja.eq(&word.ja))
.execute(&mut global_app_state.db)
.map_err(|e| {
error!("Failed to delete word: {}", e);
ApiResponse::error("Failed to delete word".to_string(), 500, None)
})?;
let mut word = Into::<JpnWordbook>::into(word);
if let Some(existing_uuid) = existing_uuid {
word.uuid = existing_uuid;
}
diesel::insert_into(jpn_wordbook)
.values(&word)
.execute(&mut global_app_state.db)
.map_err(|e| {
error!("Failed to insert word: {}", e);
ApiResponse::error("Failed to insert word".to_string(), 500, None)
})?;
}
Ok(ApiResponse::ok(
"Word inserted successfully".to_string(),
None,
))
}

View file

@ -0,0 +1,240 @@
use async_trait::async_trait;
use futures::StreamExt;
use log::error;
use scraper::{Html, Selector};
use super::{match_len, Lookup, LookupResult};
pub fn strip_url_hash<S: AsRef<str>>(url: S, origin: &'static str) -> String {
let url_with_host = if url.as_ref().starts_with("//") {
format!("https:{}", url.as_ref())
} else if url.as_ref().starts_with("/") {
format!("{}{}", origin, url.as_ref())
} else {
url.as_ref().to_string()
};
let mut url = url::Url::parse(&url_with_host).unwrap();
if url.host().is_none() {
url.set_host(Some(origin)).unwrap();
}
url.set_fragment(None);
url.into()
}
/// Scraping client for the goo dictionary site (`dictionary.goo.ne.jp`).
pub struct Client {
// HTTP client used for every request issued by this source.
client: reqwest::Client,
}
/// One search hit that can be followed up with `Client::lookup_definition`.
pub struct Candidate {
/// URL of the entry page to fetch for the full definition.
pub url: String,
/// Title text of the hit (the search prefix itself when the search redirected).
pub title: String,
/// Snippet text from the result list; `None` when the candidate came from a redirect.
pub text: Option<String>,
}
impl Client {
pub fn new(client: reqwest::Client) -> Self {
Self { client }
}
async fn fetch_html<S: AsRef<str>>(&self, url: S) -> anyhow::Result<Html> {
let response = self.client.get(url.as_ref()).send().await?;
let text = response.text().await?;
Ok(Html::parse_document(&text))
}
async fn lookup_candidates<S: AsRef<str>>(&self, prefix: S) -> anyhow::Result<Vec<Candidate>> {
let resp = self
.client
.get(format!(
"https://dictionary.goo.ne.jp/srch/jn/{}/m0u/",
urlencoding::encode(prefix.as_ref())
))
.send()
.await?;
if let Some(redir) = resp.headers().get("Location") {
return Ok(vec![Candidate {
url: redir.to_str()?.to_string(),
title: prefix.as_ref().to_string(),
text: None,
}]);
}
let mut candidates = Vec::new();
let response_html = Html::parse_document(&resp.text().await?);
let clist = response_html
.select(&Selector::parse("div.section ul.content_list").unwrap())
.next()
.unwrap();
for c in clist.select(&Selector::parse("li").unwrap()) {
let url = c
.select(&Selector::parse("a").unwrap())
.next()
.unwrap()
.value()
.attr("href")
.unwrap();
let real_url = strip_url_hash(url, "https://dictionary.goo.ne.jp");
let title = c.select(&Selector::parse("p.title").unwrap()).next();
if title.is_none() {
continue;
}
let title = title.unwrap().text();
let text = c
.select(&Selector::parse("p.text").unwrap())
.next()
.unwrap()
.text();
candidates.push(Candidate {
url: real_url,
title: title.fold(String::new(), |mut acc, s| {
acc.push_str(s);
acc
}),
text: Some(text.fold(String::new(), |mut acc, s| {
acc.push_str(s);
acc
})),
});
}
Ok(candidates)
}
pub async fn lookup_definition<S: AsRef<str>>(
&self,
url: S,
query: &str,
) -> anyhow::Result<LookupResult> {
let response_html = self.fetch_html(url).await?;
if let Some(error_ele) = response_html
.select(&Selector::parse("div#NR-main div.error").unwrap())
.next()
{
let error = error_ele.text().fold(String::new(), |mut acc, s| {
acc.push_str(s);
acc
});
return Err(anyhow::anyhow!("Error: {}", error));
}
let keyword_ele = response_html
.select(&Selector::parse("div#NR-main h1").unwrap())
.next()
.unwrap();
let keyword = keyword_ele
.text()
.next()
.unwrap()
.replace("\n", "")
.replace("(", "")
.replace("", "")
.replace(")", "")
.replace("", "");
let keywords = keyword.split("").collect::<Vec<_>>();
let keyword = keywords
.iter()
.max_by(|a, b| match_len(query, a).cmp(&match_len(query, b)))
.unwrap()
.to_string();
let altn_keywords = keywords
.iter()
.filter(|k| ***k != keyword)
.map(|k| k.to_string())
.collect::<Vec<_>>();
let yomi_ele = keyword_ele
.select(&Selector::parse("span.yomi").unwrap())
.next();
let yomi = yomi_ele.map(|e| {
e.text()
.next()
.unwrap()
.replace("(", "")
.replace("", "")
.replace(")", "")
.replace("", "")
});
let tense_list_ele = response_html
.select(&Selector::parse("div.section").unwrap())
.next()
.unwrap();
let mut meanings = Vec::new();
for t in tense_list_ele.select(&Selector::parse("ol.meaning").unwrap()) {
let mut meaning = String::new();
for (i, m) in t.select(&Selector::parse(".text").unwrap()).enumerate() {
if i != 0 {
meaning.push_str("\n");
}
meaning.push_str(&m.text().fold(String::new(), |mut acc, s| {
acc.push_str(s);
acc
}));
}
meanings.push(meaning);
}
if meanings.len() == 0 {
for ele in
response_html.select(&Selector::parse("div.meaning_area div.contents").unwrap())
{
let mut meaning = String::new();
for (i, m) in ele.select(&Selector::parse(".text").unwrap()).enumerate() {
if i != 0 {
meaning.push_str("\n");
}
meaning.push_str(&m.text().fold(String::new(), |mut acc, s| {
acc.push_str(s);
acc
}));
}
meanings.push(meaning);
}
}
let mut result = LookupResult::new(keyword, "goo_jp");
result.altn = if altn_keywords.len() > 0 {
Some(altn_keywords)
} else {
None
};
result.fu = yomi;
result.jm = Some(meanings);
Ok(result)
}
}
#[async_trait]
impl Lookup for Client {
async fn lookup(&self, word: &str) -> anyhow::Result<Vec<LookupResult>> {
let candiates = self.lookup_candidates(word).await?;
let candidates_stream = tokio_stream::iter(candiates.into_iter());
let results = candidates_stream
.map(|c| async move { self.lookup_definition(c.url, word).await })
.buffer_unordered(10)
.collect::<Vec<_>>()
.await
.into_iter()
.map(|r| {
if let Err(e) = r {
error!("Failed to lookup definition: {}", e);
return None;
}
Some(r.unwrap())
})
.filter(|r| r.is_some())
.map(|r| r.unwrap())
.collect::<Vec<_>>();
Ok(results)
}
async fn lookup_top(&self, word: &str) -> anyhow::Result<LookupResult> {
let candidates = self.lookup_candidates(word).await?;
if candidates.len() == 0 {
return Err(anyhow::anyhow!("No candidates found"));
}
let result = self
.lookup_definition(candidates[0].url.clone(), word)
.await?;
Ok(result)
}
}

View file

@ -0,0 +1,118 @@
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use super::{Lookup, LookupResult};
/// Client for the jisho.org word-search JSON API.
pub struct Client {
// HTTP client used for every request issued by this source.
client: reqwest::Client,
}
/// Top-level envelope of a jisho.org API response, generic over the payload type.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Response<T> {
/// Metadata object sent alongside the payload.
pub meta: ResponseMeta,
/// The payload itself (a list of `WordResponse` for word searches).
pub data: T,
}
/// The `meta` object of a jisho.org API response.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct ResponseMeta {
/// Status code reported inside the response body (presumably an HTTP-style code — confirm against the API).
pub status: u16,
}
/// One entry of the `data` array returned by the jisho.org word search.
///
/// Only `japanese` and `senses` are consumed when converting to a
/// `LookupResult`; the remaining fields are deserialized but otherwise unused
/// in this file.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct WordResponse {
pub slug: String,
pub is_common: bool,
pub tags: Vec<String>,
pub jlpt: Vec<String>,
/// Written forms with readings; the first element supplies headword and reading.
pub japanese: Vec<Japanese>,
/// Senses of the word; each sense's English definitions become one `en` entry.
pub senses: Vec<Sense>,
pub attribution: Attribution,
}
impl Into<LookupResult> for WordResponse {
fn into(self) -> LookupResult {
let first_meaning = &self.japanese[0];
let mut result = LookupResult::new(
first_meaning
.word
.as_ref()
.or_else(|| Some(&first_meaning.reading))
.unwrap()
.to_string(),
"Jisho.org",
);
result.fu = Some(first_meaning.reading.clone());
let mut en = Vec::new();
for sense in self.senses {
let mut en_chunk = String::new();
for (i, def) in sense.english_definitions.into_iter().enumerate() {
if i != 0 {
en_chunk.push_str("; ");
}
en_chunk.push_str(&def);
}
en.push(en_chunk);
}
result.en = Some(en);
result
}
}
/// One written-form/reading pair of a jisho.org entry.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Japanese {
/// Written form; optional, in which case the reading is used as the headword.
pub word: Option<String>,
/// Reading of the word; copied into `LookupResult::fu`.
pub reading: String,
}
/// One sense of a jisho.org entry. Only `english_definitions` is read in this file.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Sense {
/// English glosses for this sense; joined with `"; "` when building a `LookupResult`.
pub english_definitions: Vec<String>,
pub parts_of_speech: Vec<String>,
pub links: Vec<Link>,
pub tags: Vec<String>,
pub see_also: Vec<String>,
pub info: Vec<String>,
}
/// A text/URL pair attached to a sense (deserialized but not otherwise used in this file).
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Link {
pub text: String,
pub url: String,
}
/// The `attribution` object of an entry (deserialized but not otherwise used in this file).
// NOTE(review): both fields are required booleans here; confirm the API never sends other value types.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Attribution {
pub jmdict: bool,
pub jmnedict: bool,
}
#[async_trait]
impl Lookup for Client {
    /// Searches jisho.org for `word` and converts every returned entry into a
    /// `LookupResult`. Request and JSON decoding errors are propagated.
    async fn lookup(&self, word: &str) -> anyhow::Result<Vec<LookupResult>> {
        // Same request as `lookup_word`; only the conversion step is added here.
        let entries = self.lookup_word(word).await?;
        Ok(entries.into_iter().map(Into::into).collect())
    }
}
impl Client {
pub fn new(client: reqwest::Client) -> Self {
Self { client }
}
pub async fn lookup_word<S: AsRef<str>>(&self, word: S) -> anyhow::Result<Vec<WordResponse>> {
let query = url::form_urlencoded::Serializer::new(String::new())
.append_pair("keyword", word.as_ref())
.finish();
let url = format!("https://jisho.org/api/v1/search/words?{}", query.as_str());
let response = self.client.get(&url).send().await?;
let response: Response<Vec<WordResponse>> = response.json().await?;
Ok(response.data)
}
}

View file

@ -0,0 +1,153 @@
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
pub mod goo;
pub mod jisho;
pub mod tatoeba;
/// Returns the number of leading characters (Unicode scalar values) that
/// `query` and `word` have in common.
///
/// Comparison stops at the first differing character or when either string
/// ends, so the result never exceeds the shorter string's character count.
pub fn match_len(query: &str, word: &str) -> usize {
    query
        .chars()
        .zip(word.chars())
        .take_while(|(q, w)| q == w)
        .count()
}
/// Source-independent result of a dictionary lookup.
///
/// Each source fills only the fields it knows about; `merge` combines results
/// from several sources.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct LookupResult {
/// Headword.
pub ja: String,
/// Alternative spellings of the headword.
pub altn: Option<Vec<String>>,
/// Meanings scraped from the goo dictionary source.
pub jm: Option<Vec<String>>,
/// Reading of the headword.
pub fu: Option<String>,
/// English definitions (filled by the jisho source).
pub en: Option<Vec<String>>,
/// Example sentences (filled by the tatoeba source).
pub ex: Option<Vec<String>>,
/// Name of the source; `merge` appends further names separated by commas.
pub src: String,
}
/// Converts a lookup result into a new wordbook row.
///
/// * A fresh random UUID is assigned.
/// * List fields are flattened into single strings: `altn` joined with `","`,
///   `jm`/`en`/`ex` joined with newlines; absent fields become empty strings.
/// * `created` and `updated` are taken from one clock reading so that a new
///   row always has identical timestamps (two separate reads could differ).
///
/// Implemented as `From`, so existing `.into()` / `Into::<JpnWordbook>::into`
/// call sites keep working through the standard blanket `Into` implementation.
impl From<LookupResult> for crate::models::jpn_wordbook::JpnWordbook {
    fn from(result: LookupResult) -> Self {
        let now = chrono::Local::now().naive_utc();
        let join_lines =
            |items: Option<Vec<String>>| items.map(|v| v.join("\n")).unwrap_or_default();

        Self {
            uuid: uuid::Uuid::new_v4().to_string(),
            ja: result.ja,
            altn: result.altn.map(|v| v.join(",")).unwrap_or_default(),
            jm: join_lines(result.jm),
            fu: result.fu.unwrap_or_default(),
            en: join_lines(result.en),
            ex: join_lines(result.ex),
            src: result.src,
            created: now,
            updated: now,
        }
    }
}
impl LookupResult {
pub fn new(ja: String, src: &'static str) -> Self {
Self {
ja,
altn: None,
jm: None,
fu: None,
en: None,
ex: None,
src: src.to_string(),
}
}
pub fn merge(&mut self, other: Self) {
if self.altn.is_none() {
self.altn = other.altn;
} else {
if other.altn.is_some() {
self.altn.as_mut().unwrap().extend(other.altn.unwrap());
}
}
if self.jm.is_none() {
self.jm = other.jm;
}
if self.fu.is_none() {
self.fu = other.fu;
}
if self.en.is_none() {
self.en = other.en;
}
if self.ex.is_none() {
self.ex = other.ex;
}
if self.src != other.src {
self.src = format!("{},{}", self.src, other.src);
}
}
}
impl LookupResult {
    /// Scores how well this result's headword matches `word` (0–100).
    ///
    /// * `100` — the headword equals `word`.
    /// * `95`  — the headword starts with `word`.
    /// * otherwise — the share of the headword's characters covered by the
    ///   common prefix with `word`, as a percentage capped at `94`.
    ///
    /// The percentage is computed over characters on both sides. The previous
    /// version divided a character count by the headword's UTF-8 byte length,
    /// which under-scored multi-byte (e.g. Japanese) headwords and skewed
    /// comparisons between mixed-width strings. With the corrected ratio a
    /// headword that is a strict prefix of `word` would reach 100%, so the
    /// partial score is capped to keep it below the prefix and exact tiers.
    pub fn match_score<S: AsRef<str>>(&self, word: S) -> usize {
        let word = word.as_ref();
        if self.ja == word {
            return 100;
        }
        if self.ja.starts_with(word) {
            return 95;
        }
        let headword_chars = self.ja.chars().count();
        if headword_chars == 0 {
            return 0;
        }
        let partial = match_len(word, &self.ja) * 100 / headword_chars;
        partial.min(94)
    }
}
/// A dictionary source that can be queried for a word.
#[async_trait]
pub trait Lookup {
    /// Returns every result the source has for `word`.
    async fn lookup(&self, word: &str) -> anyhow::Result<Vec<LookupResult>>;

    /// Returns the result with the highest `match_score` for `word`
    /// (the last one among equals), or an error when the source returned
    /// nothing.
    async fn lookup_top(&self, word: &str) -> anyhow::Result<LookupResult> {
        self.lookup(word)
            .await?
            .into_iter()
            .max_by_key(|candidate| candidate.match_score(word))
            .ok_or_else(|| anyhow::anyhow!("No results found"))
    }
}

View file

@ -0,0 +1,159 @@
use std::{
collections::HashMap,
io::{BufRead, BufReader},
path::PathBuf,
};
use super::{Lookup, LookupResult};
use async_trait::async_trait;
use tokio::{fs::File, io::AsyncWriteExt};
/// Example-sentence source backed by a locally cached Tatoeba export.
pub struct Client {
// HTTP client used to download the export.
client: reqwest::Client,
// Directory in which `jpn_sentences.tsv.bz2` is stored.
data_dir: PathBuf,
// Per-character index into the export, filled by `build_char_index`.
chars_index: HashMap<char, CharIndex>,
}
/// Index entry for a single character of the sentence export.
pub enum CharIndex {
/// The character has more than 500 recorded occurrences; it is not used to narrow a search.
Hot,
/// Zero-based line numbers of the export on which the character occurs.
Cold(Vec<usize>),
}
impl Client {
    /// A character seen on more than this many distinct lines is marked
    /// `Hot` and no longer used to narrow searches.
    const HOT_THRESHOLD: usize = 500;

    /// File name of the cached export inside `data_dir`.
    const SENTENCES_FILE: &'static str = "jpn_sentences.tsv.bz2";

    /// Creates the client, downloads the sentence export into `data_dir`
    /// and builds the character index from it.
    pub async fn new(client: reqwest::Client, data_dir: PathBuf) -> anyhow::Result<Self> {
        let mut ret = Self {
            client,
            data_dir,
            chars_index: HashMap::new(),
        };
        ret.download_sentences().await?;
        ret.build_char_index()?;
        Ok(ret)
    }

    /// Downloads the Japanese sentence export into `data_dir`.
    ///
    /// The request is sent, and its HTTP status checked, before the local
    /// file is created, so a failed request no longer truncates a previously
    /// downloaded copy or stores an error page as the archive. The file is
    /// flushed before returning so that the index build that follows reads
    /// the complete data.
    pub async fn download_sentences(&self) -> anyhow::Result<()> {
        let url = "https://downloads.tatoeba.org/exports/per_language/jpn/jpn_sentences.tsv.bz2";
        let mut response = self.client.get(url).send().await?.error_for_status()?;
        let mut output_file = File::create(self.data_dir.join(Self::SENTENCES_FILE)).await?;
        while let Some(chunk) = response.chunk().await? {
            output_file.write_all(&chunk).await?;
        }
        output_file.flush().await?;
        Ok(())
    }

    /// Opens the cached export for line-by-line reading of the decompressed text.
    fn open_sentences_file(
        &self,
    ) -> anyhow::Result<BufReader<bzip2::read::BzDecoder<std::fs::File>>> {
        let input_file = std::fs::File::open(self.data_dir.join(Self::SENTENCES_FILE))?;
        let decompressor = bzip2::read::BzDecoder::new(input_file);
        let reader = BufReader::new(decompressor);
        Ok(reader)
    }

    /// Rebuilds the per-character index from the cached export.
    ///
    /// Each line is expected to hold tab-separated `id`, `lang`, `text`
    /// fields; lines with fewer fields or a language other than `jpn` are
    /// skipped. Every line number is recorded at most once per character:
    /// the previous version pushed one entry per occurrence, and those
    /// duplicates made `search_sentences` stop matching after the first
    /// repeated line number.
    pub fn build_char_index(&mut self) -> anyhow::Result<()> {
        // Start from scratch so calling this twice does not double the index.
        self.chars_index.clear();
        let reader = self.open_sentences_file()?;
        for (line_no, line) in reader.lines().enumerate() {
            let line = line?;
            let mut fields = line.split('\t');
            let (lang, text) = match (fields.next(), fields.next(), fields.next()) {
                (Some(_id), Some(lang), Some(text)) => (lang, text),
                _ => continue,
            };
            if lang != "jpn" {
                continue;
            }
            for c in text.chars() {
                let entry = self
                    .chars_index
                    .entry(c)
                    .or_insert_with(|| CharIndex::Cold(Vec::new()));
                let became_hot = match entry {
                    CharIndex::Hot => false,
                    CharIndex::Cold(lines) => {
                        // Lines arrive in increasing order, so a repeat can
                        // only be the most recently pushed value.
                        if lines.last() != Some(&line_no) {
                            lines.push(line_no);
                        }
                        lines.len() > Self::HOT_THRESHOLD
                    }
                };
                if became_hot {
                    *entry = CharIndex::Hot;
                }
            }
        }
        Ok(())
    }

    /// Returns the line numbers that may contain `word`.
    ///
    /// * `Ok(None)` — no character of `word` narrows the search (all are
    ///   `Hot`, or `word` is empty); the caller must scan every line.
    /// * `Ok(Some(lines))` — only these lines can contain every indexed
    ///   character of `word`; the list is empty when some character does
    ///   not occur in the index at all.
    pub fn search_char_index(&self, word: &str) -> anyhow::Result<Option<Vec<usize>>> {
        let mut result: Option<Vec<usize>> = None;
        for c in word.chars() {
            match self.chars_index.get(&c) {
                None => return Ok(Some(Vec::new())),
                Some(CharIndex::Hot) => continue,
                Some(CharIndex::Cold(lines)) => {
                    result = Some(match result.take() {
                        None => lines.clone(),
                        Some(mut narrowed) => {
                            narrowed.retain(|i| lines.contains(i));
                            narrowed
                        }
                    });
                }
            }
        }
        Ok(result)
    }

    /// Returns every Japanese sentence of the export that contains `word`.
    ///
    /// The character index is used to visit only candidate lines when
    /// possible; the final decision is always a substring check on the text.
    pub fn search_sentences(&self, word: &str) -> anyhow::Result<Vec<String>> {
        let possible_line_nos = self.search_char_index(word)?.map(|mut v| {
            v.sort_unstable();
            // The cursor walk below requires strictly increasing line numbers.
            v.dedup();
            v
        });
        let mut cursor = 0;
        let reader = self.open_sentences_file()?;
        let mut results = Vec::new();
        for (i, line) in reader.lines().enumerate() {
            if let Some(line_nos) = &possible_line_nos {
                match line_nos.get(cursor) {
                    None => break,
                    Some(&wanted) if wanted != i => continue,
                    Some(_) => cursor += 1,
                }
            }
            let line = line?;
            let mut fields = line.split('\t');
            let (lang, text) = match (fields.next(), fields.next(), fields.next()) {
                (Some(_id), Some(lang), Some(text)) => (lang, text),
                _ => continue,
            };
            if lang != "jpn" {
                continue;
            }
            if text.contains(word) {
                results.push(text.to_string());
            }
        }
        Ok(results)
    }
}
#[async_trait]
impl Lookup for Client {
async fn lookup(&self, word: &str) -> anyhow::Result<Vec<LookupResult>> {
let examples = tokio::task::block_in_place(|| self.search_sentences(word))?;
let mut result = LookupResult::new(word.to_string(), "tatoeba");
result.ex = Some(examples);
Ok(vec![result])
}
}

View file

@ -21,6 +21,7 @@ pub trait App {
pub mod auth;
pub mod canvas_lms;
pub mod jpn_wordbook;
pub mod med;
pub mod server_info;
pub mod webcheck;

View file

@ -40,6 +40,7 @@ pub struct ListenConfig {
#[derive(Debug, Deserialize)]
pub struct DbConfig {
pub data_dir: String,
pub url: String,
}

View file

@ -92,6 +92,7 @@ pub async fn main_server(dev: bool) {
Arc::new(canvas_lms::CanvasLMSApp::new()),
Arc::new(med::MedManagementApp::new()),
Arc::new(webcheck::WebcheckApp::new()),
Arc::new(apps::jpn_wordbook::JpnWordBookApp::new()),
];
let mut message_digestor = Vec::new();

View file

@ -0,0 +1,73 @@
use diesel::prelude::*;
use serde::{Deserialize, Serialize};
/// A wordbook row as stored in the `jpn_wordbook` table.
///
/// List-valued data is kept in flattened string form: `altn` is
/// comma-separated, `jm`/`en`/`ex` are newline-separated, and an empty string
/// stands for "no entries" (see the conversion to `JpnWordbookExternal`).
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Debug, Clone)]
#[diesel(table_name = crate::schema::jpn_wordbook)]
pub struct JpnWordbook {
pub uuid: String,
pub ja: String,
pub altn: String,
pub jm: String,
pub fu: String,
pub en: String,
pub ex: String,
pub src: String,
pub created: chrono::NaiveDateTime,
pub updated: chrono::NaiveDateTime,
}
/// API-facing form of a wordbook row: same fields as `JpnWordbook`, but with
/// the flattened `altn`/`jm`/`en`/`ex` strings split back into lists.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct JpnWordbookExternal {
pub uuid: String,
pub ja: String,
pub altn: Vec<String>,
pub jm: Vec<String>,
pub fu: String,
pub en: Vec<String>,
pub ex: Vec<String>,
pub src: String,
pub created: chrono::NaiveDateTime,
pub updated: chrono::NaiveDateTime,
}
/// Expands a stored row into its API form by splitting the flattened list
/// columns: `altn` on commas, `jm`/`en`/`ex` on newlines. An empty column
/// becomes an empty list (not a list holding one empty string).
impl Into<JpnWordbookExternal> for JpnWordbook {
    fn into(self) -> JpnWordbookExternal {
        fn unpack(packed: &str, delimiter: char) -> Vec<String> {
            if packed.is_empty() {
                Vec::new()
            } else {
                packed.split(delimiter).map(str::to_owned).collect()
            }
        }

        let altn = unpack(&self.altn, ',');
        let jm = unpack(&self.jm, '\n');
        let en = unpack(&self.en, '\n');
        let ex = unpack(&self.ex, '\n');

        JpnWordbookExternal {
            uuid: self.uuid,
            ja: self.ja,
            altn,
            jm,
            fu: self.fu,
            en,
            ex,
            src: self.src,
            created: self.created,
            updated: self.updated,
        }
    }
}

View file

@ -1,3 +1,4 @@
pub mod jpn_wordbook;
pub mod med;
#[cfg(test)]
mod med_test;

View file

@ -1,5 +1,20 @@
// @generated automatically by Diesel CLI.
diesel::table! {
jpn_wordbook (uuid) {
uuid -> Text,
ja -> Text,
altn -> Text,
jm -> Text,
fu -> Text,
en -> Text,
ex -> Text,
src -> Text,
created -> Timestamp,
updated -> Timestamp,
}
}
diesel::table! {
medication_logs (uuid) {
uuid -> Text,
@ -38,6 +53,7 @@ diesel::table! {
diesel::joinable!(medication_logs -> medications (med_uuid));
diesel::allow_tables_to_appear_in_same_query!(
jpn_wordbook,
medication_logs,
medications,
sessions,

View file

@ -13,6 +13,7 @@ import GradesPage from './pages/GradesPage';
import FramePage from './pages/FramePage';
import { EnsureRole } from './components/EnsureRole';
import MedsPage from './pages/MedsPage';
import JpnWordbookPage from './pages/JpnWordbookPage';
const persistent_pages = [
@ -55,6 +56,10 @@ function App() {
path: "/meds",
element: <PageBase persistentPages={persistent_pages} ><EnsureRole role="Admin"><MedsPage /></EnsureRole></PageBase>
},
{
path: "/jpn_wordbook",
element: <PageBase persistentPages={persistent_pages} ><JpnWordbookPage /></PageBase>
},
{
path: "/gotify_ui",
element: <PageBase persistentPages={persistent_pages} ></PageBase>

View file

@ -3,6 +3,7 @@ import './App.css'
import { AppBar, Button, Divider, IconButton, Toolbar, Drawer, List, ListItem, ListItemButton, ListItemText } from '@mui/material'
import Typography from '@mui/material/Typography'
import { Box, Container } from '@mui/system'
import BookIcon from '@mui/icons-material/Book'
import MenuIcon from '@mui/icons-material/Menu'
import HomeIcon from '@mui/icons-material/Home'
import GradeIcon from '@mui/icons-material/Grade'
@ -58,6 +59,7 @@ function PageBase({ children, persistentPages }: { children?: ReactNode, persist
{ key: "home", name: "Home", icon: <HomeIcon />, path: "/" },
{ key: "grades", name: "Grades", icon: <GradeIcon />, path: "/grades" },
{ key: "meds", name: "Meds", icon: <MedicationIcon />, path: "/meds" },
{ key: "jpn-wordbook", name: "WordBook", icon: <BookIcon />, path: "/jpn_wordbook" },
{ key: "gotify", name: "Gotify", icon: <CampaignIcon />, path: "/gotify_ui" },
].map((item) => (
<ListItem key={item.key}

View file

@ -0,0 +1,84 @@
import { makeJSONRequest } from "./request";
/**
 * A dictionary lookup result as returned by the combo search endpoints and
 * accepted by `storeWordbook`. Fields a source did not provide are `null`.
 */
export interface LookupResult {
ja: string,
altn: string[] | null,
jm: string[] | null,
fu: string | null,
en: string[] | null,
ex: string[] | null,
src: string,
}
/**
 * A stored wordbook entry as returned by `getWordbook`. Unlike
 * `LookupResult`, list fields are always arrays and the entry carries an
 * identifier and timestamps (transported as strings).
 */
export interface WordbookItem {
uuid: string,
ja: string,
altn: string[],
jm: string[],
fu: string,
en: string[],
ex: string[],
src: string,
created: string,
updated: string,
}
/**
 * Runs the combined dictionary search and resolves with all results.
 * Rejects with the server's message when the response status is not "Ok".
 */
export async function comboSearchWord(query: string): Promise<LookupResult[]> {
  const response = await makeJSONRequest<LookupResult[]>(
    "/api/jpn_wordbook/sources/combo/search?query=" + encodeURIComponent(query),
    "GET",
  );
  if (response.status != "Ok") {
    throw new Error(response.message);
  }
  return response.data;
}
/**
 * Runs the combined dictionary search and resolves with the single best result.
 * Rejects with the server's message when the response status is not "Ok".
 */
export async function comboSearchWordTop(query: string): Promise<LookupResult> {
  const response = await makeJSONRequest<LookupResult>(
    "/api/jpn_wordbook/sources/combo/search_top?query=" + encodeURIComponent(query),
    "GET",
  );
  if (response.status != "Ok") {
    throw new Error(response.message);
  }
  return response.data;
}
/**
 * Sends a lookup result to the server to be stored in the wordbook.
 * Rejects with the server's message when the response status is not "Ok".
 */
export async function storeWordbook(word: LookupResult): Promise<void> {
  const response = await makeJSONRequest<void>("/api/jpn_wordbook/wordbook", "POST", word);
  if (response.status != "Ok") {
    throw new Error(response.message);
  }
}
/** Paging options for `getWordbook`. */
export interface getWordbookParams {
// Optional cutoff sent as the `until` query parameter.
until?: string
// Page size sent as the `limit` query parameter.
limit: number,
}
/**
 * Fetches one page of stored wordbook entries.
 *
 * `params.limit` is always sent; `params.until` is sent only when set.
 * Both values are URL-encoded, matching how the search functions in this
 * module encode their query, so characters with special meaning in a query
 * string cannot corrupt the request.
 *
 * Rejects with the server's message when the response status is not "Ok".
 */
export async function getWordbook(params: getWordbookParams): Promise<WordbookItem[]> {
  let url = `/api/jpn_wordbook/wordbook?limit=${encodeURIComponent(params.limit)}`;
  if (params.until) {
    url += `&until=${encodeURIComponent(params.until)}`;
  }
  const method = "GET";
  const ret = (await makeJSONRequest<WordbookItem[]>(url, method));
  if (ret.status != "Ok") {
    throw new Error(ret.message);
  }
  return ret.data;
}
/**
 * Triggers a browser download of the wordbook CSV export by clicking a
 * temporary anchor element. `header` selects whether the CSV includes the
 * column-name row.
 */
export function downloadWordbookCsv(header: boolean) {
  const anchor = document.createElement("a");
  anchor.href = `/api/jpn_wordbook/wordbook/csv_export?header=${header}`;
  anchor.download = "wordbook.csv";
  anchor.click();
}

View file

@ -0,0 +1,188 @@
import { Accordion, AccordionDetails, AccordionSummary, Alert, Button, Container, List, ListItem, ListItemText, Paper, TextField, Typography } from "@mui/material";
import { useEffect, useMemo, useState } from "react";
import { LookupResult, WordbookItem, comboSearchWord, comboSearchWordTop, downloadWordbookCsv, getWordbook, storeWordbook } from "../api/jpn_wordbook";
import { DataGrid } from "@mui/x-data-grid";
import { LoginContext } from "../context/LoginContext";
/**
 * Page with two panels: a dictionary lookup (search all / top result, with a
 * "Store" button for admins) and a grid of the stored wordbook with CSV
 * download buttons.
 *
 * Fixes over the previous version:
 *  - Fetched rows are merged into a NEW array via a functional state update.
 *    Previously the existing state array was mutated and passed back to the
 *    setter, so React saw the same reference and skipped the re-render after
 *    the first load.
 *  - Rows are merged by uuid with the fetched copy winning, so re-storing a
 *    word refreshes its row instead of keeping stale data, and the list is
 *    kept newest-first like the server's ordering.
 *  - A successful "Top" lookup / wordbook fetch clears the respective error.
 */
export default function JpnWordbookPage() {
  const [keyword, setKeyword] = useState<string>("");
  const [lookupResult, setLookupResult] = useState<LookupResult[] | null>(null);
  const [lookupError, setLookupError] = useState<string | null>(null);
  const [lookupStoreCount, setLookupStoreCount] = useState<number>(0);
  const [wordbookRows, setWordbookRows] = useState<WordbookItem[] | null>(null);
  const [wordbookPaginationModel, setWordbookPaginationModel] = useState({
    page: 0,
    pageSize: 100,
  })
  const [wordbookError, setWordbookError] = useState<string | null>(null);

  // Recomputed only when the pagination model changes; `wordbookRows` is read
  // but deliberately not a dependency, otherwise every fetch would trigger
  // the next one.
  const wordbookQueryOptions = useMemo(() => {
    return {
      until: (wordbookRows && wordbookRows.length > 0) ? wordbookRows[wordbookRows.length - 1]?.created : undefined,
      limit: wordbookPaginationModel.pageSize,
    }
  }, [wordbookPaginationModel]);

  useEffect(() => {
    getWordbook(wordbookQueryOptions).then((result) => {
      setWordbookError(null);
      setWordbookRows((previous) => {
        const byUuid = new Map<string, WordbookItem>();
        for (const row of previous ?? []) {
          byUuid.set(row.uuid, row);
        }
        for (const row of result) {
          byUuid.set(row.uuid, row);
        }
        // Timestamps share one textual format, so plain string comparison
        // orders them chronologically; newest first.
        return Array.from(byUuid.values()).sort((a, b) =>
          a.created < b.created ? 1 : a.created > b.created ? -1 : 0
        );
      });
    }).catch((error) => {
      setWordbookError(error.message);
    });
  }, [wordbookQueryOptions, lookupStoreCount]);

  return (
    <LoginContext.Consumer>
      {
        ({ auth }) => (
          <Container>
            <Paper sx={{ padding: "1em" }}>
              <Typography variant="h4" component="h1" gutterBottom>
                Lookup word
              </Typography>
              {
                lookupError ?
                  <Alert severity="error">{lookupError}</Alert>
                  : null
              }
              <TextField label="Word" variant="outlined" value={keyword} onChange={(event) => {
                setKeyword(event.target.value);
              }} />
              <Button variant="contained" onClick={() => {
                comboSearchWord(keyword).then((result) => {
                  setLookupError(null);
                  setLookupResult(result);
                }).catch((error) => {
                  setLookupError(error.message);
                });
              }}>Search</Button>
              <Button variant="contained" onClick={() => {
                comboSearchWordTop(keyword).then((result) => {
                  setLookupError(null);
                  setLookupResult([result]);
                }).catch((error) => {
                  setLookupError(error.message);
                });
              }}>Top</Button>
              {
                lookupResult && lookupResult.map((result) => {
                  return (
                    <Accordion key={result.ja}>
                      <AccordionSummary>
                        {result.ja}
                      </AccordionSummary>
                      <AccordionDetails>
                        {
                          auth.roles.includes("Admin") ?
                            <Button variant="contained" onClick={() => {
                              storeWordbook(result)
                                .then(() => {
                                  // Functional update: never counts from a stale value.
                                  setLookupStoreCount((count) => count + 1);
                                })
                                .catch((error) => {
                                  setWordbookError(error.message);
                                });
                            }}>Store</Button>
                            : null
                        }
                        {/* NOTE(review): the three section headings below are empty in the reviewed source; their text may have been lost in transit — confirm against the repository. */}
                        <Typography variant="h6" component="div" gutterBottom>
                        </Typography>
                        <List dense={true}>
                          {
                            result.jm?.map((item) => {
                              return (
                                <ListItem key={item}>
                                  -&nbsp;<ListItemText primary={item} />
                                </ListItem>
                              )
                            })
                          }
                        </List>
                        <Typography variant="h6" component="div" gutterBottom>
                        </Typography>
                        <List dense={true}>
                          {
                            result.en?.map((item) => {
                              return (
                                <ListItem key={item}>
                                  -&nbsp;<ListItemText primary={item} />
                                </ListItem>
                              )
                            })
                          }
                        </List>
                        <Typography variant="h6" component="div" gutterBottom>
                        </Typography>
                        <List dense={true}>
                          {
                            result.ex?.map((item) => {
                              return (
                                <ListItem key={item}>
                                  -&nbsp;<ListItemText primary={item} />
                                </ListItem>
                              )
                            })
                          }
                        </List>
                      </AccordionDetails>
                    </Accordion>
                  )
                })
              }
            </Paper>
            <Paper sx={{ padding: "1em", marginTop: "1em" }}>
              <Typography variant="h4" component="h1" gutterBottom>
                Wordbook
              </Typography>
              {
                wordbookError ?
                  <Alert severity="error">{wordbookError}</Alert>
                  : null
              }
              <Button variant="contained" onClick={() => {
                downloadWordbookCsv(true);
              }}>Download</Button>
              <Button variant="contained" onClick={() => {
                downloadWordbookCsv(false);
              }}>Download (no header)</Button>
              <DataGrid
                paginationMode="client"
                rows={wordbookRows ?? []}
                columns={
                  [
                    { field: "ja", headerName: "Japanese", width: 200 },
                    { field: "fu", headerName: "Furigana", width: 200 },
                    { field: "en", headerName: "English", width: 200 },
                    { field: "ex", headerName: "Example", width: 200 },
                    { field: "jm", headerName: "Meaning", width: 200 },
                  ]
                }
                getRowId={(row) => row.uuid}
                onPaginationModelChange={(model) => {
                  console.log("Pagination model changed: ", model);
                  setWordbookPaginationModel(model);
                }}
                initialState={
                  {
                    pagination: {
                      paginationModel: {
                        pageSize: 100,
                      }
                    }
                  }
                }
              />
            </Paper>
          </Container>
        )}
    </LoginContext.Consumer>
  )
}