diff --git a/Cargo.lock b/Cargo.lock index 7f82b33..c957c2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,12 @@ dependencies = [ "syn", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" @@ -170,6 +176,7 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] @@ -341,6 +348,12 @@ dependencies = [ "litrs", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -530,6 +543,25 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "h2" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -613,6 +645,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", @@ -623,6 +656,22 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -658,9 +707,11 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -831,6 +882,15 @@ dependencies = [ "serde", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -858,6 +918,12 @@ dependencies = [ "serde", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.174" @@ -926,6 +992,12 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1311,17 +1383,21 @@ dependencies = [ "bytes", "cookie", "cookie_store", + "encoding_rs", "futures-channel", "futures-core", "futures-util", + "h2", "http", "http-body", "http-body-util", "hyper", + "hyper-rustls", "hyper-tls", "hyper-util", "js-sys", "log", + "mime", "native-tls", "percent-encoding", "pin-project-lite", @@ -1341,6 +1417,20 @@ dependencies = [ "web-sys", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "ron" version = "0.8.1" @@ -1382,6 +1472,19 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "rustls" +version = "0.23.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pki-types" version = "1.12.0" @@ -1391,6 +1494,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.103.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.21" @@ -1596,6 +1710,27 @@ dependencies = [ "syn", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tempfile" version = "3.20.0" @@ -1720,6 +1855,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.23" @@ -1861,6 +2019,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" @@ -2001,15 +2165,30 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "wikidata" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34b80c94c1d2e5df9e3e7dcb886e646b2cf148773f689da670d8f11849d8c72" +dependencies = [ + "chrono", + "lazy_static", + "serde", + "serde_json", +] + [[package]] name = "wikipedia-infobox-analyzer" version = "0.1.0" dependencies = [ "ascii_table", + "itertools", "mediawiki", "regex", + "reqwest", "serde_json", "tokio", + "wikidata", ] [[package]] @@ -2053,6 +2232,17 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-registry" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -2071,6 +2261,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index 899c25a..4f23d53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,9 @@ categories = ["command-line-utilities"] [dependencies] regex = "1.11.1" ascii_table = "4.0.7" +itertools = "0.14.0" mediawiki = "0.3.1" +reqwest = { version = "0.12", features = ["blocking"] } serde_json = "1.0.64" tokio = "1.47.1" +wikidata = "1.1.0" diff --git a/src/lib.rs b/src/lib.rs index 672facd..c3f6c61 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod wikipedia_infobox_analyzer { use ascii_table::{Align, AsciiTable}; use regex::Regex; + use itertools::Itertools; /// Looks for a used template that does lists wikidata properties listing pub fn extract_used_properties_from_template(template: String) -> Vec { @@ -64,6 +65,33 @@ pub mod wikipedia_infobox_analyzer { return label[1].to_string(); } + + /// Fetches the properties which have claims for the given wiki item inside of wikidata + pub fn fetch_properties_for_wiki_item(qid: u64) -> Vec { + let mut ids: Vec = vec![]; + let uri = format!("https://www.wikidata.org/wiki/Special:EntityData/Q{qid}.json"); + let res = reqwest::blocking::get(uri).unwrap(); + let text = res.text().unwrap(); + if text.contains("

Not Found

No entity with ID ") { + return vec![]; + } + let ent = wikidata::Entity::from_json(serde_json::from_str(&text).unwrap()).unwrap(); + if ent.claims.is_empty() { + return vec![]; + } + + for i in 0..ent.claims.len() { + let (wikidata::Pid(pid), _) = &ent.claims[i]; + ids.push(*pid); + } + + ids.sort(); + + ids.iter() + .map(|n| n.to_owned()) + .unique() + .collect::>() + } } #[cfg(test)] @@ -121,4 +149,20 @@ mod tests { let name = fetch_name_for_wiki_property(31).await; assert_eq!(name, "instance of") } + + #[test] + fn test_fetch_properties_for_wiki_item() { + let qid = 16639197; // identifier for GitLab on wikidata + let properties = fetch_properties_for_wiki_item(qid); + + // Properties which should be present: + assert!(properties.contains(&10)); // property: video + assert!(properties.contains(&18)); // property: image + assert!(properties.contains(&154)); // property: logo image + assert!(properties.contains(&178)); // property: developer + + // Properties which should not be present: + assert!(!properties.contains(&19)); // property: place of birth + assert!(!properties.contains(&30)); // property: continent + } }