diff --git a/src/lib.rs b/src/lib.rs
index a7b082b..61c9237 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -85,6 +85,43 @@ pub mod wikipedia_infobox_analyzer {
             .unique()
             .collect::>()
     }
+
+    /// Fetch the Wikidata item identifier associated with a Wikipedia article.
+    ///
+    /// Queries the `pageprops` of `title` on the `language_code` edition of
+    /// Wikipedia and returns the numeric part of its `wikibase_item`
+    /// (`2` for `Q2`). Returns `0` when the response holds no `wikibase_item`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the request URL cannot be built, if the HTTP request or reading
+    /// its body fails, or if the matched digits cannot be parsed as a `u64`.
+    pub fn fetch_wiki_item_by_article_title(title: String, language_code: String) -> u64 {
+        let endpoint = format!("https://{}.wikipedia.org/w/api.php", &language_code);
+        // Build the query through the URL type so the title is percent-encoded;
+        // a raw `&`, `#` or `+` in it would otherwise corrupt the query string.
+        let uri = reqwest::Url::parse_with_params(
+            &endpoint,
+            &[
+                ("action", "query"),
+                ("prop", "pageprops"),
+                ("format", "json"),
+                ("titles", title.as_str()),
+            ],
+        )
+        .expect("Should be a valid URL");
+
+        let res = reqwest::blocking::get(uri).unwrap();
+        let text = res.text().unwrap();
+
+        // Deliberately no `text.contains("-1")` shortcut: that substring can also
+        // occur in ordinary response content such as an article title
+        // (e.g. "B-1 Lancer"). A response without an item fails the capture below.
+        let re = Regex::new("\"wikibase_item\":\"Q(\\d+)\"").unwrap();
+        let Some(qid) = re.captures(&text) else { return 0 };
+
+        qid[1].parse().expect("Should be a numeric value")
+    }
 }
 
 #[cfg(test)]
@@ -158,4 +195,11 @@ mod tests {
         assert!(!properties.contains(&19)); // property: place of birth
         assert!(!properties.contains(&30)); // property: continent
     }
+
+    #[test]
+    fn test_fetch_wiki_item_by_article_title() {
+        assert_eq!(fetch_wiki_item_by_article_title("Earth".to_string(), "en".to_string()), 2);
+        assert_eq!(fetch_wiki_item_by_article_title("Train".to_string(), "en".to_string()), 870);
+        assert_eq!(fetch_wiki_item_by_article_title("Baum".to_string(), "de".to_string()), 10884);
+    }
 }