|
|
|
@ -85,6 +85,22 @@ pub mod wikipedia_infobox_analyzer { |
|
|
|
|
.unique() |
|
|
|
|
.collect::<Vec<u64>>() |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/// Fetch the wikidata identifier associated with the wikipedia article
|
|
|
|
|
pub fn fetch_wiki_item_by_article_title(title: String, language_code: String) -> u64 { |
|
|
|
|
let uri = format!("https://{}.wikipedia.org/w/api.php?action=query&prop=pageprops&format=json&titles={}", &language_code, &title); |
|
|
|
|
|
|
|
|
|
let res = reqwest::blocking::get(uri).unwrap(); |
|
|
|
|
let text = res.text().unwrap(); |
|
|
|
|
if text.contains("-1") { |
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
let re = Regex::new("\"wikibase_item\":\"Q(\\d+)\"").unwrap(); |
|
|
|
|
let Some(qid) = re.captures(&text) else { return 0 }; |
|
|
|
|
|
|
|
|
|
qid[1].parse().expect("Should be a numeric value") |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[cfg(test)] |
|
|
|
@ -158,4 +174,11 @@ mod tests { |
|
|
|
|
assert!(!properties.contains(&19)); // property: place of birth
|
|
|
|
|
assert!(!properties.contains(&30)); // property: continent
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[test]
fn test_fetch_wiki_item_by_article_title() {
    // Each entry: (article title, Wikipedia language edition, expected numeric Wikidata id).
    let cases = [
        ("Earth", "en", 2),
        ("Train", "en", 870),
        ("Baum", "de", 10884),
    ];

    for (title, language_code, expected) in cases {
        let actual =
            fetch_wiki_item_by_article_title(title.to_string(), language_code.to_string());
        assert_eq!(actual, expected);
    }
}
|
|
|
|
} |
|
|
|
|