Fetch identifiers by article title

master
Raymon Zutekouw 2 days ago
parent 0097b105a1
commit d310cce749
Signed by: raymon
GPG Key ID: 0E62222846283925
  1. 23
      src/lib.rs

@ -85,6 +85,22 @@ pub mod wikipedia_infobox_analyzer {
.unique()
.collect::<Vec<u64>>()
}
/// Fetch the Wikidata item number associated with a Wikipedia article.
///
/// Requests the `pageprops` of `title` from the MediaWiki API of the
/// `language_code` edition of Wikipedia and extracts the numeric part of the
/// `wikibase_item` property found in the response (e.g. `2` for `"Q2"`).
///
/// Returns `0` when the response contains no `wikibase_item` entry.
///
/// # Panics
///
/// Panics if the request URL cannot be built from `language_code`, if the
/// HTTP request fails, if the response body cannot be read as text, or if the
/// captured digits do not fit in a `u64`.
pub fn fetch_wiki_item_by_article_title(title: String, language_code: String) -> u64 {
    let endpoint = format!("https://{language_code}.wikipedia.org/w/api.php");
    // Build the query through `Url` so that titles containing spaces, `&`,
    // `#`, `+`, ... are percent-encoded instead of corrupting the query string.
    let url = reqwest::Url::parse_with_params(
        &endpoint,
        &[
            ("action", "query"),
            ("prop", "pageprops"),
            ("format", "json"),
            ("titles", title.as_str()),
        ],
    )
    .expect("language code should yield a valid Wikipedia API URL");

    let body = reqwest::blocking::get(url)
        .expect("request to the Wikipedia API failed")
        .text()
        .expect("Wikipedia API response could not be read as text");

    // No substring test for "-1" here: that text can legitimately occur inside
    // a successful response (echoed titles, file names in page props, ...), and
    // a response without a `wikibase_item` entry is already covered by the
    // failed capture below.
    let re = Regex::new(r#""wikibase_item":"Q(\d+)""#).expect("hard-coded pattern is valid");
    let Some(qid) = re.captures(&body) else { return 0 };
    qid[1].parse().expect("Should be a numeric value")
}
}
#[cfg(test)]
@ -158,4 +174,11 @@ mod tests {
assert!(!properties.contains(&19)); // property: place of birth
assert!(!properties.contains(&30)); // property: continent
}
/// Checks that a few articles resolve to the expected Wikidata item numbers.
/// Each case calls `fetch_wiki_item_by_article_title`, which performs a live
/// request against the Wikipedia API.
#[test]
fn test_fetch_wiki_item_by_article_title() {
    // (article title, language code, expected item number)
    let cases = [
        ("Earth", "en", 2),
        ("Train", "en", 870),
        ("Baum", "de", 10884),
    ];
    for (title, language_code, expected) in cases {
        let actual =
            fetch_wiki_item_by_article_title(title.to_string(), language_code.to_string());
        assert_eq!(actual, expected);
    }
}
}

Loading…
Cancel
Save