|
|
|
@ -5,18 +5,18 @@ pub mod wikipedia_infobox_analyzer { |
|
|
|
|
use itertools::Itertools; |
|
|
|
|
|
|
|
|
|
/// Looks for the template line that lists the Wikidata properties in use
|
|
|
|
|
pub fn extract_used_properties_from_template(template: String) -> Vec<String> { |
|
|
|
|
pub fn extract_used_properties_from_template(template: String) -> Vec<u64> { |
|
|
|
|
let mut lines = template.lines(); |
|
|
|
|
let used = lines |
|
|
|
|
.find(|line| line.starts_with("{{") && line.contains("Wikidata|")) |
|
|
|
|
.expect("Template should have a line declaring which properties are used"); |
|
|
|
|
|
|
|
|
|
// The properties are listed inside the template with their number: P1|P2|P...
|
|
|
|
|
let re = Regex::new(r"P\d+").unwrap(); |
|
|
|
|
let re = Regex::new(r"P(\d+)").unwrap(); |
|
|
|
|
|
|
|
|
|
// Find all matches and collect the properties into a vector
|
|
|
|
|
re.find_iter(used) |
|
|
|
|
.map(|m| m.as_str().to_owned()) |
|
|
|
|
re.captures_iter(used) |
|
|
|
|
.map(|m| m[1].parse().unwrap()) |
|
|
|
|
.collect() |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -116,9 +116,9 @@ mod tests { |
|
|
|
|
assert_eq!( |
|
|
|
|
extract_used_properties_from_template(contents), |
|
|
|
|
vec![ |
|
|
|
|
"P18", "P154", "P1128", "P4103", "P2139", "P2295", "P2226", "P856", "P169", |
|
|
|
|
"P1448", "P159", "P749", "P355", "P17", "P1056", "P452", "P576", "P112", "P127", |
|
|
|
|
"P856", "P2096" |
|
|
|
|
18, 154, 1128, 4103, 2139, 2295, 2226, 856, 169, |
|
|
|
|
1448, 159, 749, 355, 17, 1056, 452, 576, 112, 127, |
|
|
|
|
856, 2096 |
|
|
|
|
] |
|
|
|
|
); |
|
|
|
|
|
|
|
|
@ -128,8 +128,8 @@ mod tests { |
|
|
|
|
assert_eq!( |
|
|
|
|
extract_used_properties_from_template(contents), |
|
|
|
|
vec![ |
|
|
|
|
"P18", "P154", "P170", "P178", "P275", "P277", "P306", "P348", "P400", "P548", |
|
|
|
|
"P571", "P577", "P856", "P1324", "P2096" |
|
|
|
|
18, 154, 170, 178, 275, 277, 306, 348, 400, 548, |
|
|
|
|
571, 577, 856, 1324, 2096 |
|
|
|
|
] |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|