Integrate property name lookup into the program output

Raymon Zutekouw 3 weeks ago
parent 4b8562089e
commit 8f61c26168
Signed by: raymon
GPG Key ID: 0E62222846283925
  1. 2
      Cargo.lock
  2. 2
      Cargo.toml
  3. 68
      README.md
  4. 64
      src/main.rs

2
Cargo.lock generated

@ -2386,7 +2386,7 @@ dependencies = [
[[package]] [[package]]
name = "wikipedia-infobox-analyzer" name = "wikipedia-infobox-analyzer"
version = "0.1.0" version = "0.2.0"
dependencies = [ dependencies = [
"ascii_table", "ascii_table",
"dirs-next", "dirs-next",

@ -1,6 +1,6 @@
[package] [package]
name = "wikipedia-infobox-analyzer" name = "wikipedia-infobox-analyzer"
version = "0.1.0" version = "0.2.0"
edition = "2021" edition = "2021"
description = """ description = """
This analysis tool allows seeing the wikidata behind articles through the lens of the infoboxes. This analysis tool allows seeing the wikidata behind articles through the lens of the infoboxes.

@ -38,46 +38,46 @@ If it is not present, the right column will be blank.
Remaining properties in Wikidata that are not required by the infobox are listed at the end of the Wikidata column. Remaining properties in Wikidata that are not required by the infobox are listed at the end of the Wikidata column.
``` markdown ``` markdown
┌──────────────────┬─────────────────────┐ ┌──────────────────┬─────────────────────┬───────────────
│ Infobox requires │ Wikidata Earth (Q2) │ │ Infobox requires │ Wikidata Earth (Q2) │ Property name │
├──────────────────┼─────────────────────┤ ├──────────────────┼─────────────────────┼───────────────
│ P18 │ P18 │ // property: image │ P18 │ P18 │ image
│ P170 │ P170 │ // property: creator │ P170 │ P170 │ creator
│ P571 │ P571 │ // property: inception │ P571 │ P571 │ inception
│ │ P31 │ // property: instance of │ │ P31 │
│ │ P138 │ // property: named after │ │ P138 │
│ │ P361 │ // property: part of │ │ P361 │
│ │ .... │ │ │ .... │
└──────────────────┴─────────────────────┘ └──────────────────┴─────────────────────┴───────────────
``` ```
This (shortened) example is complete, but if we try a different infobox template on the Earth entry, we can see that it is not a good fit. This (shortened) example is complete, but if we try a different infobox template on the Earth entry, we can see that it is not a good fit.
To demonstrate, we can apply the software template to the Earth entry: To demonstrate, we can apply the software template to the Earth entry:
``` markdown ``` markdown
┌──────────────────┬─────────────────────┐ ┌──────────────────┬─────────────────────┬───────────────
│ Infobox requires │ Wikidata Earth (Q2) │ │ Infobox requires │ Wikidata Earth (Q2) │ Property name │
├──────────────────┼─────────────────────┤ ├──────────────────┼─────────────────────┼───────────────
│ P18 │ P18 │ // property: image │ P18 │ P18 │ image
│ P154 │ │ │ P154 │ │
│ P170 │ P170 │ // property: creator │ P170 │ P170 │ creator
│ P178 │ │ │ P178 │ │
│ P275 │ │ │ P275 │ │
│ P277 │ │ │ P277 │ │
│ P306 │ │ │ P306 │ │
│ P348 │ │ │ P348 │ │
│ P400 │ │ │ P400 │ │
│ P548 │ │ │ P548 │ │
│ P571 │ P571 │ // property: inception │ P571 │ P571 │ inception
│ P577 │ │ │ P577 │ │
│ P856 │ │ │ P856 │ │
│ P1324 │ │ │ P1324 │ │
│ P2096 │ │ │ P2096 │ │
│ │ P10 │ │ │ P10 │
│ │ P31 │ │ │ P31 │
│ │ P138 │ │ │ P138 │
│ │ .... │ │ │ .... │
└──────────────────┴─────────────────────┘ └──────────────────┴─────────────────────┴───────────────
``` ```
## Templates ## Templates

@ -88,6 +88,7 @@ impl Cache {
Ok(()) Ok(())
} }
#[allow(dead_code)]
fn delete(&mut self) -> std::io::Result<()> { fn delete(&mut self) -> std::io::Result<()> {
fs::remove_file(self.storage.base_path.join(self.file_name.clone()))?; fs::remove_file(self.storage.base_path.join(self.file_name.clone()))?;
self.property_labels = HashMap::new(); self.property_labels = HashMap::new();
@ -96,17 +97,15 @@ impl Cache {
async fn lookup_property_name(&mut self, pid: u64) -> std::io::Result<String> { async fn lookup_property_name(&mut self, pid: u64) -> std::io::Result<String> {
let key = format!("P{pid}"); let key = format!("P{pid}");
if self.property_labels.contains_key(&key) { if let Some(name) = self.property_labels.get(&key) {
let name = self.property_labels.get(&key).unwrap().to_string(); return Ok(name.to_string());
return Ok(name);
} }
// If the property name cannot be found, fetch the name // If the property name cannot be found, fetch the name
let name = fetch_name_for_wiki_property(pid).await; let name = fetch_name_for_wiki_property(pid).await;
// Update cache with new value, so it can be used the next time // Update cache with new value, so it can be used the next time
self.property_labels self.property_labels.insert(key, name.clone());
.insert(key, name.clone());
self.sync_to_storage()?; self.sync_to_storage()?;
Ok(name) Ok(name)
@ -114,7 +113,7 @@ impl Cache {
} }
/// Creates an ascii table out of the properties /// Creates an ascii table out of the properties
fn format_property_usage(name: String, qid: String, properties_template: Vec<u64>, properties_item: Vec<u64>, ) -> String { fn format_property_usage(name: String, qid: String, properties_template: Vec<u64>, properties_item: Vec<u64>, properties_labels: Vec<String>) -> String {
let mut ascii_table = AsciiTable::default(); let mut ascii_table = AsciiTable::default();
ascii_table.set_max_width(80); ascii_table.set_max_width(80);
ascii_table ascii_table
@ -127,13 +126,25 @@ fn format_property_usage(name: String, qid: String, properties_template: Vec<u64
.set_header(format!("Wikidata {name} ({qid})")) .set_header(format!("Wikidata {name} ({qid})"))
.set_align(Align::Left); .set_align(Align::Left);
ascii_table
.column(2)
.set_header("Property name")
.set_align(Align::Left);
// Display the property columns as a table // Display the property columns as a table
let data: Vec<Vec<String>> = join_columns(&properties_template, &properties_item) let data: Vec<Vec<String>> = join_columns(&properties_template, &properties_item)
.iter() .iter()
.map(|&m| { .enumerate()
.map(|(index, &m)| {
let (v1, v2) = m; let (v1, v2) = m;
let property_name = if index >= properties_template.len() {
"".to_string()
} else {
properties_labels[index].clone()
};
vec![ v1.map_or("".to_owned(), |v| format!("P{v}").to_owned()) vec![ v1.map_or("".to_owned(), |v| format!("P{v}").to_owned())
, v2.map_or("".to_owned(), |v| format!("P{v}").to_owned())] , v2.map_or("".to_owned(), |v| format!("P{v}").to_owned())
, property_name]
} }
).collect(); ).collect();
ascii_table.format(data) ascii_table.format(data)
@ -143,7 +154,7 @@ fn format_property_usage(name: String, qid: String, properties_template: Vec<u64
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize cache and storage // Initialize cache and storage
let storage = StorageDisk::new("wikipedia-infobox-analyzer")?; let storage = StorageDisk::new("wikipedia-infobox-analyzer")?;
let mut _cache = Cache::new("property_labels.cache".to_string(), storage)?; let mut cache = Cache::new("property_labels.cache".to_string(), storage)?;
// Input to the program // Input to the program
let args = Cli::from_args(); let args = Cli::from_args();
@ -153,13 +164,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let qid = fetch_wiki_item_by_article_title(title.clone(), language.clone()); let qid = fetch_wiki_item_by_article_title(title.clone(), language.clone());
let properties_template = extract_used_properties_from_template(template); let properties_template = extract_used_properties_from_template(template);
let properties_item = fetch_properties_for_wiki_item(qid); let properties_item = fetch_properties_for_wiki_item(qid);
let mut properties_labels: Vec<String> = vec![];
for property in &properties_template {
let name = cache.lookup_property_name(*property).await?;
properties_labels.push(name);
}
let table = format_property_usage( let table = format_property_usage(
title.clone(), title.clone(),
format!("Q{qid}"), format!("Q{qid}"),
properties_template, properties_template,
properties_item, properties_item,
properties_labels,
); );
// Output of the program // Output of the program
@ -202,20 +220,24 @@ mod tests_main {
"Earth".to_string(), "Earth".to_string(),
"Q2".to_string(), "Q2".to_string(),
vec![18, 170, 571], vec![18, 170, 571],
vec![18, 31, 138, 170, 361, 571] vec![18, 31, 138, 170, 361, 571],
vec![ "image".to_string()
, "creator".to_string()
, "inception".to_string()
]
), ),
concat![ concat![
"┌──────────────────┬─────────────────────┐\n", "┌──────────────────┬─────────────────────┬───────────────┐\n",
"│ Infobox requires │ Wikidata Earth (Q2) │\n", "│ Infobox requires │ Wikidata Earth (Q2) │ Property name │\n",
"├──────────────────┼─────────────────────┤\n", "├──────────────────┼─────────────────────┼───────────────┤\n",
"│ P18 │ P18 │\n", // property: image "│ P18 │ P18 │ image │\n",
"│ P170 │ P170 │\n", // property: creator "│ P170 │ P170 │ creator │\n",
"│ P571 │ P571 │\n", // property: inception "│ P571 │ P571 │ inception │\n",
"│ │ P31 │\n", // property: instance of "│ │ P31 │\n",
"│ │ P138 │\n", // property: inception "│ │ P138 │\n",
"│ │ P361 │\n", // property: part of "│ │ P361 │\n",
"└──────────────────┴─────────────────────┘\n", "└──────────────────┴─────────────────────┴───────────────┘\n",
] ]
); );
} }

Loading…
Cancel
Save