Integrate property name lookup into the program output

master
Raymon Zutekouw 3 weeks ago
parent 4b8562089e
commit aa3b4d21d4
Signed by: raymon
GPG Key ID: 0E62222846283925
  1. 68
      README.md
  2. 64
      src/main.rs

@ -38,46 +38,46 @@ If it is not present, the right column will be blank.
Any remaining Wikidata properties that the infobox does not require are listed at the end of the right column.
``` markdown
┌──────────────────┬─────────────────────┐
│ Infobox requires │ Wikidata Earth (Q2) │
├──────────────────┼─────────────────────┤
│ P18 │ P18 │ // property: image
│ P170 │ P170 │ // property: creator
│ P571 │ P571 │ // property: inception
│ │ P31 │ // property: instance of
│ │ P138 │ // property: named after
│ │ P361 │ // property: part of
│ │ .... │
└──────────────────┴─────────────────────┘
┌──────────────────┬─────────────────────┬───────────────
│ Infobox requires │ Wikidata Earth (Q2) │ Property name │
├──────────────────┼─────────────────────┼───────────────
│ P18 │ P18 │ image
│ P170 │ P170 │ creator
│ P571 │ P571 │ inception
│ │ P31 │
│ │ P138 │
│ │ P361 │
│ │ .... │
└──────────────────┴─────────────────────┴───────────────
```
This (shortened) example is a complete match, but if we try a different infobox template on the Earth entry, you can see that it is not a good fit.
To demonstrate, we can apply the software template to the Earth entry:
``` markdown
┌──────────────────┬─────────────────────┐
│ Infobox requires │ Wikidata Earth (Q2) │
├──────────────────┼─────────────────────┤
│ P18 │ P18 │ // property: image
│ P154 │ │
│ P170 │ P170 │ // property: creator
│ P178 │ │
│ P275 │ │
│ P277 │ │
│ P306 │ │
│ P348 │ │
│ P400 │ │
│ P548 │ │
│ P571 │ P571 │ // property: inception
│ P577 │ │
│ P856 │ │
│ P1324 │ │
│ P2096 │ │
│ │ P10 │
│ │ P31 │
│ │ P138 │
│ │ .... │
└──────────────────┴─────────────────────┘
┌──────────────────┬─────────────────────┬───────────────
│ Infobox requires │ Wikidata Earth (Q2) │ Property name │
├──────────────────┼─────────────────────┼───────────────
│ P18 │ P18 │ image
│ P154 │ │
│ P170 │ P170 │ creator
│ P178 │ │
│ P275 │ │
│ P277 │ │
│ P306 │ │
│ P348 │ │
│ P400 │ │
│ P548 │ │
│ P571 │ P571 │ inception
│ P577 │ │
│ P856 │ │
│ P1324 │ │
│ P2096 │ │
│ │ P10 │
│ │ P31 │
│ │ P138 │
│ │ .... │
└──────────────────┴─────────────────────┴───────────────
```
## Templates

@ -88,6 +88,7 @@ impl Cache {
Ok(())
}
#[allow(dead_code)]
fn delete(&mut self) -> std::io::Result<()> {
fs::remove_file(self.storage.base_path.join(self.file_name.clone()))?;
self.property_labels = HashMap::new();
@ -96,17 +97,15 @@ impl Cache {
async fn lookup_property_name(&mut self, pid: u64) -> std::io::Result<String> {
let key = format!("P{pid}");
if self.property_labels.contains_key(&key) {
let name = self.property_labels.get(&key).unwrap().to_string();
return Ok(name);
if let Some(name) = self.property_labels.get(&key) {
return Ok(name.to_string());
}
// If the property name cannot be found, fetch the name
let name = fetch_name_for_wiki_property(pid).await;
// Update cache with new value, so it can be used the next time
self.property_labels
.insert(key, name.clone());
self.property_labels.insert(key, name.clone());
self.sync_to_storage()?;
Ok(name)
@ -114,7 +113,7 @@ impl Cache {
}
/// Creates an ascii table out of the properties
fn format_property_usage(name: String, qid: String, properties_template: Vec<u64>, properties_item: Vec<u64>, ) -> String {
fn format_property_usage(name: String, qid: String, properties_template: Vec<u64>, properties_item: Vec<u64>, properties_labels: Vec<String>) -> String {
let mut ascii_table = AsciiTable::default();
ascii_table.set_max_width(80);
ascii_table
@ -127,13 +126,25 @@ fn format_property_usage(name: String, qid: String, properties_template: Vec<u64
.set_header(format!("Wikidata {name} ({qid})"))
.set_align(Align::Left);
ascii_table
.column(2)
.set_header("Property name")
.set_align(Align::Left);
// Display the property columns as a table
let data: Vec<Vec<String>> = join_columns(&properties_template, &properties_item)
.iter()
.map(|&m| {
.enumerate()
.map(|(index, &m)| {
let (v1, v2) = m;
let property_name = if index >= properties_template.len() {
"".to_string()
} else {
properties_labels[index].clone()
};
vec![ v1.map_or("".to_owned(), |v| format!("P{v}").to_owned())
, v2.map_or("".to_owned(), |v| format!("P{v}").to_owned())]
, v2.map_or("".to_owned(), |v| format!("P{v}").to_owned())
, property_name]
}
).collect();
ascii_table.format(data)
@ -143,7 +154,7 @@ fn format_property_usage(name: String, qid: String, properties_template: Vec<u64
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize cache and storage
let storage = StorageDisk::new("wikipedia-infobox-analyzer")?;
let mut _cache = Cache::new("property_labels.cache".to_string(), storage)?;
let mut cache = Cache::new("property_labels.cache".to_string(), storage)?;
// Input to the program
let args = Cli::from_args();
@ -153,13 +164,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let qid = fetch_wiki_item_by_article_title(title.clone(), language.clone());
let properties_template = extract_used_properties_from_template(template);
let properties_item = fetch_properties_for_wiki_item(qid);
let mut properties_labels: Vec<String> = vec![];
for property in &properties_template {
let name = cache.lookup_property_name(*property).await?;
properties_labels.push(name);
}
let table = format_property_usage(
title.clone(),
format!("Q{qid}"),
properties_template,
properties_item,
properties_labels,
);
// Output of the program
@ -202,20 +220,24 @@ mod tests_main {
"Earth".to_string(),
"Q2".to_string(),
vec![18, 170, 571],
vec![18, 31, 138, 170, 361, 571]
vec![18, 31, 138, 170, 361, 571],
vec![ "image".to_string()
, "creator".to_string()
, "inception".to_string()
]
),
concat![
"┌──────────────────┬─────────────────────┐\n",
"│ Infobox requires │ Wikidata Earth (Q2) │\n",
"├──────────────────┼─────────────────────┤\n",
"│ P18 │ P18 │\n", // property: image
"│ P170 │ P170 │\n", // property: creator
"│ P571 │ P571 │\n", // property: inception
"│ │ P31 │\n", // property: instance of
"│ │ P138 │\n", // property: inception
"│ │ P361 │\n", // property: part of
"└──────────────────┴─────────────────────┘\n",
"┌──────────────────┬─────────────────────┬───────────────┐\n",
"│ Infobox requires │ Wikidata Earth (Q2) │ Property name │\n",
"├──────────────────┼─────────────────────┼───────────────┤\n",
"│ P18 │ P18 │ image │\n",
"│ P170 │ P170 │ creator │\n",
"│ P571 │ P571 │ inception │\n",
"│ │ P31 │\n",
"│ │ P138 │\n",
"│ │ P361 │\n",
"└──────────────────┴─────────────────────┴───────────────┘\n",
]
);
}

Loading…
Cancel
Save