|
|
|
@ -17,13 +17,13 @@ struct Cli { |
|
|
|
|
infobox_template_file: std::path::PathBuf, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[derive(Debug)] |
|
|
|
|
/// Handle to an on-disk storage location rooted at a single directory.
///
/// The handle only holds a path, so cloning it yields another handle to the
/// same directory rather than a copy of the directory's contents.
#[derive(Clone, Debug)]
struct StorageDisk {
    /// Directory that this storage reads from and writes to.
    base_path: PathBuf,
}
|
|
|
|
|
|
|
|
|
impl StorageDisk { |
|
|
|
|
fn new(app_name: &str) -> Self { |
|
|
|
|
fn new(app_name: &str) -> std::io::Result<Self> { |
|
|
|
|
// Get OS-specific cache directory, otherwise use a fallback location
|
|
|
|
|
let mut path = match cache_dir() { |
|
|
|
|
Some(path) => path, |
|
|
|
@ -31,15 +31,10 @@ impl StorageDisk { |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
path.push(app_name); |
|
|
|
|
fs::create_dir_all(&path) |
|
|
|
|
.unwrap_or_else(|_| panic!("Failed to create storage directory: {path:?}")); |
|
|
|
|
|
|
|
|
|
StorageDisk { base_path: path } |
|
|
|
|
match fs::create_dir_all(&path) { |
|
|
|
|
Ok(_) => Ok(StorageDisk { base_path: path }), |
|
|
|
|
Err(_) => panic!("Failed to create storage directory: {path:?}"), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[allow(dead_code)] |
|
|
|
|
fn delete(&self) -> std::io::Result<()> { |
|
|
|
|
fs::remove_dir_all(&self.base_path) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn store_data(&self, filename: String, data: String) -> std::io::Result<()> { |
|
|
|
@ -61,41 +56,68 @@ struct Cache { |
|
|
|
|
|
|
|
|
|
impl Cache { |
|
|
|
|
fn new(file_name: String, storage: StorageDisk) -> std::io::Result<Self> { |
|
|
|
|
let mut property_labels: HashMap<String, String> = HashMap::new(); |
|
|
|
|
|
|
|
|
|
match storage.read_data(file_name.clone()) { |
|
|
|
|
let property_labels: HashMap<String, String> = match storage.read_data(file_name.clone()) { |
|
|
|
|
Ok(raw_data) => { |
|
|
|
|
property_labels = serde_json::from_str(&raw_data)?; |
|
|
|
|
serde_json::from_str(&raw_data)? |
|
|
|
|
} |
|
|
|
|
Err(err) if err.kind() == io::ErrorKind::NotFound => { |
|
|
|
|
println!("No cache found, populating the program cache with wikidata..."); |
|
|
|
|
|
|
|
|
|
property_labels = HashMap::new(); |
|
|
|
|
property_labels.insert("P31".to_string(), "instance of".to_string()); |
|
|
|
|
println!("No cache found, starting an empty cache"); |
|
|
|
|
|
|
|
|
|
let data = serde_json::to_string(&property_labels)?; |
|
|
|
|
storage.store_data(file_name.clone(), data)?; |
|
|
|
|
|
|
|
|
|
println!("Cache populated from wikidata") |
|
|
|
|
HashMap::new() |
|
|
|
|
} |
|
|
|
|
Err(err) => { |
|
|
|
|
println!("Unknown error occurred, exiting gracefully"); |
|
|
|
|
return Err(err); |
|
|
|
|
} |
|
|
|
|
return Err(err) |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
Ok(Cache { |
|
|
|
|
let cache = Cache { |
|
|
|
|
file_name, |
|
|
|
|
storage, |
|
|
|
|
property_labels, |
|
|
|
|
}) |
|
|
|
|
property_labels |
|
|
|
|
}; |
|
|
|
|
cache.sync_to_storage()?; |
|
|
|
|
|
|
|
|
|
Ok(cache) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn sync_to_storage(&self) -> std::io::Result<()> { |
|
|
|
|
let data = serde_json::to_string(&self.property_labels)?; |
|
|
|
|
self.storage.store_data(self.file_name.clone(), data)?; |
|
|
|
|
Ok(()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn delete(&mut self) -> std::io::Result<()> { |
|
|
|
|
fs::remove_file(self.storage.base_path.join(self.file_name.clone()))?; |
|
|
|
|
self.property_labels = HashMap::new(); |
|
|
|
|
Ok(()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
async fn lookup_property_name(&mut self, pid: u64) -> std::io::Result<String> { |
|
|
|
|
let key = format!("P{pid}"); |
|
|
|
|
if self.property_labels.contains_key(&key) { |
|
|
|
|
let name = self.property_labels.get(&key).unwrap().to_string(); |
|
|
|
|
return Ok(name); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// If the property name cannot be found, fetch the name
|
|
|
|
|
let name = fetch_name_for_wiki_property(pid).await; |
|
|
|
|
|
|
|
|
|
// Update cache with new value, so it can be used the next time
|
|
|
|
|
self.property_labels |
|
|
|
|
.insert(key, name.clone()); |
|
|
|
|
self.sync_to_storage()?; |
|
|
|
|
|
|
|
|
|
Ok(name) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
fn main() -> Result<(), Box<dyn std::error::Error>> { |
|
|
|
|
|
|
|
|
|
#[tokio::main] |
|
|
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> { |
|
|
|
|
// Initialize cache and storage
|
|
|
|
|
let storage = StorageDisk::new("wikipedia-infobox-analyzer"); |
|
|
|
|
let cache = Cache::new("property_labels.cache".to_string(), storage); |
|
|
|
|
let storage = StorageDisk::new("wikipedia-infobox-analyzer")?; |
|
|
|
|
let mut _cache = Cache::new("property_labels.cache".to_string(), storage)?; |
|
|
|
|
|
|
|
|
|
// Input to the program
|
|
|
|
|
let args = Cli::from_args(); |
|
|
|
@ -119,3 +141,31 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { |
|
|
|
|
|
|
|
|
|
Ok(()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests_main {
    use super::*;

    /// Walks one cache through its lifecycle: delete, look up a property,
    /// reopen the same cache file, and clean up.
    #[tokio::test]
    async fn test_cache() {
        // Every cache in this test is backed by the same file name.
        let cache_file = || "property_labels.test.cache".to_string();
        let storage = StorageDisk::new("wikipedia-infobox-analyzer").unwrap();

        let mut cache = Cache::new(cache_file(), storage.clone()).unwrap();
        let _ = cache.delete();

        // Cache should be clear after deletion
        assert!(cache.property_labels.is_empty());

        let label = cache.lookup_property_name(31).await.unwrap();
        assert_eq!(label, "instance of");

        // Cache should store values that are looked up
        assert_eq!(
            cache.property_labels.get("P31").map(String::as_str),
            Some("instance of")
        );

        // Cache should be persistent if the same file is passed
        let mut reopened = Cache::new(cache_file(), storage).unwrap();
        assert_eq!(
            reopened.property_labels.get("P31").map(String::as_str),
            Some("instance of")
        );

        // Cleanup
        let _ = reopened.delete();
    }
}
|
|
|
|