//! image_description.rs — generate or fetch image descriptions (alt text).
use base64::{Engine as _, engine::general_purpose::STANDARD};
// Module holding all code for generating/fetching image descriptions.
// Input: the image file name.
// Output: a String containing the image description.
/// Settings for the OpenAI (ChatGPT) description backend.
///
/// All fields are plain strings supplied by the caller; `openai_api_url`
/// is the full chat-completions endpoint URL.
#[derive(Debug, Clone)]
pub struct ChatGPTConfig {
    pub openai_api_key: String,
    pub openai_api_url: String,
    pub openai_model: String,
}
/// Settings for the self-hosted Ollama description backend.
///
/// Mirrors [`ChatGPTConfig`]: API key, endpoint URL, and model name.
#[derive(Debug, Clone)]
pub struct OllamaConfig {
    pub ollama_api_key: String,
    pub ollama_api_url: String,
    pub ollama_model: String,
}
/// Settings for the sidecar-file description backend.
#[derive(Debug, Clone)]
pub struct FileConfig {
    /// Suffix appended to the image name to locate its caption file
    /// (e.g. ".caption.txt").
    pub caption_extension: String,
}
// fetch the imagedescription from a file named like the Image
pub fn get_description_from_file(image_name: String , file_config: FileConfig) -> Result<String, Box<dyn super::Error>> {
2025-03-03 22:41:36 +01:00
//read image caption from a local file that
//has the same name than the image with the extension ".caption.txt"
let caption_extension = file_config.caption_extension;
let captionname = format!("{}{}", image_name, caption_extension);
println!("Looking for {}",captionname);
let caption_data = std::fs::read_to_string(captionname);
2025-03-03 22:41:36 +01:00
Ok(caption_data.unwrap())
2025-03-03 22:41:36 +01:00
}
// fetch image description from ChatGPT
pub fn get_description_from_chatgpt(image_name: String, chatgpt_config: self::ChatGPTConfig) -> Result<String, Box<dyn super::Error>> {
2025-03-03 22:41:36 +01:00
// Read and encode image
let image_data = std::fs::read(image_name)?;
2025-03-03 22:41:36 +01:00
// Base64 encode the image for ChatGTP API
let base64_image = STANDARD.encode(image_data);
// Create ChatGPT API request
let client = reqwest::blocking::Client::new();
2025-03-03 22:41:36 +01:00
let response = client
.post(chatgpt_config.openai_api_url)
.header("Authorization", format!("Bearer {}", chatgpt_config.openai_api_key))
.header("Content-Type", "application/json")
.json(&super::json!({
"model": chatgpt_config.openai_model,
"max_tokens": 300,
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Please describe this image concisely for use as an alt text description. Focus on key visual elements and context."
},
{
"type": "image_url",
"image_url": {
"url": format!("data:image/jpeg;base64,{}", base64_image)
}
}
]
}
]
}))
.send();
2025-03-03 22:41:36 +01:00
// Improved error handling for API response
//if !response.unwrap().status().is_success() {
// let error_text = response.unwrap_err();
// return Err(format!("OpenAI API error: ", std::error.box(error_text));
//}
2025-03-03 22:41:36 +01:00
let result: super::Value = response.unwrap().json()?;
2025-03-03 22:41:36 +01:00
// More detailed error handling for JSON parsing
let description = result["choices"]
.get(0)
.ok_or("No choices in response")?
["message"]["content"]
.as_str()
.ok_or("Invalid content format in response")?
.to_string();
Ok(description)
}
/// Fetches an image description from a self-hosted Ollama server.
///
/// Placeholder: always returns the fixed string "Not implemented yet";
/// the parameters are accepted but not yet used.
pub fn get_description_from_ollama(image_name: String, ollama_config: OllamaConfig) -> Result<String, Box<dyn super::Error>> {
    let placeholder = String::from("Not implemented yet");
    Ok(placeholder)
}