// insta-import-pixelfed/src/image_description.rs
use base64::{Engine as _, engine::general_purpose::STANDARD};
// module to hold all code for generating/fetching image descriptions
// Input is the image name
// Output is a String containing the image description
/// Configuration for the OpenAI / ChatGPT image-description backend.
#[derive(Debug, Clone)]
pub struct ChatGPTConfig {
    /// API key sent as a `Bearer` token in the `Authorization` header.
    pub openai_api_key: String,
    /// Full URL of the chat-completions endpoint.
    pub openai_api_url: String,
    /// Model identifier; must be a vision-capable model for image input.
    pub openai_model: String,
}
/// Configuration for a self-hosted Ollama image-description backend.
#[derive(Debug, Clone)]
pub struct OllamaConfig {
    /// API key for the Ollama server (may be empty for open servers).
    pub ollama_api_key: String,
    /// Base URL of the Ollama API endpoint.
    pub ollama_api_url: String,
    /// Model identifier to request descriptions from.
    pub ollama_model: String,
}
/// Configuration for the file-based caption backend.
#[derive(Debug, Clone)]
pub struct FileConfig {
    /// Extension appended to the image file name to locate its caption file
    /// (e.g. ".caption.txt").
    pub caption_extension: String,
}
// fetch the image description from a file named like the image
/// Read the caption for `image_name` from a sibling file whose name is the
/// image name with `file_config.caption_extension` appended
/// (e.g. "pic.jpg" + ".caption.txt" -> "pic.jpg.caption.txt").
pub async fn get_description_from_file(image_name: String , file_config: FileConfig) -> Result<String, Box<dyn super::Error>> {
    // Build the expected caption path by concatenating name and extension.
    let caption_path = format!("{}{}", image_name, file_config.caption_extension);
    println!("Looking for {}",caption_path);
    // Read the whole caption file; surface a readable error on failure.
    match tokio::fs::read_to_string(&caption_path).await {
        Ok(text) => Ok(text),
        Err(e) => Err(format!("Failed to read caption from file: {}", e).into()),
    }
}
// fetch image description from ChatGPT
/// Fetch an image description from ChatGPT (OpenAI-compatible chat API).
///
/// Reads the image from disk, base64-encodes it into a `data:` URL and asks
/// the configured model for a concise alt-text description.
///
/// # Errors
/// Fails when the image cannot be read, the HTTP request fails, the API
/// returns a non-success status, or the response JSON lacks the expected
/// `choices[0].message.content` field.
pub async fn get_description_from_chatgpt(image_name: String, chatgpt_config: self::ChatGPTConfig) -> Result<String, Box<dyn super::Error>> {
    // Pick the MIME type from the file extension instead of hard-coding JPEG,
    // so PNG/GIF/WebP images are labelled correctly in the data URL; unknown
    // extensions fall back to "image/jpeg" (the previous behaviour).
    let mime_type = match std::path::Path::new(&image_name)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(|ext| ext.to_ascii_lowercase())
        .as_deref()
    {
        Some("png") => "image/png",
        Some("gif") => "image/gif",
        Some("webp") => "image/webp",
        _ => "image/jpeg",
    };
    // Read and encode image
    let image_data = tokio::fs::read(image_name)
        .await
        .map_err(|e| format!("Failed to read image file: {}", e))?;
    // Base64 encode the image for the ChatGPT API
    let base64_image = STANDARD.encode(image_data);
    // Create ChatGPT API request
    let client = reqwest::Client::new();
    let response = client
        .post(chatgpt_config.openai_api_url)
        .header("Authorization", format!("Bearer {}", chatgpt_config.openai_api_key))
        .header("Content-Type", "application/json")
        .json(&super::json!({
            "model": chatgpt_config.openai_model,
            "max_tokens": 300,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please describe this image concisely for use as an alt text description. Focus on key visual elements and context."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": format!("data:{};base64,{}", mime_type, base64_image)
                            }
                        }
                    ]
                }
            ]
        }))
        .send()
        .await?;
    // Surface the API's own error text instead of just a status code.
    if !response.status().is_success() {
        let error_text = response.text().await?;
        return Err(format!("OpenAI API error: {}", error_text).into());
    }
    let result: super::Value = response.json().await?;
    // Navigate choices[0].message.content defensively rather than panicking
    // on a malformed response.
    let description = result["choices"]
        .get(0)
        .ok_or("No choices in response")?
        ["message"]["content"]
        .as_str()
        .ok_or("Invalid content format in response")?
        .to_string();
    Ok(description)
}
// fetch image description from own OLLAMA server
pub async fn get_description_from_ollama(image_name: String, ollama_config: OllamaConfig) -> Result<String, Box<dyn super::Error>> {
Ok("Not implemented yet".to_string())
}