102 lines
4 KiB
Rust
102 lines
4 KiB
Rust
![]() |
use base64::{Engine as _, engine::general_purpose::STANDARD};
|
||
|
|
||
|
|
||
|
// module to hold all code for generating/fetching image descriptions
|
||
|
// Input is the image name
|
||
|
// Output is a String containing the image description
|
||
|
/// Connection settings for the OpenAI chat completions request.
///
/// `Debug` is implemented by hand so that the API key never ends up in logs
/// or panic messages.
#[derive(Clone)]
pub struct ChatGPTConfig {
    /// Secret key sent as the `Bearer` token in the `Authorization` header.
    pub openai_api_key: String,
    /// URL the description request is POSTed to.
    pub openai_api_url: String,
    /// Value sent as the `model` field of the request body.
    pub openai_model: String,
}

impl std::fmt::Debug for ChatGPTConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ChatGPTConfig")
            // The key is a credential: show that it exists, never its value.
            .field("openai_api_key", &"<redacted>")
            .field("openai_api_url", &self.openai_api_url)
            .field("openai_model", &self.openai_model)
            .finish()
    }
}
|
||
|
|
||
|
/// Connection settings for an Ollama server.
///
/// No code in this file reads these fields yet; the Ollama backend is still a
/// stub. `Debug` is implemented by hand so that the API key never ends up in
/// logs or panic messages.
#[derive(Clone)]
pub struct OllamaConfig {
    /// Secret key for the Ollama server.
    pub ollama_api_key: String,
    /// URL of the Ollama API endpoint.
    pub ollama_api_url: String,
    /// Name of the model to use.
    pub ollama_model: String,
}

impl std::fmt::Debug for OllamaConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("OllamaConfig")
            // The key is a credential: show that it exists, never its value.
            .field("ollama_api_key", &"<redacted>")
            .field("ollama_api_url", &self.ollama_api_url)
            .field("ollama_model", &self.ollama_model)
            .finish()
    }
}
|
||
|
|
||
|
/// Settings for reading image descriptions from caption files on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileConfig {
    /// Suffix appended verbatim to the image path to form the caption file
    /// path (no separator is inserted, so include a leading dot if needed).
    pub caption_extension: String,
}
|
||
|
|
||
|
// Fetch the image description from a caption file named after the image
|
||
|
pub async fn get_description_from_file(image_name: String , file_config: FileConfig) -> Result<String, Box<dyn super::Error>> {
|
||
|
//read image caption from a local file that
|
||
|
//has the same name than the image with the extension ".caption.txt"
|
||
|
let caption_extension = file_config.caption_extension;
|
||
|
let captionname = format!("{}{}", image_name, caption_extension);
|
||
|
|
||
|
println!("Looking for {}",captionname);
|
||
|
let caption_data = tokio::fs::read_to_string(captionname).await.map_err(|e| format!("Failed to read caption from file: {}", e))?;
|
||
|
|
||
|
Ok(caption_data)
|
||
|
}
|
||
|
|
||
|
// fetch image description from ChatGPT
|
||
|
pub async fn get_description_from_chatgpt(image_name: String, chatgpt_config: self::ChatGPTConfig) -> Result<String, Box<dyn super::Error>> {
|
||
|
// Read and encode image
|
||
|
let image_data = tokio::fs::read(image_name)
|
||
|
.await
|
||
|
.map_err(|e| format!("Failed to read image file: {}", e))?;
|
||
|
|
||
|
|
||
|
// Base64 encode the image for ChatGTP API
|
||
|
let base64_image = STANDARD.encode(image_data);
|
||
|
|
||
|
// Create ChatGPT API request
|
||
|
let client = reqwest::Client::new();
|
||
|
let response = client
|
||
|
.post(chatgpt_config.openai_api_url)
|
||
|
.header("Authorization", format!("Bearer {}", chatgpt_config.openai_api_key))
|
||
|
.header("Content-Type", "application/json")
|
||
|
.json(&super::json!({
|
||
|
"model": chatgpt_config.openai_model,
|
||
|
"max_tokens": 300,
|
||
|
"messages": [
|
||
|
{
|
||
|
"role": "user",
|
||
|
"content": [
|
||
|
{
|
||
|
"type": "text",
|
||
|
"text": "Please describe this image concisely for use as an alt text description. Focus on key visual elements and context."
|
||
|
},
|
||
|
{
|
||
|
"type": "image_url",
|
||
|
"image_url": {
|
||
|
"url": format!("data:image/jpeg;base64,{}", base64_image)
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
}))
|
||
|
.send()
|
||
|
.await?;
|
||
|
|
||
|
// Improved error handling for API response
|
||
|
if !response.status().is_success() {
|
||
|
let error_text = response.text().await?;
|
||
|
return Err(format!("OpenAI API error: {}", error_text).into());
|
||
|
}
|
||
|
|
||
|
let result: super::Value = response.json().await?;
|
||
|
|
||
|
// More detailed error handling for JSON parsing
|
||
|
let description = result["choices"]
|
||
|
.get(0)
|
||
|
.ok_or("No choices in response")?
|
||
|
["message"]["content"]
|
||
|
.as_str()
|
||
|
.ok_or("Invalid content format in response")?
|
||
|
.to_string();
|
||
|
|
||
|
Ok(description)
|
||
|
}
|
||
|
|
||
|
// Fetch the image description from a self-hosted Ollama server
|
||
|
/// Placeholder for fetching an image description from an Ollama server.
///
/// Neither argument is used yet: the function always succeeds and returns the
/// literal text `"Not implemented yet"`.
pub async fn get_description_from_ollama(image_name: String, ollama_config: OllamaConfig) -> Result<String, Box<dyn super::Error>> {
    // Explicitly mark the inputs as intentionally unused until the backend exists.
    let _ = (&image_name, &ollama_config);

    let placeholder = String::from("Not implemented yet");
    Ok(placeholder)
}
|