From 6c00ba9c76b359820385c637c0e11ba155288502 Mon Sep 17 00:00:00 2001 From: Mike Gifford Date: Tue, 28 Jan 2025 14:08:14 +0100 Subject: [PATCH] Update ollama code --- alt-text-generator.py | 108 ++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/alt-text-generator.py b/alt-text-generator.py index 985fc6f..01f28ad 100644 --- a/alt-text-generator.py +++ b/alt-text-generator.py @@ -5,6 +5,8 @@ import argparse import csv +import json +import time import logging import requests from PIL import Image @@ -41,9 +43,9 @@ def validate_anthropic_key(selected_model): "Consider simplifying the text.", "Alt text is too short. Provide more context." ] +# Enable logging with timestamps +logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s") -# Set up logging for debugging -# logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s") def generate_with_anthropic(prompt): """Generate text using Anthropic's Claude API.""" @@ -122,34 +124,30 @@ def send_request_with_retry(payload, timeout=60): response.raise_for_status() return response -def generate_with_ollama(image_path_or_url, prompt, model_name="llama3.3:latest"): - """ - Generate alt text using a hosted Ollama model with improved response handling. - """ + +def generate_with_ollama(image_path_or_url, prompt, model_name="llama3.2-vision:latest"): + """Generate text using a hosted Ollama model with enhanced streaming response handling.""" try: - # Debugging: Check if the input is a URL or a local file + logging.info("Starting Ollama text generation...") + + # Determine if input is a URL or local file if image_path_or_url.startswith("http"): - # Fetch the image from the URL - response = requests.get(image_path_or_url, stream=True, timeout=30) + logging.debug("Fetching image from URL...") + response = requests.get(image_path_or_url, stream=True) response.raise_for_status() - content_type = response.headers.get("Content-Type", "") - if not content_type.startswith("image/"): - raise ValueError(f"Invalid content type for image: {content_type}") image_data = response.content - logging.info("Image fetched successfully from URL.") + logging.info("Image fetched successfully.") else: - # Read the image from a local file + logging.debug("Reading image from local file...") with open(image_path_or_url, "rb") as image_file: image_data = image_file.read() - logging.info("Image read successfully from file.") + logging.info("Image read successfully.") - # Encode the image to Base64 + # Encode image to Base64 base64_image = base64.b64encode(image_data).decode("utf-8") - if not base64_image: - raise ValueError("Base64 encoding failed. The image might be invalid.") logging.info("Image successfully encoded to Base64.") - # Create a specific prompt for alt text generation + # Prepare payload formatted_prompt = ( "Generate alt text for an image. Respond ONLY with the text that should go inside " "the alt attribute of an img tag. Do not include 'Alt text:', explanations, quotes, " @@ -162,49 +160,45 @@ def generate_with_ollama(image_path_or_url, prompt, model_name="llama3.3:latest" "images": [base64_image], } - # Debugging: Print the payload being sent to the API - logging.debug(f"Payload being sent to Ollama API: {payload}") - - # Send the request to Ollama API with retry logic - response = send_request_with_retry(payload, timeout=60) - response_data = response.json() - - # Debugging: Print the raw response from the API - logging.debug(f"Raw response from Ollama API: {response_data}") - - # Extract the generated text from the response - if "response" in response_data: - generated_text = response_data["response"].strip() - elif "text" in response_data: - generated_text = response_data["text"].strip() - else: - return "Error: Unexpected response structure from Ollama API." - - # Post-process the generated text - prefixes_to_remove = [ - "Alt text:", "Here is", "The alt text is", "I suggest", - "Based on the image,", "A concise alt text would be", - "Here's a descriptive alt text:", "The appropriate alt text is" - ] - for prefix in prefixes_to_remove: - if generated_text.lower().startswith(prefix.lower()): - generated_text = generated_text[len(prefix):].strip() - - # Clean up unnecessary punctuation and ensure proper capitalization - generated_text = generated_text.strip('"\'").,: ') - if generated_text: - generated_text = generated_text[0].upper() + generated_text[1:] + "." - - return generated_text + # Send request to Ollama API + logging.info("Sending request to Ollama API...") + start_time = time.time() + response = requests.post(OLLAMA_API_URL, json=payload, timeout=90) + elapsed_time = time.time() - start_time + logging.info(f"Response received from Ollama API in {elapsed_time:.2f} seconds.") + + # Parse the response containing multiple JSON objects + raw_response = response.text + logging.debug(f"Raw response text from Ollama API: {raw_response}") + + # Split and parse each JSON object + final_text = "" + for line in raw_response.splitlines(): + try: + json_object = json.loads(line) + if "response" in json_object: + final_text += json_object["response"] + if json_object.get("done", False): + break # Stop if "done" is true + except json.JSONDecodeError as e: + logging.error(f"JSON decoding failed for line: {line}. Error: {e}") + continue + + # Clean and format the final text + final_text = final_text.strip('"\'").,: ') + if final_text: + final_text = final_text[0].upper() + final_text[1:] + "." + logging.info("Alt text generated successfully.") + return final_text except requests.exceptions.Timeout: logging.error("Request to Ollama API timed out.") return "Error: Request to Ollama API timed out." - except requests.exceptions.HTTPError as http_err: - logging.error(f"HTTP Error: {http_err}") - return f"Error: Failed to connect to Ollama API: {http_err}" + except requests.exceptions.RequestException as req_err: + logging.error(f"HTTP Error: {req_err}") + return f"Error: Failed to connect to Ollama API: {req_err}" except Exception as e: - logging.error(f"Error: {e}") + logging.error(f"Unexpected error: {e}") return f"Error generating alt text: {str(e)}"