Vision Optimization
This example demonstrates how to use toonverter’s vision optimization tools to reduce token usage and costs when working with multimodal LLMs.
1"""Example of optimizing images for vision models.
2
3This example demonstrates how to use toonverter's vision optimization tools
4to reduce token usage and costs when working with multimodal LLMs.
5"""
6
7import os
8
9try:
10 from PIL import Image
11 PIL_AVAILABLE = True
12except ImportError:
13 PIL_AVAILABLE = False
14
15import toonverter as toon
16
17
def create_sample_image(path: str) -> None:
    """Write a solid red 2048x2048 RGB test image to ``path``.

    After saving, prints the location and on-disk size (in KB) of the new
    file. When Pillow could not be imported, only a notice is printed and
    nothing is written.

    Args:
        path: Destination file path handed to ``Image.save``.
    """
    if PIL_AVAILABLE:
        side = 2048
        Image.new('RGB', (side, side), color='red').save(path)
        size_kb = os.path.getsize(path) / 1024
        print(f"Created sample image at {path} ({size_kb:.1f} KB)")
    else:
        print("Pillow not installed, skipping image creation.")
27
28
def run_vision_optimization():
    """Run the vision optimization demo end to end.

    Creates a sample image named ``sample_large.png`` in the current working
    directory, reads its raw bytes and calls ``toon.optimize_vision`` twice:

    1. with ``provider="openai"``; the result is unpacked as
       ``(optimized_bytes, mime_type)`` and the optimized size, format and
       percentage saved relative to the original bytes are printed;
    2. with ``provider="anthropic"`` and ``return_payload=True``; the
       result is treated as a mapping and its keys and
       ``payload['source']['media_type']`` are printed.

    The sample image is removed afterwards, even if any step raises. If
    Pillow is not installed, an error message is printed and the function
    returns without doing anything else.
    """
    print("--- Vision Optimization Example ---")

    if not PIL_AVAILABLE:
        print("Error: This example requires Pillow. Install with: pip install Pillow")
        return

    image_path = "sample_large.png"

    try:
        # Create the large sample image inside the ``try`` block: a save that
        # fails part-way can leave an incomplete file on disk, and the
        # ``finally`` clause below must still get the chance to remove it.
        create_sample_image(image_path)

        # Read raw bytes
        with open(image_path, "rb") as f:
            raw_bytes = f.read()

        print(f"Original Size: {len(raw_bytes) / 1024:.1f} KB")

        # 1. Optimize for OpenAI (GPT-4o)
        print("\nOptimizing for OpenAI (GPT-4o)...")
        opt_bytes_openai, mime_openai = toon.optimize_vision(
            raw_bytes,
            provider="openai"
        )
        print(f"Optimized Size: {len(opt_bytes_openai) / 1024:.1f} KB")
        print(f"Format: {mime_openai}")

        # Calculate savings as a percentage of the original byte count
        savings = (len(raw_bytes) - len(opt_bytes_openai)) / len(raw_bytes) * 100
        print(f"Savings: {savings:.1f}%")

        # 2. Get provider-specific payload directly
        print("\nGenerating Anthropic payload...")
        payload = toon.optimize_vision(
            raw_bytes,
            provider="anthropic",
            return_payload=True
        )
        # Payload structure for Anthropic API
        print("Payload keys:", payload.keys())
        print(f"Media Type: {payload['source']['media_type']}")

    finally:
        # Cleanup: the existence check covers the case where creation failed
        # before any file was written.
        if os.path.exists(image_path):
            os.remove(image_path)
            print(f"\nRemoved {image_path}")
77
78
# Entry point: run the demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_vision_optimization()