"""Gradio demo that predicts an inverse depth map and focal length with DepthPro.

At import time the script runs ``get_pretrained_models.sh``, builds the model
and its preprocessing transform, and moves the model to the selected device.
It then builds a Gradio interface around ``predict_depth`` and launches it.
"""
# NOTE(review): the import order below is kept exactly as in the original
# script rather than regrouped; `spaces` and `torch` may interact at import
# time -- confirm before reordering.
import gradio as gr
from PIL import Image
import src.depth_pro as depth_pro
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import spaces
import torch
import tempfile
import os

# Run the script to get pretrained models.
# The exit status is not checked, so a failure here only surfaces later when
# the model is created.
subprocess.run(["bash", "get_pretrained_models.sh"])

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load model and preprocessing transform
model, transform = depth_pro.create_model_and_transforms()
model = model.to(device)
model.eval()


def resize_image(image_path, max_size=1536):
    """Rescale an image so its longest side equals ``max_size`` and save it as PNG.

    The aspect ratio is preserved. The scale factor is applied unconditionally,
    so images smaller than ``max_size`` are enlarged.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: Target length, in pixels, of the longest side.

    Returns:
        Path of a newly created temporary ``.png`` file. The caller is
        responsible for deleting it.

    Raises:
        Exception: Whatever Pillow raises while opening, resizing or saving
            the image. The temporary file is removed before re-raising.
    """
    with Image.open(image_path) as img:
        # Calculate the new size while maintaining aspect ratio
        ratio = max_size / max(img.size)
        # Clamp each side to at least 1 px: for extreme aspect ratios the
        # short side would otherwise truncate to 0 and make resize() fail.
        new_size = tuple(max(1, int(side * ratio)) for side in img.size)
        # resize() returns a new image, so it stays usable after the source
        # file is closed.
        resized = img.resize(new_size, Image.LANCZOS)

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    try:
        with temp_file:
            resized.save(temp_file, format="PNG")
    except Exception:
        # delete=False means nobody else will clean this up on failure.
        if os.path.exists(temp_file.name):
            os.remove(temp_file.name)
        raise
    return temp_file.name


def _render_inverse_depth(depth):
    """Plot the clipped inverse of ``depth`` and save it to a unique PNG file.

    Args:
        depth: 2D numpy array of depth values.

    Returns:
        Path of the saved PNG. The file is intentionally left on disk because
        the path is handed back to the caller as the function's result.
    """
    # Calculate inverse depth
    inverse_depth = 1.0 / depth
    # Clip inverse depth to 0-10 range
    inverse_depth_clipped = np.clip(inverse_depth, 0, 10)

    # 15.36 in x 100 dpi = 1536 px canvas; the saved file is cropped by
    # bbox_inches='tight', so its final size can differ.
    fig, ax = plt.subplots(figsize=(15.36, 15.36), dpi=100)
    try:
        mappable = ax.imshow(inverse_depth_clipped, cmap='viridis')
        fig.colorbar(mappable, ax=ax, label='Inverse Depth')
        ax.set_title('Predicted Inverse Depth Map')
        ax.axis('off')

        # Use a unique file per call instead of a fixed name in the working
        # directory, so overlapping requests cannot overwrite each other.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".png", prefix="inverse_depth_map_"
        ) as out_file:
            output_path = out_file.name
        try:
            fig.savefig(output_path, dpi=100, bbox_inches='tight', pad_inches=0)
        except Exception:
            if os.path.exists(output_path):
                os.remove(output_path)
            raise
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return output_path


@spaces.GPU(duration=20)
def predict_depth(input_image):
    """Run DepthPro on an image file and render its inverse depth map.

    Args:
        input_image: File path of the uploaded image, or ``None`` when no
            image was provided.

    Returns:
        A ``(image_path, message)`` tuple. On success ``image_path`` points to
        the rendered PNG and ``message`` reports the focal length. On failure
        ``image_path`` is ``None`` and ``message`` describes the error; no
        exception propagates to the caller.
    """
    if input_image is None:
        return None, "An error occurred: no input image provided"

    temp_file = None
    try:
        # Resize the input image
        temp_file = resize_image(input_image)

        # Preprocess the image
        result = depth_pro.load_rgb(temp_file)
        image = result[0]
        f_px = result[-1]  # Assuming f_px is the last item in the returned tuple
        image = transform(image)
        image = image.to(device)

        # Run inference
        prediction = model.infer(image, f_px=f_px)
        depth = prediction["depth"]  # Depth in [m]
        focallength_px = prediction["focallength_px"]  # Focal length in pixels

        # Convert depth to numpy array if it's a torch tensor
        if isinstance(depth, torch.Tensor):
            depth = depth.cpu().numpy()

        # Ensure depth is a 2D numpy array
        if depth.ndim != 2:
            depth = depth.squeeze()

        output_path = _render_inverse_depth(depth)
        return output_path, f"Focal length: {focallength_px:.2f} pixels"
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of raising.
        return None, f"An error occurred: {str(e)}"
    finally:
        # Clean up the temporary file
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)


# Example images
example_images = [
    "examples/lemur.jpg",
]

# Create Gradio interface
iface = gr.Interface(
    fn=predict_depth,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Image(type="filepath", label="Inverse Depth Map", height=768, width=768),
        gr.Textbox(label="Focal Length or Error Message")
    ],
    title="DepthPro Demo",
    description="[DepthPro](https://huggingface.co/apple/DepthPro) is a fast metric depth prediction model. Simply upload an image to predict its inverse depth map and focal length. Large images will be automatically resized to 1536x1536 pixels.",
    examples=example_images
)

# Launch the interface
iface.launch()