Spaces:
Sleeping
Sleeping
import torch | |
import numpy as np | |
from PIL import Image | |
from transformers import AutoImageProcessor, AutoModelForDepthEstimation | |
import streamlit as st | |
import cv2 | |
# Load model and image processor
DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"


@st.cache_resource
def _load_depth_model(model_id: str = DEPTH_MODEL_ID):
    """Load the depth-estimation processor and model once per server process.

    Streamlit re-executes this script from the top on every rerun (for
    example, each time a new picture is captured).  Caching the loader
    keeps the weights in memory instead of rebuilding the model and
    moving it to the device on every interaction.

    Args:
        model_id: Hugging Face Hub identifier of the checkpoint to load.

    Returns:
        A ``(image_processor, model, device)`` tuple.  The model has
        already been moved to ``device``; on CUDA it is converted to
        FP16, so floating-point inputs must be cast to ``model.dtype``
        before the forward pass.
    """
    # Set the device for model (CUDA if available)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    image_processor = AutoImageProcessor.from_pretrained(model_id)
    model = AutoModelForDepthEstimation.from_pretrained(model_id)
    model.to(device)

    # Use FP16 if available (half precision for speed)
    if device.type == "cuda":
        model = model.half()

    return image_processor, model, device


# Module-level names kept so the rest of the script can use them directly.
image_processor, model, device = _load_depth_model()
# Streamlit App: capture a webcam frame, estimate its depth, and show both.
st.title("Depth Estimation from Webcam")

# Capture image from webcam (None until the user takes a picture)
image_data = st.camera_input("Capture an image")

if image_data is not None:
    # Convert the captured image data to a PIL image.  Force 3-channel RGB so
    # the processor never receives a palette, grayscale, or RGBA image.
    image = Image.open(image_data).convert("RGB")

    # Prepare the image for the model.  Floating-point tensors are cast to
    # the model's own dtype: when the model has been converted to FP16,
    # float32 pixel values would otherwise raise a dtype-mismatch error.
    encoded = image_processor(images=image, return_tensors="pt")
    inputs = {
        name: (
            tensor.to(device=device, dtype=model.dtype)
            if torch.is_floating_point(tensor)
            else tensor.to(device)
        )
        for name, tensor in encoded.items()
    }

    # Model inference (no gradients needed)
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate depth map to match the image's dimensions.  Upcast to
    # float32 first so the resampling and the NumPy math below do not run
    # in half precision.
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1).float(),
        size=(image.height, image.width),  # Match the image's dimensions
        mode="bicubic",
        align_corners=False,
    )

    # Convert depth map to numpy for visualization
    depth_map = prediction.squeeze().cpu().numpy()

    # Normalize depth map for display (visualization purposes).
    # Guard against an all-zero map (division by zero) and clip before the
    # uint8 cast: bicubic resampling can undershoot below zero, and negative
    # values would otherwise wrap around when converted to uint8.
    depth_peak = float(np.max(depth_map))
    if depth_peak > 0:
        depth_scaled = depth_map / depth_peak * 255.0
    else:
        depth_scaled = np.zeros_like(depth_map)
    depth_map_normalized = np.clip(depth_scaled, 0, 255).astype(np.uint8)
    depth_map_colored = cv2.applyColorMap(depth_map_normalized, cv2.COLORMAP_JET)

    # Display the original image and the depth map in Streamlit.
    # OpenCV colour maps are in BGR channel order, so tell Streamlit
    # explicitly; otherwise red and blue are swapped on screen.
    st.image(image, caption="Captured Image", use_column_width=True)
    st.image(
        depth_map_colored,
        caption="Depth Map",
        channels="BGR",
        use_column_width=True,
    )