# File size: 2,830 Bytes
# 13607a8
# Lines 1-59
from PIL import Image as PILImage
from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
import re
import cv2
import numpy as np
class Utils:
    """Namespace of stateless helpers for boxes, dialog text and visualisation.

    Every helper is a ``staticmethod``: none of them reads instance or class
    state, and they can be called either as ``Utils.f(...)`` or on an instance.
    """

    @staticmethod
    def xywh2xyxy(b):
        """Convert boxes from ``(x, y, w, h)`` to ``(x1, y1, x2, y2)``.

        The last axis of ``b`` holds the box coordinates. The width/height
        entries are replaced by ``x + w`` / ``y + h``.

        NOTE: the conversion is done IN PLACE -- ``b`` itself is modified and
        the very same object is returned. Pass a copy if the original values
        are still needed. ``b`` must support ellipsis indexing and in-place
        addition (e.g. a NumPy array).
        """
        b[..., 2:] += b[..., :2]
        return b

    @staticmethod
    def bbox_to_sbbox(bbox):
        """Encode one normalised xyxy box as a ``<bin_N>`` token string.

        Args:
            bbox: four coordinates, nominally in ``[0, 1]``.

        Returns:
            Four space-separated tokens ``"<bin_a> <bin_b> <bin_c> <bin_d>"``
            where each value is ``coordinate * 1000`` truncated to an integer
            and limited to ``0..999``.

        Raises:
            AssertionError: if ``bbox`` does not have exactly four entries.
        """
        assert len(bbox) == 4
        # Clip BEFORE the integer cast: casting first (the previous int16
        # cast) overflows for out-of-range coordinates and the later clip
        # then operates on garbage values.
        scaled = np.clip(np.asarray(bbox) * 1000, 0, 999)
        bins = scaled.astype(int).tolist()
        return " ".join(f"<bin_{i}>" for i in bins)

    @staticmethod
    def sbbox_to_bbox(sbbox, set_default: bool = False):
        """Decode ``<bin_N>`` tokens back into normalised boxes.

        The text is split on ``"region:"``; from every piece the first four
        ``<bin_N>`` tokens form one box. Pieces with fewer than four tokens
        are ignored.

        Args:
            sbbox: text containing ``<bin_N>`` tokens.
            set_default: when true and no box could be parsed, fall back to a
                single box ``[0, 0, 1000, 1000]`` (in bin units).

        Returns:
            Float array of shape ``(num_boxes, 4)`` holding ``bin / 1000``
            clipped to ``[1e-5, 1 - 1e-5]``; shape ``(0, 4)`` when empty.
        """
        groups = [
            re.findall(r"<bin_(\d+)>", chunk)[:4]
            for chunk in sbbox.split("region:")
        ]
        # Parse with Python ints into a float array so that absurdly long
        # digit runs cannot overflow a fixed-width integer dtype.
        bbox = np.asarray(
            [[int(tok) for tok in g] for g in groups if len(g) >= 4],
            dtype=float,
        )
        if set_default and len(bbox) == 0:
            bbox = np.asarray([0, 0, 1000, 1000])
        bbox = np.clip(bbox / 1000, 1e-5, 1 - 1e-5)
        return bbox.reshape(-1, 4)

    @staticmethod
    def make_dialog_context(dialog: list, text_human: str = None) -> str:
        """Flatten a dialog history into a ``human:`` / ``agent:`` transcript.

        Args:
            dialog: sequence of ``(human_text, agent_text)`` pairs, e.g.
                ``[("pass me an apple.", "which apple do you want?"), ...]``.
            text_human: optional new human utterance appended as a final
                ``human:`` line (without a trailing newline).

        Returns:
            The transcript; an empty string when there is nothing to render.
        """
        context = "".join(
            f"human: {turn[0]}\nagent: {turn[1]}\n" for turn in dialog
        )
        if text_human is not None:
            context += f"human: {text_human}"
        return context

    # Annotations are quoted so they are not evaluated when the class body
    # is executed.
    @staticmethod
    def show_mask(
        image: "PILImage.Image",
        bboxes=None,
        masks=None,
        show_id=False,
        text_size=1,
    ) -> "PILImage.Image":
        """Draw boxes, box indices and translucent masks onto an image.

        For masks, only the RGB values of the pixels selected by a mask are
        changed: they are blended 50/50 with that mask's colour.

        Args:
            image: input PIL image; it is not modified.
            bboxes: optional boxes, reshaped to ``(-1, 4)``. Coordinates are
                multiplied by ``(width, height, width, height)``, i.e. they
                are normalised ``x1, y1, x2, y2`` values.
            masks: optional iterable of per-pixel masks; non-zero entries mark
                the pixels to tint. The tint layer has three channels, so the
                image is expected to have three channels on this path.
            show_id: when true (and ``bboxes`` is given), write each box's
                index at its top-left corner.
            text_size: OpenCV font scale for the index labels.

        Returns:
            A new PIL image with the overlays drawn.
        """
        import colorsys

        # 36 fully saturated hues evenly spaced around the colour wheel;
        # indices beyond 36 wrap around via the modulo below.
        colors = [
            tuple(int(c * 255) for c in colorsys.hsv_to_rgb(i / 36, 1, 1))
            for i in range(36)
        ]
        size = image.size
        # np.array (not np.asarray) gives a writable copy; a view of the PIL
        # buffer can be read-only, which OpenCV drawing calls reject.
        image = np.array(image)

        if bboxes is not None:
            bboxes = np.array(bboxes).reshape(-1, 4)
            scale = np.asarray([*size, *size])
            # Plain Python ints: OpenCV point arguments must be integers.
            pixel_boxes = (bboxes * scale).astype(int).tolist()
            for k, (x1, y1, x2, y2) in enumerate(pixel_boxes):
                image = cv2.rectangle(
                    image, (x1, y1), (x2, y2),
                    colors[k % len(colors)], thickness=2,
                )
            if show_id:
                # int() keeps the label origin integral for fractional scales.
                y_offset = int(28 * text_size)
                for k, (x1, y1, _, _) in enumerate(pixel_boxes):
                    origin = (x1 + 2, y1 + y_offset)
                    # Thick white pass first, thin coloured pass on top, so
                    # the label stays readable on any background.
                    image = cv2.putText(
                        image, str(k), origin, cv2.FONT_HERSHEY_SIMPLEX,
                        text_size, (255, 255, 255), 2, cv2.LINE_AA,
                    )
                    image = cv2.putText(
                        image, str(k), origin, cv2.FONT_HERSHEY_SIMPLEX,
                        text_size, colors[k % len(colors)], 1, cv2.LINE_AA,
                    )

        if masks is not None:
            for k, mask in enumerate(masks):
                # Force a boolean mask so that ``~`` below is a logical
                # negation even when the mask is stored as 0/1 integers.
                selected = np.asarray(mask).astype(bool)[..., None]
                mask_color = (selected * colors[k % len(colors)][:3]).astype(np.uint8)
                image_mask = cv2.addWeighted(
                    mask_color, 0.5, image * selected, 0.5, 0
                )
                # Untouched pixels outside the mask + blended pixels inside.
                image = cv2.add(image * ~selected, image_mask)

        return PILImage.fromarray(image)
|