# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
import math
import random
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

from .load_image import LoadImage

root_dir = Path(__file__).resolve().parent
InputType = Union[str, np.ndarray, bytes, Path, Image.Image]


class VisRes:
    """Draw OCR detection boxes, and optionally their texts, onto an image.

    Calling the instance dispatches to :meth:`draw_dt_boxes` when no texts are
    given and to :meth:`draw_ocr_box_txt` otherwise.
    """

    def __init__(self, text_score: float = 0.5):
        """Store the score threshold and create the image loader.

        Args:
            text_score: Entries whose score is strictly below this value are
                skipped by :meth:`draw_ocr_box_txt`.
        """
        self.text_score = text_score
        # NOTE(review): LoadImage is defined in another module; it is assumed
        # to return a numpy image array because its result is handed directly
        # to cv2 and ``Image.fromarray`` below.
        self.load_img = LoadImage()

    def __call__(
        self,
        img_content: InputType,
        dt_boxes: np.ndarray,
        txts: Optional[Union[List[str], Tuple[str, ...]]] = None,
        scores: Optional[Tuple[float, ...]] = None,
        font_path: Optional[str] = None,
    ) -> np.ndarray:
        """Visualise detection results, with texts when they are supplied.

        Args:
            img_content: Image input forwarded to the loader.
            dt_boxes: Detected boxes, one quadrilateral per entry.
            txts: Recognised text per box. When ``None`` only the boxes and
                their indices are drawn.
            scores: Optional score per box, used to filter low-score entries.
            font_path: Path to a TrueType font; required when ``txts`` is given.

        Returns:
            The rendered image as a numpy array.
        """
        if txts is None:
            return self.draw_dt_boxes(img_content, dt_boxes)
        return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path)

    def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray:
        """Outline every box in a random colour and label it with its index.

        Args:
            img_content: Image input forwarded to the loader.
            dt_boxes: Detected boxes; each is converted with ``np.array`` and
                its first point is used as the label position.

        Returns:
            The loaded image with the outlines and labels drawn onto it.
        """
        img = self.load_img(img_content)

        for idx, box in enumerate(dt_boxes):
            color = self.get_random_color()

            points = np.array(box)
            # Third positional argument is ``isClosed``: draw a closed polygon.
            cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1)

            # The box index is written at the first vertex of the box.
            start_point = round(points[0][0]), round(points[0][1])
            cv2.putText(
                img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3
            )
        return img

    def draw_ocr_box_txt(
        self,
        img_content: InputType,
        dt_boxes: np.ndarray,
        txts: Union[List[str], Tuple[str, ...]],
        scores: Optional[Tuple[float, ...]] = None,
        font_path: Optional[str] = None,
    ) -> np.ndarray:
        """Render a side-by-side view: shaded boxes on the left, texts on the right.

        The left half is the input image blended 50/50 with a copy on which
        every kept box is filled with a colour. The right half is a white
        canvas showing each box outline in the same colour with its text.

        Args:
            img_content: Image input forwarded to the loader.
            dt_boxes: Detected boxes, each with four (x, y) points.
            txts: Recognised text for each box, paired with ``dt_boxes`` by
                position.
            scores: Optional score per box. Entries scoring below
                ``self.text_score`` are skipped.
            font_path: Path to a TrueType font file.

        Returns:
            An RGB numpy array twice as wide as the input image.

        Raises:
            FileNotFoundError: If ``font_path`` is ``None`` or does not exist.
        """
        font_path = self.get_font_path(font_path)

        image = Image.fromarray(self.load_img(img_content))
        h, w = image.height, image.width
        if image.mode == "L":
            image = image.convert("RGB")

        img_left = image.copy()
        img_right = Image.new("RGB", (w, h), (255, 255, 255))

        # A private generator seeded with 0 yields the same colour sequence as
        # ``random.seed(0)`` would, without reseeding the process-wide
        # generator that other code may depend on.
        rng = random.Random(0)

        # Boxes of similar size share a font size; load each size only once.
        font_cache: Dict[int, ImageFont.FreeTypeFont] = {}

        def load_font(size: int):
            if size not in font_cache:
                font_cache[size] = ImageFont.truetype(
                    font_path, size, encoding="utf-8"
                )
            return font_cache[size]

        draw_left = ImageDraw.Draw(img_left)
        draw_right = ImageDraw.Draw(img_right)
        for idx, (box, txt) in enumerate(zip(dt_boxes, txts)):
            if scores is not None and float(scores[idx]) < self.text_score:
                continue

            color = self.get_random_color(rng)

            # Flatten the four (x, y) points into [x0, y0, ..., x3, y3].
            box_list = np.array(box).reshape(8).tolist()
            draw_left.polygon(box_list, fill=color)
            draw_right.polygon(box_list, outline=color)

            box_height = self.get_box_height(box)
            box_width = self.get_box_width(box)
            if box_height > 2 * box_width:
                # Tall, narrow box: treat as vertical text and stack the
                # characters one below another.
                font = load_font(max(int(box_width * 0.9), 10))
                cur_y = box[0][1]
                for c in txt:
                    draw_right.text(
                        (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font
                    )
                    cur_y += self.get_char_size(font, c)
            else:
                font = load_font(max(int(box_height * 0.8), 10))
                draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

        img_left = Image.blend(image, img_left, 0.5)
        img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
        img_show.paste(img_left, (0, 0, w, h))
        img_show.paste(img_right, (w, 0, w * 2, h))
        return np.array(img_show)

    @staticmethod
    def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str:
        """Validate the font path and return it as a string.

        Args:
            font_path: Path to the font file.

        Returns:
            ``font_path`` converted to ``str``.

        Raises:
            FileNotFoundError: If ``font_path`` is ``None`` or does not exist.
        """
        if font_path is None or not Path(font_path).exists():
            raise FileNotFoundError(
                f"The {font_path} does not exists! \n"
                f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
            )
        return str(font_path)

    @staticmethod
    def get_random_color(rng: Optional[random.Random] = None) -> Tuple[int, int, int]:
        """Return a random colour as three integers in ``[0, 255]``.

        Args:
            rng: Generator to draw from. When omitted, the module-level
                ``random`` generator is used.

        Returns:
            A 3-tuple of channel values.
        """
        source = random if rng is None else rng
        return (
            source.randint(0, 255),
            source.randint(0, 255),
            source.randint(0, 255),
        )

    @staticmethod
    def get_box_height(box: List[List[float]]) -> float:
        """Return the Euclidean distance between the box's points 0 and 3."""
        return math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)

    @staticmethod
    def get_box_width(box: List[List[float]]) -> float:
        """Return the Euclidean distance between the box's points 0 and 1."""
        return math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)

    @staticmethod
    def get_char_size(font, char_str: str) -> float:
        """Return the vertical step to advance after drawing ``char_str``.

        ``getbbox`` is preferred because it is available both before and after
        the removal of ``getsize`` in Pillow 10, and its bottom coordinate is
        the same vertical extent that ``getsize()[1]`` reported. This keeps
        vertical text spacing consistent across Pillow versions.

        Args:
            font: A Pillow font object.
            char_str: The text (a single character in practice) to measure.

        Returns:
            The measured size in pixels.

        Raises:
            ValueError: If the font exposes none of the known measuring methods.
        """
        if hasattr(font, "getbbox"):
            # (left, top, right, bottom) relative to the drawing origin.
            return font.getbbox(char_str)[3]

        if hasattr(font, "getsize"):
            # Older Pillow releases without getbbox.
            return font.getsize(char_str)[1]

        if hasattr(font, "getlength"):
            # Last resort: the horizontal advance, used as an approximation.
            return font.getlength(char_str)

        raise ValueError(
            "The Pillow ImageFont instance has not getsize or getlength func."
        )
Memory