import cv2 as cv
import numpy as np
import h5py as h5

from rasterizer import text_to_matrix

# Handle to the open HDF5 database; stays None until init() is called.
db = None


def init_train():
    ''' Default init based on the train set `train.h5` '''
    init('train.h5')


def init(path):
    '''
    Initializes the database, must be called before any use.

    Opens the HDF5 file at `path` read-only and stores the handle in the
    module-level `db`.
    '''
    global db
    db = h5.File(path, 'r')


def needs_init():
    '''
    Checks if the database still has to be initialized.

    Prints a hint and returns True when `db` is None, returns False otherwise.
    '''
    if db is None:
        print('db is none, please use init(path_to_db) first!')
    return db is None


# Extract letter from a bounding box
def extract_bb(img, bb):
    '''
    Extracts a bounding box/letter from the given image.

    The minimum-area rectangle around the points in `bb` is computed, the
    whole image is rotated by that rectangle's angle so the box becomes
    axis aligned, and the box is then cropped out.

    NOTE(review): `bb` is transposed before being handed to OpenCV, so it is
    presumably laid out as (2, n_points); `img` must have three dimensions
    because the crop is transposed with three axes - confirm with callers.
    '''
    # Get the bounding box
    rect = cv.minAreaRect(bb.astype(np.float32).transpose())
    # will be useful later, map center and size to ints
    center, size = tuple(map(int, rect[0])), tuple(map(int, rect[1]))
    # Calculate rotation matrix and rotate the image
    rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # bounding box is now axis aligned, and we can crop it
    cropped = cv.getRectSubPix(rot_img, size, center)
    # swap the two spatial axes and mirror the second one
    return cropped.transpose(1, 0, 2)[::, ::-1, ::]


def _get_dataset(index):
    '''
    Returns the `index`-th entry of the `data` group, or None (after the
    needs_init() hint was printed) when the database is not initialized.
    '''
    if needs_init():
        return None
    group = db['data']
    names = list(group.keys())
    return group[names[index]]


def get_img(index):
    '''
    Gets image from database.

    Returns the full content of the `index`-th dataset, or None when the
    database has not been initialized.
    '''
    dataset = _get_dataset(index)
    if dataset is None:
        return None
    return dataset[:]


def get_attrs(index):
    '''
    Gets attribute dict from the database.

    Returns the `attrs` of the `index`-th dataset, or None when the database
    has not been initialized.
    '''
    dataset = _get_dataset(index)
    if dataset is None:
        return None
    return dataset.attrs


def get_avg_color(img, mask):
    '''
    Gets the mask-weighted average color of an image.

    img  -- array of shape (sx, sy, sw); sw is the number of channels
            (usually 3 for rgb or 1 for grayscale).
    mask -- array of shape (sx, sy) holding the weight of every pixel.

    Returns a float32 vector of length sw, or None (after printing a
    message) when the two spatial sizes differ. If the mask weights sum to
    zero the result is NaN, because the average is undefined.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None

    # Promote to float64 first so integer images/masks cannot wrap around
    # when multiplied.
    pixels = np.asarray(img, dtype=np.float64)
    weights = np.asarray(mask, dtype=np.float64)
    weighted_sum = (pixels * weights[:, :, np.newaxis]).sum(axis=(0, 1))
    # NumPy division: a zero total weight gives NaN instead of raising.
    return (weighted_sum / weights.sum()).astype(np.float32)


def get_color_variance(img, mask, avg_color):
    '''
    Gets color variance under the mask with given avg_color.

    The mask-weighted mean of the squared color distance to `avg_color` is
    computed over all pixels. The value is returned as a float32 vector of
    length sw (the channel count of `img`) whose entries are all equal.

    Returns None (after printing a message) when image and mask sizes
    differ. A mask whose weights sum to zero yields NaN.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None

    weights = np.asarray(mask, dtype=np.float64)
    diff = np.asarray(img, dtype=np.float64) - avg_color
    sq_dist = (diff * diff).sum(axis=2)
    weighted_total = (sq_dist * weights).sum()
    # Same scalar in every channel slot, matching the established return shape.
    var = np.full(sw, weighted_total, dtype=np.float64) / weights.sum()
    return var.astype(np.float32)


def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculates the score of a mask for a given color.

    Every pixel contributes (mask - 0.5) * (var_mag - dist) / var_mag, where
    dist is the euclidean distance between the pixel color and `avg_color`.

    Returns 0.0 (after printing a message) when the image and mask sizes
    differ or when the channel count does not match `avg_color`.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return 0.0
    if sw != avg_color.shape[0]:
        print('Image width doesnt match color width!')
        return 0.0

    diff = np.asarray(img, dtype=np.float64) - avg_color
    dist = np.sqrt((diff * diff).sum(axis=2))  # calculate magnitude
    centered = np.asarray(mask, dtype=np.float64) - 0.5
    # sum(m * (var_mag - d) / var_mag) == sum(m) - sum(d * m) / var_mag,
    # which keeps the loop-invariant var_mag out of the per-pixel work.
    return centered.sum() - (dist * centered).sum() / var_mag


def generate_subimg(img, c_avg, nc_avg):
    '''
    Generates a mask that classifies every pixel by its nearest color.

    A pixel gets 1.0 when its squared color distance to `c_avg` is strictly
    smaller than the one to `nc_avg`, and 0.0 otherwise.

    Returns a float32 array of shape (sx, sy), or None (after printing a
    message) when the channel count of `img` does not match `c_avg`.
    '''
    sx, sy, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')
        return None

    pixels = np.asarray(img, dtype=np.float64)
    dist_c = ((pixels - c_avg) ** 2).sum(axis=2)
    dist_nc = ((pixels - nc_avg) ** 2).sum(axis=2)
    # can be bool actually but float32 because why not
    return (dist_c < dist_nc).astype(np.float32)


def score_font(char_img, char, font_name):
    '''
    Takes a char_img, the wanted character and a font_name/path and
    calculates the relevant score.

    The character is rasterized, resized to the size of `char_img` and used
    as a mask. The score is the squared distance between the average color
    under the mask and the average color under the inverted mask.

    NOTE(review): the inverted mask is computed as `1 - mask`, so the
    rasterizer output is presumably scaled to [0, 1] - confirm.
    '''
    # default to 128, i think it should be enough and we will probably mostly
    # reduce the size anyway, also change from rgb to grayscale
    font_img = text_to_matrix(char, 128, font_name)[::, ::, 1]
    # resize font_img to match char_img dimensions, cv expects (width, height)
    dim = (char_img.shape[1], char_img.shape[0])
    mask = cv.resize(font_img, dim, interpolation=cv.INTER_LINEAR)
    rv = np.ones(mask.shape, dtype=np.float32) - mask
    # get average color
    ac = get_avg_color(char_img, mask)
    rac = get_avg_color(char_img, rv)
    diff = ac - rac
    mag = diff.dot(diff)
    return mag