def get_color_variance(img, mask, avg_color):
    '''
    Gets the mask weighted color spread of img around avg_color.

    For every pixel the squared euclidean distance between the pixel color
    and avg_color (summed over all channels) is multiplied by the mask value
    at the same position. The weighted sum is then divided by the sum of the
    mask values.

    img       -- array of shape (sx, sy, channels)
    mask      -- array of shape (sx, sy) holding the weight of each pixel
    avg_color -- color the spread is measured around, must broadcast against
                 a single pixel of img

    Returns a float32 array with one entry per channel. Returns None (after
    printing a message) when img and mask differ in size, and an all zero
    array when the mask values sum to zero.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    weights = np.asarray(mask, dtype=np.float64)
    total_weight = weights.sum()
    if total_weight == 0:
        # nothing lies under the mask, dividing would give 0/0 = NaN
        return np.zeros(sw, dtype=np.float32)
    # cast to float first so integer images cannot wrap around on subtraction
    diff = np.asarray(img, dtype=np.float64) - avg_color
    sq_dist = (diff * diff).sum(axis=2)
    spread = (sq_dist * weights).sum() / total_weight
    # NOTE(review): the squared distance is a scalar, so every channel of the
    # result holds the same value. This mirrors the previous per pixel loop
    # and callers that take the magnitude of the result rely on it; confirm
    # whether a true per channel variance was intended.
    return np.full(sw, spread, dtype=np.float32)
def score_font(char_img, char, font_name, font_size=128):
    '''
    Takes a char_img, the wanted character and a font_name/path
    and calculates the relevant score

    char_img  -- image of the character, indexed as (rows, cols, channels)
    char      -- character that gets rendered with the font
    font_name -- font name/path handed to text_to_matrix
    font_size -- size handed to text_to_matrix, defaults to 128 which should
                 be enough since the rendering is mostly scaled down anyway

    Returns whatever calc_score returns for char_img and the rendered mask.
    '''
    # keep a single channel of the rendering so the mask is two dimensional
    # NOTE(review): assumes text_to_matrix returns a 3 channel image - confirm
    font_img = text_to_matrix(char, font_size, font_name)[::, ::, 1]
    # resize font_img to match char_img dimensions, cv.resize wants the
    # target size as (width, height) which is the reversed array shape
    dim = (char_img.shape[1], char_img.shape[0])
    mask = cv.resize(font_img, dim, interpolation=cv.INTER_LINEAR)
    # get average color
    ac = get_avg_color(char_img, mask)
    var = get_color_variance(char_img, mask, ac)
    # calc_score takes the magnitude of the variance vector
    var_mag = np.sqrt(var.dot(var))
    return calc_score(char_img, mask, ac, var_mag)