Compare commits

...

4 commits

3 changed files with 68 additions and 8 deletions

View file

@ -14,8 +14,11 @@ as the sum of the different pixel scores.
For a given pixel (`po` in the original image and `pm` in the mask, at the same position),
its score is calculated as follows:
`v` for variance
``` ```
S_p = | po - acolor | x (0.5 - pm) ||po - acolor|| - ||v - acolor||
S_p = (|po - acolor| - v) x (0.5 - pm)
``` ```
it is assumed that the font mask is of values between `0..1` and made as a it is assumed that the font mask is of values between `0..1` and made as a
@ -26,6 +29,10 @@ variations of where the letter should be, while also taking into
consideration the fact that the background should be of different consideration the fact that the background should be of different
color. color.
I seem to be missing something in the original idea, as some fonts get a better
score on incorrect guesses with bigger color variance, and others get
the smallest color variance on some other fonts.
## Potential improvements ## Potential improvements
Some potential improvements would be: Some potential improvements would be:

View file

@ -1,6 +1,7 @@
import cv2 as cv import cv2 as cv
import numpy as np import numpy as np
import h5py as h5 import h5py as h5
from rasterizer import text_to_matrix
db = None db = None
@ -30,9 +31,8 @@ def extract_bb(img, bb):
rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1) rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0])) rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
# bounding box is now axis aligned, and we can crop it # bounding box is now axis aligned, and we can crop it
print(size)
cropped = cv.getRectSubPix(rot_img, size, center) cropped = cv.getRectSubPix(rot_img, size, center)
return cropped return cropped.transpose(1, 0, 2)[::, ::-1, ::]
def get_img(index): def get_img(index):
''' gets image from database ''' ''' gets image from database '''
@ -73,7 +73,25 @@ def get_avg_color(img, mask):
avg /= count avg /= count
return avg return avg
def get_color_variance(img, mask, avg_color):
    '''
    Gets color variance under the mask with given avg_color

    The value computed is the mask-weighted mean of the squared distance
    between every pixel and avg_color:

        sum(mask * |img - avg_color|^2) / sum(mask)

    img       -- array with three axes; the last axis is the color depth
    mask      -- 2D array of per-pixel weights, same first two dims as img
    avg_color -- color to measure the distance from (must broadcast
                 against the last axis of img)

    Returns a float32 array of length `depth` in which every entry holds
    that same scalar value, or None (after printing a message) when the
    image and mask sizes differ or when the mask has no weight at all.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None

    # Work in float64 so that integer images/colors (e.g. uint8) cannot
    # wrap around when subtracted or squared.
    diff = np.asarray(img, dtype=np.float64) - np.asarray(avg_color, dtype=np.float64)
    weights = np.asarray(mask, dtype=np.float64)

    total_weight = weights.sum()
    if total_weight == 0:
        # Nothing lies under the mask; dividing would only produce NaN.
        print('Mask has no weight, cannot compute variance!')
        return None

    # Squared distance per pixel, then the weighted mean over the image.
    sq_dist = (diff * diff).sum(axis=-1)
    value = (sq_dist * weights).sum() / total_weight

    # Same shape/dtype the per-pixel accumulation used to produce.
    return np.full(sw, value, dtype=np.float32)
def calc_score(img, mask, avg_color, var_mag):
''' '''
Calculates the score for each mask with each color Calculates the score for each mask with each color
''' '''
@ -88,8 +106,43 @@ def calc_score(img, mask, avg_color):
score = 0.0 score = 0.0
for x in range(sx): for x in range(sx):
for y in range(sy): for y in range(sy):
m = 0.5 - mask[x, y] m = mask[x, y] - 0.5
diff = img[x, y] - avg_color diff = img[x, y] - avg_color
mag = np.sqrt(diff.dot(diff)) # calculate magnitude mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
score += mag * m score += mag * m / var_mag
return score return score
def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a binary map telling which of two colors each pixel is closer to

    img    -- array with three axes; the last axis is the color depth
    c_avg  -- first reference color
    nc_avg -- second reference color

    Returns a float32 array with the first two dimensions of img, holding
    1.0 where the pixel is strictly closer (squared euclidean distance)
    to c_avg than to nc_avg, and 0.0 otherwise (ties give 0.0).

    A depth mismatch between img and c_avg is only reported with a
    message; the computation is still attempted afterwards.
    '''
    _rows, _cols, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')

    # Work in float64 so that integer images/colors (e.g. uint8) cannot
    # wrap around when subtracted or squared, which would flip the result.
    pixels = np.asarray(img, dtype=np.float64)
    da = pixels - np.asarray(c_avg, dtype=np.float64)
    dn = pixels - np.asarray(nc_avg, dtype=np.float64)

    # Squared distances are enough for a comparison, no sqrt needed.
    mag_a = (da * da).sum(axis=-1)
    mag_n = (dn * dn).sum(axis=-1)

    # can be bool actually, but float32 is kept as the output type
    return (mag_a < mag_n).astype(np.float32)
def score_font(char_img, char, font_name):
    '''
    Scores how well `char`, drawn with the font `font_name` (name or
    path), separates the colors of `char_img`.

    The character is rendered, one channel of the rendering is resized to
    the dimensions of char_img and used as a mask. The returned score is
    the squared distance between the average color under that mask and
    the average color under (1 - mask).
    '''
    # 128 is the default render size; it should be plenty since the
    # rendering is resized to the character image right after.
    render_size = 128
    rendered = text_to_matrix(char, render_size, font_name)
    # keep a single channel (index 1) instead of the full color rendering
    glyph = rendered[:, :, 1]

    # cv.resize expects the target size as (width, height)
    height = char_img.shape[0]
    width = char_img.shape[1]
    mask = cv.resize(glyph, [width, height], interpolation=cv.INTER_LINEAR)

    # NOTE(review): treating this as the complement of the mask assumes
    # the mask values lie in 0..1 -- confirm against text_to_matrix.
    inverse_mask = np.ones(mask.shape, dtype=np.float32) - mask

    # average colors under the mask and under its inverse
    masked_avg = get_avg_color(char_img, mask)
    unmasked_avg = get_avg_color(char_img, inverse_mask)

    separation = masked_avg - unmasked_avg
    return separation.dot(separation)

View file

@ -3,7 +3,7 @@ import numpy as np
def text_to_matrix(text, size, font): def text_to_matrix(text, size, font):
pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic") pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255)) canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
draw = ImageDraw.Draw(canvas) draw = ImageDraw.Draw(canvas)
black = "#000000" black = "#000000"
draw.text((10, 10), text, font=pil_font, fill=black) draw.text((10, 10), text, font=pil_font, fill=black)