Compare commits

...

4 commits

3 changed files with 68 additions and 8 deletions

View file

@ -14,8 +14,11 @@ as the sum of the different pixel scores.
For a given pixel (`po` in the original image and `pm` at the same position in
the mask), its score is calculated as follows,
where `v` is the color variance:
```
S_p = | po - acolor | x (0.5 - pm)
||po - acolor|| - ||v - acolor||
S_p = (|po - acolor| - v) x (0.5 - pm)
```
it is assumed that the font mask is of values between `0..1` and made as a
@ -26,6 +29,10 @@ variations of where the letter should be, while also taking into
consideration the fact that the background should be of different
color.
I seem to be missing something in the original idea, as some fonts get a better
score on incorrect guesses with bigger color variance, while others get
the smallest color variance on some other fonts.
## Potential improvements
Some potential improvements would be:

View file

@ -1,6 +1,7 @@
import cv2 as cv
import numpy as np
import h5py as h5
from rasterizer import text_to_matrix
db = None
@ -30,9 +31,8 @@ def extract_bb(img, bb):
rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
# bounding box is now axis aligned, and we can crop it
print(size)
cropped = cv.getRectSubPix(rot_img, size, center)
return cropped
return cropped.transpose(1, 0, 2)[::, ::-1, ::]
def get_img(index):
''' gets image from database '''
@ -73,7 +73,25 @@ def get_avg_color(img, mask):
avg /= count
return avg
def calc_score(img, mask, avg_color):
def get_color_variance(img, mask, avg_color):
    '''
    Gets the mask-weighted color variance of img around avg_color.

    img is indexed as img[x, y] and must be 3-dimensional; mask is
    2-dimensional and must have the same first two dimensions as img.
    Every pixel contributes its squared distance to avg_color multiplied
    by its mask value; the sum is divided by the sum of the mask values.

    Returns a float32 array with one entry per image channel (all entries
    hold the same value), or None after printing a message when the image
    and mask sizes differ.
    '''
    rows, cols, channels = img.shape
    mask_rows, mask_cols = mask.shape
    if (rows, cols) != (mask_rows, mask_cols):
        print('Image and mask size doesnt match!')
        return None

    variance = np.zeros(channels, dtype=np.float32)
    total_weight = 0.0
    # np.ndindex walks the pixels in the same row-major order as two
    # nested range() loops would.
    for px, py in np.ndindex(rows, cols):
        weight = mask[px, py]
        delta = img[px, py] - avg_color
        # squared distance is a scalar, so it is added to every channel
        variance += delta.dot(delta) * weight
        total_weight += weight
    variance /= total_weight
    return variance
def calc_score(img, mask, avg_color, var_mag):
'''
Calculates the score for each mask with each color
'''
@ -88,8 +106,43 @@ def calc_score(img, mask, avg_color):
score = 0.0
for x in range(sx):
for y in range(sy):
m = 0.5 - mask[x, y]
m = mask[x, y] - 0.5
diff = img[x, y] - avg_color
mag = np.sqrt(diff.dot(diff)) # calculate magnitude
score += mag * m
mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
score += mag * m / var_mag
return score
def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a binary mask of the pixels that are closer to c_avg than to
    nc_avg.

    img is a 3-dimensional array indexed as img[x, y]; c_avg must hold one
    value per image channel. nc_avg is broadcast against the pixels.

    Returns a float32 array with the first two dimensions of img, holding
    1.0 where the squared distance to c_avg is strictly smaller than the
    squared distance to nc_avg and 0.0 otherwise (ties give 0.0).
    Returns None after printing a message when the image depth doesn't
    match c_avg.
    '''
    sx, sy, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')
        # bail out instead of continuing with mismatched shapes
        return None

    # work in float so integer images/colors can't wrap around on subtraction
    pixels = img.astype(np.float64)
    dist_c = ((pixels - c_avg) ** 2).sum(axis=2)
    dist_n = ((pixels - nc_avg) ** 2).sum(axis=2)

    # can be bool actually, but float32 is kept for the callers
    return (dist_c < dist_n).astype(np.float32)
def score_font(char_img, char, font_name):
    '''
    Scores how well char rendered with the font font_name (name or path)
    matches char_img.

    The character is rasterized, resized to the dimensions of char_img and
    used as a mask; the returned score is the squared distance between the
    average color under the mask and the average color under its
    complement.
    '''
    # render at size 128 (should be enough, it is mostly shrunk afterwards)
    # and keep a single channel of the rendered image
    rendered = text_to_matrix(char, 128, font_name)[:, :, 1]

    # resize the rendered glyph to the dimensions of char_img
    width = char_img.shape[1]
    height = char_img.shape[0]
    dim = [width, height]
    mask = cv.resize(rendered, dim, interpolation=cv.INTER_LINEAR)
    inverse_mask = np.ones_like(mask, dtype=np.float32) - mask

    # average color under the glyph and under everything else
    glyph_color = get_avg_color(char_img, mask)
    background_color = get_avg_color(char_img, inverse_mask)

    separation = glyph_color - background_color
    return separation.dot(separation)

View file

@ -3,7 +3,7 @@ import numpy as np
def text_to_matrix(text, size, font):
pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255))
canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
draw = ImageDraw.Draw(canvas)
black = "#000000"
draw.text((10, 10), text, font=pil_font, fill=black)