148 lines
4.6 KiB
Python
148 lines
4.6 KiB
Python
import cv2 as cv
|
|
import numpy as np
|
|
import h5py as h5
|
|
from rasterizer import text_to_matrix
|
|
|
|
# Module-wide handle to the open HDF5 database. It stays None until init()
# assigns an h5py File to it; needs_init() tests for exactly that None.
db = None
|
|
|
|
def init_train():
    ''' Initialize the database from the default train set file `train.h5` '''
    train_db_path = 'train.h5'
    init(train_db_path)
|
|
|
|
def init(path):
    '''
    Open the HDF5 database at `path` read-only and store the handle in the
    module-level `db`. Must be called before any other database access.

    Calling init() again switches databases: the previously opened file is
    closed first so its handle is released deterministically instead of
    being silently dropped. Objects obtained from the old file must not be
    used after that.

    path -- location of the HDF5 file, handed to h5py.File unchanged

    Any error raised by h5py.File while opening propagates to the caller;
    in that case `db` is None.
    '''
    global db
    if db is not None:
        # release the old handle before it is replaced
        db.close()
        db = None
    db = h5.File(path, 'r')
|
|
|
|
def needs_init():
    '''
    Tell whether the database still has to be initialized.

    Returns True when `db` is None (after printing a reminder to call
    init) and False once a database has been opened.
    '''
    uninitialized = db is None
    if uninitialized:
        print('db is none, please use init(path_to_db) first!')
    return uninitialized
|
|
|
|
# Extract letter from a bounding box
|
|
def extract_bb(img, bb):
    '''
    Cut the (possibly rotated) bounding box `bb` out of `img`.

    img -- image array; the final transpose addresses three axes, so a
           2-D image will not work here
    bb  -- box corner coordinates; they are transposed before being handed
           to cv.minAreaRect, so presumably stored as 2 x N -- TODO confirm

    Returns the cropped patch with its first two axes swapped and the
    second axis reversed (i.e. turned by a quarter turn).
    '''
    corners = bb.astype(np.float32).transpose()
    # minAreaRect describes the box as ((cx, cy), (w, h), angle)
    box_center, box_size, box_angle = cv.minAreaRect(corners)
    # the crop below works on whole pixels, so truncate both pairs to ints
    center = tuple(int(value) for value in box_center)
    size = tuple(int(value) for value in box_size)

    # turn the whole image around the box center so the box becomes
    # axis aligned; the output canvas keeps the input size (width, height)
    rotation = cv.getRotationMatrix2D(center, box_angle, 1)
    canvas_size = (img.shape[1], img.shape[0])
    rotated = cv.warpAffine(img, rotation, canvas_size)

    # the box is now axis aligned and can be cropped directly
    patch = cv.getRectSubPix(rotated, size, center)
    return patch.transpose(1, 0, 2)[:, ::-1, :]
|
|
|
|
def get_img(index):
    '''
    Fetch the image stored at position `index` of the database.

    `index` addresses the list of entry names under the `data` group.
    Returns the full content of that entry (read with `[:]`), or None when
    the database has not been initialized.
    '''
    if needs_init():
        return None
    images = db['data']
    name = list(images.keys())[index]
    return images[name][:]
|
|
|
|
def get_attrs(index):
    '''
    Fetch the attributes of the image at position `index` of the database.

    `index` addresses the list of entry names under the `data` group.
    Returns the `attrs` object of that entry, or None when the database has
    not been initialized.
    '''
    if needs_init():
        return None
    images = db['data']
    name = list(images.keys())[index]
    return images[name].attrs
|
|
|
|
def get_avg_color(img, mask):
    '''
    Get the mask-weighted average color of an image.

    img  -- array of shape (X, Y, C); C is the number of channels (usually
            3 for rgb, 1 for grayscale) and the axis must be present
    mask -- 2-D array of shape (X, Y) holding one weight per pixel

    Returns a float32 array of length C: sum(img * mask) / sum(mask) per
    channel. Returns None (after printing a message) when the first two
    dimensions of img and mask differ. If the weights sum to zero the
    division yields NaN/inf, exactly like a plain division would.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    # accumulate in float64: integer inputs cannot wrap around and large
    # images do not lose precision
    pixels = np.asarray(img, dtype=np.float64)
    weights = np.asarray(mask, dtype=np.float64)
    # weight every channel of a pixel by that pixel's mask value
    weighted_sum = (pixels * weights[:, :, np.newaxis]).sum(axis=(0, 1))
    avg = weighted_sum / weights.sum()
    return avg.astype(np.float32)
|
|
|
|
def get_color_variance(img, mask, avg_color):
    '''
    Get the color variance under the mask around the given avg_color.

    img       -- array of shape (X, Y, C)
    mask      -- 2-D array of shape (X, Y) holding one weight per pixel
    avg_color -- reference color, one value per channel

    The variance is the mask-weighted mean of the squared distance between
    each pixel color and avg_color. That is a single number; it is returned
    as a float32 array of length C whose entries are all equal to it.
    Returns None (after printing a message) when the first two dimensions
    of img and mask differ. If the weights sum to zero the result is NaN.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    # float64 keeps integer images from wrapping around on subtraction
    pixels = np.asarray(img, dtype=np.float64)
    weights = np.asarray(mask, dtype=np.float64)
    diff = pixels - avg_color
    # squared color distance of every pixel, shape (X, Y)
    sq_dist = (diff * diff).sum(axis=2)
    mean_sq_dist = (sq_dist * weights).sum() / weights.sum()
    return np.full(sw, mean_sq_dist, dtype=np.float32)
|
|
|
|
def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculate how well a mask agrees with a color.

    img       -- array of shape (X, Y, C)
    mask      -- 2-D array of shape (X, Y)
    avg_color -- reference color with C entries
    var_mag   -- magnitude used to normalize the color distance

    Every pixel contributes (var_mag - dist) * (mask - 0.5) / var_mag,
    where dist is the distance between the pixel color and avg_color. A
    pixel closer than var_mag to the color therefore adds to the score
    when its mask value is above 0.5 and subtracts when it is below.

    Returns the sum of all contributions, or 0.0 (after printing a message)
    when the image and mask sizes differ or the channel count of img does
    not match avg_color.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return 0.0
    if sw != avg_color.shape[0]:
        print('Image width doesnt match color width!')
        return 0.0
    pixels = np.asarray(img, dtype=np.float64)
    centered = np.asarray(mask, dtype=np.float64) - 0.5
    diff = pixels - avg_color
    # distance of every pixel color from avg_color, shape (X, Y)
    dist = np.sqrt((diff * diff).sum(axis=2))
    # sum over pixels of (var_mag - dist) * centered / var_mag, rearranged
    # so that var_mag is divided out only once
    return centered.sum() - (centered * dist).sum() / var_mag
|
|
|
|
def generate_subimg(img, c_avg, nc_avg):
    '''
    Classify every pixel by which of two colors it is closer to.

    img    -- array of shape (X, Y, C)
    c_avg  -- first reference color with C entries
    nc_avg -- second reference color

    Returns a float32 array of shape (X, Y) holding 1.0 where the pixel is
    strictly closer to c_avg than to nc_avg and 0.0 everywhere else (ties
    included). Returns None (after printing a message) when the channel
    count of img does not match c_avg.
    '''
    sx, sy, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')
        # stop here like the other helpers do on bad input instead of
        # running into a broadcasting error further down
        return None
    # float64 keeps integer images from wrapping around on subtraction
    pixels = np.asarray(img, dtype=np.float64)
    to_c = pixels - c_avg
    to_nc = pixels - nc_avg
    # squared distances are enough for a comparison
    dist_c = (to_c * to_c).sum(axis=2)
    dist_nc = (to_nc * to_nc).sum(axis=2)
    # can be bool actually, but callers get float32 as before
    return (dist_c < dist_nc).astype(np.float32)
|
|
|
|
def score_font(char_img, char, font_name):
    '''
    Score a font for a character image.

    char_img  -- image of the character, passed on to get_avg_color
    char      -- the character to render
    font_name -- font name/path handed to text_to_matrix

    The character is rendered with text_to_matrix, stretched to the size of
    char_img and used as a mask. The score is the squared distance between
    the average color under that mask and the average color under its
    complement (1 - mask).
    '''
    # render at 128 (presumably the font size -- confirm against
    # text_to_matrix) and keep only channel 1 of the rendering
    glyph = text_to_matrix(char, 128, font_name)[::, ::, 1]

    # cv.resize wants the target size as width first, then height
    height, width = char_img.shape[0], char_img.shape[1]
    mask = cv.resize(glyph, [width, height], interpolation=cv.INTER_LINEAR)
    inverse_mask = np.ones(mask.shape, dtype=np.float32) - mask

    # average color under the glyph and under everything else
    inside_color = get_avg_color(char_img, mask)
    outside_color = get_avg_color(char_img, inverse_mask)

    delta = inside_color - outside_color
    return delta.dot(delta)
|
|
|