# computer_vision_project/classify.py

import cv2 as cv
import numpy as np
import h5py as h5
from rasterizer import text_to_matrix

# Module-level handle to the opened database file. It stays None until
# init() assigns it; needs_init() checks this before any read.
db = None

def init_train():
    ''' Convenience initializer: opens the train set file `train.h5` '''
    default_path = 'train.h5'
    init(default_path)

def init(path):
    '''
    Opens the file at `path` read-only and stores the handle in the
    module-level `db`. Has to run before any function that reads the database.
    '''
    global db
    handle = h5.File(path, 'r')
    db = handle

def needs_init():
    '''
    Tells whether the database is still uninitialized.
    Prints a hint and returns True when `db` is None, otherwise returns False.
    '''
    uninitialized = db is None
    if uninitialized:
        print('db is none, please use init(path_to_db) first!')
    return uninitialized

# Extract letter from a bounding box
def extract_bb(img, bb):
    '''
    Extracts a bounding box/letter from the given image.

    The minimum-area rectangle around the points in `bb` is computed, the
    whole image is rotated about that rectangle's center by the rectangle's
    angle, and a patch of the rectangle's size is cropped around the center.

    img: image array; the final transpose(1, 0, 2) requires the cropped patch
         to be 3-dimensional.
    bb:  array of box points; it is transposed before being handed to
         cv.minAreaRect, so presumably it is stored as (2, N) - TODO confirm.

    Returns the cropped patch with its first two axes swapped and the second
    axis of the result reversed.
    '''
    # Get the bounding box
    rect = cv.minAreaRect(bb.astype(np.float32).transpose())
    # will be useful later, map center and size to ints
    # (int() drops the fractional part of each coordinate/extent)
    center, size = tuple(map(int, rect[0])), tuple(map(int, rect[1]))
    # Calculate rotation matrix and rotate the image
    # rect[2] is used as the rotation angle, the scale factor stays at 1;
    # the output canvas keeps the input's size (passed as shape[1], shape[0])
    rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # bounding box is now axis aligned, and we can crop it
    cropped = cv.getRectSubPix(rot_img, size, center)
    # swap the first two axes, then reverse the second axis of the result
    return cropped.transpose(1, 0, 2)[::, ::-1, ::]

def get_img(index):
    '''
    Reads the full contents of one entry of the `data` group.

    `index` is the position of the entry in the group's key listing.
    Returns None when the database has not been initialized.
    '''
    if needs_init():
        return None
    entry_names = list(db['data'].keys())
    chosen = entry_names[index]
    entry = db['data'][chosen]
    return entry[:]

def get_attrs(index):
    '''
    Fetches the attributes attached to one entry of the `data` group.

    `index` is the position of the entry in the group's key listing.
    Returns None when the database has not been initialized.
    '''
    if needs_init():
        return None
    entry_names = list(db['data'].keys())
    chosen = entry_names[index]
    entry = db['data'][chosen]
    return entry.attrs

def get_avg_color(img, mask):
    '''
    Gets the mask-weighted average color of an image.

    Every pixel contributes its color multiplied by the mask value at the
    same position; the sum is divided by the total mask weight.

    img:  array of shape (rows, cols, channels); any channel count works
          (usually 3 for rgb or 1 for grayscale).
    mask: 2-D array of shape (rows, cols) holding the per-pixel weights.

    Returns a float32 array with one entry per channel. Returns None (after
    printing a message) when the first two dimensions of img and mask differ.
    All entries are NaN when the mask weights sum to zero, since no average
    is defined in that case.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    # Work in float64 so that integer images and masks (e.g. uint8) cannot
    # wrap around when they are multiplied together.
    weights = mask.astype(np.float64)
    count = weights.sum()
    if count == 0:
        # nothing lies under the mask, the average is undefined
        return np.full(sw, np.nan, dtype=np.float32)
    weighted = img.astype(np.float64) * weights[:, :, np.newaxis]
    total = weighted.sum(axis=(0, 1))
    return (total / count).astype(np.float32)

def get_color_variance(img, mask, avg_color):
    '''
    Gets the color variance under the mask with the given avg_color.

    The variance is the mask-weighted mean of the squared distance between
    each pixel's color and avg_color. It is a single number; it is returned
    replicated once per channel.

    img:       array of shape (rows, cols, channels).
    mask:      2-D array of shape (rows, cols) holding the per-pixel weights.
    avg_color: reference color, subtracted from every pixel.

    Returns a float32 array of length `channels` whose entries are all equal
    to the variance. Returns None (after printing a message) when the first
    two dimensions of img and mask differ. All entries are NaN when the mask
    weights sum to zero.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    weights = mask.astype(np.float64)
    count = weights.sum()
    if count == 0:
        # nothing lies under the mask, the variance is undefined
        return np.full(sw, np.nan, dtype=np.float32)
    diff = img.astype(np.float64) - avg_color
    # squared color distance of every pixel, shape (rows, cols)
    sq_dist = (diff * diff).sum(axis=2)
    variance = (sq_dist * weights).sum() / count
    return np.full(sw, variance, dtype=np.float32)

def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculates the score of a mask for a given color.

    Per pixel, the contribution is (var_mag - d) * (m - 0.5) / var_mag, where
    d is the distance between the pixel color and avg_color and m is the mask
    value. Contributions are summed over the whole image.

    img:       array of shape (rows, cols, channels).
    mask:      2-D array of shape (rows, cols).
    avg_color: array with one entry per channel.
    var_mag:   normalizing magnitude; a scalar gives a scalar score, an array
               gives a score of the same shape.

    Returns the summed score. Returns 0.0 (after printing a message) when
    the image and mask sizes differ or when the channel count does not match
    avg_color.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return 0.0
    if sw != avg_color.shape[0]:
        print('Image width doesnt match color width!')
        return 0.0
    # shift the mask so that covered pixels count positive and the rest negative
    weights = mask.astype(np.float64) - 0.5
    diff = img.astype(np.float64) - avg_color
    dist = np.sqrt((diff * diff).sum(axis=2))  # calculate magnitude
    # sum((var_mag - d) * w / var_mag) == sum(w) - sum(d * w) / var_mag;
    # this form needs no per-pixel loop and works for scalar or array var_mag
    return weights.sum() - (dist * weights).sum() / var_mag

def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a binary map telling which of two colors each pixel is closer to.

    img:    array of shape (rows, cols, channels).
    c_avg:  first reference color, one entry per channel.
    nc_avg: second reference color.

    Returns a float32 array of shape (rows, cols) holding 1.0 where the pixel
    is strictly closer to c_avg than to nc_avg and 0.0 elsewhere (ties give
    0.0). A channel-count mismatch with c_avg only prints a message; the
    computation is still attempted.
    '''
    sx, sy, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')
    pixels = img.astype(np.float64)
    da = pixels - c_avg
    dn = pixels - nc_avg
    # compare squared distances, the square root would not change the ordering
    closer = (da * da).sum(axis=2) < (dn * dn).sum(axis=2)
    # can be bool actually, but float32 is what callers get
    return closer.astype(np.float32)

def score_font(char_img, char, font_name):
    '''
    Scores how well `char` rendered in the font `font_name` matches char_img.

    The rendered glyph is resized to the size of char_img and used as a mask.
    The score is the squared distance between the average color under that
    mask and the average color under its complement (1 - mask).
    '''
    # render at size 128, which should be plenty since the glyph is mostly
    # shrunk afterwards; keep only channel 1 to go from rgb to a single plane
    rendered = text_to_matrix(char, 128, font_name)
    glyph = rendered[::, ::, 1]
    # cv.resize takes the target size as (width, height)
    target_size = [char_img.shape[1], char_img.shape[0]]
    mask = cv.resize(glyph, target_size, interpolation=cv.INTER_LINEAR)
    inverse = np.ones(mask.shape, dtype=np.float32) - mask
    # average colors under the glyph and under everything else
    delta = get_avg_color(char_img, mask) - get_avg_color(char_img, inverse)
    return delta.dot(delta)