3 changed files with 8 additions and 68 deletions
--- a/README.md
+++ b/README.md
@ -14,11 +14,8 @@ as the sum of the different pixel scores.
 For a given pixel (`po` in the original image and `pm` at the same position in the mask),
 the score is calculated as follows,
 where `v` is the variance:
 ```
-||po - acolor|| - ||v - acolor||
+S_p = | po - acolor | x (0.5 - pm)
 S_p = (|po - acolor| - v) x (0.5 - pm)
 ```
 it is assumed that the font mask is of values between `0..1` and made as a
@ -29,10 +26,6 @@ variations of where the letter should be, while also taking into
 consideration the fact that the background should be of different
 color.
 I seem to be missing something in the original idea, as some fonts get a better
 score on incorrect guesses with a bigger color variance, while others get
 the smallest color variance on some other fonts.
 ## Potential improvements
 Some potential improvements would be:
--- a/classify.py
+++ b/classify.py
@ -1,7 +1,6 @@
 import cv2 as cv
 import numpy as np
 import h5py as h5
 from rasterizer import text_to_matrix
 db = None
@ -31,8 +30,9 @@ def extract_bb(img, bb):
    rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # bounding box is now axis aligned, and we can crop it
    print(size)
    cropped = cv.getRectSubPix(rot_img, size, center)
-    return cropped.transpose(1, 0, 2)[::, ::-1, ::]
+    return cropped
 def get_img(index):
    ''' gets image from database '''
@ -73,25 +73,7 @@ def get_avg_color(img, mask):
    avg /= count
    return avg
-def get_color_variance(img, mask, avg_color):
+def calc_score(img, mask, avg_color):
    ''' Gets color variance under the mask with given avg_color '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    var = np.zeros(sw, dtype=np.float32)
    count = 0.0
    for x in range(sx):
        for y in range(sy):
            m = mask[x, y]
            diff = img[x, y] - avg_color
            var += diff.dot(diff) * m
            count += m
    var /= count
    return var
 def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculates the score for each mask with each color
    '''
@ -106,43 +88,8 @@ def calc_score(img, mask, avg_color, var_mag):
    score = 0.0
    for x in range(sx):
        for y in range(sy):
-            m = mask[x, y] - 0.5
+            m = 0.5 - mask[x, y]
            diff = img[x, y] - avg_color
-            mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
+            mag = np.sqrt(diff.dot(diff)) # calculate magnitude
-            score += mag * m / var_mag
+            score += mag * m
    return score
 def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a per-pixel map telling which of two colors each pixel is closer to.

    For every pixel of `img` the squared distance (dot product of the
    difference with itself) to `c_avg` and to `nc_avg` is computed. The
    result holds 1.0 where the pixel is strictly closer to `c_avg` and 0.0
    everywhere else, ties included.

    img    -- array unpacked as (rows, cols, depth)
    c_avg  -- color vector; its first dimension is checked against depth
    nc_avg -- second color vector to compare against

    A depth mismatch is only reported with a message; processing continues.
    Returns a float32 array with the first two dimensions of `img`.
    '''
    rows, cols, depth = img.shape
    if depth != c_avg.shape[0]:
        print('Image depth doesnt match color!')
    # float32 is kept for the output even though the values are only 0 or 1
    closer = np.zeros([rows, cols], dtype=np.float32)
    # ndindex walks the grid in the same row-major order as two nested loops
    for r, c in np.ndindex(rows, cols):
        to_c = img[r, c] - c_avg
        dist_c = to_c.dot(to_c)
        to_nc = img[r, c] - nc_avg
        dist_nc = to_nc.dot(to_nc)
        if dist_c < dist_nc:
            closer[r, c] = 1.0
    return closer
 def score_font(char_img, char, font_name):
    '''
    Scores a font for a character image.

    `char` is rendered with `font_name` through text_to_matrix, the
    rendering is resized to the dimensions of `char_img` and used as a
    mask. The average color of `char_img` under the mask and under its
    complement (1 - mask) are taken with get_avg_color, and the dot product
    of their difference with itself is returned.

    char_img  -- image of the character to classify
    char      -- the character to render
    font_name -- font name/path handed to text_to_matrix
    '''
    # render at size 128 (the original author judged this to be enough, as
    # it mostly gets scaled down anyway) and keep only channel 1 of the
    # rendering, which the author uses as the grayscale version
    rendered = text_to_matrix(char, 128, font_name)
    font_img = rendered[::, ::, 1]
    # cv.resize takes its target size as (width, height), hence the swap
    target_w = char_img.shape[1]
    target_h = char_img.shape[0]
    mask = cv.resize(font_img, [target_w, target_h],
                     interpolation=cv.INTER_LINEAR)
    inverse_mask = np.ones(mask.shape, dtype=np.float32) - mask
    # average colors under the mask and under its complement
    masked_avg = get_avg_color(char_img, mask)
    unmasked_avg = get_avg_color(char_img, inverse_mask)
    delta = masked_avg - unmasked_avg
    return delta.dot(delta)
--- a/rasterizer.py
+++ b/rasterizer.py
@ -3,7 +3,7 @@ import numpy as np
 def text_to_matrix(text, size, font):
    pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
-    canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
+    canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255))
    draw = ImageDraw.Draw(canvas)
    black = "#000000"
    draw.text((10, 10), text, font=pil_font, fill=black)