try different scoring approach and attempt an image construction based on the avg colors of the image

it seems i am missing something maybe?
Add color variance functions and such
2024-01-29 19:50:02 +02:00 · 2024-01-29 19:45:11 +02:00 · 2024-01-26 15:27:40 +02:00 · 2024-01-26 15:27:10 +02:00
3 changed files with 68 additions and 8 deletions
--- a/README.md
+++ b/README.md
@ -14,8 +14,11 @@ as the sum of the different pixel scores.
 For a given pixel(`po` for the original image and `pm` for the mask, same position)
 its score will be calculated as follows:
 `v` for variance
 ```
-S_p = | po - acolor | x (0.5 - pm)
+||po - acolor|| - ||v - acolor||
 S_p = (|po - acolor| - v) x (0.5 - pm)
 ```
 it is assumed that the font mask is of values between `0..1` and made as a
@ -26,6 +29,10 @@ variations of where the letter should be, while also taking into
 consideration the fact that the background should be of different
 color.
 I seem to be missing something in the original idea, as some fonts get a better
 score on incorrect guesses with a bigger color variance, and others get
 the smallest color variance on some other fonts.
 ## Potential improvements
 Some potential improvements would be:
--- a/classify.py
+++ b/classify.py
@ -1,6 +1,7 @@
 import cv2 as cv
 import numpy as np
 import h5py as h5
 from rasterizer import text_to_matrix
 db = None
@ -30,9 +31,8 @@ def extract_bb(img, bb):
    rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # bounding box is now axis aligned, and we can crop it
    print(size)
    cropped = cv.getRectSubPix(rot_img, size, center)
-    return cropped
+    return cropped.transpose(1, 0, 2)[::, ::-1, ::]
 def get_img(index):
    ''' gets image from database '''
@ -73,7 +73,25 @@ def get_avg_color(img, mask):
    avg /= count
    return avg
-def calc_score(img, mask, avg_color):
def get_color_variance(img, mask, avg_color):
    '''
    Gets color variance under the mask with given avg_color

    img is indexed as img[x, y] -> color vector, mask as mask[x, y] -> weight.
    Every pixel contributes its squared distance from avg_color, weighted by
    the mask value; the weighted sum is divided by the total mask weight.

    Returns a float32 vector with one entry per image channel, every entry
    holding the same scalar variance. Returns None (after printing a
    warning) when the image and the mask differ in size. An all-zero mask
    yields zeros rather than a 0/0 NaN.
    '''
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    weights = np.asarray(mask, dtype=np.float64)
    count = weights.sum()
    if count == 0:
        # nothing is covered by the mask, avoid dividing by zero
        return np.zeros(sw, dtype=np.float32)
    # go through float64 so unsigned pixel values cannot wrap around
    diff = np.asarray(img, dtype=np.float64) - avg_color
    sq_dist = (diff * diff).sum(axis=2)
    variance = (sq_dist * weights).sum() / count
    return np.full(sw, variance, dtype=np.float32)
 def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculates the score for each mask with each color
    '''
@ -88,8 +106,43 @@ def calc_score(img, mask, avg_color):
    score = 0.0
    for x in range(sx):
        for y in range(sy):
-            m = 0.5 - mask[x, y]
+            m = mask[x, y] - 0.5
            diff = img[x, y] - avg_color
-            mag = np.sqrt(diff.dot(diff)) # calculate magnitude
+            mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
-            score += mag * m
+            score += mag * m / var_mag
    return score
 def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a 0/1 map telling which of two colors every pixel is closer to

    img is indexed as img[x, y] -> color vector. A pixel is set to 1.0 when
    its squared distance to c_avg is strictly smaller than its squared
    distance to nc_avg, and to 0.0 otherwise (ties included).

    Returns a float32 array of shape [sx, sy], or None (after printing a
    warning) when the image depth differs from the length of c_avg.
    '''
    sx, sy, sw = img.shape
    if sw != c_avg.shape[0]:
        print('Image depth doesnt match color!')
        # the comparison below would be meaningless (or fail), so bail out
        return None
    # the colors broadcast over the last axis, one subtraction per image
    da = img - c_avg
    dn = img - nc_avg
    mag_a = (da * da).sum(axis=2)
    mag_n = (dn * dn).sum(axis=2)
    # can be bool actually but float32 because why not
    return (mag_a < mag_n).astype(np.float32)
 def score_font(char_img, char, font_name):
    '''
    Scores how well `char` rendered with font_name/path matches char_img

    The rendered glyph is scaled to the size of char_img and used as a mask;
    the score is the squared distance between the average color under the
    mask and the average color under its complement.
    '''
    # render at 128, should be plenty since the glyph mostly gets scaled
    # down afterwards; a single channel is kept instead of the rgb triple
    rendered = text_to_matrix(char, 128, font_name)
    glyph = rendered[:, :, 1]
    # cv.resize takes the target size as (width, height)
    height, width = char_img.shape[0], char_img.shape[1]
    mask = cv.resize(glyph, [width, height], interpolation=cv.INTER_LINEAR)
    inverse = np.ones(mask.shape, dtype=np.float32) - mask
    # average colors under the glyph and under everything else
    inside = get_avg_color(char_img, mask)
    outside = get_avg_color(char_img, inverse)
    delta = inside - outside
    return delta.dot(delta)
--- a/rasterizer.py
+++ b/rasterizer.py
@ -3,7 +3,7 @@ import numpy as np
 def text_to_matrix(text, size, font):
    pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
-    canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255))
+    canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
    draw = ImageDraw.Draw(canvas)
    black = "#000000"
    draw.text((10, 10), text, font=pil_font, fill=black)
Author	SHA1	Message	Date
Rusty Striker	b1b77c0f24	try different scoring approach and attempt an image construction based on the avg colors of the image	2024-01-29 19:50:02 +02:00
Rusty Striker	51a0efee01	it seems i am missing something maybe?	2024-01-29 19:45:11 +02:00
Rusty Striker	5649a2d4cd	Add color variance functions and such	2024-01-26 15:27:40 +02:00
Rusty Striker	83eb229189	fix some fonts not rendering fully(getting cropped)	2024-01-26 15:27:10 +02:00