try different scoring approach and attempt an image construction based on the avg colors of the image

it seems i am missing something maybe?
Add color variance functions and such
2024-01-29 19:50:02 +02:00 · 2024-01-29 19:45:11 +02:00 · 2024-01-26 15:27:40 +02:00 · 2024-01-26 15:27:10 +02:00
3 changed files with 68 additions and 8 deletions
--- a/README.md
+++ b/README.md
@ -14,8 +14,11 @@ as the sum of the different pixel scores.
 For a given pixel(`po` for the original image and `pm` for the mask, same position)
 its score will be calculated as follows:

+`v` stands for the color variance
+
 ```
-S_p = | po - acolor | x (0.5 - pm)
+||po - acolor|| - ||v - acolor||
+S_p = (|po - acolor| - v) x (0.5 - pm)
 ```

 it is assumed that the font mask is of values between `0..1` and made as a
@ -26,6 +29,10 @@ variations of where the letter should be, while also taking into
 consideration the fact that the background should be of different
 color.

+I seem to be missing something in the original idea: some fonts get a better
+score on incorrect guesses that have a bigger color variance, while others
+get the smallest color variance on some other fonts.
+
 ## Potential improvements

 Some potential improvements would be:
--- a/classify.py
+++ b/classify.py
@ -1,6 +1,7 @@
 import cv2 as cv
 import numpy as np
 import h5py as h5
+from rasterizer import text_to_matrix

 db = None

@ -30,9 +31,8 @@ def extract_bb(img, bb):
    rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # bounding box is now axis aligned, and we can crop it
-    print(size)
    cropped = cv.getRectSubPix(rot_img, size, center)
-    return cropped
+    return cropped.transpose(1, 0, 2)[::, ::-1, ::]

 def get_img(index):
    ''' gets image from database '''
@ -73,7 +73,25 @@ def get_avg_color(img, mask):
    avg /= count
    return avg

-def calc_score(img, mask, avg_color):
+def get_color_variance(img, mask, avg_color):
+    ''' Gets color variance under the mask with given avg_color '''
+    sx, sy, sw = img.shape
+    mx, my = mask.shape
+    if sx != mx or sy != my:
+        print('Image and mask size doesnt match!')
+        return None
+    var = np.zeros(sw, dtype=np.float32)
+    count = 0.0
+    for x in range(sx):
+        for y in range(sy):
+            m = mask[x, y]
+            diff = img[x, y] - avg_color
+            var += diff.dot(diff) * m
+            count += m
+    var /= count
+    return var
+
+def calc_score(img, mask, avg_color, var_mag):
    '''
    Calculates the score for each mask with each color
    '''
@ -88,8 +106,43 @@ def calc_score(img, mask, avg_color):
    score = 0.0
    for x in range(sx):
        for y in range(sy):
-            m = 0.5 - mask[x, y]
+            m = mask[x, y] - 0.5
            diff = img[x, y] - avg_color
-            mag = np.sqrt(diff.dot(diff)) # calculate magnitude
-            score += mag * m
+            mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
+            score += mag * m / var_mag
    return score
+
+def generate_subimg(img, c_avg, nc_avg):
+    sx, sy, sw = img.shape
+    if sw != c_avg.shape[0]:
+        print('Image depth doesnt match color!')
+    # can be bool actually but float32 because why not
+    res = np.zeros([sx, sy], dtype=np.float32) 
+    for x in range(sx):
+        for y in range(sy):
+            da = img[x, y] - c_avg
+            mag_a = da.dot(da)
+            dn = img[x, y] - nc_avg
+            mag_n = dn.dot(dn)
+            res[x, y] = 1.0 if mag_a < mag_n else 0.0
+    return res
+
+def score_font(char_img, char, font_name):
+    '''
+    Takes a char_img, the wanted character and a font_name/path
+    and calculates the relevant score
+    '''
+    # default to 128, i think it should be enough and we will probably mostly
+    # reduce the size anyway, also change from rgb to grayscale
+    font_img = text_to_matrix(char, 128, font_name)[::, ::, 1]
+    # resize font_img to match char_img dimensions
+    dim = [char_img.shape[1], char_img.shape[0]]
+    mask = cv.resize(font_img, dim, interpolation=cv.INTER_LINEAR)
+    rv = np.ones(mask.shape, dtype=np.float32) - mask
+    # get average color
+    ac = get_avg_color(char_img, mask)
+    rac = get_avg_color(char_img, rv)
+    diff = ac - rac
+    mag = diff.dot(diff)
+    return mag
+
--- a/rasterizer.py
+++ b/rasterizer.py
@ -3,7 +3,7 @@ import numpy as np

 def text_to_matrix(text, size, font):
    pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
-    canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255))
+    canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
    draw = ImageDraw.Draw(canvas)
    black = "#000000"
    draw.text((10, 10), text, font=pil_font, fill=black)
Author	SHA1	Message	Date
Rusty Striker	b1b77c0f24	try different scoring approach and attempt an image construction based on the avg colors of the image	2024-01-29 19:50:02 +02:00
Rusty Striker	51a0efee01	it seems i am missing something maybe?	2024-01-29 19:45:11 +02:00
Rusty Striker	5649a2d4cd	Add color variance functions and such	2024-01-26 15:27:40 +02:00
Rusty Striker	83eb229189	fix some fonts not rendering fully(getting cropped)	2024-01-26 15:27:10 +02:00