Compare commits
No commits in common. "b1b77c0f2455721518c160a321bc96ce7b2f057a" and "2f98d26842aa283ded2ed0f4656c97fc976a3fd4" have entirely different histories.
b1b77c0f24
...
2f98d26842
3 changed files with 8 additions and 68 deletions
|
@ -14,11 +14,8 @@ as the sum of the different pixel scores.
|
||||||
For a given pixel (`po` for the original image and `pm` for the mask, at the same position)
|
For a given pixel (`po` for the original image and `pm` for the mask, at the same position)
|
||||||
its score will be calculated as follows:
|
its score will be calculated as follows:
|
||||||
|
|
||||||
`v` for variance
|
|
||||||
|
|
||||||
```
|
```
|
||||||
||po - acolor|| - ||v - acolor||
|
S_p = | po - acolor | x (0.5 - pm)
|
||||||
S_p = (|po - acolor| - v) x (0.5 - pm)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
it is assumed that the font mask is of values between `0..1` and made as a
|
it is assumed that the font mask is of values between `0..1` and made as a
|
||||||
|
@ -29,10 +26,6 @@ variations of where the letter should be, while also taking into
|
||||||
consideration the fact that the background should be of different
|
consideration the fact that the background should be of different
|
||||||
color.
|
color.
|
||||||
|
|
||||||
I seem to be missing something in the original idea, as some fonts get a better
|
|
||||||
score on incorrect guesses with bigger color variance, and others get
|
|
||||||
the smallest color variance on some other fonts.
|
|
||||||
|
|
||||||
## Potential improvements
|
## Potential improvements
|
||||||
|
|
||||||
Some potential improvements would be:
|
Some potential improvements would be:
|
||||||
|
|
65
classify.py
65
classify.py
|
@ -1,7 +1,6 @@
|
||||||
import cv2 as cv
|
import cv2 as cv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import h5py as h5
|
import h5py as h5
|
||||||
from rasterizer import text_to_matrix
|
|
||||||
|
|
||||||
db = None
|
db = None
|
||||||
|
|
||||||
|
@ -31,8 +30,9 @@ def extract_bb(img, bb):
|
||||||
rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
|
rot_matrix = cv.getRotationMatrix2D(center, rect[2], 1)
|
||||||
rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
|
rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
|
||||||
# bounding box is now axis aligned, and we can crop it
|
# bounding box is now axis aligned, and we can crop it
|
||||||
|
print(size)
|
||||||
cropped = cv.getRectSubPix(rot_img, size, center)
|
cropped = cv.getRectSubPix(rot_img, size, center)
|
||||||
return cropped.transpose(1, 0, 2)[::, ::-1, ::]
|
return cropped
|
||||||
|
|
||||||
def get_img(index):
|
def get_img(index):
|
||||||
''' gets image from database '''
|
''' gets image from database '''
|
||||||
|
@ -73,25 +73,7 @@ def get_avg_color(img, mask):
|
||||||
avg /= count
|
avg /= count
|
||||||
return avg
|
return avg
|
||||||
|
|
||||||
def get_color_variance(img, mask, avg_color):
|
def calc_score(img, mask, avg_color):
|
||||||
''' Gets color variance under the mask with given avg_color '''
|
|
||||||
sx, sy, sw = img.shape
|
|
||||||
mx, my = mask.shape
|
|
||||||
if sx != mx or sy != my:
|
|
||||||
print('Image and mask size doesnt match!')
|
|
||||||
return None
|
|
||||||
var = np.zeros(sw, dtype=np.float32)
|
|
||||||
count = 0.0
|
|
||||||
for x in range(sx):
|
|
||||||
for y in range(sy):
|
|
||||||
m = mask[x, y]
|
|
||||||
diff = img[x, y] - avg_color
|
|
||||||
var += diff.dot(diff) * m
|
|
||||||
count += m
|
|
||||||
var /= count
|
|
||||||
return var
|
|
||||||
|
|
||||||
def calc_score(img, mask, avg_color, var_mag):
|
|
||||||
'''
|
'''
|
||||||
Calculates the score for each mask with each color
|
Calculates the score for each mask with each color
|
||||||
'''
|
'''
|
||||||
|
@ -106,43 +88,8 @@ def calc_score(img, mask, avg_color, var_mag):
|
||||||
score = 0.0
|
score = 0.0
|
||||||
for x in range(sx):
|
for x in range(sx):
|
||||||
for y in range(sy):
|
for y in range(sy):
|
||||||
m = mask[x, y] - 0.5
|
m = 0.5 - mask[x, y]
|
||||||
diff = img[x, y] - avg_color
|
diff = img[x, y] - avg_color
|
||||||
mag = var_mag - np.sqrt(diff.dot(diff)) # calculate magnitude
|
mag = np.sqrt(diff.dot(diff)) # calculate magnitude
|
||||||
score += mag * m / var_mag
|
score += mag * m
|
||||||
return score
|
return score
|
||||||
|
|
||||||
def generate_subimg(img, c_avg, nc_avg):
    '''
    Builds a binary map telling, for every pixel of img, which of the two
    given colors it is closer to.

    img    -- 3-dimensional array indexed as img[x, y, channel]
    c_avg  -- color vector with one entry per image channel
    nc_avg -- second color vector with one entry per image channel

    Returns a float32 array of shape img.shape[:2] holding 1.0 where the
    pixel is strictly closer (squared euclidean distance) to c_avg than to
    nc_avg and 0.0 otherwise (ties included).

    Raises ValueError when either color does not have exactly one entry per
    image channel.
    '''
    _rows, _cols, depth = img.shape
    # Work in float64 so that integer inputs (e.g. uint8 images or integer
    # averages) cannot wrap around in the subtraction or the squared sum.
    c_avg = np.asarray(c_avg, dtype=np.float64)
    nc_avg = np.asarray(nc_avg, dtype=np.float64)
    if c_avg.shape != (depth,) or nc_avg.shape != (depth,):
        # Fail before computing anything: a mismatched color would either
        # break the arithmetic below or silently broadcast to a wrong result.
        raise ValueError('Image depth doesnt match color!')
    pixels = img.astype(np.float64)
    # Squared distance of every pixel to each color, summed over channels.
    dist_c = np.sum((pixels - c_avg) ** 2, axis=2)
    dist_n = np.sum((pixels - nc_avg) ** 2, axis=2)
    # could be bool, float32 is kept so the result type stays as before
    return (dist_c < dist_n).astype(np.float32)
|
|
||||||
|
|
||||||
def score_font(char_img, char, font_name):
    '''
    Scores how well `char` rendered with the font `font_name` (name or path)
    separates the colors of `char_img`.

    The character is rasterized, resized to the dimensions of char_img and
    used as a mask; the complementary mask (1 - mask) is built as well.
    get_avg_color is evaluated for both masks and the squared magnitude of
    the difference between the two resulting colors is returned.
    '''
    # Render at size 128 and keep only channel index 1 of the result so the
    # glyph becomes a single-channel image.
    glyph = text_to_matrix(char, 128, font_name)[:, :, 1]

    # cv.resize expects the target size as (width, height)
    target_size = [char_img.shape[1], char_img.shape[0]]
    mask = cv.resize(glyph, target_size, interpolation=cv.INTER_LINEAR)
    # NOTE(review): the inversion assumes mask values lie in 0..1 - confirm
    # against text_to_matrix.
    inverse_mask = np.ones(mask.shape, dtype=np.float32) - mask

    # color under the glyph versus color under everything else
    inside_color = get_avg_color(char_img, mask)
    outside_color = get_avg_color(char_img, inverse_mask)

    delta = inside_color - outside_color
    return delta.dot(delta)
|
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ import numpy as np
|
||||||
|
|
||||||
def text_to_matrix(text, size, font):
|
def text_to_matrix(text, size, font):
|
||||||
pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
|
pil_font = ImageFont.truetype(font, size=size // len(text), encoding="unic")
|
||||||
canvas = Image.new('RGB', [size * 2, size * 2], (255, 255, 255))
|
canvas = Image.new('RGB', [size + 20, size + 20], (255, 255, 255))
|
||||||
draw = ImageDraw.Draw(canvas)
|
draw = ImageDraw.Draw(canvas)
|
||||||
black = "#000000"
|
black = "#000000"
|
||||||
draw.text((10, 10), text, font=pil_font, fill=black)
|
draw.text((10, 10), text, font=pil_font, fill=black)
|
||||||
|
|
Loading…
Reference in a new issue