computer_vision_project/classify.py

96 lines
2.8 KiB
Python
Raw Normal View History

2024-01-12 12:10:37 +00:00
import cv2 as cv
import numpy as np
import h5py as h5
2024-01-19 13:51:35 +00:00
# Module-wide handle to the open HDF5 database; stays None until init() runs.
db = None
def init_train():
    ''' Convenience initializer: opens the training set file `train.h5` '''
    train_path = 'train.h5'
    init(train_path)
2024-01-19 13:51:35 +00:00
def init(path):
    '''
    Opens the HDF5 database at `path` read-only and makes it the active one.

    Must be called before any function that reads from the database.
    Calling it again switches databases: the previously opened file is
    closed first so repeated calls do not leak open file handles.

    path: location of the HDF5 file, passed straight to h5py.File.
    Raises whatever h5py.File raises when the file cannot be opened; in
    that case no database is active afterwards.
    '''
    global db
    if db is not None:
        # Release the old handle before replacing it; reset to None so
        # needs_init() reports correctly if the open below fails.
        db.close()
        db = None
    db = h5.File(path, 'r')
def needs_init():
    '''
    Tells whether the database still has to be initialized.
    Prints a hint for the user when it does; returns True in that case
    and False once a database is set.
    '''
    uninitialized = db is None
    if uninitialized:
        print('db is none, please use init(path_to_db) first!')
    return uninitialized
2024-01-12 12:10:37 +00:00
# Extract letter from a bounding box
def extract_bb(img, bb):
    '''
    Extracts a (possibly rotated) bounding box/letter from the given image.

    Fits a minimum-area rectangle around the points in `bb`, rotates the
    whole image around the rectangle center by the rectangle angle and
    crops the now axis-aligned rectangle.

    img: image array; shape[0] and shape[1] are used as rows and columns.
    bb: array of box points; it is transposed before being handed to
        cv.minAreaRect, so presumably it is laid out as (2, N) - confirm
        against the caller.
    Returns the cropped patch produced by cv.getRectSubPix.
    '''
    # minAreaRect expects float32 points, one point per row
    points = bb.astype(np.float32).transpose()
    (center_x, center_y), (box_w, box_h), angle = cv.minAreaRect(points)
    # Patch size and center are truncated to ints for the crop below
    center = (int(center_x), int(center_y))
    size = (int(box_w), int(box_h))
    # Rotate the image so that the bounding box becomes axis aligned
    rot_matrix = cv.getRotationMatrix2D(center, angle, 1)
    rot_img = cv.warpAffine(img, rot_matrix, (img.shape[1], img.shape[0]))
    # The box is upright now, so a plain rectangular crop is enough
    return cv.getRectSubPix(rot_img, size, center)
2024-01-19 13:51:35 +00:00
def get_img(index):
    '''
    Reads the image stored at position `index` of the 'data' group.
    Returns the full contents of that entry, or None when the database
    has not been initialized yet.
    '''
    if needs_init():
        return None
    group = db['data']
    entry_name = list(group.keys())[index]
    return group[entry_name][:]
def get_attrs(index):
    '''
    Fetches the attributes of the entry at position `index` of the
    'data' group. Returns None when the database has not been
    initialized yet.
    '''
    if needs_init():
        return None
    group = db['data']
    entry_name = list(group.keys())[index]
    return group[entry_name].attrs
def get_avg_color(img, mask):
    '''
    Gets the mask-weighted average color of an image.

    img: array of shape (x, y, channels); a 2D (x, y) grayscale image is
         also accepted and treated as having a single channel.
    mask: array of shape (x, y) matching the image, or (x, y, 1). Every
          pixel contributes to the average proportionally to its mask
          value.

    Returns a float32 array with one entry per channel, or None (after
    printing a message) when the image and mask sizes differ or when the
    mask sums to zero, in which case no average is defined.
    Raises ValueError when img or mask has an unsupported number of
    dimensions.
    '''
    img = np.asarray(img)
    mask = np.asarray(mask)
    if img.ndim == 2:
        # grayscale: add the channel axis the rest of the code relies on
        img = img[:, :, np.newaxis]
    if mask.ndim == 3 and mask.shape[2] == 1:
        # drop the trailing singleton axis of an (x, y, 1) mask
        mask = mask[:, :, 0]
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return None
    # Accumulate in float64 so integer images/masks cannot wrap around
    weights = mask.astype(np.float64)
    count = weights.sum()
    if count == 0:
        print('Mask is empty, cannot compute average color!')
        return None
    # Weighted sum over both spatial axes leaves one value per channel
    weighted_sum = (img * weights[:, :, np.newaxis]).sum(axis=(0, 1))
    return (weighted_sum / count).astype(np.float32)
def calc_score(img, mask, avg_color):
    '''
    Calculates the score of a mask against a color.

    For every pixel the euclidean distance between its color and
    `avg_color` is weighted by (0.5 - mask value) and the results are
    summed, so distance under the mask (values above 0.5) lowers the
    score while distance outside of it raises the score.

    img: array of shape (x, y, channels); a 2D grayscale image is treated
         as having a single channel.
    mask: array of shape (x, y) matching the image, or (x, y, 1).
    avg_color: one value per image channel.

    Returns the score as a float. Returns 0.0 (after printing a message)
    when the image and mask sizes differ, when avg_color is None or when
    its length differs from the number of image channels.
    Raises ValueError when img or mask has an unsupported number of
    dimensions.
    '''
    img = np.asarray(img)
    mask = np.asarray(mask)
    if img.ndim == 2:
        # grayscale: add the channel axis the rest of the code relies on
        img = img[:, :, np.newaxis]
    if mask.ndim == 3 and mask.shape[2] == 1:
        # drop the trailing singleton axis of an (x, y, 1) mask
        mask = mask[:, :, 0]
    sx, sy, sw = img.shape
    mx, my = mask.shape
    if sx != mx or sy != my:
        print('Image and mask size doesnt match!')
        return 0.0
    if avg_color is None:
        # get_avg_color reports failure with None; treat it as an error here
        print('Average color is missing!')
        return 0.0
    color = np.atleast_1d(np.asarray(avg_color, dtype=np.float64))
    if sw != color.shape[0]:
        print('Image width doesnt match color width!')
        return 0.0
    # Work in float64 so unsigned image data cannot wrap when subtracting
    diff = img.astype(np.float64) - color
    mag = np.sqrt(np.sum(diff * diff, axis=2))  # per-pixel color distance
    weights = 0.5 - mask.astype(np.float64)
    return float(np.sum(mag * weights))