|
3 | 3 | import os |
4 | 4 | import traceback |
5 | 5 | import sys |
| 6 | +import tempfile |
6 | 7 |
|
7 | 8 | import cv2 |
8 | 9 | import numpy as np |
9 | 10 | from PIL import Image |
10 | 11 | from scipy.ndimage.filters import rank_filter |
| 12 | +from scipy.optimize import minimize |
11 | 13 | import pytesseract |
12 | 14 |
|
13 | 15 |
|
@@ -42,7 +44,7 @@ def deskew(self): |
42 | 44 | self.healthy = False |
43 | 45 |
|
def extract_text(self):
    """Run Tesseract OCR on ``self.image`` and store the result in ``self.text``.

    The image is written to a temporary PNG file, read back through PIL and
    passed to ``pytesseract.image_to_string`` using ``self.lang``. The
    temporary file is always removed, even if writing or OCR fails.
    """
    # mkstemp returns an already-open OS-level descriptor; close it right
    # away so it does not leak (cv2 reopens the file by path). The suffix
    # needs the leading dot: cv2.imwrite picks the encoder from the file
    # extension.
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        cv2.imwrite(temp_path, self.image)
        # Close the PIL handle before deleting the file.
        with Image.open(temp_path) as ocr_input:
            self.text = pytesseract.image_to_string(ocr_input, lang=self.lang)
    finally:
        os.remove(temp_path)
@@ -77,7 +79,7 @@ def downscale_image(im, max_dim=2048): |
77 | 79 | return 1.0, im |
78 | 80 |
|
79 | 81 | scale = 1.0 * max_dim / max(a, b) |
80 | | - new_im = cv2.resize(im, (int(b * scale), int(a * scale)), cv2.INTER_AREA) |
| 82 | + new_im = cv2.resize(im, (int(b * scale), int(a * scale)), interpolation=cv2.INTER_AREA) |
81 | 83 | return scale, new_im |
82 | 84 |
|
83 | 85 |
|
@@ -146,6 +148,8 @@ def rect_area(crop): |
146 | 148 |
|
147 | 149 |
|
148 | 150 | def crop_image(im, rect, scale): |
| 151 | + if rect is None: |
| 152 | + rect = [0, 0, im.shape[0], im.shape[1]] |
149 | 153 | xmin, ymin, xmax, ymax = rect |
150 | 154 | crop = [xmin, ymin, xmax, ymax] |
151 | 155 | xmin, ymin, xmax, ymax = [int(x / scale) for x in crop] |
@@ -283,6 +287,52 @@ def compute_skew(theta): |
283 | 287 |
|
def process_skewed_crop(image):
    """Correct illumination, binarize and deskew a cropped image.

    Args:
        image: the crop to process. NOTE(review): ``optimize_light`` resizes
            with ``image.shape[::-1]``, so this presumably must be a 2-D
            (single-channel) array -- confirm against callers.

    Returns:
        A ``(rotated, theta)`` tuple: the Otsu-thresholded image after
        rotation by ``theta``, and ``theta`` itself as produced by
        ``compute_skew(estimate_skew(image))``.
    """
    theta = compute_skew(estimate_skew(image))
    _, _, new_im = optimize_light(image)
    # Threshold the illumination-corrected image; thresholding the raw
    # `image` here would throw away the (expensive) optimisation result.
    # cv2.threshold returns a new array, so no defensive copy is needed.
    _, thresh = cv2.threshold(new_im, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    rotated = rotate(thresh, theta)
    return (rotated, theta)
| 294 | + |
| 295 | + |
def get_op_im(pars, dwn_im):
    """
    Fold the flat parameter vector into the shape of ``dwn_im`` and apply it.

    Returns a pair: the reshaped offset, and that offset added to ``dwn_im``.
    """
    shaped_offset = np.reshape(pars, dwn_im.shape)
    return shaped_offset, shaped_offset + dwn_im
| 303 | + |
| 304 | + |
def cost(pars, dwn_im, orig_lap, k1, k2):
    """
    Objective function minimised by the illumination optimiser.

    The value is the sum of two weighted penalties:
    - k1 times the squared distance of the corrected image from 255 (white)
    - k2 times the squared difference between the Laplacian of the
      corrected image and ``orig_lap``
    """
    _, corrected = get_op_im(pars, dwn_im)
    corrected_lap = cv2.Laplacian(corrected, cv2.CV_64F)

    whiteness_penalty = k1 * np.sum(np.square(corrected - 255))
    laplacian_penalty = k2 * np.sum(np.square(corrected_lap - orig_lap))
    return whiteness_penalty + laplacian_penalty
| 317 | + |
| 318 | + |
def optimize_light(image, k1=1, k2=50):
    """
    Estimate an additive offset that pushes the background towards white
    while keeping the image's Laplacian (local contrast) close to the original.

    The optimisation runs on a copy downscaled via ``downscale_image(image, 50)``,
    so only slowly varying illumination can be corrected.

    NOTE(review): the offset is resized with ``image.shape[::-1]`` as the
    target size, which presumably requires a 2-D (single-channel) image --
    confirm against callers.

    Returns ``(res, s_off, new_im)``: the ``scipy.optimize.minimize`` result,
    the offset resized to the input image, and ``image + s_off`` passed
    through ``cv2.convertScaleAbs``.
    """
    _, small = downscale_image(image, 50)
    reference_lap = cv2.Laplacian(small, cv2.CV_64F)

    # Start from a uniform offset that lifts the mean of the full image to 255.
    start = np.full(small.size, 255 - np.mean(image), dtype=np.float64)

    # Conjugate gradient, capped at a handful of iterations.
    res = minimize(
        cost,
        start,
        args=(small, reference_lap, k1, k2),
        method='CG',
        options={'maxiter': 5},
    )

    small_offset, _ = get_op_im(res.x, small)
    # Bring the low-resolution offset back up to the input resolution.
    s_off = cv2.resize(small_offset, image.shape[::-1], interpolation=cv2.INTER_CUBIC)
    corrected = cv2.convertScaleAbs(image + s_off)

    return res, s_off, corrected
0 commit comments