Skip to content

Commit 991923e

Browse files
committed
Try to get an homogeneous bg
cf jlsutherland#13
1 parent 5405563 commit 991923e

1 file changed

Lines changed: 52 additions & 2 deletions

File tree

doc2text/page.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import os
44
import traceback
55
import sys
6+
import tempfile
67

78
import cv2
89
import numpy as np
910
from PIL import Image
1011
from scipy.ndimage.filters import rank_filter
12+
from scipy.optimize import minimize
1113
import pytesseract
1214

1315

@@ -42,7 +44,7 @@ def deskew(self):
4244
self.healthy = False
4345

4446
def extract_text(self):
45-
temp_path = 'text_temp.png'
47+
_, temp_path = tempfile.mkstemp(suffix="png")
4648
cv2.imwrite(temp_path, self.image)
4749
self.text = pytesseract.image_to_string(Image.open(temp_path), lang=self.lang)
4850
os.remove(temp_path)
@@ -77,7 +79,7 @@ def downscale_image(im, max_dim=2048):
7779
return 1.0, im
7880

7981
scale = 1.0 * max_dim / max(a, b)
80-
new_im = cv2.resize(im, (int(b * scale), int(a * scale)), cv2.INTER_AREA)
82+
new_im = cv2.resize(im, (int(b * scale), int(a * scale)), interpolation=cv2.INTER_AREA)
8183
return scale, new_im
8284

8385

@@ -146,6 +148,8 @@ def rect_area(crop):
146148

147149

148150
def crop_image(im, rect, scale):
151+
if rect is None:
152+
rect = [0, 0, im.shape[0], im.shape[1]]
149153
xmin, ymin, xmax, ymax = rect
150154
crop = [xmin, ymin, xmax, ymax]
151155
xmin, ymin, xmax, ymax = [int(x / scale) for x in crop]
@@ -283,6 +287,52 @@ def compute_skew(theta):
283287

284288
def process_skewed_crop(image):
    """
    Binarize and deskew a cropped image.

    The skew angle is estimated on the input image. The illumination is then
    corrected with optimize_light, the corrected image is binarized with an
    Otsu threshold, and the binary image is rotated by the estimated angle.

    :param image: cropped image; it is passed to cv2.threshold with
        THRESH_OTSU, so it is presumably 8-bit single-channel (confirm
        against callers)
    :return: tuple (rotated, theta) with the rotated binary image and the
        angle returned by compute_skew
    """
    theta = compute_skew(estimate_skew(image))
    # Threshold the light-corrected image. The corrected image used to be
    # computed and then thrown away, with Otsu still run on the raw input.
    _, _, new_im = optimize_light(image)
    _, thresh = cv2.threshold(new_im, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    rotated = rotate(thresh, theta)
    return (rotated, theta)
294+
295+
296+
def get_op_im(pars, dwn_im):
    """
    Turn the flat parameter vector into an offset map and apply it.

    :param pars: flat sequence holding one offset per element of dwn_im
    :param dwn_im: image array the offsets are added to
    :return: tuple (offset, new_im) with pars reshaped to dwn_im.shape and
        the element-wise sum of that offset map and dwn_im
    """
    shaped_offset = np.reshape(pars, dwn_im.shape)
    return shaped_offset, shaped_offset + dwn_im
303+
304+
305+
def cost(pars, dwn_im, orig_lap, k1, k2):
    """
    Cost function getting optimized.

    It is the sum of two weighted penalties:
    - k1 times the summed squared distance of the corrected image from
      white (255)
    - k2 times the summed squared difference between the Laplacian of the
      corrected image and orig_lap

    :param pars: flat offset vector, reshaped by get_op_im
    :param dwn_im: image the offsets are applied to
    :param orig_lap: Laplacian the corrected image should stay close to
    :param k1: weight of the background (whiteness) penalty
    :param k2: weight of the contrast (Laplacian) penalty
    :return: the scalar cost
    """
    _, corrected = get_op_im(pars, dwn_im)
    whiteness_penalty = k1 * np.sum(np.square(corrected - 255))
    corrected_lap = cv2.Laplacian(corrected, cv2.CV_64F)
    laplacian_penalty = k2 * np.sum(np.square(corrected_lap - orig_lap))
    return whiteness_penalty + laplacian_penalty
317+
318+
319+
def optimize_light(image, k1=1, k2=50, maxiter=5):
    """
    Try to get a homogeneous white background while preserving good contrast.

    An additive offset map is optimized on a copy of the image downscaled
    with downscale_image(image, 50), then resized to the full image size and
    added to the image. As we down sample the image a lot, it will only work
    for slowly varying illuminations.

    :param image: input image array
    :param k1: weight of the "close to white" term of the cost
    :param k2: weight of the "preserve the Laplacian" term of the cost
    :param maxiter: maximum number of iterations given to the CG minimizer
    :return: tuple (res, s_off, new_im) with the scipy optimization result,
        the offset map resized to the image size, and the corrected image
        as returned by cv2.convertScaleAbs
    """
    _, im = downscale_image(image, 50)
    orig_lap = cv2.Laplacian(im, cv2.CV_64F)

    # Start from a flat offset that lifts the mean level of the image to white.
    start = np.full(im.size, 255 - np.mean(image), dtype=np.float64)
    res = minimize(cost, start, (im, orig_lap, k1, k2), method='CG',
                   options={'maxiter': maxiter})

    offset, _ = get_op_im(res.x, im)
    # cv2.resize takes dsize as (width, height). Read it from the first two
    # axes so the call does not receive a 3-tuple when the image has a
    # trailing channel axis.
    height, width = image.shape[:2]
    s_off = cv2.resize(offset, (width, height), interpolation=cv2.INTER_CUBIC)
    new_im = cv2.convertScaleAbs(image + s_off)

    return res, s_off, new_im

0 commit comments

Comments
 (0)