-
Notifications
You must be signed in to change notification settings - Fork 10
/
CaptchaBreaker.rb
73 lines (59 loc) · 1.91 KB
/
CaptchaBreaker.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Author: Hugo Ribeira
# 13 Aug 2014
require 'rmagick'
require 'rtesseract'
# Cleans up a captcha image (grey-level quantisation, removal of the brightest
# level, erosion and dilation) and hands the result to Tesseract for reading.
class CaptchaBreaker
  # @param image_string [String] raw image bytes, decoded with
  #   Magick::Image.from_blob; only the first decoded frame is used.
  def initialize(image_string)
    @image = Magick::Image.from_blob(image_string).first
    # Reduce the picture to three grey levels.
    @image = @image.quantize(3, Magick::GRAYColorspace)
  end

  # Runs the clean-up pipeline and the OCR step.
  #
  # Works on a local image so that @image keeps the quantised original and the
  # method gives the same answer when called more than once.
  #
  # @return the value of RTesseract#to_s_without_spaces (presumably the
  #   recognised characters with whitespace stripped - confirm against the
  #   rtesseract version in use)
  def break
    image = binarize_image(@image)
    2.times { image = erode(image) }
    3.times { image = dilate(image) }

    # Use tesseract to read the characters
    image.format = 'JPEG'
    tesseract = RTesseract.new('')
    tesseract.from_blob(image.to_blob)
    tesseract.to_s_without_spaces
  end

  private

  # Flat, row-major list of the red-channel samples of +image+.
  def get_pixels(image)
    image.dispatch(0, 0, image.columns, image.rows, 'R')
  end

  # Builds an RGB image in which every channel carries the same sample.
  #
  # @param pixels [Array] one sample per pixel, row-major
  # @param columns [Integer] width of the new image (defaults to @image's)
  # @param rows [Integer] height of the new image (defaults to @image's)
  def image_from_pixels(pixels, columns = @image.columns, rows = @image.rows)
    rgb = pixels.flat_map { |px| [px, px, px] } # Replicate channels to create an rgb image
    Magick::Image.constitute(columns, rows, 'RGB', rgb)
  end

  # Filters out the white line in the captcha and makes the image binary:
  # every pixel holding the brightest sample is set to 0, then the image is
  # quantised down to two grey levels.
  def binarize_image(image)
    pixels = get_pixels(image)
    brightest = pixels.max
    filtered = pixels.map { |px| px == brightest ? 0 : px }
    image_from_pixels(filtered, image.columns, image.rows)
      .quantize(2, Magick::GRAYColorspace)
  end

  # Spreads one colour by one pixel in the four axis-aligned directions.
  #
  # With action == :erode the spreading colour is the largest sample in the
  # image; with any other action it is the smallest. Every pixel that is not
  # already that colour but has a direct neighbour of that colour takes it.
  # Neighbours are read from the untouched input samples and are restricted to
  # pixels that really exist inside the image: no wrap-around to the opposite
  # edge and no spill-over into the adjacent row.
  #
  # @param image [Magick::Image] image to process (not modified)
  # @param action [Symbol] :erode or :dilate
  # @return [Magick::Image] new image of the same size
  def erode(image, action = :erode)
    source = get_pixels(image)
    columns = image.columns
    rows = image.rows
    spreading = action == :erode ? source.max : source.min

    result = source.each_with_index.map do |px, index|
      next px if px == spreading # already the spreading colour

      touches?(source, index, columns, rows, spreading) ? spreading : px
    end

    image_from_pixels(result, columns, rows)
  end

  # Dilating is eroding if you exchange the blacks with whites and vice versa
  def dilate(image)
    erode(image, :dilate)
  end

  # True when the pixel at +index+ has a left, right, upper or lower neighbour
  # inside the columns x rows grid whose sample equals +value+.
  def touches?(pixels, index, columns, rows, value)
    row, col = index.divmod(columns)

    (col.positive? && pixels[index - 1] == value) ||
      (col < columns - 1 && pixels[index + 1] == value) ||
      (row.positive? && pixels[index - columns] == value) ||
      (row < rows - 1 && pixels[index + columns] == value)
  end
end