Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed DigitRecognizer causing issues with sikuli-api unit tests #2

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>7</version>
</parent>
<!--<parent>-->
<!--<groupId>org.sonatype.oss</groupId>-->
<!--<artifactId>oss-parent</artifactId>-->
<!--<version>7</version>-->
<!--</parent>-->

<parent>
<artifactId>cats-pom</artifactId>
<groupId>com.gtspt.pom</groupId>
<version>1.0.0</version>
</parent>

<licenses>
<license>
Expand Down Expand Up @@ -33,7 +39,7 @@
<artifactId>sikuli-core</artifactId>
<name>sikuli-core</name>
<url>http://www.sikuli.org</url>
<version>1.1.4-SNAPSHOT</version>
<version>1.1.4-GTS-0.1</version>
<packaging>jar</packaging>

<properties>
Expand Down Expand Up @@ -78,41 +84,41 @@
<dependency>
<groupId>com.googlecode.javacpp</groupId>
<artifactId>javacpp</artifactId>
<version>0.1</version>
<version>0.7</version>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
<classifier>macosx-x86_64</classifier>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
<classifier>windows-x86</classifier>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
<classifier>windows-x86_64</classifier>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
<classifier>linux-x86_64</classifier>
</dependency>
<dependency>
<groupId>com.googlecode.javacv</groupId>
<artifactId>javacv</artifactId>
<version>0.1</version>
<version>0.7</version>
<classifier>linux-x86</classifier>
</dependency>
<dependency>
Expand All @@ -135,7 +141,12 @@
<artifactId>looks</artifactId>
<version>2.2.2</version>
</dependency>
</dependencies>
<dependency>
<groupId>org.imgscalr</groupId>
<artifactId>imgscalr-lib</artifactId>
<version>4.2</version>
</dependency>
</dependencies>

<build>
<plugins>
Expand Down
150 changes: 80 additions & 70 deletions src/main/java/org/sikuli/ocr/DigitRecognizer.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package org.sikuli.ocr;

import java.awt.Color;
import java.awt.Font;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.util.List;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import edu.umd.cs.piccolo.PLayer;
import edu.umd.cs.piccolo.nodes.PImage;
import edu.umd.cs.piccolo.nodes.PText;
import org.imgscalr.Scalr;
import org.sikuli.core.cv.TextMap;
import org.sikuli.core.draw.ImageRenderer;
import org.sikuli.core.draw.PiccoloImageRenderer;
Expand All @@ -16,133 +16,143 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

import edu.umd.cs.piccolo.PLayer;
import edu.umd.cs.piccolo.nodes.PImage;
import edu.umd.cs.piccolo.nodes.PText;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class DigitRecognizer {

// NOTE(review): this listing is a diff rendered without +/- markers, so several
// declarations below appear twice (pre-change and post-change line of the same field).

// Receives intermediate renderers (template sheet, recognition overlay) through step(...).
final static private ImageExplainer explainer = ImageExplainer.getExplainer(DigitRecognizer.class);
final static private ImageExplainer explainer = ImageExplainer.getExplainer(DigitRecognizer.class);
// Used for trace output of each candidate match in recognizeDigit.
final static private Logger logger = LoggerFactory.getLogger(DigitRecognizer.class);

// Digit search parameters
// A rectangle that fails to match and is wider than this is split in half and retried.
static final int HORIZONTAL_SPLIT_THRESHOLD = 12;
// A match is accepted only when its score is strictly greater than this value.
static final double DIGIT_MATCH_MIN_SCORE = 0.65;

// Digit template parameters
// dy / dx / margin (compact spelling) appear to be the pre-change names of
// digitY / digitX / margin below.
static final int dy=20;
static final int dx=12;
static final int margin=5;
// Vertical step between template rows (y advances by this after each row of digits).
static final int digitY = 20;
// Horizontal step between consecutive digits within a template row.
static final int digitX = 12;
// Offset of the first glyph from the top-left corner of the template image.
static final int margin = 5;
// NOTE(review): only used in the canvas size expression "digitX * TEMPLATE_ROWS + digitX",
// i.e. together with the horizontal step, and equals the ten digits drawn per row.
// Despite the name this looks like a column count - confirm and consider renaming.
private static final int TEMPLATE_ROWS = 10;
// Passed as the third argument of TextImageRenderer.render (presumably the font size).
private static final int TEMPLATE_FONT_SIZE = 15;
// Passed as the fourth argument of TextImageRenderer.render (presumably letter tracking).
private static final int TRACKING = 0;

// Searcher over the generated digit template image; built once in this static initializer.
static ImageSearcher digitImageSearcher = new ImageSearcher(generateDigitTemplateImage());
static ImageSearcher digitImageSearcher = new ImageSearcher(generateDigitTemplateImage());

// Private constructor: every member visible in this class is static.
// NOTE(review): the header appears twice because this listing is a diff without +/- markers
// (whitespace-only change).
private DigitRecognizer(){
private DigitRecognizer(){
}

// NOTE(review): this listing is a diff without +/- markers. The two-argument signature and
// the body directly below appear to be the pre-change version: it computed the digit index
// from x and returned null when the offset inside the template cell had px < 3 and py > 3.
static private Integer convertLocationToDigit(int x, int y){
int i = Math.round((x - margin) / dx);
int px = (x - margin) % dx;
int py = (y - margin) % dy;
if (px < 3 && py > 3)
return null;
else
return i;

/**
 * Converts the x coordinate of a match inside the digit template image into the digit
 * it represents. The template draws digit i at x = margin + i * digitX, so the index is
 * the number of whole digitX steps between margin and x.
 *
 * @param x x coordinate of the match within the template image
 * @return the digit index derived from x
 */
static private Integer convertLocationToDigit(int x){
// NOTE(review): x, margin and digitX are all int, so the division truncates before
// Math.round sees the value and the rounding is a no-op. If nearest-cell rounding was
// intended, the division needs a floating-point operand - confirm the intent.
return Math.round((x - margin) / digitX);
}

/**
 * Renders the template image that digitImageSearcher searches: the digits 0-9 laid out
 * left to right, one row per font ("sansserif", "serif", "monaco"), starting at
 * (margin, margin). The renderer is also handed to the explainer before being rendered.
 *
 * NOTE(review): this listing is a diff without +/- markers, so pre-change and post-change
 * lines are interleaved. The pre-change lines (renderer named "a", the "size" loop from
 * 9 to 15, steps dx/dy) drew seven rows per font; the post-change lines (renderer named
 * "canvas", steps digitX/digitY) draw a single row per font at TEMPLATE_FONT_SIZE.
 *
 * @return the rendered template image
 */
static private BufferedImage generateDigitTemplateImage(){
final List<Font> fonts = Lists.newArrayList();
fonts.add(new Font("sansserif",0,0));
fonts.add(new Font("serif",0,0));
fonts.add(new Font("monaco",0,0));
PiccoloImageRenderer a = new PiccoloImageRenderer(130,dy*7*fonts.size()+20){

final List<Font> fonts = Lists.newArrayList();
// Fonts are created with style 0 (plain) and size 0; the size actually used is the one
// passed to TextImageRenderer.render below - presumably render applies it; confirm.
fonts.add(new Font("sansserif",0,0));
fonts.add(new Font("serif",0,0));
fonts.add(new Font("monaco",0,0));

// Canvas arguments evaluate to 12 * 10 + 12 = 132 and 20 * 3 + 20 = 80
// (presumably width and height, in that order - confirm against PiccoloImageRenderer).
PiccoloImageRenderer canvas = new PiccoloImageRenderer(digitX * TEMPLATE_ROWS + digitX, digitY * fonts.size() + digitY){

@Override
protected void addContent(PLayer layer) {
int x = margin;
int y = margin;
for (Font font : fonts){
for (int size = 9; size <= 15; ++ size){
for (int i=0;i<=9;i++){
BufferedImage digitImage = TextImageRenderer.render(""+i, font, size, 0);
PImage pi = new PImage(digitImage);
pi.setOffset(x,y);
layer.addChild(pi);
x += dx;
}
y += dy;
x = margin;
}
// One glyph per digit; digit i lands at x = margin + i * digitX, which is the layout
// convertLocationToDigit relies on to map a match position back to a digit.
for (int i=0;i<=9;i++){
BufferedImage digitImage = TextImageRenderer.render(""+i, font, TEMPLATE_FONT_SIZE, TRACKING);
PImage pi = new PImage(digitImage);
pi.setOffset(x,y);
layer.addChild(pi);
x += digitX;
}
// Move to the next row and return to the left margin for the next font.
y += digitY;
x = margin;
}
}
}
};
explainer.step(a, "generated digit template images");
return a.render();
explainer.step(canvas, "generated digit template images");
return canvas.render();
}


/**
 * Recognizes digits in the given image. A TextMap is created from the image, and every
 * rectangle returned by getCharacterBounds() is passed to recognizeDigit, which appends
 * any accepted digits to the result list. An overlay of the results is handed to the
 * explainer before returning.
 *
 * NOTE(review): this listing is a diff without +/- markers; the doubled declaration and
 * doubled closing brace below are the pre-change and post-change copies of
 * whitespace-only changes.
 *
 * @param inputImage image to search for digits
 * @return the recognized digits, in the order they were accepted; empty when none matched
 */
static public List<RecognizedDigit> recognize(BufferedImage inputImage){

List<RecognizedDigit> recognizedDigits = Lists.newArrayList();
List<RecognizedDigit> recognizedDigits = Lists.newArrayList();

TextMap tm = TextMap.createFrom(inputImage);

// Each candidate rectangle is matched against the shared static digitImageSearcher.
for (Rectangle r : tm.getCharacterBounds()){
recognizeDigit(inputImage, r, digitImageSearcher, recognizedDigits);
}
}

explainer.step(visualize(inputImage, recognizedDigits), "recognized digits");

return recognizedDigits;
}

/**
 * Builds a renderer over the input image that adds one red text label per recognized
 * digit. Each label shows the digit character, is scaled to 0.7 and is offset to
 * (r.x, r.y + r.height), i.e. the bottom-left corner of the digit's rectangle.
 *
 * NOTE(review): this listing is a diff without +/- markers; the doubled "for" header,
 * addChild call and closing brace are pre-change/post-change copies of whitespace-only
 * changes.
 *
 * @param inputImage image passed to the PiccoloImageRenderer constructor
 * @param recognizedDigits digits to label; read when addContent runs
 * @return a renderer that adds the labels in addContent
 */
static private ImageRenderer visualize(BufferedImage inputImage, final List<RecognizedDigit> recognizedDigits){
return new PiccoloImageRenderer(inputImage){
@Override
protected void addContent(PLayer layer) {
for (RecognizedDigit r : recognizedDigits){
for (RecognizedDigit r : recognizedDigits){
//Rectangle r = md.bounds;
PText t = new PText(""+r.digit);
t.setOffset(r.x, r.y+r.height);
t.setScale(0.7f);
t.setTextPaint(Color.red);
layer.addChild(t);
layer.addChild(t);
}
}
}
};
}

/**
 * Tries to recognize a single digit inside rectangle r of the input image and appends it
 * to recognizedDigits when the best match scores above DIGIT_MATCH_MIN_SCORE. When no
 * match is accepted and r is wider than HORIZONTAL_SPLIT_THRESHOLD, r is split into a
 * left and a right half and each half is processed recursively.
 *
 * NOTE(review): this listing is a diff without +/- markers, so pre-change and post-change
 * lines are interleaved. The pre-change lines ran a single search on the unscaled crop and
 * read matches.get(0) directly; the post-change lines search three rescaled crops and pick
 * the highest score.
 *
 * @param inputImage image the rectangle refers to
 * @param r candidate character bounds; ignored when its width is 0 or its height is 3 or less
 * @param digitImageSearcher searcher over the digit template image
 * @param recognizedDigits output list that accepted digits are appended to
 */
static private void recognizeDigit(BufferedImage inputImage, Rectangle r, ImageSearcher digitImageSearcher,
static private void recognizeDigit(BufferedImage inputImage, Rectangle r, final ImageSearcher digitImageSearcher,
List<RecognizedDigit> recognizedDigits){
if (r.width == 0 || r.height <= 3)

// Skip rectangles with zero width or a height of 3 or less.
if (r.width == 0 || r.height <= 3)
return;

BufferedImage charImage = inputImage.getSubimage(r.x, r.y, r.width, r.height);
ImageQuery q = new ImageQuery(charImage);
List<RegionMatch> matches = digitImageSearcher.search(q,null,1);
final BufferedImage charImage = inputImage.getSubimage(r.x, r.y, r.width, r.height);

// Search the template with the crop resized to target sizes digitX, digitX - 1 and
// digitX - 2 (presumably at most one match per search, given the final argument 1).
// NOTE(review): the double-brace form creates an anonymous ArrayList subclass with an
// instance initializer; a plain ArrayList followed by three addAll calls would avoid the
// extra class.
List<RegionMatch> matches = new ArrayList<RegionMatch>(){{
addAll(digitImageSearcher.search(new ImageQuery(Scalr.resize(charImage, Scalr.Method.ULTRA_QUALITY, digitX)), null, 1));
addAll(digitImageSearcher.search(new ImageQuery(Scalr.resize(charImage, Scalr.Method.ULTRA_QUALITY, digitX - 1)), null, 1));
addAll(digitImageSearcher.search(new ImageQuery(Scalr.resize(charImage, Scalr.Method.ULTRA_QUALITY, digitX - 2)), null, 1));
}};

RegionMatch m = matches.get(0);
double score = m.getScore();
Integer i = convertLocationToDigit(m.x,m.y);
// Sort by score, highest first (ascending comparator wrapped in reverseOrder).
Collections.sort(matches, Collections.reverseOrder(new Comparator<RegionMatch>() {
@Override
public int compare(RegionMatch regionMatch1, RegionMatch regionMatch2) {
return Double.compare(regionMatch1.getScore(), regionMatch2.getScore());
}
}));

logger.trace("[" + i + "] (" + m.x + "," + m.y + ") score: " + score);
// Highest-scoring match, or null when none of the searches returned anything.
RegionMatch bestRegionMatch = Iterables.getFirst(matches, null);

if (score > DIGIT_MATCH_MIN_SCORE && i != null){
RecognizedDigit md = new RecognizedDigit();
if (bestRegionMatch != null && bestRegionMatch.getScore() > DIGIT_MATCH_MIN_SCORE){
// The match's x position inside the template image identifies which digit was hit.
Integer i = convertLocationToDigit(bestRegionMatch.x);

logger.trace("[" + i + "] (" + bestRegionMatch.x + "," + bestRegionMatch.y + ") score: " + bestRegionMatch.getScore());

// The recorded bounds are those of the input rectangle, not of the template match.
RecognizedDigit md = new RecognizedDigit();
md.x = r.x;
md.y = r.y;
md.width = r.width;
md.height = r.height;
// NOTE(review): only the first character of the index is kept, so an index outside 0-9
// would be stored as its leading character - confirm that cannot occur.
md.digit = Integer.toString(i).charAt(0);
recognizedDigits.add(md);
}else{

if (r.width > HORIZONTAL_SPLIT_THRESHOLD){
} else {
// No acceptable match: a wide rectangle may contain two touching digits, so retry on
// each half.
// NOTE(review): both halves get width r.width/2 (int division), so for an odd width the
// last pixel column is not covered by either half.
if (r.width > HORIZONTAL_SPLIT_THRESHOLD){
Rectangle r1 = new Rectangle(r.x,r.y,r.width/2,r.height);
Rectangle r2 = new Rectangle(r.x + r.width/2,r.y,r.width/2,r.height);
recognizeDigit(inputImage, r1, digitImageSearcher, recognizedDigits);
recognizeDigit(inputImage, r2, digitImageSearcher, recognizedDigits);
recognizeDigit(inputImage, r2, digitImageSearcher, recognizedDigits);
}

}
}
}