Let me know if you don't follow anything in my code. The biggest weaknesses of this approach are:
1: If there are noisy breaks in the main box lines, the grid will be broken into separate blobs.
2: I don't know whether handwritten text can occur here, but letters overlapping the edges of boxes could be a problem.
3: It does absolutely no orientation checking (you may actually want to add this, as I don't think it would be too hard and it would give you more accurate handles). What I mean is that it relies on your boxes being approximately aligned to the x/y axes; if they are sufficiently skewed, it will give you gross offsets for all your box corners (though it should still find them all).
I fiddled with the threshold set point a bit to get all the text separated from the edges; you could probably pull it even lower if necessary before the main line starts to break. Also, if you are worried about line breaks, you could add together all sufficiently large blobs into the final image.
![Processing steps]()
![Final result]()
Basically, the first step is fiddling with the threshold to find the most stable cutoff value (likely the lowest value that still keeps the box connected) for separating text and noise from the box.
Second, find the biggest positive blob (this should be the box grid). If your box grid doesn't stay in one piece, you may want to take a few of the largest blobs... though that will get sticky, so try to tune the threshold so that you get the grid as a single blob.
The last step is to get the rectangles; to do this, I just look for negative blobs (ignoring the first, outer area).
And here is the code (sorry that it is in C++, but hopefully you understand the concept and would write it yourself anyhow):
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
#include <stdio.h>
#include <opencv2/opencv.hpp>
using namespace cv;
/// @brief Extracts the largest connected group of non-zero pixels.
///
/// The result is assumed to be the interconnected boundary lines of the
/// textbox grid. The image is scanned column by column; every non-zero pixel
/// still present seeds a flood fill that erases its blob (paints it 0) in a
/// working copy, and the blob with the highest pixel count is remembered.
///
/// @param targetImage  Image to search. Pixels are read with at<uchar>, so a
///                     single-channel 8-bit image is expected. The caller's
///                     data is not modified (the function works on a clone).
/// @param connectivity Passed to floodFill as its flags argument (4 or 8
///                     neighbour connectivity).
/// @return An image of the same size holding only the largest blob, or an
///         empty Mat when targetImage contains no non-zero pixel.
Mat biggestComponent(Mat targetImage, int connectivity=8)
{
    Mat workImage = targetImage.clone();
    Mat largestBlob; // stays empty if no non-zero pixel is ever found
    int largestBlobSize = 0;
    std::cout<<"Top"<<std::endl;
    std::cout<<workImage.rows<<std::endl;
    std::cout<<workImage.cols<<std::endl;
    for(int col=0; col<workImage.cols; col++)
    {
        for(int row=0; row<workImage.rows; row++)
        {
            if(workImage.at<uchar>(row,col)==0)
            {
                continue; // background, or a blob that was already erased
            }
            // Snapshot taken before the fill so the erased blob can be
            // recovered as (before - after). This is a full-image copy per blob.
            Mat beforeFill = workImage.clone();
            // floodFill writes the bounding rectangle through this pointer, so
            // it must point at a real object (the value itself is not used).
            Rect boundBox;
            int blobSize = floodFill(workImage, cv::Point(col,row), Scalar(0), &boundBox, Scalar(200), Scalar(255), connectivity);
            if(largestBlobSize<blobSize)
            {
                largestBlobSize = blobSize;
                std::cout<<blobSize<<std::endl;
                // Only the pixels just erased differ between the two images.
                largestBlob = beforeFill - workImage;
            }
        }
    }
    return largestBlob;
}
/// @brief Finds the bounding rectangle of every enclosed zero-valued region.
///
/// Takes an image that only contains the outlines of boxes (non-zero pixels)
/// and flood-fills each zero-valued region with 255 in a working copy,
/// recording the bounding rectangle reported by floodFill. The first zero
/// region met by the scan (column by column, starting at the top-left pixel)
/// is treated as the outer area of the page and is filled but not recorded.
///
/// @param processedImage Outline image. Pixels are read with at<uchar>, so a
///                       single-channel 8-bit image is expected. The caller's
///                       data is not modified (the function works on a clone).
/// @param connectivity   Passed to floodFill as its flags argument (4 or 8
///                       neighbour connectivity).
/// @return One Rect per zero region after the first, in scan order.
std::vector<Rect> boxCorners(Mat processedImage, int connectivity=4)
{
    std::vector<Rect> boxHandles;
    Mat workImage = processedImage.clone();
    bool outerRegionPending = true;
    std::cout<<workImage.rows<<std::endl;
    std::cout<<workImage.cols<<std::endl;
    for(int col=0; col<workImage.cols; col++)
    {
        for(int row=0; row<workImage.rows; row++)
        {
            if(workImage.at<uchar>(row,col)!=0)
            {
                continue; // outline pixel, or a region that was already filled
            }
            Rect boundBox;
            // Painting the region 255 ensures it is never picked as a seed again.
            floodFill(workImage, cv::Point(col,row), Scalar(255), &boundBox, Scalar(0), Scalar(50), connectivity);
            if(outerRegionPending)
            {
                // The very first region is the outer zone of the page: skip it.
                outerRegionPending = false;
                continue;
            }
            boxHandles.push_back(boundBox);
        }
    }
    return boxHandles;
}
/// @brief Renders the outlines of the given rectangles on a blank canvas.
///
/// @param originalImage Only its size and type are used, to shape the canvas;
///                      its pixel data is neither read nor modified.
/// @param boxes         Rectangles to outline.
/// @return A zero-initialised image with the same size and type as
///         originalImage, with each rectangle drawn using Scalar(255).
Mat drawTestBoxes(Mat originalImage, std::vector<Rect> boxes)
{
    // Allocate the blank canvas directly instead of cloning the input and
    // multiplying it by zero.
    Mat outImage = Mat::zeros(originalImage.size(), originalImage.type());
    for(size_t i=0; i<boxes.size(); i++)
    {
        rectangle(outImage, boxes[i], Scalar(255));
    }
    return outImage;
}
int main() {
Mat image;
Mat thresholded;
Mat processed;
image = imread( "Images/W2.png", 1 );
Mat channel[3];
split(image, channel);
threshold(channel[0],thresholded,150,255,1);
std::cout<<"Coputing biggest object"<<std::endl;
processed = biggestComponent(thresholded);
std::vector<Rect> textBoxes = boxCorners(processed);
Mat finalBoxes = drawTestBoxes(image,textBoxes);
namedWindow("Original", WINDOW_AUTOSIZE );
imshow("Original", channel[0]);
namedWindow("Thresholded", WINDOW_AUTOSIZE );
imshow("Thresholded", thresholded);
namedWindow("Processed", WINDOW_AUTOSIZE );
imshow("Processed", processed);
namedWindow("Boxes", WINDOW_AUTOSIZE );
imshow("Boxes", finalBoxes);
std::cout<<"waiting for user input"<<std::endl;
waitKey(0);
return 0;
}