OpenCV实现基于傅里叶变换的旋转文本校正

时间 2019-11-05

标签 opencv 实现基于傅里叶变换旋转文本校订繁體版

原文原文链接

代码

先给出代码，再详细解释一下过程：

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

#include <cmath>
#include <iostream>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;

using namespace std;

#define GRAY_THRESH 150

#define HOUGH_VOTE 100

//#define DEGREE 27

int main(int argc, char **argv)

{

//Read a single-channel image

const char* filename = "imageText.jpg";

Mat srcImg = imread(filename, CV_LOAD_IMAGE_GRAYSCALE);

if(srcImg.empty())

return -1;

imshow("source", srcImg);

Point center(srcImg.cols/2, srcImg.rows/2);

#ifdef DEGREE

//Rotate source image

Mat rotMatS = getRotationMatrix2D(center, DEGREE, 1.0);

warpAffine(srcImg, srcImg, rotMatS, srcImg.size(), 1, 0, Scalar(255,255,255));

imshow("RotatedSrc", srcImg);

//imwrite("imageText_R.jpg",srcImg);

#endif

//Expand image to an optimal size, for faster processing speed

//Set widths of borders in four directions

//If borderType==BORDER_CONSTANT, fill the borders with (0,0,0)

Mat padded;

int opWidth = getOptimalDFTSize(srcImg.rows);

int opHeight = getOptimalDFTSize(srcImg.cols);

copyMakeBorder(srcImg, padded, 0, opWidth-srcImg.rows, 0, opHeight-srcImg.cols, BORDER_CONSTANT, Scalar::all(0));

Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};

Mat comImg;

//Merge into a double-channel image

merge(planes,2,comImg);

//Use the same image as input and output,

//so that the results can fit in Mat well

dft(comImg, comImg);

//Compute the magnitude

//planes[0]=Re(DFT(I)), planes[1]=Im(DFT(I))

//magnitude=sqrt(Re^2+Im^2)

split(comImg, planes);

magnitude(planes[0], planes[1], planes[0]);

//Switch to logarithmic scale, for better visual results

//M2=log(1+M1)

Mat magMat = planes[0];

magMat += Scalar::all(1);

log(magMat, magMat);

//Crop the spectrum

//Width and height of magMat should be even, so that they can be divided by 2

//-2 is 11111110 in binary system, operator & make sure width and height are always even

magMat = magMat(Rect(0, 0, magMat.cols & -2, magMat.rows & -2));

//Rearrange the quadrants of Fourier image,

//so that the origin is at the center of image,

//and move the high frequency to the corners

int cx = magMat.cols/2;

int cy = magMat.rows/2;

Mat q0(magMat, Rect(0, 0, cx, cy));

Mat q1(magMat, Rect(0, cy, cx, cy));

Mat q2(magMat, Rect(cx, cy, cx, cy));

Mat q3(magMat, Rect(cx, 0, cx, cy));

Mat tmp;

q0.copyTo(tmp);

q2.copyTo(q0);

tmp.copyTo(q2);

q1.copyTo(tmp);

q3.copyTo(q1);

tmp.copyTo(q3);

//Normalize the magnitude to [0,1], then to[0,255]

normalize(magMat, magMat, 0, 1, CV_MINMAX);

Mat magImg(magMat.size(), CV_8UC1);

magMat.convertTo(magImg,CV_8UC1,255,0);

imshow("magnitude", magImg);

//imwrite("imageText_mag.jpg",magImg);

//Turn into binary image

threshold(magImg,magImg,GRAY_THRESH,255,CV_THRESH_BINARY);

imshow("mag_binary", magImg);

//imwrite("imageText_bin.jpg",magImg);

//Find lines with Hough Transformation

vector<Vec2f> lines;

float pi180 = (float)CV_PI/180;

Mat linImg(magImg.size(),CV_8UC3);

HoughLines(magImg,lines,1,pi180,HOUGH_VOTE,0,0);

int numLines = lines.size();

for(int l=0; l<numLines; l++)

{

float rho = lines[l][0], theta = lines[l][1];

Point pt1, pt2;

double a = cos(theta), b = sin(theta);

double x0 = a*rho, y0 = b*rho;

pt1.x = cvRound(x0 + 1000*(-b));

pt1.y = cvRound(y0 + 1000*(a));

pt2.x = cvRound(x0 - 1000*(-b));

pt2.y = cvRound(y0 - 1000*(a));

line(linImg,pt1,pt2,Scalar(255,0,0),3,8,0);

}

imshow("lines",linImg);

//imwrite("imageText_line.jpg",linImg);

if(lines.size() == 3){

cout << "found three angels:" << endl;

cout << lines[0][1]*180/CV_PI << endl << lines[1][1]*180/CV_PI << endl << lines[2][1]*180/CV_PI << endl << endl;

}

//Find the proper angel from the three found angels

float angel=0;

float piThresh = (float)CV_PI/90;

float pi2 = CV_PI/2;

for(int l=0; l<numLines; l++)

{

float theta = lines[l][1];

if(abs(theta) < piThresh || abs(theta-pi2) < piThresh)

continue;

else{

angel = theta;

break;

}

//Calculate the rotation angel

//The image has to be square,

//so that the rotation angel can be calculate right

angel = angel<pi2 ? angel : angel-CV_PI;

if(angel != pi2){

float angelT = srcImg.rows*tan(angel)/srcImg.cols;

angel = atan(angelT);

}

float angelD = angel*180/(float)CV_PI;

cout << "the rotation angel to be applied:" << endl << angelD << endl << endl;

//Rotate the image to recover

Mat rotMat = getRotationMatrix2D(center,angelD,1.0);

Mat dstImg = Mat::ones(srcImg.size(),CV_8UC3);

warpAffine(srcImg,dstImg,rotMat,srcImg.size(),1,0,Scalar(255,255,255));

imshow("result",dstImg);

//imwrite("imageText_D.jpg",dstImg);

waitKey(0);

return 0;

}

过程
读取图片

Mat srcImg = imread(filename, CV_LOAD_IMAGE_GRAYSCALE);

if(srcImg.empty())

return -1;

srcImg.empty()用来判断是否成功读进图像，如果srcImg中没有数据，在后面的步骤会产生内存错误。
由于处理的是文本，彩色信息不会提供额外帮助，所以要用CV_LOAD_IMAGE_GRAYSCALE表明以灰度形式读进图像。
假定读取的图像如下：

旋转原图像(可选)

Point center(srcImg.cols/2, srcImg.rows/2);

#ifdef DEGREE

//Rotate source image

Mat rotMatS = getRotationMatrix2D(center, DEGREE, 1.0);

warpAffine(srcImg, srcImg, rotMatS, srcImg.size(), 1, 0, Scalar(255,255,255));

imshow("RotatedSrc", srcImg);

//imwrite("H:\\imageText_02_R.jpg",srcImg);

#endif

如果手头没有这样的倾斜图像，可以选择一张正放的文本图像，再把第12行#define DEGREE那行前的注释符号去掉。然后这部分代码就会把所给的图像旋转你规定的角度，再交给后面处理。

图像延扩

Mat padded;

int opWidth = getOptimalDFTSize(srcImg.rows);

int opHeight = getOptimalDFTSize(srcImg.cols);

copyMakeBorder(srcImg, padded, 0, opWidth-srcImg.rows, 0, opHeight-srcImg.cols, BORDER_CONSTANT, Scalar::all(0));

OpenCV中的DFT采用的是快速算法，这种算法要求图像的尺寸是2、3和5的倍数时处理速度最快。所以需要用getOptimalDFTSize()找到最适合的尺寸，然后用copyMakeBorder()填充多余的部分。这里是让原图像和扩大的图像左上角对齐。填充的颜色如果是纯色，对变换结果的影响不会很大，后面寻找倾斜线的过程又会完全忽略这一点影响。

DFT

Mat planes[] = {Mat_<float>(padded), Mat::zeros(padded.size(), CV_32F)};

Mat comImg;

merge(planes,2,comImg);

dft(comImg, comImg);

DFT要分别计算实部和虚部，把要处理的图像作为输入的实部、一个全零的图像作为输入的虚部。dft()输入和输出应该分别为单张图像，所以要先用merge()把实虚部图像合并，分别处于图像comImg的两个通道内。计算得到的实虚部仍然保存在comImg的两个通道内。

得到DFT图像

split(comImg, planes);

magnitude(planes[0], planes[1], planes[0]);

Mat magMat = planes[0];

magMat += Scalar::all(1);

log(magMat, magMat);

一般都会用幅度图像来表示图像傅里叶的变换结果（傅里叶谱）。
幅度的计算公式：magnitude = sqrt(Re(DFT)^2 + Im(DFT)^2)。
由于幅度的变化范围很大，而一般图像亮度范围只有[0,255]，容易造成一大片漆黑，只有几个点很亮。所以要用log函数把数值的范围缩小。

magMat = magMat(Rect(0, 0, magMat.cols & -2, magMat.rows & -2));

int cx = magMat.cols/2;

int cy = magMat.rows/2;

Mat q0(magMat, Rect(0, 0, cx, cy));

Mat q1(magMat, Rect(0, cy, cx, cy));

Mat q2(magMat, Rect(cx, cy, cx, cy));

Mat q3(magMat, Rect(cx, 0, cx, cy));

Mat tmp;

q0.copyTo(tmp);

q2.copyTo(q0);

tmp.copyTo(q2);

q1.copyTo(tmp);

q3.copyTo(q1);

tmp.copyTo(q3);

normalize(magMat, magMat, 0, 1, CV_MINMAX);

Mat magImg(magMat.size(), CV_8UC1);

magMat.convertTo(magImg,CV_8UC1,255,0);

dft()直接得到的结果中，低频部分位于四角，高频部分位于中间。习惯上会把图像分成四等份，互相对调，使低频部分位于图像中心，也就是让频域原点位于中心。

虽然用log()缩小了数据范围，但仍然不能保证数值都落在[0,255]以内，所以要先用normalize()规范化到[0,1]内，再用convertTo()把小数映射到[0,255]内的整数。结果保存在一幅单通道图像内：

Hough直线检测
从傅里叶谱可以明显地看到一条过中心点的倾斜直线。要想求出这个倾斜角，首先要在图像上找出这条直线。
一个很方便的方法是采用霍夫（Hough）变换检测直线。

threshold(magImg,magImg,GRAY_THRESH,255,CV_THRESH_BINARY);

Hough变换要求输入图像是二值的，因此要用threshold()把图像二值化。
二值化的一种结果：

vector<Vec2f> lines;

float pi180 = (float)CV_PI/180;

Mat linImg(magImg.size(),CV_8UC3);

HoughLines(magImg,lines,1,pi180,HOUGH_VOTE,0,0);

int numLines = lines.size();

for(int l=0; l<numLines; l++)

{

float rho = lines[l][0], theta = lines[l][1];

Point pt1, pt2;

double a = cos(theta), b = sin(theta);

double x0 = a*rho, y0 = b*rho;

pt1.x = cvRound(x0 + 1000*(-b));

pt1.y = cvRound(y0 + 1000*(a));

pt2.x = cvRound(x0 - 1000*(-b));

pt2.y = cvRound(y0 - 1000*(a));

line(linImg,pt1,pt2,Scalar(255,0,0),3,8,0);

}

这一部分用HoughLines()检测图像中可能存在的直线，并把直线参数保存在向量组lines中，然后绘制出找到的直线。
两个参数GRAY_THRESH和HOUGH_VOTE需要手动指定，不同的图像需要设置不同的参数，同一段文本旋转不同的角度也需要不同的参数。GRAY_THRESH越大，二值化的阈值就越高；HOUGH_VOTE越大，霍夫检测的投票数就越高（需要更多的共线点来确定一条直线）。说白了，如果发现二值化图像中直线附近有很多散点，就要适当提高GRAY_THRESH；如果发现从二值图像的一条直线上检测到了几条角度相差很小的直线，就需要适当提高HOUGH_VOTE。我们希望得到的结果是刚好检测到三条直线（有时只能检测到一条直线，后面会给出一个例子）。
检测到的直线：

计算倾斜角
上面得到了三个角度，一个是0度，一个是90度，另一个就是我们所需要的倾斜角。要把这个角找出来，而且要考虑误差。

float angel=0;

float piThresh = (float)CV_PI/90;

float pi2 = CV_PI/2;

for(int l=0; l<numLines; l++)

{

float theta = lines[l][1];

if(abs(theta) < piThresh || abs(theta-pi2) < piThresh)

continue;

else{

angel = theta;

break;

}

angel = angel<pi2 ? angel : angel-CV_PI;

if(angel != pi2){

float angelT = srcImg.rows*tan(angel)/srcImg.cols;

angel = atan(angelT);

}

float angelD = angel*180/(float)CV_PI;

由于DFT的特点，只有输入图像是正方形时，检测到的角才是文本真正旋转的角度。但我们的输入图像不一定是正方形的，所以要根据图像的长宽比改变这个角度。
还有一个需要注意的细节，虽然HoughLines()输出的倾斜角在[0,180)之间，但在[0,90]和(90,180)之间这个角的含义是不同的。请看图示：

当倾斜角大于90度时，(180-倾斜角)才是直线相对竖直方向的偏离角度。在OpenCV中，逆时针旋转，角度为正。要把图像转回去，这个角度就变成了(倾斜角-180)。
校正图像
最后一步，当然是把图像转回去

Mat rotMat = getRotationMatrix2D(center,angelD,1.0);

Mat dstImg = Mat::ones(srcImg.size(),CV_8UC3);

warpAffine(srcImg,dstImg,rotMat,srcImg.size(),1,0,Scalar(255,255,255));

先用getRotationMatrix2D()得到一个2*3的仿射变换矩阵，再把这个矩阵输入warpAffine()，做一个单纯旋转的仿射变换。warpAffine()的最后一个参数Scalar(255,255,255)是把由于旋转产生的空白用白色填充。
校正的结果：

一个检测单条直线的例子
原始图像：

傅里叶谱：

只有一条明显的直线。还好仅有的这条直线正是我们所需要的。
检测直线：

校正结果：

对中文的效果
我们来试试看这段程序对中文的校正效果。
输入图像：

傅里叶谱：

可以发现有许多条平行的亮线，其中过频域原点的那条长度最长，最容易检测出来。
检测直线：

校正结果：

虽然中文和英文在文字上有很大的不同，但字母（或者文字）的高度比较一致，使得行与行之间的分隔很明显。所以它们的频域特征是相似的。

对其他语言文字的效果
我从IMDB.com摘取影片《教父》的英文介绍，然后用谷歌翻译成其他文字进行测试。
阿拉伯语

一枚反例
老挝语：

傅里叶谱：

一种二值化的结果：

直线检测：

这种文字的很多字母的上下方多了很多“笔画”（我不知道该怎么称呼那些小曲线），让行与行之间的分离变得不明显，使得频域特征变得不明显。
虽然用肉眼可以看出傅里叶谱中存在一条倾斜的直线，但它的亮度太低，二值化过程很难排除噪声，导致直线检测会首先检出噪声产生的直线。这也是我的程序目前受限之处。需要增加一个过滤散点噪声的步骤以增加程序的适用范围。

参考：Discrete Fourier Transform — OpenCV 2.4.7.0 documentation

代码还能够在这里下载：https://github.com/johnhany/textRotCorrect

原文：http://johnhany.net/2013/11/dft-based-text-rotation-correction/