Skip to content

Commit 6f3cb63

Browse files
committed
Add speed_multi_scale_detail_boosting_see.cpp
1 parent 12e6f25 commit 6f3cb63

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include <stdio.h>
2+
#include <opencv2/opencv.hpp>
3+
#include "../../OpencvTest/OpencvTest/Core.h"
4+
#include "../../OpencvTest/OpencvTest/MaxFilter.h"
5+
#include "../../OpencvTest/OpencvTest/Utility.h"
6+
#include "../../OpencvTest/OpencvTest/BoxFilter.h"
7+
using namespace std;
8+
using namespace cv;
9+
// Forces the SSSE3 branch of the `#ifdef __SSSE3__` test in _mm_sgn_epi16 below,
// independently of the target flags the compiler was invoked with.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation, and compilers normally predefine this macro themselves when
// SSSE3 code generation is enabled — confirm this manual definition is still wanted.
#define __SSSE3__ 1
10+
11+
void BoxBlur_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Channel, int Radius) {
12+
TMatrix a, b;
13+
TMatrix *p1 = &a, *p2 = &b;
14+
TMatrix **p3 = &p1, **p4 = &p2;
15+
IS_CreateMatrix(Width, Height, IS_DEPTH_8U, Channel, p3);
16+
IS_CreateMatrix(Width, Height, IS_DEPTH_8U, Channel, p4);
17+
(p1)->Data = Src;
18+
(p2)->Data = Dest;
19+
BoxBlur_SSE(p1, p2, Radius, EdgeMode::Smear);
20+
}
21+
22+
/// Returns the sign of X: -1 when negative, 0 when zero, +1 when positive.
///
/// Implemented with comparisons instead of shift tricks so that it neither
/// negates X (signed overflow for INT_MIN) nor relies on the
/// implementation-defined result of right-shifting a negative int.
int IM_Sign(int X) {
    return (X > 0) - (X < 0);
}
25+
26+
/// Saturates an int to the byte range: values below 0 become 0, values above
/// 255 become 255, everything else is returned unchanged.
inline unsigned char IM_ClampToByte(int Value)
{
    // Branch-free alternative kept for reference:
    //   (Value | ((signed int)(255 - Value) >> 31)) & ~((signed int)Value >> 31)
    const int lowerBounded = (Value < 0) ? 0 : Value;
    const int saturated = (lowerBounded > 255) ? 255 : lowerBounded;
    return static_cast<unsigned char>(saturated);
}
36+
37+
38+
/// Per-lane sign of eight signed 16-bit integers: every lane of the result is
/// -1, 0 or +1 according to the sign of the matching lane of v.
inline __m128i _mm_sgn_epi16(__m128i v) {
    const __m128i plusOne = _mm_set1_epi16(1);
#ifdef __SSSE3__
    // PSIGNW (SSSE3 and later): negates, zeroes or keeps the constant 1
    // depending on the sign of each lane of v.
    return _mm_sign_epi16(plusOne, v);
#else
    // SSE2/SSE3 fallback: clamp every lane to [-1, 1] with PMINSW / PMAXSW.
    // (The constants could also be synthesised without a load:
    //   1  == _mm_srli_epi16(_mm_cmpeq_epi16(v, v), 15)
    //   -1 == _mm_cmpeq_epi16(v, v))
    const __m128i minusOne = _mm_set1_epi16(-1);
    return _mm_max_epi16(_mm_min_epi16(v, plusOne), minusOne);
#endif
}
50+
51+
void MultiScaleSharpen(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Radius) {
52+
int Channel = Stride / Width;
53+
unsigned char *B1 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
54+
unsigned char *B2 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
55+
unsigned char *B3 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
56+
BoxBlur_SSE(Src, B1, Width, Height, Channel, Stride, Radius);
57+
BoxBlur_SSE(Src, B2, Width, Height, Channel, Stride, Radius * 2);
58+
BoxBlur_SSE(Src, B3, Width, Height, Channel, Stride, Radius * 4);
59+
for (int Y = 0; Y < Height * Stride; Y++) {
60+
int DiffB1 = Src[Y] - B1[Y];
61+
int DiffB2 = B1[Y] - B2[Y];
62+
int DiffB3 = B2[Y] - B3[Y];
63+
Dest[Y] = IM_ClampToByte(((4 - 2 * IM_Sign(DiffB1)) * DiffB1 + 2 * DiffB2 + DiffB3) / 4 + Src[Y]);
64+
}
65+
}
66+
67+
void MultiScaleSharpen_SSE(unsigned char *Src, unsigned char *Dest, int Width, int Height, int Stride, int Radius) {
68+
int Channel = Stride / Width;
69+
unsigned char *B1 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
70+
unsigned char *B2 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
71+
unsigned char *B3 = (unsigned char *)malloc(Height * Stride * sizeof(unsigned char));
72+
BoxBlur_SSE(Src, B1, Width, Height, Channel, Stride, Radius);
73+
BoxBlur_SSE(Src, B2, Width, Height, Channel, Stride, Radius * 2);
74+
BoxBlur_SSE(Src, B3, Width, Height, Channel, Stride, Radius * 4);
75+
int BlockSize = 8, Block = (Height * Stride) / BlockSize;
76+
__m128i Zero = _mm_setzero_si128();
77+
__m128i Four = _mm_set1_epi16(4);
78+
for (int Y = 0; Y < Block * BlockSize; Y += BlockSize) {
79+
__m128i SrcV = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(Src + Y)), Zero);
80+
__m128i SrcB1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(B1 + Y)), Zero);
81+
__m128i SrcB2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(B2 + Y)), Zero);
82+
__m128i SrcB3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(B3 + Y)), Zero);
83+
__m128i DiffB1 = _mm_sub_epi16(SrcV, SrcB1);
84+
__m128i DiffB2 = _mm_sub_epi16(SrcB1, SrcB2);
85+
__m128i DiffB3 = _mm_sub_epi16(SrcB2, SrcB3);
86+
//__m128i Offset = _mm_srai_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(_mm_sub_epi16(Four, _mm_slli_epi16(_mm_sgn_epi16(DiffB1), 1)), DiffB1), _mm_slli_epi16(DiffB2, 1)), DiffB3), 2);
87+
__m128i Offset = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(_mm_slli_epi16(_mm_sub_epi16(SrcB1, _mm_sign_epi16(DiffB1, DiffB1)), 1), _mm_add_epi16(SrcB2, SrcB3)), 2), DiffB1);
88+
_mm_storel_epi64((__m128i *)(Dest + Y), _mm_packus_epi16(_mm_add_epi16(SrcV, Offset), Zero));
89+
}
90+
for (int Y = Block * BlockSize; Y < Height * Stride; Y++) {
91+
int DiffB1 = Src[Y] - B1[Y];
92+
int DiffB2 = B1[Y] - B2[Y];
93+
int DiffB3 = B2[Y] - B3[Y];
94+
Dest[Y] = IM_ClampToByte(((4 - 2 * IM_Sign(DiffB1)) * DiffB1 + 2 * DiffB2 + DiffB3) / 4 + Src[Y]);
95+
}
96+
}
97+
98+
int main() {
99+
Mat src = imread("F:\\car.jpg");
100+
int Height = src.rows;
101+
int Width = src.cols;
102+
unsigned char *Src = src.data;
103+
unsigned char *Dest = new unsigned char[Height * Width * 3];
104+
int Stride = Width * 3;
105+
int Radius = 5;
106+
int64 st = cvGetTickCount();
107+
for (int i = 0; i <10; i++) {
108+
//Mat temp = MaxFilter(src, Radius);
109+
MultiScaleSharpen_SSE(Src, Dest, Width, Height, Stride, Radius);
110+
}
111+
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 100;
112+
printf("%.5f\n", duration);
113+
MultiScaleSharpen(Src, Dest, Width, Height, Stride, Radius);
114+
Mat dst(Height, Width, CV_8UC3, Dest);
115+
imshow("origin", src);
116+
imshow("result", dst);
117+
imwrite("F:\\res.jpg", dst);
118+
waitKey(0);
119+
return 0;
120+
}

0 commit comments

Comments
 (0)