Skip to content

Commit 77c1588

Browse files
committed
Add speed_rgb2yuv_sse.cpp
1 parent 146c5d7 commit 77c1588

File tree

1 file changed

+27
-51
lines changed

1 file changed

+27
-51
lines changed

speed_rgb2yuv_sse.cpp

Lines changed: 27 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ inline unsigned char ClampToByte(int Value){
1616

1717
void RGBToYUV(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride)
1818
{
19-
const int Shift = 15;
19+
const int Shift = 13;
2020
const int HalfV = 1 << (Shift - 1);
2121
const int Y_B_WT = 0.114f * (1 << Shift), Y_G_WT = 0.587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT;
2222
const int U_B_WT = 0.436f * (1 << Shift), U_G_WT = -0.28886f * (1 << Shift), U_R_WT = -(U_B_WT + U_G_WT);
@@ -38,7 +38,7 @@ void RGBToYUV(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned c
3838
}
3939

4040
void RGBToYUVSSE_1(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride) {
41-
const int Shift = 15;
41+
const int Shift = 13;
4242
const int HalfV = 1 << (Shift - 1);
4343
const int Y_B_WT = 0.114f * (1 << Shift), Y_G_WT = 0.587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT;
4444
const int U_B_WT = 0.436f * (1 << Shift), U_G_WT = -0.28886f * (1 << Shift), U_R_WT = -(U_B_WT + U_G_WT);
@@ -132,7 +132,7 @@ void RGBToYUVSSE_1(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsig
132132

133133
void RGBToYUVSSE_2(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride)
134134
{
135-
const int Shift = 15; // No coefficient here has an absolute value greater than 1, so the scale factor can be as large as 2^15.
135+
const int Shift = 13; // No coefficient here has an absolute value greater than 1, so the scale factor can be as large as 2^15.
136136
const int HalfV = 1 << (Shift - 1);
137137

138138
const int Y_B_WT = 0.114f * (1 << Shift), Y_G_WT = 0.587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT, Y_C_WT = 1;
@@ -243,7 +243,7 @@ void RGBToYUVSSE_2(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsig
243243

244244
void YUVToRGB(unsigned char *Y, unsigned char *U, unsigned char *V, unsigned char *RGB, int Width, int Height, int Stride)
245245
{
246-
const int Shift = 15;
246+
const int Shift = 13;
247247
const int HalfV = 1 << (Shift - 1);
248248
const int B_Y_WT = 1 << Shift, B_U_WT = 2.03211f * (1 << Shift), B_V_WT = 0;
249249
const int G_Y_WT = 1 << Shift, G_U_WT = -0.39465f * (1 << Shift), G_V_WT = -0.58060f * (1 << Shift);
@@ -264,52 +264,28 @@ void YUVToRGB(unsigned char *Y, unsigned char *U, unsigned char *V, unsigned cha
264264
}
265265
}
266266

267-
// YUVToRGBSSE_1: SSE-based YUV -> RGB conversion that, as written here, is
// incomplete. Each inner iteration loads 16 Y, 16 U and 16 V bytes and
// zero-extends them to 32-bit lanes, but nothing is computed from those lanes
// and nothing is stored to RGB.
//
// Parameters:
//   Y, U, V - input planes, addressed with a row pitch of Width bytes.
//   RGB     - output buffer, addressed with a row pitch of Stride bytes
//             (only its row pointer is formed; it is never written).
//   Width   - pixels per row; only the first (Width / 16) * 16 pixels of a
//             row are visited, any remainder is not handled.
//   Height  - number of rows.
//   Stride  - byte pitch of one RGB row.
void YUVToRGBSSE_1(unsigned char *Y, unsigned char *U, unsigned char *V, unsigned char *RGB, int Width, int Height, int Stride) {
// Fixed-point scale: each weight below is a float coefficient multiplied by
// 2^Shift and truncated to int.
268-
const int Shift = 13;
// Half of the fixed-point unit; presumably intended as a rounding term.
// Unused in this function.
269-
const int HalfV = 1 << (Shift - 1);
// Per-channel weights for Y, U and V. None of them is referenced below.
270-
const int B_Y_WT = 1 << Shift, B_U_WT = 2.03211f * (1 << Shift), B_V_WT = 0;
271-
const int G_Y_WT = 1 << Shift, G_U_WT = -0.39465f * (1 << Shift), G_V_WT = -0.58060f * (1 << Shift);
// Note: 1.13983 has no 'f' suffix, so this product is evaluated in double,
// unlike the float literals above.
272-
const int R_Y_WT = 1 << Shift, R_U_WT = 0, R_V_WT = 1.13983 * (1 << Shift);
// All-zero register used as the high half when widening unsigned values.
273-
__m128i Zero = _mm_setzero_si128();
274-
// Block is the number of complete 16-pixel groups per row.
275-
const int BlockSize = 16, Block = Width / BlockSize;
276-
for (int YY = 0; YY < Height; YY++) {
// Row start in the RGB buffer; never advanced or dereferenced below.
277-
unsigned char *LinePD = RGB + YY * Stride;
278-
unsigned char *LinePY = Y + YY * Width;
279-
unsigned char *LinePU = U + YY * Width;
280-
unsigned char *LinePV = V + YY * Width;
// The three plane pointers advance by 16 per iteration; LinePD does not.
281-
for (int XX = 0; XX < Block * BlockSize; XX += BlockSize, LinePY += BlockSize, LinePU += BlockSize, LinePV += BlockSize) {
// Dst1..Dst3 are declared but never assigned or used.
282-
__m128i Dst1, Dst2, Dst3, YV, UV, VV;
// Unaligned 16-byte loads of the Y, U and V samples.
283-
YV = _mm_loadu_si128((__m128i *)(LinePY + 0));
284-
UV = _mm_loadu_si128((__m128i *)(LinePU + 0));
285-
VV = _mm_loadu_si128((__m128i *)(LinePV + 0));
286-
// Widen Y: interleave with Zero to go 8-bit -> 16-bit, then 16-bit -> 32-bit,
// giving four registers of four 32-bit values each.
287-
__m128i YV16L = _mm_unpacklo_epi8(YV, Zero);
288-
__m128i YV16H = _mm_unpackhi_epi8(YV, Zero);
289-
__m128i YV32LL = _mm_unpacklo_epi16(YV16L, Zero);
290-
__m128i YV32LH = _mm_unpackhi_epi16(YV16L, Zero);
291-
__m128i YV32HL = _mm_unpacklo_epi16(YV16H, Zero);
292-
__m128i YV32HH = _mm_unpackhi_epi16(YV16H, Zero);
293-
// Same widening for U.
294-
__m128i UV16L = _mm_unpacklo_epi8(UV, Zero);
295-
__m128i UV16H = _mm_unpackhi_epi8(UV, Zero);
296-
__m128i UV32LL = _mm_unpacklo_epi16(UV16L, Zero);
297-
__m128i UV32LH = _mm_unpackhi_epi16(UV16L, Zero);
298-
__m128i UV32HL = _mm_unpacklo_epi16(UV16H, Zero);
299-
__m128i UV32HH = _mm_unpackhi_epi16(UV16H, Zero);
300-
// Same widening for V.
301-
__m128i VV16L = _mm_unpacklo_epi8(VV, Zero);
302-
__m128i VV16H = _mm_unpackhi_epi8(VV, Zero);
303-
__m128i VV32LL = _mm_unpacklo_epi16(VV16L, Zero);
304-
__m128i VV32LH = _mm_unpackhi_epi16(VV16L, Zero);
305-
__m128i VV32HL = _mm_unpacklo_epi16(VV16H, Zero);
306-
__m128i VV32HH = _mm_unpackhi_epi16(VV16H, Zero);
307-
// The loop body ends here: the widened lanes are never multiplied by the
// weights, shifted, clamped or stored.
308-
309-
}
310-
}
311-
}
312-
313267
// Demo driver: reads an image, converts it to separate Y/U/V planes with
// RGBToYUVSSE_2, converts back with YUVToRGB, then shows both images and
// writes the result to disk.
//
// NOTE(review): Dest, Y, U and V are allocated with new[] and no matching
// delete[] appears in this function.
int main() {
314-
// The result of imread is used without checking that loading succeeded.
268+
Mat src = imread("F:\\car.jpg");
269+
int Height = src.rows;
270+
int Width = src.cols;
271+
unsigned char *Src = src.data;
// Dest holds Height * Width * 3 bytes; Y, U and V each hold Height * Width
// bytes (one byte per pixel per plane).
272+
unsigned char *Dest = new unsigned char[Height * Width * 3];
273+
unsigned char *Y = new unsigned char[Height * Width];
274+
unsigned char *U = new unsigned char[Height * Width];
275+
unsigned char *V = new unsigned char[Height * Width];
// Assumes rows are tightly packed at 3 bytes per pixel; src.step is not
// consulted - TODO confirm this holds for the loaded image.
276+
int Stride = Width * 3;
// Radius is not used anywhere in this function.
277+
int Radius = 11;
// Start tick comes from cvGetTickCount while the end tick below comes from
// cv::getTickCount.
278+
int64 st = cvGetTickCount();
// The benchmark loop is commented out, so the measured interval contains no
// conversion work.
279+
/*for (int i = 0; i < 10; i++) {
280+
RGBToYUV(Src, Y, U, V, Width, Height, Stride);
281+
}*/
// Elapsed seconds times 100; presumably milliseconds per iteration for the
// 10-iteration loop above (1000 / 10) - verify if the loop count changes.
282+
double duration = (cv::getTickCount() - st) / cv::getTickFrequency() * 100;
283+
printf("%.5f\n", duration);
// Round trip: forward conversion into Y/U/V, then back into Dest.
284+
RGBToYUVSSE_2(Src, Y, U, V, Width, Height, Stride);
285+
YUVToRGB(Y, U, V, Dest, Width, Height, Stride);
// dst is constructed over the Dest buffer as a Height x Width 8-bit
// 3-channel image.
286+
Mat dst(Height, Width, CV_8UC3, Dest);
287+
imshow("origin", src);
288+
imshow("result", dst);
289+
imwrite("F:\\res.jpg", dst);
// Waits for a key press so the windows stay open.
290+
waitKey(0);
315291
}

0 commit comments

Comments
 (0)