@@ -16,7 +16,7 @@ inline unsigned char ClampToByte(int Value){
1616
1717void RGBToYUV (unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride)
1818{
19- const int Shift = 15 ;
19+ const int Shift = 13 ;
2020 const int HalfV = 1 << (Shift - 1 );
2121 const int Y_B_WT = 0 .114f * (1 << Shift), Y_G_WT = 0 .587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT;
2222 const int U_B_WT = 0 .436f * (1 << Shift), U_G_WT = -0 .28886f * (1 << Shift), U_R_WT = -(U_B_WT + U_G_WT);
@@ -38,7 +38,7 @@ void RGBToYUV(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned c
3838}
3939
4040void RGBToYUVSSE_1 (unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride) {
41- const int Shift = 15 ;
41+ const int Shift = 13 ;
4242 const int HalfV = 1 << (Shift - 1 );
4343 const int Y_B_WT = 0 .114f * (1 << Shift), Y_G_WT = 0 .587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT;
4444 const int U_B_WT = 0 .436f * (1 << Shift), U_G_WT = -0 .28886f * (1 << Shift), U_R_WT = -(U_B_WT + U_G_WT);
@@ -132,7 +132,7 @@ void RGBToYUVSSE_1(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsig
132132
133133void RGBToYUVSSE_2 (unsigned char *RGB, unsigned char *Y, unsigned char *U, unsigned char *V, int Width, int Height, int Stride)
134134{
135- const int Shift = 15 ; // 这里没有绝对值大于1的系数,最大可取2^15次方的放大倍数。
135+ const int Shift = 13 ; // 这里没有绝对值大于1的系数,最大可取2^15次方的放大倍数。
136136 const int HalfV = 1 << (Shift - 1 );
137137
138138 const int Y_B_WT = 0 .114f * (1 << Shift), Y_G_WT = 0 .587f * (1 << Shift), Y_R_WT = (1 << Shift) - Y_B_WT - Y_G_WT, Y_C_WT = 1 ;
@@ -243,7 +243,7 @@ void RGBToYUVSSE_2(unsigned char *RGB, unsigned char *Y, unsigned char *U, unsig
243243
244244void YUVToRGB (unsigned char *Y, unsigned char *U, unsigned char *V, unsigned char *RGB, int Width, int Height, int Stride)
245245{
246- const int Shift = 15 ;
246+ const int Shift = 13 ;
247247 const int HalfV = 1 << (Shift - 1 );
248248 const int B_Y_WT = 1 << Shift, B_U_WT = 2 .03211f * (1 << Shift), B_V_WT = 0 ;
249249 const int G_Y_WT = 1 << Shift, G_U_WT = -0 .39465f * (1 << Shift), G_V_WT = -0 .58060f * (1 << Shift);
@@ -264,52 +264,28 @@ void YUVToRGB(unsigned char *Y, unsigned char *U, unsigned char *V, unsigned cha
264264 }
265265}
266266
// YUVToRGBSSE_1: unfinished SSE skeleton of a planar Y/U/V -> interleaved RGB conversion.
// As written it has no observable output: every 16-pixel block of each plane is loaded and
// widened, but nothing is computed from the widened values and nothing is stored to RGB.
//   Y, U, V : input planes, addressed with a row pitch of Width bytes
//   RGB     : output buffer, addressed with a row pitch of Stride bytes (never written here)
//   Width, Height : image size in pixels; columns past the last full 16-pixel block are not visited
267- void YUVToRGBSSE_1 (unsigned char *Y, unsigned char *U, unsigned char *V, unsigned char *RGB, int Width, int Height, int Stride) {
// Fixed-point scale (2^13) and rounding term; none of the constants below are used yet.
268- const int Shift = 13 ;
269- const int HalfV = 1 << (Shift - 1 );
// Per-channel weights scaled by 2^Shift and truncated to int.
270- const int B_Y_WT = 1 << Shift, B_U_WT = 2 .03211f * (1 << Shift), B_V_WT = 0 ;
271- const int G_Y_WT = 1 << Shift, G_U_WT = -0 .39465f * (1 << Shift), G_V_WT = -0 .58060f * (1 << Shift);
// NOTE(review): R_V_WT uses a double literal while the other weights use float literals.
272- const int R_Y_WT = 1 << Shift, R_U_WT = 0 , R_V_WT = 1.13983 * (1 << Shift);
273- __m128i Zero = _mm_setzero_si128 ();
274-
// Block = number of complete 16-pixel groups per row.
275- const int BlockSize = 16 , Block = Width / BlockSize;
276- for (int YY = 0 ; YY < Height; YY++) {
// LinePD is computed per row but never advanced or written inside the inner loop.
277- unsigned char *LinePD = RGB + YY * Stride;
278- unsigned char *LinePY = Y + YY * Width;
279- unsigned char *LinePU = U + YY * Width;
280- unsigned char *LinePV = V + YY * Width;
281- for (int XX = 0 ; XX < Block * BlockSize; XX += BlockSize, LinePY += BlockSize, LinePU += BlockSize, LinePV += BlockSize) {
// Dst1..Dst3 are declared but never assigned.
282- __m128i Dst1, Dst2, Dst3, YV, UV, VV;
// Unaligned 16-byte loads: 16 consecutive samples from each plane.
283- YV = _mm_loadu_si128 ((__m128i *)(LinePY + 0 ));
284- UV = _mm_loadu_si128 ((__m128i *)(LinePU + 0 ));
285- VV = _mm_loadu_si128 ((__m128i *)(LinePV + 0 ));
286-
// Zero-extend the 16 Y bytes to 16-bit lanes, then to four vectors of 32-bit lanes.
287- __m128i YV16L = _mm_unpacklo_epi8 (YV, Zero);
288- __m128i YV16H = _mm_unpackhi_epi8 (YV, Zero);
289- __m128i YV32LL = _mm_unpacklo_epi16 (YV16L, Zero);
290- __m128i YV32LH = _mm_unpackhi_epi16 (YV16L, Zero);
291- __m128i YV32HL = _mm_unpacklo_epi16 (YV16H, Zero);
292- __m128i YV32HH = _mm_unpackhi_epi16 (YV16H, Zero);
293-
// Same widening for the U samples.
294- __m128i UV16L = _mm_unpacklo_epi8 (UV, Zero);
295- __m128i UV16H = _mm_unpackhi_epi8 (UV, Zero);
296- __m128i UV32LL = _mm_unpacklo_epi16 (UV16L, Zero);
297- __m128i UV32LH = _mm_unpackhi_epi16 (UV16L, Zero);
298- __m128i UV32HL = _mm_unpacklo_epi16 (UV16H, Zero);
299- __m128i UV32HH = _mm_unpackhi_epi16 (UV16H, Zero);
300-
// Same widening for the V samples.
301- __m128i VV16L = _mm_unpacklo_epi8 (VV, Zero);
302- __m128i VV16H = _mm_unpackhi_epi8 (VV, Zero);
303- __m128i VV32LL = _mm_unpacklo_epi16 (VV16L, Zero);
304- __m128i VV32LH = _mm_unpackhi_epi16 (VV16L, Zero);
305- __m128i VV32HL = _mm_unpacklo_epi16 (VV16H, Zero);
306- __m128i VV32HH = _mm_unpackhi_epi16 (VV16H, Zero);
// TODO(review): the multiply/accumulate, clamp, pack and store steps are missing from here on.
307-
308-
309- }
310- }
311- }
312-
313267int main () {
314-
268+ Mat src = imread (" F:\\ car.jpg" );
269+ int Height = src.rows ;
270+ int Width = src.cols ;
271+ unsigned char *Src = src.data ;
272+ unsigned char *Dest = new unsigned char [Height * Width * 3 ];
273+ unsigned char *Y = new unsigned char [Height * Width];
274+ unsigned char *U = new unsigned char [Height * Width];
275+ unsigned char *V = new unsigned char [Height * Width];
276+ int Stride = Width * 3 ;
277+ int Radius = 11 ;
278+ int64 st = cvGetTickCount ();
279+ /* for (int i = 0; i < 10; i++) {
280+ RGBToYUV(Src, Y, U, V, Width, Height, Stride);
281+ }*/
282+ double duration = (cv::getTickCount () - st) / cv::getTickFrequency () * 100 ;
283+ printf (" %.5f\n " , duration);
284+ RGBToYUVSSE_2 (Src, Y, U, V, Width, Height, Stride);
285+ YUVToRGB (Y, U, V, Dest, Width, Height, Stride);
286+ Mat dst (Height, Width, CV_8UC3, Dest);
287+ imshow (" origin" , src);
288+ imshow (" result" , dst);
289+ imwrite (" F:\\ res.jpg" , dst);
290+ waitKey (0 );
315291}
0 commit comments