ARM平台YUV轉RGB888 - Nova的專欄 - CSDN博客

前段時間,公司有個嵌入式的項目,我做的其中有一項是yuv視頻到rgb888圖像的轉換。我主要是參照一位牛人的博客做的:http://blog.csdn.net/housisong/archive/2007/10/31/1859084.aspx

YUV422p To RGB888 C語言版本介面說明:

view plaincopy to clipboardprint?·········10········20········30········40········50········60········70········80········90········100·······110·······120·······130·······140·······150

  /**************************************** YUV422P_To_RGB24.c **************************/
  // Module purpose: convert planar YUV image data to packed RGB24.

  typedef unsigned char BYTE; // [0..255]

  /*
   * Builds the lookup tables used by the converters.
   * Must be called once before any conversion function is used.
   */
  void YUV422P_To_RGB24_init(void);

  /*
   * Converts a planar YUV422P image to RGB24.
   *
   * Parameters:
   *   pY      start of the Y plane
   *   pU      start of the U plane
   *   pV      start of the V plane
   *   DstPic  start of the RGB24 output buffer
   *   width   image width in pixels
   *   height  image height in pixels
   *
   * Returns 0 on success, -1 on failure.
   *
   * Note: the buffer behind DstPic must be allocated by the caller and must
   * hold at least width * height * 3 bytes.
   */
  int YUV422P_To_RGB24(BYTE *pY, BYTE *pU, BYTE *pV, BYTE *DstPic, int width, int height);

  /*
   * Converts a planar image whose U/V rows are each shared by two consecutive
   * Y rows (4:2:0 layout) to RGB24. Parameters, return value and the DstPic
   * size requirement are the same as for YUV422P_To_RGB24; in addition both
   * width and height must be even, otherwise -1 is returned.
   */
  int YUV420P_To_RGB24(BYTE *pY, BYTE *pU, BYTE *pV, BYTE *DstPic, int width, int height);

實現:

view plaincopy to clipboardprint?·········10········20········30········40········50········60········70········80········90········100·······110·······120·······130·······140·······150

  /*
   * Planar YUV -> packed RGB24 conversion.
   *
   * Integer (16.16 fixed-point) arithmetic and lookup tables replace the
   * floating-point colour matrix. Pixel parameters are spelled
   * `unsigned char`, the type the interface's BYTE typedef aliases, so the
   * signatures stay type-identical to the declared interface.
   */

  /* Colour-matrix coefficients scaled by 2^16. */
  const int csY_coeff_16 = (int)(1.164383 * (1 << 16));
  const int csU_blue_16 = (int)(2.017232 * (1 << 16));
  const int csU_green_16 = (int)((-0.391762) * (1 << 16));
  const int csV_green_16 = (int)((-0.812968) * (1 << 16));
  const int csV_red_16 = (int)(1.596027 * (1 << 16));

  /*
   * Saturation lookup table: color_table[v] == clamp(v, 0, 255).
   *
   * With the coefficients above the blue channel (Y term + U term) ranges
   * from -278 (Y=0, U=0) to 534 (Y=255, U=255); red and green stay inside
   * that interval. The table therefore has to cover more than -256..511,
   * otherwise extreme inputs index outside the array.
   */
  enum {
      CLAMP_INDEX_MIN = -384,
      CLAMP_INDEX_MAX = 639,
      CLAMP_TABLE_SIZE = CLAMP_INDEX_MAX - CLAMP_INDEX_MIN + 1
  };
  static unsigned char clamp_table_storage[CLAMP_TABLE_SIZE];
  static const unsigned char *color_table = &clamp_table_storage[-CLAMP_INDEX_MIN];

  /* Per-sample contribution tables, indexed by the raw 8-bit Y, U or V value. */
  static int Ym_tableEx[256];
  static int Um_blue_tableEx[256];
  static int Um_green_tableEx[256];
  static int Vm_green_tableEx[256];
  static int Vm_red_tableEx[256];

  /*
   * Saturates a colour value to the range 0..255.
   * `static inline`: a plain `inline` definition provides no external
   * definition, so the link fails whenever the compiler does not inline it.
   */
  static inline long border_color(long color)
  {
      if (color > 255) {
          return 255;
      }
      if (color < 0) {
          return 0;
      }
      return color;
  }

  /*
   * Fills the saturation table and the five contribution tables.
   * Must be called once before any conversion function.
   */
  void YUV422P_To_RGB24_init(void)
  {
      int i;

      for (i = CLAMP_INDEX_MIN; i <= CLAMP_INDEX_MAX; ++i) {
          clamp_table_storage[i - CLAMP_INDEX_MIN] = (unsigned char)border_color(i);
      }

      /* Right-shifting a negative product relies on an arithmetic shift. */
      for (i = 0; i < 256; ++i) {
          Ym_tableEx[i] = (csY_coeff_16 * (i - 16)) >> 16;
          Um_blue_tableEx[i] = (csU_blue_16 * (i - 128)) >> 16;
          Um_green_tableEx[i] = (csU_green_16 * (i - 128)) >> 16;
          Vm_green_tableEx[i] = (csV_green_16 * (i - 128)) >> 16;
          Vm_red_tableEx[i] = (csV_red_16 * (i - 128)) >> 16;
      }
  }

  /*
   * Converts two horizontally adjacent pixels that share one U/V sample and
   * writes six bytes to p in the order R0 G0 B0 R1 G1 B1.
   */
  static inline void YUVToRGB24_Table(unsigned char *p, const unsigned char Y0,
                                      const unsigned char Y1, const unsigned char U,
                                      const unsigned char V)
  {
      const int luma0 = Ym_tableEx[Y0];
      const int luma1 = Ym_tableEx[Y1];
      const int red_term = Vm_red_tableEx[V];
      const int green_term = Um_green_tableEx[U] + Vm_green_tableEx[V];
      const int blue_term = Um_blue_tableEx[U];

      p[0] = color_table[luma0 + red_term];
      p[1] = color_table[luma0 + green_term];
      p[2] = color_table[luma0 + blue_term];
      p[3] = color_table[luma1 + red_term];
      p[4] = color_table[luma1 + green_term];
      p[5] = color_table[luma1 + blue_term];
  }

  /*
   * Shared row loop. All planes are tightly packed: the Y stride is width,
   * the U/V stride is width / 2 and the output stride is width * 3.
   * rows_per_chroma_row is 1 for 4:2:2 input and 2 for 4:2:0 input.
   * Returns 0 on success, -1 for a negative or odd width, a negative height,
   * or a NULL plane pointer. A zero-sized image is a successful no-op.
   */
  static int convert_planar_rows(unsigned char *dst, const unsigned char *y_row,
                                 const unsigned char *u_row, const unsigned char *v_row,
                                 int width, int height, int rows_per_chroma_row)
  {
      const int chroma_width = width / 2;
      int row;
      int pair;

      if (width < 0 || height < 0 || (width % 2) != 0) {
          return -1;
      }
      if (width == 0 || height == 0) {
          return 0;
      }
      if (!dst || !y_row || !u_row || !v_row) {
          return -1;
      }

      for (row = 0; row < height; ++row) {
          for (pair = 0; pair < chroma_width; ++pair) {
              YUVToRGB24_Table(dst, y_row[0], y_row[1], u_row[pair], v_row[pair]);
              dst += 6;   /* two RGB888 pixels */
              y_row += 2; /* two luma samples */
          }
          /* Step to the next chroma row once its last luma row is done. */
          if ((row + 1) % rows_per_chroma_row == 0) {
              u_row += chroma_width;
              v_row += chroma_width;
          }
      }
      return 0;
  }

  /*
   * Converts a planar 4:2:0 image (one U/V row per two Y rows) to RGB24.
   * DstPic must hold width * height * 3 bytes.
   * Returns 0 on success, -1 if width or height is odd or negative, or if a
   * pointer is NULL while there is data to convert.
   */
  int YUV420P_To_RGB24(unsigned char *pY, unsigned char *pU, unsigned char *pV,
                       unsigned char *DstPic, int width, int height)
  {
      if ((height % 2) != 0) {
          return -1;
      }
      return convert_planar_rows(DstPic, pY, pU, pV, width, height, 2);
  }

  /*
   * Converts a planar 4:2:2 image (one U/V row per Y row) to RGB24.
   * DstPic must hold width * height * 3 bytes.
   * Returns 0 on success, -1 if width is odd, a dimension is negative, or a
   * pointer is NULL while there is data to convert.
   */
  int YUV422P_To_RGB24(unsigned char *pY, unsigned char *pU, unsigned char *pV,
                       unsigned char *DstPic, int width, int height)
  {
      return convert_planar_rows(DstPic, pY, pU, pV, width, height, 1);
  }

經測試發現,在hi3512(arm 926ej-s,267MHz)平台上運行時,該yuv轉rgb模塊的速度不是很快,大概20幀/秒。為了提高效率,核心解碼模塊我採用了arm彙編,重寫了YUVToRGB24_Table模塊。

YUV420P_To_RGB24_asm.c代碼:

view plaincopy to clipboardprint?·········10········20········30········40········50········60········70········80········90········100·······110·······120·······130·······140·······150

  /*
   * Row kernel implemented in assembly (YUVToRGB24_Assemble.s).
   * yuv points to three row pointers in the order Y, U, V.
   */
  extern int YUVToRGB24_Assemble(unsigned char *pDstLine, unsigned char **yuv, int width);

  /* Integer (16.16 fixed-point) coefficients replace floating-point math. */
  const int csY_coeff_16 = (int)(1.164383 * (1 << 16));
  const int csU_blue_16 = (int)(2.017232 * (1 << 16));
  const int csU_green_16 = (int)((-0.391762) * (1 << 16));
  const int csV_green_16 = (int)((-0.812968) * (1 << 16));
  const int csV_red_16 = (int)(1.596027 * (1 << 16));

  /*
   * Contribution tables indexed by the raw 8-bit sample value.
   * They have external linkage because the assembly kernel refers to them
   * by symbol name.
   */
  int Ym_tableEx[256];
  int Um_blue_tableEx[256];
  int Um_green_tableEx[256];
  int Vm_green_tableEx[256];
  int Vm_red_tableEx[256];

  /*
   * Fills the five contribution tables.
   * Must be called once before any conversion that uses them.
   */
  void YUV422P_To_RGB24_init(void)
  {
      int sample;

      /* Right-shifting a negative product relies on an arithmetic shift. */
      for (sample = 0; sample < 256; ++sample) {
          const int luma = sample - 16;
          const int chroma = sample - 128;

          Ym_tableEx[sample] = (csY_coeff_16 * luma) >> 16;
          Um_blue_tableEx[sample] = (csU_blue_16 * chroma) >> 16;
          Um_green_tableEx[sample] = (csU_green_16 * chroma) >> 16;
          Vm_green_tableEx[sample] = (csV_green_16 * chroma) >> 16;
          Vm_red_tableEx[sample] = (csV_red_16 * chroma) >> 16;
      }
  }
  /*
   * Converts a planar 4:2:0 image to RGB24, one row at a time, using the
   * assembly kernel YUVToRGB24_Assemble.
   *
   * pY, pU, pV  start of the Y, U and V planes (tightly packed; the Y stride
   *             is width, the U/V stride is width / 2)
   * DstPic      output buffer of at least width * height * 3 bytes
   * width       image width; must be a multiple of 8
   * height      image height; each U/V row serves two consecutive Y rows
   *
   * Returns 0 on success, -1 if width is not a multiple of 8, a dimension is
   * negative, or a pointer is NULL while there is data to convert.
   *
   * Pixel types are spelled `unsigned char` (matching the kernel's
   * declaration) so that no BYTE typedef is required here.
   * NOTE(review): the kernel uses word and halfword loads/stores, so the
   * plane and output pointers presumably need 4-byte (Y, RGB) and 2-byte
   * (U, V) alignment - confirm for the target CPU.
   */
  int YUV420P_To_RGB24(unsigned char *pY, unsigned char *pU, unsigned char *pV,
                       unsigned char *DstPic, int width, int height)
  {
      unsigned char *pDstLine = DstPic;
      unsigned char *yuv[3];
      int row;

      if (width < 0 || height < 0 || (width % 8) != 0) {
          return -1;
      }
      /* The kernel always converts at least four pixels, so never call it
       * for an empty image. */
      if (width == 0 || height == 0) {
          return 0;
      }
      if (!pY || !pU || !pV || !DstPic) {
          return -1;
      }

      yuv[0] = pY;
      yuv[1] = pU;
      yuv[2] = pV;

      for (row = 0; row < height; ++row) {
          /* Decode one row with the asm function in YUVToRGB24_Assemble.s. */
          YUVToRGB24_Assemble(pDstLine, yuv, width);

          pDstLine += width * 3; /* RGB888 output stride */
          yuv[0] += width;       /* Y plane stride */

          /* Counting rows upwards keeps rows 2k and 2k+1 on the same chroma
           * row even when height is odd. */
          if ((row % 2) == 1) {
              yuv[1] += width >> 1;
              yuv[2] += width >> 1;
          }
      }
      return 0;
  }

arm彙編核心解碼模塊:

view plaincopy to clipboardprint?

  @ ---------------------------------------------------------------------------
  @ YUVToRGB24_Assemble.s - ARM (GNU as syntax) row kernel: planar YUV to
  @ packed RGB888.
  @
  @ int YUVToRGB24_Assemble(unsigned char *pDstLine, unsigned char **yuv, int width);
  @   r0 = pDstLine  destination row, 3 bytes per pixel
  @   r1 = yuv       three pointers: Y row, U row, V row
  @   r2 = width     pixels to convert; four are handled per loop iteration
  @ Returns 0 in r0.
  @
  @ Each iteration loads 4 Y bytes, 2 U bytes and 2 V bytes, converts four
  @ pixels through the C lookup tables (Ym_tableEx, Um_*_tableEx,
  @ Vm_*_tableEx), saturates every channel to 0..255 and stores 12 bytes.
  @ NOTE(review): the byte packing below yields R,G,B memory order and
  @ first-pixel-in-low-byte loads only on a little-endian target, and the
  @ word/halfword accesses presumably need aligned pointers - confirm.
  @ ---------------------------------------------------------------------------
          .text

  @ loadu idx: r9 = Um_blue_tableEx[idx], r10 = Um_green_tableEx[idx]; clobbers r1.
  @ Macro arguments must be referenced as \a in GNU as.
          .macro  loadu a
          adr     r1, UM_BLUE
          ldr     r1, [r1]
          ldr     r9, [r1, \a, lsl #2]
          adr     r1, UM_GREEN
          ldr     r1, [r1]
          ldr     r10, [r1, \a, lsl #2]
          .endm

  @ loadv idx: r11 = Vm_red_tableEx[idx], r12 = Vm_green_tableEx[idx]; clobbers r1.
          .macro  loadv a
          adr     r1, VM_RED
          ldr     r1, [r1]
          ldr     r11, [r1, \a, lsl #2]
          adr     r1, VM_GREEN
          ldr     r1, [r1]
          ldr     r12, [r1, \a, lsl #2]
          .endm

  @ bound_r0: saturate the signed value in r0 to 0..255.
          .macro  bound_r0
          cmp     r0, #0x00
          movlt   r0, #0x00
          cmp     r0, #255
          movgt   r0, #255
          .endm

          .globl  YUVToRGB24_Assemble
  @ r0 = pDstLine; r1 = yuv; r2 = width
  YUVToRGB24_Assemble:
          stmdb   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
          cmp     r2, #0                  @ the loop tests its bound at the bottom,
          ble     done                    @ so skip it entirely for width <= 0
          ldmia   r1, {r1, r3, r4}        @ r1 = y; r3 = u; r4 = v
          mov     r5, #0                  @ r5 = pixel counter within the row
  hloop:
          ldr     r6, [r1], #0x04         @ load y; 4 bytes
          ldrh    r7, [r3], #0x02         @ load u; 2 bytes
          ldrh    r8, [r4], #0x02         @ load v; 2 bytes
          stmdb   sp!, {r0, r1, r2, r3, r4, r5}
          @ temp registers: r0, r1    packed rgb data: r2, r3, r4
          @ ye: r5  ue_blue: r9  ue_green: r10  ve_red: r11  ve_green: r12
          mov     r2, #0
          mov     r3, #0
          mov     r4, #0

          @ load ue_blue0, ue_green0
          mov     r0, r7
          and     r0, r0, #0xFF
          loadu   r0
          @ load ve_red0, ve_green0
          mov     r0, r8
          and     r0, r0, #0xFF
          loadv   r0

          @ load ye0
          mov     r0, r6
          and     r0, r0, #0xFF
          adr     r1, YM
          ldr     r1, [r1]
          ldr     r5, [r1, r0, lsl #2]
          @ red0 = ye0 + ve_red0
          add     r0, r5, r11
          bound_r0
          orr     r2, r2, r0
          @ green0 = ye0 + ue_green0 + ve_green0
          @ plain adds: an adds/adc pair would fold the carry flag into the sum
          add     r0, r10, r12
          add     r0, r0, r5
          bound_r0
          orr     r2, r2, r0, lsl #8
          @ blue0 = ye0 + ue_blue0
          add     r0, r5, r9
          bound_r0
          orr     r2, r2, r0, lsl #16

          @ load ye1 (r1 still points at the Y table)
          mov     r0, r6, lsr #8
          and     r0, r0, #0xFF
          ldr     r5, [r1, r0, lsl #2]
          @ red1 = ye1 + ve_red0
          add     r0, r5, r11
          bound_r0
          orr     r2, r2, r0, lsl #24
          @ green1 = ye1 + ue_green0 + ve_green0
          add     r0, r10, r12
          add     r0, r0, r5
          bound_r0
          orr     r3, r3, r0
          @ blue1 = ye1 + ue_blue0
          add     r0, r5, r9
          bound_r0
          orr     r3, r3, r0, lsl #8

          @ load ue_blue1, ue_green1
          mov     r0, r7, lsr #8
          and     r0, r0, #0xFF
          loadu   r0
          @ load ve_red1, ve_green1
          mov     r0, r8, lsr #8
          and     r0, r0, #0xFF
          loadv   r0

          @ load ye2
          mov     r0, r6, lsr #16
          and     r0, r0, #0xFF
          adr     r1, YM
          ldr     r1, [r1]
          ldr     r5, [r1, r0, lsl #2]
          @ red2 = ye2 + ve_red1
          add     r0, r5, r11
          bound_r0
          orr     r3, r3, r0, lsl #16
          @ green2 = ye2 + ue_green1 + ve_green1
          add     r0, r10, r12
          add     r0, r0, r5
          bound_r0
          orr     r3, r3, r0, lsl #24
          @ blue2 = ye2 + ue_blue1
          add     r0, r5, r9
          bound_r0
          orr     r4, r4, r0

          @ load ye3 (r1 still points at the Y table)
          mov     r0, r6, lsr #24
          and     r0, r0, #0xFF
          ldr     r5, [r1, r0, lsl #2]
          @ red3 = ye3 + ve_red1
          add     r0, r5, r11
          bound_r0
          orr     r4, r4, r0, lsl #8
          @ green3 = ye3 + ue_green1 + ve_green1
          add     r0, r10, r12
          add     r0, r0, r5
          bound_r0
          orr     r4, r4, r0, lsl #16
          @ blue3 = ye3 + ue_blue1
          add     r0, r5, r9
          bound_r0
          orr     r4, r4, r0, lsl #24

          @ move the packed pixels out of r2-r4 before those registers are
          @ restored, then store 12 bytes and advance the destination
          mov     r10, r2
          mov     r11, r3
          mov     r12, r4
          ldmia   sp!, {r0, r1, r2, r3, r4, r5}
          stmia   r0!, {r10, r11, r12}
          add     r5, r5, #4
          cmp     r5, r2
          blo     hloop
  done:
          mov     r0, #0                  @ the C declaration returns int
          ldmia   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}    @ exit

  @ addresses of the lookup tables defined on the C side
          .align  2
  YM:       .long   Ym_tableEx
  UM_BLUE:  .long   Um_blue_tableEx
  UM_GREEN: .long   Um_green_tableEx
  VM_GREEN: .long   Vm_green_tableEx
  VM_RED:   .long   Vm_red_tableEx

將核心模塊改成彙編後,解碼速度從約20幀/秒提高到約50幀/秒,即速度約為原來的2.5倍(單幀耗時減少約60%),彙編果然強大,哈哈。

推薦閱讀:

ASP.NET的網頁代碼模型及生命周期 - lillllllll的專欄 - CSDN博客
項目拖期怎麼辦 - 項目管理 - CSDN技術中心
在國家禁止ico的大環境下,為什麼csdn敢大肆發展區塊鏈?
計算機科學專業必讀的44冊經典著作 - guangshi007的專欄- 博客頻道 - CSDN.NET

TAG:博客 | 平台 | CSDN | 專欄 |