ARM平台YUV轉RGB888 - Nova的專欄 - CSDN博客
07-26
前段時間,公司有個嵌入式的項目,我做的其中有一項是yuv視頻到rgb888圖像的轉換。我主要是參照一位牛人的博客做的:http://blog.csdn.net/housisong/archive/2007/10/31/1859084.aspx
YUV422p To RGB888 C語言版本介面說明:
/**************************************** YUV422P_To_RGB24.c **************************/
// Module purpose: convert planar YUV image data to packed RGB24.
typedef unsigned char BYTE; // [0..255]

/*
 * Interface:
 *   Builds the lookup tables. This initialisation must be performed once
 *   before the conversion routine is used.
 *
 * Declared with an explicit (void) parameter list so that the compiler
 * rejects calls that pass arguments.
 */
void YUV422P_To_RGB24_init(void);

/*
 * Interface:
 *   Converts planar YUV image data to RGB24.
 *
 * Parameters:
 *   pY      start of the Y plane
 *   pU      start of the U plane
 *   pV      start of the V plane
 *   DstPic  start of the RGB24 output buffer
 *   width   image width in pixels
 *   height  image height in pixels
 *
 * Returns 0 on success, -1 on failure.
 *
 * Note: the buffer behind DstPic must be allocated by the caller beforehand
 * and must hold width * height * 3 bytes.
 *
 * NOTE(review): the implementations shown alongside this declaration are
 * named YUV420P_To_RGB24 and advance the chroma planes every second row.
 * Confirm which name and chroma layout is actually intended before relying
 * on this prototype.
 */
int YUV422P_To_RGB24(BYTE *pY, BYTE *pU, BYTE *pV, BYTE *DstPic, int width, int height);
實現:
// Integer (fixed-point) arithmetic is used in place of floating-point arithmetic.
// Every coefficient is the floating-point factor scaled by 2^16 and truncated
// towards zero when stored as an int.
const int csY_coeff_16 = (int)(1.164383 * 65536.0);
const int csU_blue_16 = (int)(2.017232 * 65536.0);
const int csU_green_16 = (int)(-0.391762 * 65536.0);
const int csV_green_16 = (int)(-0.812968 * 65536.0);
const int csV_red_16 = (int)(1.596027 * 65536.0);
- //顏色查表
- staticBYTE_color_table[256*3];
- staticconstBYTE*color_table=&_color_table[256];
- //查表
- staticintYm_tableEx[256];
- staticintUm_blue_tableEx[256];
- staticintUm_green_tableEx[256];
- staticintVm_green_tableEx[256];
- staticintVm_red_tableEx[256];
/*
 * Colour saturation: clamps a signed intermediate value to [0, 255].
 *
 * Declared static inline. A plain `inline` definition in C99/C11 does not
 * emit an external definition, so any call the compiler chooses not to
 * inline would fail at link time.
 *
 * color: value to clamp.
 * Returns 0 when color is negative, 255 when it exceeds 255, otherwise color.
 */
static inline long border_color(long color)
{
    if (color < 0) {
        return 0;
    }
    if (color > 255) {
        return 255;
    }
    return color;
}
- //採用查找表進行計算時,必須運行的初始化函數
- voidYUV422P_To_RGB24_init(){
- inti;
- for(i=0;i<256*3;++i)
- _color_table[i]=border_color(i-256);
- for(i=0;i<256;++i){
- Ym_tableEx[i]=(csY_coeff_16*(i-16))>>16;
- Um_blue_tableEx[i]=(csU_blue_16*(i-128))>>16;
- Um_green_tableEx[i]=(csU_green_16*(i-128))>>16;
- Vm_green_tableEx[i]=(csV_green_16*(i-128))>>16;
- Vm_red_tableEx[i]=(csV_red_16*(i-128))>>16;
- }
- }
- inlinevoidYUVToRGB24_Table(BYTE*p,constBYTEY0,constBYTEY1,
- constBYTEU,constBYTEV){
- intYe0=Ym_tableEx[Y0];
- intYe1=Ym_tableEx[Y1];
- intUe_blue=Um_blue_tableEx[U];
- intUe_green=Um_green_tableEx[U];
- intVe_green=Vm_green_tableEx[V];
- intVe_red=Vm_red_tableEx[V];
- intUeVe_green=Ue_green+Ve_green;
- *p=color_table[(Ye0+Ve_red)];
- *(p+1)=color_table[(Ye0+UeVe_green)];
- *(p+2)=color_table[(Ye0+Ue_blue)];
- *(p+3)=color_table[(Ye1+Ve_red)];
- *(p+4)=color_table[(Ye1+UeVe_green)];
- *(p+5)=color_table[(Ye1+Ue_blue)];
- }
- intYUV420P_To_RGB24(BYTE*pY,BYTE*pU,BYTE*pV,BYTE*DstPic,intwidth,
- intheight){
- inty,x,x_uv;
- BYTE*pDstLine=DstPic;
- if((width%2)!=0||(height%2)!=0)
- return(-1);
- for(y=0;y<height;++y){
- //DECODE_PlanarYUV211_Common_line(pDstLine,pY,pU,pV,width);
- for(x=0;x<width;x+=2){
- x_uv=x>>1;
- YUVToRGB24_Table(&pDstLine[x*3],pY[x],pY[x+1],pU[x_uv],
- pV[x_uv]);
- }
- pDstLine+=width*3;//RGB888
- pY+=width;//YUV422
- if(y%2==1){
- pU+=width/2;
- pV+=width/2;
- }
- }
- return0;
- }
經測試發現,在hi3512(arm 926ej-s,267MHz)平台上運行時,該yuv轉rgb模塊的速度不是很快,大概20幀/秒。為了提高效率,核心解碼模塊我採用了arm彙編,重寫了YUVToRGB24_Table模塊。
YUV420P_To_RGB24_asm.c代碼:
// Row converter implemented in assembly (YUVToRGB24_Assemble.s).
extern int YUVToRGB24_Assemble(unsigned char *pDstLine, unsigned char **yuv, int width);

// Integer (fixed-point) arithmetic is used in place of floating-point arithmetic.
// Every coefficient is the floating-point factor scaled by 2^16 and truncated
// towards zero when stored as an int.
const int csY_coeff_16 = (int)(1.164383 * 65536.0);
const int csU_blue_16 = (int)(2.017232 * 65536.0);
const int csU_green_16 = (int)(-0.391762 * 65536.0);
const int csV_green_16 = (int)(-0.812968 * 65536.0);
const int csV_red_16 = (int)(1.596027 * 65536.0);
// Lookup tables, each indexed by an 8-bit Y, U or V sample value.
// They have external linkage because the assembly routine refers to them
// by symbol name.
int Ym_tableEx[256];
int Um_blue_tableEx[256];
int Um_green_tableEx[256];
int Vm_green_tableEx[256];
int Vm_red_tableEx[256];
- //採用查找表進行計算時,必須運行的初始化函數
- voidYUV422P_To_RGB24_init()
- {
- inti;
- for(i=0;i<256;++i)
- {
- Ym_tableEx[i]=(csY_coeff_16*(i-16))>>16;
- Um_blue_tableEx[i]=(csU_blue_16*(i-128))>>16;
- Um_green_tableEx[i]=(csU_green_16*(i-128))>>16;
- Vm_green_tableEx[i]=(csV_green_16*(i-128))>>16;
- Vm_red_tableEx[i]=(csV_red_16*(i-128))>>16;
- }
- }
- intYUV420P_To_RGB24(BYTE*pY,BYTE*pU,BYTE*pV,BYTE*DstPic,intwidth,intheight)
- {
- inty;
- BYTE*pDstLine=DstPic;
- BYTE*yuv[3];
- if((width%8)!=0)
- return(-1);
- yuv[0]=pY;
- yuv[1]=pU;
- yuv[2]=pV;
- for(y=height;y>0;--y)
- {
- YUVToRGB24_Assemble(pDstLine,yuv,width);//decoderalinewithasmfunctioninYUVToRGB24_Assemble.s
- pDstLine+=width*3;//RGB888
- yuv[0]+=width;//YUV422
- if(y%2==1){
- yuv[1]+=width>>1;
- yuv[2]+=width>>1;
- }
- }
- return0;
- }
arm彙編核心解碼模塊:
@ Row converter: turns one row of planar Y/U/V samples into packed RGB,
@ four pixels (12 output bytes) per loop iteration, using the lookup tables
@ Ym_tableEx, Um_blue_tableEx, Um_green_tableEx, Vm_green_tableEx and
@ Vm_red_tableEx that are defined outside this file.
	.text

@ loadu a: a holds a U sample value (0..255).
@   r9  = Um_blue_tableEx[a]
@   r10 = Um_green_tableEx[a]
@ Clobbers r1.
	.macro	loadu a
	adr	r1, UM_BLUE
	ldr	r1, [r1]
	ldr	r9, [r1, \a, lsl #2]
	adr	r1, UM_GREEN
	ldr	r1, [r1]
	ldr	r10, [r1, \a, lsl #2]
	.endm

@ loadv a: a holds a V sample value (0..255).
@   r11 = Vm_red_tableEx[a]
@   r12 = Vm_green_tableEx[a]
@ Clobbers r1.
	.macro	loadv a
	adr	r1, VM_RED
	ldr	r1, [r1]
	ldr	r11, [r1, \a, lsl #2]
	adr	r1, VM_GREEN
	ldr	r1, [r1]
	ldr	r12, [r1, \a, lsl #2]
	.endm

@ bound_r0: saturate the signed value in r0 to 0..255.
	.macro	bound_r0
	cmp	r0, #0x00
	movlt	r0, #0x00
	cmp	r0, #255
	movgt	r0, #255
	.endm

	.globl	YUVToRGB24_Assemble
@ In:  r0 = pDstLine; r1 = yuv (array of three plane pointers); r2 = width
@ The loop condition is tested after each group of four pixels, so at least
@ four pixels are always converted.
@ NOTE(review): the byte extraction from the loaded words and the packing of
@ the output words assume a little-endian target - confirm.
YUVToRGB24_Assemble:
	stmdb	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
	ldmia	r1, {r1, r3, r4}		@ r1 = y; r3 = u; r4 = v
	mov	r5, #0				@ r5 = pixels converted so far
hloop:
	ldr	r6, [r1], #0x04			@ load y; 4 bytes
	ldrh	r7, [r3], #0x02			@ load u; 2 bytes
	ldrh	r8, [r4], #0x02			@ load v; 2 bytes
	@ Spill the loop state so r0-r5 can be reused as scratch below.
	stmdb	sp!, {r0, r1, r2, r3, r4, r5}
	@ temp registers: r0, r1    packed rgb data: r2, r3, r4
	@ ye: r5  ue_blue: r9  ue_green: r10  ve_red: r11  ve_green: r12
	mov	r2, #0
	mov	r3, #0
	mov	r4, #0

	@ load ue_blue0, ue_green0
	mov	r0, r7
	and	r0, r0, #0xFF
	loadu	r0
	@ load ve_red0, ve_green0
	mov	r0, r8
	and	r0, r0, #0xFF
	loadv	r0

	@ load ye0
	mov	r0, r6
	and	r0, r0, #0xFF
	adr	r1, YM
	ldr	r1, [r1]
	ldr	r5, [r1, r0, lsl #2]
	@ red0 = ye0 + ve_red0
	add	r0, r5, r11
	bound_r0
	orr	r2, r2, r0
	@ green0 = ye0 + ue_green0 + ve_green0
	@ Plain ADDs are required here: an ADDS/ADC pair would add the carry
	@ flag of the first addition into the sum, making green one too large
	@ whenever ue_green + ve_green carries (for example when both terms
	@ are negative).
	add	r0, r10, r12
	add	r0, r0, r5
	bound_r0
	orr	r2, r2, r0, lsl #8
	@ blue0 = ye0 + ue_blue0
	add	r0, r5, r9
	bound_r0
	orr	r2, r2, r0, lsl #16

	@ load ye1 (r1 still holds the Ym table base)
	mov	r0, r6, lsr #8
	and	r0, r0, #0xFF
	ldr	r5, [r1, r0, lsl #2]
	@ red1 = ye1 + ve_red0
	add	r0, r5, r11
	bound_r0
	orr	r2, r2, r0, lsl #24
	@ green1 = ye1 + ue_green0 + ve_green0
	add	r0, r10, r12
	add	r0, r0, r5
	bound_r0
	orr	r3, r3, r0
	@ blue1 = ye1 + ue_blue0
	add	r0, r5, r9
	bound_r0
	orr	r3, r3, r0, lsl #8

	@ load ue_blue1, ue_green1
	mov	r0, r7, lsr #8
	and	r0, r0, #0xFF
	loadu	r0
	@ load ve_red1, ve_green1
	mov	r0, r8, lsr #8
	and	r0, r0, #0xFF
	loadv	r0

	@ load ye2 (the macros clobbered r1, so reload the Ym table base)
	mov	r0, r6, lsr #16
	and	r0, r0, #0xFF
	adr	r1, YM
	ldr	r1, [r1]
	ldr	r5, [r1, r0, lsl #2]
	@ red2 = ye2 + ve_red1
	add	r0, r5, r11
	bound_r0
	orr	r3, r3, r0, lsl #16
	@ green2 = ye2 + ue_green1 + ve_green1
	add	r0, r10, r12
	add	r0, r0, r5
	bound_r0
	@ ORR like every other packing step; the top byte of r3 is still zero.
	orr	r3, r3, r0, lsl #24
	@ blue2 = ye2 + ue_blue1
	add	r0, r5, r9
	bound_r0
	orr	r4, r4, r0

	@ load ye3 (r1 still holds the Ym table base)
	mov	r0, r6, lsr #24
	and	r0, r0, #0xFF
	ldr	r5, [r1, r0, lsl #2]
	@ red3 = ye3 + ve_red1
	add	r0, r5, r11
	bound_r0
	orr	r4, r4, r0, lsl #8
	@ green3 = ye3 + ue_green1 + ve_green1
	add	r0, r10, r12
	add	r0, r0, r5
	bound_r0
	orr	r4, r4, r0, lsl #16
	@ blue3 = ye3 + ue_blue1
	add	r0, r5, r9
	bound_r0
	orr	r4, r4, r0, lsl #24

	@ Move the packed words out of the way, restore the loop state and
	@ store the 12 output bytes, advancing the destination pointer.
	mov	r10, r2
	mov	r11, r3
	mov	r12, r4
	ldmia	sp!, {r0, r1, r2, r3, r4, r5}
	stmia	r0!, {r10, r11, r12}
	add	r5, r5, #4
	cmp	r5, r2
	blo	hloop				@ unsigned compare against width
	ldmia	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}	@ exit

@ Addresses of the lookup tables
YM:		.long	Ym_tableEx
UM_BLUE:	.long	Um_blue_tableEx
UM_GREEN:	.long	Um_green_tableEx
VM_GREEN:	.long	Vm_green_tableEx
VM_RED:		.long	Vm_red_tableEx
將核心模塊改成彙編後,解碼達到了50幀/秒(原先約20幀/秒),每幀耗時減少了約60%,即幀率提升至原來的2.5倍,彙編果然強大,哈哈。
推薦閱讀:
※ASP.NET的網頁代碼模型及生命周期 - lillllllll的專欄 - CSDN博客
※項目拖期怎麼辦 - 項目管理 - CSDN技術中心
※在國家禁止ico的大環境下,為什麼csdn敢大肆發展區塊鏈?
※計算機科學專業必讀的44冊經典著作 - guangshi007的專欄- 博客頻道 - CSDN.NET