AnsweredAssumed Answered

How can I get native support for multiply-accumulate (multiply with cumulative add)?

Question asked by bookevg on Nov 10, 2011
Latest reply on Oct 25, 2012 by MikeP

I use Sharc (ADSP21489) + VDSP++5.U10.

 

I think that multiply with cumulative add (multiply-accumulate) is one of the main features of digital signal processing, but I could not find any support for it in C/C++. Only using asm saves the situation.

Could AD add a built-in function that supports multiply with cumulative add?

 

My code:

/*****************************************************************************
* MultiplyWithAdd.c
*****************************************************************************/
#include "stdfix.h"

/* Short aliases for the fixed-point `fract` types (from stdfix.h) and the
 * plain integer types used below.  Note that fr32 and sfr32 expand to the
 * same type. */
#define fr32     fract
#define sfr32     fract
#define ufr32     unsigned fract
#define s32          int
#define u32          unsigned int
/* Memory-space qualifiers applied to the coefficient (SPACE1) and input
 * signal (SPACE2) pointers in FiltrA1st.
 * NOTE(review): presumably `pm`/`dm` are the VisualDSP++ program-memory /
 * data-memory qualifiers -- confirm against the compiler manual. */
#define SPACE1     pm
#define SPACE2     dm

/*
 * FiltrA1st - element-wise multiply-accumulate of two fixed-point vectors:
 *
 *     out[n] += k[n] * in[n]      for n = 0 .. len-1
 *
 * pSigOut  accumulator buffer; read and written as sfr32
 * pSigIn   input samples; read through a SPACE2-qualified sfr32 pointer
 * pk       coefficients; read through a SPACE1-qualified sfr32 pointer
 * len      number of elements to process
 *
 * Always returns 1.
 *
 * NOTE(review): the pragmas in front of the loop are hints to the compiler,
 * not run-time checks.  loop_count(16, 64, 2) presumably states the minimum,
 * maximum and modulo of the trip count, and all_aligned presumably states
 * that every pointer is suitably aligned -- confirm against the compiler
 * manual and make sure callers honour them.
 */
s32 FiltrA1st(void *pSigOut, void *pSigIn, void *pk, u32 len)
{
    sfr32        *acc   = (sfr32 *)pSigOut;
    sfr32 SPACE2 *samp  = (sfr32 SPACE2 *)pSigIn;
    sfr32 SPACE1 *coeff = (sfr32 SPACE1 *)pk;
    u32 n;

#ifndef __MVS__
#pragma SIMD_for
#pragma all_aligned
#pragma vector_for
#pragma loop_count(16, 64, 2)
#endif
    for (n = 0; n < len; n++)
    {
        /* Product is formed in the fract type, then added to the output. */
        acc[n] += coeff[n] * samp[n];
    }

    return 1;
}

/*
 * Number of elements in each test buffer and the length passed to
 * FiltrA1st.  FiltrA1st's loop carries `#pragma loop_count(16, 64, 2)`, so
 * the length must stay within 16..64 and be a multiple of 2; with the
 * previous value of 4 the optimised loop ran past the end of the 4-element
 * buffers.  32 matches the 0x20-word buffer spacing and the 16 SIMD
 * iterations seen in the posted disassembly.
 */
#define FILTR_TEST_LEN 32

/* Test buffers: input samples, coefficients and accumulated output.
 * NOTE(review): FiltrA1st accesses these through non-volatile pointers (and
 * reads `k` through a SPACE1-qualified pointer) -- confirm that the
 * qualifiers and the memory placement of `k` are what is intended. */
volatile int In[FILTR_TEST_LEN];
volatile int k[FILTR_TEST_LEN];
volatile int Out[FILTR_TEST_LEN];

/* Runs the multiply-accumulate kernel once over the test buffers. */
int main( void )
{
    /* Begin adding your custom code here */
    FiltrA1st((void *)&Out[0], (void *)&In[0], (void *)&k[0], FILTR_TEST_LEN);
    return 0;
}

 

The compiler generates the following assembly:

s32 FiltrA1st(void *pSigOut, void *pSigIn, void *pk, u32 len)
{
[124354] i7=modify (i7,0xfffffffa);
[124357] r2=i3;
[124359] dm(0xfffffffa,i6)=r2;
[12435B] r2=i5;
[12435D] r2=mr0f, dm(0xfffffffb,i6)=r2;
[124360] r2=mr1f, dm(0xfffffffc,i6)=r2;
[124363] r2=mr2f, dm(0xfffffffd,i6)=r2;
[124366] i12=0xb2150;
[124373] dm(0xfffffffe,i6)=r2;
[124375] bit set mode1 0x200000;
//===================================================================
sfr32 SPACE2     *pDataDst_Sig     = (sfr32 SPACE2     *)pSigIn;
sfr32 SPACE1     *pDataDst_Coeff     = (sfr32 SPACE1     *)pk;
sfr32               *pDataSrc_Sig     = (sfr32     *)pSigOut;
u32 i;
#ifndef     __MVS__
#pragma SIMD_for
#pragma all_aligned
#pragma vector_for
#pragma loop_count(16, 64, 2)
#endif
for (i     = 0; i < len; i++)
{
*pDataSrc_Sig++          += (*pDataDst_Coeff++)*(*pDataDst_Sig++);
[124369] m12=0x2;
[12436B] m4=0x2;
[12436D] i3=0xb2130;
[124370] i5=0xb2170;
[124378] nop;
[124379] nop;
[12437A] r2=dm(i3,m4), r1=pm(i12,m12);
[12437C] MRF=r2*r1(ssf), i4=modify(i5,m5);
[12437F] r1=sat MRF(sf), r2=dm(i5,0x2);
[124382] lcntr=0xf, do (pc,0xf) until lce;
[124385] r2=r2+r1, r1=dm(i3,m4), r0=pm(i12,m12);
[124388] if av r2=ashift r2 by 0xffffffe1;
[12438B] if av r2=btgl r2 by 0x1f;
[12438E] MRF=r1*r0(ssf), dm(i4,0x2)=r2;
[124391] r1=sat MRF(sf), r2=dm(i5,0x2);
[124394] r2=r2+r1;
[124395] if av r2=ashift r2 by 0xffffffe1;
[124398] if av r2=btgl r2 by 0x1f;
[12439B] dm(i4,0x2)=r2;
[12439D] bit clr mode1 0x200000;
}
//===================================================================
return 1;
[1243A0] nop;
[1243A1] nop;
[1243A2] r2=dm(0xfffffffc,i6);
[1243A4] mr0f=r2, r2=dm(0xfffffffd,i6);
[1243A7] mr1f=r2, i12=dm(m7,i6);
[1243AA] r2=dm(0xfffffffe,i6);
[1243AC] mr2f=r2, r0=m6;
[1243AF] i3=dm(0xfffffffa,i6);
[1243B1] i5=dm(0xfffffffb,i6);
[1243B3] jump (m14,i12) (db);
[1243B5] rframe;
[1243B6] nop;
//===================================================================
}

 

 

I would write it as:

s32 FiltrA1st(void *pSigOut, void *pSigIn, void *pk, u32 len)
{
i7=modify (i7,0xfffffffa);
r2=i3;
dm(0xfffffffa,i6)=r2;
r2=i5;
r2=mr0f, dm(0xfffffffb,i6)=r2;
r2=mr1f, dm(0xfffffffc,i6)=r2;
r2=mr2f, dm(0xfffffffd,i6)=r2;
i12=0xb2150;

i3=0xb2130;
i5=0xb2170;

i4=i5;

dm(0xfffffffe,i6)=r2;
bit set mode1 0x200000;

nop;

nop;

mrf=0, r4=dm(i5,m6);

mr1f=r4, r2=dm(i3,m4), r1=pm(i12,m12);

r8 = mrf + r2*r1(ssf), r2=dm(i3,m4), r1=pm(i12,m12);

lcntr=0xf, do cycle_end until lce;

mrf=0, r4=dm(i5,m6);

mr1f=r4, dm(i4,m6)=r8;

cycle_end:

r8 = mrf + r2*r1(ssf), r2=dm(i3,m4), r1=pm(i12,m12);

dm(i4,m6)=r8;

bit clr mode1 0x200000;

[1243A0] nop;
[1243A1] nop;
[1243A2] r2=dm(0xfffffffc,i6);
[1243A4] mr0f=r2, r2=dm(0xfffffffd,i6);
[1243A7] mr1f=r2, i12=dm(m7,i6);
[1243AA] r2=dm(0xfffffffe,i6);
[1243AC] mr2f=r2, r0=m6;
[1243AF] i3=dm(0xfffffffa,i6);
[1243B1] i5=dm(0xfffffffb,i6);
[1243B3] jump (m14,i12) (db);
[1243B5] rframe;
[1243B6] nop;

Outcomes