Post Go back to editing

ADSP-21489 Chained DMA to external port (SDRAM) not working and useless.

Category: Hardware
Product Number: ADSP-21489

Hi,

I have the following code, which doesn't work.

// Working buffers used by _process. xre and xim are loaded into r4 / r8
// ahead of the (currently commented-out) FFT/IFFT calls, and are copied into
// Xre / Xim by the loop_convolve_left_firsttail loop.
.section/dm seg_dmda;
.var xre[8192];

// NOTE(review): _process reads xim through pm(i8,m8), but this section is
// declared with the /Data qualifier rather than /pm — confirm that this is
// what the LDF mapping of seg_pmda expects.
.section/Data seg_pmda;
.var xim[8192];

// 4098-word spectrum buffers. Xre/Xim/Yre/Yim appear as the last word of the
// TCBs below; Hre/Him are written by loop_2 and read back by loop_4.
.section/dm seg_bank4;
.var Hre[4098];
.var Him[4098];
.var Xre[4098];
.var Xim[4098];
.var Yre[4098];
.var Yim[4098];

// Buffers placed in seg_sdram (the external SDRAM side of the transfers,
// according to the post). Only X4098_left_* and Y_tail_left_* are referenced
// by the TCBs below; the H_tail_* buffers are not used in the code shown.
.section/dm seg_sdram;
.var X4098_left_re[4098];
.var X4098_left_im[4098];
.var H_tail_left_re[4098];
.var H_tail_left_im[4098];
.var H_tail_right_re[4098];
.var H_tail_right_im[4098];
.var Y_tail_left_re[4098];
.var Y_tail_left_im[4098];


// Transfer control blocks (six words each) for the chained DMA on channel 0.
// Word 0 is the chain pointer: it is 0 here (no successor) and _process
// overwrites word 0 of each *_1 block at run time with the address of the
// matching *_2 block's word 5, masked to 19 bits, with bit 20 set or cleared.
// The *_2 blocks keep word 0 = 0, so each chain is two transfers long.
// Remaining words are presumably: external modifier, external index,
// internal count, internal modifier, internal index — TODO confirm against
// the hardware reference.
.section/dm seg_dmda;
.var tcb_x_left_1[6] = 0, 1, X4098_left_re, 4098, 1, Xre;
.var tcb_x_left_2[6] = 0, 1, X4098_left_im, 4098, 1, Xim;
.var tcb_y_left_1[6] = 0, 1, Y_tail_left_re, 4098, 1, Yre;
.var tcb_y_left_2[6] = 0, 1, Y_tail_left_im, 4098, 1, Yim;

// WAIT_CHAINED_DMAC0 expands to a call of WaitDmaC0Finished, which polls
// DMAC0 until bits 20 and 21 both read as zero. The call overwrites r0.
// Use it as a statement: "WAIT_CHAINED_DMAC0;".
#define WAIT_CHAINED_DMAC0 call WaitDmaC0Finished
/*
Previous inline form of the wait, kept for reference only (disabled).
It masked DMAC0 with CHS + DMAS and looped while the result was non-zero:

r2 = CHS + DMAS;\
                           r1 = dm(DMAC0);\
                           r1 = r1 and r2;\
                           if ne jump (pc, 0xfffffffe)*/

//------------------------------------------------------------------------------
// _process
//
// Test routine (called from C++ as process(), per the post) that overlaps
// core work with two-block chained DMA transfers on DMA channel 0
// (registers DMAC0 / CPEP0).
//
// Sequence:
//   1. Save registers: save_reg macro plus 21 words pushed with puts.
//   2. Set CBUFEN in mode1, write DEN | CHEN | OFCEN to DMAC0.
//   3. Fill Xre with 0.0, 1.0, 2.0, ... and Xim with the negated values.
//   4. Start the Y chain with bit 20 of the chain pointer cleared, then wait.
//   5. Copy xre/xim into Xre/Xim.
//   6. Start the X chain with bit 20 set ("save to SDRAM"), then wait.
//   7. Start the X chain with bit 20 cleared ("read from SDRAM"), then wait.
//   8. Twice (loop_5): start Y chain (bit 20 cleared), run loop_2, wait,
//      run loop_4, rewrite DMAC0 = DEN | CHEN | TRAN, start Y chain
//      (bit 20 set), run loop_3, wait.
//   9. Restore registers and DMAC0, release the stack words, return.
//
// Every chain is started the same way: word 0 of the first TCB is loaded
// with ((second TCB + 5) & 0x7FFFF) and CPEP0 is loaded with
// ((first TCB + 5) & 0x7FFFF); bit 20 of both values is set or cleared
// together.
//
// NOTE(review): f5, f9 and f13 are used below, but r5/r9/r13 are not in the
// scratch list and are not pushed explicitly here — presumably they are
// covered by save_reg / restore_reg; verify.
// NOTE(review): the ENABLE_PEY / DISABLE_PEY macros are defined outside this
// listing; the stride-2, count/2 loops suggest they switch SIMD mode on and
// off — confirm.
//------------------------------------------------------------------------------
_process:

  leaf_entry;
   
  //---------------------------------------------------------------------------
  // save non-scratch register
  // scratch registers: r0, r1, r2, r4, r8, r12, i4, i12, i13, m4, m12, b4, b12, b13, PX, USTAT1, USTAT2
  //---------------------------------------------------------------------------
  save_reg;
  
  // Word 1 of 21: the DMAC0 value found on entry (written back at exit).
  r0 = dm(DMAC0);
  puts = r0;

  // Words 2..6: mode1 and the four ustat registers.
  puts = mode1;
  puts = ustat1;
  puts = ustat2;
  puts = ustat3;
  puts = ustat4;
  
  // Words 7..15: index registers, pushed via r0.
  r0 = i0; puts = r0;
  r0 = i1; puts = r0;
  r0 = i2; puts = r0;
  r0 = i3; puts = r0;
  r0 = i5; puts = r0;
  r0 = i8; puts = r0;
  r0 = i9; puts = r0;
  r0 = i14; puts = r0;
  r0 = i15; puts = r0;
  
  // Words 16..18: modify registers.
  r0 = m0; puts = r0;
  r0 = m1; puts = r0;
  r0 = m8; puts = r0;
  
  // Word 19: base register b5.
  r0 = b5; puts = r0;
  
  // Words 20..21: length registers l4 and l5. They are saved and restored but
  // never written in between, so whatever values they hold on entry stay in
  // effect while CBUFEN is set.
  r0 = l4; puts = r0;
  r0 = l5; puts = r0;

  bit set mode1 CBUFEN;                                              // enable cyclic buffer access
  nop;
  nop;
  
  // Initial channel-0 configuration.
  // NOTE(review): OFCEN remains set until DMAC0 is rewritten inside loop_5,
  // so the bit-20-set ("save to SDRAM") X chain below is started with OFCEN
  // still enabled. Check the hardware reference on whether OFCEN may be
  // combined with that direction when chaining.
  r0 = DEN | CHEN | OFCEN; 
  dm(DMAC0) = r0;
  nop;nop;nop;nop;
  
  // Test pattern: Xre[k] = k (as float), Xim[k] = -k, for k = 0..4097.
  // i4 and i5 both step with m4 = 1.
  f8 = 0.0;
  f12 = 1.0;
  f4 = -1.0;
  i4 = Xre;
  i5 = Xim;
  m4 = 1;
  
  lcntr = 4098, do(pc,loop_1) until lce;
    dm(i4,m4) = f8;
    f0 = f8;
    f1 = f0 * f4;
    dm(i5,m4) = f1;
    f8 = f8 + f12;
loop_1:
    nop;
    
    
  // Start chain tcb_y_left_1 -> tcb_y_left_2 with bit 20 cleared.
  r2 = (tcb_y_left_2 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(tcb_y_left_1) = R2;
  r2 = (tcb_y_left_1 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(CPEP0) = r2;
  
  // Argument setup for the FFT call, which is commented out in this listing.
  // r12 = 13 is presumably log2 of the 8192-point length — confirm.
  r12 = 13;
  r8 = xim;
  r4 = xre;
 
// CALL fft function
  
  WAIT_CHAINED_DMAC0;
  
  // Copy xre -> Xre and xim -> Xim, 4098 / 2 iterations with stride 2.
  // The loads use m0 / m8 = 0, so the source pointers do not move until the
  // last instruction of the loop writes the same values back and advances
  // i0 / i8 by 2.
  i0 = xre;
  i1 = Xre;
  i2 = Xim;
  i8 = xim;
  m0 = 0;
  m4 = 2;
  m8 = 0;
  m12 = 2;
 
  ENABLE_PEY;
  
  lcntr = 4098 / 2, do(pc,loop_convolve_left_firsttail) until lce;
    f0 = dm(i0,m0), f1 = pm(i8,m8); // load Re{X},Im{X}
    dm(i1,m4) = f0;
    dm(i2,m4) = f1;
    
loop_convolve_left_firsttail:  
    dm(i0,m4) = f0, pm(i8,m12) = f1;
  
  DISABLE_PEY;
    
  
  // save X4098 to SDRAM
  // Chain tcb_x_left_1 -> tcb_x_left_2 with bit 20 set in both pointers.
  r2 = (tcb_x_left_2 + 5) & 0x7FFFF; 
  r2 = bset r2 by 20;
  dm(tcb_x_left_1) = r2;
  r2 = (tcb_x_left_1 + 5) & 0x7FFFF;
  r2 = bset r2 by 20;
  dm(CPEP0) = r2;
  
  // Argument setup for the IFFT call, which is commented out in this listing.
  r12 = 13;
  r8 = xim;
  r4 = xre;  
  // CALL ifft function
    
  // NOTE(review): with the IFFT call commented out, DMAC0 is polled only a
  // few instructions after the CPEP0 write. If the status bits are not yet
  // asserted at that point, the wait would return before the chain has run —
  // verify against the hardware reference.
  WAIT_CHAINED_DMAC0;
  
  
 
  // read X4098 from SDRAM
  // Same chain as above, but with bit 20 cleared in both pointers.
  r2 = (tcb_x_left_2 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(tcb_x_left_1) = R2;
  r2 = (tcb_x_left_1 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(CPEP0) = r2;

  // The poll follows the CPEP0 write directly here (same concern as above).
  WAIT_CHAINED_DMAC0; 



  // Outer loop: the load / compute / save sequence below runs twice.
  lcntr = 2, do(pc,loop_5) until lce;
  
  //------ load delayed spectrum (left) ------  
  // Y chain with bit 20 cleared.
  // NOTE(review): in the second iteration DMAC0 still holds
  // DEN | CHEN | TRAN from the "save" step of the first iteration when this
  // bit-20-clear chain is started — confirm that this combination is valid.
  r2 = (tcb_y_left_2 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(tcb_y_left_1) = R2;
  r2 = (tcb_y_left_1 + 5) & 0x7FFFF;
  r2 = bclr r2 by 20;
  dm(CPEP0) = r2;
  
  
  // Runs while the Y chain is in flight: Hre = 1.0 * Xre, Him = 1.0 * Xim.
  // The loads of the factors from Hre/Him are commented out, so f0 / f1
  // (and s0 / s1) stay at the constant 1.0.
  i0 = Hre;
  i1 = Him;
  i4 = Xre;
  i5 = Xim;
  m0 = 0;
  m4 = 2;
  f0 = 1.0;
  s0 = 1.0;
  f1 = 1.0;
  s1 = 1.0;
  
  ENABLE_PEY;
  
  lcntr = 4098 / 2, do(pc,loop_2) until lce;
    /*f0 = dm(i0,m0);*/
                  f4 = dm(i4,m4);
    f8 = f0 * f4 /*, f1 = dm(i1,m0)*/;
                  f5 = dm(i5,m4);
    f9 = f1 * f5;
                  dm(i0,m4) = f8;
loop_2:
                  dm(i1,m4) = f9;

  DISABLE_PEY;  
  
  // Yre / Yim are read by the next loop, so wait for the Y chain first.
  WAIT_CHAINED_DMAC0;

  
  // Accumulate: Yre += Hre, Yim += Him. Yre/Yim are read with m0 = 0 (no
  // pointer advance) and written back with m4 = 2 (advance).
  i0 = Hre;
  i1 = Him;
  i4 = Yre;
  i5 = Yim;
  m0 = 0;
  m4 = 2;
  
  ENABLE_PEY;
  
  lcntr = 4098 / 2, do(pc,loop_4) until lce;
                    f8 = dm(i0,m4);
                   f12 = dm(i4,m0);
    f8 = f8 + f12,  f9 = dm(i1,m4);
                   f13 = dm(i5,m0);
    f9 = f9 + f13, dm(i4,m4) = f8;
loop_4:
                   dm(i5,m4) = f9;
    
  DISABLE_PEY;
  
  //------ save delayed spectrum (left) ------
  // DMAC0 is rewritten here with TRAN instead of OFCEN. The write happens
  // without first clearing DEN, and no FIFO flush is issued anywhere in this
  // routine — NOTE(review): check the recommended reconfiguration sequence.
  r0 = DEN | CHEN | TRAN; //OFCEN; 
  dm(DMAC0) = r0;
  nop;nop;nop;nop;
  
  // Y chain with bit 20 set in both pointers.
  r2 = (tcb_y_left_2 + 5) & 0x7FFFF;
  r2 = bset r2 by 20;
  dm(tcb_y_left_1) = r2;
  r2 = (tcb_y_left_1 + 5) & 0x7FFFF;
  r2 = bset r2 by 20;
  dm(CPEP0) = r2;
  
  // Filler loop: reads xre[0] 4098 times (m0 = 0), adds 1.0 and discards the
  // result, since the store is commented out. i1 and m4 are set but unused.
  // Presumably stands in for real work while the save chain runs.
  i0 = xre;
  i1 = Yre;
  m0 = 0;
  m4 = 1;
  f12 = 1.0;
  
  lcntr = 4098, do(pc,loop_3) until lce;
    f8 = dm(i0,m0);
    f8 = f8 + f12;
    //dm(i0,m4) = f8;

loop_3:
    nop;
  
  
  WAIT_CHAINED_DMAC0;
  
  nop;   

loop_5:
    nop;
  
  //------------------------------------------------------------------------------------------------------------------------------------------------------
  // restore non-scratch register
  //------------------------------------------------------------------------------------------------------------------------------------------------------ 

  // gets(n) offsets mirror the push order above in reverse:
  // gets(1) is the last word pushed (l5), gets(21) the first (DMAC0).
  l5 = gets(1);
  l4 = gets(2);

  b5 = gets(3);

  m8 = gets(4);
  m1 = gets(5);
  m0 = gets(6);

  i15 = gets(7);
  i14 = gets(8);
  
  i9 = gets(9);
  i8 = gets(10);
  i5 = gets(11);
  i3 = gets(12);
  i2 = gets(13);
  i1 = gets(14);
  i0 = gets(15);
  
  ustat4 = gets(16);
  ustat3 = gets(17);
  ustat2 = gets(18);
  ustat1 = gets(19);
  mode1 = gets(20);
    
  // address warning ea2547 by two nops
  nop;
  nop;
    
  // Write back the DMAC0 value captured on entry.
  // NOTE(review): if that value has DEN clear, this write disables the
  // channel; a transfer still in flight at this point (for example if the
  // last wait returned early) would be cut off. This would be consistent with
  // the missing SDRAM update and the shorter cycle count described in the
  // post — verify on hardware.
  r0 = gets(21);
  dm(DMAC0) = r0;
  


  // Release the 21 words pushed at entry.
  alter(21);
  
  restore_reg;
  
  leaf_exit;
  
._process.end: rts;



//------------------------------------------------------------------------------
// WaitDmaC0Finished
// Busy-waits until a single read of DMAC0 shows bit 20 and bit 21 both clear.
// In:    nothing
// Out:   nothing
// Clobb: r0 (holds the last DMAC0 value read), shifter status flags
//
// The disabled inline version of WAIT_CHAINED_DMAC0 masked DMAC0 with
// CHS + DMAS, so bits 20 / 21 are presumably the DMAS and CHS status bits —
// confirm against the processor definition header.
//------------------------------------------------------------------------------
#define WAITDMAC0_DMAS_BITPOS 20
#define WAITDMAC0_CHS_BITPOS  21

WaitDmaC0Finished:
  r0 = dm(DMAC0);                         // fresh status sample on every pass
  btst r0 by WAITDMAC0_DMAS_BITPOS;
  if not sz jump (pc, WaitDmaC0Finished); // bit 20 still set: sample again
  btst r0 by WAITDMAC0_CHS_BITPOS;
  if sz rts;                              // both bits clear in this sample
  jump (pc, WaitDmaC0Finished);           // bit 21 still set: sample again

I am calling the function process() from a C++ program that writes data to xre. When I run it the first time, I can see that data is written to Y_tail_left_re. But when I run process() a second time, the data in Y_tail_left_re does not change. You can also see this by measuring the cycle count of process(): the second call takes 80000 cycles fewer than the first call.

Calling process() third and fourth time does not change the behavior. I can see the content in Yre was changed but not on SDRAM.

Even more uncanny: If I set a breakpoint directly after the write to SDRAM in line 256 I can see that the content of the SDRAM changes and cycle counts are stable, no 80000 difference. If I set the breakpoint outside the function the behavior above is back.

What am I doing wrong here? This is really a showstopper for the 21489. I need help urgently!

Best regards,

Raphael

P.S. I am working on an EZ-KIT 21489 board, so I can rule out a hardware issue in our own hardware design.

  • Hi,

    Please refer to Application Note EE-286, "Interfacing SDRAM Memories to SHARC® Processors", and the associated example code, both linked below.


    www.analog.com/.../EE286_rev5.pdf
    www.analog.com/.../EE286v05.zip

    Regards,
    Anand Selvaraj.

  • Hi Anand,

    Thank you for looking into this issue.

    I have seen the app note already. But in my case it is not helpful. It does not demonstrate DMA chaining. It only demonstrates DMA access itself. I have proven that writing to SDRAM via DMA does work. As soon as I do it via chained DMA it does not work anymore as described above.

    Did you try my code and does it work for you even if you run the function process multiple times?

    btw. the app note is talking about 214xx examples but they are not inside the zip archive.

    Best regards,

    Raphael

  • Hi Raphael,

    Apologies for the delay in response.

    Chained DMA sequences are a set of multiple DMA operations, each auto initializing the next in line. To start a new DMA sequence after the current one is finished, the IOP automatically loads new index, modify, and count values from an internal memory location (or external memory location for DMA to external ports) pointed to by that channel’s chain pointer register. Using chaining, programs can set up consecutive DMA operations and each operation can have different attributes.

    Also, please note that in chained DMA operation the processor's DMA controller automatically sets up another DMA transfer once the contents of the current buffer have been transmitted or received. This is controlled by linking the TCBs: the chain pointer in one TCB points to the index register entry of the next TCB. The chain pointer thus acts as a pointer to the next set of buffer parameters stored in external or internal memory.

    Please find attached example code for external port DMA in chaining mode on the ADSP-21469. You can modify it to work on the ADSP-21489 processor.
    Please let us know if you need any clarification.

    Regards,
    Divya.P

    563649.zip

  • Thank you, but your answer was too late. We changed the project layout because I could not get it working on a 21489.