16-bit Continuous DMA Transfer with SC589 via EPPI

Our goal is to be able to send a large chunk of data via EPPI by submitting the chunk's contents in segments as buffers. This seems straightforward, as it was mostly doable by modifying the contents of the provided "PPI driver callback mode" example project in Cross Core, but there are a few anomalies.

The code below is a sample project for testing this functionality. Ideally, it would output 0-8191 sequentially over EPPI. In the provided output file from a logic analyzer, the results are close to what is desired but have odd jumps and missing data. You'll notice the discontinuities at various values such as 192, 448, 960, 1472, 1984, 3776, 5056, etc. You'll also see that the output only reaches 8128 before looping again.

I'm convinced that we are missing something in terms of proper configuration for the 16-bit mode as the provided code example in Cross Core is for 8-bit. Any help would be greatly appreciated!

Code:

Basic idea: Load a large buffer with the sequential data to be transmitted via EPPI (0-8191). Load the first two segments into the two transmission buffers and submit them. Enable EPPI to begin transmission. Then, on each callback, grab the pointer to the buffer that was just processed, load the next segment of sequential data into it, and resubmit it. Repeat until a time-out is reached or the program is interrupted. The program loops back to the start of the sequential data if the end is reached before the time-out.

/*****************************************************************************
 * EPPI_Testing_Core0.c
 *****************************************************************************/
#define NUM_ELEMENTS 256

#include <sys/platform.h>
#include <sys/adi_core.h>
#include "adi_initialize.h"
#include "EPPI_Testing_Core0.h"
#define BLOCK_SIZE (NUM_ELEMENTS*32)

/* EPPI driver handle; filled in by adi_eppi_Open() in main() and used by
 * ConfigureDevice() and PpiCallback(). */
static ADI_EPPI_HANDLE hDevice;
/* Working memory handed to adi_eppi_Open() together with its size. */
uint8_t EppiMemory[ADI_EPPI_MEMORY_SIZE];
/* Source data for the test; filled by PopulateBlock() and copied out in
 * NUM_ELEMENTS-sized segments by LoadBuffer(). */
static uint16_t TestBlock[BLOCK_SIZE];


/* Fills block[0 .. BLOCK_SIZE-1] with sequential values. */
void PopulateBlock(uint16_t* block);
/* Copies segment number `count` (NUM_ELEMENTS values) of block into buffer. */
void LoadBuffer(uint16_t* block, uint16_t* buffer, int count);
/* Fills buffer with NUM_ELEMENTS copies of count. */
void LoadBuffer_Alt(uint16_t* buffer, int count); // Unused

/* Buffer pointer most recently passed to PpiCallback() as pArg. */
void* bufferPtr; // Global pointer to the last processed EPPI buffer
/* Index of the next TestBlock segment to load. Written by main() before the
 * EPPI is enabled and by PpiCallback() afterwards. */
volatile int counter; // Counter for segmenting the test block of data

/**
 * Program entry point.
 *
 * Fills TestBlock with sequential values, opens and configures the EPPI for
 * transmission, queues the first two NUM_ELEMENTS-sized segments, enables the
 * EPPI and then busy-waits while PpiCallback() keeps refilling and
 * resubmitting the buffers. After the time-out the EPPI is disabled and the
 * driver is closed.
 *
 * Every driver call is checked; a value of 0 is treated as success, the same
 * convention ConfigureDevice() uses. After the first failure the remaining
 * setup steps are skipped and the driver is closed.
 *
 * @return 0 on success, 1 if any EPPI driver call reported an error.
 */
int main()
{
	static uint16_t TxBuffer1[NUM_ELEMENTS]; // Transmission buffer 1
	static uint16_t TxBuffer2[NUM_ELEMENTS]; // Transmission buffer 2

	uint32_t status = 0u;    /* 0 means success */
	uint32_t closeStatus;
	volatile int timer = 0;  /* volatile so the busy-wait is not optimized away */

	/**
	 * Initialize managed drivers and/or services that have been added to
	 * the project.
	 */
	adi_initComponents();

	/**
	 * The default startup code does not include any functionality to allow
	 * core 0 to enable core 1 and core 2. A convenient way to enable
	 * core 1 and core 2 is to use the adi_core_enable function.
	 */
	adi_core_enable(ADI_CORE_SHARC0);
	adi_core_enable(ADI_CORE_SHARC1);

	/* Enabling DMA */
	*pREG_SPU0_SECUREP95 = 2;     /* PPI0 */
	*pREG_SPU0_SECUREP107 = 2;    /* PPI0 Channel0 DMA 28 */
	*pREG_SPU0_SECUREP108 = 2;    /* PPI0 Channel1 DMA 29 */

	PopulateBlock(TestBlock); // Initialize the test block (0:(NUM_ELEMENTS*32)-1)

	/* open the EPPI driver */
	status = (uint32_t)adi_eppi_Open(0, ADI_EPPI_DIRECTION_TX, EppiMemory, ADI_EPPI_MEMORY_SIZE, &hDevice);
	if(status != 0u)
	{
		return 1; /* open failed: there is no device to configure or close */
	}

	status = ConfigureDevice(); // Configure the device for 16-bit transmission

	if(status == 0u)
	{
		status = (uint32_t)adi_eppi_RepetiveBufferEnable(hDevice, false); // Do NOT repeat the last buffer
	}
	if(status == 0u)
	{
		status = (uint32_t)adi_eppi_RegisterCallback(hDevice, PpiCallback, NULL); // Register callback function
	}

	/* submit the EPPI buffers */
	/* NOTE(review): the size argument is NUM_ELEMENTS*2; presumably the driver
	 * expects a byte count (2 bytes per 16-bit element) - confirm against the
	 * driver documentation. */
	if(status == 0u)
	{
		counter = 0; // Start with the first NUM_ELEMENTS of the test block
		LoadBuffer(TestBlock, TxBuffer1, counter); // First segment into TxBuffer1
		status = (uint32_t)adi_eppi_SubmitBuffer(hDevice, TxBuffer1, NUM_ELEMENTS*2);
		counter++; // Advance to the next segment
	}
	if(status == 0u)
	{
		LoadBuffer(TestBlock, TxBuffer2, counter); // Second segment into TxBuffer2
		status = (uint32_t)adi_eppi_SubmitBuffer(hDevice, TxBuffer2, NUM_ELEMENTS*2);
		counter++; // Advance to the next segment
	}

	if(status == 0u)
	{
		status = (uint32_t)adi_eppi_Enable(hDevice, true); // Enable EPPI
	}

	if(status == 0u)
	{
		while(timer < 100000000) // Method of timing-out the program
		{
			timer++;
		}

		/* Stop the transfer before closing so the callback is no longer
		 * resubmitting buffers while the driver is torn down. */
		status = (uint32_t)adi_eppi_Enable(hDevice, false);
	}

	/* close the EPPI driver; keep the first error seen */
	closeStatus = (uint32_t)adi_eppi_Close(hDevice);
	if(status == 0u)
	{
		status = closeStatus;
	}

	return (status == 0u) ? 0 : 1;
}

void PpiCallback(void* pHandle, uint32_t u32Arg, void* pArg)
{
	//printf("counter: %d\n", counter);
    ADI_EPPI_HANDLE pDevice = (ADI_EPPI_HANDLE *)pHandle;
    ADI_EPPI_EVENT event = (ADI_EPPI_EVENT)u32Arg;
    uint16_t *data = (uint16_t*)pArg;
    if(counter > 31) // We've reached the end of test block. Go back to the first segment of data.
	{
		counter = 0; // Set counter to read the first NUM_ELEMENTS of test block
	}
	//printf("Buffer processed: %x\n", pArg);
	bufferPtr = (uint16_t*)pArg; // Grab the pointer of the just-processed buffer
    switch (event) {
        case ADI_EPPI_DMA_BUFFER_PROCESSED:
			LoadBuffer(TestBlock, bufferPtr, counter); // Load into the just-processed buffer the next segment of test block data
			adi_eppi_SubmitBuffer(hDevice, bufferPtr, NUM_ELEMENTS*2); // Unknown as to why number of elements needs to be 2x but it works
			counter++; // Increase counter to go to next NUM_ELEMENTS of test block
            break;
        case ADI_EPPI_HW_ERR_NONE:
        	break;
    default:
        break;
    }
}

/**
 * Applies the EPPI settings used by this test to hDevice.
 *
 * The driver calls are made in order and the sequence stops at the first one
 * that returns a non-zero result: ITU mode (ADI_EPPI_GENERAL_PURPOSE), frame
 * sync mode (ADI_EPPI_FS_MODE0), clock polarity (ADI_EPPI_RISECLK_RISEFS),
 * 16-bit data length, 16-bit DMA transfer size, internal clock, clock divide
 * and streaming disabled.
 *
 * @return 0 if every call succeeded, otherwise the result of the first
 *         failing driver call cast to uint32_t.
 */
uint32_t ConfigureDevice(void)
{
	uint32_t Result;

	Result = (uint32_t)adi_eppi_SetITUMode(hDevice, ADI_EPPI_GENERAL_PURPOSE);

	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetFSMode(hDevice, ADI_EPPI_FS_MODE0);
	}
	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetClkPolarity(hDevice, ADI_EPPI_RISECLK_RISEFS);
	}
	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetDataLength(hDevice, ADI_EPPI_16BIT);
	}
	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetDmaTransferSize(hDevice, ADI_EPPI_DMA_TRANSFER_16BIT);
	}
	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetInternalClk(hDevice,true);
	}
	if(Result == 0u)
	{
		Result = (uint32_t)adi_eppi_SetClockDivide(hDevice, 99); // 1.25 MHz according to the original author
	}
	if(Result == 0u)
	{
		/* Capture this result too, so a failure in the last step is reported
		 * to the caller instead of being silently dropped. */
		Result = (uint32_t)adi_eppi_StreamingEnable(hDevice, false);
	}
	return Result;
}

/**
 * Fills block with the sequence 0, 1, 2, ... BLOCK_SIZE-1.
 *
 * @param block  Destination array; must hold at least BLOCK_SIZE elements.
 */
void PopulateBlock(uint16_t* block)
{
	int i;

	for(i = 0; i < BLOCK_SIZE; i++)
	{
		block[i] = (uint16_t)i;
	}
}

/**
 * Copies one NUM_ELEMENTS-sized segment of block into buffer.
 *
 * @param block   Source array; elements count*NUM_ELEMENTS up to
 *                (count+1)*NUM_ELEMENTS - 1 are read, so the caller must keep
 *                count within the bounds of the array.
 * @param buffer  Destination; must hold at least NUM_ELEMENTS elements.
 * @param count   Zero-based index of the segment to copy.
 */
void LoadBuffer(uint16_t* block, uint16_t* buffer, int count)
{
	const uint16_t* segment = block + (count * NUM_ELEMENTS);
	int i;

	for(i = 0; i < NUM_ELEMENTS; i++)
	{
		buffer[i] = segment[i];
	}
}

/**
 * Alternative to LoadBuffer(): fills buffer with NUM_ELEMENTS copies of count.
 *
 * @param buffer  Destination; must hold at least NUM_ELEMENTS elements.
 * @param count   Value to store; it is truncated to 16 bits.
 */
void LoadBuffer_Alt(uint16_t* buffer, int count)
{
	const uint16_t value = (uint16_t)count;
	int i;

	for(i = 0; i < NUM_ELEMENTS; i++)
	{
		buffer[i] = value;
	}
}
results.txt