Fix cache
This commit is contained in:
parent
c01c5a83f6
commit
4f73860ea2
|
@ -97,6 +97,8 @@ static const char *TAG = "SYSCALL";
|
|||
extern char _heap_start[];
|
||||
extern char _heap_end[];
|
||||
char *_heap_cur = &_heap_start[0];
|
||||
char *_heap_line = &_heap_start[0];
|
||||
char *_ioheap_line = &_heap_end[0]-0x40000000;
|
||||
|
||||
sys_putchar_t sys_putchar;
|
||||
sys_getchar_t sys_getchar;
|
||||
|
@ -184,6 +186,14 @@ static size_t sys_brk(size_t pos)
|
|||
res = -ENOMEM;
|
||||
} else
|
||||
{
|
||||
if((uintptr_t)pos > (uintptr_t)_heap_line)
|
||||
{
|
||||
_heap_line = (char *)(uintptr_t)pos;
|
||||
if((uintptr_t)_heap_line-0x40000000 > (uintptr_t)_ioheap_line)
|
||||
{
|
||||
LOGE(TAG, "WARNING: cache heap line > iomem heap line!\r\n");
|
||||
}
|
||||
}
|
||||
/* Adjust brk pointer. */
|
||||
_heap_cur = (char *)(uintptr_t)pos;
|
||||
/* Return current address. */
|
||||
|
|
|
@ -15,18 +15,27 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "dmac.h"
|
||||
#include "fpioa.h"
|
||||
#include "plic.h"
|
||||
#include "stdlib.h"
|
||||
#include "sysctl.h"
|
||||
#include "utils.h"
|
||||
#include "iomem.h"
|
||||
|
||||
volatile dmac_t *const dmac = (dmac_t *)DMAC_BASE_ADDR;
|
||||
|
||||
typedef struct _dmac_context
|
||||
{
|
||||
dmac_channel_number_t dmac_channel;
|
||||
#if FIX_CACHE
|
||||
uint8_t *dest_buffer;
|
||||
uint8_t *src_malloc;
|
||||
uint8_t *dest_malloc;
|
||||
size_t buf_len;
|
||||
#endif
|
||||
|
||||
plic_irq_callback_t callback;
|
||||
void *ctx;
|
||||
} dmac_context_t;
|
||||
|
@ -353,6 +362,40 @@ int dmac_set_channel_param(dmac_channel_number_t channel_num,
|
|||
dmac_ch_ctl_u_t ctl;
|
||||
dmac_ch_cfg_u_t cfg_u;
|
||||
|
||||
#if FIX_CACHE
|
||||
uint8_t *src_io = (uint8_t *)src;
|
||||
uint8_t *dest_io = (uint8_t *)dest;
|
||||
if(is_memory_cache((uintptr_t)src))
|
||||
{
|
||||
if(src_inc == DMAC_ADDR_NOCHANGE)
|
||||
{
|
||||
src_io = (uint8_t *)iomem_malloc(1<<dmac_trans_width);
|
||||
memcpy(src_io, src, 1<<dmac_trans_width);
|
||||
}
|
||||
else
|
||||
{
|
||||
src_io = (uint8_t *)iomem_malloc(blockSize * (1<<dmac_trans_width));
|
||||
memcpy(src_io, src, blockSize * (1<<dmac_trans_width));
|
||||
}
|
||||
dmac_context[channel_num].src_malloc = src_io;
|
||||
}
|
||||
if(is_memory_cache((uintptr_t)dest))
|
||||
{
|
||||
if(dest_inc == DMAC_ADDR_NOCHANGE)
|
||||
{
|
||||
dest_io = (uint8_t *)iomem_malloc(1<<dmac_trans_width);
|
||||
dmac_context[channel_num].buf_len = 1<<dmac_trans_width;
|
||||
}
|
||||
else
|
||||
{
|
||||
dest_io = (uint8_t *)iomem_malloc(blockSize * (1<<dmac_trans_width));
|
||||
dmac_context[channel_num].buf_len = blockSize * (1<<dmac_trans_width);
|
||||
}
|
||||
dmac_context[channel_num].dest_malloc = dest_io;
|
||||
dmac_context[channel_num].dest_buffer = dest;
|
||||
}
|
||||
#endif
|
||||
|
||||
int mem_type_src = is_memory((uintptr_t)src), mem_type_dest = is_memory((uintptr_t)dest);
|
||||
dmac_transfer_flow_t flow_control;
|
||||
if(mem_type_src == 0 && mem_type_dest == 0)
|
||||
|
@ -381,8 +424,13 @@ int dmac_set_channel_param(dmac_channel_number_t channel_num,
|
|||
|
||||
writeq(cfg_u.data, &dmac->channel[channel_num].cfg);
|
||||
|
||||
#if FIX_CACHE
|
||||
dmac->channel[channel_num].sar = (uint64_t)src_io;
|
||||
dmac->channel[channel_num].dar = (uint64_t)dest_io;
|
||||
#else
|
||||
dmac->channel[channel_num].sar = (uint64_t)src;
|
||||
dmac->channel[channel_num].dar = (uint64_t)dest;
|
||||
#endif
|
||||
|
||||
ctl.data = readq(&dmac->channel[channel_num].ctl);
|
||||
ctl.ch_ctl.sms = DMAC_MASTER1;
|
||||
|
@ -736,6 +784,22 @@ int dmac_is_done(dmac_channel_number_t channel_num)
|
|||
/*
 * Wait until the channel is idle. In FIX_CACHE builds, additionally finish the
 * bounce-buffer bookkeeping recorded for that channel: copy received data back
 * to the caller's buffer and release the iomem buffers.
 */
void dmac_wait_done(dmac_channel_number_t channel_num)
{
    dmac_wait_idle(channel_num);
#if FIX_CACHE
    dmac_context_t *chan = &dmac_context[channel_num];

    if(chan->dest_buffer != NULL)
    {
        /* Hand the received bytes to the caller before dropping the bounce buffer. */
        memcpy(chan->dest_buffer, chan->dest_malloc, chan->buf_len);

        iomem_free(chan->dest_malloc);
        chan->dest_malloc = NULL;
        chan->dest_buffer = NULL;
        chan->buf_len = 0;
    }
    if(chan->src_malloc != NULL)
    {
        iomem_free(chan->src_malloc);
        chan->src_malloc = NULL;
    }
#endif
}
|
||||
|
||||
int dmac_is_idle(dmac_channel_number_t channel_num)
|
||||
|
@ -771,6 +835,22 @@ static int dmac_irq_callback(void *ctx)
|
|||
dmac_context_t *v_dmac_context = (dmac_context_t *)(ctx);
|
||||
dmac_channel_number_t v_dmac_channel = v_dmac_context->dmac_channel;
|
||||
dmac_channel_interrupt_clear(v_dmac_channel);
|
||||
#if FIX_CACHE
|
||||
if(v_dmac_context->dest_buffer)
|
||||
{
|
||||
memcpy(v_dmac_context->dest_buffer, v_dmac_context->dest_malloc, v_dmac_context->buf_len);
|
||||
iomem_free(v_dmac_context->dest_malloc);
|
||||
v_dmac_context->dest_malloc = NULL;
|
||||
v_dmac_context->dest_buffer = NULL;
|
||||
v_dmac_context->buf_len = 0;
|
||||
}
|
||||
if(v_dmac_context->src_malloc)
|
||||
{
|
||||
iomem_free(v_dmac_context->src_malloc);
|
||||
v_dmac_context->src_malloc = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(v_dmac_context->callback != NULL)
|
||||
v_dmac_context->callback(v_dmac_context->ctx);
|
||||
|
||||
|
|
|
@ -201,6 +201,11 @@ void dvp_set_image_size(uint32_t width, uint32_t height)
|
|||
|
||||
void dvp_set_ai_addr(uint32_t r_addr, uint32_t g_addr, uint32_t b_addr)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
configASSERT(!is_memory_cache((uintptr_t)r_addr));
|
||||
configASSERT(!is_memory_cache((uintptr_t)g_addr));
|
||||
configASSERT(!is_memory_cache((uintptr_t)b_addr));
|
||||
#endif
|
||||
dvp->r_addr = r_addr;
|
||||
dvp->g_addr = g_addr;
|
||||
dvp->b_addr = b_addr;
|
||||
|
@ -208,6 +213,9 @@ void dvp_set_ai_addr(uint32_t r_addr, uint32_t g_addr, uint32_t b_addr)
|
|||
|
||||
void dvp_set_display_addr(uint32_t addr)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
configASSERT(!is_memory_cache((uintptr_t)addr));
|
||||
#endif
|
||||
dvp->rgb_addr = addr;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "string.h"
|
||||
#include "sysctl.h"
|
||||
#include "utils.h"
|
||||
#include "iomem.h"
|
||||
|
||||
typedef struct _i2c_slave_instance
|
||||
{
|
||||
|
@ -167,7 +168,11 @@ void i2c_send_data_dma(dmac_channel_number_t dma_channel_num, i2c_device_number_
|
|||
configASSERT(i2c_num < I2C_MAX_NUM);
|
||||
volatile i2c_t *i2c_adapter = i2c[i2c_num];
|
||||
i2c_adapter->clr_tx_abrt = i2c_adapter->clr_tx_abrt;
|
||||
#if FIX_CACHE
|
||||
uint32_t *buf = iomem_malloc(send_buf_len * sizeof(uint32_t));
|
||||
#else
|
||||
uint32_t *buf = malloc(send_buf_len * sizeof(uint32_t));
|
||||
#endif
|
||||
int i;
|
||||
for(i = 0; i < send_buf_len; i++)
|
||||
{
|
||||
|
@ -179,7 +184,11 @@ void i2c_send_data_dma(dmac_channel_number_t dma_channel_num, i2c_device_number_
|
|||
DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, send_buf_len);
|
||||
|
||||
dmac_wait_done(dma_channel_num);
|
||||
#if FIX_CACHE
|
||||
iomem_free((void *)buf);
|
||||
#else
|
||||
free((void *)buf);
|
||||
#endif
|
||||
|
||||
while((i2c_adapter->status & I2C_STATUS_ACTIVITY) || !(i2c_adapter->status & I2C_STATUS_TFE))
|
||||
{
|
||||
|
@ -233,8 +242,11 @@ void i2c_recv_data_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_
|
|||
configASSERT(i2c_num < I2C_MAX_NUM);
|
||||
|
||||
volatile i2c_t *i2c_adapter = i2c[i2c_num];
|
||||
|
||||
#if FIX_CACHE
|
||||
uint32_t *write_cmd = iomem_malloc(sizeof(uint32_t) * (send_buf_len + receive_buf_len));
|
||||
#else
|
||||
uint32_t *write_cmd = malloc(sizeof(uint32_t) * (send_buf_len + receive_buf_len));
|
||||
#endif
|
||||
size_t i;
|
||||
for(i = 0; i < send_buf_len; i++)
|
||||
write_cmd[i] = *send_buf++;
|
||||
|
@ -257,8 +269,11 @@ void i2c_recv_data_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_
|
|||
{
|
||||
receive_buf[i] = (uint8_t)write_cmd[i];
|
||||
}
|
||||
|
||||
free(write_cmd);
|
||||
#if FIX_CACHE
|
||||
iomem_free(write_cmd);
|
||||
#else
|
||||
free(write_cmd);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int i2c_dma_irq(void *ctx)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef _IOMEM_MALLOC_H
#define _IOMEM_MALLOC_H

/* uint32_t is part of this interface, so the header pulls it in itself. */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Release a buffer obtained from iomem_malloc().
 *
 * @param paddr Address returned by iomem_malloc(); NULL is ignored.
 */
void iomem_free(void *paddr);

/**
 * Allocate a buffer from the I/O heap (the region below the cached heap,
 * addressed through the -0x40000000 alias).
 *
 * @param size Requested size in bytes.
 * @return Address of the buffer, or NULL when size is 0 or no space is left.
 */
void *iomem_malloc(uint32_t size);

/**
 * @return Distance in bytes between the cached heap line and the I/O heap
 *         line, i.e. the memory not yet claimed by either heap.
 */
uint32_t iomem_unused(void);

#ifdef __cplusplus
}
#endif

#endif
|
|
@ -33,6 +33,8 @@ extern "C" {
|
|||
#define KENDRYTE_MIN(a, b) ((a) > (b) ? (b) : (a))
|
||||
#define KENDRYTE_MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define FIX_CACHE 1
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
#define KENDRYTE_CAST(type, ptr) ptr
|
||||
#else /* __ASSEMBLY__ */
|
||||
|
@ -340,6 +342,7 @@ uint32_t get_bit(volatile uint32_t *bits, uint32_t mask, size_t offset);
|
|||
*/
|
||||
uint32_t get_gpio_bit(volatile uint32_t *bits, size_t offset);
|
||||
|
||||
uint32_t is_memory_cache(uintptr_t address);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "iomem.h"
|
||||
#include "printf.h"
|
||||
#include "atomic.h"
|
||||
|
||||
#define IOMEM_BLOCK_SIZE 256
|
||||
|
||||
/* Bookkeeping for the block-based I/O heap allocator. */
typedef struct _iomem_malloc_t
{
    /* Lazy initialiser, run on first use while memrdy is still 0. */
    void (*init)(void);
    /* Reports the bytes left between the two heap lines. */
    uint32_t (*unused)(void);
    /* Lowest address of the managed region. */
    uint8_t *membase;
    /* Size of the managed region in bytes. */
    uint32_t memsize;
    /* Number of IOMEM_BLOCK_SIZE blocks in the region (entries in memmap). */
    uint32_t memtblsize;
    /* One entry per block: the first and last block of an allocation hold its
     * block count, every other entry is 0. */
    uint16_t *memmap;
    /* Set to 1 once init has completed. */
    uint8_t memrdy;
} iomem_malloc_t;
|
||||
|
||||
static void iomem_init();
|
||||
static uint32_t k_unused();
|
||||
extern char *_ioheap_line;
|
||||
extern char *_heap_line;
|
||||
extern char _heap_start[];
|
||||
extern char *_heap_cur;
|
||||
|
||||
iomem_malloc_t malloc_cortol =
|
||||
{
|
||||
iomem_init,
|
||||
k_unused,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
0
|
||||
};
|
||||
|
||||
/*
 * Fill num bytes starting at s with the value c, one byte at a time.
 * Nothing is written when num is 0.
 */
static void iomem_set(void *s, uint8_t c, uint32_t num)
{
    uint8_t *bytes = s;
    uint32_t i;

    for(i = 0; i < num; i++)
    {
        bytes[i] = c;
    }
}
|
||||
|
||||
static void iomem_init()
|
||||
{
|
||||
malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000);
|
||||
malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
|
||||
|
||||
malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
|
||||
malloc_cortol.memmap = (uint16_t *)malloc(malloc_cortol.memtblsize * 2);
|
||||
mb();
|
||||
|
||||
malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line-0x40000000);
|
||||
malloc_cortol.memsize = (uint32_t)_ioheap_line - (uint32_t)malloc_cortol.membase;
|
||||
malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;
|
||||
|
||||
iomem_set(malloc_cortol.memmap, 0, malloc_cortol.memtblsize * 2);
|
||||
iomem_set(malloc_cortol.membase, 0, malloc_cortol.memsize);
|
||||
malloc_cortol.memrdy = 1;
|
||||
}
|
||||
|
||||
static uint32_t k_unused()
|
||||
{
|
||||
uint32_t unused=0;
|
||||
unused = (uintptr_t)_ioheap_line + 0x40000000 - (uintptr_t)_heap_line;
|
||||
|
||||
return unused;
|
||||
}
|
||||
|
||||
static uint32_t k_malloc(uint32_t size)
|
||||
{
|
||||
signed long offset = 0;
|
||||
uint32_t xmemb;
|
||||
uint32_t kmemb = 0;
|
||||
|
||||
if(!malloc_cortol.memrdy)
|
||||
malloc_cortol.init();
|
||||
if(size==0)
|
||||
return 0XFFFFFFFF;
|
||||
xmemb=size / IOMEM_BLOCK_SIZE;
|
||||
if(size % IOMEM_BLOCK_SIZE)
|
||||
xmemb++;
|
||||
for(offset=malloc_cortol.memtblsize-1; offset>=0; offset--)
|
||||
{
|
||||
if(!malloc_cortol.memmap[offset])
|
||||
{
|
||||
kmemb++;
|
||||
}
|
||||
else
|
||||
{
|
||||
offset = offset - malloc_cortol.memmap[offset] + 1;
|
||||
kmemb=0;
|
||||
}
|
||||
if(kmemb==xmemb)
|
||||
{
|
||||
malloc_cortol.memmap[offset] = xmemb;
|
||||
malloc_cortol.memmap[offset+xmemb-1] = xmemb;
|
||||
return (offset * IOMEM_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
return 0XFFFFFFFF;
|
||||
}
|
||||
|
||||
static uint8_t k_free(uint32_t offset)
|
||||
{
|
||||
if(!malloc_cortol.memrdy)
|
||||
{
|
||||
malloc_cortol.init();
|
||||
return 1;
|
||||
}
|
||||
if(offset < malloc_cortol.memsize)
|
||||
{
|
||||
int index=offset / IOMEM_BLOCK_SIZE;
|
||||
int nmemb=malloc_cortol.memmap[index];
|
||||
|
||||
malloc_cortol.memmap[index] = 0;
|
||||
malloc_cortol.memmap[index+nmemb-1] = 0;
|
||||
|
||||
if((uintptr_t)_ioheap_line == (uintptr_t)malloc_cortol.membase + offset)
|
||||
{
|
||||
_ioheap_line = (char *)((uintptr_t)_ioheap_line + nmemb * IOMEM_BLOCK_SIZE);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
|
||||
void iomem_free(void *paddr)
|
||||
{
|
||||
uint32_t offset;
|
||||
if(paddr == NULL)
|
||||
return;
|
||||
offset=(uintptr_t)paddr - (uintptr_t)malloc_cortol.membase;
|
||||
k_free(offset);
|
||||
}
|
||||
|
||||
void *iomem_malloc(uint32_t size)
|
||||
{
|
||||
uint32_t offset;
|
||||
offset=k_malloc(size);
|
||||
if(offset == 0XFFFFFFFF)
|
||||
{
|
||||
printk("IOMEM malloc OUT of MEMORY!\r\n");
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
if((uintptr_t)_ioheap_line > (uintptr_t)malloc_cortol.membase + offset)
|
||||
{
|
||||
_ioheap_line = (char *)((uintptr_t)malloc_cortol.membase + offset);
|
||||
if((uintptr_t)_ioheap_line < (uintptr_t)_heap_line-0x40000000)
|
||||
{
|
||||
printk("WARNING: iomem heap line < cache heap line!\r\n");
|
||||
}
|
||||
};
|
||||
|
||||
return (void*)((uintptr_t)malloc_cortol.membase + offset);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t iomem_unused()
|
||||
{
|
||||
return malloc_cortol.unused();
|
||||
}
|
||||
|
|
@ -11,6 +11,7 @@
|
|||
#include "kpu.h"
|
||||
#include "printf.h"
|
||||
#include "nncase.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define LAYER_BURST_SIZE 12
|
||||
|
||||
|
@ -1004,8 +1005,13 @@ static void kpu_quantize(const kpu_model_quantize_layer_argument_t *arg, kpu_mod
|
|||
{
|
||||
size_t count = arg->count;
|
||||
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
|
||||
;
|
||||
const kpu_model_quant_param_t q = arg->quant_param;
|
||||
|
||||
kpu_model_quant_param_t q;
|
||||
#if FIX_CACHE
|
||||
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
|
||||
#else
|
||||
q = arg->quant_param;
|
||||
#endif
|
||||
float scale = 1.f / q.scale;
|
||||
|
||||
uint8_t *dest = (uint8_t *)(ctx->main_buffer + arg->mem_out_address);
|
||||
|
@ -1026,8 +1032,12 @@ static void kpu_kmodel_dequantize(const kpu_model_dequantize_layer_argument_t *a
|
|||
const uint8_t *src = (const uint8_t *)(ctx->main_buffer + arg->main_mem_in_address);
|
||||
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
|
||||
size_t oc, count = arg->count;
|
||||
const kpu_model_quant_param_t q = arg->quant_param;
|
||||
|
||||
kpu_model_quant_param_t q;
|
||||
#if FIX_CACHE
|
||||
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
|
||||
#else
|
||||
q = arg->quant_param;
|
||||
#endif
|
||||
for(oc = 0; oc < count; oc++)
|
||||
dest[oc] = *src++ * q.scale + q.bias;
|
||||
}
|
||||
|
@ -1357,6 +1367,9 @@ static void kpu_upload(const kpu_model_upload_layer_argument_t *arg, kpu_model_c
|
|||
|
||||
int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
configASSERT(!is_memory_cache((uintptr_t)buffer));
|
||||
#endif
|
||||
uintptr_t base_addr = (uintptr_t)buffer;
|
||||
const kpu_kmodel_header_t *header = (const kpu_kmodel_header_t *)buffer;
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "spi.h"
|
||||
#include "sysctl.h"
|
||||
#include "utils.h"
|
||||
#include "iomem.h"
|
||||
|
||||
volatile spi_t *const spi[4] =
|
||||
{
|
||||
|
@ -411,7 +412,11 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
|
|||
switch(spi_transfer_width)
|
||||
{
|
||||
case SPI_TRANS_SHORT:
|
||||
buf = malloc((tx_len) * sizeof(uint32_t));
|
||||
#if FIX_CACHE
|
||||
buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
|
||||
#else
|
||||
buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < tx_len; i++)
|
||||
buf[i] = ((uint16_t *)tx_buff)[i];
|
||||
break;
|
||||
|
@ -420,7 +425,12 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
|
|||
break;
|
||||
case SPI_TRANS_CHAR:
|
||||
default:
|
||||
buf = malloc((tx_len) * sizeof(uint32_t));
|
||||
#if FIX_CACHE
|
||||
buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
|
||||
#else
|
||||
buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
|
||||
#endif
|
||||
|
||||
for(i = 0; i < tx_len; i++)
|
||||
buf[i] = ((uint8_t *)tx_buff)[i];
|
||||
break;
|
||||
|
@ -429,13 +439,19 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
|
|||
spi_handle->ssienr = 0x01;
|
||||
|
||||
sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
|
||||
|
||||
dmac_set_single_mode(channel_num, buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
|
||||
DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, tx_len);
|
||||
spi_handle->ser = 1U << chip_select;
|
||||
dmac_wait_done(channel_num);
|
||||
if(spi_transfer_width != SPI_TRANS_INT)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
iomem_free((void *)buf);
|
||||
#else
|
||||
free((void *)buf);
|
||||
|
||||
#endif
|
||||
}
|
||||
while((spi_handle->sr & 0x05) != 0x04)
|
||||
;
|
||||
spi_handle->ser = 0x00;
|
||||
|
@ -472,8 +488,13 @@ void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
|
||||
size_t v_max_len = v_tx_len > v_rx_len ? v_tx_len : v_rx_len;
|
||||
|
||||
#if FIX_CACHE
|
||||
uint32_t *v_tx_buf = iomem_malloc(v_max_len * 4);
|
||||
uint32_t *v_rx_buf = iomem_malloc(v_max_len * 4);
|
||||
#else
|
||||
uint32_t *v_tx_buf = malloc(v_max_len * 4);
|
||||
uint32_t *v_rx_buf = malloc(v_max_len * 4);
|
||||
#endif
|
||||
uint32_t i = 0;
|
||||
switch(frame_width)
|
||||
{
|
||||
|
@ -552,8 +573,13 @@ void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
rx_buf[i] = v_rx_buf[i];
|
||||
break;
|
||||
}
|
||||
#if FIX_CACHE
|
||||
iomem_free(v_tx_buf);
|
||||
iomem_free(v_rx_buf);
|
||||
#else
|
||||
free(v_tx_buf);
|
||||
free(v_rx_buf);
|
||||
#endif
|
||||
}
|
||||
|
||||
void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
|
||||
|
@ -721,7 +747,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
switch(frame_width)
|
||||
{
|
||||
case SPI_TRANS_INT:
|
||||
#if FIX_CACHE
|
||||
write_cmd = iomem_malloc(cmd_len + rx_len);
|
||||
#else
|
||||
write_cmd = malloc(cmd_len + rx_len);
|
||||
#endif
|
||||
for(i = 0; i < cmd_len / 4; i++)
|
||||
write_cmd[i] = ((uint32_t *)cmd_buff)[i];
|
||||
read_buf = &write_cmd[i];
|
||||
|
@ -729,7 +759,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
v_cmd_len = cmd_len / 4;
|
||||
break;
|
||||
case SPI_TRANS_SHORT:
|
||||
#if FIX_CACHE
|
||||
write_cmd = iomem_malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
|
||||
#else
|
||||
write_cmd = malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len / 2; i++)
|
||||
write_cmd[i] = ((uint16_t *)cmd_buff)[i];
|
||||
read_buf = &write_cmd[i];
|
||||
|
@ -737,7 +771,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
v_cmd_len = cmd_len / 2;
|
||||
break;
|
||||
default:
|
||||
#if FIX_CACHE
|
||||
write_cmd = iomem_malloc((cmd_len + rx_len) * sizeof(uint32_t));
|
||||
#else
|
||||
write_cmd = malloc((cmd_len + rx_len) * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
write_cmd[i] = cmd_buff[i];
|
||||
read_buf = &write_cmd[i];
|
||||
|
@ -763,8 +801,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
rx_buff[i] = read_buf[i];
|
||||
break;
|
||||
}
|
||||
|
||||
#if FIX_CACHE
|
||||
iomem_free(write_cmd);
|
||||
#else
|
||||
free(write_cmd);
|
||||
#endif
|
||||
}
|
||||
|
||||
void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
|
||||
|
@ -887,14 +928,22 @@ void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
v_recv_len = rx_len / 4;
|
||||
break;
|
||||
case SPI_TRANS_SHORT:
|
||||
#if FIX_CACHE
|
||||
write_cmd = iomem_malloc(cmd_len + rx_len / 2 * sizeof(uint32_t));
|
||||
#else
|
||||
write_cmd = malloc(cmd_len + rx_len / 2 * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
write_cmd[i] = cmd_buff[i];
|
||||
read_buf = &write_cmd[i];
|
||||
v_recv_len = rx_len / 2;
|
||||
break;
|
||||
default:
|
||||
#if FIX_CACHE
|
||||
write_cmd = iomem_malloc(cmd_len + rx_len * sizeof(uint32_t));
|
||||
#else
|
||||
write_cmd = malloc(cmd_len + rx_len * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
write_cmd[i] = cmd_buff[i];
|
||||
read_buf = &write_cmd[i];
|
||||
|
@ -921,7 +970,13 @@ void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
|
|||
}
|
||||
|
||||
if(frame_width != SPI_TRANS_INT)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
iomem_free(write_cmd);
|
||||
#else
|
||||
free(write_cmd);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
|
||||
|
@ -979,7 +1034,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
|
|||
switch(frame_width)
|
||||
{
|
||||
case SPI_TRANS_INT:
|
||||
#if FIX_CACHE
|
||||
buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len);
|
||||
#else
|
||||
buf = malloc(cmd_len * sizeof(uint32_t) + tx_len);
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
buf[i] = cmd_buff[i];
|
||||
for(i = 0; i < tx_len / 4; i++)
|
||||
|
@ -987,7 +1046,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
|
|||
v_send_len = cmd_len + tx_len / 4;
|
||||
break;
|
||||
case SPI_TRANS_SHORT:
|
||||
#if FIX_CACHE
|
||||
buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
|
||||
#else
|
||||
buf = malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
buf[i] = cmd_buff[i];
|
||||
for(i = 0; i < tx_len / 2; i++)
|
||||
|
@ -995,7 +1058,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
|
|||
v_send_len = cmd_len + tx_len / 2;
|
||||
break;
|
||||
default:
|
||||
#if FIX_CACHE
|
||||
buf = iomem_malloc((cmd_len + tx_len) * sizeof(uint32_t));
|
||||
#else
|
||||
buf = malloc((cmd_len + tx_len) * sizeof(uint32_t));
|
||||
#endif
|
||||
for(i = 0; i < cmd_len; i++)
|
||||
buf[i] = cmd_buff[i];
|
||||
for(i = 0; i < tx_len; i++)
|
||||
|
@ -1006,7 +1073,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
|
|||
|
||||
spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT);
|
||||
|
||||
#if FIX_CACHE
|
||||
iomem_free((void *)buf);
|
||||
#else
|
||||
free((void *)buf);
|
||||
#endif
|
||||
}
|
||||
|
||||
void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "sysctl.h"
|
||||
#include "uart.h"
|
||||
#include "utils.h"
|
||||
#include "iomem.h"
|
||||
|
||||
#define __UART_BRATE_CONST 16
|
||||
|
||||
|
@ -158,12 +159,18 @@ static int uart_dma_callback(void *ctx)
|
|||
size_t v_buf_len = v_uart_dma_instance->buf_len;
|
||||
uint8_t *v_buffer = v_uart_dma_instance->buffer;
|
||||
uint32_t *v_recv_buffer = v_uart_dma_instance->malloc_buffer;
|
||||
|
||||
for(size_t i = 0; i < v_buf_len; i++)
|
||||
{
|
||||
v_buffer[i] = v_recv_buffer[i];
|
||||
}
|
||||
}
|
||||
#if FIX_CACHE
|
||||
iomem_free(v_uart_dma_instance->malloc_buffer);
|
||||
#else
|
||||
free(v_uart_dma_instance->malloc_buffer);
|
||||
#endif
|
||||
v_uart_dma_instance->malloc_buffer = NULL;
|
||||
if(v_uart_dma_instance->uart_int_instance.callback)
|
||||
v_uart_dma_instance->uart_int_instance.callback(v_uart_dma_instance->uart_int_instance.ctx);
|
||||
return 0;
|
||||
|
@ -184,27 +191,42 @@ int uart_receive_data(uart_device_number_t channel, char *buffer, size_t buf_len
|
|||
|
||||
/*
 * Receive buf_len bytes from a UART through DMA and wait for completion.
 *
 * The DMA transfer is configured with DMAC_TRANS_WIDTH_32, so it fills a
 * temporary array of 32-bit words; the low byte of each word is then copied
 * into buffer. In FIX_CACHE builds the temporary array comes from the I/O
 * heap instead of malloc.
 *
 * @param uart_channel UART to read from.
 * @param dmac_channel DMA channel to use.
 * @param buffer       Receives buf_len bytes.
 * @param buf_len      Number of bytes to receive.
 */
void uart_receive_data_dma(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel, uint8_t *buffer, size_t buf_len)
{
#if FIX_CACHE
    uint32_t *v_recv_buf = (uint32_t *)iomem_malloc(buf_len * sizeof(uint32_t));
#else
    uint32_t *v_recv_buf = (uint32_t *)malloc(buf_len * sizeof(uint32_t));
#endif
    configASSERT(v_recv_buf != NULL);

    sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_RX_REQ + uart_channel * 2);
    dmac_set_single_mode(dmac_channel, (void *)(&uart[uart_channel]->RBR), v_recv_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
    dmac_wait_done(dmac_channel);

    /* Keep only the data byte of every received word. */
    for(size_t i = 0; i < buf_len; i++)
    {
        buffer[i] = (uint8_t)(v_recv_buf[i] & 0xff);
    }
#if FIX_CACHE
    iomem_free(v_recv_buf);
#else
    free(v_recv_buf);
#endif
}
|
||||
|
||||
void uart_receive_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel,
|
||||
uint8_t *buffer, size_t buf_len, plic_irq_callback_t uart_callback,
|
||||
void *ctx, uint32_t priority)
|
||||
{
|
||||
uint32_t *v_recv_buf = malloc(buf_len * sizeof(uint32_t));
|
||||
#if FIX_CACHE
|
||||
uint32_t *v_recv_buf = (uint32_t *)iomem_malloc(buf_len * sizeof(uint32_t));
|
||||
#else
|
||||
uint32_t *v_recv_buf = (uint32_t *)malloc(buf_len * sizeof(uint32_t));
|
||||
#endif
|
||||
configASSERT(v_recv_buf != NULL);
|
||||
|
||||
|
||||
uart_recv_dma_instance[uart_channel].dmac_channel = dmac_channel;
|
||||
uart_recv_dma_instance[uart_channel].uart_num = uart_channel;
|
||||
uart_recv_dma_instance[uart_channel].malloc_buffer = v_recv_buf;
|
||||
|
@ -217,7 +239,7 @@ void uart_receive_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_n
|
|||
dmac_irq_register(dmac_channel, uart_dma_callback, &uart_recv_dma_instance[uart_channel], priority);
|
||||
sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_RX_REQ + uart_channel * 2);
|
||||
dmac_set_single_mode(dmac_channel, (void *)(&uart[uart_channel]->RBR), v_recv_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
|
||||
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
|
||||
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
|
||||
}
|
||||
|
||||
int uart_send_data(uart_device_number_t channel, const char *buffer, size_t buf_len)
|
||||
|
@ -233,22 +255,37 @@ int uart_send_data(uart_device_number_t channel, const char *buffer, size_t buf_
|
|||
|
||||
/*
 * Send buf_len bytes to a UART through DMA and wait for completion.
 *
 * The DMA transfer is configured with DMAC_TRANS_WIDTH_32, so each byte is
 * first widened into a temporary array of 32-bit words. In FIX_CACHE builds
 * that array comes from the I/O heap instead of malloc.
 *
 * @param uart_channel UART to write to.
 * @param dmac_channel DMA channel to use.
 * @param buffer       Bytes to send.
 * @param buf_len      Number of bytes to send.
 */
void uart_send_data_dma(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel, const uint8_t *buffer, size_t buf_len)
{
#if FIX_CACHE
    uint32_t *v_send_buf = iomem_malloc(buf_len * sizeof(uint32_t));
#else
    uint32_t *v_send_buf = malloc(buf_len * sizeof(uint32_t));
#endif
    configASSERT(v_send_buf != NULL);

    for(size_t i = 0; i < buf_len; i++)
    {
        v_send_buf[i] = buffer[i];
    }

    sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_TX_REQ + uart_channel * 2);
    dmac_set_single_mode(dmac_channel, v_send_buf, (void *)(&uart[uart_channel]->THR), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
                         DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);

    dmac_wait_done(dmac_channel);
#if FIX_CACHE
    iomem_free((void *)v_send_buf);
#else
    free((void *)v_send_buf);
#endif
}
|
||||
|
||||
void uart_send_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel,
|
||||
const uint8_t *buffer, size_t buf_len, plic_irq_callback_t uart_callback,
|
||||
void *ctx, uint32_t priority)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
uint32_t *v_send_buf = iomem_malloc(buf_len * sizeof(uint32_t));
|
||||
#else
|
||||
uint32_t *v_send_buf = malloc(buf_len * sizeof(uint32_t));
|
||||
#endif
|
||||
configASSERT(v_send_buf != NULL);
|
||||
|
||||
uart_send_dma_instance[uart_channel] = (uart_dma_instance_t){
|
||||
|
|
|
@ -41,3 +41,11 @@ uint32_t get_gpio_bit(volatile uint32_t *bits, size_t offset)
|
|||
{
|
||||
return get_bit(bits, 1, offset);
|
||||
}
|
||||
|
||||
#define MEM_CACHE_LEN (6 * 1024 * 1024)

/*
 * Report whether an address falls in the window
 * [0x80000000, 0x80000000 + MEM_CACHE_LEN).
 *
 * @return 1 when the address is inside the window, 0 otherwise.
 */
uint32_t is_memory_cache(uintptr_t address)
{
    if(address < 0x80000000)
    {
        return 0;
    }
    return (address < 0x80000000 + MEM_CACHE_LEN) ? 1 : 0;
}
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include <cmath>
|
||||
#include <runtime/runtime_op_utility.h>
|
||||
#include <xtl/xspan.hpp>
|
||||
#include <cstring>
|
||||
#include <utils.h>
|
||||
|
||||
namespace nncase
|
||||
{
|
||||
|
@ -139,11 +141,18 @@ namespace kernels
|
|||
|
||||
inline void matmul(const float *input_a, const float *input_b, float *output, const float *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, const value_range<float> &fused_activation)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
float *cache_mem = new float[b_cols];
|
||||
memcpy(cache_mem, bias, b_cols*sizeof(float));
|
||||
#else
|
||||
const float *cache_mem =bias;
|
||||
#endif
|
||||
for (size_t oy = 0; oy < a_rows; oy++)
|
||||
{
|
||||
for (size_t ox = 0; ox < b_cols; ox++)
|
||||
{
|
||||
float value = bias[ox];
|
||||
float value = cache_mem[ox];
|
||||
|
||||
for (size_t i = 0; i < a_cols; i++)
|
||||
{
|
||||
const auto a = input_a[oy * a_cols + i];
|
||||
|
@ -154,6 +163,9 @@ namespace kernels
|
|||
output[oy * b_cols + ox] = details::apply_activation(value, fused_activation);
|
||||
}
|
||||
}
|
||||
#if FIX_CACHE
|
||||
delete []cache_mem;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class T>
|
||||
|
|
Loading…
Reference in New Issue