[bio][spiflash] Enforce cache line aligned buffers for default read/write. Use default read hook for stm32 spiflash.

This commit is contained in:
Gurjant Kalsi
2015-11-03 15:13:58 -08:00
parent 7e0a4d3f90
commit 3dafd61197
4 changed files with 148 additions and 76 deletions

View File

@@ -53,8 +53,7 @@ long long strtoll(const char *nptr, char **endptr, int base);
/* allocate a buffer on the stack aligned and padded to the cpu's cache line size */
#define STACKBUF_DMA_ALIGN(var, size) \
uint8_t __##var[(size) + CACHE_LINE]; uint8_t *var = (uint8_t *)(ROUNDUP((addr_t)__##var, CACHE_LINE))
uint8_t var[ROUNDUP(size, CACHE_LINE)] __ALIGNED(CACHE_LINE);
void abort(void) __attribute__((noreturn));
void qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *));
void *bsearch(const void *key, const void *base, size_t num_elems, size_t size,

View File

@@ -59,8 +59,12 @@ static ssize_t bio_default_read(struct bdev *dev, void *_buf, off_t offset, size
if ((offset % dev->block_size) != 0) {
/* read in the block */
err = bio_read_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
/* copy what we need */
size_t block_offset = offset % dev->block_size;
@@ -75,22 +79,42 @@ static ssize_t bio_default_read(struct bdev *dev, void *_buf, off_t offset, size
}
LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);
// If the device requires alignment AND our buffer is not already aligned.
bool requires_alignment =
(dev->flags & BIO_FLAG_CACHE_ALIGNED_READS) &&
(IS_ALIGNED((size_t)buf, CACHE_LINE) == false);
/* handle middle blocks */
if (len >= dev->block_size) {
/* do the middle reads */
size_t block_count = len / dev->block_size;
err = bio_read_block(dev, buf, block, block_count);
if (err < 0)
if (requires_alignment) {
while (len >= dev->block_size) {
/* do the middle reads */
err = bio_read_block(dev, temp, block, 1);
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
memcpy(buf, temp, dev->block_size);
buf += dev->block_size;
len -= dev->block_size;
bytes_read += dev->block_size;
block++;
}
} else {
uint32_t num_blocks = divpow2(len, dev->block_shift);
err = bio_read_block(dev, buf, block, num_blocks);
if (err < 0) {
goto err;
/* increment our buffers */
size_t bytes = block_count * dev->block_size;
DEBUG_ASSERT(bytes <= len);
buf += bytes;
len -= bytes;
bytes_read += bytes;
block += block_count;
} else if ((size_t)err != dev->block_size * num_blocks) {
err = ERR_IO;
goto err;
}
buf += err;
len -= err;
bytes_read += err;
block += num_blocks;
}
LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);
@@ -98,8 +122,12 @@ static ssize_t bio_default_read(struct bdev *dev, void *_buf, off_t offset, size
if (len > 0) {
/* read the block */
err = bio_read_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
/* copy the partial block from our temp buffer */
memcpy(buf, temp, len);
@@ -128,8 +156,12 @@ static ssize_t bio_default_write(struct bdev *dev, const void *_buf, off_t offse
if ((offset % dev->block_size) != 0) {
/* read in the block */
err = bio_read_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
/* copy what we need */
size_t block_offset = offset % dev->block_size;
@@ -138,8 +170,12 @@ static ssize_t bio_default_write(struct bdev *dev, const void *_buf, off_t offse
/* write it back out */
err = bio_write_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
/* increment our buffers */
buf += tocopy;
@@ -149,21 +185,45 @@ static ssize_t bio_default_write(struct bdev *dev, const void *_buf, off_t offse
}
LTRACEF("buf %p, block %u, len %zd\n", buf, block, len);
// If the device requires alignment AND our buffer is not already aligned.
bool requires_alignment =
(dev->flags & BIO_FLAG_CACHE_ALIGNED_WRITES) &&
(IS_ALIGNED((size_t)buf, CACHE_LINE) == false);
/* handle middle blocks */
if (len >= dev->block_size) {
/* do the middle writes */
size_t block_count = len / dev->block_size;
if (requires_alignment) {
while (len >= dev->block_size) {
/* do the middle writes */
memcpy(temp, buf, dev->block_size);
err = bio_write_block(dev, temp, block, 1);
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
buf += dev->block_size;
len -= dev->block_size;
bytes_written += dev->block_size;
block++;
}
} else {
uint32_t block_count = divpow2(len, dev->block_shift);
err = bio_write_block(dev, buf, block, block_count);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size * block_count) {
err = ERR_IO;
goto err;
}
/* increment our buffers */
size_t bytes = block_count * dev->block_size;
DEBUG_ASSERT(bytes <= len);
DEBUG_ASSERT((size_t)err == (block_count * dev->block_size));
buf += bytes;
len -= bytes;
bytes_written += bytes;
buf += err;
len -= err;
bytes_written += err;
block += block_count;
}
@@ -172,16 +232,24 @@ static ssize_t bio_default_write(struct bdev *dev, const void *_buf, off_t offse
if (len > 0) {
/* read the block */
err = bio_read_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
/* copy the partial block from our temp buffer */
memcpy(temp, buf, len);
/* write it back out */
err = bio_write_block(dev, temp, block, 1);
if (err < 0)
if (err < 0) {
goto err;
} else if ((size_t)err != dev->block_size) {
err = ERR_IO;
goto err;
}
bytes_written += len;
}

View File

@@ -30,6 +30,7 @@
#include <lib/bio.h>
#include <lib/partition.h>
#include <platform.h>
#include <kernel/thread.h>
#if WITH_LIB_CKSUM
#include <lib/cksum.h>
@@ -133,10 +134,10 @@ usage:
return -1;
}
uint8_t buf[256];
uint8_t* buf = memalign(CACHE_LINE, 256);
ssize_t err = 0;
while (len > 0) {
size_t amt = MIN(sizeof(buf), len);
size_t amt = MIN(256, len);
ssize_t err = bio_read(dev, buf, offset, amt);
if (err < 0) {

View File

@@ -32,8 +32,11 @@
#include <platform/n25qxxa.h>
#include <platform/n25q512a.h>
#include <platform/qspi.h>
#include <trace.h>
#define FOUR_BYTE_ADDR_THRESHOLD (1 << 24)
#define LOCAL_TRACE 0
static QSPI_HandleTypeDef qspi_handle;
static DMA_HandleTypeDef hdma;
@@ -247,21 +250,31 @@ static status_t qspi_reset_memory_unsafe(QSPI_HandleTypeDef* hqspi)
return NO_ERROR;
}
static ssize_t spiflash_bdev_read(struct bdev* device, void* buf, off_t offset, size_t len)
static ssize_t spiflash_bdev_read_block(struct bdev* device, void* buf,
bnum_t block, uint count)
{
len = bio_trim_range(device, offset, len);
if (len == 0) {
return 0;
LTRACEF("device %p, buf %p, block %u, count %u\n",
device, buf, block, count);
if (!IS_ALIGNED((uintptr_t)buf, CACHE_LINE)) {
DEBUG_ASSERT(IS_ALIGNED((uintptr_t)buf, CACHE_LINE));
return ERR_INVALID_ARGS;
}
count = bio_trim_block_range(device, block, count);
if (count == 0)
return 0;
QSPI_CommandTypeDef s_command;
HAL_StatusTypeDef status;
uint64_t largest_offset = (block + count) * device->block_size;
// /* Initialize the read command */
s_command.InstructionMode = QSPI_INSTRUCTION_1_LINE;
s_command.Instruction = get_specialized_instruction(QUAD_OUT_FAST_READ_CMD, offset);
s_command.Instruction = get_specialized_instruction(QUAD_OUT_FAST_READ_CMD, largest_offset);
s_command.AddressMode = QSPI_ADDRESS_1_LINE;
s_command.AddressSize = get_address_size(offset);
s_command.AddressSize = get_address_size(largest_offset);
s_command.AlternateByteMode = QSPI_ALTERNATE_BYTES_NONE;
s_command.DataMode = QSPI_DATA_4_LINES;
s_command.DummyCycles = N25QXXA_DUMMY_CYCLES_READ_QUAD;
@@ -269,24 +282,31 @@ static ssize_t spiflash_bdev_read(struct bdev* device, void* buf, off_t offset,
s_command.DdrHoldHalfCycle = QSPI_DDR_HHC_ANALOG_DELAY;
s_command.SIOOMode = QSPI_SIOO_INST_EVERY_CMD;
s_command.NbData = len;
s_command.Address = offset;
s_command.NbData = device->block_size;
size_t retcode = len;
ssize_t retcode = 0;
mutex_acquire(&spiflash_mutex);
// /* Configure the command */
status = HAL_QSPI_Command(&qspi_handle, &s_command, HAL_QPSI_TIMEOUT_DEFAULT_VALUE);
if (status != HAL_OK) {
retcode = hal_error_to_status(status);
goto err;
}
// /* Reception of the data */
status = qspi_rx_dma(&qspi_handle, &s_command, buf);
if (status != HAL_OK) {
retcode = hal_error_to_status(status);
goto err;
s_command.Address = block * device->block_size;
for (uint i = 0; i < count; i++) {
status = HAL_QSPI_Command(&qspi_handle, &s_command, HAL_QPSI_TIMEOUT_DEFAULT_VALUE);
if (status != HAL_OK) {
retcode = hal_error_to_status(status);
goto err;
}
// /* Reception of the data */
status = qspi_rx_dma(&qspi_handle, &s_command, buf);
if (status != HAL_OK) {
retcode = hal_error_to_status(status);
goto err;
}
buf += device->block_size;
retcode += device->block_size;
s_command.Address += device->block_size;
}
err:
@@ -294,17 +314,6 @@ err:
return retcode;
}
static ssize_t spiflash_bdev_read_block(struct bdev* device, void* buf,
bnum_t block, uint count)
{
count = bio_trim_block_range(device, block, count);
if (count == 0)
return 0;
return spiflash_bdev_read(device, buf, block << device->block_shift,
count << device->block_shift);
}
static ssize_t spiflash_bdev_write_block(struct bdev* device, const void* _buf,
bnum_t block, uint count)
{
@@ -516,7 +525,7 @@ status_t qspi_flash_init(size_t flash_size)
(flash_size / N25QXXA_PAGE_SIZE), 1, &geometry,
BIO_FLAG_CACHE_ALIGNED_READS);
qspi_flash_device.read = &spiflash_bdev_read;
// qspi_flash_device.read: Use default hook.
qspi_flash_device.read_block = &spiflash_bdev_read_block;
// qspi_flash_device.write has a default hook that will be okay
qspi_flash_device.write_block = &spiflash_bdev_write_block;
@@ -648,31 +657,26 @@ static HAL_StatusTypeDef qspi_cmd(QSPI_HandleTypeDef* qspi_handle,
// Send s_command->NbData bytes from buf over QSPI via DMA and block until the
// transfer-complete interrupt signals tx_event.
//
// NOTE(review): this text is a diff rendering with the +/- markers stripped.
// The two adjacent cache-maintenance calls below appear to be the removed and
// added variants of the same line (clean_invalidate -> clean); only one of
// them exists in the real source — confirm against the repository.
static HAL_StatusTypeDef qspi_tx_dma(QSPI_HandleTypeDef* qspi_handle, QSPI_CommandTypeDef* s_command, uint8_t* buf)
{
// Make sure cache is flushed to RAM before invoking the DMA controller.
arch_clean_invalidate_cache_range((addr_t)buf, s_command->NbData);
arch_clean_cache_range((addr_t)buf, s_command->NbData);
// Start the DMA transmit, then wait for the ISR to signal completion.
HAL_StatusTypeDef result = HAL_QSPI_Transmit_DMA(qspi_handle, buf);
event_wait(&tx_event);
// CPU may have cached data while we were performing the DMA.
// NOTE(review): for a transmit (device reads memory) this post-DMA invalidate
// looks like the removed side of the diff — confirm which variant is current.
arch_invalidate_cache_range((addr_t)buf, s_command->NbData);
return result;
}
// Send data and wait for interrupt.
static HAL_StatusTypeDef qspi_rx_dma(QSPI_HandleTypeDef* qspi_handle, QSPI_CommandTypeDef* s_command, uint8_t* buf)
{
// DMA controller is about to overwrite this memory. All data pointing to it
// is invalid.
// Make sure the front and back of the buffer are cache aligned.
DEBUG_ASSERT(IS_ALIGNED((uintptr_t)buf, CACHE_LINE));
DEBUG_ASSERT(IS_ALIGNED(((uintptr_t)buf) + s_command->NbData, CACHE_LINE));
arch_invalidate_cache_range((addr_t)buf, s_command->NbData);
HAL_StatusTypeDef result = HAL_QSPI_Receive_DMA(qspi_handle, buf);
event_wait(&rx_event);
// DMA controller has modified this memory. Any caches that reference it are
// now invalid.
arch_invalidate_cache_range((addr_t)buf, s_command->NbData);
return result;
}