[lib][uefi] Add interrupt based async IO support

Add a new API (read_async) to the bio layer. The function returns
immediately; when the block driver completes the IO request, a
callback is invoked from interrupt context.
Kelvin Zhang
2025-07-30 16:11:23 -07:00
committed by Kelvin Zhang
parent cfeef533d1
commit af1f19a2cc
4 changed files with 170 additions and 72 deletions
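
A minimal sketch of how a caller can use the new API (the device name
"virtio0" and the buffer size are illustrative, not part of this commit):
issue the read, overlap it with other work, then wait for the completion
callback to signal an event.

#include <kernel/event.h>
#include <lib/bio.h>
#include <lk/err.h>

/* Runs in interrupt context when the driver completes the request,
 * so it does nothing but signal the waiter. */
static void read_done(void *cookie, bdev_t *dev, ssize_t bytes_or_error) {
    event_signal((event_t *)cookie, false);
}

static void example_async_read(void) {
    bdev_t *dev = bio_open("virtio0");  /* illustrative device name */
    static char buf[4096];
    event_t done;
    event_init(&done, false, EVENT_FLAG_AUTOUNSIGNAL);

    status_t err = bio_read_async(dev, buf, 0, sizeof(buf), read_done, &done);
    if (err == ERR_NOT_SUPPORTED) {
        (void)bio_read(dev, buf, 0, sizeof(buf));  /* synchronous fallback */
    } else if (err >= NO_ERROR) {
        /* ... overlap other work here ... */
        event_wait(&done);
    }
    bio_close(dev);
}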

View File

@@ -5,20 +5,19 @@
  * license that can be found in the LICENSE file or at
  * https://opensource.org/licenses/MIT
  */
-#include <dev/virtio/block.h>
-#include <stdlib.h>
-#include <lk/debug.h>
-#include <assert.h>
-#include <lk/trace.h>
-#include <lk/compiler.h>
-#include <lk/list.h>
-#include <lk/err.h>
-#include <kernel/thread.h>
-#include <kernel/event.h>
-#include <kernel/mutex.h>
-#include <lib/bio.h>
+#include <dev/virtio/block.h>
+#include <inttypes.h>
+#include <kernel/event.h>
+#include <kernel/spinlock.h>
+#include <kernel/thread.h>
+#include <lib/bio.h>
+#include <lk/compiler.h>
+#include <lk/debug.h>
+#include <lk/err.h>
+#include <lk/list.h>
+#include <lk/trace.h>
+#include <stdlib.h>
 #if WITH_KERNEL_VM
 #include <kernel/vm.h>
@@ -83,6 +82,20 @@ struct virtio_blk_discard_write_zeroes {
 };
 STATIC_ASSERT(sizeof(struct virtio_blk_req) == 16);
+
+struct virtio_block_txn {
+    /* bio callback, for async */
+    void *cookie;
+    size_t len;
+    /* for async calls */
+    void (*callback)(void *, struct bdev *, ssize_t);
+    /* virtio request structure, must be DMA-able */
+    struct virtio_blk_req req;
+    /* response status, must be DMA-able */
+    uint8_t status;
+};
+
 #define VIRTIO_BLK_F_BARRIER (1<<0) // legacy
 #define VIRTIO_BLK_F_SIZE_MAX (1<<1)
 #define VIRTIO_BLK_F_SEG_MAX (1<<2)
@@ -112,29 +125,24 @@ STATIC_ASSERT(sizeof(struct virtio_blk_req) == 16);
 #define VIRTIO_BLK_S_IOERR 1
 #define VIRTIO_BLK_S_UNSUPP 2
+#define VIRTIO_BLK_RING_LEN 256
+
 static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
 static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
+static status_t virtio_bdev_read_async(
+    struct bdev *bdev, void *buf, off_t offset, size_t len,
+    void (*callback)(void *, struct bdev *, ssize_t), void *cookie);
 static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);
 
 struct virtio_block_dev {
     struct virtio_device *dev;
-    mutex_t lock;
-    event_t io_event;
 
     /* bio block device */
     bdev_t bdev;
 
     /* our negotiated guest features */
     uint32_t guest_features;
 
-    /* one blk_req structure for io, not crossing a page boundary */
-    struct virtio_blk_req *blk_req;
-    paddr_t blk_req_phys;
-
-    /* one uint8_t response word */
-    uint8_t blk_response;
-    paddr_t blk_response_phys;
+    struct virtio_block_txn *txns;
 };
 
 static void dump_feature_bits(const char *name, uint32_t feature) {
@@ -165,26 +173,9 @@ status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
     if (!bdev)
         return ERR_NO_MEMORY;
 
-    mutex_init(&bdev->lock);
-    event_init(&bdev->io_event, false, EVENT_FLAG_AUTOUNSIGNAL);
-
     bdev->dev = dev;
     dev->priv = bdev;
 
-    bdev->blk_req = memalign(sizeof(struct virtio_blk_req), sizeof(struct virtio_blk_req));
-#if WITH_KERNEL_VM
-    bdev->blk_req_phys = vaddr_to_paddr(bdev->blk_req);
-#else
-    bdev->blk_req_phys = (uint64_t)(uintptr_t)bdev->blk_req;
-#endif
-    LTRACEF("blk_req structure at %p (%#lx phys)\n", bdev->blk_req, bdev->blk_req_phys);
-
-#if WITH_KERNEL_VM
-    bdev->blk_response_phys = vaddr_to_paddr(&bdev->blk_response);
-#else
-    bdev->blk_response_phys = (uint64_t)(uintptr_t)&bdev->blk_response;
-#endif
-
     /* make sure the device is reset */
     virtio_reset_device(dev);
@@ -205,7 +196,6 @@ status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
     bdev->guest_features &= (VIRTIO_BLK_F_SIZE_MAX |
                              VIRTIO_BLK_F_BLK_SIZE |
                              VIRTIO_BLK_F_GEOMETRY |
-                             VIRTIO_BLK_F_BLK_SIZE |
                              VIRTIO_BLK_F_TOPOLOGY |
                              VIRTIO_BLK_F_DISCARD |
                              VIRTIO_BLK_F_WRITE_ZEROES);
@@ -214,7 +204,12 @@ status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
     /* TODO: handle a RO feature */
 
     /* allocate a virtio ring */
-    virtio_alloc_ring(dev, 0, 256);
+    virtio_alloc_ring(dev, 0, VIRTIO_BLK_RING_LEN);
+
+    // The descriptor index is used to index into the txns array.
+    // This is a simple way to keep track of which transaction entry is
+    // free, and which transaction entry corresponds to which descriptor.
+    // Hence, the txns array is allocated with the same size as the ring.
+    bdev->txns = memalign(sizeof(struct virtio_block_txn), VIRTIO_BLK_RING_LEN * sizeof(struct virtio_block_txn));
 
     /* set our irq handler */
     dev->irq_driver_callback = &virtio_block_irq_driver_callback;
@@ -233,6 +228,7 @@ status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
     /* override our block device hooks */
     bdev->bdev.read_block = &virtio_bdev_read_block;
     bdev->bdev.write_block = &virtio_bdev_write_block;
+    bdev->bdev.read_async = &virtio_bdev_read_async;
 
     bio_register_device(&bdev->bdev);
@@ -268,8 +264,10 @@ status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
 static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
     struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;
+    struct virtio_block_txn *txn = &bdev->txns[e->id];
 
-    LTRACEF("dev %p, ring %u, e %p, id %u, len %u\n", dev, ring, e, e->id, e->len);
+    LTRACEF("dev %p, ring %u, e %p, id %u, len %u, status %d\n", dev, ring, e, e->id, e->len, txn->status);
 
     /* parse our descriptor chain, add back to the free queue */
     uint16_t i = e->id;
     for (;;) {
@@ -292,38 +290,60 @@ static enum handler_return virtio_block_irq_driver_callback(struct virtio_device
         i = next;
     }
 
-    /* signal our event */
-    event_signal(&bdev->io_event, false);
+    if (txn->callback) {
+        // async
+        ssize_t result =
+            (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)txn->len : ERR_IO;
+        LTRACEF("calling callback %p with cookie %p, len %zu\n", txn->callback,
+                txn->cookie, result);
+        txn->callback(txn->cookie, &bdev->bdev, result);
+    }
 
     return INT_RESCHEDULE;
 }
 
-ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, const off_t offset, const size_t len, const bool write) {
+static status_t virtio_block_do_txn(struct virtio_device *dev, void *buf,
+                                    off_t offset, size_t len, bool write,
+                                    bio_async_callback_t callback, void *cookie,
+                                    struct virtio_block_txn **txn_out) {
     struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;
 
     uint16_t i;
     struct vring_desc *desc;
 
     LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);
 
-    mutex_acquire(&bdev->lock);
-
-    /* set up the request */
-    bdev->blk_req->type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
-    bdev->blk_req->ioprio = 0;
-    bdev->blk_req->sector = offset / 512;
-    LTRACEF("blk_req type %u ioprio %u sector %llu\n",
-            bdev->blk_req->type, bdev->blk_req->ioprio, bdev->blk_req->sector);
-
     /* put together a transfer */
     desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
     LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
     if (desc == NULL) {
         return ERR_NO_RESOURCES;
     }
 
+    struct virtio_block_txn *txn = &bdev->txns[i];
+
+    /* set up the request */
+    txn->req.type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
+    txn->req.ioprio = 0;
+    txn->req.sector = offset / 512;
+    txn->callback = callback;
+    txn->cookie = cookie;
+    txn->len = len;
+    LTRACEF("blk_req type %u ioprio %u sector %llu\n", txn->req.type,
+            txn->req.ioprio, txn->req.sector);
+
+    if (txn_out) {
+        *txn_out = txn;
+    }
+
     // XXX not cache safe.
     // At the moment only tested on arm qemu, which doesn't emulate cache.
 
-    /* set up the descriptor pointing to the head */
-    desc->addr = bdev->blk_req_phys;
+    /* set up the descriptor pointing to the head */
+#if WITH_KERNEL_VM
+    paddr_t req_phys = vaddr_to_paddr(&txn->req);
+#else
+    paddr_t req_phys = (uint64_t)(uintptr_t)&txn->req;
+#endif
+    desc->addr = req_phys;
     desc->len = sizeof(struct virtio_blk_req);
     desc->flags |= VRING_DESC_F_NEXT;
@@ -389,8 +409,13 @@ ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, const off_
 #endif
 
     /* set up the descriptor pointing to the response */
+#if WITH_KERNEL_VM
+    paddr_t status_phys = vaddr_to_paddr(&txn->status);
+#else
+    paddr_t status_phys = (uint64_t)(uintptr_t)&txn->status;
+#endif
     desc = virtio_desc_index_to_desc(dev, 0, desc->next);
-    desc->addr = bdev->blk_response_phys;
+    desc->addr = status_phys;
     desc->len = 1;
     desc->flags = VRING_DESC_F_WRITE;
@@ -400,16 +425,36 @@ ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf, const off_
     /* kick it off */
     virtio_kick(dev, 0);
 
+    return NO_ERROR;
+}
+
+static void sync_completion_cb(void *cookie, struct bdev *dev, ssize_t bytes) {
+    DEBUG_ASSERT(cookie);
+    event_t *event = (event_t *)cookie;
+    event_signal(event, false);
+}
+
+ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf,
+                                const off_t offset, const size_t len,
+                                const bool write) {
+    struct virtio_block_txn *txn;
+    event_t event;
+    event_init(&event, false, EVENT_FLAG_AUTOUNSIGNAL);
+
+    status_t err = virtio_block_do_txn(dev, buf, offset, len, write,
+                                       &sync_completion_cb, &event, &txn);
+    if (err < 0) {
+        return err;
+    }
+
     /* wait for the transfer to complete */
-    event_wait(&bdev->io_event);
+    event_wait(&event);
 
-    LTRACEF("status 0x%hhx\n", bdev->blk_response);
+    LTRACEF("status 0x%hhx\n", txn->status);
 
     /* TODO: handle transfer errors and return error */
+    ssize_t result = (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)len : ERR_IO;
 
-    mutex_release(&bdev->lock);
-
-    return len;
+    return result;
 }
 
 static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count) {
@@ -422,6 +467,17 @@ static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block
     return result;
 }
 
+static status_t virtio_bdev_read_async(struct bdev *bdev, void *buf,
+                                       off_t offset, size_t len,
+                                       bio_async_callback_t callback,
+                                       void *cookie) {
+    struct virtio_block_dev *dev =
+        containerof(bdev, struct virtio_block_dev, bdev);
+    return virtio_block_do_txn(dev->dev, buf, offset, len, false, callback,
+                               cookie, NULL);
+}
+
 static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count) {
     struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);
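
The reworked sync path above is the classic sync-over-async pattern: submit
with a callback that signals an event, then block on the event. A
self-contained variant (a sketch against the new bio API, not code from this
commit) that also captures the result inside the callback, so the waiter never
touches driver-owned transaction state after completion:

#include <kernel/event.h>
#include <lib/bio.h>
#include <lk/err.h>

struct sync_ctx {
    event_t done;
    ssize_t result;
};

/* interrupt context: record the outcome, wake the waiter */
static void sync_cb(void *cookie, bdev_t *dev, ssize_t bytes_or_error) {
    struct sync_ctx *ctx = (struct sync_ctx *)cookie;
    ctx->result = bytes_or_error;
    event_signal(&ctx->done, false);
}

/* blocking read built on the async primitive */
static ssize_t read_blocking(bdev_t *dev, void *buf, off_t offset, size_t len) {
    struct sync_ctx ctx = { .result = ERR_IO };
    event_init(&ctx.done, false, EVENT_FLAG_AUTOUNSIGNAL);

    status_t err = bio_read_async(dev, buf, offset, len, sync_cb, &ctx);
    if (err < NO_ERROR)
        return err;

    event_wait(&ctx.done);
    return ctx.result;
}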

View File

@@ -369,6 +369,26 @@ ssize_t bio_read(bdev_t *dev, void *buf, off_t offset, size_t len) {
     return dev->read(dev, buf, offset, len);
 }
 
+status_t bio_read_async(bdev_t *dev, void *buf, off_t offset, size_t len,
+                        bio_async_callback_t callback, void *callback_context) {
+    LTRACEF("dev '%s', buf %p, offset %lld, len %zd\n", dev->name, buf, offset,
+            len);
+    DEBUG_ASSERT(dev && dev->ref > 0);
+    DEBUG_ASSERT(buf);
+    if (dev->read_async == NULL) {
+        return ERR_NOT_SUPPORTED;
+    }
+
+    /* range check */
+    len = bio_trim_range(dev, offset, len);
+    if (len == 0) {
+        return 0;
+    }
+
+    return dev->read_async(dev, buf, offset, len, callback, callback_context);
+}
+
 ssize_t bio_read_block(bdev_t *dev, void *buf, bnum_t block, uint count) {
     LTRACEF("dev '%s', buf %p, block %d, count %u\n", dev->name, buf, block, count);

View File

@@ -47,6 +47,9 @@ typedef struct bdev {
     /* function pointers */
     ssize_t (*read)(struct bdev *, void *buf, off_t offset, size_t len);
+    status_t (*read_async)(struct bdev *, void *buf, off_t offset, size_t len,
+                           void (*callback)(void *cookie, struct bdev *, ssize_t),
+                           void *callback_context);
     ssize_t (*read_block)(struct bdev *, void *buf, bnum_t block, uint count);
     ssize_t (*write)(struct bdev *, const void *buf, off_t offset, size_t len);
     ssize_t (*write_block)(struct bdev *, const void *buf, bnum_t block, uint count);
@@ -55,10 +58,13 @@ typedef struct bdev {
     void (*close)(struct bdev *);
 } bdev_t;
 
+typedef void (*bio_async_callback_t)(void *cookie, bdev_t *dev, ssize_t status);
+
 /* user api */
 bdev_t *bio_open(const char *name);
 void bio_close(bdev_t *dev);
 ssize_t bio_read(bdev_t *dev, void *buf, off_t offset, size_t len);
+status_t bio_read_async(bdev_t *dev, void *buf, off_t offset, size_t len,
+                        bio_async_callback_t callback, void *callback_context);
 ssize_t bio_read_block(bdev_t *dev, void *buf, bnum_t block, uint count);
 ssize_t bio_write(bdev_t *dev, const void *buf, off_t offset, size_t len);
 ssize_t bio_write_block(bdev_t *dev, const void *buf, bnum_t block, uint count);
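
From the driver side, the contract implied by these declarations is: read_async
queues the request and returns immediately, and the driver later invokes the
callback exactly once from interrupt context with the byte count on success or
a negative error. A skeletal hookup, with hypothetical mydev_* names for
illustration:

#include <lib/bio.h>
#include <lk/err.h>

/* hypothetical driver hook: program the hardware, stash cb/cookie with
 * the in-flight request, and return; the IRQ handler later calls
 * cb(cookie, bdev, bytes_or_error) */
static status_t mydev_read_async(struct bdev *bdev, void *buf, off_t offset,
                                 size_t len, bio_async_callback_t cb,
                                 void *cookie) {
    /* ... queue the transfer with the controller ... */
    return NO_ERROR;
}

static void mydev_register(bdev_t *bdev) {
    bdev->read_async = &mydev_read_async;
    bio_register_device(bdev);
}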

View File

@@ -27,6 +27,7 @@
 #include <uefi/protocols/block_io2_protocol.h>
 #include <uefi/types.h>
 
 #include "defer.h"
+#include "events.h"
 #include "io_stack.h"
 #include "memory_protocols.h"
@@ -48,6 +49,21 @@ EfiStatus reset(EfiBlockIo2Protocol* self, bool extended_verification) {
   return UNSUPPORTED;
 }
 
+void async_read_callback(void* cookie, struct bdev* dev, ssize_t bytes_read) {
+  // |cookie| might be identity mapped memory, which is in UEFI address space.
+  // We need to switch to the UEFI address space to access it.
+  auto aspace = set_boot_aspace();
+  auto old_aspace = vmm_set_active_aspace(aspace);
+  auto token = reinterpret_cast<EfiBlockIo2Token*>(cookie);
+  if (bytes_read < 0) {
+    token->transaction_status = DEVICE_ERROR;
+  } else {
+    token->transaction_status = SUCCESS;
+  }
+  signal_event(token->event);
+  vmm_set_active_aspace(old_aspace);
+}
+
 // Read from dev, after I/O completes, signal token->event and set
 // token->transaction_status
 EfiStatus read_blocks_async(bdev_t* dev, uint64_t lba, EfiBlockIo2Token* token,
@@ -60,6 +76,11 @@ EfiStatus read_blocks_async(bdev_t* dev, uint64_t lba, EfiBlockIo2Token* token,
printf("Invalid token %p\n", token);
return INVALID_PARAMETER;
}
if (dev->read_async != nullptr) {
bio_read_async(dev, buffer, lba * dev->block_size, buffer_size,
async_read_callback, token);
return SUCCESS;
}
// First draft of this API will just use a background thread.
// More efficient version can be implemented once LK's bio layer
// supports async IO
@@ -70,12 +91,7 @@ EfiStatus read_blocks_async(bdev_t* dev, uint64_t lba, EfiBlockIo2Token* token,
         vmm_set_active_aspace(aspace);
         auto bytes_read =
             bio_read_block(dev, buffer, lba, buffer_size / dev->block_size);
-        if (static_cast<size_t>(bytes_read) != buffer_size) {
-          token->transaction_status = DEVICE_ERROR;
-        } else {
-          token->transaction_status = SUCCESS;
-        }
-        signal_event(token->event);
+        async_read_callback(token, dev, bytes_read);
         return 0;
       },
       get_current_thread()->priority, kIoStackSize);
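
On the other side of the boundary, a UEFI client of this protocol follows the
standard Block IO 2 flow: create an event, place it in the token, call
ReadBlocksEx, and wait. An EDK2-style sketch of such a client (not part of
this commit; the protocol and token layouts are the standard
EFI_BLOCK_IO2_PROTOCOL definitions, and gBS comes from
UefiBootServicesTableLib):

#include <Uefi.h>
#include <Protocol/BlockIo2.h>
#include <Library/UefiBootServicesTableLib.h>

EFI_STATUS ReadOneBlock(EFI_BLOCK_IO2_PROTOCOL *BlockIo2, VOID *Buffer) {
  EFI_BLOCK_IO2_TOKEN Token;
  EFI_STATUS Status;
  UINTN Index;

  // The firmware signals this event from its completion path
  // (async_read_callback above, in this implementation).
  Status = gBS->CreateEvent(0, TPL_CALLBACK, NULL, NULL, &Token.Event);
  if (EFI_ERROR(Status))
    return Status;

  Status = BlockIo2->ReadBlocksEx(BlockIo2, BlockIo2->Media->MediaId,
                                  /*LBA=*/0, &Token,
                                  BlockIo2->Media->BlockSize, Buffer);
  if (!EFI_ERROR(Status)) {
    // A real client would do other work before blocking here.
    gBS->WaitForEvent(1, &Token.Event, &Index);
    Status = Token.TransactionStatus;
  }

  gBS->CloseEvent(Token.Event);
  return Status;
}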