diff --git a/config/grub/config/grub.cfg b/config/grub/config/grub.cfg index 3bbd2ecc..6b36d90b 100644 --- a/config/grub/config/grub.cfg +++ b/config/grub/config/grub.cfg @@ -76,10 +76,20 @@ function search_grub { echo -n "Attempting to load grub.cfg from '${1}' devices" for i in 0 1 2 3 4 5 6 7 8 9 10 11; do for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do - try_user_config "(${1}${i},${part})" + if [ "${1}" != "nvme" ]; then + try_user_config "(${1}${i},${part})" + else + # TODO: do we care about other namesapces + try_user_config "(nvme${i}n1,${part})" + fi done - # raw devices e.g. (ahci0) instead of (ahci0,1) - try_user_config "(${1}${i})" + if [ "${1}" != "nvme" ]; then + # raw devices e.g. (ahci0) instead of (ahci0,1) + try_user_config "(${1}${i})" + else + # TODO: do we care about other namesapces + try_user_config "(nvme${i}n1)" + fi done echo # Insert newline } @@ -102,10 +112,20 @@ function search_isolinux { echo "\nAttempting to parse iso/sys/extlinux config from '${1}' devices" for i in 0 1 2 3 4 5 6 7 8 9 10 11; do for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do - try_isolinux_config "(${1}${i},${part})" + if [ "${1}" != "nvme" ]; then + try_isolinux_config "(${1}${i},${part})" + else + # TODO: see above + try_isolinux_config "(nvme${i}n1,${part})" + fi done - # raw devices e.g. (usb0) instead of (usb0,1) - try_isolinux_config "(${1}${i})" + if [ "${1}" != "nvme" ]; then + # raw devices e.g. (usb0) instead of (usb0,1) + try_isolinux_config "(${1}${i})" + else + # TODO: do we care about other namesapces + try_isolinux_config "(nvme${i}n1)" + fi done echo # Insert newline } @@ -125,6 +145,9 @@ menuentry 'Load Operating System (incl. fully encrypted disks) [o]' --hotkey='o if [ "${grub_scan_disk}" != "ahci" ]; then search_bootcfg ata fi + if [ "${grub_scan_disk}" != "nvme" ]; then + search_bootcfg nvme + fi # grub device enumeration is very slow, so checks are hardcoded @@ -143,6 +166,7 @@ menuentry 'Load Operating System (incl. fully encrypted disks) [o]' --hotkey='o unset ahcidev unset atadev + unset nvmedev for i in 11 10 9 8 7 6 5 4 3 2 1 0; do for part in 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1; do if [ "${grub_scan_disk}" != "ata" ]; then @@ -151,6 +175,10 @@ menuentry 'Load Operating System (incl. fully encrypted disks) [o]' --hotkey='o if [ "${grub_scan_disk}" != "ahci" ]; then atadev="(ata${i},${part}) ${atadev}" fi + if [ "${grub_scan_disk}" != "nvme" ]; then + # TODO: do we care about other namesapces + nvmedev="(nvme${i}n1,${part}) ${nvmedev}" + fi done done @@ -185,6 +213,9 @@ menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on AHCI [a]' --hotkey='a' menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on ATA/IDE [d]' --hotkey='d' { search_bootcfg ata } +menuentry 'Search for GRUB/SYSLINUX/EXTLINUX/ISOLINUX on NVMe [e]' --hotkey='e' { + search_bootcfg nvme +} if [ -f (cbfsdisk)/grubtest.cfg ]; then menuentry 'Load test configuration (grubtest.cfg) inside of CBFS [t]' --hotkey='t' { set root='(cbfsdisk)' @@ -246,4 +277,4 @@ submenu 'Other [z]' --hotkey='z' { menuentry 'Disable spkmodem [z]' --hotkey='z' { terminal_output --remove spkmodem } -} \ No newline at end of file +} diff --git a/config/grub/modules.list b/config/grub/modules.list index f3768adb..5943a9ae 100644 --- a/config/grub/modules.list +++ b/config/grub/modules.list @@ -105,6 +105,7 @@ multiboot2 \ nativedisk \ normal \ ntfs \ +nvme \ ohci \ part_bsd \ part_dfly \ diff --git a/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch b/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch new file mode 100644 index 00000000..b00611a0 --- /dev/null +++ b/config/grub/patches/0006-nvme/0001-Add-native-NVMe-driver-based-on-SeaBIOS.patch @@ -0,0 +1,1074 @@ +From ef9d61ae9ed490301adf9ee064a9fc1190069d81 Mon Sep 17 00:00:00 2001 +From: Mate Kukri +Date: Mon, 20 May 2024 11:43:35 +0100 +Subject: Add native NVMe driver based on SeaBIOS + +Tested to successfully boot Debian on QEMU and OptiPlex 3050. + +Signed-off-by: Mate Kukri +--- + Makefile.am | 2 +- + grub-core/Makefile.core.def | 6 + + grub-core/commands/nativedisk.c | 1 + + grub-core/disk/nvme-int.h | 208 +++++++++ + grub-core/disk/nvme.c | 781 ++++++++++++++++++++++++++++++++ + include/grub/disk.h | 1 + + 6 files changed, 998 insertions(+), 1 deletion(-) + create mode 100644 grub-core/disk/nvme-int.h + create mode 100644 grub-core/disk/nvme.c + +diff --git a/Makefile.am b/Makefile.am +index 65016f856..7bc0866ba 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -434,7 +434,7 @@ if COND_i386_coreboot + FS_PAYLOAD_MODULES ?= $(shell cat grub-core/fs.lst) + default_payload.elf: grub-mkstandalone grub-mkimage FORCE + test -f $@ && rm $@ || true +- pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg ++ pkgdatadir=. ./grub-mkstandalone --grub-mkimage=./grub-mkimage -O i386-coreboot -o $@ --modules='ahci pata nvme xhci ehci uhci ohci usb_keyboard usbms part_msdos ext2 fat at_keyboard part_gpt usbserial_usbdebug cbfs' --install-modules='ls linux search configfile normal cbtime cbls memrw iorw minicmd lsmmap lspci halt reboot hexdump pcidump regexp setpci lsacpi chain test serial multiboot cbmemc linux16 gzio echo help syslinuxcfg xnu $(FS_PAYLOAD_MODULES) password_pbkdf2 $(EXTRA_PAYLOAD_MODULES)' --fonts= --themes= --locales= -d grub-core/ /boot/grub/grub.cfg=$(srcdir)/coreboot.cfg + endif + + endif +diff --git a/grub-core/Makefile.core.def b/grub-core/Makefile.core.def +index ea782666d..0f893369a 100644 +--- a/grub-core/Makefile.core.def ++++ b/grub-core/Makefile.core.def +@@ -2602,3 +2602,9 @@ module = { + efi = commands/bli.c; + enable = efi; + }; ++ ++module = { ++ name = nvme; ++ common = disk/nvme.c; ++ enable = pci; ++}; +diff --git a/grub-core/commands/nativedisk.c b/grub-core/commands/nativedisk.c +index 580c8d3b0..a2c766fbd 100644 +--- a/grub-core/commands/nativedisk.c ++++ b/grub-core/commands/nativedisk.c +@@ -78,6 +78,7 @@ get_uuid (const char *name, char **uuid, int getnative) + case GRUB_DISK_DEVICE_ATA_ID: + case GRUB_DISK_DEVICE_SCSI_ID: + case GRUB_DISK_DEVICE_XEN: ++ case GRUB_DISK_DEVICE_NVME_ID: + if (getnative) + break; + /* FALLTHROUGH */ +diff --git a/grub-core/disk/nvme-int.h b/grub-core/disk/nvme-int.h +new file mode 100644 +index 000000000..1295b58aa +--- /dev/null ++++ b/grub-core/disk/nvme-int.h +@@ -0,0 +1,208 @@ ++// NVMe datastructures and constants ++// ++// Copyright 2017 Amazon.com, Inc. or its affiliates. ++// ++// This file may be distributed under the terms of the GNU LGPLv3 license. ++ ++#ifndef __NVME_INT_H ++#define __NVME_INT_H ++ ++#include ++ ++/* Data structures */ ++ ++/* The register file of a NVMe host controller. This struct follows the naming ++ scheme in the NVMe specification. */ ++struct nvme_reg { ++ grub_uint64_t cap; /* controller capabilities */ ++ grub_uint32_t vs; /* version */ ++ grub_uint32_t intms; /* interrupt mask set */ ++ grub_uint32_t intmc; /* interrupt mask clear */ ++ grub_uint32_t cc; /* controller configuration */ ++ grub_uint32_t _res0; ++ grub_uint32_t csts; /* controller status */ ++ grub_uint32_t _res1; ++ grub_uint32_t aqa; /* admin queue attributes */ ++ grub_uint64_t asq; /* admin submission queue base address */ ++ grub_uint64_t acq; /* admin completion queue base address */ ++}; ++ ++/* Submission queue entry */ ++struct nvme_sqe { ++ union { ++ grub_uint32_t dword[16]; ++ struct { ++ grub_uint32_t cdw0; /* Command DWORD 0 */ ++ grub_uint32_t nsid; /* Namespace ID */ ++ grub_uint64_t _res0; ++ grub_uint64_t mptr; /* metadata ptr */ ++ ++ grub_uint64_t dptr_prp1; ++ grub_uint64_t dptr_prp2; ++ }; ++ }; ++}; ++ ++/* Completion queue entry */ ++struct nvme_cqe { ++ union { ++ grub_uint32_t dword[4]; ++ struct { ++ grub_uint32_t cdw0; ++ grub_uint32_t _res0; ++ grub_uint16_t sq_head; ++ grub_uint16_t sq_id; ++ grub_uint16_t cid; ++ grub_uint16_t status; ++ }; ++ }; ++}; ++ ++/* The common part of every submission or completion queue. */ ++struct nvme_queue { ++ grub_uint32_t *dbl; /* doorbell */ ++ grub_uint16_t mask; /* length - 1 */ ++}; ++ ++struct nvme_cq { ++ struct nvme_queue common; ++ struct nvme_cqe *cqe; ++ ++ /* We have read upto (but not including) this entry in the queue. */ ++ grub_uint16_t head; ++ ++ /* The current phase bit the controller uses to indicate that it has written ++ a new entry. This is inverted after each wrap. */ ++ unsigned phase : 1; ++}; ++ ++struct nvme_sq { ++ struct nvme_queue common; ++ struct nvme_sqe *sqe; ++ ++ /* Corresponding completion queue. We only support a single SQ per CQ. */ ++ struct nvme_cq *cq; ++ ++ /* The last entry the controller has fetched. */ ++ grub_uint16_t head; ++ ++ /* The last value we have written to the tail doorbell. */ ++ grub_uint16_t tail; ++}; ++ ++struct nvme_ctrl { ++ grub_pci_device_t pci; ++ struct nvme_reg volatile *reg; ++ ++ grub_uint32_t ctrlnum; ++ ++ grub_uint32_t doorbell_stride; /* in bytes */ ++ ++ struct nvme_sq admin_sq; ++ struct nvme_cq admin_cq; ++ ++ grub_uint32_t ns_count; ++ ++ struct nvme_sq io_sq; ++ struct nvme_cq io_cq; ++}; ++ ++struct nvme_namespace { ++ struct nvme_namespace *next; ++ struct nvme_namespace **prev; ++ ++ char *devname; ++ ++ grub_uint32_t nsnum; ++ ++ struct nvme_ctrl *ctrl; ++ ++ grub_uint32_t ns_id; ++ ++ grub_uint64_t lba_count; /* The total amount of sectors. */ ++ ++ grub_uint32_t block_size; ++ grub_uint32_t metadata_size; ++ grub_uint32_t max_req_size; ++}; ++ ++/* Data structures for NVMe admin identify commands */ ++ ++struct nvme_identify_ctrl { ++ grub_uint16_t vid; ++ grub_uint16_t ssvid; ++ char sn[20]; ++ char mn[40]; ++ char fr[8]; ++ ++ grub_uint8_t rab; ++ grub_uint8_t ieee[3]; ++ grub_uint8_t cmic; ++ grub_uint8_t mdts; ++ ++ char _boring[516 - 78]; ++ ++ grub_uint32_t nn; /* number of namespaces */ ++}; ++ ++struct nvme_identify_ns_list { ++ grub_uint32_t ns_id[1024]; ++}; ++ ++struct nvme_lba_format { ++ grub_uint16_t ms; ++ grub_uint8_t lbads; ++ grub_uint8_t rp; ++}; ++ ++struct nvme_identify_ns { ++ grub_uint64_t nsze; ++ grub_uint64_t ncap; ++ grub_uint64_t nuse; ++ grub_uint8_t nsfeat; ++ grub_uint8_t nlbaf; ++ grub_uint8_t flbas; ++ ++ char _boring[128 - 27]; ++ ++ struct nvme_lba_format lbaf[16]; ++}; ++ ++union nvme_identify { ++ struct nvme_identify_ns ns; ++ struct nvme_identify_ctrl ctrl; ++ struct nvme_identify_ns_list ns_list; ++}; ++ ++/* NVMe constants */ ++ ++#define NVME_CAP_CSS_NVME (1ULL << 37) ++ ++#define NVME_CSTS_FATAL (1U << 1) ++#define NVME_CSTS_RDY (1U << 0) ++ ++#define NVME_CC_EN (1U << 0) ++ ++#define NVME_SQE_OPC_ADMIN_CREATE_IO_SQ 1U ++#define NVME_SQE_OPC_ADMIN_CREATE_IO_CQ 5U ++#define NVME_SQE_OPC_ADMIN_IDENTIFY 6U ++ ++#define NVME_SQE_OPC_IO_WRITE 1U ++#define NVME_SQE_OPC_IO_READ 2U ++ ++#define NVME_ADMIN_IDENTIFY_CNS_ID_NS 0U ++#define NVME_ADMIN_IDENTIFY_CNS_ID_CTRL 1U ++#define NVME_ADMIN_IDENTIFY_CNS_GET_NS_LIST 2U ++ ++#define NVME_CQE_DW3_P (1U << 16) ++ ++#define NVME_PAGE_SIZE 4096 ++#define NVME_PAGE_MASK ~(NVME_PAGE_SIZE - 1) ++ ++/* Length for the queue entries. */ ++#define NVME_SQE_SIZE_LOG 6 ++#define NVME_CQE_SIZE_LOG 4 ++ ++#endif ++ ++/* EOF */ +diff --git a/grub-core/disk/nvme.c b/grub-core/disk/nvme.c +new file mode 100644 +index 000000000..093237c70 +--- /dev/null ++++ b/grub-core/disk/nvme.c +@@ -0,0 +1,781 @@ ++// Low level NVMe disk access ++// ++// Based on SeaBIOS NVMe driver - Copyright 2017 Amazon.com, Inc. or its affiliates. ++// Port to GRUB2 done by Mate Kukri ++// ++// This file may be distributed under the terms of the GNU LGPLv3 license. ++ ++#include ++#include ++#include ++#include "nvme-int.h" ++ ++GRUB_MOD_LICENSE ("GPLv3"); /* LGPLv3 in reality but it is GPLv3 compatible */ ++ ++static grub_uint32_t grub_nvme_ctrlcnt; ++static grub_uint32_t grub_nvme_nscnt; ++ ++static struct nvme_namespace *grub_nvme_namespaces; ++ ++// Page aligned "dma bounce buffer" of size NVME_PAGE_SIZE ++static void *nvme_dma_buffer; ++ ++static void * ++zalloc_page_aligned(grub_uint32_t size) ++{ ++ void *res = grub_memalign(NVME_PAGE_SIZE, size); ++ if (res) grub_memset(res, 0, size); ++ return res; ++} ++ ++static void ++nvme_init_queue_common(struct nvme_ctrl *ctrl, struct nvme_queue *q, grub_uint16_t q_idx, ++ grub_uint16_t length) ++{ ++ grub_memset(q, 0, sizeof(*q)); ++ q->dbl = (grub_uint32_t *)((char *)ctrl->reg + 0x1000 + q_idx * ctrl->doorbell_stride); ++ grub_dprintf("nvme", " q %p q_idx %u dbl %p\n", q, q_idx, q->dbl); ++ q->mask = length - 1; ++} ++ ++static int ++nvme_init_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, grub_uint16_t length, ++ struct nvme_cq *cq) ++{ ++ nvme_init_queue_common(ctrl, &sq->common, q_idx, length); ++ sq->sqe = zalloc_page_aligned(sizeof(*sq->sqe) * length); ++ ++ if (!sq->sqe) { ++ return -1; ++ } ++ ++ grub_dprintf("nvme", "sq %p q_idx %u sqe %p\n", sq, q_idx, sq->sqe); ++ sq->cq = cq; ++ sq->head = 0; ++ sq->tail = 0; ++ ++ return 0; ++} ++ ++static int ++nvme_init_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx, grub_uint16_t length) ++{ ++ nvme_init_queue_common(ctrl, &cq->common, q_idx, length); ++ cq->cqe = zalloc_page_aligned(sizeof(*cq->cqe) * length); ++ if (!cq->cqe) { ++ return -1; ++ } ++ ++ cq->head = 0; ++ ++ /* All CQE phase bits are initialized to zero. This means initially we wait ++ for the host controller to set these to 1. */ ++ cq->phase = 1; ++ ++ return 0; ++} ++ ++static int ++nvme_poll_cq(struct nvme_cq *cq) ++{ ++ grub_uint32_t dw3 = *(volatile grub_uint32_t *) &cq->cqe[cq->head].dword[3]; ++ return (!!(dw3 & NVME_CQE_DW3_P) == cq->phase); ++} ++ ++static int ++nvme_is_cqe_success(struct nvme_cqe const *cqe) ++{ ++ return ((cqe->status >> 1) & 0xFF) == 0; ++} ++ ++static struct nvme_cqe ++nvme_error_cqe(void) ++{ ++ struct nvme_cqe r; ++ ++ /* 0xFF is a vendor specific status code != success. Should be okay for ++ indicating failure. */ ++ grub_memset(&r, 0xFF, sizeof(r)); ++ return r; ++} ++ ++static struct nvme_cqe ++nvme_consume_cqe(struct nvme_sq *sq) ++{ ++ struct nvme_cq *cq = sq->cq; ++ ++ if (!nvme_poll_cq(cq)) { ++ /* Cannot consume a completion queue entry, if there is none ready. */ ++ return nvme_error_cqe(); ++ } ++ ++ struct nvme_cqe *cqe = &cq->cqe[cq->head]; ++ grub_uint16_t cq_next_head = (cq->head + 1) & cq->common.mask; ++ grub_dprintf("nvme", "cq %p head %u -> %u\n", cq, cq->head, cq_next_head); ++ if (cq_next_head < cq->head) { ++ grub_dprintf("nvme", "cq %p wrap\n", cq); ++ cq->phase = ~cq->phase; ++ } ++ cq->head = cq_next_head; ++ ++ /* Update the submission queue head. */ ++ if (cqe->sq_head != sq->head) { ++ sq->head = cqe->sq_head; ++ grub_dprintf("nvme", "sq %p advanced to %u\n", sq, cqe->sq_head); ++ } ++ ++ /* Tell the controller that we consumed the completion. */ ++ *(volatile grub_uint32_t *) cq->common.dbl = cq->head; ++ ++ return *cqe; ++} ++ ++static struct nvme_cqe ++nvme_wait(struct nvme_sq *sq) ++{ ++ // static const unsigned nvme_timeout = 5000 /* ms */; ++ // grub_uint32_t to = timer_calc(nvme_timeout); ++ while (!nvme_poll_cq(sq->cq)) { ++ /* FIXME ++ yield(); ++ ++ if (timer_check(to)) { ++ warn_timeout(); ++ return nvme_error_cqe(); ++ }*/ ++ } ++ ++ return nvme_consume_cqe(sq); ++} ++ ++/* Returns the next submission queue entry (or NULL if the queue is full). It ++ also fills out Command Dword 0 and clears the rest. */ ++static struct nvme_sqe * ++nvme_get_next_sqe(struct nvme_sq *sq, grub_uint8_t opc, void *metadata, void *data, void *data2) ++{ ++ if (((sq->head + 1) & sq->common.mask) == sq->tail) { ++ grub_dprintf("nvme", "submission queue is full\n"); ++ return NULL; ++ } ++ ++ struct nvme_sqe *sqe = &sq->sqe[sq->tail]; ++ grub_dprintf("nvme", "sq %p next_sqe %u\n", sq, sq->tail); ++ ++ grub_memset(sqe, 0, sizeof(*sqe)); ++ sqe->cdw0 = opc | (sq->tail << 16 /* CID */); ++ sqe->mptr = (grub_uint32_t)metadata; ++ sqe->dptr_prp1 = (grub_uint32_t)data; ++ sqe->dptr_prp2 = (grub_uint32_t)data2; ++ ++ return sqe; ++} ++ ++/* Call this after you've filled out an sqe that you've got from nvme_get_next_sqe. */ ++static void ++nvme_commit_sqe(struct nvme_sq *sq) ++{ ++ grub_dprintf("nvme", "sq %p commit_sqe %u\n", sq, sq->tail); ++ sq->tail = (sq->tail + 1) & sq->common.mask; ++ *(volatile grub_uint32_t *) sq->common.dbl = sq->tail; ++} ++ ++/* Perform an identify command on the admin queue and return the resulting ++ buffer. This may be a NULL pointer, if something failed. This function ++ cannot be used after initialization, because it uses buffers in tmp zone. */ ++static union nvme_identify * ++nvme_admin_identify(struct nvme_ctrl *ctrl, grub_uint8_t cns, grub_uint32_t nsid) ++{ ++ union nvme_identify *identify_buf = zalloc_page_aligned(4096); ++ if (!identify_buf) ++ return NULL; ++ ++ struct nvme_sqe *cmd_identify; ++ cmd_identify = nvme_get_next_sqe(&ctrl->admin_sq, ++ NVME_SQE_OPC_ADMIN_IDENTIFY, NULL, ++ identify_buf, NULL); ++ if (!cmd_identify) ++ goto error; ++ ++ cmd_identify->nsid = nsid; ++ cmd_identify->dword[10] = cns; ++ ++ nvme_commit_sqe(&ctrl->admin_sq); ++ ++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++ if (!nvme_is_cqe_success(&cqe)) { ++ goto error; ++ } ++ ++ return identify_buf; ++ error: ++ grub_free(identify_buf); ++ return NULL; ++} ++ ++static struct nvme_identify_ctrl * ++nvme_admin_identify_ctrl(struct nvme_ctrl *ctrl) ++{ ++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_CTRL, 0)->ctrl; ++} ++ ++static struct nvme_identify_ns * ++nvme_admin_identify_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_id) ++{ ++ return &nvme_admin_identify(ctrl, NVME_ADMIN_IDENTIFY_CNS_ID_NS, ++ ns_id)->ns; ++} ++ ++static void ++nvme_probe_ns(struct nvme_ctrl *ctrl, grub_uint32_t ns_idx, grub_uint8_t mdts) ++{ ++ grub_uint32_t ns_id = ns_idx + 1; ++ ++ struct nvme_identify_ns *id = nvme_admin_identify_ns(ctrl, ns_id); ++ if (!id) { ++ grub_dprintf("nvme", "NVMe couldn't identify namespace %u.\n", ns_id); ++ goto free_buffer; ++ } ++ ++ grub_uint8_t current_lba_format = id->flbas & 0xF; ++ if (current_lba_format > id->nlbaf) { ++ grub_dprintf("nvme", "NVMe NS %u: current LBA format %u is beyond what the " ++ " namespace supports (%u)?\n", ++ ns_id, current_lba_format, id->nlbaf + 1); ++ goto free_buffer; ++ } ++ ++ if (!id->nsze) { ++ grub_dprintf("nvme", "NVMe NS %u is inactive.\n", ns_id); ++ goto free_buffer; ++ } ++ ++ if (!nvme_dma_buffer) { ++ nvme_dma_buffer = zalloc_page_aligned(NVME_PAGE_SIZE); ++ if (!nvme_dma_buffer) { ++ goto free_buffer; ++ } ++ } ++ ++ struct nvme_namespace *ns = grub_malloc(sizeof(*ns)); ++ if (!ns) { ++ goto free_buffer; ++ } ++ grub_memset(ns, 0, sizeof(*ns)); ++ ns->ctrl = ctrl; ++ ns->ns_id = ns_id; ++ ns->lba_count = id->nsze; ++ ++ struct nvme_lba_format *fmt = &id->lbaf[current_lba_format]; ++ ++ ns->block_size = 1U << fmt->lbads; ++ ns->metadata_size = fmt->ms; ++ ++ if (ns->block_size > NVME_PAGE_SIZE) { ++ /* If we see devices that trigger this path, we need to increase our ++ buffer size. */ ++ grub_free(ns); ++ goto free_buffer; ++ } ++ ++ if (mdts) { ++ ns->max_req_size = ((1U << mdts) * NVME_PAGE_SIZE) / ns->block_size; ++ grub_dprintf("nvme", "NVME NS %u max request size: %d sectors\n", ++ ns_id, ns->max_req_size); ++ } else { ++ ns->max_req_size = -1U; ++ } ++ ++ ns->devname = grub_xasprintf("nvme%un%u", ctrl->ctrlnum, ns_id); ++ ns->nsnum = grub_nvme_nscnt++; ++ ++ grub_list_push (GRUB_AS_LIST_P (&grub_nvme_namespaces), GRUB_AS_LIST (ns)); ++ ++free_buffer: ++ grub_free(id); ++} ++ ++ ++/* Release memory allocated for a completion queue */ ++static void ++nvme_destroy_cq(struct nvme_cq *cq) ++{ ++ grub_free(cq->cqe); ++ cq->cqe = NULL; ++} ++ ++/* Release memory allocated for a submission queue */ ++static void ++nvme_destroy_sq(struct nvme_sq *sq) ++{ ++ grub_free(sq->sqe); ++ sq->sqe = NULL; ++} ++ ++/* Returns 0 on success. */ ++static int ++nvme_create_io_cq(struct nvme_ctrl *ctrl, struct nvme_cq *cq, grub_uint16_t q_idx) ++{ ++ int rc; ++ struct nvme_sqe *cmd_create_cq; ++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); ++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe)) ++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe); ++ ++ rc = nvme_init_cq(ctrl, cq, q_idx, length); ++ if (rc) { ++ goto err; ++ } ++ ++ cmd_create_cq = nvme_get_next_sqe(&ctrl->admin_sq, ++ NVME_SQE_OPC_ADMIN_CREATE_IO_CQ, NULL, ++ cq->cqe, NULL); ++ if (!cmd_create_cq) { ++ goto err_destroy_cq; ++ } ++ ++ cmd_create_cq->dword[10] = (cq->common.mask << 16) | (q_idx >> 1); ++ cmd_create_cq->dword[11] = 1 /* physically contiguous */; ++ ++ nvme_commit_sqe(&ctrl->admin_sq); ++ ++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++ if (!nvme_is_cqe_success(&cqe)) { ++ grub_dprintf("nvme", "create io cq failed: %08x %08x %08x %08x\n", ++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++ ++ goto err_destroy_cq; ++ } ++ ++ return 0; ++ ++err_destroy_cq: ++ nvme_destroy_cq(cq); ++err: ++ return -1; ++} ++ ++/* Returns 0 on success. */ ++static int ++nvme_create_io_sq(struct nvme_ctrl *ctrl, struct nvme_sq *sq, grub_uint16_t q_idx, struct nvme_cq *cq) ++{ ++ int rc; ++ struct nvme_sqe *cmd_create_sq; ++ grub_uint32_t length = 1 + (ctrl->reg->cap & 0xffff); ++ if (length > NVME_PAGE_SIZE / sizeof(struct nvme_cqe)) ++ length = NVME_PAGE_SIZE / sizeof(struct nvme_cqe); ++ ++ rc = nvme_init_sq(ctrl, sq, q_idx, length, cq); ++ if (rc) { ++ goto err; ++ } ++ ++ cmd_create_sq = nvme_get_next_sqe(&ctrl->admin_sq, ++ NVME_SQE_OPC_ADMIN_CREATE_IO_SQ, NULL, ++ sq->sqe, NULL); ++ if (!cmd_create_sq) { ++ goto err_destroy_sq; ++ } ++ ++ cmd_create_sq->dword[10] = (sq->common.mask << 16) | (q_idx >> 1); ++ cmd_create_sq->dword[11] = (q_idx >> 1) << 16 | 1 /* contiguous */; ++ grub_dprintf("nvme", "sq %p create dword10 %08x dword11 %08x\n", sq, ++ cmd_create_sq->dword[10], cmd_create_sq->dword[11]); ++ ++ nvme_commit_sqe(&ctrl->admin_sq); ++ ++ struct nvme_cqe cqe = nvme_wait(&ctrl->admin_sq); ++ ++ if (!nvme_is_cqe_success(&cqe)) { ++ grub_dprintf("nvme", "create io sq failed: %08x %08x %08x %08x\n", ++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++ goto err_destroy_sq; ++ } ++ ++ return 0; ++ ++err_destroy_sq: ++ nvme_destroy_sq(sq); ++err: ++ return -1; ++} ++ ++/* Reads count sectors into buf. The buffer cannot cross page boundaries. */ ++static int ++nvme_io_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *prp1, void *prp2, ++ grub_uint16_t count, int write) ++{ ++ if (((grub_uint32_t)prp1 & 0x3) || ((grub_uint32_t)prp2 & 0x3)) { ++ /* Buffer is misaligned */ ++ return -1; ++ } ++ ++ struct nvme_sqe *io_read = nvme_get_next_sqe(&ns->ctrl->io_sq, ++ write ? NVME_SQE_OPC_IO_WRITE ++ : NVME_SQE_OPC_IO_READ, ++ NULL, prp1, prp2); ++ io_read->nsid = ns->ns_id; ++ io_read->dword[10] = (grub_uint32_t)lba; ++ io_read->dword[11] = (grub_uint32_t)(lba >> 32); ++ io_read->dword[12] = (1U << 31 /* limited retry */) | (count - 1); ++ ++ nvme_commit_sqe(&ns->ctrl->io_sq); ++ ++ struct nvme_cqe cqe = nvme_wait(&ns->ctrl->io_sq); ++ ++ if (!nvme_is_cqe_success(&cqe)) { ++ grub_dprintf("nvme", "read io: %08x %08x %08x %08x\n", ++ cqe.dword[0], cqe.dword[1], cqe.dword[2], cqe.dword[3]); ++ ++ return -1; ++ } ++ ++ grub_dprintf("nvme", "ns %u %s lba %llu+%u\n", ns->ns_id, write ? "write" : "read", ++ lba, count); ++ return count; ++} ++ ++// Transfer up to one page of data using the internal dma bounce buffer ++static int ++nvme_bounce_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count, ++ int write) ++{ ++ grub_uint16_t const max_blocks = NVME_PAGE_SIZE / ns->block_size; ++ grub_uint16_t blocks = count < max_blocks ? count : max_blocks; ++ ++ if (write) ++ grub_memcpy(nvme_dma_buffer, buf, blocks * ns->block_size); ++ ++ int res = nvme_io_xfer(ns, lba, nvme_dma_buffer, NULL, blocks, write); ++ ++ if (!write && res >= 0) ++ grub_memcpy(buf, nvme_dma_buffer, res * ns->block_size); ++ ++ return res; ++} ++ ++#define NVME_MAX_PRPL_ENTRIES 15 /* Allows requests up to 64kb */ ++ ++// Transfer data using page list (if applicable) ++static int ++nvme_prpl_xfer(struct nvme_namespace *ns, grub_uint64_t lba, void *buf, grub_uint16_t count, ++ int write) ++{ ++ grub_uint32_t base = (long)buf; ++ grub_int32_t size; ++ ++ if (count > ns->max_req_size) ++ count = ns->max_req_size; ++ ++ size = count * ns->block_size; ++ /* Special case for transfers that fit into PRP1, but are unaligned */ ++ if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE)) ++ goto single; ++ ++ /* Every request has to be page aligned */ ++ if (base & ~NVME_PAGE_MASK) ++ goto bounce; ++ ++ /* Make sure a full block fits into the last chunk */ ++ if (size & (ns->block_size - 1ULL)) ++ goto bounce; ++ ++ /* Build PRP list if we need to describe more than 2 pages */ ++ if ((ns->block_size * count) > (NVME_PAGE_SIZE * 2)) { ++ grub_uint32_t prpl_len = 0; ++ grub_uint64_t *prpl = nvme_dma_buffer; ++ int first_page = 1; ++ for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) { ++ if (first_page) { ++ /* First page is special */ ++ first_page = 0; ++ continue; ++ } ++ if (prpl_len >= NVME_MAX_PRPL_ENTRIES) ++ goto bounce; ++ prpl[prpl_len++] = base; ++ } ++ return nvme_io_xfer(ns, lba, buf, prpl, count, write); ++ } ++ ++ /* Directly embed the 2nd page if we only need 2 pages */ ++ if ((ns->block_size * count) > NVME_PAGE_SIZE) ++ return nvme_io_xfer(ns, lba, buf, (char *) buf + NVME_PAGE_SIZE, count, write); ++ ++single: ++ /* One page is enough, don't expose anything else */ ++ return nvme_io_xfer(ns, lba, buf, NULL, count, write); ++ ++bounce: ++ /* Use bounce buffer to make transfer */ ++ return nvme_bounce_xfer(ns, lba, buf, count, write); ++} ++ ++static int ++nvme_create_io_queues(struct nvme_ctrl *ctrl) ++{ ++ if (nvme_create_io_cq(ctrl, &ctrl->io_cq, 3)) ++ goto err; ++ ++ if (nvme_create_io_sq(ctrl, &ctrl->io_sq, 2, &ctrl->io_cq)) ++ goto err_free_cq; ++ ++ return 0; ++ ++ err_free_cq: ++ nvme_destroy_cq(&ctrl->io_cq); ++ err: ++ return -1; ++} ++ ++/* Waits for CSTS.RDY to match rdy. Returns 0 on success. */ ++static int ++nvme_wait_csts_rdy(struct nvme_ctrl *ctrl, unsigned rdy) ++{ ++ // grub_uint32_t const max_to = 500 /* ms */ * ((ctrl->reg->cap >> 24) & 0xFFU); ++ // grub_uint32_t to = timer_calc(max_to); ++ grub_uint32_t csts; ++ ++ while (rdy != ((csts = ctrl->reg->csts) & NVME_CSTS_RDY)) { ++ // FIXME ++ //yield(); ++ ++ if (csts & NVME_CSTS_FATAL) { ++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n"); ++ return -1; ++ } ++ ++ /* ++ if (timer_check(to)) { ++ warn_timeout(); ++ return -1; ++ }*/ ++ } ++ ++ return 0; ++} ++ ++/* Returns 0 on success. */ ++static int grub_nvme_controller_enable(struct nvme_ctrl *ctrl) ++{ ++ grub_pci_address_t addr; ++ int rc; ++ ++ addr = grub_pci_make_address (ctrl->pci, GRUB_PCI_REG_COMMAND); ++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_BUS_MASTER); ++ ++ /* Turn the controller off. */ ++ ctrl->reg->cc = 0; ++ if (nvme_wait_csts_rdy(ctrl, 0)) { ++ grub_dprintf("nvme", "NVMe fatal error during controller shutdown\n"); ++ return -1; ++ } ++ ++ ctrl->doorbell_stride = 4U << ((ctrl->reg->cap >> 32) & 0xF); ++ ++ rc = nvme_init_cq(ctrl, &ctrl->admin_cq, 1, ++ NVME_PAGE_SIZE / sizeof(struct nvme_cqe)); ++ if (rc) { ++ return -1; ++ } ++ ++ rc = nvme_init_sq(ctrl, &ctrl->admin_sq, 0, ++ NVME_PAGE_SIZE / sizeof(struct nvme_sqe), &ctrl->admin_cq); ++ if (rc) { ++ goto err_destroy_admin_cq; ++ } ++ ++ ctrl->reg->aqa = ctrl->admin_cq.common.mask << 16 ++ | ctrl->admin_sq.common.mask; ++ ++ ctrl->reg->asq = (grub_uint32_t)ctrl->admin_sq.sqe; ++ ctrl->reg->acq = (grub_uint32_t)ctrl->admin_cq.cqe; ++ ++ grub_dprintf("nvme", " admin submission queue: %p\n", ctrl->admin_sq.sqe); ++ grub_dprintf("nvme", " admin completion queue: %p\n", ctrl->admin_cq.cqe); ++ ++ ctrl->reg->cc = NVME_CC_EN | (NVME_CQE_SIZE_LOG << 20) ++ | (NVME_SQE_SIZE_LOG << 16 /* IOSQES */); ++ ++ if (nvme_wait_csts_rdy(ctrl, 1)) { ++ grub_dprintf("nvme", "NVMe fatal error while enabling controller\n"); ++ goto err_destroy_admin_sq; ++ } ++ ++ /* The admin queue is set up and the controller is ready. Let's figure out ++ what namespaces we have. */ ++ ++ struct nvme_identify_ctrl *identify = nvme_admin_identify_ctrl(ctrl); ++ ++ if (!identify) { ++ grub_dprintf("nvme", "NVMe couldn't identify controller.\n"); ++ goto err_destroy_admin_sq; ++ } ++ ++ grub_dprintf("nvme", "NVMe has %u namespace%s.\n", ++ identify->nn, (identify->nn == 1) ? "" : "s"); ++ ++ ctrl->ns_count = identify->nn; ++ grub_uint8_t mdts = identify->mdts; ++ grub_free(identify); ++ ++ if ((ctrl->ns_count == 0) || nvme_create_io_queues(ctrl)) { ++ /* No point to continue, if the controller says it doesn't have ++ namespaces or we couldn't create I/O queues. */ ++ goto err_destroy_admin_sq; ++ } ++ ++ /* Give the controller a global number */ ++ ctrl->ctrlnum = grub_nvme_ctrlcnt++; ++ ++ /* Populate namespace IDs */ ++ for (grub_uint32_t ns_idx = 0; ns_idx < ctrl->ns_count; ns_idx++) { ++ nvme_probe_ns(ctrl, ns_idx, mdts); ++ } ++ ++ grub_dprintf("nvme", "NVMe initialization complete!\n"); ++ return 0; ++ ++ err_destroy_admin_sq: ++ nvme_destroy_sq(&ctrl->admin_sq); ++ err_destroy_admin_cq: ++ nvme_destroy_cq(&ctrl->admin_cq); ++ return -1; ++} ++ ++static int grub_nvme_pci_probe(grub_pci_device_t dev, grub_pci_id_t pciid __attribute__ ((unused)), void *data __attribute__ ((unused))) ++{ ++ grub_pci_address_t addr; ++ grub_uint32_t class, bar, version; ++ struct nvme_reg volatile *reg; ++ ++ class = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_CLASS)); ++ if (class >> 16 != 0x0108) ++ return 0; ++ if ((class >> 8 & 0xff) != 2) { /* as of NVM 1.0e */ ++ grub_dprintf("nvme", "Found incompatble NVMe: prog-if=%02x\n", class >> 8 & 0xff); ++ return 0; ++ } ++ ++ bar = grub_pci_read (grub_pci_make_address (dev, GRUB_PCI_REG_ADDRESS_REG0)); ++ reg = grub_pci_device_map_range (dev, bar & GRUB_PCI_ADDR_MEM_MASK, sizeof (*reg)); ++ ++ addr = grub_pci_make_address (dev, GRUB_PCI_REG_COMMAND); ++ grub_pci_write_word (addr, grub_pci_read_word (addr) | GRUB_PCI_COMMAND_MEM_ENABLED); ++ ++ version = reg->vs; ++ grub_dprintf("nvme", "Found NVMe controller with version %u.%u.%u.\n", version >> 16, (version >> 8) & 0xFF, version & 0xFF); ++ grub_dprintf("nvme", " Capabilities %016llx\n", reg->cap); ++ ++ if (~reg->cap & NVME_CAP_CSS_NVME) { ++ grub_dprintf("nvme", "Controller doesn't speak NVMe command set. Skipping.\n"); ++ goto err; ++ } ++ ++ struct nvme_ctrl *ctrl = grub_malloc(sizeof(*ctrl)); ++ if (!ctrl) ++ goto err; ++ ++ grub_memset(ctrl, 0, sizeof(*ctrl)); ++ ++ ctrl->reg = reg; ++ ctrl->pci = dev; ++ ++ if (grub_nvme_controller_enable(ctrl)) ++ goto err_free_ctrl; ++ ++ return 0; ++ ++ err_free_ctrl: ++ grub_free(ctrl); ++ err: ++ grub_dprintf("nvme", "Failed to enable NVMe controller.\n"); ++ return 0; ++} ++ ++static int ++grub_nvme_iterate (grub_disk_dev_iterate_hook_t hook, void *hook_data, grub_disk_pull_t pull) ++{ ++ struct nvme_namespace *ns; ++ ++ if (pull != GRUB_DISK_PULL_NONE) ++ return 0; ++ ++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces) ++ if (hook (ns->devname, hook_data)) ++ return 1; ++ ++ return 0; ++} ++ ++static grub_err_t ++grub_nvme_open (const char *name __attribute ((unused)), grub_disk_t disk __attribute ((unused))) ++{ ++ struct nvme_namespace *ns; ++ ++ FOR_LIST_ELEMENTS(ns, grub_nvme_namespaces) ++ if (grub_strcmp (ns->devname, name) == 0) ++ break; ++ ++ if (! ns) ++ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device"); ++ ++ disk->total_sectors = ns->lba_count; ++ disk->max_agglomerate = ns->max_req_size; ++ ++ disk->id = ns->nsnum; /* global id of the namespace */ ++ ++ disk->data = ns; ++ ++ return 0; ++} ++ ++static grub_err_t ++nvme_readwrite(struct nvme_namespace *ns, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf, int write) ++{ ++ for (int i = 0; i < num_sectors;) { ++ grub_uint16_t blocks_remaining = num_sectors - i; ++ char *op_buf = buf + i * ns->block_size; ++ int blocks = nvme_prpl_xfer(ns, sector + i, op_buf, blocks_remaining, write); ++ if (blocks < 0) ++ return GRUB_ERR_IO; ++ i += blocks; ++ } ++ return GRUB_ERR_NONE; ++} ++ ++static grub_err_t ++grub_nvme_read (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, char *buf) ++{ ++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 0); ++} ++ ++static grub_err_t ++grub_nvme_write (grub_disk_t disk, grub_disk_addr_t sector, grub_size_t num_sectors, const char *buf) ++{ ++ return nvme_readwrite((struct nvme_namespace *) disk->data, sector, num_sectors, buf, 1); ++} ++ ++static struct grub_disk_dev grub_nvme_dev = ++ { ++ .name = "nvme", ++ .id = GRUB_DISK_DEVICE_NVME_ID, ++ .disk_iterate = grub_nvme_iterate, ++ .disk_open = grub_nvme_open, ++ .disk_read = grub_nvme_read, ++ .disk_write = grub_nvme_write, ++ .next = 0 ++ }; ++ ++GRUB_MOD_INIT(nvme) ++{ ++ grub_stop_disk_firmware (); ++ grub_pci_iterate (grub_nvme_pci_probe, NULL); ++ grub_disk_dev_register (&grub_nvme_dev); ++} ++ ++GRUB_MOD_FINI(nvme) ++{ ++ grub_disk_dev_unregister (&grub_nvme_dev); ++} +diff --git a/include/grub/disk.h b/include/grub/disk.h +index fbf23df7f..186e76f0b 100644 +--- a/include/grub/disk.h ++++ b/include/grub/disk.h +@@ -52,6 +52,7 @@ enum grub_disk_dev_id + GRUB_DISK_DEVICE_UBOOTDISK_ID, + GRUB_DISK_DEVICE_XEN, + GRUB_DISK_DEVICE_OBDISK_ID, ++ GRUB_DISK_DEVICE_NVME_ID + }; + + struct grub_disk; +-- +2.39.2 +