// SPDX-License-Identifier: GPL-2.0-only
/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 */

#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/set_memory.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/badblocks.h>
#include <linux/memremap.h>
#include <linux/vmalloc.h>
#include <linux/blk-mq.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/nd.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include "pmem.h"
#include "pfn.h"
#include "nd.h"

static struct device *to_dev(struct pmem_device *pmem)
{
	/*
	 * nvdimm bus services need a 'dev' parameter, and we record the device
	 * at init in bb.dev.
	 */
	return pmem->bb.dev;
}

static struct nd_region *to_region(struct pmem_device *pmem)
{
	return to_nd_region(to_dev(pmem)->parent);
}

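/*
 * Clear the per-page HWPoison state and repair the direct-map entries
 * for a range whose media errors have just been cleared; only pmem in
 * the linear map is handled (see the check below).
 */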
static void hwpoison_clear(struct pmem_device *pmem,
		phys_addr_t phys, unsigned int len)
{
	unsigned long pfn_start, pfn_end, pfn;

	/* only pmem in the linear map supports HWPoison */
	if (is_vmalloc_addr(pmem->virt_addr))
		return;

	pfn_start = PHYS_PFN(phys);
	pfn_end = pfn_start + PHYS_PFN(len);
	for (pfn = pfn_start; pfn < pfn_end; pfn++) {
		struct page *page = pfn_to_page(pfn);

		/*
		 * Note, no need to hold a get_dev_pagemap() reference
		 * here since we're in the driver I/O path and
		 * outstanding I/O requests pin the dev_pagemap.
		 */
		if (test_and_clear_pmem_poison(page))
			clear_mce_nospec(pfn);
	}
}

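/*
 * Ask the nvdimm bus to clear media errors for @len bytes at @offset,
 * then bring the driver's view back in sync: drop HWPoison state, trim
 * the badblocks list, notify sysfs 'badblocks' watchers, and invalidate
 * stale CPU cachelines over the range. Returns BLK_STS_IOERR if fewer
 * than @len bytes could be cleared.
 */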
static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
		phys_addr_t offset, unsigned int len)
{
	struct device *dev = to_dev(pmem);
	sector_t sector;
	long cleared;
	blk_status_t rc = BLK_STS_OK;

	sector = (offset - pmem->data_offset) / 512;

	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
	if (cleared < len)
		rc = BLK_STS_IOERR;
	if (cleared > 0 && cleared / 512) {
		hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
		cleared /= 512;
		dev_dbg(dev, "%#llx clear %ld sector%s\n",
				(unsigned long long) sector, cleared,
				cleared > 1 ? "s" : "");
		badblocks_clear(&pmem->bb, sector, cleared);
		if (pmem->bb_state)
			sysfs_notify_dirent(pmem->bb_state);
	}

	arch_invalidate_pmem(pmem->virt_addr + offset, len);

	return rc;
}

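/*
 * Copy from a page into pmem one kmap'd PAGE_SIZE chunk at a time,
 * using memcpy_flushcache() so the stores are pushed out of the CPU
 * cache.
 */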
static void write_pmem(void *pmem_addr, struct page *page,
		unsigned int off, unsigned int len)
{
	unsigned int chunk;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		memcpy_flushcache(pmem_addr, mem + off, chunk);
		kunmap_atomic(mem);
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
}

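/*
 * Copy from pmem into a page with the machine-check-safe
 * copy_mc_to_kernel(); a short copy means poison was consumed, so the
 * read fails with BLK_STS_IOERR.
 */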
static blk_status_t read_pmem(struct page *page, unsigned int off,
		void *pmem_addr, unsigned int len)
{
	unsigned int chunk;
	unsigned long rem;
	void *mem;

	while (len) {
		mem = kmap_atomic(page);
		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
		kunmap_atomic(mem);
		if (rem)
			return BLK_STS_IOERR;
		len -= chunk;
		off = 0;
		page++;
		pmem_addr += chunk;
	}
	return BLK_STS_OK;
}

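/*
 * Read @len bytes at @sector from the namespace data area: fail up
 * front if the range intersects known badblocks, otherwise copy and
 * flush the destination page's dcache.
 */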
static blk_status_t pmem_do_read(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	blk_status_t rc;
	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		return BLK_STS_IOERR;

	rc = read_pmem(page, page_off, pmem_addr, len);
	flush_dcache_page(page);
	return rc;
}

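/*
 * Write @len bytes at @sector. Ranges covering known badblocks are
 * written, cleared with pmem_clear_poison(), and written again; the
 * comment below explains why the data goes down on both sides of the
 * clear.
 */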
static blk_status_t pmem_do_write(struct pmem_device *pmem,
			struct page *page, unsigned int page_off,
			sector_t sector, unsigned int len)
{
	blk_status_t rc = BLK_STS_OK;
	bool bad_pmem = false;
	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
	void *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		bad_pmem = true;

	/*
	 * Note that we write the data both before and after
	 * clearing poison. The write before clear poison
	 * handles situations where the latest written data is
	 * preserved and the clear poison operation simply marks
	 * the address range as valid without changing the data.
	 * In this case application software can assume that an
	 * interrupted write will either return the new good
	 * data or an error.
	 *
	 * However, if pmem_clear_poison() leaves the data in an
	 * indeterminate state we need to perform the write
	 * after clear poison.
	 */
	flush_dcache_page(page);
	write_pmem(pmem_addr, page, page_off, len);
	if (unlikely(bad_pmem)) {
		rc = pmem_clear_poison(pmem, pmem_off, len);
		write_pmem(pmem_addr, page, page_off, len);
	}

	return rc;
}

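/*
 * bio submission path: REQ_PREFLUSH is honored with an nvdimm_flush()
 * of the region before any data is copied and REQ_FUA with a flush
 * afterwards; each segment is handled synchronously by
 * pmem_do_read()/pmem_do_write().
 */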
static blk_qc_t pmem_submit_bio(struct bio *bio)
{
	int ret = 0;
	blk_status_t rc = 0;
	bool do_acct;
	unsigned long start;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct pmem_device *pmem = bio->bi_disk->private_data;
	struct nd_region *nd_region = to_region(pmem);

	if (bio->bi_opf & REQ_PREFLUSH)
		ret = nvdimm_flush(nd_region, bio);

	do_acct = blk_queue_io_stat(bio->bi_disk->queue);
	if (do_acct)
		start = bio_start_io_acct(bio);
	bio_for_each_segment(bvec, bio, iter) {
		if (op_is_write(bio_op(bio)))
			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
				iter.bi_sector, bvec.bv_len);
		else
			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
				iter.bi_sector, bvec.bv_len);
		if (rc) {
			bio->bi_status = rc;
			break;
		}
	}
	if (do_acct)
		bio_end_io_acct(bio, start);

	if (bio->bi_opf & REQ_FUA)
		ret = nvdimm_flush(nd_region, bio);

	if (ret)
		bio->bi_status = errno_to_blk_status(ret);

	bio_endio(bio);
	return BLK_QC_T_NONE;
}

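/* ->rw_page() entry point: synchronous read/write of one (possibly huge) page. */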
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	blk_status_t rc;

	if (op_is_write(op))
		rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
	else
		rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
	/*
	 * The ->rw_page interface is subtle and tricky. The core
	 * retries on any error, so we can only invoke page_endio() in
	 * the successful completion case. Otherwise, we'll see crashes
	 * caused by double completion.
	 */
	if (rc == 0)
		page_endio(page, op_is_write(op), 0);

	return blk_status_to_errno(rc);
}

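/*
 * Translate @pgoff within the namespace to a kernel virtual address and
 * pfn for DAX. Ranges that overlap badblocks are refused, and the
 * return value is the number of contiguous pages considered usable from
 * @pgoff.
 */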
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;

	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
					PFN_PHYS(nr_pages))))
		return -EIO;

	if (kaddr)
		*kaddr = pmem->virt_addr + offset;
	if (pfn)
		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);

	/*
	 * If badblocks are present, limit known good range to the
	 * requested range.
	 */
	if (unlikely(pmem->bb.count))
		return nr_pages;
	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}

static const struct block_device_operations pmem_fops = {
	.owner = THIS_MODULE,
	.submit_bio = pmem_submit_bio,
	.rw_page = pmem_rw_page,
};

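/*
 * DAX zero_page_range(): implemented as an ordinary pmem write of
 * ZERO_PAGE(0) so the badblock-clearing logic in pmem_do_write() is
 * reused.
 */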
static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				    size_t nr_pages)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
				   PFN_PHYS(pgoff) >> SECTOR_SHIFT,
				   PAGE_SIZE));
}

static long pmem_dax_direct_access(struct dax_device *dax_dev,
		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct pmem_device *pmem = dax_get_private(dax_dev);

	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
}

/*
 * Use the 'no check' versions of copy_from_iter_flushcache() and
 * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
 * checking, both file offset and device offset, is handled by
 * dax_iomap_actor()
 */
static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	return _copy_from_iter_flushcache(addr, bytes, i);
}

static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	return _copy_mc_to_iter(addr, bytes, i);
}

static const struct dax_operations pmem_dax_ops = {
	.direct_access = pmem_dax_direct_access,
	.dax_supported = generic_fsdax_supported,
	.copy_from_iter = pmem_copy_from_iter,
	.copy_to_iter = pmem_copy_to_iter,
	.zero_page_range = pmem_dax_zero_page_range,
};

static const struct attribute_group *pmem_attribute_groups[] = {
	&dax_attribute_group,
	NULL,
};

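/*
 * dev_pagemap lifecycle hooks. The pagemap's reference counter is the
 * request_queue's q_usage_counter (wired up in pmem_attach_disk()), so
 * killing the pagemap starts a queue freeze and cleaning it up tears
 * the queue down; pmem_release_disk() is the devm callback that unwinds
 * the dax device and gendisk.
 */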
static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
{
	struct request_queue *q =
		container_of(pgmap->ref, struct request_queue, q_usage_counter);

	blk_cleanup_queue(q);
}

static void pmem_release_queue(void *pgmap)
{
	pmem_pagemap_cleanup(pgmap);
}

static void pmem_pagemap_kill(struct dev_pagemap *pgmap)
{
	struct request_queue *q =
		container_of(pgmap->ref, struct request_queue, q_usage_counter);

	blk_freeze_queue_start(q);
}

static void pmem_release_disk(void *__pmem)
{
	struct pmem_device *pmem = __pmem;

	kill_dax(pmem->dax_dev);
	put_dax(pmem->dax_dev);
	del_gendisk(pmem->disk);
	put_disk(pmem->disk);
}

static const struct dev_pagemap_ops fsdax_pagemap_ops = {
	.kill = pmem_pagemap_kill,
	.cleanup = pmem_pagemap_cleanup,
};

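/*
 * Set up the block device for a pmem namespace: map it (directly via
 * devm_memremap(), or with struct pages via devm_memremap_pages() for
 * the DAX-capable modes), size the gendisk past any pfn metadata and
 * padding, populate badblocks from the region, and register a
 * dax_device alongside the disk.
 */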
static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int nid = dev_to_node(dev), fua;
	struct resource *res = &nsio->res;
	struct range bb_range;
	struct nd_pfn *nd_pfn = NULL;
	struct dax_device *dax_dev;
	struct nd_pfn_sb *pfn_sb;
	struct pmem_device *pmem;
	struct request_queue *q;
	struct device *gendev;
	struct gendisk *disk;
	void *addr;
	int rc;
	unsigned long flags = 0UL;

	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return -ENOMEM;

	rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (rc)
		return rc;

	/* while nsio_rw_bytes is active, parse a pfn info block if present */
	if (is_nd_pfn(dev)) {
		nd_pfn = to_nd_pfn(dev);
		rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
		if (rc)
			return rc;
	}

	/* we're attaching a block device, disable raw namespace access */
	devm_namespace_disable(dev, ndns);

	dev_set_drvdata(dev, pmem);
	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
	fua = nvdimm_has_flush(nd_region);
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) {
		dev_warn(dev, "unable to guarantee persistence of writes\n");
		fua = 0;
	}

	if (!devm_request_mem_region(dev, res->start, resource_size(res),
				dev_name(&ndns->dev))) {
		dev_warn(dev, "could not reserve region %pR\n", res);
		return -EBUSY;
	}

	q = blk_alloc_queue(dev_to_node(dev));
	if (!q)
		return -ENOMEM;

	pmem->pfn_flags = PFN_DEV;
	pmem->pgmap.ref = &q->q_usage_counter;
	if (is_nd_pfn(dev)) {
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		pmem->pgmap.ops = &fsdax_pagemap_ops;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pfn_sb = nd_pfn->pfn_sb;
		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
		pmem->pfn_pad = resource_size(res) -
			range_len(&pmem->pgmap.range);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
		bb_range.start += pmem->data_offset;
	} else if (pmem_should_map_pages(dev)) {
		pmem->pgmap.range.start = res->start;
		pmem->pgmap.range.end = res->end;
		pmem->pgmap.nr_range = 1;
		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
		pmem->pgmap.ops = &fsdax_pagemap_ops;
		addr = devm_memremap_pages(dev, &pmem->pgmap);
		pmem->pfn_flags |= PFN_MAP;
		bb_range = pmem->pgmap.range;
	} else {
		addr = devm_memremap(dev, pmem->phys_addr,
				pmem->size, ARCH_MEMREMAP_PMEM);
		if (devm_add_action_or_reset(dev, pmem_release_queue,
					&pmem->pgmap))
			return -ENOMEM;
		bb_range.start = res->start;
		bb_range.end = res->end;
	}

	if (IS_ERR(addr))
		return PTR_ERR(addr);
	pmem->virt_addr = addr;

	blk_queue_write_cache(q, true, fua);
	blk_queue_physical_block_size(q, PAGE_SIZE);
	blk_queue_logical_block_size(q, pmem_sector_size(ndns));
	blk_queue_max_hw_sectors(q, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	if (pmem->pfn_flags & PFN_MAP)
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);

	disk = alloc_disk_node(0, nid);
	if (!disk)
		return -ENOMEM;
	pmem->disk = disk;

	disk->fops = &pmem_fops;
	disk->queue = q;
	disk->flags = GENHD_FL_EXT_DEVT;
	disk->private_data = pmem;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ 512);
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
	disk->bb = &pmem->bb;

	if (is_nvdimm_sync(nd_region))
		flags = DAXDEV_F_SYNC;
	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
	if (IS_ERR(dax_dev)) {
		put_disk(disk);
		return PTR_ERR(dax_dev);
	}
	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
	pmem->dax_dev = dax_dev;
	gendev = disk_to_dev(disk);
	gendev->groups = pmem_attribute_groups;

	device_add_disk(dev, disk, NULL);
	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
		return -ENOMEM;

	nvdimm_check_and_set_ro(disk);

	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
					  "badblocks");
	if (!pmem->bb_state)
		dev_warn(dev, "'badblocks' notification disabled\n");

	return 0;
}

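/*
 * Probe entry point: BTT and pfn devices attach their disk directly;
 * for a plain namespace, probe for BTT, pfn and dax info blocks (a 0
 * return means the namespace is claimed by that personality, so fail
 * this probe with -ENXIO) and otherwise attach a raw pmem disk.
 */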
static int nd_pmem_probe(struct device *dev)
{
	int ret;
	struct nd_namespace_common *ndns;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	if (is_nd_btt(dev))
		return nvdimm_namespace_attach_btt(ndns);

	if (is_nd_pfn(dev))
		return pmem_attach_disk(dev, ndns);

	ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
	if (ret)
		return ret;

	ret = nd_btt_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;

	/*
	 * We have two failure conditions here: either there is no
	 * info reserve block, or we found a valid info reserve block
	 * but failed to initialize the pfn superblock.
	 *
	 * For the first case consider the namespace as a raw pmem
	 * namespace and attach a disk.
	 *
	 * For the latter, consider this a success and advance the
	 * namespace seed.
	 */
	ret = nd_pfn_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	ret = nd_dax_probe(dev, ndns);
	if (ret == 0)
		return -ENXIO;
	else if (ret == -EOPNOTSUPP)
		return ret;

	/* probe complete, attach handles namespace enabling */
	devm_namespace_disable(dev, ndns);

	return pmem_attach_disk(dev, ndns);
}

static int nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev));
	else {
		/*
		 * Note, this assumes nd_device_lock() context to not
		 * race nd_pmem_notify()
		 */
		sysfs_put(pmem->bb_state);
		pmem->bb_state = NULL;
	}
	nvdimm_flush(to_nd_region(dev->parent), NULL);

	return 0;
}

static void nd_pmem_shutdown(struct device *dev)
{
	nvdimm_flush(to_nd_region(dev->parent), NULL);
}

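/*
 * NVDIMM_REVALIDATE_POISON notification: re-populate the badblocks list
 * for the affected namespace (accounting for pfn metadata start_pad and
 * end_trunc) and poke the sysfs 'badblocks' dirent so userspace can
 * re-read it.
 */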
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
	struct nd_region *nd_region;
	resource_size_t offset = 0, end_trunc = 0;
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct badblocks *bb;
	struct range range;
	struct kernfs_node *bb_state;

	if (event != NVDIMM_REVALIDATE_POISON)
		return;

	if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		ndns = nd_btt->ndns;
		nd_region = to_nd_region(ndns->dev.parent);
		nsio = to_nd_namespace_io(&ndns->dev);
		bb = &nsio->bb;
		bb_state = NULL;
	} else {
		struct pmem_device *pmem = dev_get_drvdata(dev);

		nd_region = to_region(pmem);
		bb = &pmem->bb;
		bb_state = pmem->bb_state;

		if (is_nd_pfn(dev)) {
			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
			struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

			ndns = nd_pfn->ndns;
			offset = pmem->data_offset +
					__le32_to_cpu(pfn_sb->start_pad);
			end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
		} else {
			ndns = to_ndns(dev);
		}

		nsio = to_nd_namespace_io(&ndns->dev);
	}

	range.start = nsio->res.start + offset;
	range.end = nsio->res.end - end_trunc;
	nvdimm_badblocks_populate(nd_region, bb, &range);
	if (bb_state)
		sysfs_notify_dirent(bb_state);
}

MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.notify = nd_pmem_notify,
	.shutdown = nd_pmem_shutdown,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

module_nd_driver(nd_pmem_driver);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");