From 18e75a7134e0130b925fffab13f41c1ffc4d9f05 Mon Sep 17 00:00:00 2001
From: Robert Elliott
Date: Fri, 22 May 2015 16:46:21 -0500
Subject: [PATCH] pmem cache type patch

Author: Robert Elliott
Date:   Tue Apr 28 19:14:53 2015 -0500

pmem: cache_type, non-temporal memcpy experiments

WARNING: Not for inclusion in the kernel - just for experimentation.

Add modparams to select the cache_type and various kinds of memcpy
with non-temporal loads and stores.  The parameters are printed to the
kernel serial log at module load time.

Example usage:
    modprobe pmem pmem_cachetype=2 pmem_readscan=2 pmem_ntw=1 pmem_ntr=1

x86 offers several non-temporal instructions:
 * 8 byte:  movnti (store) from normal registers
 * 16 byte: movntdq (store) and movntdqa (load) using xmm registers (SSE)
 * 32 byte: vmovntdq and vmovntdqa using ymm registers (AVX)
 * 64 byte: vmovntdq and vmovntdqa using zmm registers (AVX512)

The 32-byte AVX instructions are used by this patch.

Normal memcpy is still used for unaligned pmem_rw_bytes accesses, so
those accesses remain unsafe in WB mode.

Module parameters
=================
pmem_cachetype=n (default 3)
    Select the cache type (i.e., which ioremap function is used to map
    the NVDIMM memory):
    0 = UC (uncacheable)     - slow writes, slow reads
    1 = WB (writeback)       - fast unsafe writes, fast reads
    2 = WC (write combining) - fast writes, slow reads
    3 = WT (writethrough)    - slow writes, fast reads

    WB writes are safe if one of the following is done:
    * non-temporal stores are used exclusively
    * clflush instructions are added

pmem_readscan=n (default 0)
    0 = no read scan
    1 = read the entire memory range, looking to trigger UC memory
        errors.  The rate is also printed, serving as a quick
        performance check (uses a 64-byte loop with NT loads).

pmem_clean=n (default 0)
    0 = no clean
    1 = overwrite the entire memory range, possibly clearing UC memory
        errors (dangerous - destroys all data).  The rate is also
        printed, serving as a quick performance check (uses a 64-byte
        loop with NT stores).

pmem_ntw=n (default 3)
    Select how block writes copy data into persistent memory:
    0 = memcpy (unsafe for WB)
    1 = 64-byte loop with NT stores
    2 = 128-byte loop with NT stores
    3 = 64-byte loop with NT stores, plus NT loads from normal memory
        (may be better cache usage)
    4 = 128-byte loop with NT stores, plus NT loads from normal memory
    5 = __copy_from_user (existing kernel function with 8-byte NT
        instructions)
    6 = no write at all (nop) (dangerous)
    7 = 64-byte loop, store only (writes garbage) (dangerous)

pmem_ntr=n (default 3)
    Select how block reads copy data out of persistent memory:
    0 = memcpy
    1 = 64-byte loop with NT loads
    2 = 128-byte loop with NT loads
    3 = 64-byte loop with NT loads, plus NT stores to normal memory
    4 = 128-byte loop with NT loads, plus NT stores to normal memory
    5 = memcpy
    6 = no load at all (nop) (dangerous)
    7 = 64-byte loop, load only (returns garbage) (dangerous)

pmem_ntw=6 pmem_ntr=6 exhibits the block layer IOPS limits.
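
For reference, the 64-byte dual-NT copy used by the default
pmem_ntw=3/pmem_ntr=3 settings is roughly equivalent to the following
user-space sketch built on AVX intrinsics.  This is illustrative only
and is not part of the patch: nt_copy_64() is a hypothetical name, the
32-byte non-temporal load intrinsic requires AVX2 (the kernel code in
the diff below emits vmovntdqa directly), and, as in the patch, both
pointers and the length must be 64-byte aligned.

    #include <immintrin.h>
    #include <stddef.h>

    /* Hypothetical user-space analogue of memcpy_lnt_snt_64():
     * NT loads + NT stores, 64 bytes per iteration, then a store fence.
     * Build with gcc -O2 -mavx2.
     */
    static void nt_copy_64(void *to, const void *from, size_t size)
    {
            char *d = to;
            const char *s = from;
            size_t i;

            for (i = 0; i < size; i += 64) {
                    __m256i a = _mm256_stream_load_si256((const __m256i *)(s + i));
                    __m256i b = _mm256_stream_load_si256((const __m256i *)(s + i) + 1);

                    _mm256_stream_si256((__m256i *)(d + i), a);     /* vmovntdq */
                    _mm256_stream_si256((__m256i *)(d + i) + 1, b);
            }
            _mm_sfence();   /* order the non-temporal stores before returning */
    }

Note that vmovntdqa only acts as a true non-temporal (streaming) load
on WC memory; on WB mappings it behaves like an ordinary load, which is
why the "may be better cache usage" claim above is speculative.
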
Signed-off-by: Robert Elliott
---
 drivers/block/nd/pmem.c | 550 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 539 insertions(+), 11 deletions(-)

diff --git a/drivers/block/nd/pmem.c b/drivers/block/nd/pmem.c
index 7b5cedf1f2a4..f378ef81733f 100644
--- a/drivers/block/nd/pmem.c
+++ b/drivers/block/nd/pmem.c
@@ -26,6 +26,382 @@
 #include
 #include "nd.h"
 
+static int pmem_cachetype;	/* default UC */
+module_param(pmem_cachetype, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(pmem_cachetype,
+	"Select cache attribute for pmem driver (0=UC, 1=WB 2=WC 3=WT)");
+
+static int pmem_readscan;
+module_param(pmem_readscan, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(pmem_readscan,
+	"Read scan pmem device upon init (trigger ECC errors)");
+
+static int pmem_clean;
+module_param(pmem_clean, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(pmem_clean,
+	"Clean pmem device upon init (write garbage, but cleans the ECC)");
+
+static int pmem_ntw = 3;
+module_param(pmem_ntw, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(pmem_ntw,
+	"Use non-temporal stores for block writes in pmem (0=memcpy, 1=64 byte NT, 2=128 byte NT, 3=64 dual NT, 4=128 dual NT, 5=copy_from_user, 6=nop, 7=64-byte NT-store only)");
+
+static int pmem_ntr = 3;
+module_param(pmem_ntr, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(pmem_ntr,
+	"Use non-temporal loads for block reads in pmem (0=memcpy, 1=64 byte NT, 2=128 byte NT, 3=64 dual NT, 4=128 dual NT, 5=memcpy, 6=nop, 7=64-byte NT-load only)");
+
+/* load: normal, store: non-temporal, loop: 64 bytes */
+static void memcpy_lt_snt_64(void *to, const void *from, size_t size)
+{
+	u64 bs = 64;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			/* 16-byte SSE instructions */
+			"movdqa (%0), %%xmm0\n"
+			"movdqa 16(%0), %%xmm1\n"
+			"movdqa 32(%0), %%xmm2\n"
+			"movdqa 48(%0), %%xmm3\n"
+			"movntdq %%xmm0, (%1)\n"
+			"movntdq %%xmm1, 16(%1)\n"
+			"movntdq %%xmm2, 32(%1)\n"
+			"movntdq %%xmm3, 48(%1)\n"
+#endif
+			/* 32-byte AVX instructions */
+			"vmovdqa (%0), %%ymm0\n"
+			"vmovdqa 32(%0), %%ymm1\n"
+			"vmovntdq %%ymm0, (%1)\n"
+			"vmovntdq %%ymm1, 32(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: skip, store: non-temporal, loop: 64 bytes */
+static void memcpy_lskip_snt_64(void *to, const void *from, size_t size)
+{
+	u64 bs = 64;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"movntdq %%xmm0, (%1)\n"
+			"movntdq %%xmm1, 16(%1)\n"
+			"movntdq %%xmm2, 32(%1)\n"
+			"movntdq %%xmm3, 48(%1)\n"
+#endif
+			"vmovntdq %%ymm0, (%1)\n"
+			"vmovntdq %%ymm1, 32(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: non-temporal, store: non-temporal, loop: 64 bytes */
+static void memcpy_lnt_snt_64(void *to, const void *from, size_t size)
+{
+	u64 bs = 64;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"movntdqa (%0), %%xmm0\n"
+			"movntdqa 16(%0), %%xmm1\n"
+			"movntdqa 32(%0), %%xmm2\n"
+			"movntdqa 48(%0), %%xmm3\n"
+			"movntdq %%xmm0, (%1)\n"
+			"movntdq %%xmm1, 16(%1)\n"
+			"movntdq %%xmm2, 32(%1)\n"
+			"movntdq %%xmm3, 48(%1)\n"
+#endif
+			"vmovntdqa (%0), %%ymm0\n"
+			"vmovntdqa 32(%0), %%ymm1\n"
+			"vmovntdq %%ymm0, (%1)\n"
+			"vmovntdq %%ymm1, 32(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: normal, store: non-temporal, loop: 128 bytes */
+static void memcpy_lt_snt_128(void *to, const void *from, size_t size)
+{
+	u64 bs = 128;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			/* hard to use prefetch effectively */
+			"prefetchnta 128(%0)\n"
+			"prefetchnta 192(%0)\n"
+#endif
+#if 0
+			"movdqa (%0), %%xmm0\n"
+			"movdqa 16(%0), %%xmm1\n"
+			"movdqa 32(%0), %%xmm2\n"
+			"movdqa 48(%0), %%xmm3\n"
+			"movdqa 64(%0), %%xmm4\n"
+			"movdqa 80(%0), %%xmm5\n"
+			"movdqa 96(%0), %%xmm6\n"
+			"movdqa 112(%0), %%xmm7\n"
+			"movntdq %%xmm0, (%1)\n"
+			"movntdq %%xmm1, 16(%1)\n"
+			"movntdq %%xmm2, 32(%1)\n"
+			"movntdq %%xmm3, 48(%1)\n"
+			"movntdq %%xmm4, 64(%1)\n"
+			"movntdq %%xmm5, 80(%1)\n"
+			"movntdq %%xmm6, 96(%1)\n"
+			"movntdq %%xmm7, 112(%1)\n"
+#endif
+			"vmovdqa (%0), %%ymm0\n"
+			"vmovdqa 32(%0), %%ymm1\n"
+			"vmovdqa 64(%0), %%ymm2\n"
+			"vmovdqa 96(%0), %%ymm3\n"
+			"vmovntdq %%ymm0, (%1)\n"
+			"vmovntdq %%ymm1, 32(%1)\n"
+			"vmovntdq %%ymm2, 64(%1)\n"
+			"vmovntdq %%ymm3, 96(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: non-temporal, store: non-temporal, loop: 128 bytes */
+static void memcpy_lnt_snt_128(void *to, const void *from, size_t size)
+{
+	u64 bs = 128;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"prefetchnta 128(%0)\n"
+			"prefetchnta 192(%0)\n"
+#endif
+#if 0
+			"movntdqa (%0), %%xmm0\n"
+			"movntdqa 16(%0), %%xmm1\n"
+			"movntdqa 32(%0), %%xmm2\n"
+			"movntdqa 48(%0), %%xmm3\n"
+			"movntdqa 64(%0), %%xmm4\n"
+			"movntdqa 80(%0), %%xmm5\n"
+			"movntdqa 96(%0), %%xmm6\n"
+			"movntdqa 112(%0), %%xmm7\n"
+			"movntdq %%xmm0, (%1)\n"
+			"movntdq %%xmm1, 16(%1)\n"
+			"movntdq %%xmm2, 32(%1)\n"
+			"movntdq %%xmm3, 48(%1)\n"
+			"movntdq %%xmm4, 64(%1)\n"
+			"movntdq %%xmm5, 80(%1)\n"
+			"movntdq %%xmm6, 96(%1)\n"
+			"movntdq %%xmm7, 112(%1)\n"
+#endif
+			"vmovntdqa (%0), %%ymm0\n"
+			"vmovntdqa 32(%0), %%ymm1\n"
+			"vmovntdqa 64(%0), %%ymm2\n"
+			"vmovntdqa 96(%0), %%ymm3\n"
+			"vmovntdq %%ymm0, (%1)\n"
+			"vmovntdq %%ymm1, 32(%1)\n"
+			"vmovntdq %%ymm2, 64(%1)\n"
+			"vmovntdq %%ymm3, 96(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: non-temporal, store: normal, loop: 64 bytes */
+static void memcpy_lnt_st_64(void *to, const void *from, size_t size)
+{
+	u64 bs = 64;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"movntdqa (%0), %%xmm0\n"
+			"movntdqa 16(%0), %%xmm1\n"
+			"movntdqa 32(%0), %%xmm2\n"
+			"movntdqa 48(%0), %%xmm3\n"
+			"movdqa %%xmm0, (%1)\n"
+			"movdqa %%xmm1, 16(%1)\n"
+			"movdqa %%xmm2, 32(%1)\n"
+			"movdqa %%xmm3, 48(%1)\n"
+#endif
+			"vmovntdqa (%0), %%ymm0\n"
+			"vmovntdqa 32(%0), %%ymm1\n"
+			"vmovdqa %%ymm0, (%1)\n"
+			"vmovdqa %%ymm1, 32(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: non-temporal, store: skip, loop: 64 bytes */
+static void memcpy_lnt_sskip_64(void *to, const void *from, size_t size)
+{
+	u64 bs = 64;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"movntdqa (%0), %%xmm0\n"
+			"movntdqa 16(%0), %%xmm1\n"
+			"movntdqa 32(%0), %%xmm2\n"
+			"movntdqa 48(%0), %%xmm3\n"
+#endif
+			"vmovntdqa (%0), %%ymm0\n"
+			"vmovntdqa 32(%0), %%ymm1\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
+/* load: non-temporal, store: normal, loop: 128 bytes */
+static void memcpy_lnt_st_128(void *to, const void *from, size_t size)
+{
+	u64 bs = 128;
+	int i;
+
+	BUG_ON(!IS_ALIGNED(size, bs));
+	BUG_ON(!IS_ALIGNED((u64)to, bs));
+	BUG_ON(!IS_ALIGNED((u64)from, bs));
+
+	for (i = 0; i < size; i += bs) {
+		__asm__ __volatile__ (
+#if 0
+			"prefetchnta 128(%0)\n"
+			"prefetchnta 192(%0)\n"
+#endif
+#if 0
+			"movntdqa (%0), %%xmm0\n"
+			"movntdqa 16(%0), %%xmm1\n"
+			"movntdqa 32(%0), %%xmm2\n"
+			"movntdqa 48(%0), %%xmm3\n"
+			"movntdqa 64(%0), %%xmm4\n"
+			"movntdqa 80(%0), %%xmm5\n"
+			"movntdqa 96(%0), %%xmm6\n"
+			"movntdqa 112(%0), %%xmm7\n"
+			"movdqa %%xmm0, (%1)\n"
+			"movdqa %%xmm1, 16(%1)\n"
+			"movdqa %%xmm2, 32(%1)\n"
+			"movdqa %%xmm3, 48(%1)\n"
+			"movdqa %%xmm4, 64(%1)\n"
+			"movdqa %%xmm5, 80(%1)\n"
+			"movdqa %%xmm6, 96(%1)\n"
+			"movdqa %%xmm7, 112(%1)\n"
+#endif
+			"vmovntdqa (%0), %%ymm0\n"
+			"vmovntdqa 32(%0), %%ymm1\n"
+			"vmovntdqa 64(%0), %%ymm2\n"
+			"vmovntdqa 96(%0), %%ymm3\n"
+			"vmovdqa %%ymm0, (%1)\n"
+			"vmovdqa %%ymm1, 32(%1)\n"
+			"vmovdqa %%ymm2, 64(%1)\n"
+			"vmovdqa %%ymm3, 96(%1)\n"
+			:
+			: "r" (from), "r" (to)
+			: "memory");
+
+		to += bs;
+		from += bs;
+	}
+
+	__asm__ __volatile__ (
+		"	sfence\n" : :
+	);
+}
+
 struct pmem_device {
 	struct request_queue *pmem_queue;
 	struct gendisk *pmem_disk;
@@ -37,6 +413,81 @@ struct pmem_device {
 	size_t size;
 };
 
+/* pick the type of memcpy for a read from NVDIMMs */
+static void memcpy_ntr(void *to, const void *from, size_t size)
+{
+	switch (pmem_ntr) {
+	case 1:
+		memcpy_lnt_st_64(to, from, size);
+		break;
+	case 2:
+		memcpy_lnt_st_128(to, from, size);
+		break;
+	case 3:
+		memcpy_lnt_snt_64(to, from, size);
+		break;
+	case 4:
+		memcpy_lnt_snt_128(to, from, size);
+		break;
+	case 6:
+		/* nop */
+		break;
+	case 7:
+		memcpy_lnt_sskip_64(to, from, size);
+		break;
+	default:
+		memcpy(to, from, size);
+		break;
+	}
+}
+
+/* pick the type of memcpy for a write to NVDIMMs */
+static void memcpy_ntw(void *to, const void *from, size_t size)
+{
+	int ret;
+	switch (pmem_ntw) {
+	case 1:
+		memcpy_lt_snt_64(to, from, size);
+		ret = 0;
+		break;
+	case 2:
+		memcpy_lt_snt_128(to, from, size);
+		ret = 0;
+		break;
+	case 3:
+		memcpy_lnt_snt_64(to, from, size);
+		ret = 0;
+		break;
+	case 4:
+		memcpy_lnt_snt_128(to, from, size);
+		ret = 0;
+		break;
+	case 5:
+		ret = __copy_from_user(to, from, size);
+		if (ret)
+			goto exit;
+	case 6:
+		/* nop */
+		ret = 0;
+		break;
+	case 7:
+		memcpy_lskip_snt_64(to, from, size);
+		ret = 0;
+		break;
+	default:
+		memcpy(to, from, size);
+		ret = 0;
+		break;
+	}
+exit:
+	/* if __copy_from_user or other memcpy functions with return
+	 * values are used, the return value should really be
+	 * propagated upstream.  Since most memcpys assume success,
+	 * forgo this for now
+	 */
+	return;
+}
+
 static int pmem_major;
 
 static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
@@ -47,11 +498,11 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	size_t pmem_off = sector << 9;
 
 	if (rw == READ) {
-		memcpy(mem + off, pmem->virt_addr + pmem_off, len);
+		memcpy_ntr(mem + off, pmem->virt_addr + pmem_off, len);
 		flush_dcache_page(page);
 	} else {
 		flush_dcache_page(page);
-		memcpy(pmem->virt_addr + pmem_off, mem + off, len);
+		memcpy_ntw(pmem->virt_addr + pmem_off, mem + off, len);
 	}
 
 	kunmap_atomic(mem);
@@ -109,10 +560,26 @@ static int pmem_rw_bytes(struct nd_io *ndio, void *buf, size_t offset,
 		return -EFAULT;
 	}
 
-	if (rw == READ)
-		memcpy(buf, pmem->virt_addr + offset, n);
-	else
-		memcpy(pmem->virt_addr + offset, buf, n);
+	/* NOTE: Plain memcpy is used for unaligned accesses, meaning
+	 * this is not safe for WB mode.
+	 *
+	 * All btt accesses come through here; many are not aligned.
+	 */
+	if (rw == READ) {
+		if (IS_ALIGNED((u64) buf, 64) &&
+		    IS_ALIGNED((u64) pmem->virt_addr + offset, 64) &&
+		    IS_ALIGNED(n, 64))
+			memcpy_ntr(buf, pmem->virt_addr + offset, n);
+		else
+			memcpy(buf, pmem->virt_addr + offset, n);
+	} else {
+		if (IS_ALIGNED((u64) buf, 64) &&
+		    IS_ALIGNED((u64) pmem->virt_addr + offset, 64) &&
+		    IS_ALIGNED(n, 64))
+			memcpy_ntw(pmem->virt_addr + offset, buf, n);
+		else
+			memcpy(pmem->virt_addr + offset, buf, n);
+	}
 
 	return 0;
 }
@@ -143,6 +610,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	struct pmem_device *pmem;
 	struct gendisk *disk;
 	int err;
+	u64 ts, te;
 
 	err = -ENOMEM;
 	pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
@@ -152,21 +620,78 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
+	dev_info(dev,
+		"mapping phys=0x%llx (%lld GiB) size=0x%zx (%ld GiB)\n",
+		pmem->phys_addr, pmem->phys_addr / (1024*1024*1024),
+		pmem->size, pmem->size / (1024*1024*1024));
+
 	err = -EINVAL;
 	if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
 		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
				&pmem->phys_addr, pmem->size);
 		goto out_free_dev;
 	}
 
-	/*
-	 * Map the memory as non-cachable, as we can't write back the contents
-	 * of the CPU caches in case of a crash.
-	 */
 	err = -ENOMEM;
-	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+	switch (pmem_cachetype) {
+	case 0: /* UC */
+		pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
+		break;
+	case 1: /* WB */
+		/* WB is unsafe unless system flushes caches on power loss */
+		pmem->virt_addr = ioremap_cache(pmem->phys_addr, pmem->size);
+		break;
+	case 2: /* WC */
+		/* WC is unsafe unless system flushes buffers on power loss */
+		pmem->virt_addr = ioremap_wc(pmem->phys_addr, pmem->size);
+		break;
+	case 3: /* WT */
+	default:
+		pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size);
+		break;
+	}
+
+	dev_info(dev,
+		"mapped: cache_type=%d virt=0x%p phys=0x%llx (%lld GiB) size=0x%zx (%ld GiB)\n",
+		pmem_cachetype,
+		pmem->virt_addr,
+		pmem->phys_addr, pmem->phys_addr / (1024*1024*1024),
+		pmem->size, pmem->size / (1024*1024*1024));
+
 	if (!pmem->virt_addr)
 		goto out_release_region;
 
+	if (pmem_clean) {
+		/* write all of NVDIMM memory to clear any ECC errors */
+		dev_info(dev,
+			"write clean starting: virt=0x%p phys=0x%llx (%lld GiB) size=0x%zx (%ld GiB)\n",
+			pmem->virt_addr,
+			pmem->phys_addr, pmem->phys_addr / (1024*1024*1024),
+			pmem->size, pmem->size / (1024*1024*1024));
+		ts = local_clock();
+		memcpy_lskip_snt_64(pmem->virt_addr, NULL, pmem->size);
+		te = local_clock();
+		dev_info(dev,
+			"write clean complete: ct=%d in %lld GB/s\n",
+			pmem_cachetype,
+			pmem->size / (te - ts)); /* B/ns equals GB/s */
+	}
+
+	/* read all of NVDIMM memory to trigger any ECC errors now */
+	if (pmem_readscan) {
+		dev_info(dev,
+			"read scan starting: virt=0x%p phys=0x%llx (%lld GiB) size=0x%zx (%ld GiB)\n",
+			pmem->virt_addr,
+			pmem->phys_addr, pmem->phys_addr / (1024*1024*1024),
+			pmem->size, pmem->size / (1024*1024*1024));
+		ts = local_clock();
+		memcpy_lnt_sskip_64(0, pmem->virt_addr, pmem->size);
+		te = local_clock();
+		dev_info(dev,
+			"read scan complete: ct=%d in %lld GB/s\n",
+			pmem_cachetype,
+			pmem->size / (te - ts)); /* B/ns equals GB/s */
+	}
+
 	pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!pmem->pmem_queue)
 		goto out_unmap;
@@ -276,6 +801,9 @@ static int __init pmem_init(void)
 {
 	int error;
 
+	pr_info("pmem loading with pmem_readscan=%d pmem_clean=%d pmem_cachetype=%d pmem_ntw=%d pmem_ntr=%d\n",
+		pmem_readscan, pmem_clean, pmem_cachetype, pmem_ntw, pmem_ntr);
+
 	pmem_major = register_blkdev(0, "pmem");
 	if (pmem_major < 0)
 		return pmem_major;
-- 
1.8.3.1
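
A side note on the WB case: the commit message and the ioremap_cache
branch above both point out that a writeback mapping is only safe if
the CPU caches are explicitly flushed (or if non-temporal stores are
used exclusively).  Purely for illustration - flush_pmem_range() below
is a hypothetical helper and is not added by this patch; in-kernel code
would more likely reuse the existing clflush_cache_range() - a
cache-line flush over a pmem range looks roughly like this:

    #include <immintrin.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical sketch: flush every cache line touched by a write to
     * a WB-mapped pmem range, assuming 64-byte cache lines.
     */
    static void flush_pmem_range(const void *addr, size_t size)
    {
            const uintptr_t line = 64;
            uintptr_t p = (uintptr_t)addr & ~(line - 1);
            uintptr_t end = (uintptr_t)addr + size;

            for (; p < end; p += line)
                    _mm_clflush((const void *)p);   /* write back + evict the line */
            _mm_mfence();   /* ensure the flushes complete (mfence orders clflush) */
    }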