From a8e2e0b434c081722330335f7e9628e0d8d17494 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 2 Jul 2013 11:15:15 +0530 Subject: [PATCH 01/11] mm/cma: Move dma contiguous changes into a seperate config We want to use CMA for allocating hash page table and real mode area for PPC64. Hence move DMA contiguous related changes into a seperate config so that ppc64 can enable CMA without requiring DMA contiguous. Acked-by: Michal Nazarewicz Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V [removed defconfig changes] Signed-off-by: Marek Szyprowski (cherry picked from commit f825c736e75b11adb59ec52a4a1096efddd2ec97) Signed-off-by: Alex Shi --- arch/arm/include/asm/dma-contiguous.h | 2 +- arch/arm/mm/dma-mapping.c | 6 +++--- drivers/base/Kconfig | 20 ++++---------------- drivers/base/Makefile | 2 +- include/linux/dma-contiguous.h | 2 +- mm/Kconfig | 24 ++++++++++++++++++++++++ 6 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index 3ed37b4d93da..e072bb2ba1b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -2,7 +2,7 @@ #define ASMARM_DMA_CONTIGUOUS_H #ifdef __KERNEL__ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac96..1fb40dc37ec2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void) if (!pages) goto no_pages; - if (IS_ENABLED(CONFIG_CMA)) + if (IS_ENABLED(CONFIG_DMA_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, atomic_pool_init); else @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!IS_ENABLED(CONFIG_CMA)) + else if (!IS_ENABLED(CONFIG_DMA_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; - } else if (!IS_ENABLED(CONFIG_CMA)) { + } else if (!IS_ENABLED(CONFIG_DMA_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 07abd9d76f7f..10cd80af2aec 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -202,11 +202,9 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. -config CMA - bool "Contiguous Memory Allocator" - depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK - select MIGRATION - select MEMORY_ISOLATION +config DMA_CMA + bool "DMA Contiguous Memory Allocator" + depends on HAVE_DMA_CONTIGUOUS && CMA help This enables the Contiguous Memory Allocator which allows drivers to allocate big physically-contiguous blocks of memory for use with @@ -215,17 +213,7 @@ config CMA For more information see . If unsure, say "n". -if CMA - -config CMA_DEBUG - bool "CMA debug messages (DEVELOPMENT)" - depends on DEBUG_KERNEL - help - Turns on debug messages in CMA. This produces KERN_DEBUG - messages for every CMA call as well as various messages while - processing calls such as dma_alloc_from_contiguous(). - This option does not affect warning and error messages. - +if DMA_CMA comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 4e22ce3ed73d..5d93bb519753 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_CMA) += dma-contiguous.o +obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 01b5c84be828..00141d3325fe 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -57,7 +57,7 @@ struct cma; struct page; struct device; -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA /* * There is always at least global CMA area and a few optional device diff --git a/mm/Kconfig b/mm/Kconfig index e742d06285b7..26a5f815cfc3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -477,3 +477,27 @@ config FRONTSWAP and swap data is stored as normal on the matching swap device. If unsure, say Y to enable frontswap. + +config CMA + bool "Contiguous Memory Allocator" + depends on HAVE_MEMBLOCK + select MIGRATION + select MEMORY_ISOLATION + help + This enables the Contiguous Memory Allocator which allows other + subsystems to allocate big physically-contiguous blocks of memory. + CMA reserves a region of memory and allows only movable pages to + be allocated from it. This way, the kernel can use the memory for + pagecache and when a subsystem requests for contiguous area, the + allocated pages are migrated away to serve the contiguous request. + + If unsure, say "n". + +config CMA_DEBUG + bool "CMA debug messages (DEVELOPMENT)" + depends on DEBUG_KERNEL && CMA + help + Turns on debug messages in CMA. This produces KERN_DEBUG + messages for every CMA call as well as various messages while + processing calls such as dma_alloc_from_contiguous(). + This option does not affect warning and error messages. From 1980dd3801892bcf7e4dfffc137d2119e13150b8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 29 Jul 2013 14:31:45 +0200 Subject: [PATCH 02/11] drivers: dma-contiguous: clean source code and prepare for device tree This patch cleans the initialization of dma contiguous framework. The all-in-one dma_declare_contiguous() function is now separated into dma_contiguous_reserve_area() which only steals the the memory from memblock allocator and dma_contiguous_add_device() function, which assigns given device to the specified reserved memory area. This improves the flexibility in defining contiguous memory areas and assigning device to them, because now it is possible to assign more than one device to the given contiguous memory area. Such split in initialization procedure is also required for upcoming device tree support. Signed-off-by: Marek Szyprowski Acked-by: Kyungmin Park Acked-by: Michal Nazarewicz Acked-by: Tomasz Figa (cherry picked from commit a2547380393ac82c659b40182b0da8d05a8365f3) Signed-off-by: Alex Shi --- arch/arm/include/asm/dma-contiguous.h | 1 - arch/x86/include/asm/dma-contiguous.h | 1 - drivers/base/dma-contiguous.c | 119 ++++++++++---------------- include/asm-generic/dma-contiguous.h | 28 ------ include/linux/device.h | 2 +- include/linux/dma-contiguous.h | 62 +++++++++++++- 6 files changed, 105 insertions(+), 108 deletions(-) delete mode 100644 include/asm-generic/dma-contiguous.h diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h index e072bb2ba1b1..4f8e9e5514b1 100644 --- a/arch/arm/include/asm/dma-contiguous.h +++ b/arch/arm/include/asm/dma-contiguous.h @@ -5,7 +5,6 @@ #ifdef CONFIG_DMA_CMA #include -#include void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h index c09241659971..b4b38bacb404 100644 --- a/arch/x86/include/asm/dma-contiguous.h +++ b/arch/x86/include/asm/dma-contiguous.h @@ -4,7 +4,6 @@ #ifdef __KERNEL__ #include -#include static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 0ca54421ce97..99802d6f3c60 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -96,7 +96,7 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif /** - * dma_contiguous_reserve() - reserve area for contiguous memory handling + * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). * * This function reserves memory from early allocator. It should be @@ -124,22 +124,29 @@ void __init dma_contiguous_reserve(phys_addr_t limit) #endif } - if (selected_size) { + if (selected_size && !dma_contiguous_default_area) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - dma_declare_contiguous(NULL, selected_size, 0, limit); + dma_contiguous_reserve_area(selected_size, 0, limit, + &dma_contiguous_default_area); } }; static DEFINE_MUTEX(cma_mutex); -static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) +static int __init cma_activate_area(struct cma *cma) { - unsigned long pfn = base_pfn; - unsigned i = count >> pageblock_order; + int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long); + unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; + unsigned i = cma->count >> pageblock_order; struct zone *zone; + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + return -ENOMEM; + WARN_ON_ONCE(!pfn_valid(pfn)); zone = page_zone(pfn_to_page(pfn)); @@ -153,92 +160,53 @@ static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); + return 0; } -static __init struct cma *cma_create_area(unsigned long base_pfn, - unsigned long count) -{ - int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); - struct cma *cma; - int ret = -ENOMEM; - - pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); - - cma = kmalloc(sizeof *cma, GFP_KERNEL); - if (!cma) - return ERR_PTR(-ENOMEM); - - cma->base_pfn = base_pfn; - cma->count = count; - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - goto no_mem; - - ret = cma_activate_area(base_pfn, count); - if (ret) - goto error; - - pr_debug("%s: returned %p\n", __func__, (void *)cma); - return cma; - -error: - kfree(cma->bitmap); -no_mem: - kfree(cma); - return ERR_PTR(ret); -} - -static struct cma_reserved { - phys_addr_t start; - unsigned long size; - struct device *dev; -} cma_reserved[MAX_CMA_AREAS] __initdata; -static unsigned cma_reserved_count __initdata; +static struct cma cma_areas[MAX_CMA_AREAS]; +static unsigned cma_area_count; static int __init cma_init_reserved_areas(void) { - struct cma_reserved *r = cma_reserved; - unsigned i = cma_reserved_count; + int i; - pr_debug("%s()\n", __func__); - - for (; i; --i, ++r) { - struct cma *cma; - cma = cma_create_area(PFN_DOWN(r->start), - r->size >> PAGE_SHIFT); - if (!IS_ERR(cma)) - dev_set_cma_area(r->dev, cma); + for (i = 0; i < cma_area_count; i++) { + int ret = cma_activate_area(&cma_areas[i]); + if (ret) + return ret; } + return 0; } core_initcall(cma_init_reserved_areas); /** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). + * dma_contiguous_reserve_area() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any * @limit: End address of the reserved memory (optional, 0 for any). + * @res_cma: Pointer to store the created cma region. * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas for specific + * devices. */ -int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma) { - struct cma_reserved *r = &cma_reserved[cma_reserved_count]; + struct cma *cma = &cma_areas[cma_area_count]; phys_addr_t alignment; + int ret = 0; pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, (unsigned long)size, (unsigned long)base, (unsigned long)limit); /* Sanity checks */ - if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { + if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); return -ENOSPC; } @@ -256,7 +224,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, if (base) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { - base = -EBUSY; + ret = -EBUSY; goto err; } } else { @@ -266,7 +234,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, */ phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); if (!addr) { - base = -ENOMEM; + ret = -ENOMEM; goto err; } else { base = addr; @@ -277,10 +245,11 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, * Each reserved area must be initialised later, when more kernel * subsystems (like slab allocator) are available. */ - r->start = base; - r->size = size; - r->dev = dev; - cma_reserved_count++; + cma->base_pfn = PFN_DOWN(base); + cma->count = size >> PAGE_SHIFT; + *res_cma = cma; + cma_area_count++; + pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, (unsigned long)base); @@ -289,7 +258,7 @@ int __init dma_declare_contiguous(struct device *dev, phys_addr_t size, return 0; err: pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return base; + return ret; } /** diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h deleted file mode 100644 index 294b1e755ab2..000000000000 --- a/include/asm-generic/dma-contiguous.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef ASM_DMA_CONTIGUOUS_H -#define ASM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_CMA - -#include -#include - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; - if (!dev && !dma_contiguous_default_area) - dma_contiguous_default_area = cma; -} - -#endif -#endif - -#endif diff --git a/include/linux/device.h b/include/linux/device.h index c0a126125325..d98ec771de42 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -698,7 +698,7 @@ struct device { struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ -#ifdef CONFIG_CMA +#ifdef CONFIG_DMA_CMA struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 00141d3325fe..3b28f937d959 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -67,9 +67,53 @@ struct device; extern struct cma *dma_contiguous_default_area; +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; +} + +static inline void dma_contiguous_set_default(struct cma *cma) +{ + dma_contiguous_default_area = cma; +} + void dma_contiguous_reserve(phys_addr_t addr_limit); -int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit); + +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma); + +/** + * dma_declare_contiguous() - reserve area for contiguous memory handling + * for particular device + * @dev: Pointer to device structure. + * @size: Size of the reserved memory. + * @base: Start address of the reserved memory (optional, 0 for any). + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory for specified device. It should be + * called by board specific code when early allocator (memblock or bootmem) + * is still activate. + */ + +static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, + phys_addr_t base, phys_addr_t limit) +{ + struct cma *cma; + int ret; + ret = dma_contiguous_reserve_area(size, base, limit, &cma); + if (ret == 0) + dev_set_cma_area(dev, cma); + + return ret; +} struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int order); @@ -80,8 +124,22 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, #define MAX_CMA_AREAS (0) +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { } + +static inline void dma_contiguous_set_default(struct cma *cma) { } + static inline void dma_contiguous_reserve(phys_addr_t limit) { } +static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma) { + return -ENOSYS; +} + static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, phys_addr_t base, phys_addr_t limit) From 97e4a880c9677b82ddb41e4613d0d4b6a3479a80 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 12 Dec 2013 19:28:33 +0000 Subject: [PATCH 03/11] arm64: Enable CMA arm64 bit targets need the features CMA provides. Add the appropriate hooks, header files, and Kconfig to allow this to happen. Cc: Will Deacon Cc: Marek Szyprowski Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit 6ac2104debc235b745265b64d610237a6833fd53) Signed-off-by: Alex Shi Conflicts: arch/arm64/Kconfig (cherry picked from commit 0a4b0f00f4c9cb80a0fb11b9e0f898f82745e370) Signed-off-by: Alex Shi Conflicts: arch/arm64/mm/dma-mapping.c --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/dma-contiguous.h | 29 ++++++++++++++++++++++++ arch/arm64/mm/dma-mapping.c | 30 +++++++++++++++++++++++-- arch/arm64/mm/init.c | 3 +++ 4 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/dma-contiguous.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..1c650048a4e3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,7 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..d6aacb61ff4a --- /dev/null +++ b/arch/arm64/include/asm/dma-contiguous.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _ASM_DMA_CONTIGUOUS_H +#define _ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_DMA_CMA + +#include +#include + +static inline void +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } + +#endif +#endif + +#endif diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bd7579ec9e6..c4c005cc2ed2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -36,14 +37,39 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA32) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA32; - return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + if (IS_ENABLED(CONFIG_DMA_CMA)) { + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size)); + if (!page) + return NULL; + + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + return page_address(page); + } else { + return swiotlb_alloc_coherent(dev, size, dma_handle, flags); + } } static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) { - swiotlb_free_coherent(dev, size, vaddr, dma_handle); + if (dev == NULL) { + WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); + return; + } + + if (IS_ENABLED(CONFIG_DMA_CMA)) { + phys_addr_t paddr = dma_to_phys(dev, dma_handle); + + dma_release_from_contiguous(dev, + phys_to_page(paddr), + size >> PAGE_SHIFT); + } else { + swiotlb_free_coherent(dev, size, vaddr, dma_handle); + } } static struct dma_map_ops arm64_swiotlb_dma_ops = { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..851d3c137e03 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,8 @@ void __init arm64_memblock_init(void) memblock_reserve(base, size); } + dma_contiguous_reserve(0); + memblock_allow_resize(); memblock_dump_all(); } From 9d93ccedc26f8591ca4e6f2b65dcbf2bdbfd6453 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 24 Jan 2014 08:23:08 +0000 Subject: [PATCH 04/11] arm64: fix build error if DMA_CMA is enabled arm64/include/asm/dma-contiguous.h is trying to include which does not exist, and thus failing build for arm64 if we enable CONFIG_DMA_CMA. This patch fixes build error by removing unwanted header inclusion from arm64's dma-contiguous.h. Signed-off-by: Pankaj Dubey Signed-off-by: Somraj Mani Acked-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit ac525f59fb011e05e77c5be8b9834883ee1d5613) Signed-off-by: Alex Shi --- arch/arm64/include/asm/dma-contiguous.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/dma-contiguous.h b/arch/arm64/include/asm/dma-contiguous.h index d6aacb61ff4a..14c4c0ca7f2a 100644 --- a/arch/arm64/include/asm/dma-contiguous.h +++ b/arch/arm64/include/asm/dma-contiguous.h @@ -18,7 +18,6 @@ #ifdef CONFIG_DMA_CMA #include -#include static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } From 7e7f87dda0fbc095f27f3ccd97aed18c2542b93c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Feb 2014 23:08:57 +0000 Subject: [PATCH 05/11] arm64: Align CMA sizes to PAGE_SIZE dma_alloc_from_contiguous takes number of pages for a size. Align up the dma size passed in to page size to avoid truncation and allocation failures on sizes less than PAGE_SIZE. Cc: Will Deacon Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit ccc9e244eb1b9654915634827322932cbafd8244) Signed-off-by: Alex Shi --- arch/arm64/mm/dma-mapping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c4c005cc2ed2..0150f5d14eda 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,6 +40,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) From 475e06edbfdad8f7bbc5edb3d803c52e27d84c2a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 9 May 2014 15:58:14 +0100 Subject: [PATCH 06/11] DMA-API: provide a helper to set both DMA and coherent DMA masks Provide a helper to set both the DMA and coherent DMA masks to the same value - this avoids duplicated code in a number of drivers, sometimes with buggy error handling, and also allows us identify which drivers do things differently. Signed-off-by: Russell King (cherry picked from commit 4aa806b771d16b810771d86ce23c4c3160888db3) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- Documentation/DMA-API-HOWTO.txt | 37 ++++++++++++++++++++------------- Documentation/DMA-API.txt | 8 +++++++ include/linux/dma-mapping.h | 14 +++++++++++++ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 14129f149a75..5e983031cc11 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -101,14 +101,23 @@ style to do this even if your device holds the default setting, because this shows that you did think about these issues wrt. your device. -The query is performed via a call to dma_set_mask(): +The query is performed via a call to dma_set_mask_and_coherent(): - int dma_set_mask(struct device *dev, u64 mask); + int dma_set_mask_and_coherent(struct device *dev, u64 mask); -The query for consistent allocations is performed via a call to -dma_set_coherent_mask(): +which will query the mask for both streaming and coherent APIs together. +If you have some special requirements, then the following two separate +queries can be used instead: - int dma_set_coherent_mask(struct device *dev, u64 mask); + The query for streaming mappings is performed via a call to + dma_set_mask(): + + int dma_set_mask(struct device *dev, u64 mask); + + The query for consistent allocations is performed via a call + to dma_set_coherent_mask(): + + int dma_set_coherent_mask(struct device *dev, u64 mask); Here, dev is a pointer to the device struct of your device, and mask is a bit mask describing which bits of an address your device @@ -137,7 +146,7 @@ exactly why. The standard 32-bit addressing device would do something like this: - if (dma_set_mask(dev, DMA_BIT_MASK(32))) { + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; @@ -171,22 +180,20 @@ the case would look like this: int using_dac, consistent_using_dac; - if (!dma_set_mask(dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) { using_dac = 1; consistent_using_dac = 1; - dma_set_coherent_mask(dev, DMA_BIT_MASK(64)); - } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) { + } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { using_dac = 0; consistent_using_dac = 0; - dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); } else { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; } -dma_set_coherent_mask() will always be able to set the same or a -smaller mask as dma_set_mask(). However for the rare case that a +The coherent coherent mask will always be able to set the same or a +smaller mask as the streaming mask. However for the rare case that a device driver only uses consistent allocations, one would have to check the return value from dma_set_coherent_mask(). @@ -199,9 +206,9 @@ address you might do something like: goto ignore_this_device; } -When dma_set_mask() is successful, and returns zero, the kernel saves -away this mask you have provided. The kernel will use this -information later when you make DMA mappings. +When dma_set_mask() or dma_set_mask_and_coherent() is successful, and +returns zero, the kernel saves away this mask you have provided. The +kernel will use this information later when you make DMA mappings. There is a case which we are aware of at this time, which is worth mentioning in this documentation. If your device supports multiple diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 78a6c569d204..e865279cec58 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -141,6 +141,14 @@ won't change the current mask settings. It is more intended as an internal API for use by the platform than an external API for use by driver writers. +int +dma_set_mask_and_coherent(struct device *dev, u64 mask) + +Checks to see if the mask is possible and updates the device +streaming and coherent DMA mask parameters if it is. + +Returns: 0 if successful and a negative error if not. + int dma_set_mask(struct device *dev, u64 mask) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 94af41858513..8f7a2e84e527 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -97,6 +97,20 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) } #endif +/* + * Set both the DMA mask and the coherent DMA mask to the same thing. + * Note that we don't check the return value from dma_set_coherent_mask() + * as the DMA API guarantees that the coherent DMA mask can be set to + * the same or smaller than the streaming DMA mask. + */ +static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) +{ + int rc = dma_set_mask(dev, mask); + if (rc == 0) + dma_set_coherent_mask(dev, mask); + return rc; +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) From 1487ad49044023a8ce9b7fc5a9cdf6bbec0b8748 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 9 May 2014 15:58:15 +0100 Subject: [PATCH 07/11] DMA-API: provide a helper to setup DMA masks Many drivers contain code such as: dev->dma_mask = &dev->coherent_dma_mask; dev->coherent_dma_mask = MASK; Let's move this pattern out of drivers and have the DMA API provide a helper for it. This helper uses dma_set_mask_and_coherent() to allow platform issues to be properly dealt with via dma_set_mask()/ dma_is_supported(). Signed-off-by: Russell King (cherry picked from commit fa6a8d6d65b19ab44e5244ea499bcd553cc72343) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- include/linux/dma-mapping.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8f7a2e84e527..48ef6f50d86c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -111,6 +111,16 @@ static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) return rc; } +/* + * Similar to the above, except it deals with the case where the device + * does not have dev->dma_mask appropriately setup. + */ +static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) +{ + dev->dma_mask = &dev->coherent_dma_mask; + return dma_set_mask_and_coherent(dev, mask); +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) From 03b0d29e0369054e638328b3994b8cfbb9d34a35 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:16 +0100 Subject: [PATCH 08/11] arm64: Fix DMA range invalidation for cache line unaligned buffers If the buffer needing cache invalidation for inbound DMA does start or end on a cache line aligned address, we need to use the non-destructive clean&invalidate operation. This issue was introduced by commit 7363590d2c46 (arm64: Implement coherent DMA API based on swiotlb). Signed-off-by: Catalin Marinas Reported-by: Jon Medhurst (Tixy) (cherry picked from commit ebf81a938dade3b450eb11c57fa744cfac4b523f) Signed-off-by: Ryan Harkin Signed-off-by: Mark Brown --- arch/arm64/mm/cache.S | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 4726b8209d37..39b6542b138e 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -175,12 +175,19 @@ ENDPROC(__flush_dcache_area) __dma_inv_range: dcache_line_size x2, x3 sub x3, x2, #1 - bic x0, x0, x3 + tst x1, x3 // end cache line aligned? bic x1, x1, x3 -1: dc ivac, x0 // invalidate D / U line - add x0, x0, x2 + b.eq 1f + dc civac, x1 // clean & invalidate D / U line +1: tst x0, x3 // start cache line aligned? + bic x0, x0, x3 + b.eq 2f + dc civac, x0 // clean & invalidate D / U line + b 3f +2: dc ivac, x0 // invalidate D / U line +3: add x0, x0, x2 cmp x0, x1 - b.lo 1b + b.lo 2b dsb sy ret ENDPROC(__dma_inv_range) From f44acdf9c761dbbd932ff7bdb8130c85668d3201 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:17 +0100 Subject: [PATCH 09/11] arm64: Replace ZONE_DMA32 with ZONE_DMA On arm64 we do not have two DMA zones, so it does not make sense to implement ZONE_DMA32. This patch changes ZONE_DMA32 with ZONE_DMA, the latter covering 32-bit dma address space to honour GFP_DMA allocations. Signed-off-by: Catalin Marinas (cherry picked from commit 19e7640d1f2302c20df2733e3e3df49acb17189e) Signed-off-by: Ryan Harkin Conflicts: arch/arm64/mm/dma-mapping.c Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 2 +- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/arm64/mm/init.c | 31 ++++++++++++++++--------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1c650048a4e3..e418519ec904 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -78,7 +78,7 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y -config ZONE_DMA32 +config ZONE_DMA def_bool y config ARCH_DMA_ADDR_T_64BIT diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 550873ace597..1baca2efb234 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -34,9 +34,9 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { - if (IS_ENABLED(CONFIG_ZONE_DMA32) && + if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) - flags |= GFP_DMA32; + flags |= GFP_DMA; if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 851d3c137e03..63f679b276b6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -68,22 +69,22 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) - static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); -#ifdef CONFIG_ZONE_DMA32 /* 4GB maximum for 32-bit only capable devices */ - max_dma32 = max(min, min(max, MAX_DMA32_PFN)); - zone_size[ZONE_DMA32] = max_dma32 - min; -#endif - zone_size[ZONE_NORMAL] = max - max_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + unsigned long max_dma_phys = + (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1); + max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + zone_size[ZONE_DMA] = max_dma - min; + } + zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -93,15 +94,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; -#ifdef CONFIG_ZONE_DMA32 - if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + + if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { + unsigned long dma_end = min(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; } -#endif - if (end > max_dma32) { + + if (end > max_dma) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma32); + unsigned long normal_start = max(start, max_dma); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } From dbcc9311db25d7d67bcb1ddc99168a810dc5ff07 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 May 2014 15:58:18 +0100 Subject: [PATCH 10/11] arm64: Use swiotlb late initialisation Since arm64 does not support ISA, there is no need for early swiotlb initialisation. This patch switches the DMA mapping code to swiotlb_tlb_late_init_with_default_size(). A side effect of this is that GFP_DMA is used for the swiotlb buffer and devices with a 32-bit coherent mask are correctly supported. Signed-off-by: Catalin Marinas (cherry picked from commit 3690951fc6d42f3a0903987677d0e592c49dd8db) Signed-off-by: Ryan Harkin Conflicts: arch/arm64/mm/init.c Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 10 ++++++++-- arch/arm64/mm/init.c | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1baca2efb234..a48f1da828f6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -248,11 +248,17 @@ struct dma_map_ops coherent_swiotlb_dma_ops = { }; EXPORT_SYMBOL(coherent_swiotlb_dma_ops); -void __init arm64_swiotlb_init(void) +extern int swiotlb_late_init_with_default_size(size_t default_size); + +static int __init swiotlb_late_init(void) { + size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); + dma_ops = &coherent_swiotlb_dma_ops; - swiotlb_init(1); + + return swiotlb_late_init_with_default_size(swiotlb_size); } +subsys_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 63f679b276b6..fa3651855334 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -287,8 +287,6 @@ void __init mem_init(void) unsigned long reserved_pages, free_pages; struct memblock_region *reg; - arm64_swiotlb_init(); - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP From 3abb7f235bacff959dc5a12c4ef77a4f358d2dc5 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 9 May 2014 15:58:19 +0100 Subject: [PATCH 11/11] arm64: Make default dma_ops to be noncoherent Currently arm64 dma_ops is by default made coherent which makes it opposite in default policy from arm. Make default dma_ops to be noncoherent (same as arm), as currently there aren't any dma-capable drivers which assumes coherent ops Signed-off-by: Ritesh Harjani Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c7a4a7658d689f664050c45493d79adf053f226e) Signed-off-by: Jon Medhurst Signed-off-by: Mark Brown --- arch/arm64/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a48f1da828f6..d3d9bf5884f8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -254,7 +254,7 @@ static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); - dma_ops = &coherent_swiotlb_dma_ops; + dma_ops = &noncoherent_swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); }