[ARM] mm: add page allocator for modifying cache attributes

ARM CPUs with speculative prefetching have undefined behavior when the same physical page is mapped to two different virtual addresses with conflicting cache attributes. Since many recent systems include IOMMU functionality (i.e., remapping of discontiguous physical pages into a virtually-contiguous address range for I/O devices), it is desirable to support allocating any available OS memory for use by the I/O devices. However, since many systems do not support cache coherency between the CPU and DMA devices, these devices are left with using DMA-coherent allocations from the OS (which severely limits the benefit of an IOMMU) or performing cache maintenance (which can be a severe performance loss, particularly on systems with outer caches, compared to using DMA-coherent memory). This change adds an API for allocating pages from the OS with specific cache maintenance properties and ensures that the kernel's mapping of the page reflects the desired cache attributes, in line with the ARMv7 architectural requirements. Change-Id: If0bd3cfe339b9a9b10fd6d45a748cd5e65931cf0 Signed-off-by: Gary King <gking@nvidia.com>
2026-06-11 05:17:10 +09:00 · 2010-08-02 15:55:16 -07:00
parent ac21b32104
commit 54d4145704
4 changed files with 198 additions and 0 deletions
--- a/arch/arm/include/asm/attrib_alloc.h
+++ b/arch/arm/include/asm/attrib_alloc.h
@@ -0,0 +1,47 @@
+/*
+ * arch/arm/include/asm/attrib_alloc.h
+ *
+ * Page allocator with custom cache attributes
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARCH_ARM_ATTRIB_ALLOC_H
+#define __ARCH_ARM_ATTRIB_ALLOC_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+/* allocate pages covering size bytes; their kernel mappings are set to prot */
+struct page *arm_attrib_alloc_pages_exact_node(int nid, gfp_t gfp,
+					       size_t size, pgprot_t prot);
+/* reset the pages' kernel mappings to pgprot_kernel, then free them */
+void arm_attrib_free_pages_exact(struct page *page, size_t size);
+
+/* same as arm_attrib_alloc_pages_exact_node(), passing -1 as the node id */
+static inline struct page *
+arm_attrib_alloc_pages_exact(gfp_t gfp, size_t size, pgprot_t prot)
+{
+	return arm_attrib_alloc_pages_exact_node(-1, gfp, size, prot);
+}
+/* allocate a single PAGE_SIZE page whose kernel mapping uses prot */
+#define arm_attrib_alloc_page(gfp, prot)	\
+	arm_attrib_alloc_pages_exact((gfp), PAGE_SIZE, (prot))
+/* free a single PAGE_SIZE page via arm_attrib_free_pages_exact() */
+#define arm_attrib_free_page(page)		\
+	arm_attrib_free_pages_exact((page), PAGE_SIZE)
+
+#endif /* __ARCH_ARM_ATTRIB_ALLOC_H */
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -808,6 +808,29 @@ config ARM_L1_CACHE_SHIFT
 	default 6 if ARM_L1_CACHE_SHIFT_6
 	default 5

+config ARM_ATTRIB_ALLOCATOR
+	bool "Support custom cache attribute allocations in low memory"
+	select ARCH_LOWMEM_IN_PTES if (CPU_V7)
+	depends on MMU && !CPU_CACHE_VIVT
+	help
+	  Historically, the kernel has only reserved a small region
+	  of physical memory for uncached access, and relied on
+	  explicit cache maintenance for ensuring coherency between
+	  the CPU and DMA.
+
+	  However, many recent systems support mapping discontiguous
+	  physical pages into contiguous DMA addresses (so-called
+	  system MMUs). For some DMA clients (notably graphics and
+	  multimedia engines), performing explicit cache maintenance
+	  between CPU and DMA mappings can be prohibitively expensive,
+	  and since ARMv7, mapping the same physical page with different
+	  cache attributes is disallowed and has unpredictable behavior.
+
+	  Say 'Y' here to include page allocation support with explicit
+	  cache attributes; on ARMv7 systems this will also force the
+	  kernel's low memory to be mapped using page tables rather
+	  than sections.
+
 config ARM_DMA_MEM_BUFFERABLE
 	bool "Use non-cacheable memory for DMA" if CPU_V6 && !CPU_V7
 	depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -12,6 +12,8 @@ ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
 endif

+obj-$(CONFIG_ARM_ATTRIB_ALLOCATOR) += attrib_alloc.o
+
 obj-$(CONFIG_MODULES)		+= proc-syms.o

 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
--- a/arch/arm/mm/attrib_alloc.c
+++ b/arch/arm/mm/attrib_alloc.c
@@ -0,0 +1,126 @@
+/*
+ * arch/arm/mm/attrib_alloc.c
+ *
+ * Page allocator with custom cache attributes
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/page-flags.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/outercache.h>
+#include <asm/attrib_alloc.h>
+#include "mm.h"
+
+/* set the pkmap PTE of a currently kmapped highmem page to prot */
+static void update_kmap_pte(struct page *page, pgprot_t prot)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long addr = (unsigned long)kmap_high_get(page);
+	pte_t *pte;
+
+	BUG_ON(!PageHighMem(page) || addr >= FIXADDR_START);
+	if (!addr)
+		return;	/* kmap_high_get() gave no address: nothing to update */
+
+	pte = &pkmap_page_table[PKMAP_NR(addr)];
+	set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	kunmap_high(page);	/* pairs with kmap_high_get() above */
+#endif
+}
+
+/* rewrite page's kernel PTE with prot (only with ARCH_LOWMEM_IN_PTES) */
+static void update_pte(struct page *page, pgprot_t prot)
+{
+#ifdef CONFIG_ARCH_LOWMEM_IN_PTES
+	unsigned long addr = (unsigned long)page_address(page);
+	pmd_t *pmd = pmd_offset(pgd_offset_k(addr), addr);
+	pte_t *pte;
+
+	BUG_ON(pmd_none(*pmd));
+	pte = pte_offset_kernel(pmd, addr);
+	set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+#endif
+}
+
+/* Give each page its standard pgprot_kernel mapping back before handing it
+ * to the page allocator; size is rounded up to whole pages. */
+void arm_attrib_free_pages_exact(struct page *page, size_t size)
+{
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long i;
+
+	for (i = 0; i < count; i++) {
+		struct page *cur = page + i;
+
+		if (!PageHighMem(cur))
+			update_pte(cur, pgprot_kernel);
+		else
+			update_kmap_pte(cur, pgprot_kernel);
+
+		__free_page(cur);
+	}
+}
+
+/* Allocate pages covering size bytes on node nid and switch their kernel
+ * mappings to prot. Returns the first page, or NULL if allocation fails. */
+struct page *arm_attrib_alloc_pages_exact_node(int nid, gfp_t gfp,
+					       size_t size, pgprot_t prot)
+{
+	struct page *page, *p, *e;
+	unsigned int order;
+	unsigned long base;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+	page = alloc_pages_node(nid, gfp, order);
+	if (!page)
+		return NULL;
+
+	/* split the block into single pages; free those beyond size */
+	split_page(page, order);
+	e = page + (1 << order);
+	for (p = page + (size >> PAGE_SHIFT); p < e; p++)
+		__free_page(p);
+
+	e = page + (size >> PAGE_SHIFT);
+	for (p = page; p < e; p++) {
+		__flush_dcache_page(page_mapping(p), p);
+
+		/* even though a freshly-allocated highmem page shouldn't
+		 * be mapped, kmaps are flushed lazily, so a mapping from
+		 * an old kmap_high call may still be present and needs
+		 * its cache attributes updated as well */
+		if (PageHighMem(p))
+			update_kmap_pte(p, prot);
+		else
+			update_pte(p, prot);
+	}
+	/* the end of the flushed range is a byte address: base + size */
+	base = page_to_phys(page);
+	outer_flush_range(base, base + size);
+	return page;
+}