The Implementation of Linux ioremap

In the Linux kernel source one frequently runs into the ioremap function. Its job is to map a given physical address to a kernel virtual address. Note that the physical address here is not an address in real RAM but I/O memory on the SoC; refer to the memory map chapter of the chip's Reference Manual. This article walks through the implementation of ioremap, using the ARM architecture as the example.
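Before diving into the implementation, here is a minimal sketch of how a driver typically consumes ioremap. The register base REG_BASE and the REG_CTRL offset are made-up values for illustration; on real hardware they come from the SoC's memory map or a struct resource:

#include <linux/io.h>
#include <linux/module.h>

#define REG_BASE  0x10000000	/* hypothetical device register block (physical) */
#define REG_SIZE  0x1000
#define REG_CTRL  0x04		/* hypothetical control register offset */

static void __iomem *regs;

static int __init demo_init(void)
{
	/* Map the device's physical registers into kernel virtual space */
	regs = ioremap(REG_BASE, REG_SIZE);
	if (!regs)
		return -ENOMEM;

	/* I/O memory must be accessed through the accessors, never by
	 * dereferencing the pointer directly */
	writel(0x1, regs + REG_CTRL);
	pr_info("ctrl = %#x\n", readl(regs + REG_CTRL));
	return 0;
}

static void __exit demo_exit(void)
{
	iounmap(regs);	/* undo the mapping set up by ioremap() */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The returned cookie is an opaque void __iomem pointer; it is only valid with accessors such as readl()/writel(), and is released with iounmap().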



#define ioremap(cookie,size)	__arch_ioremap((cookie), (size), MT_DEVICE)
#define MT_DEVICE		0
#define __arch_ioremap		__arm_ioremap

void __iomem *
__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
{
	return __arm_ioremap_caller(phys_addr, size, mtype,
			__builtin_return_address(0));
}

void __iomem *
__arm_ioremap_caller(unsigned long phys_addr, size_t size, unsigned int mtype,
		     void *caller)
{
	unsigned long last_addr;
	unsigned long offset = phys_addr & ~PAGE_MASK;
	unsigned long pfn = __phys_to_pfn(phys_addr);

	/*
	 * Don't allow wraparound or zero size
	 */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, caller);
}

void __iomem *
__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size,
			 unsigned int mtype, void *caller)
{
	const struct mem_type *type;
	int err;
	unsigned long addr;
	struct vm_struct *area;

	/*
	 * High mappings must be supersection aligned
	 */
	if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SUPERSECTION_MASK))
		return NULL;

	/*
	 * Don't allow RAM to be mapped - this causes problems with ARMv6+
	 */
	/* The mapped range must not be RAM; only the SoC's I/O memory
	 * may be remapped:
	 *
	 * int pfn_valid(unsigned long pfn)
	 * {
	 *	return memblock_is_memory(pfn << PAGE_SHIFT);
	 * }
	 */
	if (WARN_ON(pfn_valid(pfn)))
		return NULL;

	/* get_mem_type() is shown below; from the macros above,
	 * mtype here is MT_DEVICE */
	type = get_mem_type(mtype);
	if (!type)
		return NULL;

	/*
	 * Page align the mapping size, taking account of any offset.
	 */
	size = PAGE_ALIGN(offset + size);

	/* get_vm_area_caller() is shown below */
	area = get_vm_area_caller(size, VM_IOREMAP, caller);
	if (!area)
		return NULL;
	addr = (unsigned long)area->addr;

#ifndef CONFIG_SMP
	if (DOMAIN_IO == 0 &&
	    (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) ||
	     cpu_is_xsc3()) && pfn >= 0x100000 &&
	    !((__pfn_to_phys(pfn) | size | addr) & ~SUPERSECTION_MASK)) {
		area->flags |= VM_ARM_SECTION_MAPPING;
		err = remap_area_supersections(addr, pfn, size, type);
	} else if (!((__pfn_to_phys(pfn) | size | addr) & ~PMD_MASK)) {
		area->flags |= VM_ARM_SECTION_MAPPING;
		err = remap_area_sections(addr, pfn, size, type);
	} else
#endif
		/* ioremap_page_range() is shown below */
		err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn),
					 __pgprot(type->prot_pte));

	if (err) {
		vunmap((void *)addr);
		return NULL;
	}

	flush_cache_vmap(addr, addr + size);
	return (void __iomem *)(offset + addr);
}
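One detail worth pausing on: the mapping is built on page boundaries, yet callers may pass an unaligned phys_addr. __arm_ioremap_caller() saves the sub-page offset, __arm_ioremap_pfn_caller() rounds the size up with PAGE_ALIGN(offset + size), and the final return adds offset back onto the page-aligned virtual address. A small user-space sketch of the same arithmetic, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT    12
#define PAGE_SIZE     (1UL << PAGE_SHIFT)	/* assume 4 KiB pages */
#define PAGE_MASK     (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long phys_addr = 0x10002A30;	/* unaligned register address */
	unsigned long size = 0x100;

	unsigned long offset = phys_addr & ~PAGE_MASK;	    /* 0xA30 */
	unsigned long map_size = PAGE_ALIGN(offset + size); /* one full page */

	/* The page at 0x10002000 gets mapped; the caller is handed
	 * the mapped virtual address plus 0xA30 */
	printf("page base %#lx, offset %#lx, mapped %#lx bytes\n",
	       phys_addr & PAGE_MASK, offset, map_size);
	return 0;
}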
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	  /* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MINICLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_LOW_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_USER | L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect	= PMD_TYPE_SECT,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_NONCACHED] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_MT_BUFFERABLE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_DTCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_ITCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_KERNEL,
	},
};
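mtype is simply an index into this table, so the choice of ioremap flavour decides the memory attributes of the mapping. The related ARM macros of this era looked roughly like the following (a reconstruction for illustration, not verbatim source of any particular tree):

/* Reconstruction of arch/arm/include/asm/io.h from this kernel era */
#define ioremap(cookie,size)		__arch_ioremap((cookie), (size), MT_DEVICE)
#define ioremap_nocache(cookie,size)	__arch_ioremap((cookie), (size), MT_DEVICE)
#define ioremap_cached(cookie,size)	__arch_ioremap((cookie), (size), MT_DEVICE_CACHED)
#define ioremap_wc(cookie,size)		__arch_ioremap((cookie), (size), MT_DEVICE_WC)

So ioremap_wc() ends up with the L_PTE_MT_DEV_WC (bufferable, write-combining) page-table attributes instead of the strongly-ordered MT_DEVICE ones.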
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				     void *caller)
{
	/*
	 * Just any arbitrary offset to the start of the vmalloc VM area: the
	 * current 8MB value just means that there will be a 8MB "hole" after the
	 * physical memory until the kernel virtual memory starts.  That means that
	 * any out-of-bounds memory accesses will hopefully be caught.
	 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
	 * area for the same reason. ;)
	 *
	 * Note that platforms may override VMALLOC_START, but they must provide
	 * VMALLOC_END.  VMALLOC_END defines the (exclusive) limit of this space,
	 * which may not overlap IO space.
	 */
	/*
	 * #ifndef VMALLOC_START
	 * #define VMALLOC_OFFSET	(8*1024*1024)
	 * // high_memory is set in bootmem_init() in arch/arm/mm/init.c;
	 * // that function is shown below
	 * #define VMALLOC_START	(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
	 * #endif
	 *
	 * // vmalloc ending address (platform-dependent; 0xff000000UL is typical)
	 * #define VMALLOC_END	0xff000000UL
	 */
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  -1, GFP_KERNEL, caller);
}

static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vmap_area *va;
	struct vm_struct *area;

	BUG_ON(in_interrupt());

	/* bits in flags of vmalloc's vm_struct below */
	/* #define VM_IOREMAP	0x00000001	// ioremap() and friends */
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	/*
	 * kzalloc_node - allocate zeroed memory from a particular memory node.
	 * @size: how many bytes of memory are required.
	 * @flags: the type of memory to allocate (see kmalloc).
	 * @node: memory node from which to allocate
	 */
	/* Allocate a vm_struct */
	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	/* start and end are VMALLOC_START and VMALLOC_END; align was computed
	 * above for VM_IOREMAP requests (and is 1 otherwise).
	 *
	 * Kernel comment on alloc_vmap_area():
	 *   Allocate a region of KVA of the specified size and alignment,
	 *   within the vstart and vend.
	 *
	 * Each in-use region of kernel virtual address space is described by
	 * a vmap_area, and all vmap_area structures live in the red-black
	 * tree vmap_area_root.  alloc_vmap_area() searches that tree for an
	 * unused gap of at least size bytes between start and end, creates a
	 * vmap_area describing the gap, and inserts it into vmap_area_root.
	 */
	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	/*
	 * When this function is called from __vmalloc_node_range,
	 * we do not add vm_struct to vmlist here to avoid
	 * accessing uninitialized members of vm_struct such as
	 * pages and nr_pages fields. They will be set later.
	 * To distinguish it from others, we use a VM_UNLIST flag.
	 */
	if (flags & VM_UNLIST)
		setup_vmalloc_vm(area, va, flags, caller);
	else
		/* As the kernel comment above says, the if branch handles a
		 * special case, so we only follow the else branch here.
		 * insert_vmalloc_vm() is shown below. */
		insert_vmalloc_vm(area, va, flags, caller);

	return area;
}
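The VM_IOREMAP branch rounds the allocation's alignment up to the order of its size, clamped between PAGE_SHIFT and IOREMAP_MAX_ORDER, so that section and supersection mappings can line up later. A standalone sketch of that computation (fls reimplemented locally; IOREMAP_MAX_ORDER assumed to be 24, the 16 MiB supersection order on ARM):

#include <stdio.h>

#define PAGE_SHIFT        12
#define IOREMAP_MAX_ORDER 24	/* assumed ARM value: 16 MiB supersections */

/* find last (most significant) set bit, 1-based, like the kernel's fls() */
static int fls_local(unsigned long x)
{
	int bit = 0;
	while (x) {
		bit++;
		x >>= 1;
	}
	return bit;
}

int main(void)
{
	unsigned long sizes[] = { 0x100, 0x3000, 0x200000, 0x4000000 };
	for (int i = 0; i < 4; i++) {
		int bit = fls_local(sizes[i]);
		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;
		/* e.g. size 0x100 -> align 0x1000, size 0x4000000 -> align 0x1000000 */
		printf("size %#lx -> align %#lx\n", sizes[i], 1UL << bit);
	}
	return 0;
}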
void __init bootmem_init(void)
{
	unsigned long min, max_low, max_high;

	max_low = max_high = 0;

	/* Find the first page frame of RAM, the top of lowmem and the top
	 * of highmem; find_limits() is shown below */
	find_limits(&min, &max_low, &max_high);

	arm_bootmem_init(min, max_low);

	/*
	 * Sparsemem tries to allocate bootmem in memory_present(),
	 * so must be done after the fixed reservations
	 */
	arm_memory_present();

	/*
	 * sparse_init() needs the bootmem allocator up and running.
	 */
	sparse_init();

	/*
	 * Now free the memory - free_area_init_node needs
	 * the sparse mem_map arrays initialized by sparse_init()
	 * for memmap_init_zone(), otherwise all PFNs are invalid.
	 */
	arm_bootmem_free(min, max_low, max_high);

	/* high_memory is the virtual address where lowmem ends, i.e.
	 * where the highmem region begins */
	high_memory = __va(((phys_addr_t)max_low << PAGE_SHIFT) - 1) + 1;

	/*
	 * This doesn't seem to be used by the Linux memory manager any
	 * more, but is used by ll_rw_block.  If we can get rid of it, we
	 * also get rid of some of the stuff above as well.
	 *
	 * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
	 * the system, not the maximum PFN.
	 */
	max_low_pfn = max_low - PHYS_PFN_OFFSET;
	max_pfn = max_high - PHYS_PFN_OFFSET;
}
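The high_memory expression converts the last byte of lowmem rather than the one-past-the-end address, then adds one back, presumably so the argument to __va() always stays inside the linear map even when lowmem ends exactly at a boundary the conversion could not represent. A standalone check of the arithmetic with assumed values (PAGE_OFFSET 0xC0000000, RAM starting at physical 0, 768 MiB of lowmem):

#include <stdio.h>

#define PAGE_SHIFT  12
#define PAGE_OFFSET 0xC0000000UL	/* assumed kernel direct-map base */
#define PHYS_OFFSET 0x00000000UL	/* assumed start of RAM */

/* simplified linear phys->virt conversion, like the kernel's __va() */
static unsigned long va(unsigned long long phys)
{
	return (unsigned long)(phys - PHYS_OFFSET + PAGE_OFFSET);
}

int main(void)
{
	unsigned long max_low = 0x30000;	/* assumed: lowmem tops out at 768 MiB */
	unsigned long long phys_end = (unsigned long long)max_low << PAGE_SHIFT;

	/* high_memory = __va((max_low << PAGE_SHIFT) - 1) + 1 */
	unsigned long high_memory = va(phys_end - 1) + 1;

	printf("lowmem ends at phys %#llx, high_memory = %#lx\n",
	       phys_end, high_memory);	/* 0x30000000 -> 0xf0000000 */
	return 0;
}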
static void __init find_limits(unsigned long *min, unsigned long *max_low,
	unsigned long *max_high)
{
	struct meminfo *mi = &meminfo;
	int i;

	*min = -1UL;
	*max_low = *max_high = 0;

	for_each_bank (i, mi) {
		struct membank *bank = &mi->bank[i];
		unsigned long start, end;

		start = bank_pfn_start(bank);
		end = bank_pfn_end(bank);

		if (*min > start)
			*min = start;
		if (*max_high < end)
			*max_high = end;
		/* For a highmem bank there is no need to update max_low;
		 * see sanity_check_meminfo() below for how bank->highmem
		 * gets set */
		if (bank->highmem)
			continue;
		if (*max_low < end)
			*max_low = end;
	}
}
void __init sanity_check_meminfo(void)
{
	int i, j, highmem = 0;

	for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
		struct membank *bank = &meminfo.bank[j];
		*bank = meminfo.bank[i];

#ifdef CONFIG_HIGHMEM
		/* static void * __initdata vmalloc_min =
		 *	(void *)(VMALLOC_END - SZ_128M); */
		if (__va(bank->start) >= vmalloc_min ||
		    __va(bank->start) < (void *)PAGE_OFFSET)
			highmem = 1;

		bank->highmem = highmem;
		...
#else
		bank->highmem = highmem;
		...
	}
	...
}

Back to the get_vm_area_caller() path: the remaining piece is insert_vmalloc_vm(), called at the end of __get_vm_area_node().

static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	/* vm (vm_struct) was allocated in __get_vm_area_node();
	 * va (vmap_area) was allocated there too, via alloc_vmap_area() */
	setup_vmalloc_vm(vm, va, flags, caller);
	insert_vmalloc_vmlist(vm);
}
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			     unsigned long flags, void *caller)
{
	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;
	va->vm = vm;
	va->flags |= VM_VM_AREA;
}
static void insert_vmalloc_vmlist(struct vm_struct *vm)
{
	struct vm_struct *tmp, **p;

	vm->flags &= ~VM_UNLIST;

	write_lock(&vmlist_lock);
	/* Insert the vm_struct into vmlist, which is kept sorted by address */
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr >= vm->addr)
			break;
	}
	vm->next = *p;
	*p = vm;
	write_unlock(&vmlist_lock);
}
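insert_vmalloc_vmlist() is the classic pointer-to-pointer insertion into a sorted singly linked list: p always points at the next field to rewrite, so no special case is needed for inserting at the head. A self-contained illustration of the same idiom:

#include <stdio.h>
#include <stdlib.h>

struct node {
	int addr;
	struct node *next;
};

static struct node *list;	/* plays the role of vmlist */

static void sorted_insert(struct node *n)
{
	struct node *tmp, **p;

	/* walk the next pointers until the first entry >= n is found */
	for (p = &list; (tmp = *p) != NULL; p = &tmp->next)
		if (tmp->addr >= n->addr)
			break;
	n->next = *p;	/* splice n in front of tmp (or at the tail) */
	*p = n;
}

int main(void)
{
	int vals[] = { 30, 10, 20 };
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		n->addr = vals[i];
		sorted_insert(n);
	}
	for (struct node *n = list; n; n = n->next)
		printf("%d ", n->addr);	/* prints: 10 20 30 */
	printf("\n");
	return 0;
}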
int ioremap_page_range(unsigned long addr, unsigned long end,
		       phys_addr_t phys_addr, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long start;
	unsigned long next;
	int err;

	BUG_ON(addr >= end);

	start = addr;
	phys_addr -= addr;
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = ioremap_pud_range(pgd, addr, next, phys_addr + addr, prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	flush_cache_vmap(start, end);

	return err;
}

static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pud_t *pud;
	unsigned long next;

	phys_addr -= addr;
	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pmd_t *pmd;
	unsigned long next;

	phys_addr -= addr;
	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pte_t *pte;
	u64 pfn;

	pfn = phys_addr >> PAGE_SHIFT;
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		BUG_ON(!pte_none(*pte));
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
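Throughout this walk, each level first does phys_addr -= addr and each deeper call then passes phys_addr + addr: the physical/virtual delta stays constant while addr marches forward in level-sized chunks. The chunking itself comes from the *_addr_end() helpers, which clamp to the next level boundary or to end, whichever comes first. A standalone model of that pattern (the 2 MiB boundary is an assumed stand-in for a real PMD size):

#include <stdio.h>

#define BOUNDARY (1UL << 21)	/* assumed 2 MiB level size, like a PMD */

/* same shape as the kernel's pgd/pud/pmd_addr_end() macros */
static unsigned long addr_end(unsigned long addr, unsigned long end)
{
	unsigned long next = (addr + BOUNDARY) & ~(BOUNDARY - 1);
	return (next - 1 < end - 1) ? next : end;
}

int main(void)
{
	unsigned long addr = 0x1f0000, end = 0x5f0000, next;

	/* split [addr, end) on 2 MiB boundaries, like the do/while walks above */
	do {
		next = addr_end(addr, end);
		printf("chunk [%#lx, %#lx)\n", addr, next);
	} while (addr = next, addr != end);
	return 0;
}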

 
