This is the top-level function called from setup_arch(). When this function returns, the page tables have been fully set up. Be aware that this is all x86 specific.
351 void __init paging_init(void)
352 {
353         pagetable_init();
354 
355         load_cr3(swapper_pg_dir);
356 
357 #if CONFIG_X86_PAE
362         if (cpu_has_pae)
363                 set_in_cr4(X86_CR4_PAE);
364 #endif
365 
366         __flush_tlb_all();
367 
368 #ifdef CONFIG_HIGHMEM
369         kmap_init();
370 #endif
371         zone_sizes_init();
372 }
This function is responsible for statically initialising a pagetable starting with a statically defined PGD called swapper_pg_dir. At the very least, a PTE will be available that points to every page frame in ZONE_NORMAL.
205 static void __init pagetable_init (void)
206 {
207         unsigned long vaddr, end;
208         pgd_t *pgd, *pgd_base;
209         int i, j, k;
210         pmd_t *pmd;
211         pte_t *pte, *pte_base;
212 
213         /*
214          * This can be zero as well - no problem, in that case we exit
215          * the loops anyway due to the PTRS_PER_* conditions.
216          */
217         end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
218 
219         pgd_base = swapper_pg_dir;
220 #if CONFIG_X86_PAE
221         for (i = 0; i < PTRS_PER_PGD; i++)
222                 set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page)));
223 #endif
224         i = __pgd_offset(PAGE_OFFSET);
225         pgd = pgd_base + i;
This first block initialises the PGD. It does this by pointing each entry to the global zero page. Entries needed to reference available memory in ZONE_NORMAL will be allocated later.
227         for (; i < PTRS_PER_PGD; pgd++, i++) {
228                 vaddr = i*PGDIR_SIZE;
229                 if (end && (vaddr >= end))
230                         break;
231 #if CONFIG_X86_PAE
232                 pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
233                 set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
234 #else
235                 pmd = (pmd_t *)pgd;
236 #endif
237                 if (pmd != pmd_offset(pgd, 0))
238                         BUG();
This loop begins setting up valid PMD entries to point to. In the PAE case, pages are allocated with alloc_bootmem_low_pages() and the PGD is set appropriately. Without PAE, there is no middle directory, so it is just "folded" back onto the PGD to preserve the illusion of a 3-level pagetable.
239                 for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
240                         vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
241                         if (end && (vaddr >= end))
242                                 break;
243                         if (cpu_has_pse) {
244                                 unsigned long __pe;
245 
246                                 set_in_cr4(X86_CR4_PSE);
247                                 boot_cpu_data.wp_works_ok = 1;
248                                 __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
249                                 /* Make it "global" too if supported */
250                                 if (cpu_has_pge) {
251                                         set_in_cr4(X86_CR4_PGE);
252                                         __pe += _PAGE_GLOBAL;
253                                 }
254                                 set_pmd(pmd, __pmd(__pe));
255                                 continue;
256                         }
257 
258                         pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
259 
Initialise each entry in the PMD. This loop will only execute more than once if PAE is enabled. Remember that without PAE, PTRS_PER_PMD is 1.
260                         for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
261                                 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
262                                 if (end && (vaddr >= end))
263                                         break;
264                                 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
265                         }
266                         set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
267                         if (pte_base != pte_offset(pmd, 0))
268                                 BUG();
269 
270                 }
271         }
Initialise the PTEs.
273         /*
274          * Fixed mappings, only the page table structure has to be
275          * created - mappings will be set by set_fixmap():
276          */
277         vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
278         fixrange_init(vaddr, 0, pgd_base);
279 
280 #if CONFIG_HIGHMEM
281         /*
282          * Permanent kmaps:
283          */
284         vaddr = PKMAP_BASE;
285         fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
286 
287         pgd = swapper_pg_dir + __pgd_offset(vaddr);
288         pmd = pmd_offset(pgd, vaddr);
289         pte = pte_offset(pmd, vaddr);
290         pkmap_page_table = pte;
291 #endif
292 
293 #if CONFIG_X86_PAE
294         /*
295          * Add low memory identity-mappings - SMP needs it when
296          * starting up on an AP from real-mode. In the non-PAE
297          * case we already have these mappings through head.S.
298          * All user-space mappings are explicitly cleared after
299          * SMP startup.
300          */
301         pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
302 #endif
303 }
At this point, page table entries have been set up which reference all parts of ZONE_NORMAL. The remaining regions needed are those for fixed mappings and those needed for mapping high memory pages with kmap().
This function creates valid PGDs and PMDs for fixed virtual address mappings.
167 static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
168 {
169         pgd_t *pgd;
170         pmd_t *pmd;
171         pte_t *pte;
172         int i, j;
173         unsigned long vaddr;
174 
175         vaddr = start;
176         i = __pgd_offset(vaddr);
177         j = __pmd_offset(vaddr);
178         pgd = pgd_base + i;
179 
180         for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
181 #if CONFIG_X86_PAE
182                 if (pgd_none(*pgd)) {
183                         pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
184                         set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
185                         if (pmd != pmd_offset(pgd, 0))
186                                 printk("PAE BUG #02!\n");
187                 }
188                 pmd = pmd_offset(pgd, vaddr);
189 #else
190                 pmd = (pmd_t *)pgd;
191 #endif
192                 for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
193                         if (pmd_none(*pmd)) {
194                                 pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
195                                 set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
196                                 if (pte != pte_offset(pmd, 0))
197                                         BUG();
198                         }
199                         vaddr += PMD_SIZE;
200                 }
201                 j = 0;
202         }
203 }
This function only exists if CONFIG_HIGHMEM is set during compile time. It is responsible for caching where the beginning of the kmap region is, the PTE referencing it and the protection for the page tables. This means the PGD will not have to be checked every time kmap() is used.
74 #if CONFIG_HIGHMEM
75 pte_t *kmap_pte;
76 pgprot_t kmap_prot;
77 
78 #define kmap_get_fixmap_pte(vaddr) \
79         pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
80 
81 void __init kmap_init(void)
82 {
83         unsigned long kmap_vstart;
84 
85         /* cache the first kmap pte */
86         kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
87         kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
88 
89         kmap_prot = PAGE_KERNEL;
90 }
91 #endif /* CONFIG_HIGHMEM */
This function returns the struct page used by the PTE at address in mm's page tables.
405 static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write)
406 {
407         pgd_t *pgd;
408         pmd_t *pmd;
409         pte_t *ptep, pte;
410 
411         pgd = pgd_offset(mm, address);
412         if (pgd_none(*pgd) || pgd_bad(*pgd))
413                 goto out;
414 
415         pmd = pmd_offset(pgd, address);
416         if (pmd_none(*pmd) || pmd_bad(*pmd))
417                 goto out;
418 
419         ptep = pte_offset(pmd, address);
420         if (!ptep)
421                 goto out;
422 
423         pte = *ptep;
424         if (pte_present(pte)) {
425                 if (!write ||
426                     (pte_write(pte) && pte_dirty(pte)))
427                         return pte_page(pte);
428         }
429 
430 out:
431         return 0;
432 }