Source: include/asm-i386/highmem.h
This API is used by callers willing to block.
62 #define kmap(page) __kmap(page, 0)
63 #define kmap_nonblock(page) __kmap(page, 1)
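As a brief illustration of the API, the sketch below (not from the kernel source; the helper name is hypothetical) shows the usual pattern for a caller that is willing to block: map the page, use the returned virtual address, then unmap.

/* Hypothetical helper: zero a possibly-highmem page using the
 * blocking API. Must not be called from interrupt context because
 * kmap() may sleep waiting for a pkmap slot to become free. */
static void zero_highpage_example(struct page *page)
{
        char *vaddr = kmap(page);       /* may block in map_new_virtual() */

        memset(vaddr, 0, PAGE_SIZE);    /* the page is now visible at vaddr */
        kunmap(page);                   /* drop the reference on the slot */
}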
Source: include/asm-i386/highmem.h
The call graph for this function is shown in Figure 9.1.
65 static inline void *__kmap(struct page *page, int nonblocking)
66 {
67         if (in_interrupt())
68                 out_of_line_bug();
69         if (page < highmem_start_page)
70                 return page_address(page);
71         return kmap_high(page, nonblocking);
72 }
Source: mm/highmem.c

This is the architecture-independent part of the kmap() operation.

132 void *kmap_high(struct page *page, int nonblocking)
133 {
134         unsigned long vaddr;
142         spin_lock(&kmap_lock);
143         vaddr = (unsigned long) page->virtual;
144         if (!vaddr) {
145                 vaddr = map_new_virtual(page, nonblocking);
146                 if (!vaddr)
147                         goto out;
148         }
149         pkmap_count[PKMAP_NR(vaddr)]++;
150         if (pkmap_count[PKMAP_NR(vaddr)] < 2)
151                 BUG();
152 out:
153         spin_unlock(&kmap_lock);
154         return (void*) vaddr;
155 }
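Both kmap_high() and flush_all_zero_pkmaps() depend on the counting convention for pkmap_count, which is described in a comment in mm/highmem.c and can be summarised as follows:

/*
 * pkmap_count[i] semantics for slot i of the kmap pool:
 *   0     - unused and not mapped since the last TLB flush, so
 *           map_new_virtual() may use the slot immediately
 *   1     - no users, but still mapped since the last TLB flush,
 *           so flush_all_zero_pkmaps() must reclaim it first
 *   n > 1 - the slot currently has n - 1 users
 */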
map_new_virtual() is divided into three principal parts: scanning for a free slot, waiting on a queue if none is available, and mapping the page.
80 static inline unsigned long map_new_virtual(struct page *page, int nonblocking)
81 {
82         unsigned long vaddr;
83         int count;
84
85 start:
86         count = LAST_PKMAP;
87         /* Find an empty entry */
88         for (;;) {
89                 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
90                 if (!last_pkmap_nr) {
91                         flush_all_zero_pkmaps();
92                         count = LAST_PKMAP;
93                 }
94                 if (!pkmap_count[last_pkmap_nr])
95                         break;  /* Found a usable entry */
96                 if (--count)
97                         continue;
98
99                 if (nonblocking)
100                         return 0;
105                 {
106                         DECLARE_WAITQUEUE(wait, current);
107
108                         current->state = TASK_UNINTERRUPTIBLE;
109                         add_wait_queue(&pkmap_map_wait, &wait);
110                         spin_unlock(&kmap_lock);
111                         schedule();
112                         remove_wait_queue(&pkmap_map_wait, &wait);
113                         spin_lock(&kmap_lock);
114
115                         /* Somebody else might have mapped it while we slept */
116                         if (page->virtual)
117                                 return (unsigned long) page->virtual;
118
119                         /* Re-start */
120                         goto start;
121                 }
122         }
If there is no available slot after scanning all the slots once, we sleep on the pkmap_map_wait queue until we are woken up after an unmapping.
123         vaddr = PKMAP_ADDR(last_pkmap_nr);
124         set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
125
126         pkmap_count[last_pkmap_nr] = 1;
127         page->virtual = (void *) vaddr;
128
129         return vaddr;
130 }
A slot has been found, so map the page, initialise its reference count to 1 and record the virtual address in page->virtual.
This function cycles through the pkmap_count array, setting every entry with a count of 1 (mapped but unused) to 0 and clearing its page table entry, before flushing the TLB.
42 static void flush_all_zero_pkmaps(void)
43 {
44         int i;
45
46         flush_cache_all();
47
48         for (i = 0; i < LAST_PKMAP; i++) {
49                 struct page *page;
57                 if (pkmap_count[i] != 1)
58                         continue;
59                 pkmap_count[i] = 0;
60
61                 /* sanity check */
62                 if (pte_none(pkmap_page_table[i]))
63                         BUG();
72                 page = pte_page(pkmap_page_table[i]);
73                 pte_clear(&pkmap_page_table[i]);
74
75                 page->virtual = NULL;
76         }
77         flush_tlb_all();
78 }
Source: include/asm-i386/kmap_types.h

The following is an example km_type enumeration for the x86. It lists the different contexts, including interrupts, that need to atomically map pages with kmap_atomic(). Note how KM_TYPE_NR is the last element, so it doubles as a count of the number of entries.
4 enum km_type {
5         KM_BOUNCE_READ,
6         KM_SKB_SUNRPC_DATA,
7         KM_SKB_DATA_SOFTIRQ,
8         KM_USER0,
9         KM_USER1,
10         KM_BH_IRQ,
11         KM_TYPE_NR
12 };
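As an example of the count being used, the fixmap address space reserves KM_TYPE_NR page table entries for every processor so that each CPU has its own private set of atomic kmap slots. The sketch below is based on the corresponding entries in include/asm-i386/fixmap.h:

#ifdef CONFIG_HIGHMEM
        FIX_KMAP_BEGIN, /* reserved PTEs for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
#endif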
Source: include/asm-i386/highmem.h
This is the atomic version of kmap(). At no point does it hold a spinlock or sleep. A spinlock is not required because every processor has its own reserved slots.
89 static inline void *kmap_atomic(struct page *page, enum km_type type)
90 {
91         enum fixed_addresses idx;
92         unsigned long vaddr;
93
94         if (page < highmem_start_page)
95                 return page_address(page);
96
97         idx = type + KM_TYPE_NR*smp_processor_id();
98         vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
99 #if HIGHMEM_DEBUG
100         if (!pte_none(*(kmap_pte-idx)))
101                 out_of_line_bug();
102 #endif
103         set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
104         __flush_tlb_one(vaddr);
105
106         return (void*) vaddr;
107 }
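A sketch of typical usage follows. The helper name is hypothetical, but the pattern mirrors copy_user_highpage() in include/linux/highmem.h; the caller must not sleep between the map and the unmap because the slots belong to the current processor.

/* Hypothetical helper: copy one page to another without blocking,
 * using two of the per-CPU atomic kmap slots. */
static void copy_highpage_example(struct page *to, struct page *from)
{
        char *vfrom, *vto;

        vfrom = kmap_atomic(from, KM_USER0);
        vto = kmap_atomic(to, KM_USER1);
        memcpy(vto, vfrom, PAGE_SIZE);
        kunmap_atomic(vto, KM_USER1);
        kunmap_atomic(vfrom, KM_USER0);
}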
Source: include/asm-i386/highmem.h
This unmaps a page previously mapped with kmap(). For pages below highmem_start_page there is nothing to do; otherwise the work is delegated to kunmap_high().

74 static inline void kunmap(struct page *page)
75 {
76         if (in_interrupt())
77                 out_of_line_bug();
78         if (page < highmem_start_page)
79                 return;
80         kunmap_high(page);
81 }
Source: mm/highmem.c

This is the architecture-independent part of the kunmap() operation.
157 void kunmap_high(struct page *page)
158 {
159         unsigned long vaddr;
160         unsigned long nr;
161         int need_wakeup;
162
163         spin_lock(&kmap_lock);
164         vaddr = (unsigned long) page->virtual;
165         if (!vaddr)
166                 BUG();
167         nr = PKMAP_NR(vaddr);
173         need_wakeup = 0;
174         switch (--pkmap_count[nr]) {
175         case 0:
176                 BUG();
177         case 1:
188                 need_wakeup = waitqueue_active(&pkmap_map_wait);
189         }
190         spin_unlock(&kmap_lock);
191
192         /* do wake-up, if needed, race-free outside of the spin lock */
193         if (need_wakeup)
194                 wake_up(&pkmap_map_wait);
195 }
Source: include/asm-i386/highmem.h
This entire function is debug code. The reason is that, as pages are only mapped atomically, they will only be used for a short time in one small region of code before being unmapped. It is safe to leave the page mapped because it will not be referenced after unmapping, and another mapping to the same slot will simply replace it.
109 static inline void kunmap_atomic(void *kvaddr, enum km_type type)
110 {
111 #if HIGHMEM_DEBUG
112         unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
113         enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
114
115         if (vaddr < FIXADDR_START) // FIXME
116                 return;
117
118         if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
119                 out_of_line_bug();
120
121         /*
122          * force other mappings to Oops if they'll try to access
123          * this pte without first remap it
124          */
125         pte_clear(kmap_pte-idx);
126         __flush_tlb_one(vaddr);
127 #endif
128 }
Source: mm/highmem.c

The call graph for this function is shown in Figure 9.3. This is the high-level function for the creation of bounce buffers. It is broken into two major parts: the allocation of the necessary resources and the copying of data from the template buffer_head.
405 struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig)
406 {
407         struct page *page;
408         struct buffer_head *bh;
409
410         if (!PageHighMem(bh_orig->b_page))
411                 return bh_orig;
412
413         bh = alloc_bounce_bh();
420         page = alloc_bounce_page();
421
422         set_bh_page(bh, page, 0);
424         bh->b_next = NULL;
425         bh->b_blocknr = bh_orig->b_blocknr;
426         bh->b_size = bh_orig->b_size;
427         bh->b_list = -1;
428         bh->b_dev = bh_orig->b_dev;
429         bh->b_count = bh_orig->b_count;
430         bh->b_rdev = bh_orig->b_rdev;
431         bh->b_state = bh_orig->b_state;
432 #ifdef HIGHMEM_DEBUG
433         bh->b_flushtime = jiffies;
434         bh->b_next_free = NULL;
435         bh->b_prev_free = NULL;
436         /* bh->b_this_page */
437         bh->b_reqnext = NULL;
438         bh->b_pprev = NULL;
439 #endif
440         /* bh->b_page */
441         if (rw == WRITE) {
442                 bh->b_end_io = bounce_end_io_write;
443                 copy_from_high_bh(bh, bh_orig);
444         } else
445                 bh->b_end_io = bounce_end_io_read;
446         bh->b_private = (void *)bh_orig;
447         bh->b_rsector = bh_orig->b_rsector;
448 #ifdef HIGHMEM_DEBUG
449         memset(&bh->b_wait, -1, sizeof(bh->b_wait));
450 #endif
451
452         return bh;
453 }
Populate the newly created buffer_head
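For context, the substitution a caller makes is worth sketching. The wrapper below is hypothetical (in the kernel the exchange happens in the block I/O submission path); it shows that create_bounce() is transparent to callers, because low-memory buffers are returned unchanged.

/* Hypothetical wrapper around request submission. Drivers that
 * cannot DMA to high memory receive the bounce buffer instead of
 * the original; for lowmem pages create_bounce() returns bh_orig
 * unchanged. On I/O completion, bh->b_end_io() points to
 * bounce_end_io_write() or bounce_end_io_read(), which reclaim the
 * bounce resources and complete the original buffer_head. */
static void submit_bounced(int rw, struct buffer_head *bh)
{
        bh = create_bounce(rw, bh);
        /* ... queue bh to the low-level driver as normal ... */
}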
alloc_bounce_bh() first tries to allocate a buffer_head from the slab allocator. If that fails, an emergency pool will be used.
369 struct buffer_head *alloc_bounce_bh (void)
370 {
371         struct list_head *tmp;
372         struct buffer_head *bh;
373
374         bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO);
375         if (bh)
376                 return bh;
380
381         wakeup_bdflush();
383 repeat_alloc:
387         tmp = &emergency_bhs;
388         spin_lock_irq(&emergency_lock);
389         if (!list_empty(tmp)) {
390                 bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers);
391                 list_del(tmp->next);
392                 nr_emergency_bhs--;
393         }
394         spin_unlock_irq(&emergency_lock);
395         if (bh)
396                 return bh;
397
398         /* we need to wait I/O completion */
399         run_task_queue(&tq_disk);
400
401         yield();
402         goto repeat_alloc;
403 }
The allocation from the slab failed, so allocate from the emergency pool, retrying until a buffer_head becomes available.
This function is essentially identical to alloc_bounce_bh(). It first tries to allocate a page from the buddy allocator. If that fails, an emergency pool will be used.
333 struct page *alloc_bounce_page (void)
334 {
335         struct list_head *tmp;
336         struct page *page;
337
338         page = alloc_page(GFP_NOHIGHIO);
339         if (page)
340                 return page;
344
345         wakeup_bdflush();
347 repeat_alloc:
351         tmp = &emergency_pages;
352         spin_lock_irq(&emergency_lock);
353         if (!list_empty(tmp)) {
354                 page = list_entry(tmp->next, struct page, list);
355                 list_del(tmp->next);
356                 nr_emergency_pages--;
357         }
358         spin_unlock_irq(&emergency_lock);
359         if (page)
360                 return page;
361
362         /* we need to wait I/O completion */
363         run_task_queue(&tq_disk);
364
365         yield();
366         goto repeat_alloc;
367 }
This function is called when a bounce buffer used for writing to a device completes I/O. Because the data has already been copied from high memory to the device, there is nothing left to do except reclaim the resources.
319 static void bounce_end_io_write (struct buffer_head *bh, int uptodate)
320 {
321         bounce_end_io(bh, uptodate);
322 }
This is called when data has been read from the device and needs to be copied to high memory. It is called from interrupt context, so it has to be more careful.
324 static void bounce_end_io_read (struct buffer_head *bh, int uptodate)
325 {
326         struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
327
328         if (uptodate)
329                 copy_to_high_bh_irq(bh_orig, bh);
330         bounce_end_io(bh, uptodate);
331 }
This function copies data from a high memory buffer_head to a bounce buffer.
215 static inline void copy_from_high_bh (struct buffer_head *to,
216                 struct buffer_head *from)
217 {
218         struct page *p_from;
219         char *vfrom;
220
221         p_from = from->b_page;
222
223         vfrom = kmap_atomic(p_from, KM_USER0);
224         memcpy(to->b_data, vfrom + bh_offset(from), to->b_size);
225         kunmap_atomic(vfrom, KM_USER0);
226 }
Called from interrupt context after the device has finished writing data into the bounce buffer. This function copies the data to high memory, with interrupts disabled around the use of the KM_BOUNCE_READ slot.
228 static inline void copy_to_high_bh_irq (struct buffer_head *to,
229                 struct buffer_head *from)
230 {
231         struct page *p_to;
232         char *vto;
233         unsigned long flags;
234
235         p_to = to->b_page;
236         __save_flags(flags);
237         __cli();
238         vto = kmap_atomic(p_to, KM_BOUNCE_READ);
239         memcpy(vto + bh_offset(to), from->b_data, to->b_size);
240         kunmap_atomic(vto, KM_BOUNCE_READ);
241         __restore_flags(flags);
242 }
Reclaims the resources used by the bounce buffers. If the emergency pools are not full, the resources are added to them; otherwise they are freed.
244 static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
245 {
246         struct page *page;
247         struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private);
248         unsigned long flags;
249
250         bh_orig->b_end_io(bh_orig, uptodate);
251
252         page = bh->b_page;
253
254         spin_lock_irqsave(&emergency_lock, flags);
255         if (nr_emergency_pages >= POOL_SIZE)
256                 __free_page(page);
257         else {
258                 /*
259                  * We are abusing page->list to manage
260                  * the highmem emergency pool:
261                  */
262                 list_add(&page->list, &emergency_pages);
263                 nr_emergency_pages++;
264         }
265
266         if (nr_emergency_bhs >= POOL_SIZE) {
267 #ifdef HIGHMEM_DEBUG
268                 /* Don't clobber the constructed slab cache */
269                 init_waitqueue_head(&bh->b_wait);
270 #endif
271                 kmem_cache_free(bh_cachep, bh);
272         } else {
273                 /*
274                  * Ditto in the bh case, here we abuse b_inode_buffers:
275                  */
276                 list_add(&bh->b_inode_buffers, &emergency_bhs);
277                 nr_emergency_bhs++;
278         }
279         spin_unlock_irqrestore(&emergency_lock, flags);
280 }
There is only one function of relevance to the emergency pools and that is the init function. It is called during system startup; afterwards, the code is discarded because it is never needed again.
Create a pool for emergency pages and for emergency buffer_heads
282 static __init int init_emergency_pool(void)
283 {
284         struct sysinfo i;
285         si_meminfo(&i);
286         si_swapinfo(&i);
287
288         if (!i.totalhigh)
289                 return 0;
290
291         spin_lock_irq(&emergency_lock);
292         while (nr_emergency_pages < POOL_SIZE) {
293                 struct page * page = alloc_page(GFP_ATOMIC);
294                 if (!page) {
295                         printk("couldn't refill highmem emergency pages");
296                         break;
297                 }
298                 list_add(&page->list, &emergency_pages);
299                 nr_emergency_pages++;
300         }
301         while (nr_emergency_bhs < POOL_SIZE) {
302                 struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
303                 if (!bh) {
304                         printk("couldn't refill highmem emergency bhs");
305                         break;
306                 }
307                 list_add(&bh->b_inode_buffers, &emergency_bhs);
308                 nr_emergency_bhs++;
309         }
310         spin_unlock_irq(&emergency_lock);
311         printk("allocated %d pages and %d bhs reserved for the highmem bounces\n",
312                 nr_emergency_pages, nr_emergency_bhs);
313
314         return 0;
315 }
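In mm/highmem.c, the function is registered as an initcall so it runs once during boot; because it is marked __init, its code is freed with the rest of the init sections afterwards, which is why it is never needed again:

__initcall(init_emergency_pool);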