This documentation is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
For more details see the file COPYING in the source distribution of Linux.
Table of Contents
bdget by gendisk and partition number
Table of Contents
list_add — add a new entry
void list_add ( | new, | |
head); |
struct list_head * | new; |
struct list_head * | head; |
list_add_tail — add a new entry
void list_add_tail ( | new, | |
head); |
struct list_head * | new; |
struct list_head * | head; |
list_replace — replace old entry by new one
void list_replace ( | old, | |
new); |
struct list_head * | old; |
struct list_head * | new; |
list_del_init — deletes entry from list and reinitializes it.
void list_del_init ( | entry); |
struct list_head * | entry; |
list_move — delete from one list and add as another's head
void list_move ( | list, | |
head); |
struct list_head * | list; |
struct list_head * | head; |
list_move_tail — delete from one list and add as another's tail
void list_move_tail ( | list, | |
head); |
struct list_head * | list; |
struct list_head * | head; |
list_is_last —
tests whether list is the last entry in list head
int list_is_last ( | list, | |
head); |
const struct list_head * | list; |
const struct list_head * | head; |
list_empty — tests whether a list is empty
int list_empty ( | head); |
const struct list_head * | head; |
list_empty_careful — tests whether a list is empty and not being modified
int list_empty_careful ( | head); |
const struct list_head * | head; |
list_is_singular — tests whether a list has just one entry.
int list_is_singular ( | head); |
const struct list_head * | head; |
list_cut_position — cut a list into two
void list_cut_position ( | list, | |
| head, | ||
entry); |
struct list_head * | list; |
struct list_head * | head; |
struct list_head * | entry; |
list_splice — join two lists, this is designed for stacks
void list_splice ( | list, | |
head); |
const struct list_head * | list; |
struct list_head * | head; |
list_splice_tail — join two lists, each list being a queue
void list_splice_tail ( | list, | |
head); |
struct list_head * | list; |
struct list_head * | head; |
list_splice_init — join two lists and reinitialise the emptied list.
void list_splice_init ( | list, | |
head); |
struct list_head * | list; |
struct list_head * | head; |
list_splice_tail_init — join two lists and reinitialise the emptied list
void list_splice_tail_init ( | list, | |
head); |
struct list_head * | list; |
struct list_head * | head; |
list_entry — get the struct for this entry
list_entry ( | ptr, | |
| type, | ||
member); |
| ptr; |
| type; |
| member; |
list_first_entry — get the first element from a list
list_first_entry ( | ptr, | |
| type, | ||
member); |
| ptr; |
| type; |
| member; |
list_for_each_prev — iterate over a list backwards
list_for_each_prev ( | pos, | |
head); |
| pos; |
| head; |
list_for_each_safe — iterate over a list safe against removal of list entry
list_for_each_safe ( | pos, | |
| n, | ||
head); |
| pos; |
| n; |
| head; |
list_for_each_prev_safe — iterate over a list backwards safe against removal of list entry
list_for_each_prev_safe ( | pos, | |
| n, | ||
head); |
| pos; |
| n; |
| head; |
list_for_each_entry — iterate over list of given type
list_for_each_entry ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_for_each_entry_reverse — iterate backwards over list of given type.
list_for_each_entry_reverse ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_prepare_entry —
prepare a pos entry for use in list_for_each_entry_continue
list_prepare_entry ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_for_each_entry_continue — continue iteration over list of given type
list_for_each_entry_continue ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_for_each_entry_continue_reverse — iterate backwards from the given point
list_for_each_entry_continue_reverse ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_for_each_entry_from — iterate over list of given type from the current point
list_for_each_entry_from ( | pos, | |
| head, | ||
member); |
| pos; |
| head; |
| member; |
list_for_each_entry_safe — iterate over list of given type safe against removal of list entry
list_for_each_entry_safe ( | pos, | |
| n, | ||
| head, | ||
member); |
| pos; |
| n; |
| head; |
| member; |
list_for_each_entry_safe_continue —
list_for_each_entry_safe_continue ( | pos, | |
| n, | ||
| head, | ||
member); |
| pos; |
| n; |
| head; |
| member; |
list_for_each_entry_safe_from —
list_for_each_entry_safe_from ( | pos, | |
| n, | ||
| head, | ||
member); |
| pos; |
| n; |
| head; |
| member; |
list_for_each_entry_safe_reverse —
list_for_each_entry_safe_reverse ( | pos, | |
| n, | ||
| head, | ||
member); |
| pos; |
| n; |
| head; |
| member; |
hlist_for_each_entry — iterate over list of given type
hlist_for_each_entry ( | tpos, | |
| pos, | ||
| head, | ||
member); |
| tpos; |
| pos; |
| head; |
| member; |
hlist_for_each_entry_continue — iterate over a hlist continuing after current point
hlist_for_each_entry_continue ( | tpos, | |
| pos, | ||
member); |
| tpos; |
| pos; |
| member; |
hlist_for_each_entry_from — iterate over a hlist continuing from current point
hlist_for_each_entry_from ( | tpos, | |
| pos, | ||
member); |
| tpos; |
| pos; |
| member; |
Table of Contents
When writing drivers, you cannot in general use routines which are from the C Library. Some of the functions have been found generally useful and they are listed below. The behaviour of these functions may vary slightly from those defined by ANSI, and these deviations are noted in the text.
simple_strtoll — convert a string to a signed long long
long long simple_strtoll ( | cp, | |
| endp, | ||
base); |
const char * | cp; |
char ** | endp; |
unsigned int | base; |
simple_strtoul — convert a string to an unsigned long
unsigned long simple_strtoul ( | cp, | |
| endp, | ||
base); |
const char * | cp; |
char ** | endp; |
unsigned int | base; |
simple_strtol — convert a string to a signed long
long simple_strtol ( | cp, | |
| endp, | ||
base); |
const char * | cp; |
char ** | endp; |
unsigned int | base; |
simple_strtoull — convert a string to an unsigned long long
unsigned long long simple_strtoull ( | cp, | |
| endp, | ||
base); |
const char * | cp; |
char ** | endp; |
unsigned int | base; |
strict_strtoul — convert a string to an unsigned long strictly
int strict_strtoul ( | cp, | |
| base, | ||
res); |
const char * | cp; |
unsigned int | base; |
unsigned long * | res; |
strict_strtoul converts a string to an unsigned long only if the string is really an unsigned long string; any string containing any invalid char at the tail will be rejected and -EINVAL is returned. Only a newline char at the tail is acceptable, because people generally change a module parameter in the following way:
echo 1024 > /sys/module/e1000/parameters/copybreak
echo will append a newline to the tail.
It returns 0 if conversion is successful and *res is set to the converted value, otherwise it returns -EINVAL and *res is set to 0.
simple_strtoul just ignores the successive invalid characters and returns the converted value of the prefix part of the string.
strict_strtol — convert a string to a long strictly
int strict_strtol ( | cp, | |
| base, | ||
res); |
const char * | cp; |
unsigned int | base; |
long * | res; |
strict_strtoull — convert a string to an unsigned long long strictly
int strict_strtoull ( | cp, | |
| base, | ||
res); |
const char * | cp; |
unsigned int | base; |
unsigned long long * | res; |
strict_strtoull converts a string to an unsigned long long only if the string is really an unsigned long long string; any string containing any invalid char at the tail will be rejected and -EINVAL is returned. Only a newline char at the tail is acceptable, because people generally change a module parameter in the following way:
echo 1024 > /sys/module/e1000/parameters/copybreak
echo will append a newline to the tail of the string.
It returns 0 if conversion is successful and *res is set to the converted value, otherwise it returns -EINVAL and *res is set to 0.
simple_strtoull just ignores the successive invalid characters and returns the converted value of the prefix part of the string.
strict_strtoll — convert a string to a long long strictly
int strict_strtoll ( | cp, | |
| base, | ||
res); |
const char * | cp; |
unsigned int | base; |
long long * | res; |
vsnprintf — Format a string and place it in a buffer
int vsnprintf ( | buf, | |
| size, | ||
| fmt, | ||
args); |
char * | buf; |
size_t | size; |
const char * | fmt; |
va_list | args; |
buf: The buffer to place the result into
size: The size of the buffer, including the trailing null space
fmt: The format string to use
args: Arguments for the format string
This function follows C99 vsnprintf, but has some extensions:
%pS output the name of a text symbol with offset
%ps output the name of a text symbol without offset
%pF output the name of a function pointer with its offset
%pf output the name of a function pointer without its offset
%pR output the address range in a struct resource
%n is ignored
The return value is the number of characters which would
be generated for the given input, excluding the trailing
'\0', as per ISO C99. If you want to have the exact
number of characters written into buf as return value
(not including the trailing '\0'), use vscnprintf. If the
return is greater than or equal to size, the resulting
string is truncated.
Call this function if you are already dealing with a va_list.
You probably want snprintf instead.
vscnprintf — Format a string and place it in a buffer
int vscnprintf ( | buf, | |
| size, | ||
| fmt, | ||
args); |
char * | buf; |
size_t | size; |
const char * | fmt; |
va_list | args; |
buf: The buffer to place the result into
size: The size of the buffer, including the trailing null space
fmt: The format string to use
args: Arguments for the format string
The return value is the number of characters which have been written into
the buf not including the trailing '\0'. If size is <= 0 the function
returns 0.
Call this function if you are already dealing with a va_list.
You probably want scnprintf instead.
See the vsnprintf documentation for format string extensions over C99.
snprintf — Format a string and place it in a buffer
int snprintf ( | buf, | |
| size, | ||
| fmt, | ||
...); |
char * | buf; |
size_t | size; |
const char * | fmt; |
| ...; |
buf: The buffer to place the result into
size: The size of the buffer, including the trailing null space
fmt: The format string to use
...: Arguments for the format string (variable arguments)
The return value is the number of characters which would be
generated for the given input, excluding the trailing null,
as per ISO C99. If the return is greater than or equal to
size, the resulting string is truncated.
See the vsnprintf documentation for format string extensions over C99.
scnprintf — Format a string and place it in a buffer
int scnprintf ( | buf, | |
| size, | ||
| fmt, | ||
...); |
char * | buf; |
size_t | size; |
const char * | fmt; |
| ...; |
vsprintf — Format a string and place it in a buffer
int vsprintf ( | buf, | |
| fmt, | ||
args); |
char * | buf; |
const char * | fmt; |
va_list | args; |
buf: The buffer to place the result into
fmt: The format string to use
args: Arguments for the format string
The function returns the number of characters written
into buf. Use vsnprintf or vscnprintf in order to avoid
buffer overflows.
Call this function if you are already dealing with a va_list.
You probably want sprintf instead.
See the vsnprintf documentation for format string extensions over C99.
sprintf — Format a string and place it in a buffer
int sprintf ( | buf, | |
| fmt, | ||
...); |
char * | buf; |
const char * | fmt; |
| ...; |
vbin_printf — Parse a format string and place args' binary value in a buffer
int vbin_printf ( | bin_buf, | |
| size, | ||
| fmt, | ||
args); |
u32 * | bin_buf; |
size_t | size; |
const char * | fmt; |
va_list | args; |
bin_buf: The buffer to place args' binary value
size: The size of the buffer (in 32-bit words, not characters)
fmt: The format string to use
args: Arguments for the format string
bstr_printf — Format a string from binary arguments and place it in a buffer
int bstr_printf ( | buf, | |
| size, | ||
| fmt, | ||
bin_buf); |
char * | buf; |
size_t | size; |
const char * | fmt; |
const u32 * | bin_buf; |
buf: The buffer to place the result into
size: The size of the buffer, including the trailing null space
fmt: The format string to use
bin_buf: Binary arguments for the format string
This function is like C99 vsnprintf, but the difference is that vsnprintf gets
arguments from the stack, and bstr_printf gets arguments from bin_buf, which is
a binary buffer that was generated by vbin_printf.
The format follows C99 vsnprintf, but has some extensions: see vsnprintf comment for details.
The return value is the number of characters which would
be generated for the given input, excluding the trailing
'\0', as per ISO C99. If you want to have the exact
number of characters written into buf as return value
(not including the trailing '\0'), use vscnprintf. If the
return is greater than or equal to size, the resulting
string is truncated.
bprintf — Parse a format string and place args' binary value in a buffer
int bprintf ( | bin_buf, | |
| size, | ||
| fmt, | ||
...); |
u32 * | bin_buf; |
size_t | size; |
const char * | fmt; |
| ...; |
strnicmp — Case insensitive, length-limited string comparison
int strnicmp ( | s1, | |
| s2, | ||
len); |
const char * | s1; |
const char * | s2; |
size_t | len; |
strcpy —
Copy a NUL terminated string
char * strcpy ( | dest, | |
src); |
char * | dest; |
const char * | src; |
strncpy —
Copy a length-limited, NUL-terminated string
char * strncpy ( | dest, | |
| src, | ||
count); |
char * | dest; |
const char * | src; |
size_t | count; |
strlcpy —
Copy a NUL terminated string into a sized buffer
size_t strlcpy ( | dest, | |
| src, | ||
size); |
char * | dest; |
const char * | src; |
size_t | size; |
strcat —
Append one NUL-terminated string to another
char * strcat ( | dest, | |
src); |
char * | dest; |
const char * | src; |
strncat —
Append a length-limited, NUL-terminated string to another
char * strncat ( | dest, | |
| src, | ||
count); |
char * | dest; |
const char * | src; |
size_t | count; |
strlcat —
Append a length-limited, NUL-terminated string to another
size_t strlcat ( | dest, | |
| src, | ||
count); |
char * | dest; |
const char * | src; |
size_t | count; |
strncmp — Compare two length-limited strings
int strncmp ( | cs, | |
| ct, | ||
count); |
const char * | cs; |
const char * | ct; |
size_t | count; |
strchr — Find the first occurrence of a character in a string
char * strchr ( | s, | |
c); |
const char * | s; |
int | c; |
strrchr — Find the last occurrence of a character in a string
char * strrchr ( | s, | |
c); |
const char * | s; |
int | c; |
strnchr — Find a character in a length limited string
char * strnchr ( | s, | |
| count, | ||
c); |
const char * | s; |
size_t | count; |
int | c; |
strstrip —
Removes leading and trailing whitespace from s.
char * strstrip ( | s); |
char * | s; |
strnlen — Find the length of a length-limited string
size_t strnlen ( | s, | |
count); |
const char * | s; |
size_t | count; |
strspn —
Calculate the length of the initial substring of s which only contains letters in accept
size_t strspn ( | s, | |
accept); |
const char * | s; |
const char * | accept; |
strcspn —
Calculate the length of the initial substring of s which does not contain letters in reject
size_t strcspn ( | s, | |
reject); |
const char * | s; |
const char * | reject; |
strpbrk — Find the first occurrence of a set of characters
char * strpbrk ( | cs, | |
ct); |
const char * | cs; |
const char * | ct; |
sysfs_streq — return true if strings are equal, modulo trailing newline
bool sysfs_streq ( | s1, | |
s2); |
const char * | s1; |
const char * | s2; |
memset — Fill a region of memory with the given value
void * memset ( | s, | |
| c, | ||
count); |
void * | s; |
int | c; |
size_t | count; |
memcpy — Copy one area of memory to another
void * memcpy ( | dest, | |
| src, | ||
count); |
void * | dest; |
const void * | src; |
size_t | count; |
memmove — Copy one area of memory to another
void * memmove ( | dest, | |
| src, | ||
count); |
void * | dest; |
const void * | src; |
size_t | count; |
memcmp — Compare two areas of memory
int memcmp ( | cs, | |
| ct, | ||
count); |
const void * | cs; |
const void * | ct; |
size_t | count; |
memscan — Find a character in an area of memory.
void * memscan ( | addr, | |
| c, | ||
size); |
void * | addr; |
int | c; |
size_t | size; |
set_bit — Atomically set a bit in memory
void set_bit ( | nr, | |
addr); |
unsigned int | nr; |
volatile unsigned long * | addr; |
__set_bit — Set a bit in memory
void __set_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
clear_bit — Clears a bit in memory
void clear_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
__change_bit — Toggle a bit in memory
void __change_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
change_bit — Toggle a bit in memory
void change_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
test_and_set_bit — Set a bit and return its old value
int test_and_set_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
test_and_set_bit_lock — Set a bit and return its old value for lock
int test_and_set_bit_lock ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
__test_and_set_bit — Set a bit and return its old value
int __test_and_set_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
test_and_clear_bit — Clear a bit and return its old value
int test_and_clear_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
__test_and_clear_bit — Clear a bit and return its old value
int __test_and_clear_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
test_and_change_bit — Change a bit and return its old value
int test_and_change_bit ( | nr, | |
addr); |
int | nr; |
volatile unsigned long * | addr; |
test_bit — Determine whether a bit is set
int test_bit ( | nr, | |
addr); |
int | nr; |
const volatile unsigned long * | addr; |
Table of Contents
The Linux kernel provides more basic utility functions.
__bitmap_shift_right — logical right shift of the bits in a bitmap
void __bitmap_shift_right ( | dst, | |
| src, | ||
| shift, | ||
bits); |
unsigned long * | dst; |
const unsigned long * | src; |
int | shift; |
int | bits; |
__bitmap_shift_left — logical left shift of the bits in a bitmap
void __bitmap_shift_left ( | dst, | |
| src, | ||
| shift, | ||
bits); |
unsigned long * | dst; |
const unsigned long * | src; |
int | shift; |
int | bits; |
bitmap_scnprintf — convert bitmap to an ASCII hex string.
int bitmap_scnprintf ( | buf, | |
| buflen, | ||
| maskp, | ||
nmaskbits); |
char * | buf; |
unsigned int | buflen; |
const unsigned long * | maskp; |
int | nmaskbits; |
__bitmap_parse — convert an ASCII hex string into a bitmap.
int __bitmap_parse ( | buf, | |
| buflen, | ||
| is_user, | ||
| maskp, | ||
nmaskbits); |
const char * | buf; |
unsigned int | buflen; |
int | is_user; |
unsigned long * | maskp; |
int | nmaskbits; |
buf: pointer to buffer containing string.
buflen: buffer size in bytes. If string is smaller than this then it must be terminated with a \0.
is_user: location of buffer, 0 indicates kernel space
maskp: pointer to bitmap array that will contain result.
nmaskbits: size of bitmap, in bits.
Commas group hex digits into chunks. Each chunk defines exactly 32
bits of the resultant bitmask. No chunk may specify a value larger
than 32 bits (-EOVERFLOW), and if a chunk specifies a smaller value
then leading 0-bits are prepended. -EINVAL is returned for illegal
characters and for grouping errors such as “1,,5”, “,44”, “,” and "".
Leading and trailing whitespace is accepted, but not embedded whitespace.
bitmap_parse_user —
int bitmap_parse_user ( | ubuf, | |
| ulen, | ||
| maskp, | ||
nmaskbits); |
const char __user * | ubuf; |
unsigned int | ulen; |
unsigned long * | maskp; |
int | nmaskbits; |
bitmap_scnlistprintf — convert bitmap to list format ASCII string
int bitmap_scnlistprintf ( | buf, | |
| buflen, | ||
| maskp, | ||
nmaskbits); |
char * | buf; |
unsigned int | buflen; |
const unsigned long * | maskp; |
int | nmaskbits; |
buf: byte buffer into which string is placed
buflen: reserved size of buf, in bytes
maskp: pointer to bitmap to convert
nmaskbits: size of bitmap, in bits
Output format is a comma-separated list of decimal numbers and
ranges. Consecutively set bits are shown as two hyphen-separated
decimal numbers, the smallest and largest bit numbers set in
the range. Output format is compatible with the format
accepted as input by bitmap_parselist.
The return value is the number of characters which would be generated for the given input, excluding the trailing '\0', as per ISO C99.
bitmap_parselist — convert list format ASCII string to bitmap
int bitmap_parselist ( | bp, | |
| maskp, | ||
nmaskbits); |
const char * | bp; |
unsigned long * | maskp; |
int | nmaskbits; |
bp: read nul-terminated user string from this buffer
maskp: write resulting mask here
nmaskbits: number of bits in mask to be written
bitmap_remap — Apply map defined by a pair of bitmaps to another bitmap
void bitmap_remap ( | dst, | |
| src, | ||
| old, | ||
| new, | ||
bits); |
unsigned long * | dst; |
const unsigned long * | src; |
const unsigned long * | old; |
const unsigned long * | new; |
int | bits; |
dst: remapped result
src: subset to be remapped
old: defines domain of map
new: defines range of map
bits: number of bits in each of these bitmaps
Let old and new define a mapping of bit positions, such that
whatever position is held by the n-th set bit in old is mapped
to the n-th set bit in new. In the more general case, allowing
for the possibility that the weight 'w' of new is less than the
weight of old, map the position of the n-th set bit in old to
the position of the m-th set bit in new, where m == n % w.
If either of the old and new bitmaps are empty, or if src and
dst point to the same location, then this routine copies src
to dst.
The positions of unset bits in old are mapped to themselves
(the identity map).
Apply the above specified mapping to src, placing the result in
dst, clearing any bits previously set in dst.
For example, let's say that old has bits 4 through 7 set, and
new has bits 12 through 15 set. This defines the mapping of bit
position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
bit positions unchanged. So if say src comes into this routine
with bits 1, 5 and 7 set, then dst should leave with bits 1,
13 and 15 set.
bitmap_bitremap — Apply map defined by a pair of bitmaps to a single bit
int bitmap_bitremap ( | oldbit, | |
| old, | ||
| new, | ||
bits); |
int | oldbit; |
const unsigned long * | old; |
const unsigned long * | new; |
int | bits; |
oldbit: bit position to be mapped
old: defines domain of map
new: defines range of map
bits: number of bits in each of these bitmaps
Let old and new define a mapping of bit positions, such that
whatever position is held by the n-th set bit in old is mapped
to the n-th set bit in new. In the more general case, allowing
for the possibility that the weight 'w' of new is less than the
weight of old, map the position of the n-th set bit in old to
the position of the m-th set bit in new, where m == n % w.
The positions of unset bits in old are mapped to themselves
(the identity map).
Apply the above specified mapping to bit position oldbit, returning
the new bit position.
For example, let's say that old has bits 4 through 7 set, and
new has bits 12 through 15 set. This defines the mapping of bit
position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
bit positions unchanged. So if say oldbit is 5, then this routine
returns 13.
bitmap_onto — translate one bitmap relative to another
void bitmap_onto ( | dst, | |
| orig, | ||
| relmap, | ||
bits); |
unsigned long * | dst; |
const unsigned long * | orig; |
const unsigned long * | relmap; |
int | bits; |
dst: resulting translated bitmap
orig: original untranslated bitmap
relmap: bitmap relative to which translated
bits: number of bits in each of these bitmaps
Set the n-th bit of dst iff there exists some m such that the
n-th bit of relmap is set, the m-th bit of orig is set, and
the n-th bit of relmap is also the m-th _set_ bit of relmap.
(If you understood the previous sentence the first time you
read it, you're overqualified for your current job.)
In other words, orig is mapped onto (surjectively) dst,
using the map { <n, m> | the n-th bit of relmap is the
m-th set bit of relmap }.
Any set bits in orig above bit number W, where W is the
weight of (number of set bits in) relmap are mapped nowhere.
In particular, if for all bits m set in orig, m >= W, then
dst will end up empty. In situations where the possibility
of such an empty result is not desired, one way to avoid it is
to use the bitmap_fold operator, below, to first fold the
orig bitmap over itself so that all its set bits x are in the
range 0 <= x < W. The bitmap_fold operator does this by
setting the bit (m % W) in dst, for each bit (m) set in orig.
Example [1] for bitmap_onto:
Let's say relmap has bits 30-39 set, and orig has bits
1, 3, 5, 7, 9 and 11 set. Then on return from this routine,
dst will have bits 31, 33, 35, 37 and 39 set.
When bit 0 is set in orig, it means turn on the bit in
dst corresponding to whatever is the first bit (if any)
that is turned on in relmap. Since bit 0 was off in the
above example, we leave off that bit (bit 30) in dst.
When bit 1 is set in orig (as in the above example), it
means turn on the bit in dst corresponding to whatever
is the second bit that is turned on in relmap. The second
bit in relmap that was turned on in the above example was
bit 31, so we turned on bit 31 in dst.
Similarly, we turned on bits 33, 35, 37 and 39 in dst,
because they were the 4th, 6th, 8th and 10th set bits
set in relmap, and the 4th, 6th, 8th and 10th bits of
orig (i.e. bits 3, 5, 7 and 9) were also set.
When bit 11 is set in orig, it means turn on the bit in
dst corresponding to whatever is the twelfth bit that is
turned on in relmap. In the above example, there were
only ten bits turned on in relmap (30..39), so the fact that bit
11 was set in orig had no effect on dst.
Example [2] for bitmap_fold + bitmap_onto:
Let's say relmap has these ten bits set:
40 41 42 43 45 48 53 61 74 95
(for the curious, that's 40 plus the first ten terms of the
Fibonacci sequence.)
Further let's say we use the following code, invoking
bitmap_fold then bitmap_onto, as suggested above to
avoid the possibility of an empty dst result:
unsigned long *tmp; // a temporary bitmap's bits
bitmap_fold(tmp, orig, bitmap_weight(relmap, bits), bits); bitmap_onto(dst, tmp, relmap, bits);
Then this table shows what various values of dst would be, for
various orig's. I list the zero-based positions of each set bit.
The tmp column shows the intermediate result, as computed by
using bitmap_fold to fold the orig bitmap modulo ten
(the weight of relmap).
orig tmp dst
0 0 40
1 1 41
9 9 95
10 0 40 (*)
1 3 5 7 1 3 5 7 41 43 48 61
0 1 2 3 4 0 1 2 3 4 40 41 42 43 45
0 9 18 27 0 9 8 7 40 61 74 95
0 10 20 30 0 40
0 11 22 33 0 1 2 3 40 41 42 43
0 12 24 36 0 2 4 6 40 42 45 53
78 102 211 1 2 8 41 42 74 (*)
(*) For these marked lines, if we hadn't first done bitmap_fold
into tmp, then the dst result would have been empty.
If either of orig or relmap is empty (no set bits), then dst
will be returned empty.
If (as explained above) the only set bits in orig are in positions
m where m >= W, (where W is the weight of relmap) then dst will
once again be returned empty.
All bits in dst not set by the above rule are cleared.
bitmap_fold — fold larger bitmap into smaller, modulo specified size
void bitmap_fold ( | dst, | |
| orig, | ||
| sz, | ||
bits); |
unsigned long * | dst; |
const unsigned long * | orig; |
int | sz; |
int | bits; |
bitmap_find_free_region — find a contiguous aligned mem region
int bitmap_find_free_region ( | bitmap, | |
| bits, | ||
order); |
unsigned long * | bitmap; |
int | bits; |
int | order; |
bitmap: array of unsigned longs corresponding to the bitmap
bits: number of bits in the bitmap
order: region size (log base 2 of number of bits) to find
Find a region of free (zero) bits in a bitmap of bits bits and
allocate them (set them to one). Only consider regions of length
a power (order) of two, aligned to that power of two, which
makes the search algorithm much faster.
Return the bit offset in bitmap of the allocated region, or -errno on failure.
bitmap_release_region — release allocated bitmap region
void bitmap_release_region ( | bitmap, | |
| pos, | ||
order); |
unsigned long * | bitmap; |
int | pos; |
int | order; |
bitmap_allocate_region — allocate bitmap region
int bitmap_allocate_region ( | bitmap, | |
| pos, | ||
order); |
unsigned long * | bitmap; |
int | pos; |
int | order; |
bitmap_copy_le — copy a bitmap, putting the bits into little-endian order.
void bitmap_copy_le ( | dst, | |
| src, | ||
nbits); |
void * | dst; |
const unsigned long * | src; |
int | nbits; |
bitmap_pos_to_ord —
int bitmap_pos_to_ord ( | buf, | |
| pos, | ||
bits); |
const unsigned long * | buf; |
int | pos; |
int | bits; |
buf: pointer to a bitmap
pos: a bit position in buf (0 <= pos < bits)
bits: number of valid bit positions in buf
Map the bit at position pos in buf (of length bits) to the
ordinal of which set bit it is. If it is not set or if pos
is not a valid bit position, map to -1.
If for example, just bits 4 through 7 are set in buf, then pos
values 4 through 7 will get mapped to 0 through 3, respectively,
and other pos values will get mapped to -1. When pos value 7
gets mapped to (returns) ord value 3 in this example, that means
that bit 7 is the 3rd (starting with 0th) set bit in buf.
The bit positions 0 through bits - 1 are valid positions in buf.
bitmap_ord_to_pos —
int bitmap_ord_to_pos ( | buf, | |
| ord, | ||
bits); |
const unsigned long * | buf; |
int | ord; |
int | bits; |
buf: pointer to bitmap
ord: ordinal bit position (n-th set bit, n >= 0)
bits: number of valid bit positions in buf
Map the ordinal offset of bit ord in buf to its position in buf.
Value of ord should be in range 0 <= ord < weight(buf), else
results are undefined.
If for example, just bits 4 through 7 are set in buf, then ord
values 0 through 3 will get mapped to 4 through 7, respectively,
and all other ord values return undefined values. When ord value 3
gets mapped to (returns) pos value 7 in this example, that means
that the 3rd set bit (starting with 0th) is at position 7 in buf.
The bit positions 0 through bits - 1 are valid positions in buf.
get_option — Parse integer from an option string
int get_option ( | str, | |
pint); |
char ** | str; |
int * | pint; |
get_options — Parse a string into a list of integers
char * get_options ( | str, | |
| nints, | ||
ints); |
const char * | str; |
int | nints; |
int * | ints; |
This function parses a string containing a comma-separated list of integers, a hyphen-separated range of _positive_ integers, or a combination of both. The parse halts when the array is full, or when no more numbers can be retrieved from the string.
Return value is the character in the string which caused
the parse to end (typically a null terminator, if str is
completely parseable).
memparse — parse a string with mem suffixes into a number
unsigned long long memparse ( | ptr, | |
retptr); |
const char * | ptr; |
char ** | retptr; |
Parses a string into a number. The number stored at ptr is
potentially suffixed with K (for kilobytes, or 1024 bytes),
M (for megabytes, or 1048576 bytes), or G (for gigabytes, or
1073741824). If the number is suffixed with K, M, or G, then
the return value is the number multiplied by one kilobyte, one
megabyte, or one gigabyte, respectively.
crc7 — update the CRC7 for the data buffer
u8 crc7 ( | crc, | |
| buffer, | ||
len); |
u8 | crc; |
const u8 * | buffer; |
size_t | len; |
crc16 — compute the CRC-16 for the data buffer
u16 crc16 ( | crc, | |
| buffer, | ||
len); |
u16 | crc; |
u8 const * | buffer; |
size_t | len; |
crc_itu_t — Compute the CRC-ITU-T for the data buffer
u16 crc_itu_t ( | crc, | |
| buffer, | ||
len); |
u16 | crc; |
const u8 * | buffer; |
size_t | len; |
crc32_le — Calculate bitwise little-endian Ethernet AUTODIN II CRC32
u32 __pure crc32_le ( | crc, | |
| p, | ||
len); |
u32 | crc; |
unsigned char const * | p; |
size_t | len; |
kcalloc — allocate memory for an array. The memory is set to zero.
void * kcalloc ( | n, | |
| size, | ||
flags); |
size_t | n; |
size_t | size; |
gfp_t | flags; |
The flags argument may be one of:
GFP_USER - Allocate memory on behalf of user. May sleep.
GFP_KERNEL - Allocate normal kernel ram. May sleep.
GFP_ATOMIC - Allocation will not sleep. May use emergency pools.
For example, use this inside interrupt handlers.
GFP_HIGHUSER - Allocate pages from high memory.
GFP_NOIO - Do not do any I/O at all while trying to get memory.
GFP_NOFS - Do not make any fs calls while trying to get memory.
GFP_NOWAIT - Allocation will not sleep.
GFP_THISNODE - Allocate node-local memory only.
GFP_DMA - Allocation suitable for DMA.
Should only be used for kmalloc caches. Otherwise, use a
slab created with SLAB_DMA.
Also it is possible to set different flags by OR'ing
in one or more of the following additional flags:
__GFP_COLD - Request cache-cold pages instead of
trying to return cache-warm pages.
__GFP_HIGH - This allocation has high priority and may use emergency pools.
__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
(think twice before using).
__GFP_NORETRY - If memory is not immediately available,
then give up at once.
__GFP_NOWARN - If allocation fails, don't issue any warnings.
__GFP_REPEAT - If allocation fails initially, try once more before failing.
There are other flags available as well, but these are not intended for general use, and so are not documented here. For a full list of potential flags, always refer to linux/gfp.h.
kmalloc_node — allocate memory from a specific node
void * kmalloc_node ( | size, | |
| flags, | ||
node); |
size_t | size; |
gfp_t | flags; |
int | node; |
kzalloc — allocate memory. The memory is set to zero.
void * kzalloc ( | size, | |
flags); |
size_t | size; |
gfp_t | flags; |
kzalloc_node — allocate zeroed memory from a particular memory node.
void * kzalloc_node ( | size, | |
| flags, | ||
node); |
size_t | size; |
gfp_t | flags; |
int | node; |
kmem_cache_create — Create a cache.
struct kmem_cache * kmem_cache_create ( | name, | |
| size, | ||
| align, | ||
| flags, | ||
ctor); |
const char * | name; |
size_t | size; |
size_t | align; |
unsigned long | flags; |
void (*)(void *) | ctor; |
name: A string which is used in /proc/slabinfo to identify this cache.
size: The size of objects to be created in this cache.
align: The required alignment for the objects.
flags: SLAB flags
ctor: A constructor for the objects.
Returns a ptr to the cache on success, NULL on failure.
Cannot be called within an interrupt, but can be interrupted.
The ctor is run when new pages are allocated by the cache.
name must be valid until the cache is destroyed. This implies that
the module calling this has to destroy the cache before getting unloaded.
Note that kmem_cache_name is not guaranteed to return the same pointer,
therefore applications must manage it themselves.
The flags are
SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
to catch references to uninitialised memory.
SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
for buffer overruns.
SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
cacheline. This can be beneficial if you're counting cycles as closely
as davem.
kmem_cache_shrink — Shrink a cache.
int kmem_cache_shrink ( | cachep); |
struct kmem_cache * | cachep; |
kmem_cache_destroy — delete a cache
void kmem_cache_destroy ( | cachep); |
struct kmem_cache * | cachep; |
Remove a struct kmem_cache object from the slab cache.
It is expected this function will be called by a module when it is unloaded. This will remove the cache completely, and avoid a duplicate cache being allocated each time a module is loaded and unloaded, if the module doesn't have persistent in-kernel storage across loads and unloads.
The cache must be empty before calling this function.
The caller must guarantee that no one will allocate memory from the cache
during the kmem_cache_destroy.
kmem_cache_alloc — Allocate an object
void * kmem_cache_alloc ( | cachep, | |
flags); |
struct kmem_cache * | cachep; |
gfp_t | flags; |
kmem_cache_free — Deallocate an object
void kmem_cache_free ( | cachep, | |
objp); |
struct kmem_cache * | cachep; |
void * | objp; |
ksize — get the actual amount of memory allocated for a given object
size_t ksize ( | objp); |
const void * | objp; |
kmalloc may internally round up allocations and return more memory
than requested. ksize can be used to determine the actual amount of
memory allocated. The caller may use this additional memory, even though
a smaller amount of memory was initially specified with the kmalloc call.
The caller must guarantee that objp points to a valid object previously
allocated with either kmalloc or kmem_cache_alloc. The object
must not be freed during the duration of the call.
__copy_to_user_inatomic — Copy a block of data into user space, with less checking.
unsigned long __must_check __copy_to_user_inatomic ( | to, | |
| from, | ||
n); |
void __user * | to; |
const void * | from; |
unsigned long | n; |
to: Destination address, in user space.
from: Source address, in kernel space.
n: Number of bytes to copy.
Copy data from kernel space to user space. Caller must check
the specified block with access_ok before calling this function.
The caller should also make sure the user space address is pinned
so that the copy does not take a page fault and sleep.
Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault we return the initial request size (1, 2 or 4), as copy_*_user should do. If a store crosses a page boundary and gets a fault, the x86 will not write anything, so this is accurate.
__copy_to_user — Copy a block of data into user space, with less checking.
unsigned long __must_check __copy_to_user ( | to, | |
| from, | ||
n); |
void __user * | to; |
const void * | from; |
unsigned long | n; |
__copy_from_user — Copy a block of data from user space, with less checking.
unsigned long __copy_from_user ( | to, | |
| from, | ||
n); |
void * | to; |
const void __user * | from; |
unsigned long | n; |
to: Destination address, in kernel space.
from: Source address, in user space.
n: Number of bytes to copy.
Copy data from user space to kernel space. Caller must check
the specified block with access_ok before calling this function.
Returns number of bytes that could not be copied. On success, this will be zero.
If some data could not be copied, this function will pad the copied data to the requested size using zero bytes.
An alternate version - __copy_from_user_inatomic - may be called from
atomic context and will fail rather than sleep. In this case the
uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h
for explanation of why this is needed.
__strncpy_from_user — Copy a NUL terminated string from userspace, with less checking.
long __strncpy_from_user ( | dst, | |
| src, | ||
count); |
char * | dst; |
const char __user * | src; |
long | count; |
dst: Destination address, in kernel space. This buffer must be at
least count bytes long.
src: Source address, in user space.
count: Maximum number of bytes to copy, including the trailing NUL.
Copies a NUL-terminated string from userspace to kernel space.
Caller must check the specified block with access_ok before calling
this function.
On success, returns the length of the string (not including the trailing NUL).
If access to userspace fails, returns -EFAULT (some data may have been copied).
If count is smaller than the length of the string, copies count bytes
and returns count.
strncpy_from_user — Copy a NUL terminated string from userspace.
long strncpy_from_user ( | dst, | |
| src, | ||
count); |
char * | dst; |
const char __user * | src; |
long | count; |
dst: Destination address, in kernel space. This buffer must be at
least count bytes long.
src: Source address, in user space.
count: Maximum number of bytes to copy, including the trailing NUL.
Copies a NUL-terminated string from userspace to kernel space.
On success, returns the length of the string (not including the trailing NUL).
If access to userspace fails, returns -EFAULT (some data may have been copied).
If count is smaller than the length of the string, copies count bytes
and returns count.
clear_user — Zero a block of memory in user space.
unsigned long clear_user ( | to, | |
n); |
void __user * | to; |
unsigned long | n; |
__clear_user — Zero a block of memory in user space, with less checking.
unsigned long __clear_user ( | to, | |
n); |
void __user * | to; |
unsigned long | n; |
strnlen_user — Get the size of a string in user space.
long strnlen_user ( | s, | |
n); |
const char __user * | s; |
long | n; |
copy_to_user — Copy a block of data into user space.
unsigned long copy_to_user ( | to, | |
| from, | ||
n); |
void __user * | to; |
const void * | from; |
unsigned long | n; |
read_cache_pages — populate an address space with some pages & start reads against them
int read_cache_pages ( | mapping, | |
| pages, | ||
| filler, | ||
data); |
struct address_space * | mapping; |
struct list_head * | pages; |
int (*)(void *, struct page *) | filler; |
void * | data; |
page_cache_sync_readahead — generic file readahead
void page_cache_sync_readahead ( | mapping, | |
| ra, | ||
| filp, | ||
| offset, | ||
req_size); |
struct address_space * | mapping; |
struct file_ra_state * | ra; |
struct file * | filp; |
pgoff_t | offset; |
unsigned long | req_size; |
mapping: address_space which holds the pagecache and I/O vectors
ra: file_ra_state which holds the readahead state
filp: passed on to ->readpage and ->readpages
offset: start offset into mapping, in pagecache page-sized units
req_size: hint: total size of the read which the caller is performing in pagecache pages
page_cache_async_readahead — file readahead for marked pages
void page_cache_async_readahead ( | mapping, | |
| ra, | ||
| filp, | ||
| page, | ||
| offset, | ||
req_size); |
struct address_space * | mapping; |
struct file_ra_state * | ra; |
struct file * | filp; |
struct page * | page; |
pgoff_t | offset; |
unsigned long | req_size; |
mapping: address_space which holds the pagecache and I/O vectors
ra: file_ra_state which holds the readahead state
filp: passed on to ->readpage and ->readpages
page: the page at offset which has the PG_readahead flag set
offset: start offset into mapping, in pagecache page-sized units
req_size: hint: total size of the read which the caller is performing in pagecache pages
filemap_flush — mostly a non-blocking flush
int filemap_flush ( | mapping); |
struct address_space * | mapping; |
filemap_fdatawait_range — wait for all under-writeback pages to complete in a given range
int filemap_fdatawait_range ( | mapping, | |
| start, | ||
end); |
struct address_space * | mapping; |
loff_t | start; |
loff_t | end; |
filemap_fdatawait — wait for all under-writeback pages to complete
int filemap_fdatawait ( | mapping); |
struct address_space * | mapping; |
filemap_write_and_wait_range — write out & wait on a file range
int filemap_write_and_wait_range ( | mapping, | |
| lstart, | ||
lend); |
struct address_space * | mapping; |
loff_t | lstart; |
loff_t | lend; |
add_to_page_cache_locked — add a locked page to the pagecache
int add_to_page_cache_locked ( | page, | |
| mapping, | ||
| offset, | ||
gfp_mask); |
struct page * | page; |
struct address_space * | mapping; |
pgoff_t | offset; |
gfp_t | gfp_mask; |
add_page_wait_queue — Add an arbitrary waiter to a page's wait queue
void add_page_wait_queue ( | page, | |
waiter); |
struct page * | page; |
wait_queue_t * | waiter; |
unlock_page — unlock a locked page
void unlock_page ( | page); |
struct page * | page; |
Unlocks the page and wakes up sleepers in ___wait_on_page_locked.
Also wakes sleepers in wait_on_page_writeback because the wakeup
mechanism between PageLocked pages and PageWriteback pages is shared.
But that's OK - sleepers in wait_on_page_writeback just go back to sleep.
The mb is necessary to enforce ordering between the clear_bit and the read
of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked).
end_page_writeback — end writeback against a page
void end_page_writeback ( | page); |
struct page * | page; |
__lock_page — get a lock on the page, assuming we need to sleep to get it
void __lock_page ( | page); |
struct page * | page; |
find_get_page — find and get a page reference
struct page * find_get_page ( | mapping, | |
offset); |
struct address_space * | mapping; |
pgoff_t | offset; |
find_lock_page — locate, pin and lock a pagecache page
struct page * find_lock_page ( | mapping, | |
offset); |
struct address_space * | mapping; |
pgoff_t | offset; |
find_or_create_page — locate or add a pagecache page
struct page * find_or_create_page ( | mapping, | |
| index, | ||
gfp_mask); |
struct address_space * | mapping; |
pgoff_t | index; |
gfp_t | gfp_mask; |
mapping: the page's address_space
index: the page's index into the mapping
gfp_mask: page allocation mode
Locates a page in the pagecache. If the page is not present, a new page
is allocated using gfp_mask and is added to the pagecache and to the VM's
LRU list. The returned page is locked and has its reference count
incremented.
find_or_create_page may sleep, even if gfp_mask specifies an atomic
allocation!
find_or_create_page returns the desired page's address, or zero on
memory exhaustion.
find_get_pages_contig — gang contiguous pagecache lookup
unsigned find_get_pages_contig ( | mapping, | |
| index, | ||
| nr_pages, | ||
pages); |
struct address_space * | mapping; |
pgoff_t | index; |
unsigned int | nr_pages; |
struct page ** | pages; |
find_get_pages_tag — find and return pages that match tag
unsigned find_get_pages_tag ( | mapping, | |
| index, | ||
| tag, | ||
| nr_pages, | ||
pages); |
struct address_space * | mapping; |
pgoff_t * | index; |
int | tag; |
unsigned int | nr_pages; |
struct page ** | pages; |
grab_cache_page_nowait — returns locked page at given index in given cache
struct page * grab_cache_page_nowait ( | mapping, | |
index); |
struct address_space * | mapping; |
pgoff_t | index; |
Same as grab_cache_page, but do not wait if the page is unavailable.
This is intended for speculative data generators, where the data can
be regenerated if the page couldn't be grabbed. This routine should
be safe to call while holding the lock for another page.
Clear __GFP_FS when allocating the page to avoid recursion into the fs and deadlock against the caller's locked page.
generic_file_aio_read — generic filesystem read routine
ssize_t generic_file_aio_read ( | iocb, | |
| iov, | ||
| nr_segs, | ||
pos); |
struct kiocb * | iocb; |
const struct iovec * | iov; |
unsigned long | nr_segs; |
loff_t | pos; |
filemap_fault — read in file data for page fault handling
int filemap_fault ( | vma, | |
vmf); |
struct vm_area_struct * | vma; |
struct vm_fault * | vmf; |
filemap_fault is invoked via the vma operations vector for a
mapped memory region to read in file data during a page fault.
The goto's are kind of ugly, but this streamlines the normal case of having it in the page cache, and handles the special cases reasonably without having a lot of duplicated code.
read_cache_page_async — read into page cache, fill it if needed
struct page * read_cache_page_async ( | mapping, | |
| index, | ||
| filler, | ||
data); |
struct address_space * | mapping; |
pgoff_t | index; |
int (*)(void *, struct page *) | filler; |
void * | data; |
mapping: the page's address_space
index: the page index
filler: function to perform the read
data: destination for read data
Same as read_cache_page, but don't wait for page to become unlocked after submitting it to the filler.
Read into the page cache. If a page already exists, and PageUptodate is
not set, try to fill the page but don't wait for it to become unlocked.
If the page does not get brought uptodate, return -EIO.
read_cache_page — read into page cache, fill it if needed
struct page * read_cache_page ( | mapping, | |
| index, | ||
| filler, | ||
data); |
struct address_space * | mapping; |
pgoff_t | index; |
int (*)(void *, struct page *) | filler; |
void * | data; |
__generic_file_aio_write — write data to a file
ssize_t __generic_file_aio_write ( | iocb, | |
| iov, | ||
| nr_segs, | ||
ppos); |
struct kiocb * | iocb; |
const struct iovec * | iov; |
unsigned long | nr_segs; |
loff_t * | ppos; |
iocb: IO state structure (file, offset, etc.)
iov: vector with data to write
nr_segs: number of segments in the vector
ppos: position where to write
This function does all the work needed for actually writing data to a file. It does all basic checks, removes SUID from the file, updates modification times and calls proper subroutines depending on whether we do direct IO or a standard buffered write.
It expects i_mutex to be grabbed unless we work on a block device or similar object which does not need locking at all.
This function does *not* take care of syncing data in case of O_SYNC write. A caller has to handle it. This is mainly due to the fact that we want to avoid syncing under i_mutex.
generic_file_aio_write — write data to a file
ssize_t generic_file_aio_write ( | iocb, | |
| iov, | ||
| nr_segs, | ||
pos); |
struct kiocb * | iocb; |
const struct iovec * | iov; |
unsigned long | nr_segs; |
loff_t | pos; |
try_to_release_page — release old fs-specific metadata on a page
int try_to_release_page ( | page, | |
gfp_mask); |
struct page * | page; |
gfp_t | gfp_mask; |
page: the page which the kernel is trying to free
gfp_mask: memory allocation flags (and I/O mode)
The address_space is to try to release any data against the page (presumably at page->private). If the release was successful, return `1'. Otherwise return zero.
This may also be called if PG_fscache is set on a page, indicating that the page is known to the local caching routines.
The gfp_mask argument specifies whether I/O may be performed to release
this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
zap_vma_ptes — remove ptes mapping the vma
int zap_vma_ptes ( | vma, | |
| address, | ||
size); |
struct vm_area_struct * | vma; |
unsigned long | address; |
unsigned long | size; |
get_user_pages — pin user pages in memory
int get_user_pages ( | tsk, | |
| mm, | ||
| start, | ||
| nr_pages, | ||
| write, | ||
| force, | ||
| pages, | ||
vmas); |
struct task_struct * | tsk; |
struct mm_struct * | mm; |
unsigned long | start; |
int | nr_pages; |
int | write; |
int | force; |
struct page ** | pages; |
struct vm_area_struct ** | vmas; |
tsk: task_struct of target task
mm: mm_struct of target mm
start: starting user address
nr_pages: number of pages from start to pin
write: whether pages will be written to by the caller
force: whether to force write access even if user mapping is readonly. This will result in the page being COWed even in MAP_SHARED mappings. You do not want this.
pages: array that receives pointers to the pages pinned. Should be at least nr_pages long. Or NULL, if caller only intends to ensure the pages are faulted in.
vmas: array of pointers to vmas corresponding to each page. Or NULL if the caller does not require them.
Returns number of pages pinned. This may be fewer than the number
requested. If nr_pages is 0 or negative, returns 0. If no pages
were pinned, returns -errno. Each page returned must be released
with a put_page call when it is finished with. vmas will only
remain valid while mmap_sem is held.
Must be called with mmap_sem held for read or write.
get_user_pages walks a process's page tables and takes a reference to each struct page that each user address corresponds to at a given instant. That is, it takes the page that would be accessed if a user thread accesses the given user virtual address at that instant.
This does not guarantee that the page exists in the user mappings when get_user_pages returns, and there may even be a completely different page there in some cases (e.g. if mmapped pagecache has been invalidated and subsequently refaulted). However it does guarantee that the page won't be freed completely. And mostly callers simply care that the page contains data that was valid *at some point in time*. Typically, an IO or similar operation cannot guarantee anything stronger anyway because locks can't be held over the syscall boundary.
If write=0, the page must not be written to. If the page is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must be called after the page is finished with, and before put_page is called.
get_user_pages is typically used for fewer-copy IO operations, to get a handle on the memory by some means other than accesses via the user virtual addresses. The pages may be submitted for DMA to devices or accessed via their kernel linear mapping (via the kmap APIs). Care should be taken to use the correct cache flushing APIs.
See also get_user_pages_fast, for performance critical applications.
vm_insert_page — insert single page into user vma
int vm_insert_page ( | vma, | |
| addr, | ||
page); |
struct vm_area_struct * | vma; |
unsigned long | addr; |
struct page * | page; |
This allows drivers to insert individual pages they've allocated into a user vma.
The page has to be a nice clean _individual_ kernel allocation.
If you allocate a compound page, you need to have marked it as
such (__GFP_COMP), or manually just split the page up yourself
(see split_page).
NOTE! Traditionally this was done with “remap_pfn_range” which
took an arbitrary page protection parameter. This doesn't allow
that. Your vma protection will have to be set up correctly, which
means that if you want a shared writable mapping, you'd better
ask for a shared writable mapping!
The page does not need to be reserved.
vm_insert_pfn — insert single pfn into user vma
int vm_insert_pfn ( | vma, | |
| addr, | ||
pfn); |
struct vm_area_struct * | vma; |
unsigned long | addr; |
unsigned long | pfn; |
Similar to vm_insert_page, this allows drivers to insert individual pages they've allocated into a user vma. Same comments apply.
This function should only be called from a vm_ops->fault handler, and in that case the handler should return NULL.
vma cannot be a COW mapping.
As this is called only for pages that do not currently exist, we do not need to flush old virtual caches or the TLB.
remap_pfn_range — remap kernel memory to userspace
int remap_pfn_range ( | vma, | |
| addr, | ||
| pfn, | ||
| size, | ||
prot); |
struct vm_area_struct * | vma; |
unsigned long | addr; |
unsigned long | pfn; |
unsigned long | size; |
pgprot_t | prot; |
unmap_mapping_range — unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
void unmap_mapping_range ( | mapping, | |
| holebegin, | ||
| holelen, | ||
even_cows); |
struct address_space * | mapping; |
loff_t const | holebegin; |
loff_t const | holelen; |
int | even_cows; |
mapping: the address space containing mmaps to be unmapped.
holebegin: byte in first page to unmap, relative to the start of
the underlying file. This will be rounded down to a PAGE_SIZE
boundary. Note that this is different from truncate_pagecache, which
must keep the partial page. In contrast, we must get rid of
partial pages.
holelen: size of prospective hole in bytes. This will be rounded up to a PAGE_SIZE boundary. A holelen of zero truncates to the end of the file.
even_cows: 1 when truncating a file, unmap even private COWed pages; but 0 when invalidating pagecache, don't throw away private data.
follow_pfn — look up PFN at a user virtual address
int follow_pfn ( | vma, | |
| address, | ||
pfn); |
struct vm_area_struct * | vma; |
unsigned long | address; |
unsigned long * | pfn; |
vm_unmap_aliases — unmap outstanding lazy aliases in the vmap layer
void vm_unmap_aliases ( | void); |
| void; |
The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily to amortize TLB flushing overheads. What this means is that any page you have now, may, in a former life, have been mapped into kernel virtual address by the vmap layer and so there might be some CPUs with TLB entries still referencing that page (additional to the regular 1:1 kernel mapping).
vm_unmap_aliases flushes all such lazy mappings. After it returns, we can be sure that none of the pages we have control over will have any aliases from the vmap layer.
vm_unmap_ram — unmap linear kernel address space set up by vm_map_ram
void vm_unmap_ram ( | mem, | |
count); |
const void * | mem; |
unsigned int | count; |
vm_map_ram — map pages linearly into kernel virtual address (vmalloc space)
void * vm_map_ram ( | pages, | |
| count, | ||
| node, | ||
prot); |
struct page ** | pages; |
unsigned int | count; |
int | node; |
pgprot_t | prot; |
vunmap — release virtual mapping obtained by vmap
void vunmap ( | addr); |
const void * | addr; |
vmap — map an array of pages into virtually contiguous space
void * vmap ( | pages, | |
| count, | ||
| flags, | ||
prot); |
struct page ** | pages; |
unsigned int | count; |
unsigned long | flags; |
pgprot_t | prot; |
vmalloc — allocate virtually contiguous memory
void * vmalloc ( | size); |
unsigned long | size; |
vmalloc_user — allocate zeroed virtually contiguous memory for userspace
void * vmalloc_user ( | size); |
unsigned long | size; |
vmalloc_node — allocate memory on a specific node
void * vmalloc_node ( | size, | |
node); |
unsigned long | size; |
int | node; |
vmalloc_32 — allocate virtually contiguous memory (32bit addressable)
void * vmalloc_32 ( | size); |
unsigned long | size; |
vmalloc_32_user — allocate zeroed virtually contiguous 32bit memory
void * vmalloc_32_user ( | size); |
unsigned long | size; |
remap_vmalloc_range — map vmalloc pages to userspace
int remap_vmalloc_range ( | vma, | |
| addr, | ||
pgoff); |
struct vm_area_struct * | vma; |
void * | addr; |
unsigned long | pgoff; |
alloc_vm_area — allocate a range of kernel address space
struct vm_struct * alloc_vm_area ( | size); |
size_t | size; |
find_next_best_node — find the next node that should appear in a given node's fallback list
int find_next_best_node ( | node, | |
used_node_mask); |
int | node; |
nodemask_t * | used_node_mask; |
node: node whose fallback list we're appending
used_node_mask: nodemask_t of already used nodes
We use a number of factors to determine which is the next node that should
appear on a given node's fallback list. The node should not have appeared
already in node's fallback list, and it should be the next closest node
according to the distance array (which contains arbitrary distance values
from each node to each node in the system), and should also prefer nodes
with no CPUs, since presumably they'll have very little allocation pressure
on them otherwise.
It returns -1 if no node is found.
free_bootmem_with_active_regions — Call free_bootmem_node for each active range
void free_bootmem_with_active_regions ( | nid, | |
max_low_pfn); |
int | nid; |
unsigned long | max_low_pfn; |
sparse_memory_present_with_active_regions — Call memory_present for each active range
void sparse_memory_present_with_active_regions ( | nid); |
int | nid; |
get_pfn_range_for_nid — Return the start and end page frames for a node
void __meminit get_pfn_range_for_nid ( | nid, | |
| start_pfn, | ||
end_pfn); |
unsigned int | nid; |
unsigned long * | start_pfn; |
unsigned long * | end_pfn; |
absent_pages_in_range — Return number of page frames in holes within a range
unsigned long absent_pages_in_range ( | start_pfn, | |
end_pfn); |
unsigned long | start_pfn; |
unsigned long | end_pfn; |
add_active_range — Register a range of PFNs backed by physical memory
void add_active_range ( | nid, | |
| start_pfn, | ||
end_pfn); |
unsigned int | nid; |
unsigned long | start_pfn; |
unsigned long | end_pfn; |
nid: The node ID the range resides on
start_pfn: The start PFN of the available physical memory
end_pfn: The end PFN of the available physical memory
These ranges are stored in an early_node_map[] and later used by
free_area_init_nodes to calculate zone sizes and holes. If the
range spans a memory hole, it is up to the architecture to ensure
the memory is not freed by the bootmem allocator. If possible
the range being registered will be merged with existing ranges.
remove_active_range — Shrink an existing registered range of PFNs
void remove_active_range ( | nid, | |
| start_pfn, | ||
end_pfn); |
unsigned int | nid; |
unsigned long | start_pfn; |
unsigned long | end_pfn; |
remove_all_active_ranges — Remove all currently registered regions
void remove_all_active_ranges ( | void); |
| void; |
find_min_pfn_with_active_regions — Find the minimum PFN registered
unsigned long find_min_pfn_with_active_regions ( | void); |
| void; |
free_area_init_nodes — Initialise all pg_data_t and zone data
void free_area_init_nodes ( | max_zone_pfn); |
unsigned long * | max_zone_pfn; |
This will call free_area_init_node for each active node in the system.
Using the page ranges provided by add_active_range, the size of each
zone in each node and their holes is calculated. If the maximum PFN
between two adjacent zones match, it is assumed that the zone is empty.
For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
that arch_max_dma32_pfn has no pages. It is also assumed that a zone
starts where the previous one ended. For example, ZONE_DMA32 starts
at arch_max_dma_pfn.
set_dma_reserve — set the specified number of pages reserved in the first zone
void set_dma_reserve ( | new_dma_reserve); |
unsigned long | new_dma_reserve; |
The per-cpu batchsize and zone watermarks are determined by present_pages. In the DMA zone, a significant percentage may be consumed by kernel image and other unfreeable allocations which can skew the watermarks badly. This function may optionally be used to account for unfreeable pages in the first zone (e.g., ZONE_DMA). The effect will be lower watermarks and smaller per-cpu batchsize.
setup_per_zone_wmarks — called when min_free_kbytes changes or when memory is hot-{added|removed}
void setup_per_zone_wmarks ( | void); |
| void; |
get_pageblock_flags_group — Return the requested group of flags for the pageblock_nr_pages block of pages
unsigned long get_pageblock_flags_group ( | page, | |
| start_bitidx, | ||
end_bitidx); |
struct page * | page; |
int | start_bitidx; |
int | end_bitidx; |
set_pageblock_flags_group — Set the requested group of flags for a pageblock_nr_pages block of pages
void set_pageblock_flags_group ( | page, | |
| flags, | ||
| start_bitidx, | ||
end_bitidx); |
struct page * | page; |
unsigned long | flags; |
int | start_bitidx; |
int | end_bitidx; |
mempool_create — create a memory pool
mempool_t * mempool_create ( | min_nr, | |
| alloc_fn, | ||
| free_fn, | ||
pool_data); |
int | min_nr; |
mempool_alloc_t * | alloc_fn; |
mempool_free_t * | free_fn; |
void * | pool_data; |
min_nr: the minimum number of elements guaranteed to be allocated for this pool.
alloc_fn: user-defined element-allocation function.
free_fn: user-defined element-freeing function.
pool_data: optional private data available to the user-defined functions.
This function creates and allocates a guaranteed size, preallocated
memory pool. The pool can be used from the mempool_alloc and mempool_free
functions. This function might sleep. Both the alloc_fn and the free_fn
functions might sleep - as long as the mempool_alloc function is not called
from IRQ contexts.
mempool_resize — resize an existing memory pool
int mempool_resize ( | pool, | |
| new_min_nr, | ||
gfp_mask); |
mempool_t * | pool; |
int | new_min_nr; |
gfp_t | gfp_mask; |
pool
pointer to the memory pool which was allocated via
mempool_create.
new_min_nr: the new minimum number of elements guaranteed to be allocated for this pool.
gfp_mask: the usual allocation bitmask.
This function shrinks/grows the pool. In the case of growing,
it cannot be guaranteed that the pool will be grown to the new
size immediately, but new mempool_free calls will refill it.
Note, the caller must guarantee that no mempool_destroy is called
while this function is running. mempool_alloc and mempool_free
might be called (e.g. from IRQ contexts) while this function executes.
mempool_destroy — deallocate a memory pool
void mempool_destroy ( | pool); |
mempool_t * | pool; |
mempool_alloc — allocate an element from a specific memory pool
void * mempool_alloc ( | pool, | |
gfp_mask); |
mempool_t * | pool; |
gfp_t | gfp_mask; |
mempool_free — return an element to the pool.
void mempool_free ( | element, | |
pool); |
void * | element; |
mempool_t * | pool; |
dma_pool_create — Creates a pool of consistent memory blocks, for dma.
struct dma_pool * dma_pool_create ( | name, | |
| dev, | ||
| size, | ||
| align, | ||
boundary); |
const char * | name; |
struct device * | dev; |
size_t | size; |
size_t | align; |
size_t | boundary; |
name: name of pool, for diagnostics
dev: device that will be doing the DMA
size: size of the blocks in this pool.
align: alignment requirement for blocks; must be a power of two
boundary: returned blocks won't cross this power of two boundary
Returns a dma allocation pool with the requested characteristics, or
null if one can't be created. Given one of these pools, dma_pool_alloc
may be used to allocate memory. Such memory will all have “consistent”
DMA mappings, accessible by the device and its driver without using
cache flushing primitives. The actual size of blocks allocated may be
larger than requested because of alignment.
If boundary is nonzero, objects returned from dma_pool_alloc won't
cross that size boundary. This is useful for devices which have
addressing restrictions on individual DMA transfers, such as not crossing
boundaries of 4KBytes.
dma_pool_destroy — destroys a pool of dma memory blocks.
void dma_pool_destroy ( | pool); |
struct dma_pool * | pool; |
dma_pool_alloc — get a block of consistent memory
void * dma_pool_alloc ( | pool, | |
| mem_flags, | ||
handle); |
struct dma_pool * | pool; |
gfp_t | mem_flags; |
dma_addr_t * | handle; |
dma_pool_free — put block back into dma pool
void dma_pool_free ( | pool, | |
| vaddr, | ||
dma); |
struct dma_pool * | pool; |
void * | vaddr; |
dma_addr_t | dma; |
dmam_pool_create —
Managed dma_pool_create
struct dma_pool * dmam_pool_create ( | name, | |
| dev, | ||
| size, | ||
| align, | ||
allocation); |
const char * | name; |
struct device * | dev; |
size_t | size; |
size_t | align; |
size_t | allocation; |
dmam_pool_destroy —
Managed dma_pool_destroy
void dmam_pool_destroy ( | pool); |
struct dma_pool * | pool; |
balance_dirty_pages_ratelimited_nr — balance dirty memory state
void balance_dirty_pages_ratelimited_nr ( | mapping, | |
nr_pages_dirtied); |
struct address_space * | mapping; |
unsigned long | nr_pages_dirtied; |
mapping: address_space which was dirtied
nr_pages_dirtied: number of pages which the caller has just dirtied
Processes which are dirtying memory should call in here once for each page which was newly dirtied. The function will periodically check the system's dirty state and will initiate writeback if needed.
On really big machines, get_writeback_state is expensive, so try to avoid calling it too often (ratelimiting). But once we're over the dirty memory limit we decrease the ratelimiting by a lot, to prevent individual processes from overshooting the limit by (ratelimit_pages) each.
write_cache_pages — walk the list of dirty pages of the given address space and write all of them.
int write_cache_pages ( | mapping, | |
| wbc, | ||
| writepage, | ||
data); |
struct address_space * | mapping; |
struct writeback_control * | wbc; |
writepage_t | writepage; |
void * | data; |
mapping: address space structure to write
wbc
subtract the number of written pages from *wbc->nr_to_write
writepage: function called for each page
data: data passed to writepage function
If a page is already under I/O, write_cache_pages skips it, even
if it's dirty. This is desirable behaviour for memory-cleaning writeback,
but it is INCORRECT for data-integrity system calls such as fsync. fsync
and msync need to guarantee that all the data which was dirty at the time
the call was made gets new I/O started against it. If wbc->sync_mode is
WB_SYNC_ALL then we were called for data integrity and we must wait for
existing IO to complete.
generic_writepages —
walk the list of dirty pages of the given address space and writepage all of them.
int generic_writepages ( | mapping, | |
wbc); |
struct address_space * | mapping; |
struct writeback_control * | wbc; |
write_one_page — write out a single page and optionally wait on I/O
int write_one_page ( | page, | |
wait); |
struct page * | page; |
int | wait; |
truncate_inode_pages_range — truncate range of pages specified by start & end byte offsets
void truncate_inode_pages_range ( | mapping, | |
| lstart, | ||
lend); |
struct address_space * | mapping; |
loff_t | lstart; |
loff_t | lend; |
mapping: mapping to truncate
lstart: offset from which to truncate
lend: offset to which to truncate
Truncate the page cache, removing the pages that are between the specified offsets (and zeroing out the partial page if lstart is not page aligned).
Truncate takes two passes - the first pass is nonblocking. It will not block on page locks and it will not block on writeback. The second pass will wait. This is to prevent as much IO as possible in the affected region. The first pass will remove most pages, so the search cost of the second pass is low.
When looking at page->index outside the page lock we need to be careful to copy it into a local to avoid races (it could change at any time).
We pass down the cache-hot hint to the page freeing code. Even if the mapping is large, it is probably the case that the final pages are the most recently touched, and freeing happens in ascending file offset order.
truncate_inode_pages — truncate *all* the pages from an offset
void truncate_inode_pages ( | mapping, | |
lstart); |
struct address_space * | mapping; |
loff_t | lstart; |
invalidate_mapping_pages — Invalidate all the unlocked pages of one inode
unsigned long invalidate_mapping_pages ( | mapping, | |
| start, | ||
end); |
struct address_space * | mapping; |
pgoff_t | start; |
pgoff_t | end; |
invalidate_inode_pages2_range — remove range of pages from an address_space
int invalidate_inode_pages2_range ( | mapping, | |
| start, | ||
end); |
struct address_space * | mapping; |
pgoff_t | start; |
pgoff_t | end; |
invalidate_inode_pages2 — remove all pages from an address_space
int invalidate_inode_pages2 ( | mapping); |
struct address_space * | mapping; |
truncate_pagecache — unmap and remove pagecache that has been truncated
void truncate_pagecache ( | inode, | |
| old, | ||
new); |
struct inode * | inode; |
loff_t | old; |
loff_t | new; |
inode's new i_size must already be written before truncate_pagecache is called.
This function should typically be called before the filesystem releases resources associated with the freed range (e.g. deallocates blocks). This way, pagecache will always stay logically coherent with on-disk format, and the filesystem would not have to deal with situations such as writepage being called for a page that has already had its underlying blocks deallocated.
Table of Contents
ipc_init — initialise IPC subsystem
int ipc_init ( | void); |
| void; |
ipc_init_ids — initialise IPC identifiers
void ipc_init_ids ( | ids); |
struct ipc_ids * | ids; |
ipc_init_proc_interface — Create a proc interface for sysipc types using a seq_file interface.
void ipc_init_proc_interface ( | path, | |
| header, | ||
| ids, | ||
show); |
const char * | path; |
const char * | header; |
int | ids; |
int (* | show) (struct seq_file *, void *); |
ipc_findkey — find a key in an ipc identifier set
struct kern_ipc_perm * ipc_findkey ( | ids, | |
key); |
struct ipc_ids * | ids; |
key_t | key; |
ipc_addid — add an IPC identifier
int ipc_addid ( | ids, | |
| new, | ||
size); |
struct ipc_ids * | ids; |
struct kern_ipc_perm * | new; |
int | size; |
Add an entry 'new' to the IPC ids idr. The permissions object is initialised and the first free entry is set up and the id assigned is returned. The 'new' entry is returned in a locked state on success. On failure the entry is not locked and a negative err-code is returned.
Called with ipc_ids.rw_mutex held as a writer.
ipcget_new — create a new ipc object
int ipcget_new ( | ns, | |
| ids, | ||
| ops, | ||
params); |
struct ipc_namespace * | ns; |
struct ipc_ids * | ids; |
struct ipc_ops * | ops; |
struct ipc_params * | params; |
ipc_check_perms — check security and permissions for an IPC
int ipc_check_perms ( | ipcp, | |
| ops, | ||
params); |
struct kern_ipc_perm * | ipcp; |
struct ipc_ops * | ops; |
struct ipc_params * | params; |
ipcget_public — get an ipc object or create a new one
int ipcget_public ( | ns, | |
| ids, | ||
| ops, | ||
params); |
struct ipc_namespace * | ns; |
struct ipc_ids * | ids; |
struct ipc_ops * | ops; |
struct ipc_params * | params; |
ipc_rmid — remove an IPC identifier
void ipc_rmid ( | ids, | |
ipcp); |
struct ipc_ids * | ids; |
struct kern_ipc_perm * | ipcp; |
ipc_schedule_free — free ipc + rcu space
void ipc_schedule_free ( | head); |
struct rcu_head * | head; |
ipc_immediate_free — free ipc + rcu space
void ipc_immediate_free ( | head); |
struct rcu_head * | head; |
ipcperms — check IPC permissions
int ipcperms ( | ipcp, | |
flag); |
struct kern_ipc_perm * | ipcp; |
short | flag; |