diff options
author | Kay Sievers <kay.sievers@vrfy.org> | 2012-03-30 22:00:51 +0200 |
---|---|---|
committer | Kay Sievers <kay.sievers@vrfy.org> | 2012-03-30 22:00:51 +0200 |
commit | 45f1f1c32c22e480ccad4685a16912adc3799843 (patch) | |
tree | 431518bc4f346ecac07eb86c61feda898d8bbb4a | |
parent | bf85160ada0dd2f21405fdbbee717cc0e329925b (diff) | |
download | patches-45f1f1c32c22e480ccad4685a16912adc3799843.tar.gz |
remove udlfb, child reaper; add printk
-rw-r--r-- | prctl-child_reaper.patch | 175 | ||||
-rw-r--r-- | printk.patch | 1888 | ||||
-rw-r--r-- | series | 3 | ||||
-rw-r--r-- | udlfb.patch | 94 |
4 files changed, 1889 insertions, 271 deletions
diff --git a/prctl-child_reaper.patch b/prctl-child_reaper.patch deleted file mode 100644 index dc04f17..0000000 --- a/prctl-child_reaper.patch +++ /dev/null @@ -1,175 +0,0 @@ -From: Lennart Poettering <lennart@poettering.net> -Subject: prctl: add PR_{SET,GET}_CHILD_SUBREAPER to allow simple process supervision - -Userspace service managers/supervisors need to track their started -services. Many services daemonize by double-forking and get implicitely -re-parented to PID 1. The service manager will no longer be able to -receive the SIGCHLD signals for them, and is no longer in charge of -reaping the children with wait(). All information about the children -is lost at the moment PID 1 cleans up the re-parented processes. - -With this prctl, a service manager process can mark itself as a sort of -'sub-init', able to stay as the parent for all orphaned processes -created by the started services. All SIGCHLD signals will be delivered -to the service manager. - -Receiving SIGCHLD and doing wait() is in cases of a service-manager -much preferred over any possible asynchronous notification about -specific PIDs, because the service manager has full access to the -child process data in /proc and the PID can not be re-used until -the wait(), the service-manager itself is in charge of, has happended. - -As a side effect, the relevant parent PID information does not get lost -by a double-fork, which results in a more elaborate process tree and 'ps' -output: - -before: - # ps afx - 253 ? Ss 0:00 /bin/dbus-daemon --system --nofork - 294 ? Sl 0:00 /usr/libexec/polkit-1/polkitd - 328 ? S 0:00 /usr/sbin/modem-manager - 608 ? Sl 0:00 /usr/libexec/colord - 658 ? Sl 0:00 /usr/libexec/upowerd - 819 ? Sl 0:00 /usr/libexec/imsettings-daemon - 916 ? Sl 0:00 /usr/libexec/udisks-daemon - 917 ? S 0:00 \_ udisks-daemon: not polling any devices - -after: - # ps afx - 294 ? Ss 0:00 /bin/dbus-daemon --system --nofork - 426 ? Sl 0:00 \_ /usr/libexec/polkit-1/polkitd - 449 ? 
S 0:00 \_ /usr/sbin/modem-manager - 635 ? Sl 0:00 \_ /usr/libexec/colord - 705 ? Sl 0:00 \_ /usr/libexec/upowerd - 959 ? Sl 0:00 \_ /usr/libexec/udisks-daemon - 960 ? S 0:00 | \_ udisks-daemon: not polling any devices - 977 ? Sl 0:00 \_ /usr/libexec/packagekitd - -This prctl is orthogonal to PID namespaces. PID namespaces are isolated -from each other, while a service management process usually requires -the services to live in the same namespace, to be able to talk to each -other. - -Users of this will be the systemd per-user instance, which provides -init-like functionality for the user's login session and D-Bus, which -activates bus services on-demand. Both need init-like capabilities -to be able to properly keep track of the services they start. - -Many thanks to Oleg for several rounds of review and insights. - -Reviewed-by: Oleg Nesterov <oleg@redhat.com> -Signed-off-by: Lennart Poettering <lennart@poettering.net> -Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> ---- - - include/linux/prctl.h | 3 +++ - include/linux/sched.h | 12 ++++++++++++ - kernel/exit.c | 28 +++++++++++++++++++++++----- - kernel/fork.c | 3 +++ - kernel/sys.c | 8 ++++++++ - 5 files changed, 49 insertions(+), 5 deletions(-) - ---- a/include/linux/prctl.h -+++ b/include/linux/prctl.h -@@ -114,4 +114,7 @@ - # define PR_SET_MM_START_BRK 6 - # define PR_SET_MM_BRK 7 - -+#define PR_SET_CHILD_SUBREAPER 36 -+#define PR_GET_CHILD_SUBREAPER 37 -+ - #endif /* _LINUX_PRCTL_H */ ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -552,6 +552,18 @@ struct signal_struct { - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - -+ /* -+ * PR_SET_CHILD_SUBREAPER marks a process, like a service -+ * manager, to re-parent orphan (double-forking) child processes -+ * to this process instead of 'init'. The service manager is -+ * able to receive SIGCHLD signals and is able to investigate -+ * the process until it calls wait(). 
All children of this -+ * process will inherit a flag if they should look for a -+ * child_subreaper process at exit. -+ */ -+ unsigned int is_child_subreaper:1; -+ unsigned int has_child_subreaper:1; -+ - /* POSIX.1b Interval Timers */ - struct list_head posix_timers; - ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -686,11 +686,12 @@ static void exit_mm(struct task_struct * - } - - /* -- * When we die, we re-parent all our children. -- * Try to give them to another thread in our thread -- * group, and if no such member exists, give it to -- * the child reaper process (ie "init") in our pid -- * space. -+ * When we die, we re-parent all our children, and try to: -+ * 1. give them to another thread in our thread group, if such a -+ * member exists -+ * 2. give it to the first anchestor process which prctl'd itself -+ * as a child_subreaper for its children (like a service manager) -+ * 3. give it to the init process (PID 1) in our pid namespace - */ - static struct task_struct *find_new_reaper(struct task_struct *father) - __releases(&tasklist_lock) -@@ -721,6 +722,23 @@ static struct task_struct *find_new_reap - * forget_original_parent() must move them somewhere. 
- */ - pid_ns->child_reaper = init_pid_ns.child_reaper; -+ } else if (father->signal->has_child_subreaper) { -+ struct task_struct *reaper; -+ -+ /* find the first ancestor marked as child_subreaper */ -+ for (reaper = father->real_parent; -+ reaper != &init_task; -+ reaper = reaper->real_parent) { -+ if (same_thread_group(reaper, pid_ns->child_reaper)) -+ break; -+ if (!reaper->signal->is_child_subreaper) -+ continue; -+ thread = reaper; -+ do { -+ if (!(thread->flags & PF_EXITING)) -+ return reaper; -+ } while_each_thread(reaper, thread); -+ } - } - - return pid_ns->child_reaper; ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -984,6 +984,9 @@ static int copy_signal(unsigned long clo - sig->oom_score_adj = current->signal->oom_score_adj; - sig->oom_score_adj_min = current->signal->oom_score_adj_min; - -+ sig->has_child_subreaper = current->signal->has_child_subreaper || -+ current->signal->is_child_subreaper; -+ - mutex_init(&sig->cred_guard_mutex); - - return 0; ---- a/kernel/sys.c -+++ b/kernel/sys.c -@@ -1962,6 +1962,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsi - case PR_SET_MM: - error = prctl_set_mm(arg2, arg3, arg4, arg5); - break; -+ case PR_SET_CHILD_SUBREAPER: -+ me->signal->is_child_subreaper = !!arg2; -+ error = 0; -+ break; -+ case PR_GET_CHILD_SUBREAPER: -+ error = put_user(me->signal->is_child_subreaper, -+ (int __user *) arg2); -+ break; - default: - error = -EINVAL; - break; diff --git a/printk.patch b/printk.patch new file mode 100644 index 0000000..45a60bb --- /dev/null +++ b/printk.patch @@ -0,0 +1,1888 @@ +From: Kay Sievers <kay@vrfy.org> +Subject: [WORK-IN-PROGRESS] printk: introduce structured and multi-facility logging + +- Record-based instead of byte-stream buffer. All records carry proper + timestamp, syslog facility, priority in the record header. + +- Records consume almost the same amount, sometimes less memory than + the old simple bytes stream buffer with printk_time enabled. 
The record + header is 16 bytes long plus some padding bytes if needed. The old byte + stream buffer needed 3 chars for the syslog prefix, 15 chars for the + timestamp and a newline. + +- The buffer management is based on message sequence numbers. When records + need to be discarded, the heads move on to the next full record. Unlike + the old byte-stream buffer, no old logged lines get truncated or partly + overwritten by new ones. Sequence numbers also allow consumers of the log + stream to get notified if any message in the stream they are about to read + gets discarded during the time of reading. + +- Better buffered IO support for KERN_CONT continuation lines, when printk() + is called multiple times for a single line. The use of KERN_CONT is now + required; a few places in the kernel need trivial fixes here. The buffering + could possibly be extended to per-cpu variables to allow better + thread-safety for multiple printk() invocations for a single line. + +- Full-featured syslog facility value support. Different facilities can + tag their messages. All userspace-injected messages enforce + facility != 0 now, to be able to reliably distinguish them from the + kernel-generated messages. Independent subsystems like a baseband + processor or a kernel-related userspace process can use their own + unique facility values. Multiple independent log streams can co-exist + that way in the same buffer. All share the same global sequence + number counter to ensure proper ordering and to allow the consumers + of the log to reliably correlate the events from different facilities. + +- Subsystems and drivers can easily attach key/value pair dictionaries + to any printk() message. Log consumers can extract and recognize + these properties reliably, they are not mixed into the human-readable + text stream. + +- The dev_printk() output is reliably machine-readable now. 
In addition + to the printed message, it creates a log dictionary with the following + properties: + SUBSYSTEM= - the driver-core subsystem name + DEVICE= + b12:8 - block dev_t + c127:3 - char dev_t + n8 - netdev ifindex + +sound:card0 - subsystem:devname + +- Modern support for multiple concurrent readers of /dev/kmsg. Full read(), + seek(), poll() support. Output of message sequence numbers, to allow + userspace log consumers to reliably reconnect and reconstruct their state + at any given time. After open("/dev/kmsg"), read() always returns all + currently available records in the buffer. If only future messages should + be read, SEEK_END can be used. In case records get overwritten while + /dev/kmsg is held open and not read, or records get overwritten faster + than they are read, the next read() will return -EIO and the current + reading position gets updated to the next available record. The passed + sequence numbers allow the log consumer to calculate the number of lost + messages. + + $ cat /dev/kmsg + PRIORITY=5 + SEQNUM=0 + TIMESTAMP=0 + MESSAGE=Linux version 3.3.0+ (kay@mop) (gcc version 4.7.0 20120315 ... + + ... + + SYSLOG_FACILITY=0 + PRIORITY=7 + SEQNUM=268 + TIMESTAMP=399682 + MESSAGE=pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored) + SUBSYSTEM=acpi + DEVICE=+acpi:PNP0A03:00 + + ... + + SYSLOG_FACILITY=3 + PRIORITY=6 + SEQNUM=863 + TIMESTAMP=3679024 + MESSAGE=udevd[83]: starting version 175 + + ... 
+ + PRIORITY=6 + SEQNUM=1012 + TIMESTAMP=7169447 + MESSAGE=usb 2-1.4: MAC-Address: 02:80:37:ec:02:00 + SUBSYSTEM=usb + DEVICE=c189:130 + +Signed-off-by: Kay Sievers <kay@vrfy.org> +--- + drivers/base/core.c | 49 + + drivers/char/mem.c | 40 - + include/linux/printk.h | 13 + kernel/printk.c | 1355 +++++++++++++++++++++++++++++++++---------------- + 4 files changed, 988 insertions(+), 469 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -25,6 +25,7 @@ + #include <linux/mutex.h> + #include <linux/async.h> + #include <linux/pm_runtime.h> ++#include <linux/netdevice.h> + + #include "base.h" + #include "power/power.h" +@@ -1843,15 +1844,57 @@ void device_shutdown(void) + */ + + #ifdef CONFIG_PRINTK +- + int __dev_printk(const char *level, const struct device *dev, + struct va_format *vaf) + { ++ char dict[128]; ++ size_t dictlen = 0; ++ const char *subsys; ++ + if (!dev) + return printk("%s(NULL device *): %pV", level, vaf); + +- return printk("%s%s %s: %pV", +- level, dev_driver_string(dev), dev_name(dev), vaf); ++ if (dev->class) ++ subsys = dev->class->name; ++ else if (dev->bus) ++ subsys = dev->bus->name; ++ else ++ subsys = "(NULL subsystem)"; ++ ++ dictlen += snprintf(dict + dictlen, sizeof(dict) - dictlen, ++ "SUBSYSTEM=%s\n", subsys); ++ ++ /* ++ * Add device identifier DEVICE=: ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ */ ++ if (MAJOR(dev->devt)) { ++ char c; ++ ++ if (strcmp(subsys, "block") == 0) ++ c = 'b'; ++ else ++ c = 'c'; ++ dictlen += snprintf(dict + dictlen, sizeof(dict) - dictlen, ++ "DEVICE=%c%u:%u\n", ++ c, MAJOR(dev->devt), MINOR(dev->devt)); ++ } else if (strcmp(subsys, "net") == 0) { ++ struct net_device *net = to_net_dev(dev); ++ ++ dictlen += snprintf(dict + dictlen, sizeof(dict) - dictlen, ++ "DEVICE=n%u\n", net->ifindex); ++ } else { ++ dictlen += snprintf(dict + dictlen, sizeof(dict) - dictlen, ++ "DEVICE=+%s:%s\n", subsys, dev_name(dev)); ++ } ++ ++ 
return printk_emit(0, level[1] - '0', ++ dict, dictlen, ++ "%s %s: %pV", ++ dev_driver_string(dev), dev_name(dev), vaf); + } + EXPORT_SYMBOL(__dev_printk); + +--- a/drivers/char/mem.c ++++ b/drivers/char/mem.c +@@ -807,44 +807,6 @@ static const struct file_operations oldm + }; + #endif + +-static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv, +- unsigned long count, loff_t pos) +-{ +- char *line, *p; +- int i; +- ssize_t ret = -EFAULT; +- size_t len = iov_length(iv, count); +- +- line = kmalloc(len + 1, GFP_KERNEL); +- if (line == NULL) +- return -ENOMEM; +- +- /* +- * copy all vectors into a single string, to ensure we do +- * not interleave our log line with other printk calls +- */ +- p = line; +- for (i = 0; i < count; i++) { +- if (copy_from_user(p, iv[i].iov_base, iv[i].iov_len)) +- goto out; +- p += iv[i].iov_len; +- } +- p[0] = '\0'; +- +- ret = printk("%s", line); +- /* printk can add a prefix */ +- if (ret > len) +- ret = len; +-out: +- kfree(line); +- return ret; +-} +- +-static const struct file_operations kmsg_fops = { +- .aio_write = kmsg_writev, +- .llseek = noop_llseek, +-}; +- + static const struct memdev { + const char *name; + umode_t mode; +@@ -863,7 +825,7 @@ static const struct memdev { + [7] = { "full", 0666, &full_fops, NULL }, + [8] = { "random", 0666, &random_fops, NULL }, + [9] = { "urandom", 0666, &urandom_fops, NULL }, +- [11] = { "kmsg", 0, &kmsg_fops, NULL }, ++ [11] = { "kmsg", 0644, &kmsg_fops, NULL }, + #ifdef CONFIG_CRASH_DUMP + [12] = { "oldmem", 0, &oldmem_fops, NULL }, + #endif +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -95,8 +95,19 @@ extern int printk_needs_cpu(int cpu); + extern void printk_tick(void); + + #ifdef CONFIG_PRINTK ++asmlinkage __printf(5, 0) ++int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args); ++ + asmlinkage __printf(1, 0) + int vprintk(const char *fmt, va_list args); ++ ++asmlinkage __printf(5, 6) __cold 
++asmlinkage int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...); ++ + asmlinkage __printf(1, 2) __cold + int printk(const char *fmt, ...); + +@@ -289,6 +300,8 @@ extern void dump_stack(void) __cold; + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) + #endif + ++extern const struct file_operations kmsg_fops; ++ + enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, +--- a/kernel/printk.c ++++ b/kernel/printk.c +@@ -41,6 +41,7 @@ + #include <linux/cpu.h> + #include <linux/notifier.h> + #include <linux/rculist.h> ++#include <linux/poll.h> + + #include <asm/uaccess.h> + +@@ -54,8 +55,6 @@ void asmlinkage __attribute__((weak)) ea + { + } + +-#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +- + /* printk's without a loglevel use this.. */ + #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL + +@@ -99,24 +98,6 @@ EXPORT_SYMBOL_GPL(console_drivers); + static int console_locked, console_suspended; + + /* +- * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars +- * It is also used in interesting ways to provide interlocking in +- * console_unlock();. +- */ +-static DEFINE_RAW_SPINLOCK(logbuf_lock); +- +-#define LOG_BUF_MASK (log_buf_len-1) +-#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) +- +-/* +- * The indices into log_buf are not constrained to log_buf_len - they +- * must be masked before subscripting +- */ +-static unsigned log_start; /* Index into log_buf: next char to be read by syslog() */ +-static unsigned con_start; /* Index into log_buf: next char to be sent to consoles */ +-static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ +- +-/* + * If exclusive_console is non-NULL then only this console is to be printed to. 
+ */ + static struct console *exclusive_console; +@@ -146,12 +127,521 @@ EXPORT_SYMBOL(console_set_on_cmdline); + static int console_may_schedule; + + #ifdef CONFIG_PRINTK ++/* ++ * The printk log buffer consists of a chain of concatenated variable ++ * length records. Every record starts with a record header, containing ++ * the overall length of the record. ++ * ++ * The heads to the first and last entry in the buffer, as well as the ++ * sequence numbers of both of these entries are maintained. ++ * ++ * If the heads indicate available messages, a length == 0 for the next ++ * message indicates a wrap-around to the beginning of the buffer. ++ * ++ * Every record carries the monotonic timestamp in nanoseconds, as well as ++ * the standard userspace syslog level and syslog facility. The usual ++ * kernel messages use LOG_KERN; userspace-injected messages always carry ++ * a matching syslog facility, by default LOG_USER. The origin of every ++ * message can be reliably determined that way. ++ * ++ * The human readable log message directly follows the message header. The ++ * length of the message text is stored in the header, the message is _not_ ++ * terminated by a '\n' or '\0' character. ++ * ++ * Optionally, a message can carry a dictionary of properties (key/value pairs). ++ * The content of the dictionary is opaque to the printk code, it is stored ++ * only to provide userspace with a machine-readable message context. ++ * ++ * Examples for well-defined, commonly used property names are: ++ * DEVICE=b12:8 device identifier ++ * b12:8 block dev_t ++ * c127:3 char dev_t ++ * n8 netdev ifindex ++ * +sound:card0 subsystem:devname ++ * SUBSYSTEM=pci driver-core subsystem name ++ * ++ * Valid characters in property names are only [a-zA-Z0-9.-_]. The plain ++ * text value directly follows the '=' character. Any value that ++ * can possibly carry non-printable characters must be explicitly encoded ++ * as a binary value. 
++ * ++ * Multiple properties are terminated by a '\n' character. ++ * ++ * A binary value is encoded by not using a '=' character as a delimiter, ++ * but a '\n' character followed directly by a non-aligned inlined little endian ++ * uint64_t, which specifies the number of binary bytes to read as value. ++ * Example for a binary property containing 4 bytes: ++ * "DEVICE=b12:8\nFIRMWARE_DUMP\n\x04\x00\x00\x00\x00\x00\x00\x00\x11\x12\x13\x14\nDRIVER=ahci\0" ++ * ++ * Any values containing possibly non-printable characters MUST be encoded ++ * as binary properties before being added to the dictionary. Values ++ * containing newlines could be interpreted as keys, and would be a security ++ * risk. Userspace must be able to trust the generated key/value output ++ * stream to be valid regarding the key and newline sequence. ++ * ++ * Example for a plain text dictionary: ++ * "DEVICE=b12:8\nSUBSYSTEM=pci\nDRIVER=ahci\n" ++ * ++ * Example of a message structure: ++ * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec ++ * 0008 38 00 record is 56 bytes long ++ * 000a 0b 00 text is 11 bytes long ++ * 000c 1f 00 dictionary is 23 bytes long ++ * 000e 03 LOG_ERR (level) ++ * 000f 00 LOG_KERN (facility) ++ * 0010 69 74 27 73 20 61 20 6c "it's a l" ++ * 69 6e 65 "ine" ++ * 001b 44 45 56 49 43 "DEVIC" ++ * 45 3d 62 38 3a 32 0a 44 "E=b8:2\nD" ++ * 52 49 56 45 52 3d 62 75 "RIVER=bu" ++ * 67 0a "g\n" ++ * 0032 00 00 00 00 00 00 padding to next message header ++ * ++ * The 'struct log' buffer header must never be directly exported to ++ * userspace, it is a kernel-private implementation detail. ++ * ++ * The classic syslog() syscall format export must only export the human ++ * readable text message, with the syslog level and facility prepended ++ * as <>. Adding unconverted binary data would break the syslog() syscall ++ * export format. 
++ * ++ * Interfaces that export the structured dictionary data, should follow the ++ * binary record format definition of the dictionary and use the following ++ * standard property names: ++ * SYSLOG_FACILITY= syslog facility number ++ * PRIORITY= message priority number ++ * SEQNUM= log record sequence number ++ * TIMESTAMP= monotonic time in microseconds ++ * MESSAGE= human readable message ++ * ++ * The optional dictionary properties attached to the record should be copied ++ * in the same output format directly after the standard message properties. ++ * ++ * In a flat text stream of records, every record should be terminated ++ * by "\n\n", which separates the records from each other. ++ */ ++ ++struct log { ++ u64 ts_nsec; /* timestamp in nanoseconds */ ++ u16 len; /* length of entire record */ ++ u16 text_len; /* length of text buffer */ ++ u16 dict_len; /* length of dictionary buffer */ ++ u8 level; /* syslog level (LOG_WARN, LOG_ERR, ...) */ ++ u8 facility; /* syslog facility (LOG_KERN == 0) */ ++}; + ++/* ++ * The logbuf_lock protects kmsg buffer, indices, counters. 
It is also ++ * used in interesting ways to provide interlocking in console_unlock(); ++ */ ++static DEFINE_RAW_SPINLOCK(logbuf_lock); ++ ++/* cpu currently holding logbuf_lock */ ++static volatile unsigned int logbuf_cpu = UINT_MAX; ++ ++/* record buffer */ ++#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) + static char __log_buf[__LOG_BUF_LEN]; + static char *log_buf = __log_buf; +-static int log_buf_len = __LOG_BUF_LEN; +-static unsigned logged_chars; /* Number of chars produced since last read+clear operation */ +-static int saved_console_loglevel = -1; ++static u32 log_buf_len = __LOG_BUF_LEN; ++ ++/* index and sequence number of the first record stored in the buffer */ ++static u64 log_first_seq; ++static u32 log_first_idx; ++ ++/* index and sequence number of the next record to store in the buffer */ ++static u64 log_next_seq; ++static u32 log_next_idx; ++ ++/* the next printk record to read after the last 'clear' command */ ++static u64 clear_seq; ++static u32 clear_idx; ++ ++/* the next printk record to read by syslog(READ) or /proc/kmsg */ ++static u64 syslog_seq; ++static u32 syslog_idx; ++ ++/* human readable text of the record */ ++static char *log_text(const struct log *msg) ++{ ++ return (char *)msg + sizeof(struct log); ++} ++ ++/* optional key/value pair dictionary attached to the record */ ++static char *log_dict(const struct log *msg) ++{ ++ return (char *)msg + sizeof(struct log) + msg->text_len; ++} ++ ++/* get record by index */ ++static struct log *log_from_idx(u32 idx) ++{ ++ return (struct log *)&log_buf[idx]; ++} ++ ++/* get next record; log_next_idx indicates the last record */ ++static u32 log_next(u32 idx) ++{ ++ u16 len = log_from_idx(idx)->len; ++ ++ /* length == 0 indicates the end of the buffer; wrap */ ++ if (len == 0) ++ return 0; ++ return idx + len; ++} ++ ++/* insert record into the buffer, discard old ones, update heads */ ++static void log_store(int facility, int level, ++ const char *dict, u16 dict_len, ++ const char 
*text, u16 text_len) ++{ ++ struct log *msg; ++ u32 size, pad_len; ++ ++ /* number of '\0' padding bytes to next message */ ++ size = sizeof(struct log) + text_len + dict_len; ++ pad_len = (-size) & (8 - 1); ++ ++ if (log_next_idx + size > log_buf_len) { ++ /* ++ * Next message does not fit at the end; wrap. Zero-out ++ * the remaining buffer to set len = 0, and leave a clean ++ * buffer for things like crash dump. ++ */ ++ memset(log_buf + log_next_idx, 0, log_buf_len - log_next_idx); ++ log_next_idx = 0; ++ } ++ ++ /* drop as many old messages as needed */ ++ for (;;) { ++ u32 free; ++ ++ if (log_next_idx >= log_first_idx) ++ free = log_buf_len - (log_next_idx - log_first_idx); ++ else ++ free = log_first_idx - log_next_idx; ++ free -= log_from_idx(log_next_idx)->len; ++ ++ if (free >= sizeof(struct log) + text_len + dict_len + pad_len) ++ break; ++ ++ log_first_idx = log_next(log_first_idx); ++ log_first_seq++; ++ } ++ ++ /* fill message */ ++ msg = log_from_idx(log_next_idx); ++ memcpy(log_text(msg), text, text_len); ++ msg->text_len = text_len; ++ memcpy(log_dict(msg), dict, dict_len); ++ msg->dict_len = dict_len; ++ msg->level = level; ++ msg->facility = facility; ++ msg->ts_nsec = local_clock(); ++ memset(log_dict(msg) + dict_len, 0, pad_len); ++ msg->len = sizeof(struct log) + text_len + dict_len + pad_len; ++ ++ /* insert message */ ++ log_next_idx += msg->len; ++ log_next_seq++; ++} ++ ++/* /dev/kmsg - userspace message inject/listen interface */ ++struct devkmsg_user { ++ u64 seq; ++ u32 idx; ++ struct mutex lock; ++ char buf[8192]; ++}; ++ ++static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, ++ unsigned long count, loff_t pos) ++{ ++ char *buf, *line; ++ int i; ++ int level = default_message_loglevel; ++ int facility = 1; /* LOG_USER */ ++ size_t len = iov_length(iv, count); ++ ssize_t ret = len; ++ ++ buf = kmalloc(len+1, GFP_KERNEL); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ line = buf; ++ for (i = 0; i < count; i++) { ++ if 
(copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) ++ goto out; ++ line += iv[i].iov_len; ++ } ++ ++ /* ++ * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace ++ * the decimal value represents 32bit, the lower 3 bit are the log ++ * level, the rest are the log facility. ++ * ++ * If no prefix or no userspace facility is specified, we ++ * enforce LOG_USER, to be able to reliably distinguish ++ * kernel-generated messages from userspace-injected ones. ++ */ ++ line = buf; ++ if (line[0] == '<') { ++ char *endp = NULL; ++ ++ i = simple_strtoul(line+1, &endp, 10); ++ if (endp && endp[0] == '>') { ++ level = i & 7; ++ if (i >> 3) ++ facility = i >> 3; ++ endp++; ++ len -= endp - line; ++ line = endp; ++ } ++ } ++ line[len] = '\0'; ++ ++ printk_emit(facility, level, NULL, 0, "%s", line); ++out: ++ kfree(buf); ++ return ret; ++} ++ ++static ssize_t devkmsg_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct devkmsg_user *user = file->private_data; ++ struct log *msg; ++ bool binary = false; ++ size_t i; ++ size_t len; ++ ssize_t ret; ++ ++ if (!user) ++ return -EBADF; ++ ++ mutex_lock(&user->lock); ++ raw_spin_lock(&logbuf_lock); ++ while (user->seq == log_next_seq) { ++ if (file->f_flags & O_NONBLOCK) { ++ ret = -EAGAIN; ++ raw_spin_unlock(&logbuf_lock); ++ goto out; ++ } ++ ++ raw_spin_unlock(&logbuf_lock); ++ ret = wait_event_interruptible(log_wait, ++ user->seq != log_next_seq); ++ if (ret) ++ goto out; ++ raw_spin_lock(&logbuf_lock); ++ } ++ ++ if (user->seq < log_first_seq) { ++ /* our last seen message is gone, return error and reset */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ ret = -EPIPE; ++ raw_spin_unlock(&logbuf_lock); ++ goto out; ++ } ++ ++ msg = log_from_idx(user->idx); ++ len = 0; ++ ++ if (msg->facility) { ++ memcpy(user->buf + len, "SYSLOG_FACILITY=", 16); ++ len += 16; ++ len += sprintf(user->buf + len, "%u", msg->facility); ++ user->buf[len++] = '\n'; ++ } ++ ++ 
memcpy(user->buf + len, "PRIORITY=", 9); ++ len += 9; ++ user->buf[len++] = '0' + msg->level; ++ user->buf[len++] = '\n'; ++ ++ memcpy(user->buf + len, "SEQNUM=", 7); ++ len += 7; ++ len += sprintf(user->buf + len, "%llu", user->seq); ++ user->buf[len++] = '\n'; ++ ++ if (msg->ts_nsec) { ++ memcpy(user->buf + len, "TIMESTAMP=", 10); ++ len += 10; ++ len += sprintf(user->buf + len, "%llu", msg->ts_nsec / 1000); ++ user->buf[len++] = '\n'; ++ } ++ ++ memcpy(user->buf + len, "MESSAGE", 7); ++ len += 7; ++ if (len + msg->text_len + msg->dict_len + 2 > sizeof(user->buf)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* any non-printable characters switch to binary property output */ ++ for (i = 0; i < msg->text_len; i++) { ++ char c = log_text(msg)[i]; ++ ++ if (c < ' ' || c >= 128) { ++ binary = true; ++ break; ++ } ++ } ++ if (binary) { ++ u64 len_le64; ++ ++ /* add unit64_t little endian length of binary data */ ++ user->buf[len++] = '\n'; ++ len_le64 = cpu_to_le64(msg->text_len+1); ++ memcpy(user->buf + len, &len_le64, sizeof(len_le64)); ++ len += sizeof(len_le64); ++ } else { ++ /* plain text value */ ++ user->buf[len++] = '='; ++ } ++ ++ memcpy(user->buf + len, log_text(msg), msg->text_len); ++ len += msg->text_len; ++ user->buf[len++] = '\n'; ++ ++ /* ++ * Copy dictionary; all keys, including the last one, are expected to ++ * be terminated by '\n'. ++ */ ++ memcpy(user->buf + len, log_dict(msg), msg->dict_len); ++ len += msg->dict_len; ++ ++ /* ++ * Terminate the record by a second '\n' to make the /dev/kmgs' ++ * output parsable as a text stream and human readable. 
++ */ ++ user->buf[len++] = '\n'; ++ ++ user->idx = log_next(user->idx); ++ user->seq++; ++ raw_spin_unlock(&logbuf_lock); ++ ++ if (len > count) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (copy_to_user(buf, user->buf, len)) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ret = len; ++out: ++ mutex_unlock(&user->lock); ++ return ret; ++} ++ ++static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct devkmsg_user *user = file->private_data; ++ loff_t ret = 0; ++ ++ if (!user) ++ return -EBADF; ++ if (offset) ++ return -ESPIPE; ++ ++ raw_spin_lock(&logbuf_lock); ++ switch (whence) { ++ case SEEK_SET: ++ /* the first record */ ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ break; ++ case SEEK_CUR: ++ /* the first record after the last 'clear' */ ++ user->idx = clear_idx; ++ user->seq = clear_seq; ++ break; ++ case SEEK_END: ++ /* after the last record */ ++ user->idx = log_next_idx; ++ user->seq = log_next_seq; ++ break; ++ default: ++ ret = -EINVAL; ++ } ++ raw_spin_unlock(&logbuf_lock); ++ return ret; ++} ++ ++static unsigned int devkmsg_poll(struct file *file, poll_table *wait) ++{ ++ struct devkmsg_user *user = file->private_data; ++ int ret = 0; ++ ++ if (!user) ++ return POLLERR|POLLNVAL; ++ ++ poll_wait(file, &log_wait, wait); ++ ++ raw_spin_lock(&logbuf_lock); ++ if (user->seq < log_next_seq) { ++ /* return error when data has vanished underneath us */ ++ if (user->seq < log_first_seq) ++ ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; ++ ret = POLLIN|POLLRDNORM; ++ } ++ raw_spin_unlock(&logbuf_lock); ++ ++ return ret; ++} ++ ++static int devkmsg_open(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user; ++ int err; ++ ++ /* write-only does not need any file context */ ++ if ((file->f_flags & O_ACCMODE) == O_WRONLY) ++ return 0; ++ ++ err = security_syslog(SYSLOG_ACTION_READ_ALL); ++ if (err) ++ return err; ++ ++ user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); ++ if (!user) ++ return -ENOMEM; ++ 
++ mutex_init(&user->lock); ++ ++ raw_spin_lock(&logbuf_lock); ++ user->idx = log_first_idx; ++ user->seq = log_first_seq; ++ raw_spin_unlock(&logbuf_lock); ++ ++ file->private_data = user; ++ return 0; ++} ++ ++static int devkmsg_release(struct inode *inode, struct file *file) ++{ ++ struct devkmsg_user *user = file->private_data; ++ ++ if (!user) ++ return 0; ++ ++ mutex_destroy(&user->lock); ++ kfree(user); ++ return 0; ++} ++ ++const struct file_operations kmsg_fops = { ++ .open = devkmsg_open, ++ .read = devkmsg_read, ++ .aio_write = devkmsg_writev, ++ .llseek = devkmsg_llseek, ++ .poll = devkmsg_poll, ++ .llseek = noop_llseek, ++ .release = devkmsg_release, ++}; + + #ifdef CONFIG_KEXEC + /* +@@ -165,9 +655,9 @@ static int saved_console_loglevel = -1; + void log_buf_kexec_setup(void) + { + VMCOREINFO_SYMBOL(log_buf); +- VMCOREINFO_SYMBOL(log_end); + VMCOREINFO_SYMBOL(log_buf_len); +- VMCOREINFO_SYMBOL(logged_chars); ++ VMCOREINFO_SYMBOL(log_first_idx); ++ VMCOREINFO_SYMBOL(log_next_idx); + } + #endif + +@@ -191,7 +681,6 @@ early_param("log_buf_len", log_buf_len_s + void __init setup_log_buf(int early) + { + unsigned long flags; +- unsigned start, dest_idx, offset; + char *new_log_buf; + int free; + +@@ -219,20 +708,8 @@ void __init setup_log_buf(int early) + log_buf_len = new_log_buf_len; + log_buf = new_log_buf; + new_log_buf_len = 0; +- free = __LOG_BUF_LEN - log_end; +- +- offset = start = min(con_start, log_start); +- dest_idx = 0; +- while (start != log_end) { +- unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); +- +- log_buf[dest_idx] = __log_buf[log_idx_mask]; +- start++; +- dest_idx++; +- } +- log_start -= offset; +- con_start -= offset; +- log_end -= offset; ++ free = __LOG_BUF_LEN - log_next_idx; ++ memcpy(log_buf, __log_buf, __LOG_BUF_LEN); + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + pr_info("log_buf_len: %d\n", log_buf_len); +@@ -332,11 +809,160 @@ static int check_syslog_permissions(int + return 0; + } + ++#if 
defined(CONFIG_PRINTK_TIME) ++static bool printk_time = 1; ++#else ++static bool printk_time; ++#endif ++module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); ++ ++static int syslog_print_line(u32 idx, char *text, size_t size) ++{ ++ struct log *msg; ++ size_t len; ++ ++ msg = log_from_idx(idx); ++ if (!text) { ++ /* calculate length only */ ++ len = 3; ++ ++ if (msg->facility) ++ len++; ++ if (msg->facility > 12) ++ len++; ++ ++ if (printk_time) ++ len += 15; ++ ++ len += msg->text_len; ++ len++; ++ return len; ++ } ++ ++ len = sprintf(text, "<%u>", (msg->facility << 3) | msg->level); ++ ++ if (printk_time) { ++ unsigned long long t = msg->ts_nsec; ++ unsigned long rem_ns = do_div(t, 1000000000); ++ ++ len += sprintf(text + len, "[%5lu.%06lu] ", ++ (unsigned long) t, rem_ns / 1000); ++ } ++ ++ if (len + msg->text_len > size) ++ return -EINVAL; ++ memcpy(text + len, log_text(msg), msg->text_len); ++ len += msg->text_len; ++ text[len++] = '\n'; ++ return len; ++} ++ ++static int syslog_print(char __user *buf, int size) ++{ ++ char *text; ++ int len; ++ ++ text = kmalloc(1024, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ raw_spin_lock_irq(&logbuf_lock); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ } ++ len = syslog_print_line(syslog_idx, text, 1024); ++ syslog_idx = log_next(syslog_idx); ++ syslog_seq++; ++ raw_spin_unlock_irq(&logbuf_lock); ++ ++ if (len > 0 && copy_to_user(buf, text, len)) ++ len = -EFAULT; ++ ++ kfree(text); ++ return len; ++} ++ ++static int syslog_print_all(char __user *buf, int size, bool clear) ++{ ++ char *text; ++ int len = 0; ++ ++ text = kmalloc(1024, GFP_KERNEL); ++ if (!text) ++ return -ENOMEM; ++ ++ raw_spin_lock_irq(&logbuf_lock); ++ if (buf) { ++ u64 next_seq; ++ u64 seq; ++ u32 idx; ++ ++ if (clear_seq < log_first_seq) { ++ /* messages are gone, move to first available one */ ++ clear_seq = log_first_seq; ++ 
clear_idx = log_first_idx; ++ } ++ ++ /* ++ * Find first record that fits, including all following records, ++ * into the user-provided buffer for this dump. ++ */ ++ for (idx = clear_idx; idx < log_next_idx; idx = log_next(idx)) ++ len += syslog_print_line(idx, NULL, 0); ++ seq = clear_seq; ++ idx = clear_idx; ++ while (len > size) { ++ len -= syslog_print_line(idx, NULL, 0); ++ seq++; ++ idx = log_next(idx); ++ } ++ ++ /* last message in this dump */ ++ next_seq = log_next_seq; ++ ++ len = 0; ++ while (len >= 0 && seq < next_seq) { ++ int textlen; ++ ++ textlen = syslog_print_line(idx, text, 1024); ++ if (textlen < 0) { ++ len = textlen; ++ break; ++ } ++ idx = log_next(idx); ++ seq++; ++ ++ raw_spin_unlock_irq(&logbuf_lock); ++ if (copy_to_user(buf + len, text, textlen)) ++ len = -EFAULT; ++ else ++ len += textlen; ++ raw_spin_lock_irq(&logbuf_lock); ++ ++ if (seq < log_first_seq) { ++ /* messages are gone, move to next one */ ++ seq = log_first_seq; ++ idx = log_first_idx; ++ } ++ } ++ } ++ ++ if (clear) { ++ clear_seq = log_next_seq; ++ clear_idx = log_next_idx; ++ } ++ raw_spin_unlock_irq(&logbuf_lock); ++ ++ kfree(text); ++ return len; ++} ++ + int do_syslog(int type, char __user *buf, int len, bool from_file) + { +- unsigned i, j, limit, count; +- int do_clear = 0; +- char c; ++ bool clear = false; ++ static int saved_console_loglevel = -1; + int error; + + error = check_syslog_permissions(type, from_file); +@@ -364,28 +990,14 @@ int do_syslog(int type, char __user *buf + goto out; + } + error = wait_event_interruptible(log_wait, +- (log_start - log_end)); ++ syslog_seq != log_next_seq); + if (error) + goto out; +- i = 0; +- raw_spin_lock_irq(&logbuf_lock); +- while (!error && (log_start != log_end) && i < len) { +- c = LOG_BUF(log_start); +- log_start++; +- raw_spin_unlock_irq(&logbuf_lock); +- error = __put_user(c,buf); +- buf++; +- i++; +- cond_resched(); +- raw_spin_lock_irq(&logbuf_lock); +- } +- raw_spin_unlock_irq(&logbuf_lock); +- if (!error) +- 
error = i; ++ error = syslog_print(buf, len); + break; + /* Read/clear last kernel messages */ + case SYSLOG_ACTION_READ_CLEAR: +- do_clear = 1; ++ clear = true; + /* FALL THRU */ + /* Read last kernel messages */ + case SYSLOG_ACTION_READ_ALL: +@@ -399,52 +1011,12 @@ int do_syslog(int type, char __user *buf + error = -EFAULT; + goto out; + } +- count = len; +- if (count > log_buf_len) +- count = log_buf_len; +- raw_spin_lock_irq(&logbuf_lock); +- if (count > logged_chars) +- count = logged_chars; +- if (do_clear) +- logged_chars = 0; +- limit = log_end; +- /* +- * __put_user() could sleep, and while we sleep +- * printk() could overwrite the messages +- * we try to copy to user space. Therefore +- * the messages are copied in reverse. <manfreds> +- */ +- for (i = 0; i < count && !error; i++) { +- j = limit-1-i; +- if (j + log_buf_len < log_end) +- break; +- c = LOG_BUF(j); +- raw_spin_unlock_irq(&logbuf_lock); +- error = __put_user(c,&buf[count-1-i]); +- cond_resched(); +- raw_spin_lock_irq(&logbuf_lock); +- } +- raw_spin_unlock_irq(&logbuf_lock); +- if (error) +- break; +- error = i; +- if (i != count) { +- int offset = count-error; +- /* buffer overflow during copy, correct user buffer.
*/ +- for (i = 0; i < error; i++) { +- if (__get_user(c,&buf[i+offset]) || +- __put_user(c,&buf[i])) { +- error = -EFAULT; +- break; +- } +- cond_resched(); +- } +- } ++ error = syslog_print_all(buf, len, clear); + break; + /* Clear ring buffer */ + case SYSLOG_ACTION_CLEAR: +- logged_chars = 0; ++ syslog_print_all(NULL, 0, true); + break; + /* Disable logging to console */ + case SYSLOG_ACTION_CONSOLE_OFF: + if (saved_console_loglevel == -1) +@@ -472,7 +1043,28 @@ int do_syslog(int type, char __user *buf + break; + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: +- error = log_end - log_start; ++ raw_spin_lock_irq(&logbuf_lock); ++ if (syslog_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ syslog_seq = log_first_seq; ++ syslog_idx = log_first_idx; ++ } ++ if (from_file) { ++ /* ++ * Optimize for poll("/proc/kmsg") which simply checks ++ * for pending data, not the size; return the count of ++ * records, not the length. ++ */ ++ error = log_next_idx - syslog_idx; ++ } else { ++ u32 idx; ++ ++ error = 0; ++ for (idx = syslog_idx; idx < log_next_idx; ++ idx = log_next(idx)) ++ error += syslog_print_line(idx, NULL, 0); ++ } ++ raw_spin_unlock_irq(&logbuf_lock); + break; + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: +@@ -501,29 +1093,11 @@ void kdb_syslog_data(char *syslog_data[4 + { + syslog_data[0] = log_buf; + syslog_data[1] = log_buf + log_buf_len; +- syslog_data[2] = log_buf + log_end - +- (logged_chars < log_buf_len ?
logged_chars : log_buf_len); +- syslog_data[3] = log_buf + log_end; ++ syslog_data[2] = log_buf + log_first_idx; ++ syslog_data[3] = log_buf + log_next_idx; + } + #endif /* CONFIG_KGDB_KDB */ + +-/* +- * Call the console drivers on a range of log_buf +- */ +-static void __call_console_drivers(unsigned start, unsigned end) +-{ +- struct console *con; +- +- for_each_console(con) { +- if (exclusive_console && con != exclusive_console) +- continue; +- if ((con->flags & CON_ENABLED) && con->write && +- (cpu_online(smp_processor_id()) || +- (con->flags & CON_ANYTIME))) +- con->write(con, &LOG_BUF(start), end - start); +- } +-} +- + static bool __read_mostly ignore_loglevel; + + static int __init ignore_loglevel_setup(char *str) +@@ -540,142 +1114,33 @@ MODULE_PARM_DESC(ignore_loglevel, "ignor + "print all kernel messages to the console."); + + /* +- * Write out chars from start to end - 1 inclusive +- */ +-static void _call_console_drivers(unsigned start, +- unsigned end, int msg_log_level) +-{ +- trace_console(&LOG_BUF(0), start, end, log_buf_len); +- +- if ((msg_log_level < console_loglevel || ignore_loglevel) && +- console_drivers && start != end) { +- if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { +- /* wrapped write */ +- __call_console_drivers(start & LOG_BUF_MASK, +- log_buf_len); +- __call_console_drivers(0, end & LOG_BUF_MASK); +- } else { +- __call_console_drivers(start, end); +- } +- } +-} +- +-/* +- * Parse the syslog header <[0-9]*>. The decimal value represents 32bit, the +- * lower 3 bit are the log level, the rest are the log facility. In case +- * userspace passes usual userspace syslog messages to /dev/kmsg or +- * /dev/ttyprintk, the log prefix might contain the facility. Printk needs +- * to extract the correct log level for in-kernel processing, and not mangle +- * the original value. +- * +- * If a prefix is found, the length of the prefix is returned. 
If 'level' is +- * passed, it will be filled in with the log level without a possible facility +- * value. If 'special' is passed, the special printk prefix chars are accepted +- * and returned. If no valid header is found, 0 is returned and the passed +- * variables are not touched. +- */ +-static size_t log_prefix(const char *p, unsigned int *level, char *special) +-{ +- unsigned int lev = 0; +- char sp = '\0'; +- size_t len; +- +- if (p[0] != '<' || !p[1]) +- return 0; +- if (p[2] == '>') { +- /* usual single digit level number or special char */ +- switch (p[1]) { +- case '0' ... '7': +- lev = p[1] - '0'; +- break; +- case 'c': /* KERN_CONT */ +- case 'd': /* KERN_DEFAULT */ +- sp = p[1]; +- break; +- default: +- return 0; +- } +- len = 3; +- } else { +- /* multi digit including the level and facility number */ +- char *endp = NULL; +- +- lev = (simple_strtoul(&p[1], &endp, 10) & 7); +- if (endp == NULL || endp[0] != '>') +- return 0; +- len = (endp + 1) - p; +- } +- +- /* do not accept special char if not asked for */ +- if (sp && !special) +- return 0; +- +- if (special) { +- *special = sp; +- /* return special char, do not touch level */ +- if (sp) +- return len; +- } +- +- if (level) +- *level = lev; +- return len; +-} +- +-/* + * Call the console drivers, asking them to write out + * log_buf[start] to log_buf[end - 1]. + * The console_lock must be held. 
+ */ +-static void call_console_drivers(unsigned start, unsigned end) ++static void call_console_drivers(int level, const char *text, size_t len) + { +- unsigned cur_index, start_print; +- static int msg_level = -1; ++ struct console *con; + +- BUG_ON(((int)(start - end)) > 0); ++ trace_console(text, 0, len, len); + +- cur_index = start; +- start_print = start; +- while (cur_index != end) { +- if (msg_level < 0 && ((end - cur_index) > 2)) { +- /* strip log prefix */ +- cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL); +- start_print = cur_index; +- } +- while (cur_index != end) { +- char c = LOG_BUF(cur_index); +- +- cur_index++; +- if (c == '\n') { +- if (msg_level < 0) { +- /* +- * printk() has already given us loglevel tags in +- * the buffer. This code is here in case the +- * log buffer has wrapped right round and scribbled +- * on those tags +- */ +- msg_level = default_message_loglevel; +- } +- _call_console_drivers(start_print, cur_index, msg_level); +- msg_level = -1; +- start_print = cur_index; +- break; +- } +- } +- } +- _call_console_drivers(start_print, end, msg_level); +-} ++ if (level >= console_loglevel && !ignore_loglevel) ++ return; ++ if (!console_drivers) ++ return; + +-static void emit_log_char(char c) +-{ +- LOG_BUF(log_end) = c; +- log_end++; +- if (log_end - log_start > log_buf_len) +- log_start = log_end - log_buf_len; +- if (log_end - con_start > log_buf_len) +- con_start = log_end - log_buf_len; +- if (logged_chars < log_buf_len) +- logged_chars++; ++ for_each_console(con) { ++ if (exclusive_console && con != exclusive_console) ++ continue; ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ if (!con->write) ++ continue; ++ if (!cpu_online(smp_processor_id()) && ++ !(con->flags & CON_ANYTIME)) ++ continue; ++ con->write(con, text, len); ++ } + } + + /* +@@ -700,16 +1165,6 @@ static void zap_locks(void) + sema_init(&console_sem, 1); + } + +-#if defined(CONFIG_PRINTK_TIME) +-static bool printk_time = 1; +-#else +-static bool 
printk_time = 0; +-#endif +-module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); +- +-static bool always_kmsg_dump; +-module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); +- + /* Check if we have any console registered that can be called early in boot. */ + static int have_callable_console(void) + { +@@ -722,51 +1177,6 @@ static int have_callable_console(void) + return 0; + } + +-/** +- * printk - print a kernel message +- * @fmt: format string +- * +- * This is printk(). It can be called from any context. We want it to work. +- * +- * We try to grab the console_lock. If we succeed, it's easy - we log the output and +- * call the console drivers. If we fail to get the semaphore we place the output +- * into the log buffer and return. The current holder of the console_sem will +- * notice the new output in console_unlock(); and will send it to the +- * consoles before releasing the lock. +- * +- * One effect of this deferred printing is that code which calls printk() and +- * then changes console_loglevel may break. This is because console_loglevel +- * is inspected when the actual printing occurs. +- * +- * See also: +- * printf(3) +- * +- * See the vsnprintf() documentation for format string extensions over C99. +- */ +- +-asmlinkage int printk(const char *fmt, ...) +-{ +- va_list args; +- int r; +- +-#ifdef CONFIG_KGDB_KDB +- if (unlikely(kdb_trap_printk)) { +- va_start(args, fmt); +- r = vkdb_printf(fmt, args); +- va_end(args); +- return r; +- } +-#endif +- va_start(args, fmt); +- r = vprintk(fmt, args); +- va_end(args); +- +- return r; +-} +- +-/* cpu currently holding logbuf_lock */ +-static volatile unsigned int printk_cpu = UINT_MAX; +- + /* + * Can we actually use the console at this time on this cpu? 
+ * +@@ -810,17 +1220,12 @@ static int console_trylock_for_printk(un + retval = 0; + } + } +- printk_cpu = UINT_MAX; ++ logbuf_cpu = UINT_MAX; + if (wake) + up(&console_sem); + raw_spin_unlock(&logbuf_lock); + return retval; + } +-static const char recursion_bug_msg [] = +- KERN_CRIT "BUG: recent printk recursion!\n"; +-static int recursion_bug; +-static int new_text_line = 1; +-static char printk_buf[1024]; + + int printk_delay_msec __read_mostly; + +@@ -836,15 +1241,22 @@ static inline void printk_delay(void) + } + } + +-asmlinkage int vprintk(const char *fmt, va_list args) +-{ +- int printed_len = 0; +- int current_log_level = default_message_loglevel; ++asmlinkage int vprintk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, va_list args) ++{ ++ static int recursion_bug; ++ static char buf[1024]; ++ static size_t buflen; ++ static int buflevel; ++ static char textbuf[1024]; ++ char *text = textbuf; ++ size_t textlen; + unsigned long flags; + int this_cpu; +- char *p; +- size_t plen; +- char special; ++ bool newline = false; ++ bool cont = false; ++ int printed_len = 0; + + boot_delay_msec(); + printk_delay(); +@@ -856,7 +1268,7 @@ asmlinkage int vprintk(const char *fmt, + /* + * Ouch, printk recursed into itself! 
+ */ +- if (unlikely(printk_cpu == this_cpu)) { ++ if (unlikely(logbuf_cpu == this_cpu)) { + /* + * If a crash is occurring during printk() on this CPU, + * then try to get the crash message out but make sure +@@ -873,97 +1285,92 @@ asmlinkage int vprintk(const char *fmt, + + lockdep_off(); + raw_spin_lock(&logbuf_lock); +- printk_cpu = this_cpu; ++ logbuf_cpu = this_cpu; + + if (recursion_bug) { ++ static const char recursion_msg[] = ++ "BUG: recent printk recursion!"; ++ + recursion_bug = 0; +- strcpy(printk_buf, recursion_bug_msg); +- printed_len = strlen(recursion_bug_msg); +- } +- /* Emit the output into the temporary buffer */ +- printed_len += vscnprintf(printk_buf + printed_len, +- sizeof(printk_buf) - printed_len, fmt, args); +- +- p = printk_buf; +- +- /* Read log level and handle special printk prefix */ +- plen = log_prefix(p, &current_log_level, &special); +- if (plen) { +- p += plen; +- +- switch (special) { +- case 'c': /* Strip <c> KERN_CONT, continue line */ +- plen = 0; +- break; +- case 'd': /* Strip <d> KERN_DEFAULT, start new line */ +- plen = 0; +- default: +- if (!new_text_line) { +- emit_log_char('\n'); +- new_text_line = 1; +- } +- } ++ printed_len += strlen(recursion_msg); ++ /* emit KERN_CRIT message */ ++ log_store(0, 2, NULL, 0, recursion_msg, printed_len); + } + + /* +- * Copy the output into log_buf. If the caller didn't provide +- * the appropriate log prefix, we insert them here ++ * The printf needs to come first; we need the syslog ++ * prefix which might be passed-in as a parameter.
+ */ +- for (; *p; p++) { +- if (new_text_line) { +- new_text_line = 0; +- +- if (plen) { +- /* Copy original log prefix */ +- int i; +- +- for (i = 0; i < plen; i++) +- emit_log_char(printk_buf[i]); +- printed_len += plen; +- } else { +- /* Add log prefix */ +- emit_log_char('<'); +- emit_log_char(current_log_level + '0'); +- emit_log_char('>'); +- printed_len += 3; +- } ++ textlen = vscnprintf(text, sizeof(textbuf), fmt, args); + +- if (printk_time) { +- /* Add the current time stamp */ +- char tbuf[50], *tp; +- unsigned tlen; +- unsigned long long t; +- unsigned long nanosec_rem; +- +- t = cpu_clock(printk_cpu); +- nanosec_rem = do_div(t, 1000000000); +- tlen = sprintf(tbuf, "[%5lu.%06lu] ", +- (unsigned long) t, +- nanosec_rem / 1000); +- +- for (tp = tbuf; tp < tbuf + tlen; tp++) +- emit_log_char(*tp); +- printed_len += tlen; +- } ++ while (textlen && text[textlen-1] == '\n') { ++ /* mark and strip a trailing newline */ ++ textlen--; ++ newline = true; ++ } + +- if (!*p) +- break; ++ /* strip syslog prefix and extract log level or flags */ ++ if (text[0] == '<' && text[1] && text[2] == '>') { ++ switch (text[1]) { ++ case '0' ... 
'7': ++ if (level == -1) ++ level = text[1] - '0'; ++ text += 3; ++ textlen -= 3; ++ break; ++ case 'c': /* KERN_CONT */ ++ cont = true; ++ case 'd': /* KERN_DEFAULT */ ++ text += 3; ++ textlen -= 3; ++ break; + } ++ } + +- emit_log_char(*p); +- if (*p == '\n') +- new_text_line = 1; ++ if (buflen && (!cont || dict)) { ++ /* no continuation; flush existing buffer */ ++ log_store(facility, buflevel, NULL, 0, buf, buflen); ++ printed_len += buflen; ++ buflen = 0; ++ } ++ ++ if (buflen == 0) { ++ /* remember level for first message in the buffer */ ++ if (level == -1) ++ buflevel = default_message_loglevel; ++ else ++ buflevel = level; ++ } ++ ++ if (buflen || !newline) { ++ /* append to existing buffer, or buffer until next message */ ++ if (buflen + textlen > sizeof(buf)) ++ textlen = sizeof(buf) - buflen; ++ memcpy(buf + buflen, text, textlen); ++ buflen += textlen; ++ } ++ ++ if (newline) { ++ /* end of line; flush buffer */ ++ if (buflen) { ++ log_store(facility, buflevel, ++ dict, dictlen, buf, buflen); ++ printed_len += buflen; ++ buflen = 0; ++ } else { ++ log_store(facility, buflevel, ++ dict, dictlen, text, textlen); ++ printed_len += textlen; ++ } + } + + /* +- * Try to acquire and then immediately release the +- * console semaphore. The release will do all the +- * actual magic (print out buffers, wake up klogd, +- * etc). ++ * Try to acquire and then immediately release the console semaphore. ++ * The release will print out buffers and wake up /dev/kmsg and syslog() ++ * users. + * +- * The console_trylock_for_printk() function +- * will release 'logbuf_lock' regardless of whether it +- * actually gets the semaphore or not. ++ * The console_trylock_for_printk() function will release 'logbuf_lock' ++ * regardless of whether it actually gets the console semaphore or not. 
+ */ + if (console_trylock_for_printk(this_cpu)) + console_unlock(); +@@ -974,12 +1381,73 @@ out_restore_irqs: + + return printed_len; + } +-EXPORT_SYMBOL(printk); ++EXPORT_SYMBOL(vprintk_emit); ++ ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return vprintk_emit(0, -1, NULL, 0, fmt, args); ++} + EXPORT_SYMBOL(vprintk); + ++asmlinkage int printk_emit(int facility, int level, ++ const char *dict, size_t dictlen, ++ const char *fmt, ...) ++{ ++ va_list args; ++ int r; ++ ++ va_start(args, fmt); ++ r = vprintk_emit(facility, level, dict, dictlen, fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk_emit); ++ ++/** ++ * printk - print a kernel message ++ * @fmt: format string ++ * ++ * This is printk(). It can be called from any context. We want it to work. ++ * ++ * We try to grab the console_lock. If we succeed, it's easy - we log the ++ * output and call the console drivers. If we fail to get the semaphore, we ++ * place the output into the log buffer and return. The current holder of ++ * the console_sem will notice the new output in console_unlock(); and will ++ * send it to the consoles before releasing the lock. ++ * ++ * One effect of this deferred printing is that code which calls printk() and ++ * then changes console_loglevel may break. This is because console_loglevel ++ * is inspected when the actual printing occurs. ++ * ++ * See also: ++ * printf(3) ++ * ++ * See the vsnprintf() documentation for format string extensions over C99. ++ */ ++asmlinkage int printk(const char *fmt, ...) 
++{ ++ va_list args; ++ int r; ++ ++#ifdef CONFIG_KGDB_KDB ++ if (unlikely(kdb_trap_printk)) { ++ va_start(args, fmt); ++ r = vkdb_printf(fmt, args); ++ va_end(args); ++ return r; ++ } ++#endif ++ va_start(args, fmt); ++ r = vprintk_emit(0, -1, NULL, 0, fmt, args); ++ va_end(args); ++ ++ return r; ++} ++EXPORT_SYMBOL(printk); + #else + +-static void call_console_drivers(unsigned start, unsigned end) ++static void call_console_drivers(int level, const char *text, size_t len) + { + } + +@@ -1217,7 +1685,7 @@ int is_console_locked(void) + } + + /* +- * Delayed printk facility, for scheduler-internal messages: ++ * Delayed printk version, for scheduler-internal messages: + */ + #define PRINTK_BUF_SIZE 512 + +@@ -1253,6 +1721,10 @@ void wake_up_klogd(void) + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + } + ++/* the next printk record to write to the console */ ++static u64 console_seq; ++static u32 console_idx; ++ + /** + * console_unlock - unlock the console system + * +@@ -1263,15 +1735,16 @@ void wake_up_klogd(void) + * by printk(). If this is the case, console_unlock(); emits + * the output prior to releasing the lock. + * +- * If there is output waiting for klogd, we wake it up. ++ * If there is output waiting, we wake /dev/kmsg and syslog() users. + * + * console_unlock(); may be called from any context.
+ */ + void console_unlock(void) + { ++ static u64 seen_seq; + unsigned long flags; +- unsigned _con_start, _log_end; +- unsigned wake_klogd = 0, retry = 0; ++ bool wake_klogd = false; ++ bool retry; + + if (console_suspended) { + up(&console_sem); +@@ -1281,17 +1754,40 @@ void console_unlock(void) + console_may_schedule = 0; + + again: +- for ( ; ; ) { ++ for (;;) { ++ struct log *msg; ++ static char text[1024]; ++ size_t len; ++ int level; ++ + raw_spin_lock_irqsave(&logbuf_lock, flags); +- wake_klogd |= log_start - log_end; +- if (con_start == log_end) +- break; /* Nothing to print */ +- _con_start = con_start; +- _log_end = log_end; +- con_start = log_end; /* Flush */ ++ if (seen_seq != log_next_seq) { ++ wake_klogd = true; ++ seen_seq = log_next_seq; ++ } ++ ++ if (console_seq == log_next_seq) ++ break; ++ ++ if (console_seq < log_first_seq) { ++ /* messages are gone, move to first one */ ++ console_seq = log_first_seq; ++ console_idx = log_first_idx; ++ } ++ msg = log_from_idx(console_idx); ++ level = msg->level; ++ len = msg->text_len; ++ if (len+1 >= sizeof(text)) ++ len = sizeof(text)-1; ++ memcpy(text, log_text(msg), len); ++ text[len++] = '\n'; ++ ++ console_idx = log_next(console_idx); ++ console_seq++; + raw_spin_unlock(&logbuf_lock); ++ + stop_critical_timings(); /* don't trace print latency */ +- call_console_drivers(_con_start, _log_end); ++ call_console_drivers(level, text, len); + start_critical_timings(); + local_irq_restore(flags); + } +@@ -1312,8 +1808,7 @@ again: + * flush, no worries. + */ + raw_spin_lock(&logbuf_lock); +- if (con_start != log_end) +- retry = 1; ++ retry = console_seq != log_next_seq; + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + if (retry && console_trylock()) +@@ -1549,7 +2044,8 @@ void register_console(struct console *ne + * for us. 
+ */ + raw_spin_lock_irqsave(&logbuf_lock, flags); +- con_start = log_start; ++ console_seq = syslog_seq; ++ console_idx = syslog_idx; + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + /* + * We're about to replay the log buffer. Only do this to the +@@ -1758,6 +2254,9 @@ int kmsg_dump_unregister(struct kmsg_dum + } + EXPORT_SYMBOL_GPL(kmsg_dump_unregister); + ++static bool always_kmsg_dump; ++module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); ++ + /** + * kmsg_dump - dump kernel log to kernel message dumpers. + * @reason: the reason (oops, panic etc) for dumping +@@ -1767,8 +2266,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); + */ + void kmsg_dump(enum kmsg_dump_reason reason) + { +- unsigned long end; +- unsigned chars; ++ u64 idx; + struct kmsg_dumper *dumper; + const char *s1, *s2; + unsigned long l1, l2; +@@ -1780,24 +2278,27 @@ void kmsg_dump(enum kmsg_dump_reason rea + /* Theoretically, the log could move on after we do this, but + there's not a lot we can do about that. The new messages + will overwrite the start of what we dump. 
*/ +- raw_spin_lock_irqsave(&logbuf_lock, flags); +- end = log_end & LOG_BUF_MASK; +- chars = logged_chars; +- raw_spin_unlock_irqrestore(&logbuf_lock, flags); + +- if (chars > end) { +- s1 = log_buf + log_buf_len - chars + end; +- l1 = chars - end; ++ raw_spin_lock_irqsave(&logbuf_lock, flags); ++ if (syslog_seq < log_first_seq) ++ idx = syslog_idx; ++ else ++ idx = log_first_idx; ++ ++ if (idx > log_next_idx) { ++ s1 = log_buf; ++ l1 = log_next_idx; + +- s2 = log_buf; +- l2 = end; ++ s2 = log_buf + idx; ++ l2 = log_buf_len - idx; + } else { + s1 = ""; + l1 = 0; + +- s2 = log_buf + end - chars; +- l2 = chars; ++ s2 = log_buf + idx; ++ l2 = log_next_idx - idx; + } ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + rcu_read_lock(); + list_for_each_entry_rcu(dumper, &dump_list, list) @@ -1,2 +1 @@ -prctl-child_reaper.patch -udlfb.patch +printk.patch diff --git a/udlfb.patch b/udlfb.patch deleted file mode 100644 index ddcb393..0000000 --- a/udlfb.patch +++ /dev/null @@ -1,94 +0,0 @@ -From: Kay Sievers <kay.sievers@vrfy.org> -Subject: udlfb: remove sysfs framebuffer device with USB .disconnect() - -The USB graphics card driver delays the unregistering of the framebuffer -device to a workqueue, which breaks the userspace visible remove event -sequence. - -The framebuffer device is a direct child of the USB interface which is -removed immedeately after the USB .disconnect() callback. But the fb device -in /sys stays around until its final cleanup, at a time where all the parent -devices have been removed already. - -To work around that, we remove the sysfs fb device directly in the USB -.disconnect() callback and leave only the cleanup of the internal fb -data to the delayed work. 
- -Before: - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb0 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - remove /2-1.2:1.0/graphics/fb0 (graphics) - -After: - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - add /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0/graphics/fb1 (graphics) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2/2-1.2:1.0 (usb) - remove /devices/pci0000:00/0000:00:1d.0/usb2/2-1/2-1.2 (usb) - -Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> ---- - drivers/video/fbmem.c | 18 +++++++++++++++++- - drivers/video/udlfb.c | 2 +- - include/linux/fb.h | 1 + - 3 files changed, 19 insertions(+), 2 deletions(-) - ---- a/drivers/video/fbmem.c -+++ b/drivers/video/fbmem.c -@@ -1672,7 +1672,7 @@ static int do_unregister_framebuffer(str - registered_fb[i] = NULL; - num_registered_fb--; - fb_cleanup_device(fb_info); -- device_destroy(fb_class, MKDEV(FB_MAJOR, i)); -+ unlink_framebuffer(fb_info); - event.info = fb_info; - fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); - -@@ -1681,6 +1681,22 @@ static int do_unregister_framebuffer(str - return 0; - } - -+int unlink_framebuffer(struct fb_info *fb_info) -+{ -+ int i; -+ -+ i = fb_info->node; -+ if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) -+ return -EINVAL; -+ -+ if (fb_info->dev) { -+ device_destroy(fb_class, MKDEV(FB_MAJOR, i)); -+ fb_info->dev = NULL; -+ } -+ return 0; -+} -+EXPORT_SYMBOL(unlink_framebuffer); -+ - void remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary) - { ---- a/drivers/video/udlfb.c 
-+++ b/drivers/video/udlfb.c -@@ -1739,7 +1739,7 @@ static void dlfb_usb_disconnect(struct u - for (i = 0; i < ARRAY_SIZE(fb_device_attrs); i++) - device_remove_file(info->dev, &fb_device_attrs[i]); - device_remove_bin_file(info->dev, &edid_attr); -- -+ unlink_framebuffer(info); - usb_set_intfdata(interface, NULL); - - /* if clients still have us open, will be freed on last close */ ---- a/include/linux/fb.h -+++ b/include/linux/fb.h -@@ -1003,6 +1003,7 @@ extern ssize_t fb_sys_write(struct fb_in - /* drivers/video/fbmem.c */ - extern int register_framebuffer(struct fb_info *fb_info); - extern int unregister_framebuffer(struct fb_info *fb_info); -+extern int unlink_framebuffer(struct fb_info *fb_info); - extern void remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary); - extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); |