diff options
author | Daniel Bristot de Oliveira <bristot@redhat.com> | 2020-10-12 11:19:00 +0200 |
---|---|---|
committer | Daniel Bristot de Oliveira <bristot@redhat.com> | 2020-10-12 11:19:00 +0200 |
commit | 741b46474cdbdbacf14c63361652835ed0da9358 (patch) | |
tree | a3c5be331c8c22fc4c898202ca32faf761afc272 | |
parent | 1d3d9329a99b8047ea0886df370de6ddf4c7f5c9 (diff) | |
download | stalld-741b46474cdbdbacf14c63361652835ed0da9358.tar.gz |
stalld: Do not die if sched_debug returns an invalid value
Instead of dying when an invalid value of nr of tasks is read from
sched_debug, warn and return an error to main.
This lets main try again.
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
-rw-r--r-- | src/stalld.c | 68 |
1 files changed, 57 insertions, 11 deletions
diff --git a/src/stalld.c b/src/stalld.c index 6d7746c..0f80369 100644 --- a/src/stalld.c +++ b/src/stalld.c @@ -148,7 +148,6 @@ int boost_policy; /* * print any error messages and exit */ - void die(const char *fmt, ...) { va_list ap; @@ -169,6 +168,25 @@ void die(const char *fmt, ...) } /* + * printy the error messages and but do not exit. + */ +void warn(const char *fmt, ...) +{ + va_list ap; + + if (errno) + perror("stalld: "); + + va_start(ap, fmt); + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); +} + + +/* * path to file for storing daemon pid */ char pidfile[MAXPATHLEN]; @@ -506,8 +524,10 @@ long get_long_from_str(char *start) errno = 0; value = strtol(start, &end, 10); - if (errno || start == end) - die("Invalid ID '%s'", value); + if (errno || start == end) { + warn("Invalid ID '%s'", value); + return -1; + } return value; } @@ -691,12 +711,22 @@ int parse_cpu_info(struct cpu_info *cpu_info, char *buffer, int buffer_size) struct task_info *old_tasks = cpu_info->starving; int nr_old_tasks = cpu_info->nr_waiting_tasks; + long nr_running, nr_rt_running; int cpu = cpu_info->id; char *cpu_buffer; + int retval = 0; cpu_buffer = alloc_and_fill_cpu_buffer(cpu, buffer, buffer_size); if (!cpu_buffer) - return -1; + return -ENOMEM; + + nr_running = get_variable_long_value(cpu_buffer, ".nr_running"); + nr_rt_running = get_variable_long_value(cpu_buffer, ".rt_nr_running"); + + if ((nr_running == -1) || (nr_rt_running == -1)) { + retval = -EINVAL; + goto out_free; + } cpu_info->nr_running = get_variable_long_value(cpu_buffer, ".nr_running"); cpu_info->nr_rt_running = get_variable_long_value(cpu_buffer, ".rt_nr_running"); @@ -708,9 +738,10 @@ int parse_cpu_info(struct cpu_info *cpu_info, char *buffer, int buffer_size) free(old_tasks); } +out_free: free(cpu_buffer); - return 0; + return retval; } int get_current_policy(int pid, struct sched_attr *attr) @@ -1169,10 +1200,18 @@ void *cpu_main(void *data) while 
(cpu->thread_running) { retval = read_sched_debug(buffer, BUFFER_SIZE); - if(!retval) - die("fail reading sched debug file!"); + if(!retval) { + warn("fail reading sched debug file"); + warn("Dazed and confused, but trying to continue"); + continue; + } - parse_cpu_info(cpu, buffer, BUFFER_SIZE); + retval = parse_cpu_info(cpu, buffer, BUFFER_SIZE); + if (retval) { + warn("error parsing CPU info"); + warn("Dazed and confused, but trying to continue"); + continue; + } if (config_verbose) print_waiting_tasks(cpu); @@ -1248,8 +1287,10 @@ int conservative_main(struct cpu_info *cpus, int nr_cpus) while (1) { retval = read_sched_debug(buffer, BUFFER_SIZE); - if(!retval) - die("fail reading sched debug file!"); + if(!retval) { + warn("Dazed and confused, but trying to continue"); + continue; + } for (i = 0; i < nr_cpus; i++) { if (!should_monitor(i)) @@ -1260,7 +1301,12 @@ int conservative_main(struct cpu_info *cpus, int nr_cpus) if (cpu->thread_running) continue; - parse_cpu_info(cpu, buffer, BUFFER_SIZE); + retval = parse_cpu_info(cpu, buffer, BUFFER_SIZE); + if (retval) { + warn("error parsing CPU info"); + warn("Dazed and confused, but trying to continue"); + continue; + } if (config_verbose) printf("\tchecking cpu %d - rt: %d - starving: %d\n", |