aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@fusionio.com>2012-08-15 16:10:55 -0400
committerChris Mason <chris.mason@fusionio.com>2012-08-15 16:10:55 -0400
commit9e066e233bbe8cff186975681d35023f9e6f1b02 (patch)
tree08c0924393423a2bf105d77c53d2524a5fbb0f85
downloadblktrace-9e066e233bbe8cff186975681d35023f9e6f1b02.tar.gz
iowatcher: Initial revision
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r--iowatcher/COPYING341
-rw-r--r--iowatcher/Makefile37
-rw-r--r--iowatcher/README62
-rw-r--r--iowatcher/blkparse.c690
-rw-r--r--iowatcher/blkparse.h73
-rw-r--r--iowatcher/list.h449
-rw-r--r--iowatcher/main.c687
-rw-r--r--iowatcher/plot.c710
-rw-r--r--iowatcher/plot.h115
-rw-r--r--iowatcher/tracers.c164
-rw-r--r--iowatcher/tracers.h27
11 files changed, 3355 insertions, 0 deletions
diff --git a/iowatcher/COPYING b/iowatcher/COPYING
new file mode 100644
index 0000000..10828e0
--- /dev/null
+++ b/iowatcher/COPYING
@@ -0,0 +1,341 @@
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/iowatcher/Makefile b/iowatcher/Makefile
new file mode 100644
index 0000000..f15e5b2
--- /dev/null
+++ b/iowatcher/Makefile
@@ -0,0 +1,37 @@
+# Compiler used by every rule below through $(CC). The variable was spelled
+# "C", which nothing references, so the built-in default for CC was used.
+CC = gcc
+CFLAGS = -Wall -O0 -g -W
+ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+
+PROGS = iowatcher
+INSTALL = install
+prefix = /usr/local
+bindir = $(prefix)/bin
+
+export prefix INSTALL
+
+ALL = $(PROGS)
+
+$(PROGS): | depend
+
+all: $(ALL)
+
+%.o: %.c
+ $(CC) -o $*.o -c $(ALL_CFLAGS) $<
+
+iowatcher: blkparse.o plot.o main.o tracers.o
+ $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) -lm
+
+depend:
+ @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend
+
+clean:
+ -rm -f *.o $(PROGS) .depend
+
+install: all
+ $(INSTALL) -m 755 -d $(DESTDIR)$(bindir)
+ $(INSTALL) -m 755 $(ALL) $(DESTDIR)$(bindir)
+
+ifneq ($(wildcard .depend),)
+include .depend
+endif
+
diff --git a/iowatcher/README b/iowatcher/README
new file mode 100644
index 0000000..dfc328a
--- /dev/null
+++ b/iowatcher/README
@@ -0,0 +1,62 @@
+iowatcher graphs the results of a blktrace run. It has a few different modes:
+
+* Graph the result of an existing blktrace
+
+* Start a new blktrace
+
+* Start a new blktrace and a benchmark run
+
+The basic options:
+
+ -d controls which device you are tracing. You can only trace one device
+ at a time for now. It is sent directly to blktrace
+
+ -t controls the name of the blktrace file. iowatcher uses a dump from
+ blkparse, so -t tries to guess the name of the corresponding
+ per CPU blktrace data files if the dump file doesn't already exist.
+
+ If you want more than one trace in a given graph, you can specify
+ -t more than once.
+
+ -l Sets a label for a trace file. The labels are added in the same
+ order the trace files are added.
+
+ -T Set a title for the graph. This goes at the top of the image.
+
+ -o output filename. The default is trace.svg. iowatcher is
+ only able to create svg for now.
+
+ -r control the duration in seconds for the rolling average.
+ iowatcher tries to smooth out bumpy graphs by averaging the
+ current second with seconds from the past. Longer numbers here
+ give you flatter graphs.
+
+ -O add a single graph to the output. By default all the graphs
+ are included, but with -O you get only the graphs you ask for.
+ -O may be used more than once.
+
+ -N remove a single graph from the output. This may also be used more
+ than once.
+
+ Choices for -O and -N are:
+ io, tput, latency, queue_depth, iops
+
+Examples:
+
+ # generate graph from the existing trace.dump
+ iowatcher -t trace.dump -o trace.svg
+
+ # skip the IO graph
+ iowatcher -t trace.dump -o trace.svg -N io
+
+ # only graph tput and latency
+ iowatcher -t trace.dump -o trace.svg -O tput -O latency
+
+ # generate a graph from two runs, and label them
+ iowatcher -t ext4.dump -t xfs.dump -l Ext4 -l XFS -o trace.svg
+
+ # Run a fio benchmark and store the trace in trace.dump
+ # add a title to the top. Use /dev/sda for blktrace
+ iowatcher -d /dev/sda -t trace.dump -T 'Fio Benchmark' -p 'fio some_job_file'
+
+Please email chris.mason@fusionio.com with any questions
diff --git a/iowatcher/blkparse.c b/iowatcher/blkparse.c
new file mode 100644
index 0000000..a587d54
--- /dev/null
+++ b/iowatcher/blkparse.c
@@ -0,0 +1,690 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <inttypes.h>
+#include <string.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <math.h>
+
+#include "plot.h"
+#include "blkparse.h"
+#include "list.h"
+#include "tracers.h"
+
+#define IO_HASH_TABLE_BITS 11 /* hash_sector() keeps this many bits of the hash */
+#define IO_HASH_TABLE_SIZE (1 << IO_HASH_TABLE_BITS) /* number of buckets */
+static struct list_head io_hash_table[IO_HASH_TABLE_SIZE]; /* lists of struct pending_io, indexed by hash_sector(sector) */
+static u64 ios_in_flight = 0; /* presumably the number of dispatched but uncompleted IOs; confirm against its users */
+
+
+/*
+ * Trace categories
+ *
+ * Each category is a single bit flag. In blk_io_trace.action the flags
+ * live above the basic action number: BLK_TC_ACT() shifts a category
+ * up by BLK_TC_SHIFT bits and BLK_DATADIR() shifts an action back down
+ * and keeps only the read and write flags.
+ */
+enum {
+ BLK_TC_READ = 1 << 0, /* reads */
+ BLK_TC_WRITE = 1 << 1, /* writes */
+ BLK_TC_FLUSH = 1 << 2, /* flush */
+ BLK_TC_SYNC = 1 << 3, /* sync */
+ BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
+ BLK_TC_REQUEUE = 1 << 5, /* requeueing */
+ BLK_TC_ISSUE = 1 << 6, /* issue */
+ BLK_TC_COMPLETE = 1 << 7, /* completions */
+ BLK_TC_FS = 1 << 8, /* fs requests */
+ BLK_TC_PC = 1 << 9, /* pc requests */
+ BLK_TC_NOTIFY = 1 << 10, /* special message */
+ BLK_TC_AHEAD = 1 << 11, /* readahead */
+ BLK_TC_META = 1 << 12, /* metadata */
+ BLK_TC_DISCARD = 1 << 13, /* discard requests */
+ BLK_TC_DRV_DATA = 1 << 14, /* binary driver data */
+ BLK_TC_FUA = 1 << 15, /* fua requests */
+
+ BLK_TC_END = 1 << 15, /* same bit as BLK_TC_FUA: we've run out of bits! */
+};
+
+#define BLK_TC_SHIFT (16) /* category flags start at this bit of an action */
+#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT) /* category flag(s) -> position inside an action word */
+#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE)) /* read/write flags of an action word */
+
+/*
+ * Basic trace actions
+ *
+ * Plain sequence numbers starting at 1. The BLK_TA_* macros combine
+ * most of them with category flags to form the values compared against
+ * blk_io_trace.action.
+ */
+enum {
+ __BLK_TA_QUEUE = 1, /* queued */
+ __BLK_TA_BACKMERGE, /* back merged to existing rq */
+ __BLK_TA_FRONTMERGE, /* front merge to existing rq */
+ __BLK_TA_GETRQ, /* allocated new request */
+ __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */
+ __BLK_TA_REQUEUE, /* request requeued */
+ __BLK_TA_ISSUE, /* sent to driver */
+ __BLK_TA_COMPLETE, /* completed by driver */
+ __BLK_TA_PLUG, /* queue was plugged */
+ __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */
+ __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
+ __BLK_TA_INSERT, /* insert request */
+ __BLK_TA_SPLIT, /* bio was split */
+ __BLK_TA_BOUNCE, /* bio was bounced */
+ __BLK_TA_REMAP, /* bio was remapped */
+ __BLK_TA_ABORT, /* request aborted */
+ __BLK_TA_DRV_DATA, /* binary driver data */
+};
+
+/*
+ * Notify events.
+ *
+ * Numbered from 0. The BLK_TN_* macros OR each of them with the
+ * BLK_TC_NOTIFY category flag; handle_notify() only acts on the
+ * timestamp event.
+ */
+enum blktrace_notify {
+ __BLK_TN_PROCESS = 0, /* establish pid/name mapping */
+ __BLK_TN_TIMESTAMP, /* include system clock */
+ __BLK_TN_MESSAGE, /* Character string message */
+};
+
+/*
+ * Trace actions in full. Additionally, read or write is masked
+ *
+ * Each value is a basic action number ORed with the category flag it
+ * belongs to, moved into place by BLK_TC_ACT(). BLK_TA_SPLIT and
+ * BLK_TA_BOUNCE carry no category flag. The BLK_TN_* values all use
+ * the BLK_TC_NOTIFY category.
+ */
+#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
+#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
+#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
+#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
+#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
+#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
+
+#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
+
+#define BLK_IO_TRACE_MAGIC 0x65617400 /* upper 24 bits of blk_io_trace.magic, see CHECK_MAGIC() */
+#define BLK_IO_TRACE_VERSION 0x07
+/*
+ * The trace itself
+ *
+ * Fixed size header of one record. It is followed in the trace data by
+ * pdu_len bytes of payload, so the next record starts
+ * sizeof(struct blk_io_trace) + pdu_len bytes further on (see
+ * next_record()).
+ */
+struct blk_io_trace {
+ __u32 magic; /* MAGIC << 8 | version */
+ __u32 sequence; /* event number */
+ __u64 time; /* in nanoseconds */
+ __u64 sector; /* disk offset */
+ __u32 bytes; /* transfer length */
+ __u32 action; /* what happened */
+ __u32 pid; /* who did it */
+ __u32 device; /* device identifier (dev_t) */
+ __u32 cpu; /* on what cpu did it happen */
+ __u16 error; /* completion error */
+ __u16 pdu_len; /* length of data after this trace */
+};
+
+struct pending_io { /* one tracked IO, stored in io_hash_table and looked up by sector */
+ /* sector offset of this IO, also the hash table key */
+ u64 sector;
+
+ /* time this IO was dispatched, copied from blk_io_trace.time */
+ u64 dispatch_time;
+ /* time this IO was finished */
+ u64 completion_time;
+ struct list_head hash_list; /* links this entry into its io_hash_table bucket */
+};
+
+#define MINORBITS 20 /* presumably the minor number width inside blk_io_trace.device; confirm against users */
+#define MINORMASK ((1 << MINORBITS) - 1) /* the low MINORBITS bits set */
+#define SECONDS(x) ((unsigned long long)(x) / 1000000000) /* whole seconds in a nanosecond count */
+#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000) /* nanoseconds left over after SECONDS() */
+#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000)) /* d scaled by 1e9, truncated to an integer */
+#define CHECK_MAGIC(t) (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) /* low byte of magic is ignored */
+
+/* Turn every bucket of io_hash_table into an empty list. */
+void init_io_hash_table(void)
+{
+	int slot = 0;
+
+	while (slot < IO_HASH_TABLE_SIZE) {
+		INIT_LIST_HEAD(&io_hash_table[slot]);
+		slot++;
+	}
+}
+
+/*
+ * Hash a sector number down to IO_HASH_TABLE_BITS bits.
+ *
+ * Taken from the kernel hash.h. The hash is built from val and a fixed
+ * set of left shifted copies of val, added or subtracted modulo 2^64;
+ * only the top IO_HASH_TABLE_BITS bits of the result are returned.
+ */
+static inline u64 hash_sector(u64 val)
+{
+	u64 mixed = val;
+
+	/* shift amounts are the running totals 18, +33, +3, +3, +4, +2 */
+	mixed -= val << 18;
+	mixed -= val << 51;
+	mixed += val << 54;
+	mixed -= val << 57;
+	mixed += val << 61;
+	mixed += val << 63;
+
+	/* High bits are more random, so use them. */
+	return mixed >> (64 - IO_HASH_TABLE_BITS);
+}
+
+/*
+ * Append ins_pio to the bucket its sector hashes to.
+ *
+ * Returns 0 on success, or -EEXIST (leaving the table unchanged) when the
+ * bucket already holds an entry for the same sector.
+ */
+static int hash_table_insert(struct pending_io *ins_pio)
+{
+	struct list_head *bucket = &io_hash_table[hash_sector(ins_pio->sector)];
+	struct pending_io *cur;
+
+	list_for_each_entry(cur, bucket, hash_list) {
+		if (cur->sector == ins_pio->sector)
+			return -EEXIST;
+	}
+
+	list_add_tail(&ins_pio->hash_list, bucket);
+	return 0;
+}
+
+/*
+ * Find the pending IO recorded for sector.
+ *
+ * Returns the first entry with that sector in the bucket it hashes to,
+ * or NULL when there is none.
+ */
+static struct pending_io *hash_table_search(u64 sector)
+{
+	struct list_head *bucket = &io_hash_table[hash_sector(sector)];
+	struct pending_io *match = NULL;
+	struct pending_io *cur;
+
+	list_for_each_entry(cur, bucket, hash_list) {
+		if (cur->sector != sector)
+			continue;
+		match = cur;
+		break;
+	}
+	return match;
+}
+
+/*
+ * Record a dispatched IO in the pending IO hash table, keyed by its
+ * sector, so that hash_completed_io() can find it again.
+ *
+ * Returns 0 on success, -EEXIST when an IO for the same sector is already
+ * in the table (the new entry is discarded), or -ENOMEM when the tracking
+ * structure cannot be allocated.
+ */
+static int hash_dispatched_io(struct blk_io_trace *io)
+{
+	struct pending_io *pio;
+	int ret;
+
+	pio = calloc(1, sizeof(*pio));
+	if (!pio)
+		return -ENOMEM;
+
+	pio->sector = io->sector;
+	pio->dispatch_time = io->time;
+
+	ret = hash_table_insert(pio);
+	if (ret == -EEXIST) {
+		/* this sector is already being tracked, drop the duplicate */
+		free(pio);
+	}
+	return ret;
+}
+
+/*
+ * Look up the pending IO that matches the sector of a completion record.
+ * Returns NULL when no IO for that sector is in the hash table.
+ */
+static struct pending_io *hash_completed_io(struct blk_io_trace *io)
+{
+	return hash_table_search(io->sector);
+}
+
+/*
+ * Process the notify record trace->io currently points at.
+ *
+ * Only BLK_TN_TIMESTAMP records whose payload is exactly two 32 bit words
+ * are used: the record time becomes trace->start_timestamp and the two
+ * words become the seconds and nanoseconds of trace->abs_start_time.
+ * Everything else is ignored.
+ */
+static void handle_notify(struct trace *trace)
+{
+	struct blk_io_trace *rec = trace->io;
+	u32 stamp[2];
+
+	if (rec->action != BLK_TN_TIMESTAMP ||
+	    rec->pdu_len != sizeof(stamp))
+		return;
+
+	/* the payload sits directly behind the fixed size header */
+	memcpy(stamp, (char *)rec + sizeof(*rec), sizeof(stamp));
+
+	trace->start_timestamp = rec->time;
+	trace->abs_start_time.tv_sec = stamp[0];
+	trace->abs_start_time.tv_nsec = stamp[1];
+	if (trace->abs_start_time.tv_nsec >= 0)
+		return;
+
+	/* borrow one second to bring tv_nsec back above zero */
+	trace->abs_start_time.tv_sec--;
+	trace->abs_start_time.tv_nsec += 1000000000;
+}
+
+/*
+ * Step from the current record to the one that follows it.
+ *
+ * trace->cur is advanced past the current header and its pdu_len bytes of
+ * payload. Returns 0 with trace->io pointing at the next record, or 1
+ * when the end of the trace has been reached; trace->io is left unchanged
+ * in that case.
+ */
+int next_record(struct trace *trace)
+{
+	int skip = trace->io->pdu_len;
+	u64 offset;
+
+	trace->cur += sizeof(*trace->io) + skip;
+	offset = trace->cur - trace->start;
+	if (offset >= trace->len)
+		return 1;
+
+	/*
+	 * A truncated trace can end in the middle of a header. Treat that
+	 * as the end of the data instead of handing out a record whose
+	 * fields lie beyond trace->len.
+	 */
+	if (trace->len - offset < sizeof(*trace->io))
+		return 1;
+
+	trace->io = (struct blk_io_trace *)trace->cur;
+	return 0;
+}
+
+/* Rewind the trace: both trace->cur and trace->io refer to its first record. */
+void first_record(struct trace *trace)
+{
+	trace->io = (struct blk_io_trace *)trace->start;
+	trace->cur = trace->start;
+}
+
+/*
+ * Return the time of the last non-notify record in the trace, or 0 when
+ * the trace is shorter than one record header.
+ *
+ * The fast path walks backwards one byte at a time from the last position
+ * a header could start at, looking for a non-notify header with a valid
+ * magic whose header plus payload ends exactly at the end of the trace.
+ * If that fails the whole trace is scanned from the front, and the trace
+ * is left positioned at its first record.
+ */
+u64 find_last_time(struct trace *trace)
+{
+	char *end = trace->start + trace->len;
+	struct blk_io_trace *rec;
+	char *pos;
+	int tries;
+	u64 last = 0;
+
+	if (trace->len < sizeof(*trace->io))
+		return 0;
+
+	pos = end - sizeof(*trace->io);
+	for (tries = 0; pos >= trace->start && tries <= 8192; pos--, tries++) {
+		u64 offset;
+
+		rec = (struct blk_io_trace *)pos;
+		if (!CHECK_MAGIC(rec))
+			continue;
+		if (rec->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+			continue;
+
+		offset = pos - trace->start;
+		if (offset + sizeof(*rec) + rec->pdu_len == trace->len)
+			return rec->time;
+	}
+
+	/* searching backwards didn't work out, we'll have to scan the file */
+	first_record(trace);
+	do {
+		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)))
+			last = trace->io->time;
+	} while (!next_record(trace));
+	first_record(trace);
+
+	return last;
+}
+
+/*
+ * Scan the whole trace and return the highest byte offset any non-notify
+ * record reaches, computed as (sector << 9) + bytes. The trace is left
+ * positioned at its first record.
+ */
+u64 find_highest_offset(struct trace *trace)
+{
+	u64 highest = 0;
+
+	first_record(trace);
+	do {
+		struct blk_io_trace *rec = trace->io;
+		u64 end;
+
+		if (rec->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+			continue;
+
+		end = (rec->sector << 9) + rec->bytes;
+		if (end > highest)
+			highest = end;
+	} while (!next_record(trace));
+	first_record(trace);
+
+	return highest;
+}
+
+/*
+ * Pick a y-axis zoom window that ignores sparsely hit offset ranges.
+ *
+ * The offset range up to max_offset is divided into buckets of
+ * max_offset / 10 bytes and every non-notify record is counted in the
+ * bucket holding the end of its IO ((sector << 9) + bytes).  A bucket is
+ * significant when it has at least 5% of the hits of the busiest bucket.
+ *
+ * @yzoom_max: set to the highest IO end seen in the last significant
+ *             bucket, or to max_offset when none qualifies.
+ * @yzoom_min: set to the starting offset of the first significant bucket
+ *             among buckets 0-9, or to 0.
+ *
+ * Always returns 0.  The trace cursor is rewound before returning.
+ */
+int filter_outliers(struct trace *trace, u64 max_offset,
+		    u64 *yzoom_min, u64 *yzoom_max)
+{
+	int hits[11];
+	u64 max_per_bucket[11];
+	u64 bytes_per_bucket = max_offset / 10;
+	int slot;
+	int fat_count = 0;
+
+	/* max_offset below 10 would otherwise make us divide by zero */
+	if (bytes_per_bucket == 0)
+		bytes_per_bucket = 1;
+
+	memset(hits, 0, sizeof(hits));
+	memset(max_per_bucket, 0, sizeof(max_per_bucket));
+	first_record(trace);
+	while (1) {
+		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
+			u64 top = (trace->io->sector << 9) + trace->io->bytes;
+			u64 bucket = top / bytes_per_bucket;
+
+			/*
+			 * Integer rounding of the bucket size can push the
+			 * quotient past the last bucket; keep it in range.
+			 */
+			slot = bucket > 10 ? 10 : (int)bucket;
+			hits[slot]++;
+			if (top > max_per_bucket[slot])
+				max_per_bucket[slot] = top;
+		}
+		if (next_record(trace))
+			break;
+	}
+	first_record(trace);
+	for (slot = 0; slot < 11; slot++) {
+		if (hits[slot] > fat_count) {
+			fat_count = hits[slot];
+		}
+	}
+
+	*yzoom_max = max_offset;
+	for (slot = 10; slot >= 0; slot--) {
+		double d = hits[slot];
+
+		if (d >= (double)fat_count * .05) {
+			*yzoom_max = max_per_bucket[slot];
+			break;
+		}
+	}
+
+	*yzoom_min = 0;
+	for (slot = 0; slot < 10; slot++) {
+		double d = hits[slot];
+
+		if (d >= (double)fat_count * .05) {
+			*yzoom_min = slot * bytes_per_bucket;
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Locate the dump file that belongs to @filename.
+ *
+ * Tried in order:
+ *   1. @filename itself
+ *   2. @filename with ".dump" appended
+ *   3. unless @filename already ends in ".dump": strip the file
+ *      extension, and if "<base>.blktrace.0" exists call
+ *      blktrace_to_dump(<base>) and look for "<base>.dump"
+ *
+ * Returns a newly allocated path that the caller must free(), or NULL
+ * when nothing was found or memory could not be allocated.
+ */
+static char *find_trace_file(char *filename)
+{
+	int ret;
+	struct stat st;
+	char line[1024];
+	char *dot;
+	char *slash;
+	char *try;
+	char *found = NULL;
+
+	ret = stat(filename, &st);
+	if (ret == 0)
+		return strdup(filename);
+
+	/* check the name we just built, not the one that already failed */
+	snprintf(line, sizeof(line), "%s.%s", filename, "dump");
+	ret = stat(line, &st);
+	if (ret == 0)
+		return strdup(line);
+
+	try = strdup(filename);
+	if (!try)
+		return NULL;
+
+	dot = strrchr(try, '.');
+	slash = strrchr(try, '/');
+	/* a '.' inside a directory component is not a file extension */
+	if (dot && slash && dot < slash)
+		dot = NULL;
+
+	if (!dot || strcmp(".dump", dot) != 0) {
+		if (dot)
+			*dot = '\0';
+		snprintf(line, sizeof(line), "%s%s", try, ".blktrace.0");
+		ret = stat(line, &st);
+		if (ret == 0) {
+			blktrace_to_dump(try);
+			snprintf(line, sizeof(line), "%s.%s", try, "dump");
+			ret = stat(line, &st);
+			if (ret == 0)
+				found = strdup(line);
+		}
+	}
+	free(try);
+	return found;
+}
+/*
+ * Find, open and mmap the trace that belongs to @filename.
+ *
+ * @filename is only read; it stays owned by the caller.  The path that
+ * is actually opened comes from find_trace_file().
+ *
+ * Returns a newly allocated struct trace whose cursor is on the first
+ * record, or NULL after printing a message to stderr.  On success the
+ * file descriptor is kept open in trace->fd.
+ */
+struct trace *open_trace(char *filename)
+{
+	int fd;
+	char *p;
+	struct stat st;
+	int ret;
+	struct trace *trace;
+	char *found_filename;
+
+	trace = calloc(1, sizeof(*trace));
+	if (!trace) {
+		fprintf(stderr, "unable to allocate memory for trace\n");
+		return NULL;
+	}
+
+	found_filename = find_trace_file(filename);
+	if (!found_filename) {
+		fprintf(stderr, "Unable to find trace file %s\n", filename);
+		goto fail;
+	}
+
+	fd = open(found_filename, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "Unable to open trace file %s err %s\n", found_filename, strerror(errno));
+		goto fail;
+	}
+	ret = fstat(fd, &st);
+	if (ret < 0) {
+		fprintf(stderr, "stat failed on %s err %s\n", found_filename, strerror(errno));
+		goto fail_fd;
+	}
+	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (p == MAP_FAILED) {
+		fprintf(stderr, "Unable to mmap trace file %s, err %s\n", found_filename, strerror(errno));
+		goto fail_fd;
+	}
+	trace->fd = fd;
+	trace->len = st.st_size;
+	trace->start = p;
+	trace->cur = p;
+	trace->io = (struct blk_io_trace *)p;
+
+	/* the resolved path was only needed for open() and the messages */
+	free(found_filename);
+	return trace;
+
+fail_fd:
+	close(fd);
+fail:
+	/* found_filename is NULL when the lookup itself failed */
+	free(found_filename);
+	free(trace);
+	return NULL;
+}
+/*
+ * Event type used for throughput accounting: completions when the trace
+ * has any, else issues, else queue events.  Falls back to completions
+ * when none of the three has been seen.
+ */
+static inline int tput_event(struct trace *trace)
+{
+	int event = __BLK_TA_COMPLETE;
+
+	if (!trace->found_completion) {
+		if (trace->found_issue)
+			event = __BLK_TA_ISSUE;
+		else if (trace->found_queue)
+			event = __BLK_TA_QUEUE;
+	}
+	return event;
+}
+
+/*
+ * Event type used for the IO dot graph: queue events when the trace has
+ * any, else issues, else completions (also the fallback when none of the
+ * three has been seen).
+ */
+static inline int io_event(struct trace *trace)
+{
+	if (trace->found_queue)
+		return __BLK_TA_QUEUE;
+
+	return trace->found_issue ? __BLK_TA_ISSUE : __BLK_TA_COMPLETE;
+}
+
+/*
+ * Throughput accounting for the current record.
+ *
+ * Notify records and records whose action is not the one chosen by
+ * tput_event() are ignored.  Otherwise io->bytes is added to the bucket
+ * of the record's second, the bucket count is set to 1, and gld->max is
+ * raised to the bucket sum when that is larger.  Aborts when the second
+ * is greater than gld->seconds.
+ */
+void add_tput(struct trace *trace, struct graph_line_data *gld)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & 0xffff;
+	int seconds;
+
+	if ((io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) ||
+	    action != tput_event(trace))
+		return;
+
+	seconds = SECONDS(io->time);
+	if (seconds > gld->seconds) {
+		fprintf(stderr, "Bad record %d %d %d\n", seconds, gld->seconds, action);
+		abort();
+	}
+
+	gld->data[seconds].count = 1;
+	gld->data[seconds].sum += io->bytes;
+	if (gld->max < gld->data[seconds].sum)
+		gld->max = gld->data[seconds].sum;
+}
+
+/*
+ * Record the current IO in the dot graph data.
+ *
+ * Notify records and records whose action is not the one chosen by
+ * io_event() are ignored.  Reads go to @gdd_reads and writes to
+ * @gdd_writes, keyed by byte offset (sector << 9), length and time;
+ * records that are neither are dropped.
+ */
+void add_io(struct trace *trace, struct graph_dot_data *gdd_writes,
+	    struct graph_dot_data *gdd_reads)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & 0xffff;
+	struct graph_dot_data *target;
+	u64 offset;
+
+	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+		return;
+	if (action != io_event(trace))
+		return;
+
+	if (BLK_DATADIR(io->action) & BLK_TC_READ)
+		target = gdd_reads;
+	else if (BLK_DATADIR(io->action) & BLK_TC_WRITE)
+		target = gdd_writes;
+	else
+		return;
+
+	offset = io->sector << 9;
+	set_gdd_bit(target, offset, io->bytes, io->time);
+}
+
+/*
+ * Queue-depth accounting for the current record.
+ *
+ * Only non-notify __BLK_TA_ISSUE records are considered.  When
+ * hash_dispatched_io() returns 0 for the record, the global
+ * ios_in_flight counter is incremented and its new value is added to the
+ * bucket of the record's second; gld->max follows the largest per-second
+ * average computed so far.  Aborts when the second is greater than
+ * gld->seconds.
+ */
+void add_pending_io(struct trace *trace, struct graph_line_data *gld)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & 0xffff;
+	int seconds;
+	double depth_avg;
+
+	if ((io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) ||
+	    action != __BLK_TA_ISSUE)
+		return;
+
+	seconds = SECONDS(io->time);
+	if (seconds > gld->seconds) {
+		fprintf(stderr, "Bad record %d %d\n", seconds, gld->seconds);
+		abort();
+	}
+
+	if (hash_dispatched_io(trace->io) != 0)
+		return;
+
+	ios_in_flight++;
+	gld->data[seconds].sum += ios_in_flight;
+	gld->data[seconds].count++;
+
+	depth_avg = (double)gld->data[seconds].sum / gld->data[seconds].count;
+	if (gld->max < (u64)depth_avg)
+		gld->max = depth_avg;
+}
+
+/*
+ * Latency accounting for the current record.
+ *
+ * Only non-notify __BLK_TA_COMPLETE records are considered.  The pending
+ * IO returned by hash_completed_io() is removed from its hash list and
+ * freed, and the global ios_in_flight counter is decremented (never below
+ * zero).  When the completion time is not earlier than the dispatch
+ * time, the difference is added to the bucket of the record's second.
+ * latency_gld->max follows the largest per-second average computed so
+ * far.  Aborts when the second is greater than latency_gld->seconds,
+ * like the other add_*() helpers.
+ */
+void add_completed_io(struct trace *trace,
+		      struct graph_line_data *latency_gld)
+{
+	struct blk_io_trace *io = trace->io;
+	int seconds;
+	int action = io->action & 0xffff;
+	struct pending_io *pio;
+	double avg;
+	u64 latency;
+
+	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+		return;
+
+	if (action != __BLK_TA_COMPLETE)
+		return;
+
+	seconds = SECONDS(io->time);
+	if (seconds > latency_gld->seconds) {
+		fprintf(stderr, "Bad record %d %d\n", seconds, latency_gld->seconds);
+		abort();
+	}
+
+	pio = hash_completed_io(trace->io);
+	if (!pio)
+		return;
+
+	if (ios_in_flight > 0)
+		ios_in_flight--;
+	if (io->time >= pio->dispatch_time) {
+		latency = io->time - pio->dispatch_time;
+		latency_gld->data[seconds].sum += latency;
+		latency_gld->data[seconds].count++;
+	}
+
+	list_del(&pio->hash_list);
+	free(pio);
+
+	/*
+	 * Nothing has been recorded for this second yet (the only sample was
+	 * rejected above); skip the average rather than divide by zero.
+	 */
+	if (latency_gld->data[seconds].count == 0)
+		return;
+
+	avg = (double)latency_gld->data[seconds].sum /
+		latency_gld->data[seconds].count;
+	if (latency_gld->max < (u64)avg) {
+		latency_gld->max = avg;
+	}
+}
+
+/*
+ * IOPS accounting for the current record.
+ *
+ * Uses the same event selection as add_tput(): notify records and
+ * records whose action differs from tput_event() are ignored.  Otherwise
+ * the bucket of the record's second is incremented by one, its count is
+ * set to 1, and gld->max is raised to the bucket sum when that is
+ * larger.  Aborts when the second is greater than gld->seconds.
+ */
+void add_iop(struct trace *trace, struct graph_line_data *gld)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & 0xffff;
+	int seconds;
+
+	if ((io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) ||
+	    action != tput_event(trace))
+		return;
+
+	seconds = SECONDS(io->time);
+	if (seconds > gld->seconds) {
+		fprintf(stderr, "Bad record %d %d\n", seconds, gld->seconds);
+		abort();
+	}
+
+	gld->data[seconds].count = 1;
+	gld->data[seconds].sum += 1;
+	if (gld->max < gld->data[seconds].sum)
+		gld->max = gld->data[seconds].sum;
+}
+
+/*
+ * Note which event types the trace contains (completion, issue, queue)
+ * by setting the matching found_* flag for a non-notify record, then
+ * hand the record to handle_notify().
+ */
+void check_record(struct trace *trace)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & 0xffff;
+
+	if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
+		if (action == __BLK_TA_COMPLETE)
+			trace->found_completion = 1;
+		else if (action == __BLK_TA_ISSUE)
+			trace->found_issue = 1;
+		else if (action == __BLK_TA_QUEUE)
+			trace->found_queue = 1;
+	}
+	handle_notify(trace);
+}
diff --git a/iowatcher/blkparse.h b/iowatcher/blkparse.h
new file mode 100644
index 0000000..3f32430
--- /dev/null
+++ b/iowatcher/blkparse.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
+ */
+#ifndef __IOWATCH_BLKPARSE__
+#define __IOWATCH_BLKPARSE__
+/* number of low bits of a device number that MINOR() keeps */
+#define MINORBITS 20
+#define MINORMASK ((1 << MINORBITS) - 1)
+/*
+ * Split a value into x / 10^9 and x % 10^9 -- whole seconds and the
+ * remaining nanoseconds, presumably for a nanosecond timestamp (as the
+ * macro names suggest).
+ */
+#define SECONDS(x) ((unsigned long long)(x) / 1000000000)
+#define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000)
+/* d * 10^9, truncated to unsigned long long */
+#define DOUBLE_TO_NANO_ULL(d) ((unsigned long long)((d) * 1000000000))
+/* true when t->magic, with its low byte masked off, equals BLK_IO_TRACE_MAGIC */
+#define CHECK_MAGIC(t) (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
+
+/* State for one trace file opened by open_trace(). */
+struct trace {
+ /* file descriptor the trace was opened with */
+ int fd;
+ /* size of the file in bytes, as reported by fstat() */
+ u64 len;
+ /* base address of the mmap()ed file */
+ char *start;
+ /* current position inside the mapping; moved by next_record() */
+ char *cur;
+ /* record header at the current position */
+ struct blk_io_trace *io;
+ /* filled in by the notify handler from a BLK_TN_TIMESTAMP record */
+ u64 start_timestamp;
+ struct timespec abs_start_time;
+
+ /*
+ * flags for the things we find in the stream
+ * we prefer different events for different things
+ */
+ int found_issue;
+ int found_completion;
+ int found_queue;
+};
+
+/* Bits of @dev above the low MINORBITS bits. */
+static inline unsigned int MAJOR(unsigned int dev)
+{
+	unsigned int major = dev >> MINORBITS;
+
+	return major;
+}
+
+/* The low MINORBITS bits of @dev. */
+static inline unsigned int MINOR(unsigned int dev)
+{
+	unsigned int minor = dev & MINORMASK;
+
+	return minor;
+}
+
+/* NOTE(review): presumably must run before the add_*_io() helpers -- confirm */
+void init_io_hash_table(void);
+/* locate, open and mmap a trace; NULL on failure (message on stderr) */
+struct trace *open_trace(char *filename);
+/* time field of the last non-notify record; 0 for a too-short trace */
+u64 find_last_time(struct trace *trace);
+/* largest (sector << 9) + bytes over all non-notify records */
+u64 find_highest_offset(struct trace *trace);
+/* choose a y-axis window that skips sparsely hit offset buckets; returns 0 */
+int filter_outliers(struct trace *trace, u64 max_offset,
+ u64 *yzoom_min, u64 *yzoom_max);
+
+/*
+ * Per-record accounting helpers.  Each one looks at trace->io, the
+ * record at the current cursor position, and ignores notify records.
+ */
+void add_iop(struct trace *trace, struct graph_line_data *gld);
+/* sets the found_* flags of the trace; call before the add_*() helpers */
+void check_record(struct trace *trace);
+void add_completed_io(struct trace *trace,
+ struct graph_line_data *latency_gld);
+void add_io(struct trace *trace, struct graph_dot_data *gdd_writes,
+ struct graph_dot_data *gdd_reads);
+void add_tput(struct trace *trace, struct graph_line_data *gld);
+void add_pending_io(struct trace *trace, struct graph_line_data *gld);
+/* step to the following record; returns non-zero at the end of the trace */
+int next_record(struct trace *trace);
+/* rewind the cursor to the first record */
+void first_record(struct trace *trace);
+#endif
diff --git a/iowatcher/list.h b/iowatcher/list.h
new file mode 100644
index 0000000..90c993e
--- /dev/null
+++ b/iowatcher/list.h
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2012 Fusion-io. All rights reserved.
+ *
+ * This header was taken from the Linux kernel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Non-NULL marker values that list_del() stores in the next and prev
+ * pointers of a removed entry.
+ */
+#define LIST_POISON1 ((void *) 0x00100100)
+#define LIST_POISON2 ((void *) 0x00200200)
+
+/*
+ * Drop any earlier definition of offsetof and provide our own: the
+ * compiler's version when __compiler_offsetof is defined, otherwise the
+ * address of MEMBER inside a TYPE object placed at address 0.
+ */
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/*
+ * container_of - get the structure that contains a given member
+ * @ptr: pointer to the member.
+ * @type: type of the containing structure.
+ * @member: name of the member inside @type.
+ *
+ * The __mptr assignment makes the compiler check that @ptr has the type
+ * of @member.  Relies on statement expressions and typeof.
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/* List link; an initialised, empty head has next and prev pointing at itself. */
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+/* initialiser that makes the list head @name point at itself (empty list) */
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+/* define a struct list_head variable called @name, initialised as empty */
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+/* Make @list an empty list: both of its links point back at @list. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * @new: entry to insert
+ * @prev: entry that will precede @new
+ * @next: entry that will follow @new
+ *
+ * When CONFIG_DEBUG_LIST is defined only a declaration is provided
+ * here and the definition has to come from elsewhere.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+#else
+extern void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
+#endif
+
+/**
+ * list_add - insert an entry right behind a list head
+ * @new: entry to insert
+ * @head: list head that will precede @new
+ *
+ * @new becomes the first entry of the list, which gives stack (LIFO)
+ * behaviour.  With CONFIG_DEBUG_LIST defined only a declaration is
+ * provided here.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+#else
+extern void list_add(struct list_head *new, struct list_head *head);
+#endif
+
+
+/**
+ * list_add_tail - insert an entry right in front of a list head
+ * @new: entry to insert
+ * @head: list head that will follow @new
+ *
+ * @new becomes the last entry of the list, which gives queue (FIFO)
+ * behaviour.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ *
+ * The links of the removed entry itself are not touched here.
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ *
+ * The links of @entry are overwritten with LIST_POISON1/LIST_POISON2.
+ * With CONFIG_DEBUG_LIST defined only a declaration is provided here.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+#else
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ * Note: if 'old' was empty, it will be overwritten.
+ *
+ * The links of @old are left unchanged, so @old still points into the
+ * list afterwards; see list_replace_init() to reset it.
+ */
+static inline void list_replace(struct list_head *old,
+ struct list_head *new)
+{
+ new->next = old->next;
+ new->next->prev = new;
+ new->prev = old->prev;
+ new->prev->next = new;
+}
+
+/**
+ * list_replace_init - replace old entry by new one and reinitialise old
+ * @old: the element to be replaced
+ * @new: the new element to insert
+ *
+ * Same as list_replace(), then @old is turned into an empty list.
+ */
+static inline void list_replace_init(struct list_head *old,
+ struct list_head *new)
+{
+ list_replace(old, new);
+ INIT_LIST_HEAD(old);
+}
+/**
+ * list_del_init - unlink an entry and leave it as an empty list
+ * @entry: the element to remove from its list
+ *
+ * After the call list_empty(@entry) is true.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - unlink an entry and re-insert it right behind a head
+ * @list: the entry to move
+ * @head: the head that will precede the entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	__list_del(before, after);
+	list_add(list, head);
+}
+
+/**
+ * list_move_tail - unlink an entry and re-insert it right before a head
+ * @list: the entry to move
+ * @head: the head that will follow the entry
+ */
+static inline void list_move_tail(struct list_head *list,
+				  struct list_head *head)
+{
+	struct list_head *before = list->prev;
+	struct list_head *after = list->next;
+
+	__list_del(before, after);
+	list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - check whether an entry is the final one of a list
+ * @list: the entry to test
+ * @head: the head of the list
+ *
+ * Returns non-zero when the entry following @list is @head.
+ */
+static inline int list_is_last(const struct list_head *list,
+			       const struct list_head *head)
+{
+	const struct list_head *following = list->next;
+
+	return following == head;
+}
+
+/**
+ * list_empty - check whether a list has no entries
+ * @head: the list to test.
+ *
+ * Returns non-zero when @head->next points back at @head.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+	const struct list_head *first = head->next;
+
+	return first == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ *
+ * Returns non-zero only when both head->next and head->prev equal @head.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+ struct list_head *next = head->next;
+ return (next == head) && (next == head->prev);
+}
+
+/*
+ * Link all entries of @list in between @head and the entry that used to
+ * follow @head.
+ *
+ * @list itself is not modified, and @list must not be empty: with an
+ * empty @list, first and last would both be @list and the head of the
+ * source list would be linked into the destination.  Use list_splice()
+ * or list_splice_init(), which check for that.
+ */
+static inline void __list_splice(struct list_head *list,
+ struct list_head *head)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+ struct list_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+}
+
+/**
+ * list_splice - insert all entries of one list into another
+ * @list: the list whose entries are added.
+ * @head: the entries are inserted right behind this node.
+ *
+ * Does nothing when @list is empty.  @list is not reinitialised.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+}
+
+/**
+ * list_splice_init - insert all entries of one list into another and
+ *                    empty the source list
+ * @list: the list whose entries are added.
+ * @head: the entries are inserted right behind this node.
+ *
+ * Does nothing when @list is empty; otherwise @list is reinitialised as
+ * an empty list afterwards.
+ */
+static inline void list_splice_init(struct list_head *list,
+				    struct list_head *head)
+{
+	if (list_empty(list))
+		return;
+
+	__list_splice(list, head);
+	INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Expands to container_of(), so @ptr must point at the @member field of
+ * a @type object.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * The cursor is advanced through pos->next after the loop body has run,
+ * so the body must not unlink @pos; list_for_each_safe() allows that.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); \
+ pos = pos->next)
+
+/**
+ * __list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * In this header the expansion is the same loop as list_for_each();
+ * neither variant does any prefetching.  The name is kept for source
+ * compatibility with code written against the kernel header.
+ */
+#define __list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * The cursor is advanced through pos->prev after the loop body has run,
+ * so the body must not unlink @pos.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); \
+ pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ *
+ * @n holds the successor of @pos before the loop body runs, which is
+ * why the body may unlink @pos.  Unlinking the entry stored in @n is
+ * not covered.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * The loop ends when the @member field of @pos is @head itself; after a
+ * complete pass @pos therefore does not point at a real entry.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Starts at (head)->prev and follows the prev links until @head is
+ * reached again.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos: the type * to use as a start point
+ * @head: the head of the list
+ * @member: the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ *
+ * Yields @pos when it is non-NULL, otherwise the list_entry() of @head.
+ * Uses the "a ?: b" conditional with omitted middle operand.
+ */
+#define list_prepare_entry(pos, head, member) \
+ ((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ *
+ * The entry @pos points at on entry is not visited itself.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ *
+ * The entry @pos points at on entry is the first one visited.
+ */
+#define list_for_each_entry_from(pos, head, member) \
+ for (; &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * @n holds the entry after @pos before the loop body runs, which is why
+ * the body may unlink (and free) @pos.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ *
+ * The entry @pos points at on entry is not visited itself.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) \
+ for (pos = list_entry(pos->member.next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ *
+ * The entry @pos points at on entry is the first one visited.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+ for (n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ *
+ * @n holds the entry before @pos (its prev link) while the body runs.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+#endif
diff --git a/iowatcher/main.c b/iowatcher/main.c
new file mode 100644
index 0000000..ded1afe
--- /dev/null
+++ b/iowatcher/main.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <inttypes.h>
+#include <string.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <math.h>
+#include <getopt.h>
+
+#include "plot.h"
+#include "blkparse.h"
+#include "list.h"
+#include "tracers.h"
+
+/* every struct trace_file created by add_trace_file(), in -t order */
+LIST_HEAD(all_traces);
+
+/* index into colors[] of the next colour to hand out */
+static int color_index = 0;
+/* colour names handed out in turn; the NULL entry marks the end */
+char *colors[] = {
+ "blue", "darkgreen",
+ "red", "aqua",
+ "orange", "darkviolet",
+ "brown", "#00FF00",
+ "yellow", "coral",
+ "black", "darkred",
+ "fuchsia", "crimson",
+ NULL };
+
+/*
+ * Hand out the next entry of colors[], wrapping around to the first
+ * entry once the NULL terminator is reached.
+ */
+char *pick_color(void) {
+	if (!colors[color_index])
+		color_index = 0;
+
+	return colors[color_index++];
+}
+
+/*
+ * Hand out the next colour for a CPU line.  CPU colours come from the
+ * same rotation (and the same cursor) as pick_color().
+ */
+char *pick_cpu_color(void) {
+	return pick_color();
+}
+
+/*
+ * Index of each graph in active_graphs[] and graphs_by_name[].
+ * TOTAL_GRAPHS is the number of graphs, not a graph itself.
+ */
+enum {
+ IO_GRAPH_INDEX = 0,
+ TPUT_GRAPH_INDEX,
+ LATENCY_GRAPH_INDEX,
+ QUEUE_DEPTH_GRAPH_INDEX,
+ IOPS_GRAPH_INDEX,
+ TOTAL_GRAPHS
+};
+
+/*
+ * Graph names accepted by enable_one_graph() and disable_one_graph().
+ * The position of a name is used as the index into active_graphs[], so
+ * the order has to match the *_GRAPH_INDEX enum.
+ */
+static char *graphs_by_name[] = {
+ "io",
+ "tput",
+ "latency",
+ "queue-depth",
+ "iops",
+};
+
+/* non-zero for every graph that should be drawn, indexed by *_GRAPH_INDEX */
+static int active_graphs[TOTAL_GRAPHS];
+/* plot_io() only adds the x-axis label when this equals its own graph index */
+static int last_active_graph = IOPS_GRAPH_INDEX;
+
+/* position in all_traces of the trace that receives the next label */
+static int label_index = 0;
+/* number of entries in all_traces */
+static int num_traces = 0;
+/* strlen() of the longest label given so far */
+static int longest_label = 0;
+
+/* One trace file given on the command line plus everything derived from it. */
+struct trace_file {
+ /* link in all_traces */
+ struct list_head list;
+ /* copy of the file name passed to add_trace_file() */
+ char *filename;
+ /* copy of the label given for this trace; NULL when none was given */
+ char *label;
+ /* result of open_trace(), filled in by read_traces() */
+ struct trace *trace;
+ /*
+ * read_traces() sets both to SECONDS() of the last record's time;
+ * set_all_max_tf() may later overwrite seconds
+ */
+ int seconds;
+ int stop_seconds;
+ /* y-axis maximum: filter_outliers() result, or the common maximum */
+ u64 max_offset;
+
+ /* colours from pick_color() */
+ char *read_color;
+ char *write_color;
+
+ /* per-graph data, allocated by setup_trace_file_graphs() */
+ struct graph_line_data *tput_gld;
+ struct graph_line_data *iop_gld;
+ struct graph_line_data *latency_gld;
+ struct graph_line_data *queue_depth_gld;
+ struct graph_dot_data *gdd_writes;
+ struct graph_dot_data *gdd_reads;
+};
+
+/* Mark every graph as active. */
+static void enable_all_graphs(void)
+{
+	int graph = TOTAL_GRAPHS;
+
+	while (graph-- > 0)
+		active_graphs[graph] = 1;
+}
+
+/* Mark every graph as inactive. */
+static void disable_all_graphs(void)
+{
+	memset(active_graphs, 0, sizeof(active_graphs));
+}
+
+/*
+ * Activate the graph called @name (see graphs_by_name[]).
+ * Returns 0 on success or -ENOENT when no graph has that name.
+ */
+static int enable_one_graph(char *name)
+{
+	int graph = 0;
+
+	while (graph < TOTAL_GRAPHS && strcmp(name, graphs_by_name[graph]) != 0)
+		graph++;
+
+	if (graph == TOTAL_GRAPHS)
+		return -ENOENT;
+
+	active_graphs[graph] = 1;
+	return 0;
+}
+
+/*
+ * Deactivate the graph called @name (see graphs_by_name[]).
+ * Returns 0 on success or -ENOENT when no graph has that name.
+ */
+static int disable_one_graph(char *name)
+{
+	int graph = 0;
+
+	while (graph < TOTAL_GRAPHS && strcmp(name, graphs_by_name[graph]) != 0)
+		graph++;
+
+	if (graph == TOTAL_GRAPHS)
+		return -ENOENT;
+
+	active_graphs[graph] = 0;
+	return 0;
+}
+
+/*
+ * Return the highest graph index that is active, or -ENOENT when every
+ * graph is disabled.
+ */
+static int last_graph(void)
+{
+	int graph = TOTAL_GRAPHS;
+
+	while (--graph >= 0) {
+		if (active_graphs[graph])
+			return graph;
+	}
+	return -ENOENT;
+}
+
+/*
+ * Create a trace_file entry for @filename, append it to all_traces and
+ * give it a read and a write colour.  @filename is copied.  Exits the
+ * program when memory cannot be allocated.
+ */
+static void add_trace_file(char *filename)
+{
+	struct trace_file *tf;
+
+	tf = calloc(1, sizeof(*tf));
+	if (!tf) {
+		fprintf(stderr, "Unable to allocate memory\n");
+		exit(1);
+	}
+	tf->filename = strdup(filename);
+	/* a NULL name would only blow up later, inside open_trace() */
+	if (!tf->filename) {
+		fprintf(stderr, "Unable to allocate memory\n");
+		exit(1);
+	}
+	list_add_tail(&tf->list, &all_traces);
+	tf->read_color = pick_color();
+	tf->write_color = pick_color();
+	num_traces++;
+}
+
+/*
+ * Allocate the line and dot graph buffers of every trace file, sized
+ * from its seconds, stop_seconds and max_offset fields.
+ */
+static void setup_trace_file_graphs(void)
+{
+	struct trace_file *tf;
+
+	list_for_each_entry(tf, &all_traces, list) {
+		int secs = tf->seconds;
+		int stop = tf->stop_seconds;
+		u64 top = tf->max_offset;
+
+		tf->tput_gld = alloc_line_data(secs, stop);
+		tf->latency_gld = alloc_line_data(secs, stop);
+		tf->queue_depth_gld = alloc_line_data(secs, stop);
+		tf->iop_gld = alloc_line_data(secs, stop);
+		tf->gdd_writes = alloc_dot_data(secs, top, stop);
+		tf->gdd_reads = alloc_dot_data(secs, top, stop);
+	}
+}
+
+/*
+ * Open every trace file and work out its extent: the duration in
+ * seconds from the last record and the y-axis maximum from
+ * filter_outliers().  Exits the program when a trace cannot be opened.
+ */
+static void read_traces(void)
+{
+	struct trace_file *tf;
+
+	list_for_each_entry(tf, &all_traces, list) {
+		struct trace *trace;
+		u64 last_time;
+		u64 ymin;
+		u64 ymax;
+
+		trace = open_trace(tf->filename);
+		if (!trace)
+			exit(1);
+
+		last_time = find_last_time(trace);
+		tf->trace = trace;
+		tf->seconds = SECONDS(last_time);
+		tf->stop_seconds = SECONDS(last_time);
+
+		/* only the upper bound of the zoom window is kept */
+		tf->max_offset = find_highest_offset(trace);
+		filter_outliers(trace, tf->max_offset, &ymin, &ymax);
+		tf->max_offset = ymax;
+	}
+}
+
+/*
+ * Walk every record of every trace and feed it to the per-graph
+ * accounting helpers.  check_record() runs first because the helpers
+ * consult the found_* flags it sets.
+ */
+static void read_trace_events(void)
+{
+	struct trace_file *tf;
+
+	list_for_each_entry(tf, &all_traces, list) {
+		struct trace *trace = tf->trace;
+
+		first_record(trace);
+		do {
+			check_record(trace);
+			add_tput(trace, tf->tput_gld);
+			add_iop(trace, tf->iop_gld);
+			add_io(trace, tf->gdd_writes, tf->gdd_reads);
+			add_pending_io(trace, tf->queue_depth_gld);
+			add_completed_io(trace, tf->latency_gld);
+		} while (next_record(trace) == 0);
+	}
+}
+
+/*
+ * Attach a copy of @label to the first trace file that has not been
+ * labelled yet (labels are handed out in list order) and remember the
+ * length of the longest label.  Nothing is attached when every trace
+ * added so far already has a label.
+ */
+static void set_trace_label(char *label)
+{
+	struct trace_file *tf;
+	int position = 0;
+	int len = strlen(label);
+
+	if (longest_label < len)
+		longest_label = len;
+
+	list_for_each_entry(tf, &all_traces, list) {
+		if (position++ != label_index)
+			continue;
+
+		tf->label = strdup(label);
+		label_index++;
+		break;
+	}
+}
+
+/* command line settings and their defaults; see parse_options() */
+/* -T / --title */
+static char *graph_title = "";
+/* -o / --output */
+static char *output_filename = "trace.svg";
+/* -d / --device */
+static char *blktrace_device = NULL;
+/* -t / --trace with a trailing ".dump" removed; see set_blktrace_outfile() */
+static char *blktrace_outfile = "trace";
+static char *blktrace_dest_dir = ".";
+/* -p / --prog */
+static char *program_to_run = NULL;
+
+/*
+ * Remember @arg, minus a trailing ".dump" extension, as the blktrace
+ * output name.  @arg is copied.  Exits the program when memory cannot
+ * be allocated.
+ */
+static void set_blktrace_outfile(char *arg)
+{
+	char *s = strdup(arg);
+	char *last_dot;
+
+	/* strrchr() on a NULL copy would crash */
+	if (!s) {
+		fprintf(stderr, "Unable to allocate memory\n");
+		exit(1);
+	}
+
+	last_dot = strrchr(s, '.');
+	if (last_dot) {
+		if (strcmp(last_dot, ".dump") == 0)
+			*last_dot = '\0';
+	}
+	blktrace_outfile = s;
+}
+
+
+/* short options for getopt_long(); a ':' after a letter means it takes an argument */
+char *option_string = "hT:t:o:l:r:O:N:d:p:";
+/*
+ * Long options, each mapped to the short option it is an alias for.
+ * --help takes no argument, matching the plain 'h' in option_string.
+ */
+static struct option long_options[] = {
+	{"title", required_argument, 0, 'T'},
+	{"trace", required_argument, 0, 't'},
+	{"output", required_argument, 0, 'o'},
+	{"label", required_argument, 0, 'l'},
+	{"rolling", required_argument, 0, 'r'},
+	{"no-graph", required_argument, 0, 'N'},
+	{"only-graph", required_argument, 0, 'O'},
+	{"device", required_argument, 0, 'd'},
+	{"prog", required_argument, 0, 'p'},
+	{"help", no_argument, 0, 'h'},
+	{0, 0, 0, 0}
+};
+
+/*
+ * Print the command line help to stderr and exit with status 1.
+ * The graph names listed here are the ones in graphs_by_name[].
+ */
+static void print_usage(void)
+{
+	fprintf(stderr, "iowatcher usage:\n"
+		"\t-d (--device): device for blktrace to trace\n"
+		"\t-t (--trace): trace file name (more than one allowed)\n"
+		"\t-l (--label): trace label in the graph\n"
+		"\t-o (--output): output file name (SVG only)\n"
+		"\t-p (--prog): program to run while blktrace is run\n"
+		"\t-r (--rolling): number of seconds in the rolling average\n"
+		"\t-T (--title): graph title\n"
+		"\t-N (--no-graph): skip a single graph (io, tput, latency, queue-depth, iops)\n"
+		"\t-O (--only-graph): add a single graph (io, tput, latency, queue-depth, iops)\n"
+		);
+	exit(1);
+}
+
+/*
+ * Parse the command line and store the results in the file-level
+ * settings.
+ *
+ * The first -O disables every graph before enabling the named one, so
+ * -O selects exactly the graphs listed.  A graph name that is not known
+ * is reported on stderr and otherwise ignored.  -h and unrecognised
+ * options print the usage text, which exits the program.
+ *
+ * Always returns 0.
+ */
+static int parse_options(int ac, char **av)
+{
+	int c;
+	int disabled = 0;
+
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(ac, av, option_string,
+				long_options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch(c) {
+		case 'h':
+			print_usage();
+			break;
+		case 'T':
+			graph_title = strdup(optarg);
+			break;
+		case 't':
+			add_trace_file(optarg);
+			set_blktrace_outfile(optarg);
+			break;
+		case 'o':
+			output_filename = strdup(optarg);
+			break;
+		case 'l':
+			set_trace_label(optarg);
+			break;
+		case 'r':
+			set_rolling_avg(atoi(optarg));
+			break;
+		case 'O':
+			if (!disabled) {
+				disable_all_graphs();
+				disabled = 1;
+			}
+			if (enable_one_graph(optarg))
+				fprintf(stderr, "Unknown graph name %s\n", optarg);
+			break;
+		case 'N':
+			if (disable_one_graph(optarg))
+				fprintf(stderr, "Unknown graph name %s\n", optarg);
+			break;
+		case 'd':
+			blktrace_device = strdup(optarg);
+			break;
+		case 'p':
+			program_to_run = strdup(optarg);
+			break;
+		case '?':
+			print_usage();
+			break;
+		default:
+			break;
+		}
+	}
+	return 0;
+}
+
+ /*
+  * Raise the running maxima with the values from one trace file:
+  * *seconds and *max_offset are only ever increased, never lowered.
+  */
+ static void compare_max_tf(struct trace_file *tf, int *seconds, u64 *max_offset)
+ {
+ if (tf->seconds > *seconds)
+ *seconds = tf->seconds;
+ if (tf->max_offset > *max_offset)
+ *max_offset = tf->max_offset;
+ }
+
+ /*
+  * Store the same duration and maximum offset in every trace on
+  * all_traces.
+  */
+ static void set_all_max_tf(int seconds, u64 max_offset)
+ {
+ struct trace_file *tf;
+
+ list_for_each_entry(tf, &all_traces, list) {
+ tf->seconds = seconds;
+ tf->max_offset = max_offset;
+ }
+ }
+
+ /*
+  * Emit the "Device IO" dot plot: a read series and a write series for
+  * every trace, plus a legend entry for each series that recorded IO.
+  * Does nothing when the IO graph is disabled.
+  */
+ static void plot_io(struct plot *plot, int seconds, u64 max_offset)
+ {
+ 	struct trace_file *tf;
+
+ 	if (!active_graphs[IO_GRAPH_INDEX])
+ 		return;
+
+ 	plot->add_xlabel = (last_active_graph == IO_GRAPH_INDEX);
+ 	setup_axis(plot);
+
+ 	/* room for one read and one write entry per trace */
+ 	svg_alloc_legend(plot, num_traces * 2);
+
+ 	set_plot_label(plot, "Device IO");
+ 	set_ylabel(plot, "Offset (MB)");
+ 	set_yticks(plot, 4, 0, max_offset / (1024 * 1024), "");
+ 	set_xticks(plot, 9, 0, seconds);
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		char *name = tf->label ? tf->label : "";
+
+ 		svg_io_graph(plot, tf->gdd_reads, tf->read_color);
+ 		if (tf->gdd_reads->total_ios != 0)
+ 			svg_add_legend(plot, name, " Reads", tf->read_color);
+
+ 		svg_io_graph(plot, tf->gdd_writes, tf->write_color);
+ 		if (tf->gdd_writes->total_ios != 0)
+ 			svg_add_legend(plot, name, " Writes", tf->write_color);
+ 	}
+
+ 	if (plot->add_xlabel)
+ 		set_xlabel(plot, "Time (seconds)");
+ 	svg_write_legend(plot);
+ 	close_plot(plot);
+ }
+
+ /*
+  * Emit the "Throughput" line plot.  All traces are drawn against one
+  * shared Y maximum; a legend is produced only when more than one trace
+  * is loaded.  Does nothing when the throughput graph is disabled.
+  */
+ static void plot_tput(struct plot *plot, int seconds)
+ {
+ 	struct trace_file *tf;
+ 	int want_legend = num_traces > 1;
+ 	char ylabel[128];
+ 	char *units;
+ 	u64 max = 0;
+
+ 	if (!active_graphs[TPUT_GRAPH_INDEX])
+ 		return;
+
+ 	if (want_legend)
+ 		svg_alloc_legend(plot, num_traces);
+
+ 	/* find the largest per-trace maximum and push it into every trace */
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		if (max < tf->tput_gld->max)
+ 			max = tf->tput_gld->max;
+ 	}
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		tf->tput_gld->max = max;
+ 	}
+
+ 	plot->add_xlabel = (last_active_graph == TPUT_GRAPH_INDEX);
+ 	setup_axis(plot);
+ 	set_plot_label(plot, "Throughput");
+
+ 	/* reduce the axis maximum to a readable unit (factor 1024) */
+ 	scale_line_graph_bytes(&max, &units, 1024);
+ 	sprintf(ylabel, "%sB/s", units);
+ 	set_ylabel(plot, ylabel);
+ 	set_yticks(plot, 4, 0, max, "");
+ 	set_xticks(plot, 9, 0, seconds);
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		svg_line_graph(plot, tf->tput_gld, tf->read_color);
+ 		if (want_legend)
+ 			svg_add_legend(plot, tf->label, "", tf->read_color);
+ 	}
+
+ 	if (plot->add_xlabel)
+ 		set_xlabel(plot, "Time (seconds)");
+ 	if (want_legend)
+ 		svg_write_legend(plot);
+ 	close_plot(plot);
+ }
+
+ /*
+  * Emit the "IO Latency" line plot.  All traces are drawn against one
+  * shared Y maximum; a legend is produced only when more than one trace
+  * is loaded.  Does nothing when the latency graph is disabled.
+  */
+ static void plot_latency(struct plot *plot, int seconds)
+ {
+ 	struct trace_file *tf;
+ 	char *units;
+ 	char line[128];
+ 	u64 max = 0;
+
+ 	if (active_graphs[LATENCY_GRAPH_INDEX] == 0)
+ 		return;
+
+ 	if (num_traces > 1)
+ 		svg_alloc_legend(plot, num_traces);
+
+ 	/* find the largest per-trace maximum and push it into every trace */
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		if (tf->latency_gld->max > max)
+ 			max = tf->latency_gld->max;
+ 	}
+ 	list_for_each_entry(tf, &all_traces, list)
+ 		tf->latency_gld->max = max;
+
+ 	/*
+ 	 * The X axis label belongs on the last graph of the page, so this
+ 	 * must compare against the latency index (it used to test the
+ 	 * throughput index, a copy and paste slip).
+ 	 */
+ 	plot->add_xlabel = last_active_graph == LATENCY_GRAPH_INDEX;
+ 	setup_axis(plot);
+ 	set_plot_label(plot, "IO Latency");
+
+ 	scale_line_graph_time(&max, &units);
+ 	snprintf(line, sizeof(line), "latency (%ss)", units);
+ 	set_ylabel(plot, line);
+ 	set_yticks(plot, 4, 0, max, "");
+ 	set_xticks(plot, 9, 0, seconds);
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		svg_line_graph(plot, tf->latency_gld, tf->read_color);
+ 		if (num_traces > 1)
+ 			svg_add_legend(plot, tf->label, "", tf->read_color);
+ 	}
+
+ 	if (plot->add_xlabel)
+ 		set_xlabel(plot, "Time (seconds)");
+ 	if (num_traces > 1)
+ 		svg_write_legend(plot);
+ 	close_plot(plot);
+ }
+
+ /*
+  * Emit the "Queue Depth" line plot.  Does nothing when the queue depth
+  * graph is disabled.
+  *
+  * The Y axis is scaled to the largest queue depth seen in any trace and
+  * that maximum is stored in every trace, the same way the throughput,
+  * latency and IOPs plots do it.  Using only the first trace's maximum
+  * mislabels the axis and draws each trace at a different scale.
+  */
+ static void plot_queue_depth(struct plot *plot, int seconds)
+ {
+ 	struct trace_file *tf;
+ 	u64 max = 0;
+
+ 	if (active_graphs[QUEUE_DEPTH_GRAPH_INDEX] == 0)
+ 		return;
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		if (tf->queue_depth_gld->max > max)
+ 			max = tf->queue_depth_gld->max;
+ 	}
+ 	list_for_each_entry(tf, &all_traces, list)
+ 		tf->queue_depth_gld->max = max;
+
+ 	plot->add_xlabel = last_active_graph == QUEUE_DEPTH_GRAPH_INDEX;
+
+ 	setup_axis(plot);
+ 	set_plot_label(plot, "Queue Depth");
+ 	if (num_traces > 1)
+ 		svg_alloc_legend(plot, num_traces);
+
+ 	set_ylabel(plot, "Pending IO");
+ 	set_yticks(plot, 4, 0, max, "");
+ 	set_xticks(plot, 9, 0, seconds);
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		svg_line_graph(plot, tf->queue_depth_gld, tf->read_color);
+ 		if (num_traces > 1)
+ 			svg_add_legend(plot, tf->label, "", tf->read_color);
+ 	}
+
+ 	if (plot->add_xlabel)
+ 		set_xlabel(plot, "Time (seconds)");
+ 	if (num_traces > 1)
+ 		svg_write_legend(plot);
+ 	close_plot(plot);
+ }
+
+ /*
+  * Emit the "IOPs" line plot.  All traces are drawn against one shared
+  * Y maximum; the unit suffix chosen by scale_line_graph_bytes() is
+  * appended to every Y tick.  Does nothing when the IOPs graph is
+  * disabled.
+  */
+ static void plot_iops(struct plot *plot, int seconds)
+ {
+ 	struct trace_file *tf;
+ 	int want_legend = num_traces > 1;
+ 	char *units;
+ 	u64 max = 0;
+
+ 	if (!active_graphs[IOPS_GRAPH_INDEX])
+ 		return;
+
+ 	/* find the largest per-trace maximum and push it into every trace */
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		if (max < tf->iop_gld->max)
+ 			max = tf->iop_gld->max;
+ 	}
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		tf->iop_gld->max = max;
+ 	}
+
+ 	plot->add_xlabel = (last_active_graph == IOPS_GRAPH_INDEX);
+ 	setup_axis(plot);
+ 	set_plot_label(plot, "IOPs");
+ 	if (want_legend)
+ 		svg_alloc_legend(plot, num_traces);
+
+ 	/* operation counts scale by 1000, not 1024 */
+ 	scale_line_graph_bytes(&max, &units, 1000);
+ 	set_ylabel(plot, "IO/s");
+
+ 	set_yticks(plot, 4, 0, max, units);
+ 	set_xticks(plot, 9, 0, seconds);
+
+ 	list_for_each_entry(tf, &all_traces, list) {
+ 		svg_line_graph(plot, tf->iop_gld, tf->read_color);
+ 		if (want_legend)
+ 			svg_add_legend(plot, tf->label, "", tf->read_color);
+ 	}
+
+ 	if (plot->add_xlabel)
+ 		set_xlabel(plot, "Time (seconds)");
+ 	if (want_legend)
+ 		svg_write_legend(plot);
+
+ 	close_plot(plot);
+ }
+
+ /*
+  * Program entry point: parse options, optionally run blktrace (and a
+  * workload) to produce the trace, load every trace, then write one SVG
+  * file containing each enabled graph stacked vertically.
+  *
+  * Exits with status 1 on any setup failure, returns 0 on success.
+  */
+ int main(int ac, char **av)
+ {
+ struct plot *plot;
+ int seconds = 0;
+ u64 max_offset = 0;
+ int fd;
+ struct trace_file *tf;
+ int ret;
+
+ init_io_hash_table();
+
+ /* every graph starts enabled; -O / -N adjust the set during parsing */
+ enable_all_graphs();
+
+ parse_options(ac, av);
+
+ /* the last enabled graph is the one that carries the X axis label */
+ last_active_graph = last_graph();
+
+ if (list_empty(&all_traces)) {
+ fprintf(stderr, "No traces found, exiting\n");
+ exit(1);
+ }
+
+ /* -d given: record a fresh trace before plotting */
+ if (blktrace_device) {
+ ret = start_blktrace(blktrace_device, blktrace_outfile,
+ blktrace_dest_dir);
+ if (ret) {
+ fprintf(stderr, "exiting due to blktrace failure\n");
+ exit(1);
+ }
+ if (program_to_run) {
+ ret = run_program(program_to_run);
+ if (ret) {
+ fprintf(stderr, "failed to run %s\n",
+ program_to_run);
+ exit(1);
+ }
+ wait_for_tracers();
+ blktrace_to_dump(blktrace_outfile);
+ } else {
+ /* no program specified, just wait for
+ * blktrace to exit
+ */
+ /* NOTE(review): blktrace_to_dump() is not called on this path - confirm that is intended */
+ wait_for_tracers();
+ }
+ }
+
+ /* step one, read all the traces */
+ read_traces();
+
+ /* step two, find the maxes for time and offset */
+ list_for_each_entry(tf, &all_traces, list)
+ compare_max_tf(tf, &seconds, &max_offset);
+
+ /* push the max we found into all the tfs */
+ set_all_max_tf(seconds, max_offset);
+
+ /* alloc graphing structs for all the traces */
+ setup_trace_file_graphs();
+
+ /* run through all the traces and read their events */
+ read_trace_events();
+
+ fd = open(output_filename, O_CREAT | O_TRUNC | O_WRONLY, 0600);
+ if (fd < 0) {
+ fprintf(stderr, "Unable to open output file %s %s\n",
+ output_filename, strerror(errno));
+ exit(1);
+ }
+
+ write_svg_header(fd);
+ plot = alloc_plot(fd);
+
+ /* the IO graph legend appends " Reads"/" Writes" to each label, so it needs extra width */
+ if (active_graphs[IO_GRAPH_INDEX])
+ set_legend_width(longest_label + strlen("writes"));
+ else if (num_traces > 1)
+ set_legend_width(longest_label);
+ else
+ set_legend_width(0);
+
+ set_plot_title(plot, graph_title);
+
+ /* each plot_* call returns immediately when its graph is disabled */
+ plot_io(plot, seconds, max_offset);
+ plot_tput(plot, seconds);
+ plot_latency(plot, seconds);
+ plot_queue_depth(plot, seconds);
+ plot_iops(plot, seconds);
+
+ /* once for all */
+ /* this closes the outermost <svg> element opened by write_svg_header() */
+ close_plot(plot);
+ close(fd);
+ return 0;
+ }
diff --git a/iowatcher/plot.c b/iowatcher/plot.c
new file mode 100644
index 0000000..785d230
--- /dev/null
+++ b/iowatcher/plot.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <inttypes.h>
+#include <string.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <math.h>
+
+#include "plot.h"
+
+ /*
+  * Layout constants shared by every plot.  They are used directly as SVG
+  * coordinates and sizes by the functions below.
+  */
+ /* width and height of the data area; also the cols/rows of the IO bitmap */
+ static int graph_width = 600;
+ static int graph_height = 150;
+ /* gap between the axis box and the data area */
+ static int graph_inner_margin = 2;
+ static int graph_tick_len = 5;
+ /* space left of the Y axis, holds the Y label and tick text */
+ static int graph_left_pad = 120;
+ /* distance from the X axis down to the tick text */
+ static int tick_label_pad = 16;
+ static int tick_font_size = 15;
+ static char *font_family = "sans-serif";
+
+ /* this is the title for the whole page */
+ static int plot_title_height = 50;
+ static int plot_title_font_size = 25;
+
+ /* this is the label at the top of each plot */
+ static int plot_label_height = 60;
+ static int plot_label_font_size = 20;
+
+ /* label for each axis is slightly smaller */
+ static int axis_label_font_size = 16;
+
+ /* legend box placement relative to the top right corner of the data area */
+ int legend_x_off = 45;
+ int legend_y_off = -10;
+ int legend_font_size = 15;
+ /* recomputed by set_legend_width() */
+ int legend_width = 80;
+
+ /* scratch buffer every writer formats one SVG fragment into before write() */
+ static int line_len = 1024;
+ static char line[1024];
+
+ /* rolling average window; 0 makes svg_line_graph() use stop_seconds / 25 */
+ static int rolling_avg_secs = 0;
+
+ /*
+  * Allocate zeroed line graph storage holding stop_seconds + 1 samples
+  * and record both time values in it.  Exits the process when memory is
+  * not available.
+  */
+ struct graph_line_data *alloc_line_data(int seconds, int stop_seconds)
+ {
+ 	struct graph_line_data *gld;
+ 	/* header followed by one sum/count pair per second, stop_seconds inclusive */
+ 	int size = sizeof(*gld) + (stop_seconds + 1) * sizeof(gld->data[0]);
+
+ 	gld = calloc(1, size);
+ 	if (gld == NULL) {
+ 		fprintf(stderr, "Unable to allocate memory for graph data\n");
+ 		exit(1);
+ 	}
+
+ 	gld->stop_seconds = stop_seconds;
+ 	gld->seconds = seconds;
+ 	return gld;
+ }
+
+ /*
+  * Release a graph_line_data together with its label.  Passing NULL is
+  * allowed and does nothing, matching free().
+  */
+ void free_line_data(struct graph_line_data *gld)
+ {
+ 	if (!gld)
+ 		return;
+ 	free(gld->label);
+ 	free(gld);
+ }
+
+ /*
+  * Allocate a zeroed dot graph: the header followed by a bitmap with one
+  * bit per (row, col) cell of the plot area.  Exits the process when
+  * memory is not available.
+  */
+ struct graph_dot_data *alloc_dot_data(int seconds, u64 max_offset, int stop_seconds)
+ {
+ 	int rows = graph_height;
+ 	int cols = graph_width;
+ 	int nr_bits;
+ 	int arr_size;
+ 	struct graph_dot_data *gdd;
+
+ 	/*
+ 	 * One spare row: set_gdd_bit() can compute row == rows for an
+ 	 * offset equal to max_offset.
+ 	 */
+ 	nr_bits = (rows + 1) * cols;
+
+ 	/* bytes, rounded up so a bit count that is not a multiple of 8 fits */
+ 	arr_size = (nr_bits + 7) / 8;
+
+ 	gdd = calloc(1, sizeof(struct graph_dot_data) + arr_size);
+ 	if (!gdd) {
+ 		fprintf(stderr, "Unable to allocate memory for graph data\n");
+ 		exit(1);
+ 	}
+ 	gdd->seconds = seconds;
+ 	gdd->stop_seconds = stop_seconds;
+ 	gdd->rows = rows;
+ 	gdd->cols = cols;
+ 	gdd->max_offset = max_offset;
+ 	return gdd;
+ }
+
+ /* Release a graph_dot_data; header and bitmap are a single allocation. */
+ void free_dot_data(struct graph_dot_data *gdd)
+ {
+ free(gdd);
+ }
+
+ /*
+  * Mark the bitmap cells covered by one IO.
+  *
+  * @gdd:    dot graph to update
+  * @offset: device offset of the IO, on the same scale as gdd->max_offset
+  * @bytes:  length of the IO
+  * @time:   time of the IO in nanoseconds
+  *
+  * IOs starting beyond max_offset are ignored.  Every accepted IO bumps
+  * total_ios once and lights one cell per row it touches.
+  */
+ void set_gdd_bit(struct graph_dot_data *gdd, u64 offset, int bytes, double time)
+ {
+ 	double bytes_per_row = (double)gdd->max_offset / gdd->rows;
+ 	double secs_per_col = (double)gdd->seconds / gdd->cols;
+ 	u64 step = (u64)bytes_per_row;
+ 	u64 remaining;
+ 	int col_int = 0;
+ 	int row_int;
+ 	int bit_index;
+
+ 	if (offset > gdd->max_offset)
+ 		return;
+
+ 	gdd->total_ios++;
+ 	if (bytes <= 0)
+ 		return;
+ 	remaining = bytes;
+
+ 	/* a row narrower than one byte must still make the loop advance */
+ 	if (step == 0)
+ 		step = 1;
+
+ 	/*
+ 	 * The column depends only on the time, so convert ns to seconds
+ 	 * once.  Doing it inside the loop divided the value again on each
+ 	 * pass and pushed every row after the first into column 0.
+ 	 */
+ 	if (secs_per_col > 0) {
+ 		double col = floor(time / 1000000000.0 / secs_per_col);
+
+ 		if (col >= gdd->cols)
+ 			col_int = gdd->cols - 1;
+ 		else if (col > 0)
+ 			col_int = (int)col;
+ 	}
+
+ 	for (;;) {
+ 		row_int = 0;
+ 		if (bytes_per_row > 0)
+ 			row_int = (int)floor((double)offset / bytes_per_row);
+ 		/* row == rows is the last row the bitmap has room for */
+ 		if (row_int > gdd->rows)
+ 			row_int = gdd->rows;
+
+ 		bit_index = row_int * gdd->cols + col_int;
+ 		gdd->data[bit_index / 8] |= 1 << (bit_index % 8);
+
+ 		if (remaining <= step)
+ 			break;
+ 		remaining -= step;
+ 		offset += step;
+ 		/* never walk past the end of the device, or of the bitmap */
+ 		if (offset > gdd->max_offset)
+ 			break;
+ 	}
+ }
+
+ /*
+  * Debug helper: dump the bitmap to stdout as text, highest row first,
+  * '*' for a set cell and ' ' for a clear one.
+  */
+ void print_gdd(struct graph_dot_data *gdd)
+ {
+ 	int row;
+ 	int col;
+
+ 	for (row = gdd->rows - 1; row >= 0; row--) {
+ 		for (col = 0; col < gdd->cols; col++) {
+ 			int bit_index = row * gdd->cols + col;
+ 			/*
+ 			 * data[] packs 8 cells per byte, as in set_gdd_bit()
+ 			 * and svg_io_graph(); sizeof(unsigned long) only
+ 			 * equals 8 by accident on 64 bit builds.
+ 			 */
+ 			unsigned char val = gdd->data[bit_index / 8];
+
+ 			if (val & (1 << (bit_index % 8)))
+ 				printf("*");
+ 			else
+ 				printf(" ");
+ 		}
+ 		printf("\n");
+ 	}
+ }
+
+ /*
+  * Mean of the per-second averages (sum / count, or 0 for an empty
+  * second) over the window that ends at @index and reaches back
+  * @distance entries, cut off at the start of the array.
+  */
+ static double rolling_avg(struct graph_line_pair *data, int index, int distance)
+ {
+ 	double total = 0;
+ 	int samples = 0;
+ 	int first;
+ 	int i;
+
+ 	if (distance < 0)
+ 		distance = 1;
+
+ 	first = (distance > index) ? 0 : index - distance;
+
+ 	for (i = first; i <= index; i++) {
+ 		if (data[i].count)
+ 			total += ((double)data[i].sum) / data[i].count;
+ 		samples++;
+ 	}
+ 	return total / samples;
+ }
+
+ /*
+  * Write the opening <svg> element and a <defs> section holding the three
+  * filters (shadow, textshadow, labelshadow) that the plots refer to.
+  */
+ void write_svg_header(int fd)
+ {
+ 	char *pieces[] = {
+ 		/* document root */
+ 		"<svg xmlns=\"http://www.w3.org/2000/svg\"\nxmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+ 		"<defs>\n",
+ 		/* drop shadow for plot backgrounds and the legend box */
+ 		"<filter id=\"shadow\">\n "
+ 		"<feOffset result=\"offOut\" in=\"SourceAlpha\" dx=\"4\" dy=\"4\" />\n "
+ 		"<feGaussianBlur result=\"blurOut\" in=\"offOut\" stdDeviation=\"2\" />\n "
+ 		"<feBlend in=\"SourceGraphic\" in2=\"blurOut\" mode=\"normal\" />\n "
+ 		"</filter>\n",
+ 		"<filter id=\"textshadow\" x=\"0\" y=\"0\" width=\"200%\" height=\"200%\">\n "
+ 		"<feOffset result=\"offOut\" in=\"SourceAlpha\" dx=\"1\" dy=\"1\" />\n "
+ 		"<feGaussianBlur result=\"blurOut\" in=\"offOut\" stdDeviation=\"1.5\" />\n "
+ 		"<feBlend in=\"SourceGraphic\" in2=\"blurOut\" mode=\"normal\" />\n "
+ 		"</filter>\n",
+ 		/* shadow behind the colored legend markers */
+ 		"<filter id=\"labelshadow\" x=\"0\" y=\"0\" width=\"200%\" height=\"200%\">\n "
+ 		"<feOffset result=\"offOut\" in=\"SourceGraphic\" dx=\"3\" dy=\"3\" />\n "
+ 		"<feColorMatrix result=\"matrixOut\" in=\"offOut\" type=\"matrix\" "
+ 		"values=\"0.2 0 0 0 0 0 0.2 0 0 0 0 0 0.2 0 0 0 0 0 1 0\" /> "
+ 		"<feGaussianBlur result=\"blurOut\" in=\"offOut\" stdDeviation=\"2\" />\n "
+ 		"<feBlend in=\"SourceGraphic\" in2=\"blurOut\" mode=\"normal\" />\n "
+ 		"</filter>\n",
+ 		"</defs>\n",
+ 	};
+ 	unsigned int i;
+
+ 	for (i = 0; i < sizeof(pieces) / sizeof(pieces[0]); i++)
+ 		write(fd, pieces[i], strlen(pieces[i]));
+ }
+
+ /*
+  * Draw a white band with a shadowed outline at the plot's current Y
+  * offset, then advance start_y_offset by the band height (45).
+  */
+ void write_drop_shadow(struct plot *plot)
+ {
+ /* white backing band across the full width */
+ snprintf(line, line_len, "<rect x=\"0\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"white\"/>\n",
+ plot->start_y_offset, plot->total_width, 45);
+ write(plot->fd, line, strlen(line));
+
+ /* outline path that carries the "shadow" filter */
+ snprintf(line, line_len, "<path d=\"M %d %d h %d v %d h %d t %d %d V %d H %d Z\" "
+ "fill=\"white\" filter=\"url(#shadow)\"/>",
+ 0, plot->start_y_offset,
+ plot->total_width - graph_left_pad / 2,
+ -plot->total_height, 24, 1, 1,
+ plot->start_y_offset + 10, 0);
+ write(plot->fd, line, strlen(line));
+
+ /* plain white path drawn on top of part of the shadowed one */
+ snprintf(line, line_len, "<path d=\"M %d %d H %d V %d h %d V %d H %d Z\" "
+ "fill=\"white\"/>",
+ 0, plot->start_y_offset - 15, /* start */
+ plot->total_width - graph_left_pad / 2 - 10, /* hline over */
+ plot->start_y_offset - plot->total_height, /* vline up */
+ 15, /*hline over */
+ plot->start_y_offset, /* vline back down */
+ 0);
+ write(plot->fd, line, strlen(line));
+
+ plot->start_y_offset += 45;
+ }
+
+ /*
+  * svg y offset for the traditional 0,0 (bottom left corner) of the plot:
+  * the label area, the data area and the inner margin stacked top down.
+  */
+ static int axis_y(void)
+ {
+ return plot_label_height + graph_height + graph_inner_margin;
+ }
+
+ /*
+  * this gives you the correct pixel for a given offset from the bottom left y axis.
+  * svg y grows downwards, so a larger @y yields a smaller result.
+  */
+ static int axis_y_off(int y)
+ {
+ return plot_label_height + graph_height - y;
+ }
+
+ /* svg x axis offset from 0: the Y axis sits graph_left_pad pixels in */
+ static int axis_x(void)
+ {
+ return graph_left_pad;
+ }
+
+ /* the correct pixel for a given X offset, measured from just inside the Y axis */
+ static int axis_x_off(int x)
+ {
+ return graph_left_pad + graph_inner_margin + x;
+ }
+
+/*
+ * this draws a backing rectangle for the plot and it
+ * also creates a new svg element so our offsets can
+ * be relative to this one plot.
+ */
+void setup_axis(struct plot *plot)
+{
+ int ret;
+ int len;
+ int fd = plot->fd;
+ int bump_height = tick_font_size * 3 + axis_label_font_size;
+
+
+ plot->total_width = axis_x_off(graph_width) + graph_left_pad / 2 + legend_width;
+ plot->total_height = axis_y() + tick_label_pad + tick_font_size;
+
+ if (plot->add_xlabel)
+ plot->total_height += bump_height;
+
+ /* backing rect */
+ snprintf(line, line_len, "<rect x=\"0\" y=\"%d\" width=\"%d\" "
+ "height=\"%d\" fill=\"white\" stroke=\"none\"/>",
+ plot->start_y_offset, plot->total_width + 40,
+ plot->total_height + 20);
+ len = strlen(line);
+ write(fd, line, len);
+ snprintf(line, line_len, "<rect x=\"15\" y=\"%d\" width=\"%d\" "
+ "filter=\"url(#shadow)\" "
+ "height=\"%d\" fill=\"white\" stroke=\"none\"/>",
+ plot->start_y_offset, plot->total_width, plot->total_height);
+ len = strlen(line);
+ write(fd, line, len);
+ plot->total_height += 20;
+
+
+ /* create an svg object for all our coords to be relative against */
+ snprintf(line, line_len, "<svg x=\"%d\" y=\"%d\">\n", plot->start_x_offset, plot->start_y_offset);
+ write(fd, line, strlen(line));
+
+ snprintf(line, 1024, "<path d=\"M%d %d h %d V %d H %d Z\" stroke=\"black\" stroke-width=\"2\" fill=\"none\"/>\n",
+ axis_x(), axis_y(),
+ graph_width + graph_inner_margin * 2, axis_y_off(graph_height) - graph_inner_margin,
+ axis_x());
+ len = strlen(line);
+ ret = write(fd, line, len);
+ if (ret != len) {
+ fprintf(stderr, "failed to write svg axis\n");
+ exit(1);
+ }
+}
+
+ /* draw a plot title. This should be done only once,
+  * and it bumps the plot width/height numbers by
+  * what it draws.
+  *
+  * Call this before setting up the first axis
+  *
+  * A NULL title is drawn as an empty string.
+  */
+ void set_plot_title(struct plot *plot, char *title)
+ {
+ 	int len;
+ 	int fd = plot->fd;
+
+ 	/* passing NULL to %s is undefined; glibc would print "(null)" */
+ 	if (!title)
+ 		title = "";
+
+ 	plot->total_height = plot_title_height;
+ 	plot->total_width = axis_x_off(graph_width) + graph_left_pad / 2 + legend_width;
+
+ 	/* backing rect */
+ 	snprintf(line, line_len, "<rect x=\"0\" y=\"%d\" width=\"%d\" height=\"%d\" fill=\"white\" stroke=\"none\"/>",
+ 		 plot->start_y_offset, plot->total_width + 40, plot_title_height + 20);
+ 	len = strlen(line);
+ 	write(fd, line, len);
+
+ 	snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ 		 "font-weight=\"bold\" fill=\"black\" style=\"text-anchor: %s\">%s</text>\n",
+ 		 axis_x_off(graph_width / 2),
+ 		 plot->start_y_offset + plot_title_height / 2,
+ 		 font_family, plot_title_font_size, "middle", title);
+ 	/* the next plot starts below the title */
+ 	plot->start_y_offset += plot_title_height;
+ 	len = strlen(line);
+ 	write(fd, line, len);
+ }
+
+/*
+ * create evenly spread out ticks along the xaxis. if tick only is set
+ * this just makes the ticks, otherwise it labels each tick as it goes
+ */
+void set_xticks(struct plot *plot, int num_ticks, int first, int last)
+{
+ int pixels_per_tick = graph_width / num_ticks;
+ int step = (last - first) / num_ticks;
+ int i;
+ int tick_y = axis_y_off(graph_tick_len) + graph_inner_margin;
+ int tick_x = axis_x();
+ int tick_only = plot->add_xlabel == 0;
+
+ int text_y = axis_y() + tick_label_pad;
+
+ char *middle = "middle";
+ char *start = "start";
+
+ for (i = 0; i < num_ticks; i++) {
+ char *anchor;
+ if (i != 0) {
+ snprintf(line, line_len, "<rect x=\"%d\" y=\"%d\" width=\"2\" height=\"%d\" style=\"stroke:none;fill:black;\"/>\n",
+ tick_x, tick_y, graph_tick_len);
+ write(plot->fd, line, strlen(line));
+ anchor = middle;
+ } else {
+ anchor = start;
+ }
+
+ if (!tick_only) {
+ snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ "fill=\"black\" style=\"text-anchor: %s\">%d</text>\n",
+ tick_x, text_y, font_family, tick_font_size, anchor, step * i);
+ write(plot->fd, line, strlen(line));
+ }
+ tick_x += pixels_per_tick;
+ }
+
+ if (!tick_only) {
+ snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ "fill=\"black\" style=\"text-anchor: middle\">%d</text>\n",
+ axis_x_off(graph_width - 2),
+ text_y, font_family, tick_font_size, last);
+ write(plot->fd, line, strlen(line));
+ }
+}
+
+ /*
+  * Write the bold Y axis label, rotated by -90 degrees around its own
+  * anchor point so it reads bottom to top, centered on the data area.
+  */
+ void set_ylabel(struct plot *plot, char *label)
+ {
+ 	/* the same point is used as text position and as rotation center */
+ 	int text_x = graph_left_pad / 2 - axis_label_font_size;
+ 	int text_y = axis_y_off(graph_height / 2);
+
+ 	snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" "
+ 		 "transform=\"rotate(-90 %d %d)\" font-weight=\"bold\" "
+ 		 "font-size=\"%d\" fill=\"black\" style=\"text-anchor: %s\">%s</text>\n",
+ 		 text_x, text_y,
+ 		 font_family,
+ 		 text_x, text_y,
+ 		 axis_label_font_size, "middle", label);
+ 	write(plot->fd, line, strlen(line));
+ }
+
+ /*
+  * Write the bold X axis label, centered under the data area and placed
+  * below the tick text.
+  */
+ void set_xlabel(struct plot *plot, char *label)
+ {
+ 	int text_x = axis_x_off(graph_width / 2);
+ 	int text_y = axis_y() + tick_font_size * 3 + axis_label_font_size / 2;
+
+ 	snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" "
+ 		 "font-weight=\"bold\" "
+ 		 "font-size=\"%d\" fill=\"black\" style=\"text-anchor: %s\">%s</text>\n",
+ 		 text_x, text_y,
+ 		 font_family,
+ 		 axis_label_font_size, "middle", label);
+ 	write(plot->fd, line, strlen(line));
+ }
+
+/*
+ * create evenly spread out ticks along the y axis.
+ * The ticks are labeled as it goes
+ */
+void set_yticks(struct plot *plot, int num_ticks, int first, int last, char *units)
+{
+ int pixels_per_tick = graph_height / num_ticks;
+ int step = (last - first) / num_ticks;
+ int i;
+ int tick_y = 0;
+ int text_x = axis_x() - 6;
+ int tick_x = axis_x();
+ char *anchor = "end";
+
+ for (i = 0; i < num_ticks; i++) {
+ if (i != 0) {
+ snprintf(line, line_len, "<line x1=\"%d\" y1=\"%d\" x2=\"%d\" y2=\"%d\" "
+ "style=\"stroke:lightgray;stroke-width:2;stroke-dasharray:9,12;\"/>\n",
+ tick_x, axis_y_off(tick_y),
+ axis_x_off(graph_width), axis_y_off(tick_y));
+ write(plot->fd, line, strlen(line));
+ }
+
+ snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ "fill=\"black\" style=\"text-anchor: %s\">%d%s</text>\n",
+ text_x,
+ axis_y_off(tick_y - tick_font_size / 2),
+ font_family, tick_font_size, anchor, step * i, units);
+ write(plot->fd, line, strlen(line));
+ tick_y += pixels_per_tick;
+ }
+ snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ "fill=\"black\" style=\"text-anchor: %s\">%d%s</text>\n",
+ text_x, axis_y_off(graph_height), font_family, tick_font_size, anchor, last, units);
+ write(plot->fd, line, strlen(line));
+}
+
+ /*
+  * Write the per-plot heading, centered over the data area and half way
+  * down the label strip at the top of the plot.
+  */
+ void set_plot_label(struct plot *plot, char *label)
+ {
+ 	int text_x = axis_x() + graph_width / 2;
+ 	int text_y = plot_label_height / 2;
+
+ 	snprintf(line, line_len, "<text x=\"%d\" y=\"%d\" font-family=\"%s\" "
+ 		 "font-size=\"%d\" fill=\"black\" style=\"text-anchor: %s\">%s</text>\n",
+ 		 text_x, text_y,
+ 		 font_family, plot_label_font_size, "middle", label);
+ 	write(plot->fd, line, strlen(line));
+ }
+
+ /* Write the closing tag of the innermost open <svg> element. */
+ static void close_svg(int fd)
+ {
+ 	static const char closing_tag[] = "</svg>\n";
+
+ 	/* sizeof counts the terminating NUL, which must not be written */
+ 	write(fd, closing_tag, sizeof(closing_tag) - 1);
+ }
+
+ /*
+  * Finish the current plot: close its <svg> element, move start_y_offset
+  * below the space the plot used and clear add_xlabel for the next plot.
+  * Always returns 0.
+  */
+ int close_plot(struct plot *plot)
+ {
+ close_svg(plot->fd);
+ plot->start_y_offset += plot->total_height;
+ plot->add_xlabel = 0;
+ return 0;
+ }
+
+ /*
+  * Allocate a zeroed plot bound to the output descriptor @fd.  Exits the
+  * process when memory is not available.
+  */
+ struct plot *alloc_plot(int fd)
+ {
+ 	struct plot *plot = calloc(1, sizeof(*plot));
+
+ 	if (plot == NULL) {
+ 		fprintf(stderr, "Unable to allocate memory %s\n", strerror(errno));
+ 		exit(1);
+ 	}
+
+ 	plot->fd = fd;
+ 	return plot;
+ }
+
+ /* unit prefixes indexed by how many times a value was divided by the factor */
+ char *byte_unit_names[] = { "", "K", "M", "G", "T", "P", "E", "Z", "Y", "unobtainium" };
+ /* highest valid index into byte_unit_names */
+ int MAX_BYTE_UNIT_SCALE = 9;
+
+ /* time prefixes starting at nanoseconds; each step is a division by 1000 */
+ char *time_unit_names[] = { "n", "u", "m", "s" };
+ /* highest valid index into time_unit_names */
+ int MAX_TIME_UNIT_SCALE = 3;
+
+ /*
+  * Reduce *max to a readable magnitude by dividing it by @factor until it
+  * is no larger than 64 * factor, and report the matching unit prefix.
+  *
+  * @max:    in: raw maximum; out: maximum expressed in the chosen unit
+  * @units:  out: entry of byte_unit_names for the chosen scale
+  * @factor: base of the unit system, 1024 or 1000 for the callers in view
+  *
+  * Scaling stops at the last entry of byte_unit_names, so the table is
+  * never indexed out of range.  A factor below 2 cannot shrink the value
+  * and leaves *max unscaled.
+  */
+ void scale_line_graph_bytes(u64 *max, char **units, u64 factor)
+ {
+ 	int scale = 0;
+ 	u64 val = *max;
+ 	u64 div = 1;
+
+ 	if (factor < 2) {
+ 		*units = byte_unit_names[0];
+ 		return;
+ 	}
+
+ 	while (val > factor * 64 && scale < MAX_BYTE_UNIT_SCALE) {
+ 		val /= factor;
+ 		scale++;
+ 		div *= factor;
+ 	}
+ 	*units = byte_unit_names[scale];
+ 	if (scale == 0)
+ 		return;
+
+ 	*max /= div;
+ }
+
+ /*
+  * Reduce a nanosecond maximum to a readable magnitude: divide by 1000
+  * while the value exceeds 10000, stopping once the scale reaches
+  * MAX_TIME_UNIT_SCALE, and report the matching time_unit_names prefix.
+  */
+ void scale_line_graph_time(u64 *max, char **units)
+ {
+ 	u64 remaining = *max;
+ 	u64 divisor = 1;
+ 	int scale = 0;
+
+ 	for (;;) {
+ 		if (remaining <= 1000 * 10)
+ 			break;
+ 		remaining /= 1000;
+ 		divisor *= 1000;
+ 		scale++;
+ 		if (scale == MAX_TIME_UNIT_SCALE)
+ 			break;
+ 	}
+
+ 	*units = time_unit_names[scale];
+ 	if (scale != 0)
+ 		*max /= divisor;
+ }
+
+ /*
+  * Draw one line graph as a single SVG <path>: one point per second up
+  * to gld->stop_seconds, each the rolling average around that second,
+  * scaled so that gld->max reaches the top of the data area.
+  *
+  * An all-zero graph (max == 0) is drawn flat on the X axis and a graph
+  * of at most one second is drawn at x == 0; without these guards the
+  * scale factors are zero and the divisions yield NaN or infinity.
+  *
+  * Always returns 0.
+  */
+ int svg_line_graph(struct plot *plot, struct graph_line_data *gld, char *color)
+ {
+ 	int i;
+ 	double val;
+ 	int rolling;
+ 	int fd = plot->fd;
+ 	char *start = "<path d=\"";
+ 	double yscale = ((double)gld->max) / graph_height;
+ 	double xscale = (double)(gld->seconds - 1) / graph_width;
+ 	char c = 'M';
+ 	double x;
+
+ 	/* default window is 1/25th of the graph unless set_rolling_avg() chose one */
+ 	if (rolling_avg_secs)
+ 		rolling = rolling_avg_secs;
+ 	else
+ 		rolling = gld->stop_seconds / 25;
+
+ 	write(fd, start, strlen(start));
+ 	for (i = 0; i < gld->stop_seconds; i++) {
+ 		val = rolling_avg(gld->data, i, rolling);
+ 		val = yscale > 0 ? val / yscale : 0;
+ 		x = xscale > 0 ? (double)i / xscale : 0;
+ 		snprintf(line, line_len, "%c %d %d ", c, axis_x_off(x), axis_y_off(val));
+
+ 		/* the first point is a move-to, every later one a line-to */
+ 		c = 'L';
+ 		write(fd, line, strlen(line));
+ 	}
+ 	snprintf(line, line_len, "\" fill=\"none\" stroke=\"%s\" stroke-width=\"2\"/>\n", color);
+ 	write(fd, line, strlen(line));
+
+ 	return 0;
+ }
+
+ /*
+  * Write one 1.5 x 1.5 rounded dot for the bitmap cell (row, col) in the
+  * given color.  Returns the result of write().
+  */
+ static int svg_add_io(int fd, int row, int col, char *color)
+ {
+ snprintf(line, line_len, "<rect x=\"%d\" y=\"%d\" width=\"1.5\" height=\"1.5\" rx=\"0.5\" style=\"stroke:none;fill:%s\"/>\n",
+ axis_x_off(col), axis_y_off(row), color);
+ return write(fd, line, strlen(line));
+ }
+
+ /*
+  * Draw every set cell of the dot bitmap, scanning rows from rows - 1
+  * down to 0 and columns left to right.  Always returns 0.
+  */
+ int svg_io_graph(struct plot *plot, struct graph_dot_data *gdd, char *color)
+ {
+ 	int fd = plot->fd;
+ 	int row;
+ 	int col;
+
+ 	for (row = gdd->rows - 1; row >= 0; row--) {
+ 		for (col = 0; col < gdd->cols; col++) {
+ 			int bit_index = row * gdd->cols + col;
+ 			int arr_index = bit_index / 8;
+ 			unsigned char cell;
+
+ 			if (arr_index < 0)
+ 				continue;
+
+ 			/* 8 cells per byte, lowest bit first */
+ 			cell = gdd->data[arr_index];
+ 			if (cell & (1 << (bit_index % 8)))
+ 				svg_add_io(fd, row, col, color);
+ 		}
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Start a new legend with room for @num_lines entries.
+  *
+  * A legend left over from an earlier plot (allocated but never written
+  * out) is released first so it does not leak.  Exits the process when
+  * memory is not available.
+  */
+ void svg_alloc_legend(struct plot *plot, int num_lines)
+ {
+ 	char **lines;
+ 	int i;
+
+ 	if (plot->legend_lines) {
+ 		for (i = 0; i < plot->legend_index; i++)
+ 			free(plot->legend_lines[i]);
+ 		free(plot->legend_lines);
+ 	}
+
+ 	if (num_lines < 0)
+ 		num_lines = 0;
+
+ 	lines = calloc(num_lines, sizeof(char *));
+ 	/* calloc may legitimately return NULL for a zero sized request */
+ 	if (!lines && num_lines > 0) {
+ 		fprintf(stderr, "Unable to allocate memory for legend\n");
+ 		exit(1);
+ 	}
+ 	plot->legend_index = 0;
+ 	plot->legend_lines = lines;
+ 	plot->num_legend_lines = num_lines;
+ }
+
+ /*
+  * Write the legend box followed by every queued legend entry, then
+  * release the legend storage.  With no queued entries nothing is
+  * drawn, but the storage is still released.
+  */
+ void svg_write_legend(struct plot *plot)
+ {
+ 	int i;
+ 	int legend_line_x = axis_x_off(graph_width) + legend_x_off;
+ 	int legend_line_y = axis_y_off(graph_height) + legend_y_off;
+
+ 	if (plot->legend_index == 0) {
+ 		/* returning without freeing would leak the empty table */
+ 		free(plot->legend_lines);
+ 		plot->legend_lines = 0;
+ 		return;
+ 	}
+
+ 	/* shadowed box sized to the number of entries */
+ 	snprintf(line, line_len, "<rect x=\"%d\" y=\"%d\" width=\"%d\" height=\"%d\" "
+ 		 "fill=\"white\" filter=\"url(#shadow)\"/>\n",
+ 		 legend_line_x - 15,
+ 		 legend_line_y - 12,
+ 		 legend_width,
+ 		 plot->legend_index * legend_font_size + legend_font_size / 2 + 12);
+
+ 	write(plot->fd, line, strlen(line));
+ 	for (i = 0; i < plot->legend_index; i++) {
+ 		write(plot->fd, plot->legend_lines[i],
+ 		      strlen(plot->legend_lines[i]));
+ 		free(plot->legend_lines[i]);
+ 	}
+ 	free(plot->legend_lines);
+ 	plot->legend_index = 0;
+ 	plot->legend_lines = 0;
+ }
+
+ /*
+  * Queue one legend entry: a colored marker followed by @text and
+  * @extra.  The entry is formatted now and written by svg_write_legend().
+  *
+  * Nothing is queued when there is no text at all, when no legend was
+  * allocated, or when the legend is already full.  A NULL @text or
+  * @extra is treated as an empty string.  Exits the process when memory
+  * is not available.
+  */
+ void svg_add_legend(struct plot *plot, char *text, char *extra, char *color)
+ {
+ 	int legend_line_x = axis_x_off(graph_width) + legend_x_off;
+ 	int legend_line_y = axis_y_off(graph_height) + legend_y_off;
+ 	char *entry;
+
+ 	if (!text && (!extra || strlen(extra) == 0))
+ 		return;
+
+ 	/* never write past the table svg_alloc_legend() sized */
+ 	if (!plot->legend_lines || plot->legend_index >= plot->num_legend_lines)
+ 		return;
+
+ 	if (!text)
+ 		text = "";
+ 	if (!extra)
+ 		extra = "";
+
+ 	/* stack the entries top down, one font height apart */
+ 	legend_line_y += plot->legend_index * legend_font_size + legend_font_size / 2;
+ 	snprintf(line, line_len, "<path d=\"M %d %d h 8\" stroke=\"%s\" stroke-width=\"8\" "
+ 		 "filter=\"url(#labelshadow)\"/> "
+ 		 "<text x=\"%d\" y=\"%d\" font-family=\"%s\" font-size=\"%d\" "
+ 		 "fill=\"black\" style=\"text-anchor: left\">%s%s</text>\n",
+ 		 legend_line_x, legend_line_y,
+ 		 color, legend_line_x + 13,
+ 		 legend_line_y + 4, font_family, legend_font_size,
+ 		 text, extra);
+
+ 	entry = strdup(line);
+ 	if (!entry) {
+ 		fprintf(stderr, "Unable to allocate memory for legend\n");
+ 		exit(1);
+ 	}
+ 	plot->legend_lines[plot->legend_index++] = entry;
+ }
+
+ /*
+  * Size the legend column for labels of up to @longest_str characters:
+  * three quarters of the legend font size per character plus 25 pixels.
+  * A length of 0 removes the legend column.
+  */
+ void set_legend_width(int longest_str)
+ {
+ 	int per_char = legend_font_size * 3 / 4;
+
+ 	legend_width = longest_str ? longest_str * per_char + 25 : 0;
+ }
+
+ /*
+  * Set the rolling average window used by svg_line_graph().  0 selects
+  * the default of stop_seconds / 25.
+  */
+ void set_rolling_avg(int rolling)
+ {
+ rolling_avg_secs = rolling;
+ }
+
diff --git a/iowatcher/plot.h b/iowatcher/plot.h
new file mode 100644
index 0000000..655dac0
--- /dev/null
+++ b/iowatcher/plot.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef __IOWATCH_PLOT__
+#define __IOWATCH_PLOT__
+#define MAX_TICKS 10
+
+ /*
+  * Short names for the fixed width unsigned types.  This header does not
+  * include anything itself; plot.c and tracers.c include <asm/types.h>
+  * before it, presumably to provide the __uNN names - TODO confirm.
+  */
+ typedef __u64 u64;
+ typedef __u32 u32;
+ typedef __u16 u16;
+
+ /*
+  * State of the SVG document being written: the output descriptor, the
+  * position of the next plot, and the legend being collected for the
+  * current plot.
+  */
+ struct plot {
+ /* descriptor every SVG fragment is written to */
+ int fd;
+
+ /* svg style y = 0 is the top of the graph */
+ int start_y_offset;
+
+ /* abs coords of the start of X start of the plot */
+ int start_x_offset;
+
+ /* non-zero when this plot draws the X tick labels and the X axis label */
+ int add_xlabel;
+
+ /*
+ * these two are for anyone that wants
+ * to add a plot after this one, it tells
+ * them how much space we took up
+ */
+ int total_height;
+ int total_width;
+ /* formatted legend entries queued by svg_add_legend() */
+ char **legend_lines;
+ /* number of entries queued so far */
+ int legend_index;
+ /* capacity of legend_lines */
+ int num_legend_lines;
+ };
+
+ /* one sample slot of a line graph; it is plotted as sum / count */
+ struct graph_line_pair {
+ u64 count;
+ u64 sum;
+ };
+
+ /* data for one line graph, allocated by alloc_line_data() */
+ struct graph_line_data {
+ /* total number of seconds in this graph */
+ int seconds;
+
+ /* number of samples drawn; data[] holds stop_seconds + 1 slots */
+ int stop_seconds;
+
+ /* Y max */
+ u64 max;
+
+ /* label for this graph */
+ char *label;
+ /* one slot per second, allocated together with the header */
+ struct graph_line_pair data[];
+ };
+
+ /* data for one dot (IO offset over time) graph, allocated by alloc_dot_data() */
+ struct graph_dot_data {
+ /* largest offset the bitmap covers; larger offsets are ignored */
+ u64 max_offset;
+ /* number of IOs recorded by set_gdd_bit() */
+ u64 total_ios;
+
+ /* in pixels, number of rows in our bitmap */
+ int rows;
+ /* in pixels, number of cols in our bitmap */
+ int cols;
+
+ /* total number of seconds in this graph */
+ int seconds;
+ int stop_seconds;
+
+ /* label for the legend */
+ char *label;
+
+ /* bitmap, one bit for each cell to light up */
+ unsigned char data[];
+ };
+
+ /* public interface of plot.c */
+ int svg_io_graph(struct plot *plot, struct graph_dot_data *gdd, char *color);
+ int svg_line_graph(struct plot *plot, struct graph_line_data *gld, char *color);
+ struct graph_line_data *alloc_line_data(int seconds, int stop_seconds);
+ void free_line_data(struct graph_line_data *gld);
+ struct graph_dot_data *alloc_dot_data(int seconds, u64 max_offset, int stop_seconds);
+ void free_dot_data(struct graph_dot_data *gdd);
+ void set_gdd_bit(struct graph_dot_data *gdd, u64 offset, int bytes, double time);
+ void print_gdd(struct graph_dot_data *gdd);
+ void write_svg_header(int fd);
+ struct plot *alloc_plot(int fd);
+ int close_plot(struct plot *plot);
+ void setup_axis(struct plot *plot);
+ void set_xticks(struct plot *plot, int num_ticks, int first, int last);
+ void set_yticks(struct plot *plot, int num_ticks, int first, int last, char *units);
+ void set_plot_title(struct plot *plot, char *title);
+ void set_plot_label(struct plot *plot, char *label);
+ void set_xlabel(struct plot *plot, char *label);
+ void set_ylabel(struct plot *plot, char *label);
+ void scale_line_graph_bytes(u64 *max, char **units, u64 factor);
+ void scale_line_graph_time(u64 *max, char **units);
+ void write_drop_shadow(struct plot *plot);
+ /* NOTE(review): no definition of write_drop_shadow_line() is visible in plot.c - confirm it exists */
+ void write_drop_shadow_line(struct plot *plot);
+ void svg_write_legend(struct plot *plot);
+ void svg_add_legend(struct plot *plot, char *text, char *extra, char *color);
+ void svg_alloc_legend(struct plot *plot, int num_lines);
+ void set_legend_width(int longest_str);
+ void set_rolling_avg(int rolling);
+#endif
diff --git a/iowatcher/tracers.c b/iowatcher/tracers.c
new file mode 100644
index 0000000..3309bef
--- /dev/null
+++ b/iowatcher/tracers.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <inttypes.h>
+#include <string.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+#include "plot.h"
+#include "blkparse.h"
+#include "list.h"
+
+/* pid of the blktrace child we started; 0 means no tracer is being tracked */
+static pid_t blktrace_pid = 0;
+
+/*
+ * Argument vector handed to execvp() by start_blktrace().  The NULL right
+ * after "-d" is a placeholder that start_blktrace() replaces with the
+ * device; the final NULL terminates the vector.
+ *
+ * NOTE(review): execvp() receives this array as argv, so "-d" ends up in
+ * argv[0] and the device in argv[1].  Presumably blktrace then picks the
+ * device up as a positional argument -- confirm, or prepend "blktrace" here
+ * and bump the index macros below.
+ */
+char *blktrace_args[] = {
+ "-d", NULL,
+ "-b", "16384",
+ "-o", "trace",
+ "-D", ".",
+ "-a", "queue",
+ "-a", "complete",
+ "-a", "issue",
+ NULL,
+};
+
+/*
+ * Slots of blktrace_args[] that start_blktrace() overwrites: the device
+ * always, the destination directory and trace name only when the caller
+ * supplies them.
+ */
+#define DEVICE_INDEX 1
+#define DEST_DIR_INDEX 7
+#define TRACE_NAME_INDEX 5
+
+/*
+ * Send SIGTERM to the blktrace child started by start_blktrace() and reap it.
+ *
+ * Returns 0 when no tracer is running or once the child has been signalled
+ * and waited for, and a negative errno value when kill() fails.
+ */
+int stop_blktrace(void)
+{
+	int ret;
+	int err;
+	pid_t pid;
+	pid_t pid_ret;
+	int status = 0;
+
+	if (blktrace_pid == 0)
+		return 0;
+
+	/*
+	 * Forget the pid before signalling so that a second call (for
+	 * example from the signal handler) does not signal it again.
+	 */
+	pid = blktrace_pid;
+	blktrace_pid = 0;
+	ret = kill(pid, SIGTERM);
+	if (ret) {
+		/* fprintf() may change errno, so keep a copy to return */
+		err = errno;
+		fprintf(stderr, "failed to stop blktrace pid %lu error %s\n",
+			(unsigned long)pid, strerror(err));
+		return -err;
+	}
+	pid_ret = waitpid(pid, &status, WUNTRACED);
+	if (pid_ret != pid)
+		return 0;
+
+	/* WEXITSTATUS() is only meaningful when the child exited normally */
+	if (WIFEXITED(status)) {
+		if (WEXITSTATUS(status) != 0)
+			fprintf(stderr, "blktrace returns error %d\n",
+				WEXITSTATUS(status));
+	} else if (WIFSIGNALED(status) && WTERMSIG(status) != SIGTERM) {
+		/* dying from the SIGTERM we just sent is expected, anything else is not */
+		fprintf(stderr, "blktrace killed by signal %d\n",
+			WTERMSIG(status));
+	}
+	return 0;
+}
+
+
+/*
+ * Shut down every tracer this file can start.  blktrace is the only one,
+ * so this simply forwards to stop_blktrace() and drops its return value.
+ */
+void stop_all_tracers(void)
+{
+	(void)stop_blktrace();
+}
+
+/*
+ * Signal handler installed by start_blktrace() for SIGTERM and SIGINT.
+ * Reports the signal number @val on stderr and stops the tracers.
+ *
+ * The message is assembled by hand and emitted with a single write():
+ * stdio is not async-signal-safe, so fprintf() must not be used here.
+ * errno is saved and restored so the interrupted code does not see a
+ * value changed by the handler.  Note that stop_blktrace() still uses
+ * fprintf() on its own error paths.
+ */
+void sig_handler_for_quit(int val)
+{
+	static const char head[] = "iowatcher exiting with ";
+	static const char tail[] = ", stopping tracers\n";
+	char num[12];
+	char msg[sizeof(head) + sizeof(num) + sizeof(tail)];
+	size_t start = sizeof(num);
+	size_t len = 0;
+	unsigned int mag = val < 0 ? 0u - (unsigned int)val : (unsigned int)val;
+	int saved_errno = errno;
+
+	/* render @val in decimal, filling num[] from the right */
+	do {
+		num[--start] = (char)('0' + mag % 10);
+		mag /= 10;
+	} while (mag != 0);
+	if (val < 0)
+		num[--start] = '-';
+
+	memcpy(msg + len, head, sizeof(head) - 1);
+	len += sizeof(head) - 1;
+	memcpy(msg + len, num + start, sizeof(num) - start);
+	len += sizeof(num) - start;
+	memcpy(msg + len, tail, sizeof(tail) - 1);
+	len += sizeof(tail) - 1;
+
+	if (write(STDERR_FILENO, msg, len) < 0) {
+		/* nothing sensible can be done about a failed write here */
+	}
+
+	stop_all_tracers();
+	errno = saved_errno;
+}
+
+
+/*
+ * Fork and exec blktrace for @device.
+ *
+ * @device:     block device to trace, stored in the slot after "-d"
+ * @trace_name: replaces the default "-o" value when non-NULL
+ * @dest:       replaces the default "-D" value when non-NULL
+ *
+ * On success the child's pid is remembered in blktrace_pid and handlers
+ * for SIGTERM and SIGINT are installed so the tracer gets stopped when
+ * one of those signals arrives.
+ *
+ * Returns 0 on success, -EINVAL when @device is NULL, or a negative errno
+ * value when fork() fails.
+ */
+int start_blktrace(char *device, char *trace_name, char *dest)
+{
+	pid_t pid;
+	int err;
+	char **arg = blktrace_args;
+
+	/* a NULL device would end the argument vector right after "-d" */
+	if (!device) {
+		fprintf(stderr, "no device specified for blktrace\n");
+		return -EINVAL;
+	}
+	blktrace_args[DEVICE_INDEX] = device;
+
+	fprintf(stderr, "running blktrace");
+	if (dest)
+		blktrace_args[DEST_DIR_INDEX] = dest;
+	if (trace_name)
+		blktrace_args[TRACE_NAME_INDEX] = trace_name;
+
+	while (*arg) {
+		fprintf(stderr, " %s", *arg);
+		arg++;
+	}
+	fprintf(stderr, "\n");
+
+	pid = fork();
+	if (pid < 0) {
+		/*
+		 * Never store -1 in blktrace_pid: stop_blktrace() hands that
+		 * value to kill(), and kill(-1, ...) signals every process
+		 * we are allowed to signal.
+		 */
+		err = errno;
+		fprintf(stderr, "failed to fork blktrace error %s\n",
+			strerror(err));
+		return -err;
+	}
+
+	if (pid == 0) {
+		/* execvp() only returns when it failed */
+		execvp("blktrace", blktrace_args);
+		err = errno;
+		fprintf(stderr, "failed to exec blktrace error %s\n",
+			strerror(err));
+		/* _exit() keeps the child from flushing stdio buffers inherited from the parent */
+		_exit(err);
+	}
+
+	blktrace_pid = pid;
+	signal(SIGTERM, sig_handler_for_quit);
+	signal(SIGINT, sig_handler_for_quit);
+	return 0;
+}
+
+/*
+ * Run @str through system() and stop the tracers once it is done.
+ *
+ * Returns 0 when system() could run the command, or a negative errno
+ * value when system() itself failed (returned -1).  The exit status of
+ * the command is not inspected.
+ */
+int run_program(char *str)
+{
+	int ret;
+	int err;
+
+	fprintf(stderr, "running program %s\n", str);
+	ret = system(str);
+	if (ret == -1) {
+		/*
+		 * Save errno first: fprintf() and the kill()/waitpid() calls
+		 * made while stopping the tracers may overwrite it.
+		 */
+		err = errno;
+		fprintf(stderr, "failed to run program %s error %s\n", str,
+			strerror(err));
+		stop_all_tracers();
+		return -err;
+	}
+	stop_blktrace();
+	return 0;
+}
+
+/*
+ * Wait for the blktrace child, if one is being tracked, and then forget
+ * its pid.  The wait status is collected but not examined.  Always
+ * returns 0.
+ */
+int wait_for_tracers(void)
+{
+	int wstatus = 0;
+
+	if (blktrace_pid != 0) {
+		waitpid(blktrace_pid, &wstatus, WUNTRACED);
+		blktrace_pid = 0;
+	}
+	return 0;
+}
+
+/*
+ * Run blkparse on @trace_name and have it write "<trace_name>.dump".
+ *
+ * Returns 0 once the command has been handed to system(), -ENAMETOOLONG
+ * when the command does not fit into the local buffer, or a negative
+ * errno value when system() itself fails.  The exit status of blkparse
+ * is not inspected.
+ */
+int blktrace_to_dump(char *trace_name)
+{
+	char line[1024];
+	int len;
+	int err;
+
+	/*
+	 * Quote both uses of the name so that names with spaces survive
+	 * the shell.
+	 * NOTE(review): a name containing a single quote still breaks out
+	 * of the quoting; exec'ing blkparse directly would avoid the shell.
+	 */
+	len = snprintf(line, sizeof(line), "blkparse -O -i '%s' -d '%s.%s'",
+		       trace_name, trace_name, "dump");
+	if (len < 0 || (size_t)len >= sizeof(line)) {
+		/* refuse to run a command that was cut off mid-way */
+		fprintf(stderr, "trace name %s is too long\n", trace_name);
+		return -ENAMETOOLONG;
+	}
+
+	if (system(line) == -1) {
+		err = errno;
+		fprintf(stderr, "failed to run %s error %s\n", line,
+			strerror(err));
+		return -err;
+	}
+	return 0;
+}
diff --git a/iowatcher/tracers.h b/iowatcher/tracers.h
new file mode 100644
index 0000000..f9b71d6
--- /dev/null
+++ b/iowatcher/tracers.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012 Fusion-io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef __IOWATCH_TRACERS
+#define __IOWATCH_TRACERS
+
+/* run a command line through system(), then stop blktrace */
+int run_program(char *str);
+
+/* send SIGTERM to the blktrace child, if any, and reap it */
+int stop_blktrace(void);
+
+/* fork and exec blktrace; NULL trace_name/dest keep the built-in defaults */
+int start_blktrace(char *device, char *trace_name, char *dest);
+
+/*
+ * Stop every tracer.  Takes no arguments: the prototype has to match the
+ * definition in tracers.c, which is stop_all_tracers(void).
+ */
+void stop_all_tracers(void);
+
+/* wait for the blktrace child, if any, and forget its pid */
+int wait_for_tracers(void);
+
+/* run blkparse on trace_name to produce "<trace_name>.dump" */
+int blktrace_to_dump(char *trace_name);
+
+#endif