aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2018-02-02 09:32:46 -0600
committer: Eric Sandeen <sandeen@redhat.com> 2018-02-02 09:32:46 -0600
commit: 03c0cd8ffaed63b31f50698c4d9d1cb75c14d47b (patch)
tree: 2013efa9dfc7504de9320c07b7473daf91a069e9
parent: b364a9c008fc049089844ec137225e1025ad7955 (diff)
download: xfsprogs-dev-03c0cd8ffaed63b31f50698c4d9d1cb75c14d47b.tar.gz
xfs_scrub: optionally use SCSI READ VERIFY commands to scrub data blocks on disk
If we sense that we're talking to a raw SCSI disk, use the SCSI READ VERIFY command to ask the disk to verify its contents internally. This can sharply reduce the runtime of the data block verification phase on devices whose internal bandwidth exceeds their link bandwidth.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
-rw-r--r-- configure.ac 2
-rw-r--r-- include/builddefs.in 2
-rw-r--r-- m4/package_libcdev.m4 30
-rw-r--r-- scrub/Makefile 8
-rw-r--r-- scrub/disk.c 146
-rw-r--r-- scrub/disk.h 1
6 files changed, 188 insertions, 1 deletions
diff --git a/configure.ac b/configure.ac
index 8eda010664..bb032e5c0a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -172,6 +172,8 @@ AC_PACKAGE_WANT_UNINORM_H
AC_HAVE_U8NORMALIZE
AC_HAVE_OPENAT
AC_HAVE_FSTATAT
+AC_HAVE_SG_IO
+AC_HAVE_HDIO_GETGEO
if test "$enable_blkid" = yes; then
AC_HAVE_BLKID_TOPO
diff --git a/include/builddefs.in b/include/builddefs.in
index 2f8d33fb56..d44faf91cf 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -125,6 +125,8 @@ HAVE_LIBATTR = @have_libattr@
HAVE_U8NORMALIZE = @have_u8normalize@
HAVE_OPENAT = @have_openat@
HAVE_FSTATAT = @have_fstatat@
+HAVE_SG_IO = @have_sg_io@
+HAVE_HDIO_GETGEO = @have_hdio_getgeo@
GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall
# -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index e0abc12411..9258c271fd 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -390,3 +390,33 @@ AC_DEFUN([AC_HAVE_FSTATAT],
#include <unistd.h>])
AC_SUBST(have_fstatat)
])
+
+#
+# Check if we have the SG_IO ioctl.  The test program includes
+# <sys/ioctl.h> so that the ioctl call has a prototype; a compiler that
+# rejects implicit function declarations would otherwise fail this
+# probe even though SG_IO is available.
+#
+AC_DEFUN([AC_HAVE_SG_IO],
+  [ AC_MSG_CHECKING([for struct sg_io_hdr ])
+    AC_TRY_COMPILE([
+#include <sys/ioctl.h>
+#include <scsi/sg.h>
+    ], [
+         struct sg_io_hdr hdr;
+         ioctl(0, SG_IO, &hdr);
+    ], have_sg_io=yes
+       AC_MSG_RESULT(yes),
+       AC_MSG_RESULT(no))
+    AC_SUBST(have_sg_io)
+  ])
+
+#
+# Check if we have the HDIO_GETGEO ioctl.  The test program includes
+# <sys/ioctl.h> so that the ioctl call has a prototype; a compiler that
+# rejects implicit function declarations would otherwise fail this
+# probe even though HDIO_GETGEO is available.
+#
+AC_DEFUN([AC_HAVE_HDIO_GETGEO],
+  [ AC_MSG_CHECKING([for struct hd_geometry ])
+    AC_TRY_COMPILE([
+#include <sys/ioctl.h>
+#include <linux/hdreg.h>
+    ], [
+         struct hd_geometry hdr;
+         ioctl(0, HDIO_GETGEO, &hdr);
+    ], have_hdio_getgeo=yes
+       AC_MSG_RESULT(yes),
+       AC_MSG_RESULT(no))
+    AC_SUBST(have_hdio_getgeo)
+  ])
diff --git a/scrub/Makefile b/scrub/Makefile
index 4b70efa733..1fb6e84f6d 100644
--- a/scrub/Makefile
+++ b/scrub/Makefile
@@ -70,6 +70,14 @@ CFILES += unicrash.c
LCFLAGS += -DHAVE_U8NORMALIZE
endif
+ifeq ($(HAVE_SG_IO),yes)
+LCFLAGS += -DHAVE_SG_IO
+endif
+
+ifeq ($(HAVE_HDIO_GETGEO),yes)
+LCFLAGS += -DHAVE_HDIO_GETGEO
+endif
+
default: depend $(LTCOMMAND)
phase5.o unicrash.o xfs.o: $(TOPDIR)/include/builddefs
diff --git a/scrub/disk.c b/scrub/disk.c
index e36ed6bc10..e12175c865 100644
--- a/scrub/disk.c
+++ b/scrub/disk.c
@@ -29,12 +29,19 @@
#include <sys/statvfs.h>
#include <sys/vfs.h>
#include <linux/fs.h>
+#ifdef HAVE_SG_IO
+# include <scsi/sg.h>
+#endif
+#ifdef HAVE_HDIO_GETGEO
+# include <linux/hdreg.h>
+#endif
#include "platform_defs.h"
#include "libfrog.h"
#include "xfs.h"
#include "path.h"
#include "xfs_fs.h"
#include "xfs_scrub.h"
+#include "common.h"
#include "disk.h"
#ifndef BLKROTATIONAL
@@ -94,12 +101,119 @@ disk_heads(
return __disk_heads(disk);
}
+/*
+ * Execute a SCSI VERIFY(16) to verify disk contents.
+ * For devices that support this command, this can sharply reduce the
+ * runtime of the data block verification phase if the storage device's
+ * internal bandwidth exceeds its link bandwidth.  However, it only
+ * works if we're talking to a raw SCSI device, and only if we trust the
+ * firmware.
+ */
+#ifdef HAVE_SG_IO
+# define SENSE_BUF_LEN		64
+# define VERIFY16_CMDLEN	16
+# define VERIFY16_CMD		0x8F
+
+# ifndef SG_FLAG_Q_AT_TAIL
+#  define SG_FLAG_Q_AT_TAIL	0x10
+# endif
+
+/*
+ * Ask the device behind disk->d_fd to verify blockcount blocks starting
+ * at startblock (offset by the device start recorded in disk->d_start),
+ * using SG_IO with no data transfer to the host.
+ *
+ * Returns 0 when the ioctl succeeds and the device does not set
+ * SG_INFO_CHECK.  Returns the nonzero ioctl() result if SG_IO itself
+ * fails, or -1 with errno set to EIO if the device flagged the command
+ * with SG_INFO_CHECK, or -1 with errno set to EINVAL if blockcount does
+ * not fit in the 32-bit VERIFY(16) length field.
+ */
+static int
+disk_scsi_verify(
+	struct disk		*disk,
+	uint64_t		startblock, /* lba */
+	uint64_t		blockcount) /* lba */
+{
+	struct sg_io_hdr	iohdr;
+	unsigned char		cdb[VERIFY16_CMDLEN];
+	unsigned char		sense[SENSE_BUF_LEN];
+	uint64_t		llba;
+	uint32_t		veri_len;
+	int			error;
+
+	assert(!debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"));
+
+	/*
+	 * The CDB below only has four bytes for the verification length.
+	 * Refuse larger requests rather than verifying a truncated range
+	 * and reporting success for blocks that were never checked.
+	 */
+	if (blockcount > UINT32_MAX) {
+		errno = EINVAL;
+		return -1;
+	}
+	veri_len = blockcount;
+
+	/*
+	 * NOTE(review): d_start is converted with BBSHIFT here, while
+	 * disk_read_verify converts byte offsets with d_lbalog.  Confirm
+	 * the two agree on devices where those shifts differ.
+	 */
+	llba = startblock + (disk->d_start >> BBSHIFT);
+
+	/* Borrowed from sg_verify; multi-byte fields are big-endian. */
+	cdb[0] = VERIFY16_CMD;
+	cdb[1] = 0; /* skip PI, DPO, and byte check. */
+	cdb[2] = (llba >> 56) & 0xff;
+	cdb[3] = (llba >> 48) & 0xff;
+	cdb[4] = (llba >> 40) & 0xff;
+	cdb[5] = (llba >> 32) & 0xff;
+	cdb[6] = (llba >> 24) & 0xff;
+	cdb[7] = (llba >> 16) & 0xff;
+	cdb[8] = (llba >> 8) & 0xff;
+	cdb[9] = llba & 0xff;
+	cdb[10] = (veri_len >> 24) & 0xff;
+	cdb[11] = (veri_len >> 16) & 0xff;
+	cdb[12] = (veri_len >> 8) & 0xff;
+	cdb[13] = veri_len & 0xff;
+	cdb[14] = 0;
+	cdb[15] = 0;
+	memset(sense, 0, sizeof(sense));
+
+	/* v3 SG_IO */
+	memset(&iohdr, 0, sizeof(iohdr));
+	iohdr.interface_id = 'S';
+	iohdr.dxfer_direction = SG_DXFER_NONE;
+	iohdr.cmdp = cdb;
+	iohdr.cmd_len = VERIFY16_CMDLEN;
+	iohdr.sbp = sense;
+	iohdr.mx_sb_len = SENSE_BUF_LEN;
+	iohdr.flags |= SG_FLAG_Q_AT_TAIL;
+	iohdr.timeout = 30000; /* 30s */
+
+	error = ioctl(disk->d_fd, SG_IO, &iohdr);
+	if (error)
+		return error;
+
+	dbg_printf("VERIFY(16) fd %d lba %"PRIu64" len %"PRIu64" info %x "
+		   "status %d masked %d msg %d host %d driver %d "
+		   "duration %d resid %d\n",
+		   disk->d_fd, startblock, blockcount, iohdr.info,
+		   iohdr.status, iohdr.masked_status, iohdr.msg_status,
+		   iohdr.host_status, iohdr.driver_status, iohdr.duration,
+		   iohdr.resid);
+
+	/* The ioctl succeeded but the device flagged the command. */
+	if (iohdr.info & SG_INFO_CHECK) {
+		dbg_printf("status: msg %x host %x driver %x\n",
+			   iohdr.msg_status, iohdr.host_status,
+			   iohdr.driver_status);
+		errno = EIO;
+		return -1;
+	}
+
+	return 0;
+}
+#else
+/* No SG_IO at build time: fail the same way the real function does. */
+# define disk_scsi_verify(...)	(errno = ENOTTY, -1)
+#endif /* HAVE_SG_IO */
+
+/*
+ * Probe whether this disk accepts SCSI VERIFY by asking it to verify a
+ * single block at start block 0.  The probe is skipped, and the answer
+ * is "no", when the XFS_SCRUB_NO_SCSI_VERIFY debug tweak is set.
+ */
+static bool
+disk_can_scsi_verify(
+	struct disk		*disk)
+{
+	if (debug_tweak_on("XFS_SCRUB_NO_SCSI_VERIFY"))
+		return false;
+
+	/* Only a zero return counts as success. */
+	return disk_scsi_verify(disk, 0, 1) == 0;
+}
+
/* Open a disk device and discover its geometry. */
struct disk *
disk_open(
const char *pathname)
{
+#ifdef HAVE_HDIO_GETGEO
+ struct hd_geometry bdgeo;
+#endif
struct disk *disk;
+ bool suspicious_disk = false;
int lba_sz;
int error;
@@ -130,13 +244,34 @@ disk_open(
error = ioctl(disk->d_fd, BLKBSZGET, &disk->d_blksize);
if (error)
disk->d_blksize = 0;
- disk->d_start = 0;
+#ifdef HAVE_HDIO_GETGEO
+ error = ioctl(disk->d_fd, HDIO_GETGEO, &bdgeo);
+ if (!error) {
+ /*
+ * dm devices will pass through ioctls, which means
+ * we can't use SCSI VERIFY unless the start is 0.
+ * Most dm devices don't set geometry (unlike scsi
+ * and nvme) so use a zeroed out CHS to screen them
+ * out.
+ */
+ if (bdgeo.start != 0 &&
+ (unsigned long long)bdgeo.heads * bdgeo.sectors *
+ bdgeo.sectors == 0)
+ suspicious_disk = true;
+ disk->d_start = bdgeo.start << BBSHIFT;
+ } else
+#endif
+ disk->d_start = 0;
} else {
disk->d_size = disk->d_sb.st_size;
disk->d_blksize = disk->d_sb.st_blksize;
disk->d_start = 0;
}
+ /* Can we issue SCSI VERIFY? */
+ if (!suspicious_disk && disk_can_scsi_verify(disk))
+ disk->d_flags |= DISK_FLAG_SCSI_VERIFY;
+
return disk;
out_close:
close(disk->d_fd);
@@ -159,6 +294,10 @@ disk_close(
return error;
}
+/*
+ * Conversions between byte counts and logical blocks of disk d, whose
+ * logical block size is 2^d_lbalog bytes.
+ */
+
+/* Size of one logical block, in bytes. */
+#define LBASIZE(d)		(1ULL << (d)->d_lbalog)
+
+/* Bytes to logical blocks, truncating any partial block. */
+#define BTOLBAT(d, bytes)	((uint64_t)(bytes) >> (d)->d_lbalog)
+
+/* Bytes to logical blocks, rounding a partial block up to a whole one. */
+#define BTOLBA(d, bytes)	\
+	(((uint64_t)(bytes) + LBASIZE(d) - 1) >> (d)->d_lbalog)
+
/* Read-verify an extent of a disk device. */
ssize_t
disk_read_verify(
@@ -167,5 +306,10 @@ disk_read_verify(
uint64_t start,
uint64_t length)
{
+ /* Convert to logical block size. */
+ if (disk->d_flags & DISK_FLAG_SCSI_VERIFY)
+ return disk_scsi_verify(disk, BTOLBAT(disk, start),
+ BTOLBA(disk, length));
+
return pread(disk->d_fd, buf, length, start);
}
diff --git a/scrub/disk.h b/scrub/disk.h
index 834678e813..8a00144fd8 100644
--- a/scrub/disk.h
+++ b/scrub/disk.h
@@ -20,6 +20,7 @@
#ifndef XFS_SCRUB_DISK_H_
#define XFS_SCRUB_DISK_H_
+#define DISK_FLAG_SCSI_VERIFY 0x1 /* d_flags bit: disk_open's SCSI VERIFY probe succeeded, so disk_read_verify issues SCSI VERIFY instead of pread */
struct disk {
struct stat d_sb;
int d_fd;