diff options
author | Gao Xiang <hsiangkao@linux.alibaba.com> | 2023-09-20 02:59:47 +0800 |
---|---|---|
committer | Gao Xiang <hsiangkao@linux.alibaba.com> | 2023-09-20 19:35:08 +0800 |
commit | e3dfe4b8db26522004b6ba526cd0eae8622ced1a (patch) | |
tree | d7d85aee863114e39717eb9554e470e1eaf34409 | |
parent | b097208781ecda1e382c8e0f47c5c07413c51676 (diff) | |
download | erofs-utils-e3dfe4b8db26522004b6ba526cd0eae8622ced1a.tar.gz |
erofs-utils: mkfs: support tgz streams for tarerofs
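Introduce an iostream abstraction (struct erofs_iostream) to wrap the input tarball stream for tarerofs.
In addition, add built-in tgz (gzip-compressed tarball) support, enabled with the new --gzip option when mkfs is linked against zlib.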
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20230919185947.3996843-1-hsiangkao@linux.alibaba.com
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | include/erofs/tar.h | 21 | ||||
-rw-r--r-- | lib/tar.c | 311 | ||||
-rw-r--r-- | mkfs/main.c | 34 |
4 files changed, 281 insertions, 86 deletions
diff --git a/configure.ac b/configure.ac index 51ace67..13ee616 100644 --- a/configure.ac +++ b/configure.ac @@ -250,6 +250,7 @@ AC_CHECK_FUNCS(m4_flatten([ ftello64 pread64 pwrite64 + posix_fadvise fstatfs strdup strerror diff --git a/include/erofs/tar.h b/include/erofs/tar.h index b50db1d..a76f740 100644 --- a/include/erofs/tar.h +++ b/include/erofs/tar.h @@ -7,6 +7,9 @@ extern "C" { #endif +#if defined(HAVE_ZLIB) +#include <zlib.h> +#endif #include <sys/stat.h> #include "internal.h" @@ -21,8 +24,24 @@ struct erofs_pax_header { char *path, *link; }; +#define EROFS_IOS_DECODER_NONE 0 +#define EROFS_IOS_DECODER_GZIP 1 + +struct erofs_iostream { + union { + int fd; /* original fd */ + void *handler; + }; + u64 sz; + char *buffer; + unsigned int head, tail, bufsize; + int decoder; + bool feof; +}; + struct erofs_tarfile { struct erofs_pax_header global; + struct erofs_iostream ios; char *mapfile; int fd; @@ -30,6 +49,8 @@ struct erofs_tarfile { bool index_mode, aufs; }; +void erofs_iostream_close(struct erofs_iostream *ios); +int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder); int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar); #ifdef __cplusplus @@ -3,6 +3,9 @@ #include <stdlib.h> #include <string.h> #include <sys/stat.h> +#if defined(HAVE_ZLIB) +#include <zlib.h> +#endif #include "erofs/print.h" #include "erofs/cache.h" #include "erofs/diskbuf.h" @@ -14,8 +17,6 @@ #include "erofs/blobchunk.h" #include "erofs/rebuild.h" -static char erofs_libbuf[16384]; - struct tar_header { char name[100]; /* 0-99 */ char mode[8]; /* 100-107 */ @@ -60,35 +61,167 @@ s64 erofs_read_from_fd(int fd, void *buf, u64 bytes) return i; } -/* - * skip this many bytes of input. Return 0 for success, >0 means this much - * left after input skipped. 
- */ -u64 erofs_lskip(int fd, u64 sz) +void erofs_iostream_close(struct erofs_iostream *ios) { - s64 cur = lseek(fd, 0, SEEK_CUR); + free(ios->buffer); + if (ios->decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + gzclose(ios->handler); +#endif + return; + } + close(ios->fd); +} - if (cur >= 0) { - s64 end = lseek(fd, 0, SEEK_END) - cur; +int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder) +{ + s64 fsz; + + ios->tail = ios->head = 0; + ios->decoder = decoder; + if (decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + ios->handler = gzdopen(fd, "r"); + if (!ios->handler) + return -ENOMEM; + ios->sz = fsz = 0; + ios->bufsize = 32768; +#else + return -EOPNOTSUPP; +#endif + } else { + ios->fd = fd; + fsz = lseek(fd, 0, SEEK_END); + if (fsz <= 0) { + ios->feof = !fsz; + ios->sz = 0; + } else { + ios->feof = false; + ios->sz = fsz; + if (lseek(fd, 0, SEEK_SET)) + return -EIO; +#ifdef HAVE_POSIX_FADVISE + if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) + erofs_warn("failed to fadvise: %s, ignored.", + erofs_strerror(errno)); +#endif + } + ios->bufsize = 16384; + } - if (end > 0 && end < sz) - return sz - end; + do { + ios->buffer = malloc(ios->bufsize); + if (ios->buffer) + break; + ios->bufsize >>= 1; + } while (ios->bufsize >= 1024); - end = cur + sz; - if (end == lseek(fd, end, SEEK_SET)) - return 0; + if (!ios->buffer) + return -ENOMEM; + return 0; +} + +int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes) +{ + unsigned int rabytes = ios->tail - ios->head; + int ret; + + if (rabytes >= bytes) { + *buf = ios->buffer + ios->head; + ios->head += bytes; + return bytes; + } + + if (ios->head) { + memmove(ios->buffer, ios->buffer + ios->head, rabytes); + ios->head = 0; + ios->tail = rabytes; } - while (sz) { - int try = min_t(u64, sz, sizeof(erofs_libbuf)); - int or; + if (!ios->feof) { + if (ios->decoder == EROFS_IOS_DECODER_GZIP) { +#if defined(HAVE_ZLIB) + ret = gzread(ios->handler, ios->buffer + 
rabytes, + ios->bufsize - rabytes); + if (!ret) { + int errnum; + const char *errstr; - or = read(fd, erofs_libbuf, try); - if (or <= 0) - break; - else - sz -= or; + errstr = gzerror(ios->handler, &errnum); + if (errnum != Z_STREAM_END) { + erofs_err("failed to gzread: %s", errstr); + return -EIO; + } + ios->feof = true; + } + ios->tail += ret; +#else + return -EOPNOTSUPP; +#endif + } else { + ret = erofs_read_from_fd(ios->fd, ios->buffer + rabytes, + ios->bufsize - rabytes); + if (ret < 0) + return ret; + ios->tail += ret; + if (ret < ios->bufsize - rabytes) + ios->feof = true; + } } + *buf = ios->buffer; + ret = min_t(int, ios->tail, bytes); + ios->head = ret; + return ret; +} + +int erofs_iostream_bread(struct erofs_iostream *ios, void *buf, u64 bytes) +{ + u64 rem = bytes; + void *src; + int ret; + + do { + ret = erofs_iostream_read(ios, &src, rem); + if (ret < 0) + return ret; + memcpy(buf, src, ret); + rem -= ret; + } while (rem && ret); + + return bytes - rem; +} + +int erofs_iostream_lskip(struct erofs_iostream *ios, u64 sz) +{ + unsigned int rabytes = ios->tail - ios->head; + int ret; + void *dummy; + + if (rabytes >= sz) { + ios->head += sz; + return 0; + } + + sz -= rabytes; + ios->head = ios->tail = 0; + if (ios->feof) + return sz; + + if (ios->sz) { + s64 cur = lseek(ios->fd, sz, SEEK_CUR); + + if (cur > ios->sz) + return cur - ios->sz; + return 0; + } + + do { + ret = erofs_iostream_read(ios, &dummy, sz); + if (ret < 0) + return ret; + sz -= ret; + } while (!(ios->feof || !ret || !sz)); + return sz; } @@ -251,7 +384,8 @@ static int base64_decode(const char *src, int len, u8 *dst) return cp - dst; } -int tarerofs_parse_pax_header(int fd, struct erofs_pax_header *eh, u32 size) +int tarerofs_parse_pax_header(struct erofs_iostream *ios, + struct erofs_pax_header *eh, u32 size) { char *buf, *p; int ret; @@ -261,7 +395,7 @@ int tarerofs_parse_pax_header(int fd, struct erofs_pax_header *eh, u32 size) return -ENOMEM; p = buf; - ret = erofs_read_from_fd(fd, 
buf, size); + ret = erofs_iostream_bread(ios, buf, size); if (ret != size) goto out; @@ -407,10 +541,10 @@ void tarerofs_remove_inode(struct erofs_inode *inode) static int tarerofs_write_file_data(struct erofs_inode *inode, struct erofs_tarfile *tar) { - unsigned int j, rem; - int fd; + unsigned int j; + void *buf; + int fd, nread; u64 off; - char buf[65536]; if (!inode->i_diskbuf) { inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf)); @@ -425,12 +559,14 @@ static int tarerofs_write_file_data(struct erofs_inode *inode, return -EBADF; for (j = inode->i_size; j; ) { - rem = min_t(unsigned int, sizeof(buf), j); - - if (erofs_read_from_fd(tar->fd, buf, rem) != rem || - write(fd, buf, rem) != rem) - return -EIO; - j -= rem; + nread = erofs_iostream_read(&tar->ios, &buf, j); + if (nread < 0) + break; + if (write(fd, buf, nread) != nread) { + nread = -EIO; + break; + } + j -= nread; } erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size); inode->with_diskbuf = true; @@ -445,7 +581,7 @@ static int tarerofs_write_file_index(struct erofs_inode *inode, ret = tarerofs_write_chunkes(inode, data_offset); if (ret) return ret; - if (erofs_lskip(tar->fd, inode->i_size)) + if (erofs_iostream_lskip(&tar->ios, inode->i_size)) return -EIO; return 0; } @@ -459,7 +595,7 @@ int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar) struct stat st; erofs_off_t tar_offset, data_offset; - struct tar_header th; + struct tar_header *th; struct erofs_dentry *d; struct erofs_inode *inode; unsigned int j, csum, cksum; @@ -474,7 +610,7 @@ int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar) restart: rem = tar->offset & 511; if (rem) { - if (erofs_lskip(tar->fd, 512 - rem)) { + if (erofs_iostream_lskip(&tar->ios, 512 - rem)) { ret = -EIO; goto out; } @@ -482,11 +618,14 @@ restart: } tar_offset = tar->offset; - ret = erofs_read_from_fd(tar->fd, &th, sizeof(th)); - if (ret != sizeof(th)) + ret = erofs_iostream_read(&tar->ios, (void **)&th, sizeof(*th)); + 
if (ret != sizeof(*th)) { + erofs_err("failed to read header block @ %llu", tar_offset); + ret = -EIO; goto out; - tar->offset += sizeof(th); - if (*th.name == '\0') { + } + tar->offset += sizeof(*th); + if (*th->name == '\0') { if (e) { /* end of tar 2 empty blocks */ ret = 1; goto out; @@ -495,14 +634,14 @@ restart: goto restart; } - if (strncmp(th.magic, "ustar", 5)) { + if (memcmp(th->magic, "ustar", 5)) { erofs_err("invalid tar magic @ %llu", tar_offset); ret = -EIO; goto out; } /* chksum field itself treated as ' ' */ - csum = tarerofs_otoi(th.chksum, sizeof(th.chksum)); + csum = tarerofs_otoi(th->chksum, sizeof(th->chksum)); if (errno) { erofs_err("invalid chksum @ %llu", tar_offset); ret = -EBADMSG; @@ -513,12 +652,12 @@ restart: cksum += (unsigned int)' '; ckksum = cksum; for (j = 0; j < 148; ++j) { - cksum += (unsigned int)((u8*)&th)[j]; - ckksum += (int)((char*)&th)[j]; + cksum += (unsigned int)((u8*)th)[j]; + ckksum += (int)((char*)th)[j]; } for (j = 156; j < 500; ++j) { - cksum += (unsigned int)((u8*)&th)[j]; - ckksum += (int)((char*)&th)[j]; + cksum += (unsigned int)((u8*)th)[j]; + ckksum += (int)((char*)th)[j]; } if (csum != cksum && csum != ckksum) { erofs_err("chksum mismatch @ %llu", tar_offset); @@ -526,14 +665,14 @@ restart: goto out; } - st.st_mode = tarerofs_otoi(th.mode, sizeof(th.mode)); + st.st_mode = tarerofs_otoi(th->mode, sizeof(th->mode)); if (errno) goto invalid_tar; if (eh.use_uid) { st.st_uid = eh.st.st_uid; } else { - st.st_uid = tarerofs_parsenum(th.uid, sizeof(th.uid)); + st.st_uid = tarerofs_parsenum(th->uid, sizeof(th->uid)); if (errno) goto invalid_tar; } @@ -541,7 +680,7 @@ restart: if (eh.use_gid) { st.st_gid = eh.st.st_gid; } else { - st.st_gid = tarerofs_parsenum(th.gid, sizeof(th.gid)); + st.st_gid = tarerofs_parsenum(th->gid, sizeof(th->gid)); if (errno) goto invalid_tar; } @@ -549,7 +688,7 @@ restart: if (eh.use_size) { st.st_size = eh.st.st_size; } else { - st.st_size = tarerofs_parsenum(th.size, sizeof(th.size)); + 
st.st_size = tarerofs_parsenum(th->size, sizeof(th->size)); if (errno) goto invalid_tar; } @@ -560,25 +699,25 @@ restart: ST_MTIM_NSEC(&st) = ST_MTIM_NSEC(&eh.st); #endif } else { - st.st_mtime = tarerofs_parsenum(th.mtime, sizeof(th.mtime)); + st.st_mtime = tarerofs_parsenum(th->mtime, sizeof(th->mtime)); if (errno) goto invalid_tar; } - if (th.typeflag <= '7' && !eh.path) { + if (th->typeflag <= '7' && !eh.path) { eh.path = path; j = 0; - if (*th.prefix) { - memcpy(path, th.prefix, sizeof(th.prefix)); - path[sizeof(th.prefix)] = '\0'; + if (*th->prefix) { + memcpy(path, th->prefix, sizeof(th->prefix)); + path[sizeof(th->prefix)] = '\0'; j = strlen(path); if (path[j - 1] != '/') { path[j] = '/'; path[++j] = '\0'; } } - memcpy(path + j, th.name, sizeof(th.name)); - path[j + sizeof(th.name)] = '\0'; + memcpy(path + j, th->name, sizeof(th->name)); + path[j + sizeof(th->name)] = '\0'; j = strlen(path); while (path[j - 1] == '/') path[--j] = '\0'; @@ -586,20 +725,30 @@ restart: data_offset = tar->offset; tar->offset += st.st_size; - if (th.typeflag == '0' || th.typeflag == '7' || th.typeflag == '1') { + switch(th->typeflag) { + case '0': + case '7': + case '1': st.st_mode |= S_IFREG; - } else if (th.typeflag == '2') { + break; + case '2': st.st_mode |= S_IFLNK; - } else if (th.typeflag == '3') { + break; + case '3': st.st_mode |= S_IFCHR; - } else if (th.typeflag == '4') { + break; + case '4': st.st_mode |= S_IFBLK; - } else if (th.typeflag == '5') { + break; + case '5': st.st_mode |= S_IFDIR; - } else if (th.typeflag == '6') { + break; + case '6': st.st_mode |= S_IFIFO; - } else if (th.typeflag == 'g') { - ret = tarerofs_parse_pax_header(tar->fd, &tar->global, st.st_size); + break; + case 'g': + ret = tarerofs_parse_pax_header(&tar->ios, &tar->global, + st.st_size); if (ret) goto out; if (tar->global.path) { @@ -611,31 +760,31 @@ restart: eh.link = strdup(tar->global.link); } goto restart; - } else if (th.typeflag == 'x') { - ret = tarerofs_parse_pax_header(tar->fd, 
&eh, st.st_size); + case 'x': + ret = tarerofs_parse_pax_header(&tar->ios, &eh, st.st_size); if (ret) goto out; goto restart; - } else if (th.typeflag == 'L') { + case 'L': free(eh.path); eh.path = malloc(st.st_size + 1); - if (st.st_size != erofs_read_from_fd(tar->fd, eh.path, - st.st_size)) + if (st.st_size != erofs_iostream_bread(&tar->ios, eh.path, + st.st_size)) goto invalid_tar; eh.path[st.st_size] = '\0'; goto restart; - } else if (th.typeflag == 'K') { + case 'K': free(eh.link); eh.link = malloc(st.st_size + 1); if (st.st_size > PATH_MAX || st.st_size != - erofs_read_from_fd(tar->fd, eh.link, st.st_size)) + erofs_iostream_bread(&tar->ios, eh.link, st.st_size)) goto invalid_tar; eh.link[st.st_size] = '\0'; goto restart; - } else { + default: erofs_info("unrecognized typeflag %xh @ %llu - ignoring", - th.typeflag, tar_offset); - (void)erofs_lskip(tar->fd, st.st_size); + th->typeflag, tar_offset); + (void)erofs_iostream_lskip(&tar->ios, st.st_size); ret = 0; goto out; } @@ -644,22 +793,22 @@ restart: if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { int major, minor; - major = tarerofs_parsenum(th.devmajor, sizeof(th.devmajor)); + major = tarerofs_parsenum(th->devmajor, sizeof(th->devmajor)); if (errno) { erofs_err("invalid device major @ %llu", tar_offset); goto out; } - minor = tarerofs_parsenum(th.devminor, sizeof(th.devminor)); + minor = tarerofs_parsenum(th->devminor, sizeof(th->devminor)); if (errno) { erofs_err("invalid device minor @ %llu", tar_offset); goto out; } st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12); - } else if (th.typeflag == '1' || th.typeflag == '2') { + } else if (th->typeflag == '1' || th->typeflag == '2') { if (!eh.link) - eh.link = strndup(th.linkname, sizeof(th.linkname)); + eh.link = strndup(th->linkname, sizeof(th->linkname)); } if (tar->index_mode && !tar->mapfile && @@ -689,7 +838,7 @@ restart: DBG_BUGON(!d->inode); ret = erofs_set_opaque_xattr(d->inode); goto out; - } else if (th.typeflag == '1') { /* 
hard link cases */ + } else if (th->typeflag == '1') { /* hard link cases */ struct erofs_dentry *d2; bool dumb; diff --git a/mkfs/main.c b/mkfs/main.c index ea868bb..6d2b700 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -66,6 +66,9 @@ static struct option long_options[] = { {"block-list-file", required_argument, NULL, 515}, #endif {"ovlfs-strip", optional_argument, NULL, 516}, +#ifdef HAVE_ZLIB + {"gzip", no_argument, NULL, 517}, +#endif {0, 0, 0, 0}, }; @@ -111,6 +114,9 @@ static void usage(void) " --force-gid=# set all file gids to # (# = GID)\n" " --uid-offset=# add offset # to all file uids (# = id offset)\n" " --gid-offset=# add offset # to all file gids (# = id offset)\n" +#ifdef HAVE_ZLIB + " --gzip try to filter the tarball stream through gzip\n" +#endif " --help display this help and exit\n" " --ignore-mtime use build time instead of strict per-file modification time\n" " --max-extent-bytes=# set maximum decompressed extent size # in bytes\n" @@ -139,7 +145,7 @@ static unsigned int pclustersize_packed, pclustersize_max; static struct erofs_tarfile erofstar = { .global.xattrs = LIST_HEAD_INIT(erofstar.global.xattrs) }; -static bool tar_mode, rebuild_mode; +static bool tar_mode, rebuild_mode, gzip_supported; static unsigned int rebuild_src_count; static LIST_HEAD(rebuild_src_list); @@ -525,6 +531,9 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) else cfg.c_ovlfs_strip = false; break; + case 517: + gzip_supported = true; + break; case 1: usage(); exit(0); @@ -560,7 +569,17 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) erofs_err("missing argument: SOURCE(s)"); return -EINVAL; } else { - erofstar.fd = STDIN_FILENO; + int dupfd; + + dupfd = dup(STDIN_FILENO); + if (dupfd < 0) { + erofs_err("failed to duplicate STDIN_FILENO: %s", + strerror(errno)); + return -errno; + } + err = erofs_iostream_open(&erofstar.ios, dupfd, gzip_supported); + if (err) + return err; } } else { struct stat st; @@ -573,12 +592,15 @@ static int 
mkfs_parse_options_cfg(int argc, char *argv[]) } if (tar_mode) { - erofstar.fd = open(cfg.c_src_path, O_RDONLY); - if (erofstar.fd < 0) { + int fd = open(cfg.c_src_path, O_RDONLY); + + if (fd < 0) { erofs_err("failed to open file: %s", cfg.c_src_path); - usage(); return -errno; } + err = erofs_iostream_open(&erofstar.ios, fd, gzip_supported); + if (err) + return err; } else { err = lstat(cfg.c_src_path, &st); if (err) @@ -1182,6 +1204,8 @@ exit: erofs_rebuild_cleanup(); erofs_diskbuf_exit(); erofs_exit_configure(); + if (tar_mode) + erofs_iostream_close(&erofstar.ios); if (err) { erofs_err("\tCould not format the device : %s\n", |