Add tarfs, a filesystem backed by tarballs.

Sponsored by:	Juniper Networks, Inc.
Sponsored by:	Klara, Inc.
Reviewed by:	pauamma, imp
Differential Revision:	https://reviews.freebsd.org/D37753
This commit is contained in:
Dag-Erling Smørgrav 2023-02-02 18:18:41 +01:00
parent f29942229d
commit 69d94f4c76
18 changed files with 3911 additions and 0 deletions

View File

@ -757,6 +757,8 @@
fs
fusefs
..
tarfs
..
tmpfs
..
..

View File

@ -70,6 +70,7 @@ MAN= acct.5 \
style.Makefile.5 \
style.mdoc.5 \
sysctl.conf.5 \
tarfs.5 \
tmpfs.5 \
unionfs.5

103
share/man/man5/tarfs.5 Normal file
View File

@ -0,0 +1,103 @@
.\"-
.\" SPDX-License-Identifier: BSD-2-Clause
.\"
.\" Copyright (c) 2022 Klara, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd February 2, 2023
.Dt TARFS 5
.Os
.Sh NAME
.Nm tarfs
.Nd tarball filesystem
.Sh SYNOPSIS
To compile this driver into the kernel, place the following line in
your kernel configuration file:
.Bd -ragged -offset indent
.Cd "options TARFS"
.Ed
.Pp
Alternatively, to load the driver as a module at boot time, place the
following line in
.Xr loader.conf 5 :
.Bd -literal -offset indent
tarfs_load="YES"
.Ed
.Sh DESCRIPTION
The
.Nm
driver implements a read-only filesystem backed by a
.Xr tar 5
file.
Currently, only POSIX archives, optionally compressed with
.Xr zstd 1 ,
are supported.
.Pp
The preferred I/O size for
.Nm
filesystems can be adjusted using the
.Va vfs.tarfs.ioshift
sysctl setting and tunable.
Setting it to 0 will reset it to its default value.
Note that changes to this setting only apply to filesystems mounted
after the change.
.Sh DIAGNOSTICS
If enabled by the
.Dv TARFS_DEBUG
kernel option, the
.Va vfs.tarfs.debug
sysctl setting can be used to control debugging output from the
.Nm
driver.
Debugging output for individual sections of the driver can be enabled
by adding together the relevant values from the table below.
.Bl -column Value Description
.It 0x01 Ta Memory allocations
.It 0x02 Ta Checksum calculations
.It 0x04 Ta Filesystem operations (vfsops)
.It 0x08 Ta Path lookups
.It 0x10 Ta File operations (vnops)
.It 0x20 Ta General I/O
.It 0x40 Ta Decompression
.It 0x80 Ta Decompression index
.It 0x100 Ta Sparse file mapping
.El
.Sh SEE ALSO
.Xr tar 1 ,
.Xr zstd 1 ,
.Xr fstab 5 ,
.Xr tar 5 ,
.Xr mount 8 ,
.Xr sysctl 8
.Sh HISTORY
.An -nosplit
The
.Nm
driver was developed by
.An Stephen J. Kiernan Aq Mt stevek@FreeBSD.org
and
.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
for Juniper Networks and Klara Systems.
This manual page was written by
.An Dag-Erling Smørgrav Aq Mt des@FreeBSD.org
for Juniper Networks and Klara Systems.

View File

@ -3615,6 +3615,10 @@ fs/smbfs/smbfs_smb.c optional smbfs
fs/smbfs/smbfs_subr.c optional smbfs
fs/smbfs/smbfs_vfsops.c optional smbfs
fs/smbfs/smbfs_vnops.c optional smbfs
fs/tarfs/tarfs_io.c optional tarfs compile-with "${NORMAL_C} -I$S/contrib/zstd/lib/freebsd"
fs/tarfs/tarfs_subr.c optional tarfs
fs/tarfs/tarfs_vfsops.c optional tarfs
fs/tarfs/tarfs_vnops.c optional tarfs
fs/udf/osta.c optional udf
fs/udf/udf_iconv.c optional udf_iconv
fs/udf/udf_vfsops.c optional udf

View File

@ -265,6 +265,7 @@ NULLFS opt_dontuse.h
PROCFS opt_dontuse.h
PSEUDOFS opt_dontuse.h
SMBFS opt_dontuse.h
TARFS opt_dontuse.h
TMPFS opt_dontuse.h
UDF opt_dontuse.h
UNIONFS opt_dontuse.h
@ -273,6 +274,9 @@ ZFS opt_dontuse.h
# Pseudofs debugging
PSEUDOFS_TRACE opt_pseudofs.h
# Tarfs debugging
TARFS_DEBUG opt_tarfs.h
# In-kernel GSS-API
KGSSAPI opt_kgssapi.h
KGSSAPI_DEBUG opt_kgssapi.h

254
sys/fs/tarfs/tarfs.h Normal file
View File

@ -0,0 +1,254 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 Juniper Networks, Inc.
* Copyright (c) 2022-2023 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _FS_TARFS_TARFS_H_
#define _FS_TARFS_TARFS_H_
#ifndef _KERNEL
#error Should only be included by kernel
#endif
MALLOC_DECLARE(M_TARFSMNT);
MALLOC_DECLARE(M_TARFSNODE);
MALLOC_DECLARE(M_TARFSNAME);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_vfs_tarfs);
#endif
struct componentname;
struct mount;
struct vnode;
/*
* Internal representation of a tarfs file system node.
*
* The common fields are followed by an anonymous union holding per-type
* data; the comment above each union member names the vnode type(s) it
* is intended for.
*/
struct tarfs_node {
TAILQ_ENTRY(tarfs_node) entries; /* list linkage; presumably for tarfs_mount.allnodes -- confirm */
TAILQ_ENTRY(tarfs_node) dirents; /* list linkage; presumably for the parent's dir.dirhead -- confirm */
struct mtx lock; /* taken by TARFS_NODE_LOCK() / TARFS_NODE_UNLOCK() */
struct vnode *vnode; /* NOTE(review): presumably the vnode currently backing this node */
struct tarfs_mount *tmp; /* NOTE(review): presumably the mount this node belongs to */
enum vtype type;
ino_t ino;
off_t offset;
size_t size;
size_t physize;
char *name;
size_t namelen;
/* Node attributes */
uid_t uid;
gid_t gid;
mode_t mode;
unsigned int flags;
nlink_t nlink;
struct timespec atime;
struct timespec mtime;
struct timespec ctime;
struct timespec birthtime;
unsigned long gen;
/* Block map */
size_t nblk; /* NOTE(review): presumably the number of entries in blk[] */
struct tarfs_blk *blk;
struct tarfs_node *parent;
union {
/* VDIR */
struct {
TAILQ_HEAD(, tarfs_node) dirhead;
off_t lastcookie;
struct tarfs_node *lastnode;
} dir;
/* VLNK */
struct {
char *name;
size_t namelen;
} link;
/* VBLK or VCHR */
dev_t rdev;
/* VREG */
struct tarfs_node *other;
};
};
/*
* Entry in sparse file block map.
*
* Each entry ties a range of `l` bytes at input offset `i` to output
* offset `o`.  struct tarfs_node carries an array of these in its blk
* member.
*/
struct tarfs_blk {
off_t i; /* input (physical) offset */
off_t o; /* output (logical) offset */
size_t l; /* length */
};
/*
* Decompression buffer.
*
* A fixed-size byte buffer of TARFS_ZBUF_SIZE (1048576 bytes, i.e. 1 MiB)
* together with the offset and length of the data it currently holds.
*/
#define TARFS_ZBUF_SIZE 1048576
struct tarfs_zbuf {
u_char buf[TARFS_ZBUF_SIZE];
size_t off; /* offset of contents */
size_t len; /* length of contents */
};
/*
* Internal representation of a tarfs mount point.
*/
struct tarfs_mount {
TAILQ_HEAD(, tarfs_node) allnodes;
struct mtx allnode_lock; /* taken by TARFS_ALLNODES_LOCK() / TARFS_ALLNODES_UNLOCK() */
struct tarfs_node *root;
struct vnode *vp; /* vnode of the tar file itself; the I/O layer reads from it */
struct mount *vfs; /* passed to getnewvnode() when the zio vnode is created */
ino_t ino;
struct unrhdr *ino_unr;
size_t iosize; /* size of the probe read done by tarfs_io_init() */
size_t nblocks;
size_t nfiles;
time_t mtime; /* default mtime for directories */
struct tarfs_zio *zio; /* decompression state, NULL if the I/O layer set none up */
struct vnode *znode; /* vnode serving the decompressed stream, NULL if none */
};
/*
* State of the decompression layer for one mount.
*
* Besides the current input / output positions, this holds an index of
* (input offset, output offset) pairs.  The index starts with a single
* entry, gains one each time the decompressor reports the end of a
* frame, and is used by the read path to pick a point from which to
* restart decompression when a read does not continue from the current
* output position.
*/
struct tarfs_zio {
struct tarfs_mount *tmp; /* owning mount */
/* decompression state */
#ifdef ZSTDIO
struct tarfs_zstd *zstd; /* decompression state (zstd) */
#endif
off_t ipos; /* current input position */
off_t opos; /* current output position */
/* index of compression frames */
unsigned int curidx; /* current index position*/
unsigned int nidx; /* number of index entries */
unsigned int szidx; /* index capacity */
struct tarfs_idx { off_t i, o; } *idx; /* i: input offset, o: output offset */
};
/*
* Identifies a tarfs node by inode number and generation number.
* NOTE(review): presumably the tarfs-specific overlay of the generic
* file handle used by the vptofh / fhtovp operations -- confirm against
* the vfsops / vnops code.
*/
struct tarfs_fid {
u_short len; /* length of data in bytes */
u_short data0; /* force alignment */
ino_t ino;
unsigned long gen;
};
/*
 * Locking helpers.
 *
 * TARFS_NODE_LOCK() / TARFS_NODE_UNLOCK() take a struct tarfs_node
 * pointer and operate on that node's `lock` mutex.
 *
 * TARFS_ALLNODES_LOCK() / TARFS_ALLNODES_UNLOCK() take a struct
 * tarfs_mount pointer and operate on that mount's `allnode_lock` mutex.
 * The macro parameter is named `tmp` so that the expansion actually uses
 * the argument rather than whatever variable called `tmp` happens to be
 * in scope at the call site.
 */
#define	TARFS_NODE_LOCK(tnp) \
	mtx_lock(&(tnp)->lock)
#define	TARFS_NODE_UNLOCK(tnp) \
	mtx_unlock(&(tnp)->lock)
#define	TARFS_ALLNODES_LOCK(tmp) \
	mtx_lock(&(tmp)->allnode_lock)
#define	TARFS_ALLNODES_UNLOCK(tmp) \
	mtx_unlock(&(tmp)->allnode_lock)
/*
* Data and metadata within tar files are aligned on 512-byte boundaries,
* to match the block size of the magnetic tapes they were originally
* intended for.
*
* TARFS_BSHIFT     log2 of the block size
* TARFS_BLOCKSIZE  block size in bytes (1 << 9 == 512), as a size_t
* TARFS_BLKOFF(l)  remainder of l within its block
* TARFS_BLKNUM(l)  l shifted right by TARFS_BSHIFT, i.e. its block number
* TARFS_SZ2BLKS(sz) number of blocks needed to hold sz bytes, rounded up
*/
#define TARFS_BSHIFT 9
#define TARFS_BLOCKSIZE (size_t)(1U << TARFS_BSHIFT)
#define TARFS_BLKOFF(l) ((l) % TARFS_BLOCKSIZE)
#define TARFS_BLKNUM(l) ((l) >> TARFS_BSHIFT)
#define TARFS_SZ2BLKS(sz) (((sz) + TARFS_BLOCKSIZE - 1) / TARFS_BLOCKSIZE)
/*
* Our preferred I/O size.
*
* Expressed as a shift count.  The minimum is the tar block shift; the
* default and the maximum are both the page shift.
*/
extern unsigned int tarfs_ioshift;
#define TARFS_IOSHIFT_MIN TARFS_BSHIFT
#define TARFS_IOSHIFT_DEFAULT PAGE_SHIFT
#define TARFS_IOSHIFT_MAX PAGE_SHIFT
/*
* Reserved inode numbers.  TARFS_ZIOINO is what the zio node reports as
* its file id.
* NOTE(review): TARFS_ROOTINO is presumably the root directory's inode
* number and TARFS_MININO the first number handed out to ordinary
* nodes -- confirm against the allocation code.
*/
#define TARFS_ROOTINO ((ino_t)3)
#define TARFS_ZIOINO ((ino_t)4)
#define TARFS_MININO ((ino_t)65535)
/*
* Special directory cookies.
* NOTE(review): presumably used by readdir for ".", ".." and
* end-of-directory -- confirm against the vnops code.
*/
#define TARFS_COOKIE_DOT 0
#define TARFS_COOKIE_DOTDOT 1
#define TARFS_COOKIE_EOF OFF_MAX
/*
* Name associated with the zio node, and its length excluding the
* terminating NUL.
*/
#define TARFS_ZIO_NAME ".tar"
#define TARFS_ZIO_NAMELEN (sizeof(TARFS_ZIO_NAME) - 1)
extern struct vop_vector tarfs_vnodeops;
/*
 * Returns the tarfs mount structure stored in the private data pointer
 * of the given mount.  Asserts that both the mount and its private data
 * are non-NULL.
 */
static inline struct tarfs_mount *
MP_TO_TARFS_MOUNT(struct mount *mp)
{
	struct tarfs_mount *priv;

	MPASS(mp != NULL && mp->mnt_data != NULL);
	priv = mp->mnt_data;
	return (priv);
}
/*
 * Returns the tarfs node stored in the private data pointer of the
 * given vnode.  Asserts that both the vnode and its private data are
 * non-NULL.
 */
static inline struct tarfs_node *
VP_TO_TARFS_NODE(struct vnode *vp)
{
	struct tarfs_node *priv;

	MPASS(vp != NULL && vp->v_data != NULL);
	priv = vp->v_data;
	return (priv);
}
/*
* Node management, lookup and file access.
* NOTE(review): the definitions of these functions are not in
* tarfs_io.c; see their own source files for the exact contracts.
*/
int tarfs_alloc_node(struct tarfs_mount *tmp, const char *name,
size_t namelen, enum vtype type, off_t off, size_t sz,
time_t mtime, uid_t uid, gid_t gid, mode_t mode,
unsigned int flags, const char *linkname, dev_t rdev,
struct tarfs_node *parent, struct tarfs_node **node);
int tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize);
void tarfs_dump_tree(struct tarfs_node *tnp);
void tarfs_free_node(struct tarfs_node *tnp);
struct tarfs_node *
tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie);
struct tarfs_node *
tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
struct componentname *cnp);
void tarfs_print_node(struct tarfs_node *tnp);
int tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop);
/*
* I/O layer (tarfs_io.c).
*
* tarfs_io_init() and tarfs_io_fini() set up and tear down the layer,
* including decompression where applicable.  tarfs_io_read() returns 0
* or a positive errno value.  tarfs_io_read_buf() returns the number of
* bytes read, 0 on EOF, or a negative errno value.  In both read
* functions, `raw` selects the original file even when a decompressed
* stream is available.
*/
int tarfs_io_init(struct tarfs_mount *tmp);
int tarfs_io_fini(struct tarfs_mount *tmp);
int tarfs_io_read(struct tarfs_mount *tmp, bool raw,
struct uio *uiop);
ssize_t tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
void *buf, off_t off, size_t len);
unsigned int
tarfs_strtofflags(const char *str, char **end);
#endif /* _FS_TARFS_TARFS_H_ */

65
sys/fs/tarfs/tarfs_dbg.h Normal file
View File

@ -0,0 +1,65 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 Juniper Networks, Inc.
* Copyright (c) 2022 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _FS_TARFS_TARFS_DBG_H_
#define _FS_TARFS_TARFS_DBG_H_
#ifndef _KERNEL
#error Should only be included by kernel
#endif
#ifdef TARFS_DEBUG
/* Bitmask of enabled debug categories; see TARFS_DEBUG_* below. */
extern int tarfs_debug;
/* Debug categories; these are bit values and may be combined. */
#define TARFS_DEBUG_ALLOC 0x01
#define TARFS_DEBUG_CHECKSUM 0x02
#define TARFS_DEBUG_FS 0x04
#define TARFS_DEBUG_LOOKUP 0x08
#define TARFS_DEBUG_VNODE 0x10
#define TARFS_DEBUG_IO 0x20
#define TARFS_DEBUG_ZIO 0x40
#define TARFS_DEBUG_ZIDX 0x80
#define TARFS_DEBUG_MAP 0x100
/*
* TARFS_DPF(category, fmt, ...): printf() the message if the bit for
* `category` is set in tarfs_debug.  `category` is the suffix of one of
* the TARFS_DEBUG_* names above (e.g. IO, ZIO); it is pasted directly
* onto the TARFS_DEBUG_ prefix.
*/
#define TARFS_DPF(category, fmt, ...) \
do { \
if ((tarfs_debug & TARFS_DEBUG_##category) != 0) \
printf(fmt, ## __VA_ARGS__); \
} while (0)
/*
* TARFS_DPF_IFF(category, cond, fmt, ...): like TARFS_DPF(), but only
* if `cond` is also true.  `cond` is evaluated first.
*/
#define TARFS_DPF_IFF(category, cond, fmt, ...) \
do { \
if ((cond) \
&& (tarfs_debug & TARFS_DEBUG_##category) != 0) \
printf(fmt, ## __VA_ARGS__); \
} while (0)
#else
/* Without TARFS_DEBUG both macros expand to nothing. */
#define TARFS_DPF(category, fmt, ...)
#define TARFS_DPF_IFF(category, cond, fmt, ...)
#endif
#endif /* _FS_TARFS_TARFS_DBG_H_ */

727
sys/fs/tarfs/tarfs_io.c Normal file
View File

@ -0,0 +1,727 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 Juniper Networks, Inc.
* Copyright (c) 2022-2023 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_tarfs.h"
#include "opt_zstdio.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#ifdef ZSTDIO
#define ZSTD_STATIC_LINKING_ONLY
#include <contrib/zstd/lib/zstd.h>
#endif
#include <fs/tarfs/tarfs.h>
#include <fs/tarfs/tarfs_dbg.h>
#ifdef TARFS_DEBUG
/*
* Debug-only statistics for the decompression layer, exported read-only
* under the vfs.tarfs.zio sysctl node.  The read path adds to
* `inflated` after each decompression step, to `bounced` when data is
* copied out of a bounce buffer, and to `consumed` at the end of each
* read.
*/
SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
"Tar filesystem decompression layer");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
&tarfs_zio_inflated, "Amount of compressed data inflated.");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
&tarfs_zio_consumed, "Amount of compressed data consumed.");
COUNTER_U64_DEFINE_EARLY(tarfs_zio_bounced);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, bounced, CTLFLAG_RD,
&tarfs_zio_bounced, "Amount of decompressed data bounced.");
static int
tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
{
unsigned int tmp;
int error;
tmp = 0;
if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
return (error);
if (req->newptr != NULL) {
if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
return (error);
counter_u64_zero(tarfs_zio_inflated);
counter_u64_zero(tarfs_zio_consumed);
counter_u64_zero(tarfs_zio_bounced);
}
return (0);
}
SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
"Reset compression counters.");
#endif
/* Malloc types used by the decompression layer. */
MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");
/*
* Leading signature bytes of the compression formats that
* tarfs_io_init() looks for.  Each is a compound literal, so sizeof()
* gives the number of signature bytes.
*/
#define XZ_MAGIC (uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
#define ZLIB_MAGIC (uint8_t[]){ 0x1f, 0x8b, 0x08 }
#define ZSTD_MAGIC (uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }
#ifdef ZSTDIO
/* zstd-specific decompression state: the streaming decompressor. */
struct tarfs_zstd {
ZSTD_DStream *zds;
};
#endif
/* XXX review use of curthread / uio_td / td_cred */
/*
 * Performs the read described by uiop against the tar file.  If raw is
 * true, or if the mount has no decompression vnode, the read goes
 * straight to the original file; otherwise it goes through the
 * decompression vnode and yields the decompressed stream.  Returns 0 on
 * success and a positive errno value on failure.
 */
int
tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
{
	struct vnode *target;
	void *cookie = NULL;
	const off_t start = uiop->uio_offset;
	const size_t count = uiop->uio_resid;
	const bool direct = raw || tmp->znode == NULL;
	int error;

	if (direct) {
		/* original file: range lock plus shared vnode lock */
		target = tmp->vp;
		cookie = vn_rangelock_rlock(target, start, start + count);
		error = vn_lock(target, LK_SHARED);
	} else {
		/* decompression vnode: exclusive vnode lock only */
		target = tmp->znode;
		error = vn_lock(target, LK_EXCLUSIVE);
	}
	if (error == 0) {
		error = VOP_READ(target, uiop, IO_DIRECT | IO_NODELOCKED,
		    uiop->uio_td->td_ucred);
		VOP_UNLOCK(target);
	}
	/* the range lock is dropped even if the vnode lock failed */
	if (direct)
		vn_rangelock_unlock(target, cookie);
	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)start, count, error, uiop->uio_resid);
	return (error);
}
/*
 * Convenience wrapper around tarfs_io_read() for reading into a kernel
 * buffer.  Builds a single-segment uio for len bytes at offset off and
 * hands it to tarfs_io_read() together with the raw flag.  Returns the
 * number of bytes read on success, 0 on EOF (or if len is zero), and a
 * negative errno value on failure.
 */
ssize_t
tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
    void *buf, off_t off, size_t len)
{
	struct iovec vec;
	struct uio req;
	ssize_t nread;
	int error;

	/* a zero-length read is trivially satisfied */
	if (len == 0) {
		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
		    (size_t)off, len);
		return (0);
	}

	vec.iov_base = buf;
	vec.iov_len = len;
	req.uio_iov = &vec;
	req.uio_iovcnt = 1;
	req.uio_offset = off;
	req.uio_segflg = UIO_SYSSPACE;
	req.uio_rw = UIO_READ;
	req.uio_resid = len;
	req.uio_td = curthread;

	error = tarfs_io_read(tmp, raw, &req);
	if (error != 0) {
		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
		    (size_t)off, len, error);
		return (-error);
	}

	nread = len - req.uio_resid;
	if (nread != 0) {
		/* log the count and up to eight leading bytes */
		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
		    (size_t)off, len, nread,
		    (int)(nread > 8 ? 8 : nread), (uint8_t *)buf, " ");
	} else {
		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
		    (size_t)off, len);
	}
	return (nread);
}
#ifdef ZSTDIO
/*
 * Allocation hook handed to the zstd library: allocates from the
 * M_TARFSZSTATE malloc type, sleeping if necessary.  The opaque pointer
 * is unused.
 */
static void *
tarfs_zstate_alloc(void *opaque, size_t size)
{
	void *mem;

	(void)opaque;
	mem = malloc(size, M_TARFSZSTATE, M_WAITOK);
	return (mem);
}

/*
 * Deallocation hook handed to the zstd library; counterpart of
 * tarfs_zstate_alloc().  The opaque pointer is unused.
 */
static void
tarfs_zstate_free(void *opaque, void *address)
{

	(void)opaque;
	free(address, M_TARFSZSTATE);
}

/*
 * Custom allocator description passed to the zstd library when the
 * decompression stream is created.
 */
static ZSTD_customMem tarfs_zstd_mem = {
	.customAlloc = tarfs_zstate_alloc,
	.customFree = tarfs_zstate_free,
	.opaque = NULL,
};
#endif
/*
 * Advances the current position in the decompression frame index by
 * one.  If that moves past the last recorded entry, a new entry with
 * the given input and output offsets is appended, doubling the capacity
 * of the index first if it is full.  In either case, the entry at the
 * new position is asserted to match the given offsets.
 */
static void
tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
{
	struct tarfs_idx *ent;

	zio->curidx++;
	if (zio->curidx >= zio->nidx) {
		zio->nidx++;
		if (zio->nidx > zio->szidx) {
			/* out of room: double the capacity */
			zio->szidx *= 2;
			zio->idx = realloc(zio->idx,
			    zio->szidx * sizeof(*zio->idx),
			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
		}
		ent = &zio->idx[zio->curidx];
		ent->i = i;
		ent->o = o;
		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)ent->i, (size_t)ent->o);
	}
	MPASS(zio->idx[zio->curidx].i == i);
	MPASS(zio->idx[zio->curidx].o == o);
}
/*
* VOP_ACCESS for zio node.
*/
static int
tarfs_zaccess(struct vop_access_args *ap)
{
struct vnode *vp = ap->a_vp;
struct tarfs_zio *zio = vp->v_data;
struct tarfs_mount *tmp = zio->tmp;
accmode_t accmode = ap->a_accmode;
int error = EPERM;
if (accmode == VREAD) {
error = vn_lock(tmp->vp, LK_SHARED);
if (error == 0) {
error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
VOP_UNLOCK(tmp->vp);
}
}
TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
return (error);
}
/*
* VOP_GETATTR for zio node.
*/
static int
tarfs_zgetattr(struct vop_getattr_args *ap)
{
struct vattr va;
struct vnode *vp = ap->a_vp;
struct tarfs_zio *zio = vp->v_data;
struct tarfs_mount *tmp = zio->tmp;
struct vattr *vap = ap->a_vap;
int error = 0;
VATTR_NULL(vap);
error = vn_lock(tmp->vp, LK_SHARED);
if (error == 0) {
error = VOP_GETATTR(tmp->vp, &va, ap->a_cred);
VOP_UNLOCK(tmp->vp);
if (error == 0) {
vap->va_type = VREG;
vap->va_mode = va.va_mode;
vap->va_nlink = 1;
vap->va_gid = va.va_gid;
vap->va_uid = va.va_uid;
vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
vap->va_fileid = TARFS_ZIOINO;
vap->va_size = zio->idx[zio->nidx - 1].o;
vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
vap->va_atime = va.va_atime;
vap->va_ctime = va.va_ctime;
vap->va_mtime = va.va_mtime;
vap->va_birthtime = tmp->root->birthtime;
vap->va_bytes = va.va_bytes;
}
}
TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
return (error);
}
#ifdef ZSTDIO
/*
 * VOP_READ for zio node, zstd edition.
 *
 * Produces uio_resid bytes of the decompressed stream starting at
 * uio_offset.  The steps are:
 *
 *  1. Use the frame index to pick a restart point at or before the
 *     requested offset, resetting the decompressor if the restart point
 *     differs from where the previous read left off.
 *  2. Decompress and discard output until the requested offset is
 *     reached.
 *  3. Decompress into the caller's buffer (or into a bounce buffer if
 *     the destination is not in kernel space) until the request is
 *     satisfied or the stream ends.
 *
 * Every time the decompressor reports the end of a frame, the frame
 * index is advanced, growing it if the frame had not been seen before.
 *
 * The uio must have exactly one iovec.  Returns 0 on success (possibly
 * with a short count) and a positive errno value on failure.  On
 * failure, the decompression state is rewound to the first index entry.
 */
static int
tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
{
	void *ibuf = NULL, *obuf = NULL, *rl = NULL;
	struct uio auio;
	struct iovec aiov;
	struct tarfs_mount *tmp = zio->tmp;
	struct tarfs_zstd *zstd = zio->zstd;
	struct thread *td = curthread;
	ZSTD_inBuffer zib;
	ZSTD_outBuffer zob;
	off_t zsize;
	off_t ipos, opos;
	off_t skip;
	size_t ilen, olen;
	size_t zerror;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	size_t resid = uiop->uio_resid;
	size_t bsize;
	int error;
	bool reset = false;

	/* do we have to rewind? */
	if (off < zio->opos) {
		while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
			zio->curidx--;
		reset = true;
	}

	/* advance to the nearest index entry */
	if (off > zio->opos) {
		// XXX maybe do a binary search instead
		while (zio->curidx < zio->nidx - 1 &&
		    off >= zio->idx[zio->curidx + 1].o) {
			zio->curidx++;
			reset = true;
		}
	}

	/* reset the decompression stream if needed */
	if (reset) {
		zio->ipos = zio->idx[zio->curidx].i;
		zio->opos = zio->idx[zio->curidx].o;
		ZSTD_resetDStream(zstd->zds);
		TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
	} else {
		TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
		    (size_t)zio->ipos, (size_t)zio->opos);
	}

	/*
	 * Set up a temporary buffer for compressed data.  Use the size
	 * recommended by the zstd library; this is usually 128 kB, but
	 * just in case, make sure it's a multiple of the page size and no
	 * larger than MAXBSIZE.
	 */
	bsize = roundup(ZSTD_CStreamOutSize(), PAGE_SIZE);
	if (bsize > MAXBSIZE)
		bsize = MAXBSIZE;
	ibuf = malloc(bsize, M_TEMP, M_WAITOK);
	zib.src = NULL;
	zib.size = 0;
	zib.pos = 0;

	/*
	 * Set up the decompression buffer.  If the target is not in
	 * kernel space, we will have to set up a bounce buffer.
	 *
	 * TODO: to avoid using a bounce buffer, map destination pages
	 * using vm_fault_quick_hold_pages().
	 */
	MPASS(zio->opos <= off);
	MPASS(uiop->uio_iovcnt == 1);
	MPASS(uiop->uio_iov->iov_len >= len);
	if (uiop->uio_segflg == UIO_SYSSPACE) {
		zob.dst = uiop->uio_iov->iov_base;
	} else {
		TARFS_DPF(ALLOC, "%s: allocating %zu-byte bounce buffer\n",
		    __func__, len);
		zob.dst = obuf = malloc(len, M_TEMP, M_WAITOK);
	}
	zob.size = len;
	zob.pos = 0;

	/* lock tarball */
	rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error != 0) {
		goto fail_unlocked;
	}

	/* check size */
	error = vn_getsize_locked(tmp->vp, &zsize, td->td_ucred);
	if (error != 0) {
		goto fail;
	}
	if (zio->ipos >= zsize) {
		/* beyond EOF; error is 0 here, so this is a zero-byte read */
		goto fail;
	}

	while (resid > 0) {
		if (zib.pos == zib.size) {
			/* request data from the underlying file */
			aiov.iov_base = ibuf;
			aiov.iov_len = bsize;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = zio->ipos;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_resid = aiov.iov_len;
			auio.uio_td = td;
			error = VOP_READ(tmp->vp, &auio,
			    IO_DIRECT | IO_NODELOCKED,
			    td->td_ucred);
			if (error != 0)
				goto fail;
			TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
			    (size_t)zio->ipos, bsize,
			    (size_t)zio->ipos, bsize - auio.uio_resid);
			zib.src = ibuf;
			zib.size = bsize - auio.uio_resid;
			zib.pos = 0;
		}
		MPASS(zib.pos <= zib.size);
		if (zib.pos == zib.size) {
			/* the refill above came back empty */
			TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			goto fail;
		}
		if (zio->opos < off) {
			/*
			 * To be discarded.  The output is written to the
			 * start of the destination buffer and overwritten
			 * later.  The clamp is done in off_t rather than
			 * with min(), which takes u_int arguments: a skip
			 * distance of 4 GB or more would be truncated, and
			 * an exact multiple of 4 GB would yield an empty
			 * output window and stall the loop.
			 */
			skip = off - zio->opos;
			zob.size = skip < (off_t)len ? (size_t)skip : len;
			zob.pos = 0;
		} else {
			zob.size = len;
			zob.pos = zio->opos - off;
		}
		ipos = zib.pos;
		opos = zob.pos;
		/* decompress as much as possible */
		zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
		zio->ipos += ilen = zib.pos - ipos;
		zio->opos += olen = zob.pos - opos;
		/* only output past the requested offset counts */
		if (zio->opos > off)
			resid -= olen;
		if (ZSTD_isError(zerror)) {
			TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
			error = EIO;
			goto fail;
		}
		if (zerror == 0 && olen == 0) {
			TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			break;
		}
		if (zerror == 0) {
			TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			tarfs_zio_update_index(zio, zio->ipos, zio->opos);
		}
		TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
#ifdef TARFS_DEBUG
		counter_u64_add(tarfs_zio_inflated, olen);
#endif
	}
fail:
	VOP_UNLOCK(tmp->vp);
fail_unlocked:
	if (error == 0) {
		if (uiop->uio_segflg == UIO_SYSSPACE) {
			/* data is already in place; just report the count */
			uiop->uio_resid = resid;
		} else if (len > resid) {
			TARFS_DPF(ALLOC, "%s: bounced %zu bytes\n", __func__,
			    len - resid);
			error = uiomove(obuf, len - resid, uiop);
#ifdef TARFS_DEBUG
			counter_u64_add(tarfs_zio_bounced, len - resid);
#endif
		}
	}
	if (obuf != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing bounce buffer\n", __func__);
		free(obuf, M_TEMP);
	}
	if (rl != NULL)
		vn_rangelock_unlock(tmp->vp, rl);
	if (ibuf != NULL)
		free(ibuf, M_TEMP);
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
#ifdef TARFS_DEBUG
	counter_u64_add(tarfs_zio_consumed, len - uiop->uio_resid);
#endif
	if (error != 0) {
		/* start over from the first index entry next time */
		zio->curidx = 0;
		zio->ipos = zio->idx[0].i;
		zio->opos = zio->idx[0].o;
		ZSTD_resetDStream(zstd->zds);
	}
	return (error);
}
#endif
/*
 * VOP_READ for zio node.
 *
 * Dispatches to the decompressor that was set up for this mount.  If
 * there is none, the read fails with EFTYPE.
 */
static int
tarfs_zread(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct uio *uiop = ap->a_uio;
#ifdef TARFS_DEBUG
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
#endif
	int error;

	TARFS_DPF(ZIO, "%s(%zu, %zu)\n", __func__,
	    (size_t)off, len);
	/* assume failure unless a decompressor takes the request */
	error = EFTYPE;
#ifdef ZSTDIO
	if (zio->zstd != NULL)
		error = tarfs_zread_zstd(zio, uiop);
#endif
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}
/*
 * VOP_RECLAIM for zio node.
 *
 * Detaches the private data from the vnode, destroys its VM object and
 * purges it from the name cache.  The zio structure itself is not freed
 * here.  Always returns 0.
 */
static int
tarfs_zreclaim(struct vop_reclaim_args *ap)
{
	struct vnode *zvp;

	zvp = ap->a_vp;
	TARFS_DPF(ZIO, "%s(%p)\n", __func__, zvp);
	zvp->v_data = NULL;
	vnode_destroy_vobject(zvp);
	cache_purge(zvp);
	return (0);
}
/*
* VOP_STRATEGY for zio node.
*/
static int
tarfs_zstrategy(struct vop_strategy_args *ap)
{
struct uio auio;
struct iovec iov;
struct vnode *vp = ap->a_vp;
struct buf *bp = ap->a_bp;
off_t off;
size_t len;
int error;
iov.iov_base = bp->b_data;
iov.iov_len = bp->b_bcount;
off = bp->b_iooffset;
len = bp->b_bcount;
bp->b_resid = len;
auio.uio_iov = &iov;
auio.uio_iovcnt = 1;
auio.uio_offset = off;
auio.uio_resid = len;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_rw = UIO_READ;
auio.uio_td = curthread;
error = VOP_READ(vp, &auio, IO_DIRECT | IO_NODELOCKED, bp->b_rcred);
bp->b_flags |= B_DONE;
if (error != 0) {
bp->b_ioflags |= BIO_ERROR;
bp->b_error = error;
}
return (0);
}
/*
* Vnode operations for the zio node.  Only access, getattr, read,
* reclaim and strategy are implemented here; everything else falls
* through to default_vnodeops.
*/
static struct vop_vector tarfs_znodeops = {
.vop_default = &default_vnodeops,
.vop_access = tarfs_zaccess,
.vop_getattr = tarfs_zgetattr,
.vop_read = tarfs_zread,
.vop_reclaim = tarfs_zreclaim,
.vop_strategy = tarfs_zstrategy,
};
VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
/*
 * Initializes the decompression layer.
 *
 * Allocates the zio state and a frame index with room for 128 entries,
 * seeds the index and the current positions with the given input and
 * output offsets, and creates the vnode through which the decompressed
 * stream is read.  Both the state and the vnode are recorded in the
 * mount.  Returns the new zio state.
 */
static struct tarfs_zio *
tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
{
	struct tarfs_zio *state;
	struct vnode *vp;

	state = malloc(sizeof(*state), M_TARFSZSTATE, M_ZERO | M_WAITOK);
	TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
	state->tmp = tmp;

	/* frame index, starting out with a single entry */
	state->szidx = 128;
	state->idx = malloc(state->szidx * sizeof(*state->idx), M_TARFSZSTATE,
	    M_ZERO | M_WAITOK);
	state->curidx = 0;
	state->nidx = 1;
	state->ipos = i;
	state->opos = o;
	state->idx[0].i = i;
	state->idx[0].o = o;
	tmp->zio = state;
	TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);

	/* vnode serving the decompressed stream */
	getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &vp);
	vp->v_data = state;
	vp->v_type = VREG;
	vp->v_mount = tmp->vfs;
	vn_set_state(vp, VSTATE_CONSTRUCTED);
	tmp->znode = vp;
	TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
	return (state);
}
/*
 * Initializes the I/O layer, including decompression if the signature of
 * a supported compression format is detected.  Returns 0 on success and a
 * positive errno value on failure.
 *
 * The first iosize bytes of the file are read raw into a zero-filled
 * scratch buffer and compared against the known signatures, so a short
 * read simply fails to match.  xz and zlib signatures are recognized
 * only in order to reject them with EOPNOTSUPP; the same goes for zstd
 * if support was not compiled in.  The scratch buffer is released on
 * every path out of the function.
 */
int
tarfs_io_init(struct tarfs_mount *tmp)
{
	uint8_t *block;
	struct tarfs_zio *zio = NULL;
	ssize_t res;
	int error = 0;

	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
	if (res < 0) {
		/* res is a negated errno; don't leak the scratch buffer */
		error = -res;
		goto bad;
	}
	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
		printf("xz compression not supported\n");
		error = EOPNOTSUPP;
		goto bad;
	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
		printf("zlib compression not supported\n");
		error = EOPNOTSUPP;
		goto bad;
	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
#ifdef ZSTDIO
		zio = tarfs_zio_init(tmp, 0, 0);
		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
		(void)ZSTD_initDStream(zio->zstd->zds);
#else
		printf("zstd compression not supported\n");
		error = EOPNOTSUPP;
		goto bad;
#endif
	}
bad:
	free(block, M_TEMP);
	return (error);
}
/*
* Tears down the decompression layer.
*/
static int
tarfs_zio_fini(struct tarfs_mount *tmp)
{
struct tarfs_zio *zio = tmp->zio;
int error = 0;
if (tmp->znode != NULL) {
error = vn_lock(tmp->znode, LK_EXCLUSIVE);
if (error != 0) {
TARFS_DPF(ALLOC, "%s: failed to lock znode", __func__);
return (error);
}
tmp->znode->v_mount = NULL;
vgone(tmp->znode);
vput(tmp->znode);
tmp->znode = NULL;
}
#ifdef ZSTDIO
if (zio->zstd != NULL) {
TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
ZSTD_freeDStream(zio->zstd->zds);
free(zio->zstd, M_TARFSZSTATE);
}
#endif
if (zio->idx != NULL) {
TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
free(zio->idx, M_TARFSZSTATE);
}
TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
free(zio, M_TARFSZSTATE);
tmp->zio = NULL;
return (error);
}
/*
 * Tears down the I/O layer.  The only thing that needs tearing down is
 * the decompression layer, and only if it was set up for this mount.
 * Returns 0 on success and a positive errno value on failure.
 */
int
tarfs_io_fini(struct tarfs_mount *tmp)
{

	if (tmp->zio == NULL)
		return (0);
	return (tarfs_zio_fini(tmp));
}

603
sys/fs/tarfs/tarfs_subr.c Normal file
View File

@ -0,0 +1,603 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 Juniper Networks, Inc.
* Copyright (c) 2022-2023 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_tarfs.h"
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/fcntl.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <vm/vm_param.h>
#include <fs/tarfs/tarfs.h>
#include <fs/tarfs/tarfs_dbg.h>
/* malloc(9) type used in this file for node names and symlink targets. */
MALLOC_DEFINE(M_TARFSNAME, "tarfs name", "tarfs file names");
/* malloc(9) type used in this file for block maps and the raw map text. */
MALLOC_DEFINE(M_TARFSBLK, "tarfs blk", "tarfs block maps");
/* Root of the vfs.tarfs sysctl tree. */
SYSCTL_NODE(_vfs, OID_AUTO, tarfs, CTLFLAG_RW, 0, "Tar filesystem");
/* Preferred I/O size, expressed as a base-2 logarithm. */
unsigned int tarfs_ioshift = TARFS_IOSHIFT_DEFAULT;
static int
tarfs_sysctl_handle_ioshift(SYSCTL_HANDLER_ARGS)
{
unsigned int tmp;
int error;
tmp = *(unsigned int *)arg1;
if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
return (error);
if (req->newptr != NULL) {
if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
return (error);
if (tmp == 0)
tmp = TARFS_IOSHIFT_DEFAULT;
if (tmp < TARFS_IOSHIFT_MIN)
tmp = TARFS_IOSHIFT_MIN;
if (tmp > TARFS_IOSHIFT_MAX)
tmp = TARFS_IOSHIFT_MAX;
*(unsigned int *)arg1 = tmp;
}
return (0);
}
/*
 * vfs.tarfs.ioshift: read/write tunable backed by tarfs_ioshift.  Writes
 * go through tarfs_sysctl_handle_ioshift(), which substitutes the default
 * for zero and clamps other values to the supported range.
 */
SYSCTL_PROC(_vfs_tarfs, OID_AUTO, ioshift,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW | CTLFLAG_TUN,
&tarfs_ioshift, 0, tarfs_sysctl_handle_ioshift, "IU",
"Tar filesystem preferred I/O size (log 2)");
#ifdef TARFS_DEBUG
/* vfs.tarfs.debug: debug mask, only present in TARFS_DEBUG builds. */
int tarfs_debug;
SYSCTL_INT(_vfs_tarfs, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN,
&tarfs_debug, 0, "Tar filesystem debug mask");
#endif /* TARFS_DEBUG */
/*
 * Recursively prints the names of all entries below the directory tnp,
 * indenting each level by four spaces.  Nodes that are not directories
 * are ignored.
 */
static void
tarfs_dump_tree_internal(struct tarfs_node *tnp, int indent)
{
	struct tarfs_node *child;

	if (tnp->type != VDIR)
		return;
	TAILQ_FOREACH(child, &tnp->dir.dirhead, dirents) {
		printf("%*s%s\n", indent * 4, "",
		    (child->name != NULL) ? child->name : "<<root>>");
		if (child->type == VDIR)
			tarfs_dump_tree_internal(child, indent + 1);
	}
}
/*
 * Prints the name of tnp followed by the indented tree of everything
 * below it.  A node without a name is shown as "<<root>>".  Does nothing
 * when tnp is NULL.
 */
void
tarfs_dump_tree(struct tarfs_node *tnp)
{

	if (tnp == NULL)
		return;
	printf("%s\n", (tnp->name != NULL) ? tnp->name : "<<root>>");
	tarfs_dump_tree_internal(tnp, 1);
}
/*
 * Dumps the fields of a tarfs node to the console, one per line,
 * followed by the type-specific fields for directories and device
 * nodes.  Does nothing when tnp is NULL.
 */
void
tarfs_print_node(struct tarfs_node *tnp)
{
	const char *label;

	if (tnp == NULL)
		return;
	/* A node without a name is reported as the root. */
	label = (tnp->name == NULL) ? "<<root>>" : tnp->name;

	printf("%s: node %p\n", __func__, tnp);
	printf("\tvnode %p\n", tnp->vnode);
	printf("\ttmp %p\n", tnp->tmp);
	printf("\ttype %d\n", tnp->type);
	printf("\tino %lu\n", tnp->ino);
	printf("\tsize %zu\n", tnp->size);
	printf("\tname %s\n", label);
	printf("\tnamelen %zu\n", tnp->namelen);
	printf("\tuid %d\n", tnp->uid);
	printf("\tgid %d\n", tnp->gid);
	printf("\tmode o%o\n", tnp->mode);
	printf("\tflags %u\n", tnp->flags);
	printf("\tnlink %lu\n", tnp->nlink);
	printf("\tatime %d\n", (int)tnp->atime.tv_sec);
	printf("\tmtime %d\n", (int)tnp->mtime.tv_sec);
	printf("\tctime %d\n", (int)tnp->ctime.tv_sec);
	printf("\tbirthtime %d\n", (int)tnp->birthtime.tv_sec);
	printf("\tgen %lu\n", tnp->gen);
	printf("\tparent %p\n", tnp->parent);

	if (tnp->type == VDIR) {
		printf("\tdir.lastcookie %jd\n", tnp->dir.lastcookie);
		printf("\tdir.lastnode %p\n", tnp->dir.lastnode);
	} else if (tnp->type == VBLK || tnp->type == VCHR) {
		printf("\trdev %lu\n", tnp->rdev);
	}
}
/*
 * Looks up a directory entry by name.
 *
 * tnp is the directory to search and cnp supplies the name and its
 * length.  If f is not NULL, only that specific entry is considered.
 * When the matching entry is a regular file with a non-NULL "other"
 * pointer (a hard link), the node it refers to is returned instead.
 *
 * Returns the matching node, or NULL if there is no match.
 */
struct tarfs_node *
tarfs_lookup_node(struct tarfs_node *tnp, struct tarfs_node *f,
    struct componentname *cnp)
{
	struct tarfs_node *entry;

	TARFS_DPF(LOOKUP, "%s: name: %.*s\n", __func__, (int)cnp->cn_namelen,
	    cnp->cn_nameptr);

	/* TAILQ_FOREACH leaves entry NULL if the list is exhausted. */
	TAILQ_FOREACH(entry, &tnp->dir.dirhead, dirents) {
		if (f != NULL && entry != f)
			continue;
		if (entry->namelen == cnp->cn_namelen &&
		    bcmp(entry->name, cnp->cn_nameptr,
		    entry->namelen) == 0)
			break;
	}
	if (entry == NULL) {
		TARFS_DPF(LOOKUP, "%s: no match found\n", __func__);
		return (NULL);
	}

	if (entry->type == VREG && entry->other != NULL) {
		/*
		 * Unconditional trace: TARFS_DPF_IFF expects a condition
		 * as its second argument, which was missing here.
		 */
		TARFS_DPF(LOOKUP, "%s: following hard link %p\n",
		    __func__, entry);
		entry = entry->other;
	}
	TARFS_DPF(LOOKUP, "%s: found tarfs_node %p\n", __func__,
	    entry);
	return (entry);
}
/*
 * Finds the entry of directory tnp whose inode number equals cookie.
 * The node remembered in dir.lastnode is returned directly when the
 * cookie matches dir.lastcookie; otherwise the entry list is walked.
 * Returns NULL when no entry matches.
 */
struct tarfs_node *
tarfs_lookup_dir(struct tarfs_node *tnp, off_t cookie)
{
	struct tarfs_node *current;

	TARFS_DPF(LOOKUP, "%s: tarfs_node %p, cookie %jd\n", __func__, tnp,
	    cookie);
	TARFS_DPF(LOOKUP, "%s: name: %s\n", __func__,
	    (tnp->name == NULL) ? "<<root>>" : tnp->name);

	/* Fast path: the position saved by the previous call. */
	if (tnp->dir.lastnode != NULL &&
	    tnp->dir.lastcookie == cookie) {
		TARFS_DPF(LOOKUP, "%s: Using cached entry: tarfs_node %p, "
		    "cookie %jd\n", __func__, tnp->dir.lastnode,
		    tnp->dir.lastcookie);
		return (tnp->dir.lastnode);
	}

	TAILQ_FOREACH(current, &tnp->dir.dirhead, dirents) {
		TARFS_DPF(LOOKUP, "%s: tarfs_node %p, current %p, ino %lu\n",
		    __func__, tnp, current, current->ino);
		TARFS_DPF_IFF(LOOKUP, current->name != NULL,
		    "%s: name: %s\n", __func__, current->name);
		if (current->ino != cookie)
			continue;
		TARFS_DPF(LOOKUP, "%s: Found entry: tarfs_node %p, "
		    "cookie %lu\n", __func__, current,
		    current->ino);
		return (current);
	}
	return (NULL);
}
/*
 * Allocates and initializes a new tarfs node and links it into the
 * filesystem.
 *
 * name / namelen give the entry name (may be empty, in which case no
 * name is stored); type selects the node type; off and sz are stored as
 * the node's offset and size; mtime, uid, gid and mode are recorded as
 * given.  linkname is only used for VLNK nodes, where sz is taken as the
 * length of the link target.  rdev is only used for VBLK / VCHR nodes.
 * parent is the directory to insert the node into, or NULL, in which
 * case the node becomes its own parent.
 *
 * On return *retnode points to the new node.  All allocations use
 * M_WAITOK and the function always returns 0; a node type outside the
 * handled set causes a panic.
 */
int
tarfs_alloc_node(struct tarfs_mount *tmp, const char *name, size_t namelen,
enum vtype type, off_t off, size_t sz, time_t mtime, uid_t uid, gid_t gid,
mode_t mode, unsigned int flags, const char *linkname, dev_t rdev,
struct tarfs_node *parent, struct tarfs_node **retnode)
{
struct tarfs_node *tnp;
TARFS_DPF(ALLOC, "%s(%.*s)\n", __func__, (int)namelen, name);
tnp = malloc(sizeof(struct tarfs_node), M_TARFSNODE, M_WAITOK | M_ZERO);
mtx_init(&tnp->lock, "tarfs node lock", NULL, MTX_DEF);
tnp->gen = arc4random();
tnp->tmp = tmp;
/* Keep a private NUL-terminated copy of the name, if there is one. */
if (namelen > 0) {
tnp->name = malloc(namelen + 1, M_TARFSNAME, M_WAITOK);
tnp->namelen = namelen;
memcpy(tnp->name, name, namelen);
tnp->name[namelen] = '\0';
}
tnp->type = type;
tnp->uid = uid;
tnp->gid = gid;
tnp->mode = mode;
tnp->nlink = 1;
/* atime and birthtime are "now"; ctime mirrors the supplied mtime. */
vfs_timestamp(&tnp->atime);
tnp->mtime.tv_sec = mtime;
tnp->birthtime = tnp->atime;
tnp->ctime = tnp->mtime;
/* Nodes with a parent get an inode number from the mount's allocator. */
if (parent != NULL) {
tnp->ino = alloc_unr(tmp->ino_unr);
}
tnp->offset = off;
tnp->size = tnp->physize = sz;
switch (type) {
case VDIR:
MPASS(parent != tnp);
MPASS(parent != NULL || tmp->root == NULL);
TAILQ_INIT(&tnp->dir.dirhead);
tnp->nlink++;
/* A parentless directory gets the root inode number. */
if (parent == NULL) {
tnp->ino = TARFS_ROOTINO;
}
tnp->physize = 0;
break;
case VLNK:
/* sz is the length of the link target, which is copied. */
tnp->link.name = malloc(sz + 1, M_TARFSNAME,
M_WAITOK);
tnp->link.namelen = sz;
memcpy(tnp->link.name, linkname, sz);
tnp->link.name[sz] = '\0';
break;
case VREG:
/* create dummy block map */
tnp->nblk = 1;
tnp->blk = malloc(sizeof(*tnp->blk), M_TARFSBLK, M_WAITOK);
tnp->blk[0].i = 0;
tnp->blk[0].o = 0;
tnp->blk[0].l = tnp->physize;
break;
case VFIFO:
/* Nothing extra to do */
break;
case VBLK:
case VCHR:
tnp->rdev = rdev;
tnp->physize = 0;
break;
default:
panic("%s: type %d not allowed", __func__, type);
}
if (parent != NULL) {
MPASS(parent->type == VDIR);
/* Append to the parent's entry list under the parent's lock. */
TARFS_NODE_LOCK(parent);
TAILQ_INSERT_TAIL(&parent->dir.dirhead, tnp, dirents);
parent->size += sizeof(struct tarfs_node);
tnp->parent = parent;
/* A new subdirectory adds a link to its parent. */
if (type == VDIR) {
parent->nlink++;
}
TARFS_NODE_UNLOCK(parent);
} else {
/* No parent given: the node is its own parent. */
tnp->parent = tnp;
}
/* Trips if a non-directory node was created without a parent. */
MPASS(tnp->ino != 0);
TARFS_ALLNODES_LOCK(tmp);
TAILQ_INSERT_TAIL(&tmp->allnodes, tnp, entries);
TARFS_ALLNODES_UNLOCK(tmp);
*retnode = tnp;
tmp->nfiles++;
return (0);
}
#define is09(ch) ((ch) >= '0' && (ch) <= '9')
/*
 * Loads and validates the block map stored at the start of a file's
 * data and installs it in the node.
 *
 * The map is text: a first line holding the number of entries, followed
 * by two lines per entry giving the logical offset and the length of
 * each data extent.  realsize is the logical size of the file, which
 * becomes the node's size once the map has been accepted.
 *
 * Returns 0 on success, the error from the underlying read if reading
 * the map fails, EIO on a short read, or EINVAL if the map is too
 * large, malformed, misaligned, overlapping or out of bounds.  All
 * temporary allocations are released on every failure path.
 */
int
tarfs_load_blockmap(struct tarfs_node *tnp, size_t realsize)
{
	struct tarfs_blk *blk = NULL;
	char *map = NULL;
	size_t nmap = 0, nblk = 0;
	char *p, *q;
	ssize_t res;
	unsigned int i;
	long n;
	int error;

	/*
	 * Load the entire map into memory.  We don't know how big it is,
	 * but as soon as we start reading it we will know how many
	 * entries it contains, and then we can count newlines.
	 */
	do {
		nmap++;
		if (tnp->size < nmap * TARFS_BLOCKSIZE) {
			TARFS_DPF(MAP, "%s: map too large\n", __func__);
			goto bad;
		}
		/* grow the map */
		map = realloc(map, nmap * TARFS_BLOCKSIZE + 1, M_TARFSBLK,
		    M_ZERO | M_WAITOK);
		/* read an additional block */
		res = tarfs_io_read_buf(tnp->tmp, false,
		    map + (nmap - 1) * TARFS_BLOCKSIZE,
		    tnp->offset + (nmap - 1) * TARFS_BLOCKSIZE,
		    TARFS_BLOCKSIZE);
		if (res < 0) {
			/* res is a negated errno; release the map buffer. */
			error = -res;
			goto fail;
		} else if (res < TARFS_BLOCKSIZE) {
			error = EIO;
			goto fail;
		}
		map[nmap * TARFS_BLOCKSIZE] = '\0'; /* sentinel */
		if (nblk == 0) {
			/* The first line holds the number of entries. */
			n = strtol(p = map, &q, 10);
			if (q == p || *q != '\n' || n < 1)
				goto syntax;
			nblk = n;
		}
		for (n = 0, p = map; *p != '\0'; ++p) {
			if (*p == '\n') {
				++n;
			}
		}
		TARFS_DPF(MAP, "%s: %ld newlines in map\n", __func__, n);
	} while (n < nblk * 2 + 1);
	TARFS_DPF(MAP, "%s: block map length %zu\n", __func__, nblk);
	blk = malloc(sizeof(*blk) * nblk, M_TARFSBLK, M_WAITOK | M_ZERO);
	/* Skip the count line; entries follow. */
	p = strchr(map, '\n') + 1;
	for (i = 0; i < nblk; i++) {
		/*
		 * Physical offsets are implied: data starts right after
		 * the map and extents are stored back to back.
		 */
		if (i == 0)
			blk[i].i = nmap * TARFS_BLOCKSIZE;
		else
			blk[i].i = blk[i - 1].i + blk[i - 1].l;
		n = strtol(p, &q, 10);
		if (q == p || *q != '\n' || n < 0)
			goto syntax;
		p = q + 1;
		blk[i].o = n;
		n = strtol(p, &q, 10);
		if (q == p || *q != '\n' || n < 0)
			goto syntax;
		p = q + 1;
		blk[i].l = n;
		TARFS_DPF(MAP, "%s: %3d %12zu %12zu %12zu\n", __func__,
		    i, blk[i].i, blk[i].o, blk[i].l);
		/*
		 * Check block alignment if the block is of non-zero
		 * length (a zero-length block indicates the end of a
		 * trailing hole).  Checking i indirectly checks the
		 * previous block's l.  It's ok for the final block to
		 * have an uneven length.
		 */
		if (blk[i].l == 0) {
			TARFS_DPF(MAP, "%s: zero-length block\n", __func__);
		} else if (blk[i].i % TARFS_BLOCKSIZE != 0 ||
		    blk[i].o % TARFS_BLOCKSIZE != 0) {
			TARFS_DPF(MAP, "%s: misaligned map entry\n", __func__);
			goto bad;
		}
		/*
		 * Check that this block starts after the end of the
		 * previous one.
		 */
		if (i > 0 && blk[i].o < blk[i - 1].o + blk[i - 1].l) {
			TARFS_DPF(MAP, "%s: overlapping map entries\n", __func__);
			goto bad;
		}
		/*
		 * Check that the block is within the file, both
		 * physically and logically.
		 */
		if (blk[i].i + blk[i].l > tnp->physize ||
		    blk[i].o + blk[i].l > realsize) {
			TARFS_DPF(MAP, "%s: map overflow\n", __func__);
			goto bad;
		}
	}
	free(map, M_TARFSBLK);
	/* store in node */
	free(tnp->blk, M_TARFSBLK);
	tnp->nblk = nblk;
	tnp->blk = blk;
	tnp->size = realsize;
	return (0);
syntax:
	TARFS_DPF(MAP, "%s: syntax error in block map\n", __func__);
bad:
	error = EINVAL;
fail:
	free(map, M_TARFSBLK);
	free(blk, M_TARFSBLK);
	return (error);
}
/*
 * Releases a node together with the memory it owns: the symlink target
 * (for VLNK nodes), the name, the block map and, for inode numbers at or
 * above TARFS_MININO, the inode number itself.  The mount's file count
 * is decremented afterwards.
 */
void
tarfs_free_node(struct tarfs_node *tnp)
{
	struct tarfs_mount *mnt;

	MPASS(tnp != NULL);
	/* Remember the mount; tnp is gone by the time nfiles is updated. */
	mnt = tnp->tmp;

	if (tnp->type == VLNK && tnp->link.name != NULL)
		free(tnp->link.name, M_TARFSNAME);
	if (tnp->name != NULL)
		free(tnp->name, M_TARFSNAME);
	if (tnp->blk != NULL)
		free(tnp->blk, M_TARFSBLK);
	if (tnp->ino >= TARFS_MININO)
		free_unr(mnt->ino_unr, tnp->ino);
	free(tnp, M_TARFSNODE);
	mnt->nfiles--;
}
/*
 * Reads up to len bytes of file data into uiop, starting at
 * uiop->uio_offset, using the node's block map.
 *
 * For each extent in the map, any gap between the current offset and
 * the start of the extent is filled with zeroes, then the extent's data
 * is read from the backing store.  uiop's offset and residual count are
 * advanced by the number of bytes actually transferred.
 *
 * Returns 0 on success or the error from uiomove() / tarfs_io_read().
 */
int
tarfs_read_file(struct tarfs_node *tnp, size_t len, struct uio *uiop)
{
	struct uio auio;
	size_t resid = len;
	size_t copylen;
	unsigned int i;
	int error;

	TARFS_DPF(VNODE, "%s(%s, %jd, %zu)\n", __func__,
	    tnp->name, (intmax_t)uiop->uio_offset, resid);
	for (i = 0; i < tnp->nblk && resid > 0; ++i) {
		if (uiop->uio_offset > tnp->blk[i].o + tnp->blk[i].l) {
			/* skip this block */
			continue;
		}
		while (resid > 0 &&
		    uiop->uio_offset < tnp->blk[i].o) {
			/* move out some zeroes... */
			copylen = tnp->blk[i].o - uiop->uio_offset;
			if (copylen > resid)
				copylen = resid;
			if (copylen > ZERO_REGION_SIZE)
				copylen = ZERO_REGION_SIZE;
			/*
			 * Work on a copy so that the caller's offset and
			 * residual count are only advanced by what was
			 * actually transferred.
			 */
			auio = *uiop;
			auio.uio_offset = 0;
			auio.uio_resid = copylen;
			error = uiomove(__DECONST(void *, zero_region),
			    copylen, &auio);
			if (error != 0)
				return (error);
			TARFS_DPF(MAP, "%s(%s) = zero %zu\n", __func__,
			    tnp->name, copylen - auio.uio_resid);
			uiop->uio_offset += copylen - auio.uio_resid;
			uiop->uio_resid -= copylen - auio.uio_resid;
			resid -= copylen - auio.uio_resid;
		}
		while (resid > 0 &&
		    uiop->uio_offset < tnp->blk[i].o + tnp->blk[i].l) {
			/*
			 * now actual data; never read past the end of this
			 * extent, even when starting in the middle of it,
			 * since whatever follows physically does not belong
			 * at the following logical offsets.
			 */
			copylen = tnp->blk[i].o + tnp->blk[i].l -
			    uiop->uio_offset;
			if (copylen > resid)
				copylen = resid;
			auio = *uiop;
			/* Translate the logical offset to a physical one. */
			auio.uio_offset = tnp->offset + tnp->blk[i].i +
			    uiop->uio_offset - tnp->blk[i].o;
			auio.uio_resid = copylen;
			error = tarfs_io_read(tnp->tmp, false, &auio);
			if (error != 0)
				return (error);
			TARFS_DPF(MAP, "%s(%s) = data %zu\n", __func__,
			    tnp->name, copylen - auio.uio_resid);
			uiop->uio_offset += copylen - auio.uio_resid;
			uiop->uio_resid -= copylen - auio.uio_resid;
			resid -= copylen - auio.uio_resid;
		}
	}
	TARFS_DPF(VNODE, "%s(%s) = %zu\n", __func__,
	    tnp->name, len - resid);
	return (0);
}
/*
 * XXX ugly file flag parser which could easily be a finite state machine
 * driven by a small precomputed table.
 *
 * Note that unlike strtofflags(3), we make no attempt to handle negated
 * flags, since they shouldn't appear in tar files.
 */
/*
 * Table mapping textual flag names to their file flag bits.  The table
 * is terminated by an entry whose name is NULL.
 */
static const struct tarfs_flag {
const char *name;
unsigned int flag;
} tarfs_flags[] = {
{ "nodump", UF_NODUMP },
{ "uchg", UF_IMMUTABLE },
{ "uappnd", UF_APPEND },
{ "opaque", UF_OPAQUE },
{ "uunlnk", UF_NOUNLINK },
{ "arch", SF_ARCHIVED },
{ "schg", SF_IMMUTABLE },
{ "sappnd", SF_APPEND },
{ "sunlnk", SF_NOUNLINK },
{ NULL, 0 },
};
/*
 * Parses a comma-separated list of file flag names into a flag mask.
 *
 * Each name must consist of lower-case letters only and must match an
 * entry in tarfs_flags exactly.  Parsing stops at the first character
 * that is not a lower-case letter or a comma, or at the end of the
 * first name that is not recognized.
 *
 * If end is not NULL, *end is set to the position where parsing
 * stopped; this is the terminating NUL if the whole string was
 * consumed.  Returns the OR of the flags recognized before that point.
 */
unsigned int
tarfs_strtofflags(const char *str, char **end)
{
	const struct tarfs_flag *tf;
	const char *p, *q;
	unsigned int ret;

	ret = 0;
	for (p = q = str; *q != '\0'; p = q + 1) {
		/* Find the end of the current name: [p, q). */
		for (q = p; *q != '\0' && *q != ','; ++q) {
			if (*q < 'a' || *q > 'z') {
				goto end;
			}
		}
		for (tf = tarfs_flags; tf->name != NULL; tf++) {
			/* Require a full-length match, not just a prefix. */
			if (strncmp(tf->name, p, q - p) == 0 &&
			    tf->name[q - p] == '\0') {
				TARFS_DPF(ALLOC, "%s: %.*s = 0x%06x\n", __func__,
				    (int)(q - p), p, tf->flag);
				ret |= tf->flag;
				break;
			}
		}
		if (tf->name == NULL) {
			TARFS_DPF(ALLOC, "%s: %.*s = 0x??????\n",
			    __func__, (int)(q - p), p);
			goto end;
		}
	}
end:
	/*
	 * Test the out-pointer itself, not what it points to: the
	 * caller's variable may be uninitialized, and end may be NULL.
	 */
	if (end != NULL) {
		*end = __DECONST(char *, q);
	}
	return (ret);
}

1173
sys/fs/tarfs/tarfs_vfsops.c Normal file

File diff suppressed because it is too large Load Diff

642
sys/fs/tarfs/tarfs_vnops.c Normal file
View File

@ -0,0 +1,642 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 Juniper Networks, Inc.
* Copyright (c) 2022-2023 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_tarfs.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <fs/tarfs/tarfs.h>
#include <fs/tarfs/tarfs_dbg.h>
/*
 * VOP_OPEN: only regular files and directories can be opened; anything
 * else fails with EOPNOTSUPP.  A VM object sized to the node is set up
 * for the vnode on success.
 */
static int
tarfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tarfs_node *tnp;

	MPASS(VOP_ISLOCKED(vp));
	tnp = VP_TO_TARFS_NODE(vp);
	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
	    tnp, tnp->name, ap->a_mode);

	switch (vp->v_type) {
	case VREG:
	case VDIR:
		break;
	default:
		return (EOPNOTSUPP);
	}
	vnode_create_vobject(vp, tnp->size, ap->a_td);
	return (0);
}
/*
 * VOP_CLOSE: nothing to release.  Debug builds assert that the vnode is
 * locked and trace the call; it always succeeds.
 */
static int
tarfs_close(struct vop_close_args *ap)
{
#ifdef TARFS_DEBUG
	struct vnode *vp = ap->a_vp;
	struct tarfs_node *tnp;

	MPASS(VOP_ISLOCKED(vp));
	tnp = VP_TO_TARFS_NODE(vp);
	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__, tnp, tnp->name);
#else
	(void)ap;
#endif
	return (0);
}
/*
 * VOP_ACCESS: write access is always refused -- with EROFS for
 * directories, symlinks and regular files, and with EPERM for device
 * nodes and FIFOs.  Unknown vnode types yield EINVAL.  Everything else
 * is decided by vaccess() using the node's mode and ownership.
 */
static int
tarfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;
	struct ucred *cred = ap->a_cred;
	struct tarfs_node *tnp;

	MPASS(VOP_ISLOCKED(vp));
	tnp = VP_TO_TARFS_NODE(vp);
	TARFS_DPF(VNODE, "%s(%p=%s, %o)\n", __func__,
	    tnp, tnp->name, accmode);

	switch (vp->v_type) {
	case VDIR:
	case VLNK:
	case VREG:
		if ((accmode & VWRITE) != 0)
			return (EROFS);
		break;
	case VBLK:
	case VCHR:
	case VFIFO:
		if ((accmode & VWRITE) != 0)
			return (EPERM);
		break;
	default:
		return (EINVAL);
	}
	return (vaccess(vp->v_type, tnp->mode, tnp->uid,
	    tnp->gid, accmode, cred));
}
static int
tarfs_getattr(struct vop_getattr_args *ap)
{
struct tarfs_node *tnp;
struct vnode *vp;
struct vattr *vap;
vp = ap->a_vp;
vap = ap->a_vap;
tnp = VP_TO_TARFS_NODE(vp);
TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__,
tnp, tnp->name);
vap->va_type = vp->v_type;
vap->va_mode = tnp->mode;
vap->va_nlink = tnp->nlink;
vap->va_gid = tnp->gid;
vap->va_uid = tnp->uid;
vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
vap->va_fileid = tnp->ino;
vap->va_size = tnp->size;
vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
vap->va_atime = tnp->atime;
vap->va_ctime = tnp->ctime;
vap->va_mtime = tnp->mtime;
vap->va_birthtime = tnp->birthtime;
vap->va_gen = tnp->gen;
vap->va_flags = tnp->flags;
vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
tnp->rdev : NODEV;
vap->va_bytes = round_page(tnp->physize);
vap->va_filerev = 0;
return (0);
}
/*
 * VOP_CACHEDLOOKUP: resolves one path component (ap->a_cnp) relative to
 * the directory vnode ap->a_dvp and returns the resulting vnode in
 * *ap->a_vpp.
 *
 * Search permission on the directory is checked first.  ".." resolves
 * to the parent node (ENOENT at the root), "." to the directory itself,
 * and any other name is looked up among the directory's entries.  In
 * debug builds the decompression vnode, if present, is additionally
 * reachable from the root under TARFS_ZIO_NAME.
 *
 * Returns 0 on success, ENOENT if the name does not exist, ENOTDIR if a
 * non-final component is neither a directory nor a symlink, or an error
 * from VOP_ACCESS() / vn_vget_ino() / vn_lock().
 */
static int
tarfs_lookup(struct vop_cachedlookup_args *ap)
{
struct tarfs_node *dirnode, *parent, *tnp;
struct componentname *cnp;
struct vnode *dvp, **vpp;
#ifdef TARFS_DEBUG
struct vnode *vp;
#endif
int error;
dvp = ap->a_dvp;
vpp = ap->a_vpp;
cnp = ap->a_cnp;
*vpp = NULLVP;
dirnode = VP_TO_TARFS_NODE(dvp);
parent = dirnode->parent;
tnp = NULL;
TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s)\n", __func__,
dirnode, dirnode->name,
(int)cnp->cn_namelen, cnp->cn_nameptr);
/* The caller needs search permission on the directory. */
error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, curthread);
if (error != 0)
return (error);
if (cnp->cn_flags & ISDOTDOT) {
/* Do not allow .. on the root node */
if (parent == NULL || parent == dirnode)
return (ENOENT);
/* Allocate a new vnode on the matching entry */
error = vn_vget_ino(dvp, parent->ino, cnp->cn_lkflags,
vpp);
if (error != 0)
return (error);
} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
/* "." is the directory itself; just take another reference. */
VREF(dvp);
*vpp = dvp;
#ifdef TARFS_DEBUG
} else if (dirnode == dirnode->tmp->root &&
(vp = dirnode->tmp->znode) != NULL &&
cnp->cn_namelen == TARFS_ZIO_NAMELEN &&
memcmp(cnp->cn_nameptr, TARFS_ZIO_NAME, TARFS_ZIO_NAMELEN) == 0) {
/* Debug only: expose the decompression vnode in the root. */
error = vn_lock(vp, cnp->cn_lkflags);
if (error != 0)
return (error);
vref(vp);
*vpp = vp;
/* Returns directly, so this result is never entered in the cache. */
return (0);
#endif
} else {
tnp = tarfs_lookup_node(dirnode, NULL, cnp);
if (tnp == NULL) {
TARFS_DPF(LOOKUP, "%s(%p=%s, %.*s): file not found\n", __func__,
dirnode, dirnode->name,
(int)cnp->cn_namelen, cnp->cn_nameptr);
return (ENOENT);
}
/* An intermediate component must be a directory or a symlink. */
if ((cnp->cn_flags & ISLASTCN) == 0 &&
(tnp->type != VDIR && tnp->type != VLNK))
return (ENOTDIR);
error = vn_vget_ino(dvp, tnp->ino, cnp->cn_lkflags, vpp);
if (error != 0)
return (error);
}
#ifdef TARFS_DEBUG
if (tnp == NULL)
tnp = VP_TO_TARFS_NODE(*vpp);
TARFS_DPF(LOOKUP, "%s: found vnode %p, tarfs_node %p\n", __func__,
*vpp, tnp);
#endif /* TARFS_DEBUG */
/* Store the result in the cache if MAKEENTRY is specified in flags */
if ((cnp->cn_flags & MAKEENTRY) != 0 && cnp->cn_nameiop != CREATE)
cache_enter(dvp, *vpp, cnp);
return (error);
}
static int
tarfs_readdir(struct vop_readdir_args *ap)
{
struct dirent cde;
struct tarfs_node *current, *tnp;
struct vnode *vp;
struct uio *uio;
int *eofflag;
u_long **cookies;
int *ncookies;
off_t off;
u_int idx, ndirents;
int error;
vp = ap->a_vp;
uio = ap->a_uio;
eofflag = ap->a_eofflag;
cookies = ap->a_cookies;
ncookies = ap->a_ncookies;
if (vp->v_type != VDIR)
return (ENOTDIR);
tnp = VP_TO_TARFS_NODE(vp);
off = uio->uio_offset;
current = NULL;
ndirents = 0;
TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
tnp, tnp->name, uio->uio_offset, uio->uio_resid);
if (uio->uio_offset == TARFS_COOKIE_EOF) {
TARFS_DPF(VNODE, "%s: EOF\n", __func__);
return (0);
}
if (uio->uio_offset == TARFS_COOKIE_DOT) {
TARFS_DPF(VNODE, "%s: Generating . entry\n", __func__);
/* fake . entry */
cde.d_fileno = tnp->ino;
cde.d_type = DT_DIR;
cde.d_namlen = 1;
cde.d_name[0] = '.';
cde.d_name[1] = '\0';
cde.d_reclen = GENERIC_DIRSIZ(&cde);
if (cde.d_reclen > uio->uio_resid)
goto full;
error = uiomove(&cde, cde.d_reclen, uio);
if (error)
return (error);
/* next is .. */
uio->uio_offset = TARFS_COOKIE_DOTDOT;
ndirents++;
}
if (uio->uio_offset == TARFS_COOKIE_DOTDOT) {
TARFS_DPF(VNODE, "%s: Generating .. entry\n", __func__);
/* fake .. entry */
MPASS(tnp->parent != NULL);
TARFS_NODE_LOCK(tnp->parent);
cde.d_fileno = tnp->parent->ino;
TARFS_NODE_UNLOCK(tnp->parent);
cde.d_type = DT_DIR;
cde.d_namlen = 2;
cde.d_name[0] = '.';
cde.d_name[1] = '.';
cde.d_name[2] = '\0';
cde.d_reclen = GENERIC_DIRSIZ(&cde);
if (cde.d_reclen > uio->uio_resid)
goto full;
error = uiomove(&cde, cde.d_reclen, uio);
if (error)
return (error);
/* next is first child */
current = TAILQ_FIRST(&tnp->dir.dirhead);
if (current == NULL)
goto done;
uio->uio_offset = current->ino;
TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
__func__, ndirents, current, current->name);
ndirents++;
}
/* resuming previous call */
if (current == NULL) {
current = tarfs_lookup_dir(tnp, uio->uio_offset);
if (current == NULL) {
error = EINVAL;
goto done;
}
uio->uio_offset = current->ino;
TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
__func__, ndirents, current, current->name);
}
for (;;) {
cde.d_fileno = current->ino;
switch (current->type) {
case VBLK:
cde.d_type = DT_BLK;
break;
case VCHR:
cde.d_type = DT_CHR;
break;
case VDIR:
cde.d_type = DT_DIR;
break;
case VFIFO:
cde.d_type = DT_FIFO;
break;
case VLNK:
cde.d_type = DT_LNK;
break;
case VREG:
cde.d_type = DT_REG;
break;
default:
panic("%s: tarfs_node %p, type %d\n", __func__,
current, current->type);
}
cde.d_namlen = current->namelen;
MPASS(tnp->namelen < sizeof(cde.d_name));
(void)memcpy(cde.d_name, current->name, current->namelen);
cde.d_name[current->namelen] = '\0';
cde.d_reclen = GENERIC_DIRSIZ(&cde);
if (cde.d_reclen > uio->uio_resid)
goto full;
error = uiomove(&cde, cde.d_reclen, uio);
if (error != 0)
goto done;
ndirents++;
/* next sibling */
current = TAILQ_NEXT(current, dirents);
if (current == NULL)
goto done;
uio->uio_offset = current->ino;
TARFS_DPF(VNODE, "%s: [%u] setting current node to %p=%s\n",
__func__, ndirents, current, current->name);
}
full:
if (cde.d_reclen > uio->uio_resid) {
TARFS_DPF(VNODE, "%s: out of space, returning\n",
__func__);
error = (ndirents == 0) ? EINVAL : 0;
}
done:
TARFS_DPF(VNODE, "%s: %u entries written\n", __func__, ndirents);
TARFS_DPF(VNODE, "%s: saving cache information\n", __func__);
if (current == NULL) {
uio->uio_offset = TARFS_COOKIE_EOF;
tnp->dir.lastcookie = 0;
tnp->dir.lastnode = NULL;
} else {
tnp->dir.lastcookie = current->ino;
tnp->dir.lastnode = current;
}
if (eofflag != NULL) {
TARFS_DPF(VNODE, "%s: Setting EOF flag\n", __func__);
*eofflag = (error == 0 && current == NULL);
}
/* Update for NFS */
if (error == 0 && cookies != NULL && ncookies != NULL) {
TARFS_DPF(VNODE, "%s: Updating NFS cookies\n", __func__);
current = NULL;
*cookies = malloc(ndirents * sizeof(off_t), M_TEMP, M_WAITOK);
*ncookies = ndirents;
for (idx = 0; idx < ndirents; idx++) {
if (off == TARFS_COOKIE_DOT)
off = TARFS_COOKIE_DOTDOT;
else {
if (off == TARFS_COOKIE_DOTDOT) {
current = TAILQ_FIRST(&tnp->dir.dirhead);
} else if (current != NULL) {
current = TAILQ_NEXT(current, dirents);
} else {
current = tarfs_lookup_dir(tnp, off);
current = TAILQ_NEXT(current, dirents);
}
if (current == NULL)
off = TARFS_COOKIE_EOF;
else
off = current->ino;
}
TARFS_DPF(VNODE, "%s: [%u] offset %zu\n", __func__,
idx, off);
(*cookies)[idx] = off;
}
MPASS(uio->uio_offset == off);
}
return (error);
}
/*
 * VOP_READ: reads from a regular file.  Device nodes are refused with
 * EOPNOTSUPP, other non-regular vnodes with EISDIR, and negative
 * offsets with EINVAL.  Reading stops at end of file, when the request
 * is satisfied, on error, or as soon as a pass makes no progress.
 */
static int
tarfs_read(struct vop_read_args *ap)
{
	struct uio *uiop = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct tarfs_node *tnp;
	size_t len;
	off_t resid;
	int error;

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (vp->v_type != VREG)
		return (EISDIR);
	if (uiop->uio_offset < 0)
		return (EINVAL);

	tnp = VP_TO_TARFS_NODE(vp);
	error = 0;
	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %zd)\n", __func__,
	    tnp, tnp->name, uiop->uio_offset, uiop->uio_resid);

	for (;;) {
		resid = uiop->uio_resid;
		if (resid <= 0)
			break;
		/* At or beyond end of file. */
		if (tnp->size <= uiop->uio_offset)
			break;
		len = MIN(tnp->size - uiop->uio_offset, resid);
		if (len == 0)
			break;
		error = tarfs_read_file(tnp, len, uiop);
		/* Stop on error or when nothing was transferred. */
		if (error != 0 || resid == uiop->uio_resid)
			break;
	}
	return (error);
}
/*
 * VOP_READLINK: copies the stored link target to the caller, limited to
 * the smaller of the node's size and the space left in the uio.
 */
static int
tarfs_readlink(struct vop_readlink_args *ap)
{
	struct uio *uiop = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct tarfs_node *tnp;

	MPASS(uiop->uio_offset == 0);
	MPASS(vp->v_type == VLNK);

	tnp = VP_TO_TARFS_NODE(vp);
	TARFS_DPF(VNODE, "%s(%p=%s)\n", __func__, tnp, tnp->name);

	return (uiomove(tnp->link.name,
	    MIN(tnp->size, uiop->uio_resid), uiop));
}
/*
 * VOP_RECLAIM: severs the association between a vnode and its tarfs
 * node.  The tarfs node itself is not freed here.  Always returns 0.
 */
static int
tarfs_reclaim(struct vop_reclaim_args *ap)
{
struct tarfs_node *tnp;
struct vnode *vp;
vp = ap->a_vp;
tnp = VP_TO_TARFS_NODE(vp);
/* Drop the vnode from the hash, release its VM object, purge names. */
vfs_hash_remove(vp);
vnode_destroy_vobject(vp);
cache_purge(vp);
/* Clear the back-pointers in both directions under the node lock. */
TARFS_NODE_LOCK(tnp);
tnp->vnode = NULLVP;
vp->v_data = NULL;
TARFS_NODE_UNLOCK(tnp);
return (0);
}
/*
 * VOP_PRINT: prints a short description of the node behind a vnode
 * (link count, mode, ownership, size), plus FIFO details for FIFOs.
 */
static int
tarfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tarfs_node *tnp = VP_TO_TARFS_NODE(vp);

	printf("tag tarfs, tarfs_node %p, links %lu\n", tnp, tnp->nlink);
	printf("\tmode 0%o, owner %d, group %d, size %zd\n",
	    tnp->mode, tnp->uid, tnp->gid, tnp->size);
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}
/*
 * VOP_STRATEGY: services a buffer read by reading the file through the
 * block map straight into the buffer's data area.  Requests starting
 * beyond the end of the file fail with EIO; requests crossing it are
 * clipped.  Errors are reported through the buffer, which is marked
 * done in all cases; the function itself always returns 0.
 */
static int
tarfs_strategy(struct vop_strategy_args *ap)
{
	struct uio auio;
	struct iovec iov;
	struct tarfs_node *tnp;
	struct buf *bp;
	off_t off;
	size_t len;
	int error;

	tnp = VP_TO_TARFS_NODE(ap->a_vp);
	bp = ap->a_bp;
	MPASS(bp->b_iocmd == BIO_READ);
	MPASS(bp->b_iooffset >= 0);
	MPASS(bp->b_bcount > 0);
	MPASS(bp->b_bufsize >= bp->b_bcount);
	TARFS_DPF(VNODE, "%s(%p=%s, %zu, %ld/%ld)\n", __func__, tnp,
	    tnp->name, (size_t)bp->b_iooffset, bp->b_bcount, bp->b_bufsize);

	iov.iov_base = bp->b_data;
	iov.iov_len = bp->b_bcount;
	off = bp->b_iooffset;
	len = bp->b_bcount;
	/* Nothing transferred yet. */
	bp->b_resid = len;

	if (off > tnp->size) {
		/* XXX read beyond EOF - figure out correct handling */
		error = EIO;
	} else {
		/* clip to file length */
		if (off + len > tnp->size)
			len = tnp->size - off;
		auio.uio_iov = &iov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = off;
		auio.uio_resid = len;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_td = curthread;
		error = tarfs_read_file(tnp, len, &auio);
		/* Account for whatever was actually read. */
		bp->b_resid -= len - auio.uio_resid;
	}

	if (error != 0) {
		bp->b_ioflags |= BIO_ERROR;
		bp->b_error = error;
	}
	bp->b_flags |= B_DONE;
	return (0);
}
/*
 * VOP_VPTOFH: builds a file handle for a vnode out of the node's inode
 * number and generation number.
 */
static int
tarfs_vptofh(struct vop_vptofh_args *ap)
{
	struct tarfs_node *tnp = VP_TO_TARFS_NODE(ap->a_vp);
	struct tarfs_fid *tfp = (struct tarfs_fid *)ap->a_fhp;

	tfp->len = sizeof(*tfp);
	tfp->ino = tnp->ino;
	tfp->gen = tnp->gen;
	return (0);
}
/*
 * Vnode operations vector for tarfs.  Operations not listed here fall
 * through to default_vnodeops.  Name lookups enter through
 * vfs_cache_lookup, with tarfs_lookup installed as the cached-lookup
 * backend.  No write-side operations are provided.
 */
struct vop_vector tarfs_vnodeops = {
.vop_default = &default_vnodeops,
.vop_access = tarfs_access,
.vop_cachedlookup = tarfs_lookup,
.vop_close = tarfs_close,
.vop_getattr = tarfs_getattr,
.vop_lookup = vfs_cache_lookup,
.vop_open = tarfs_open,
.vop_print = tarfs_print,
.vop_read = tarfs_read,
.vop_readdir = tarfs_readdir,
.vop_readlink = tarfs_readlink,
.vop_reclaim = tarfs_reclaim,
.vop_strategy = tarfs_strategy,
.vop_vptofh = tarfs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(tarfs_vnodeops);

View File

@ -740,6 +740,12 @@ static struct witness_blessed blessed_list[] = {
* parent directory vnode is locked.
*/
{ "ufs", "bufwait" },
/*
* The tarfs decompression stream vnode may be locked while a
* buffer belonging to a tarfs data vnode is locked.
*/
{ "tarfs", "bufwait" },
};
/*

View File

@ -369,6 +369,7 @@ SUBDIR= \
sym \
${_syscons} \
sysvipc \
tarfs \
tcp \
${_ti} \
tmpfs \

View File

@ -0,0 +1,23 @@
# $FreeBSD$
.PATH: ${.CURDIR:H:H}/fs/tarfs
KMOD= tarfs
SRCS= opt_tarfs.h \
vnode_if.h \
tarfs_io.c \
tarfs_subr.c \
tarfs_vnops.c \
tarfs_vfsops.c
.if !defined(KERNBUILDDIR)
CFLAGS+= -DZSTDIO
.ifdef TARFS_DEBUG
CFLAGS+= -DTARFS_DEBUG
.endif
.endif
SRCS+= opt_zstdio.h
CFLAGS+= -I${SRCTOP}/sys/contrib/zstd/lib/freebsd
.include <bsd.kmod.mk>

View File

@ -14,6 +14,7 @@ TESTSRC= ${SRCTOP}/contrib/netbsd-tests/fs
.if ${COMPILER_FEATURES:Mc++14} && ${MK_GOOGLETEST} != "no"
TESTS_SUBDIRS+= fusefs
.endif
TESTS_SUBDIRS+= tarfs
TESTS_SUBDIRS+= tmpfs
${PACKAGE}FILES+= h_funcs.subr

View File

@ -0,0 +1,10 @@
PACKAGE= tests
TESTSDIR= ${TESTSBASE}/sys/fs/tarfs
BINDIR= ${TESTSDIR}
PROGS+= mktar
ATF_TESTS_SH+= tarfs_test
.include <bsd.test.mk>

238
tests/sys/fs/tarfs/mktar.c Normal file
View File

@ -0,0 +1,238 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/stat.h>
#include <sys/wait.h>
#include <err.h>
#include <fcntl.h>
#include <paths.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define PROGNAME "mktar"
#define SUBDIRNAME "directory"
#define SPARSEFILENAME "sparse_file"
#define HARDLINKNAME "hard_link"
#define SHORTLINKNAME "short_link"
#define LONGLINKNAME "long_link"
static bool opt_v;
/*
 * If the -v option was given, print a printf-style message to stderr,
 * preceded by the program name and followed by a newline.  Otherwise,
 * do nothing.
 */
static void
verbose(const char *fmt, ...)
{
	va_list args;

	if (opt_v) {
		fprintf(stderr, "%s: ", PROGNAME);
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
		fprintf(stderr, "\n");
	}
}
/*
 * Create (or truncate) the named file with the given mode and write 94
 * runs of 511 identical bytes to it, one run for each byte value from
 * 33 through 126.  The run for value c starts (c - 32) MiB into the
 * file; nothing is written in between the runs.
 *
 * Exits with an error message if the file cannot be opened or if a seek
 * or write fails or comes up short.
 */
static void
mksparsefile(const char *filename, mode_t mode)
{
	char block[511];
	ssize_t nwritten;
	off_t offset;
	unsigned int ch;
	int fd;

	fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, mode);
	if (fd < 0)
		err(1, "%s", filename);
	ch = 33;
	while (ch <= 126) {
		memset(block, ch, sizeof(block));
		offset = 1048576LU * (ch - 32);
		if (lseek(fd, offset, SEEK_SET) < 0)
			err(1, "%s", filename);
		nwritten = write(fd, block, sizeof(block));
		if (nwritten < 0)
			err(1, "%s", filename);
		if (nwritten != sizeof(block))
			errx(1, "%s: short write", filename);
		ch++;
	}
	close(fd);
}
/*
 * Build a long but valid relative path to filename by prefixing it with
 * 32 repetitions of "<dirname>/../".
 *
 * Returns a newly allocated string which the caller must free().  Exits
 * with an error message if allocation fails.
 */
static char *
mklonglinktarget(const char *dirname, const char *filename)
{
	char *quad, *result;

	/* four repetitions of "<dirname>/../" */
	if (asprintf(&quad, "%s/../%s/../%s/../%s/../",
	    dirname, dirname, dirname, dirname) < 0)
		err(1, "asprintf()");
	/* eight copies of that, followed by the file name */
	if (asprintf(&result, "%s%s%s%s%s%s%s%s%s",
	    quad, quad, quad, quad, quad, quad, quad, quad, filename) < 0)
		err(1, "asprintf()");
	free(quad);
	return result;
}
/*
 * Populate the current directory with the entries that go into the test
 * tarball: a subdirectory, a sparse file, a hard link to the sparse
 * file, and two symbolic links to it, one with a short target and one
 * with a long target.
 *
 * Exits with an error message if any of the entries cannot be created.
 */
static void
mktar(void)
{
	char *longtarget;

	/* subdirectory */
	verbose("mkdir %s", SUBDIRNAME);
	if (mkdir(SUBDIRNAME, 0755) != 0)
		err(1, "%s", SUBDIRNAME);

	/* sparse file; the result of chflags() is not checked */
	verbose("creating %s", SPARSEFILENAME);
	mksparsefile(SPARSEFILENAME, 0644);
	(void)chflags(SPARSEFILENAME, UF_NODUMP);

	/* hard link to the sparse file */
	verbose("link %s %s", SPARSEFILENAME, HARDLINKNAME);
	if (link(SPARSEFILENAME, HARDLINKNAME) != 0)
		err(1, "%s", HARDLINKNAME);

	/* symbolic link whose target is just the file name */
	verbose("symlink %s %s", SPARSEFILENAME, SHORTLINKNAME);
	if (symlink(SPARSEFILENAME, SHORTLINKNAME) != 0)
		err(1, "%s", SHORTLINKNAME);

	/* symbolic link whose target winds through the subdirectory */
	longtarget = mklonglinktarget(SUBDIRNAME, SPARSEFILENAME);
	verbose("symlink %s %s", longtarget, LONGLINKNAME);
	if (symlink(longtarget, LONGLINKNAME) != 0)
		err(1, "%s", LONGLINKNAME);
	free(longtarget);
}
/*
 * Print a usage message to stderr and exit with a failure status.
 */
static void
usage(void)
{
	(void)fprintf(stderr, "usage: %s [-v] tarfile\n", PROGNAME);
	exit(EXIT_FAILURE);
}
/*
 * Wait for the given child process and report whether it exited
 * normally with a status of zero.  Exits with an error message if
 * waitpid() itself fails.
 */
static bool
child_succeeded(pid_t pid)
{
	int wstatus;

	if (waitpid(pid, &wstatus, 0) < 0)
		err(1, "waitpid()");
	return (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0);
}

/*
 * usage: mktar [-v] tarfile
 *
 * Create a zstd-compressed tarball named tarfile from the entries which
 * mktar() produces.  The entries are created in a fresh temporary
 * directory under _PATH_TMP, archived by running tar, and then removed
 * again.  The steps which change into the temporary directory run in
 * child processes, so the working directory of the parent (and thus of
 * the tar child, which is what interprets tarfile) is never changed.
 *
 * If creating the entries or the tarball fails, the temporary directory
 * is still cleaned up before the program exits with a failure status.
 */
int
main(int argc, char *argv[])
{
	const char *tarfilename;
	char *dirname;
	int opt;
	pid_t pid;
	bool ok;

	while ((opt = getopt(argc, argv, "v")) != -1)
		switch (opt) {
		case 'v':
			opt_v = true;
			break;
		default:
			usage();
		}
	argc -= optind;
	argv += optind;
	if (argc != 1)
		usage();
	tarfilename = *argv;

	if (asprintf(&dirname, "%s%s.XXXXXXXX", _PATH_TMP, PROGNAME) < 0)
		err(1, "asprintf()");
	if (mkdtemp(dirname) == NULL)
		err(1, "%s", dirname);
	verbose("mkdir %s", dirname);

	/* fork a child to create the files */
	if ((pid = fork()) < 0)
		err(1, "fork()");
	if (pid == 0) {
		verbose("cd %s", dirname);
		if (chdir(dirname) != 0)
			err(1, "%s", dirname);
		verbose("umask 022");
		umask(022);
		mktar();
		verbose("cd -");
		exit(0);
	}
	ok = child_succeeded(pid);

	/* fork a child to create the tarball */
	if (ok) {
		if ((pid = fork()) < 0)
			err(1, "fork()");
		if (pid == 0) {
			verbose("creating tarball");
			execlp("tar", "tar",
			    "-c",
			    "-f", tarfilename,
			    "-C", dirname,
			    "--zstd",
#if 0
			    "--options", "zstd:frame-per-file",
#endif
			    ".",
			    (char *)NULL);
			err(1, "execlp()");
		}
		ok = child_succeeded(pid);
	}

	/*
	 * fork a child to delete everything; this is done even if one of
	 * the previous steps failed, so that nothing is left behind in
	 * the temporary directory
	 */
	if ((pid = fork()) < 0)
		err(1, "fork()");
	if (pid == 0) {
		verbose("cd %s", dirname);
		if (chdir(dirname) != 0)
			err(1, "%s", dirname);
		verbose("rm %s", LONGLINKNAME);
		(void)unlink(LONGLINKNAME);
		verbose("rm %s", SHORTLINKNAME);
		(void)unlink(SHORTLINKNAME);
		verbose("rm %s", HARDLINKNAME);
		(void)unlink(HARDLINKNAME);
		verbose("rm %s", SPARSEFILENAME);
		(void)unlink(SPARSEFILENAME);
		verbose("rm %s", SUBDIRNAME);
		(void)rmdir(SUBDIRNAME);
		verbose("cd -");
		exit(0);
	}
	if (!child_succeeded(pid))
		ok = false;
	verbose("rmdir %s", dirname);
	(void)rmdir(dirname);
	free(dirname);

	/* report any failure only once cleanup has been attempted */
	if (!ok)
		errx(1, "child failed");
	exit(0);
}

View File

@ -0,0 +1,54 @@
#!/bin/sh
#-
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2023 Klara, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Path to the mktar helper, which is expected to be in the same directory
# as this script.
mktar="$(dirname "$(realpath "$0")")/mktar"

# Mount point for the test tarball.  ATF runs the body and the cleanup
# routine of a test case in separate processes, so the name must not
# depend on the PID ($$); otherwise the cleanup routine would try to
# unmount a different directory than the one the body mounted on.
mnt="$(realpath "${TMPDIR:-/tmp}")/mnt"

# expected SHA256 checksum of file contained in test tarball
sum=4da2143234486307bb44eaa610375301781a577d1172f362b88bb4b1643dee62

# Declare the test case together with its cleanup routine; a test case
# declared without the "cleanup" argument does not get its cleanup
# routine run.
atf_test_case tarfs_test cleanup
# Test case metadata: the test has to be run as root.
tarfs_test_head()
{
	atf_set require.user root
}
# Create the test tarball with mktar, mount it read-only using tarfs, and
# verify that the hard link and both symbolic links refer to the same file
# as sparse_file, and that the contents of that file have the expected
# checksum.
tarfs_test_body()
{
	local tarball ref

	tarball=tarfs_test.tar.zst

	# Report a failure to set up the fixture right away, rather than
	# carrying on and failing at a later step for a less obvious reason.
	atf_check mkdir "${mnt}"
	atf_check -o ignore -e ignore "${mktar}" "${tarball}"
	atf_check mount -rt tarfs "${tarball}" "${mnt}"

	# device and inode number of the file all the links should lead to
	ref="$(stat -f%d,%i "${mnt}"/sparse_file)"
	atf_check_equal "${ref}" "$(stat -f%d,%i "${mnt}"/hard_link)"
	atf_check_equal "${ref}" "$(stat -L -f%d,%i "${mnt}"/short_link)"
	atf_check_equal "${ref}" "$(stat -L -f%d,%i "${mnt}"/long_link)"
	atf_check_equal "$(sha256 -q "${mnt}"/sparse_file)" "${sum}"
}
# Unmount the test filesystem.  The body may have failed before anything
# was mounted, in which case umount fails too; that must not be reported
# as a failure of the cleanup routine itself.
tarfs_test_cleanup()
{
	umount "${mnt}" || true
}
# Register the test cases provided by this script.
atf_init_test_cases()
{
	atf_add_test_case "tarfs_test"
}