From 5ef1de5eddccce0e7b135bb9ed4decaa62fc19ce Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Fri, 30 Jan 2015 13:29:57 -0600 Subject: [PATCH] afs: Zero uninitialized uio structs In several places in the code, we allocate a 'struct uio' on the stack, or allocate one from non-zeroed memory. In most of these places, we initialize the structure by assigning individual fields to certain values. However, this leaves any remaining fields assigned to random garbage, if there are any additional fields in the struct uio that we don't know about. One such platform is Solaris, which has a field called uio_extflg, which exists in Solaris 11, Solaris 10, and possibly further back. One of the flags defined for this field in Solaris 11 is UIO_XUIO, which indicates that the structure is actually an xuio_t, which is larger than a normal uio_t and contains additional fields. So when we allocate a uio on the stack without initializing it, it can randomly appear to be an xuio_t, depending on what garbage was on the stack at the time. An xuio_t is a kind of extensible structure, which is used for things like async I/O or DMA, that kind of thing. One of the places we make use of such a uio_t is in afs_ustrategy, which we go through for cache reads and writes on most Unix platforms (but not Linux). When handling a read (reading from the disk cache into a mapped page), a copy of our stack-allocated uio eventually gets passed to VOP_READ. So VOP_READ for the cache filesystem will randomly interpret our uio_t as an xuio_t. In many scenarios, this (amazingly) does not cause any problems, since generally, Solaris code will not notice if something is flagged as an xuio_t, unless it is specifically written to handle specific xuio_t types. ZFS is one of the apparent few filesystem implementations that can handle xuio_t's, and will detect and specially handle a UIOTYPE_ZEROCOPY xuio_t differently than a regular uio_t. If ZFS gets a UIOTYPE_ZEROCOPY xuio_t, it appears to ignore the uio buffers passed in, and supplies its own buffers from its cache. This means that our VOP_READ request will return success, and act like it serviced the read just fine. However, the actual buffer that we passed in will remain untouched, and so we will return the page to the VFS filled with garbage data. The way this typically manifests is that seemingly random pages will contain random data. This seems to happen very rarely, though it may not always be obvious what is going on when this occurs. It is also worth noting that the above description on Solaris only happens with Solaris 11 and newer, and only with a ZFS disk cache. Anything older than Solaris 11 does not have the xuio_t framework (though other uio_extflg values can cause performance degradations), and all known non-ZFS local disk filesystems do not interpret special xuio_t structures (networked filesystems might have xuio_t handling, but they shouldn't be used for a cache). Bugs similar to this may also exist on other Unix clients, but at least this specific scenario should not occur on Linux (since we don't use afs_ustrategy), and newer Darwin (since we get a uio allocated for us). To fix this, zero out the entire uio structure before we use it, for all instances where we allocate a uio from the stack or from non-zeroed memory. Also zero out the accompanying iovec in many places, just to be safe. Some of these may not actually need to be zeroed (since we do actually initialize the whole thing, or a platform doesn't have any additional unknown uio fields), but it seems worthwhile to err on the side of caution. Thanks to Oracle for their assistance on this issue, and thanks to the organization experiencing this issue for their patience and persistence. Change-Id: I0eae0b49a70aee19f3a9ec118b03cfb3a6bd03a3 Reviewed-on: http://gerrit.openafs.org/11705 Tested-by: BuildBot Reviewed-by: Perry Ruiter Reviewed-by: Daria Brashear --- src/afs/AIX/osi_misc.c | 3 +++ src/afs/AIX/osi_vnodeops.c | 9 +++++++++ src/afs/DARWIN/osi_vnodeops.c | 24 +++++++++++++++--------- src/afs/FBSD/osi_vnodeops.c | 6 ++++++ src/afs/HPUX/osi_vnodeops.c | 12 ++++++++++++ src/afs/LINUX/osi_file.c | 6 ++++++ src/afs/LINUX/osi_vnodeops.c | 6 ++++++ src/afs/LINUX24/osi_file.c | 6 ++++++ src/afs/LINUX24/osi_vnodeops.c | 9 +++++++++ src/afs/OBSD/osi_vnodeops.c | 3 +++ src/afs/VNOPS/afs_vnop_strategy.c | 3 +++ src/afs/afs_osi_uio.c | 6 ++++-- 12 files changed, 82 insertions(+), 11 deletions(-) diff --git a/src/afs/AIX/osi_misc.c b/src/afs/AIX/osi_misc.c index 4994ac1b78..ed145c2d46 100644 --- a/src/afs/AIX/osi_misc.c +++ b/src/afs/AIX/osi_misc.c @@ -97,6 +97,9 @@ gop_rdwr(rw, vp, base, len, offset, segflg, unit, aresid) struct iovec uiovector; int code; + memset(&uio_struct, 0, sizeof(uio_struct)); + memset(&uiovector, 0, sizeof(uiovector)); + AFS_STATCNT(gop_rdwr); /* Set up the uio structure */ uiovector.iov_base = (caddr_t) base; diff --git a/src/afs/AIX/osi_vnodeops.c b/src/afs/AIX/osi_vnodeops.c index b86347e163..7fb5556fd9 100644 --- a/src/afs/AIX/osi_vnodeops.c +++ b/src/afs/AIX/osi_vnodeops.c @@ -643,6 +643,9 @@ afs_gn_fclear(struct vnode *vp, static int fclear_init = 0; struct vcache *avc = VTOAFS(vp); + memset(&uio, 0, sizeof(uio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(afs_gn_fclear); if (!fclear_init) { memset(zero_buffer, 0, PAGESIZE); @@ -919,6 +922,9 @@ afs_vm_rdwr(struct vnode *vp, struct iovec tvec[16]; /* Should have access to #define */ afs_int32 tsize; + memset(&tuio, 0, sizeof(tuio)); + memset(&tvec, 0, sizeof(tvec)); + mixed = 1; finalOffset = xfrOffset + xfrSize; tsize = (afs_size_t) (xfrOffset + xfrSize - afs_vmMappingEnd); @@ -1086,6 +1092,9 @@ afs_vm_rdwr(struct vnode *vp, struct uio tuio; struct iovec tvec[16]; /* Should have access to #define */ + memset(&tuio, 0, sizeof(tuio)); + memset(&tvec, 0, sizeof(tvec)); + /* Purge dirty chunks of file if there are too many dirty chunks. * Inside the write loop, we only do this at a chunk boundary. * Clean up partial chunk if necessary at end of loop. diff --git a/src/afs/DARWIN/osi_vnodeops.c b/src/afs/DARWIN/osi_vnodeops.c index ce54e408d5..f0cb97d314 100644 --- a/src/afs/DARWIN/osi_vnodeops.c +++ b/src/afs/DARWIN/osi_vnodeops.c @@ -837,17 +837,20 @@ afs_vop_pagein(ap) int flags = ap->a_flags; struct ucred *cred; vm_offset_t ioaddr; + int code; + struct vcache *tvc = VTOAFS(vp); + int nocommit = flags & UPL_NOCOMMIT; #ifdef AFS_DARWIN80_ENV struct uio *uio; #else struct uio auio; struct iovec aiov; struct uio *uio = &auio; -#endif - int nocommit = flags & UPL_NOCOMMIT; - int code; - struct vcache *tvc = VTOAFS(vp); + memset(&auio, 0, sizeof(auio)); + memset(&aiov, 0, sizeof(aiov)); +#endif + #ifndef AFS_DARWIN80_ENV if (UBCINVALID(vp)) { #if DIAGNOSTIC @@ -982,18 +985,21 @@ afs_vop_pageout(ap) int flags = ap->a_flags; struct ucred *cred; vm_offset_t ioaddr; + int nocommit = flags & UPL_NOCOMMIT; + int iosize; + int code; + struct vcache *tvc = VTOAFS(vp); #ifdef AFS_DARWIN80_ENV struct uio *uio; #else struct uio auio; struct iovec aiov; struct uio *uio = &auio; -#endif - int nocommit = flags & UPL_NOCOMMIT; - int iosize; - int code; - struct vcache *tvc = VTOAFS(vp); + memset(&auio, 0, sizeof(auio)); + memset(&aiov, 0, sizeof(aiov)); +#endif + #ifndef AFS_DARWIN80_ENV if (UBCINVALID(vp)) { #if DIAGNOSTIC diff --git a/src/afs/FBSD/osi_vnodeops.c b/src/afs/FBSD/osi_vnodeops.c index c303545644..0e55cc1629 100644 --- a/src/afs/FBSD/osi_vnodeops.c +++ b/src/afs/FBSD/osi_vnodeops.c @@ -801,6 +801,9 @@ afs_vop_getpages(struct vop_getpages_args *ap) struct vnode *vp; struct vcache *avc; + memset(&uio, 0, sizeof(uio)); + memset(&iov, 0, sizeof(iov)); + vp = ap->a_vp; avc = VTOAFS(vp); if ((object = vp->v_object) == NULL) { @@ -993,6 +996,9 @@ afs_vop_putpages(struct vop_putpages_args *ap) struct vnode *vp; struct vcache *avc; + memset(&uio, 0, sizeof(uio)); + memset(&iov, 0, sizeof(iov)); + vp = ap->a_vp; avc = VTOAFS(vp); /* Perhaps these two checks should just be KASSERTs instead... */ diff --git a/src/afs/HPUX/osi_vnodeops.c b/src/afs/HPUX/osi_vnodeops.c index dd644fe84d..839d76e2c7 100644 --- a/src/afs/HPUX/osi_vnodeops.c +++ b/src/afs/HPUX/osi_vnodeops.c @@ -222,6 +222,9 @@ afs_bread(vp, lbn, bpp) struct iovec iov; struct uio uio; + memset(&uio, 0, sizeof(uio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(afs_bread); fsbsize = vp->v_vfsp->vfs_bsize; offset = lbn * fsbsize; @@ -2085,6 +2088,9 @@ afs_readdir(vp, uiop, cred) dir_off_t offset; uint64_t tmp_offset; + memset(&auio, 0, sizeof(auio)); + memset(&aiov, 0, sizeof(aiov)); + count = uiop->uio_resid; /* Allocate temporary space for format conversion */ ibuf = kmem_alloc(2 * count); /* overkill - fix later */ @@ -2151,6 +2157,9 @@ afs_readdir3(vp, uiop, cred) int count, outcount; dir_off_t offset; + memset(&auio, 0, sizeof(auio)); + memset(&aiov, 0, sizeof(aiov)); + count = uiop->uio_resid; /* Allocate temporary space for format conversion */ ibuf = kmem_alloc(2 * count); /* overkill - fix later */ @@ -2521,6 +2530,9 @@ afs_hp_strategy(bp) extern caddr_t hdl_kmap_bp(); struct kthread *t = u.u_kthreadp; + memset(&tuio, 0, sizeof(tuio)); + memset(&tiovec, 0, sizeof(tiovec)); + AFS_STATCNT(afs_hp_strategy); /* * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after diff --git a/src/afs/LINUX/osi_file.c b/src/afs/LINUX/osi_file.c index 7389128168..8cdff4bf92 100644 --- a/src/afs/LINUX/osi_file.c +++ b/src/afs/LINUX/osi_file.c @@ -208,6 +208,9 @@ afs_osi_Read(struct osi_file *afile, int offset, void *aptr, struct iovec iov; afs_int32 code; + memset(&auio, 0, sizeof(auio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(osi_Read); /* @@ -249,6 +252,9 @@ afs_osi_Write(struct osi_file *afile, afs_int32 offset, void *aptr, struct iovec iov; afs_int32 code; + memset(&auio, 0, sizeof(auio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(osi_Write); if (!afile) { diff --git a/src/afs/LINUX/osi_vnodeops.c b/src/afs/LINUX/osi_vnodeops.c index 3efb2f4c89..70f2fb37ee 100644 --- a/src/afs/LINUX/osi_vnodeops.c +++ b/src/afs/LINUX/osi_vnodeops.c @@ -1855,6 +1855,9 @@ afs_linux_ireadlink(struct inode *ip, char *target, int maxlen, uio_seg_t seg) struct uio tuio; struct iovec iov; + memset(&tuio, 0, sizeof(tuio)); + memset(&iov, 0, sizeof(iov)); + setup_uio(&tuio, &iov, target, (afs_offs_t) 0, maxlen, UIO_READ, seg); code = afs_readlink(VTOAFS(ip), &tuio, credp); crfree(credp); @@ -2608,6 +2611,9 @@ afs_linux_page_writeback(struct inode *ip, struct page *pp, struct iovec iovec; int f_flags = 0; + memset(&tuio, 0, sizeof(tuio)); + memset(&iovec, 0, sizeof(iovec)); + buffer = kmap(pp) + offset; base = page_offset(pp) + offset; diff --git a/src/afs/LINUX24/osi_file.c b/src/afs/LINUX24/osi_file.c index b184602f2f..ef4012c772 100644 --- a/src/afs/LINUX24/osi_file.c +++ b/src/afs/LINUX24/osi_file.c @@ -190,6 +190,9 @@ afs_osi_Read(struct osi_file *afile, int offset, void *aptr, struct iovec iov; afs_int32 code; + memset(&auio, 0, sizeof(auio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(osi_Read); /* @@ -231,6 +234,9 @@ afs_osi_Write(struct osi_file *afile, afs_int32 offset, void *aptr, struct iovec iov; afs_int32 code; + memset(&auio, 0, sizeof(auio)); + memset(&iov, 0, sizeof(iov)); + AFS_STATCNT(osi_Write); if (!afile) { diff --git a/src/afs/LINUX24/osi_vnodeops.c b/src/afs/LINUX24/osi_vnodeops.c index a17a439306..6928f492d0 100644 --- a/src/afs/LINUX24/osi_vnodeops.c +++ b/src/afs/LINUX24/osi_vnodeops.c @@ -1546,6 +1546,9 @@ afs_linux_ireadlink(struct inode *ip, char *target, int maxlen, uio_seg_t seg) struct uio tuio; struct iovec iov; + memset(&tuio, 0, sizeof(tuio)); + memset(&iov, 0, sizeof(iov)); + setup_uio(&tuio, &iov, target, (afs_offs_t) 0, maxlen, UIO_READ, seg); code = afs_readlink(VTOAFS(ip), &tuio, credp); crfree(credp); @@ -1814,6 +1817,9 @@ afs_linux_writepage_sync(struct inode *ip, struct page *pp, struct iovec iovec; int f_flags = 0; + memset(&tuio, 0, sizeof(tuio)); + memset(&iovec, 0, sizeof(iovec)); + buffer = kmap(pp) + offset; base = (((loff_t) pp->index) << PAGE_CACHE_SHIFT) + offset; @@ -1923,6 +1929,9 @@ afs_linux_updatepage(struct file *fp, struct page *pp, unsigned long offset, struct uio tuio; struct iovec iovec; + memset(&tuio, 0, sizeof(tuio)); + memset(&iovec, 0, sizeof(iovec)); + set_bit(PG_locked, &pp->flags); credp = crref(); diff --git a/src/afs/OBSD/osi_vnodeops.c b/src/afs/OBSD/osi_vnodeops.c index 35799f2dc9..2863361550 100644 --- a/src/afs/OBSD/osi_vnodeops.c +++ b/src/afs/OBSD/osi_vnodeops.c @@ -1033,6 +1033,9 @@ afs_obsd_strategy(void *v) long len = abp->b_bcount; int code; + memset(&tuio, 0, sizeof(tuio)); + memset(&tiovec, 0, sizeof(tiovec)); + AFS_STATCNT(afs_strategy); tuio.afsio_iov = tiovec; diff --git a/src/afs/VNOPS/afs_vnop_strategy.c b/src/afs/VNOPS/afs_vnop_strategy.c index f5cf404606..6e2970c010 100644 --- a/src/afs/VNOPS/afs_vnop_strategy.c +++ b/src/afs/VNOPS/afs_vnop_strategy.c @@ -46,6 +46,9 @@ int afs_ustrategy(struct buf *abp) afs_ucred_t *credp = u.u_cred; #endif + memset(&tuio, 0, sizeof(tuio)); + memset(&tiovec, 0, sizeof(tiovec)); + AFS_STATCNT(afs_ustrategy); #ifdef AFS_AIX41_ENV /* diff --git a/src/afs/afs_osi_uio.c b/src/afs/afs_osi_uio.c index 5d4f3aab56..eae1a99ac6 100644 --- a/src/afs/afs_osi_uio.c +++ b/src/afs/afs_osi_uio.c @@ -88,10 +88,12 @@ afsio_partialcopy(struct uio *auio, size_t len) { char *space; struct uio *newuio; struct iovec *newvec; + size_t space_len = sizeof(struct uio) + + sizeof(struct iovec) * AFS_MAXIOVCNT; /* Allocate a block that can contain both the UIO and the iovec */ - space = osi_AllocSmallSpace(sizeof(struct uio) + - sizeof(struct iovec) * AFS_MAXIOVCNT); + space = osi_AllocSmallSpace(space_len); + memset(space, 0, space_len); newuio = (struct uio *) space; newvec = (struct iovec *) (space + sizeof(struct uio));