diff --git a/acinclude.m4 b/acinclude.m4 index 01c2e17160..02b22b06c1 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -892,6 +892,7 @@ case $AFS_SYSNAME in *_linux* | *_umlinux*) LINUX_KMEM_CACHE_INIT LINUX_HAVE_GRAB_CACHE_PAGE_WRITE_BEGIN LINUX_HAVE_PAGEVEC_LRU_ADD_FILE + LINUX_HAVE_SPLICE_DIRECT_TO_ACTOR LINUX_STRUCT_TASK_HAS_CRED LINUX_STRUCT_PROC_DIR_ENTRY_HAS_OWNER LINUX_HAVE_KMEM_CACHE_T diff --git a/src/afs/LINUX/osi_fetchstore.c b/src/afs/LINUX/osi_fetchstore.c new file mode 100644 index 0000000000..b272c96d22 --- /dev/null +++ b/src/afs/LINUX/osi_fetchstore.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2009 Simon Wilkinson. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Linux specific store operations + * + * The idea of these operations is to reduce the number of copies + * that data incurs when passing from the disk cache through to the + * RX layer, and vice versa. + * + * In kernels which support it, we use the splice() operation - in + * older kernels, the filesystem's sendfile() operation is used with a + * read actor. + * Either way, this means that we can get direct access to the page contents, + * rather than getting a copy. + */ +/* NOTE(review): several #include directives below carry no header name + * (presumably lost when this patch was extracted); restore them before use. + */ +#include +#include "afs/param.h" + +#include +#if defined(HAVE_SPLICE_DIRECT_TO_ACTOR) +# include +#else +# include +#endif + +#include "afs/sysincludes.h" +#include "afsincludes.h" +/* Splice path, built when HAVE_SPLICE_DIRECT_TO_ACTOR is defined. + * + * afs_linux_splice_actor() handles one pipe buffer: it confirms the buffer + * through buf->ops->confirm(), then passes sd->len bytes of the buffer's + * page to rx_Write() on the call held in the rxfs_storeVariables that + * arrives through sd->u.data. It returns the confirm() error if there is + * one, -33 if rx_Write() did not accept exactly sd->len bytes, and sd->len + * otherwise. + */ +#if defined(HAVE_SPLICE_DIRECT_TO_ACTOR) +static int +afs_linux_splice_actor(struct pipe_inode_info *pipe, + struct pipe_buffer *buf, + struct splice_desc *sd) +{ + struct rxfs_storeVariables *svar = sd->u.data; + size_t size; + int code; + + code = buf->ops->confirm(pipe, buf); + if (code) + return code; + + size = sd->len; + + /* Eventually, this could be rx_WritePage. + * NOTE(review): unlike afs_linux_read_actor(), no offset is added to the + * page address here, and page_address() is used without kmap() - + * presumably the buffers always start at offset 0 and the pages are + * directly mapped; confirm. */ + code = rx_Write(svar->call, page_address(buf->page), size); + if (code != size) { + return -33; /* Can't get a proper rx error out from here */ + } + + return size; +} +/* Pipe-level actor handed to splice_direct_to_actor(): runs the pipe through + * __splice_from_pipe() with afs_linux_splice_actor() as the per-buffer + * actor and returns its result. + */ +static int +afs_linux_ds_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + return __splice_from_pipe(pipe, sd, afs_linux_splice_actor); +} + +/* This is a store proc which uses splice to reduce the number + * of page copies. 
*/ +afs_int32 +afs_linux_storeproc(struct storeOps *ops, void *rock, struct dcache *tdc, + int *shouldwake, afs_size_t *bytesXferred) +{ + struct rxfs_storeVariables *svar = rock; + struct file *cacheFp; + struct splice_desc sd = { + .len = 0, + .total_len = tdc->f.chunkBytes, + .pos = 0, + .u.data = rock + }; + int code; + + /* Open the cache file for this chunk and splice up to tdc->f.chunkBytes + * of it through afs_linux_ds_actor(). The GLOCK is released around the + * open, transfer and close, and re-taken afterwards. + * NOTE(review): the result of afs_linux_raw_open() is used unchecked - + * presumably it never hands back an unusable file; confirm. */ + AFS_GUNLOCK(); + cacheFp = afs_linux_raw_open(&tdc->f.inode); + code = splice_direct_to_actor(cacheFp, &sd, afs_linux_ds_actor); + filp_close(cacheFp, NULL); + AFS_GLOCK(); + + /* If we're being called from a backing request, then wake up that + * request once the file server says it's happy. Potentially, we should + * do this each time we rx_Write, but that would mean acquiring the + * GLOCK in the middle of our actor */ + if (shouldwake && *shouldwake && ((*ops->status)(rock) == 0)) { + *shouldwake = 0; + afs_wakeup(svar->vcache); + } + /* A positive result is treated as a byte count: it is added to + * *bytesXferred and 0 is returned. A zero or negative result is + * returned to the caller unchanged. */ + if (code > 0) { + *bytesXferred+=code; + return 0; + } + + return code; +} + +# else +/* Fallback path, built when HAVE_SPLICE_DIRECT_TO_ACTOR is not defined. + * + * afs_linux_read_actor() is the read actor passed to the cache file's + * sendfile() operation. It clamps size to the bytes still wanted + * (desc->count), passes that many bytes, starting at offset within the + * page, to rx_Write(), then lowers desc->count, raises desc->written and + * returns the size written. It returns -33 if rx_Write() did not accept + * exactly that many bytes. + * NOTE(review): page_address() is used without kmap() - presumably the + * cache pages are always directly mapped; confirm. + */ +static int +afs_linux_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + struct rxfs_storeVariables *svar = desc->arg.data; + unsigned long count = desc->count; + int code; + + if (size > count) + size = count; + + /* Eventually, this could be rx_WritePage */ + code = rx_Write(svar->call, page_address(page) + offset, size); + + if (code != size) { + return -33; /* Can't get a proper rx error out from here */ + } + + desc->count = count - size; + desc->written += size; + + return size; +} +/* Store proc variant that pushes the chunk's cache file through the file's + * sendfile() operation, with afs_linux_read_actor() writing each page to + * the rx call. It takes the same arguments as the splice variant. + */ +afs_int32 +afs_linux_storeproc(struct storeOps *ops, void *rock, struct dcache *tdc, + int *shouldwake, afs_size_t *bytesXferred) +{ + struct rxfs_storeVariables *svar = rock; + struct file *cacheFp; + int code; + loff_t offset = 0; + + /* Open the cache file and send up to tdc->f.chunkBytes of it, from + * offset 0, through afs_linux_read_actor(). The GLOCK is released + * around the open, transfer and close, and re-taken afterwards. + * NOTE(review): the result of afs_linux_raw_open() is used unchecked - + * presumably it never hands back an unusable file; confirm. */ + AFS_GUNLOCK(); + cacheFp = afs_linux_raw_open(&tdc->f.inode); + code = cacheFp->f_op->sendfile(cacheFp, &offset, tdc->f.chunkBytes, + afs_linux_read_actor, rock); + 
filp_close(cacheFp, NULL); + AFS_GLOCK(); + + /* If we're being called from a backing request, then wake up that + * request once the file server says it's happy. Potentially, we should + * do this each time we rx_Write, but that would mean acquiring the + * GLOCK in the middle of our actor */ + if (shouldwake && *shouldwake && ((*ops->status)(rock) == 0)) { + *shouldwake = 0; + afs_wakeup(svar->vcache); + } + /* A positive result is treated as a byte count: it is added to + * *bytesXferred and 0 is returned. A zero or negative result is + * returned to the caller unchanged. */ + if (code > 0) { + *bytesXferred+=code; + return 0; + } + + return code; +} + +#endif diff --git a/src/afs/afs_fetchstore.c b/src/afs/afs_fetchstore.c index d539563a6d..bf00316573 100644 --- a/src/afs/afs_fetchstore.c +++ b/src/afs/afs_fetchstore.c @@ -273,6 +273,9 @@ struct storeOps rxfs_storeUfsOps = { .padd = rxfs_storePadd, .close = rxfs_storeClose, .destroy = rxfs_storeDestroy, +#ifdef AFS_LINUX26_ENV + .storeproc = afs_linux_storeproc +#endif }; static diff --git a/src/afs/afs_prototypes.h b/src/afs/afs_prototypes.h index 25916069b4..70c08d985f 100644 --- a/src/afs/afs_prototypes.h +++ b/src/afs/afs_prototypes.h @@ -610,6 +610,11 @@ extern void osi_ReleaseVM(struct vcache *avc, afs_ucred_t *acred); #endif +/* LINUX/osi_fetchstore.c + * NOTE(review): declared here as returning int, while the definitions in + * osi_fetchstore.c return afs_int32 - presumably the same type; confirm. */ +#ifdef AFS_LINUX26_ENV +extern int afs_linux_storeproc(struct storeOps *, void *, struct dcache *, + int *, afs_size_t *); +#endif /* ARCH/osi_misc.c */ extern void afs_osi_SetTime(osi_timeval_t * atv); diff --git a/src/cf/linux-test4.m4 b/src/cf/linux-test4.m4 index 4df6f51252..4d26a054da 100644 --- a/src/cf/linux-test4.m4 +++ b/src/cf/linux-test4.m4 @@ -1217,6 +1217,19 @@ AC_DEFUN([LINUX_HAVE_PAGEVEC_LRU_ADD_FILE], [ AC_DEFINE([HAVE_PAGEVEC_LRU_ADD_FILE], 1, [define if your kernel has __pagevec_lru_add_file()]) fi]) +AC_DEFUN([LINUX_HAVE_SPLICE_DIRECT_TO_ACTOR], [ + AC_MSG_CHECKING([for linux splice_direct_to_actor()]) + AC_CACHE_VAL([ac_cv_linux_splice_direct_to_actor], [ + AC_TRY_KBUILD( +[#include ], +[splice_direct_to_actor(NULL,NULL,NULL);], + ac_cv_linux_splice_direct_to_actor=yes, + 
ac_cv_linux_splice_direct_to_actor=no)]) + AC_MSG_RESULT($ac_cv_linux_splice_direct_to_actor) + if test "x$ac_cv_linux_splice_direct_to_actor" = "xyes"; then + AC_DEFINE([HAVE_SPLICE_DIRECT_TO_ACTOR], 1, [define if your kernel has splice_direct_to_actor()]) + fi]) + AC_DEFUN([LINUX_STRUCT_TASK_HAS_CRED], [ AC_MSG_CHECKING([if struct task has cred]) AC_CACHE_VAL([ac_cv_linux_struct_task_has_cred], [ diff --git a/src/libafs/Makefile.common.in b/src/libafs/Makefile.common.in index 6c758b7449..a9f44e346c 100644 --- a/src/libafs/Makefile.common.in +++ b/src/libafs/Makefile.common.in @@ -517,6 +517,8 @@ osi_timeout.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_timeout.c $(CRULE_NOOPT) osi_pagecopy.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_pagecopy.c $(CRULE_NOOPT) +osi_fetchstore.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_fetchstore.c + $(CRULE_NOOPT) clean: -$(RM) -rf STATIC* MODLOAD* $(AFS_OS_CLEAN) diff --git a/src/libafs/MakefileProto.LINUX.in b/src/libafs/MakefileProto.LINUX.in index bb5ebdad15..5a35d9d47b 100644 --- a/src/libafs/MakefileProto.LINUX.in +++ b/src/libafs/MakefileProto.LINUX.in @@ -45,7 +45,8 @@ AFS_OS_OBJS = \ osi_vnodeops.o \ - osi_pagecopy.o + osi_pagecopy.o \ + osi_fetchstore.o AFS_OS_PAGOBJS = \