Linux: Use splice to speed up cache storeback

This patch adds a new cache store function for Linux, which uses
splice() and direct access to the page cache, rather than doing
data copies to a temporary buffer between rx and afs. It removes
one copy, and some context switching, from the write codepath.

One side-effect here is that it will delay storebehinds from
returning control to the user. Instead of returning once the first
4k has been successfully transferred, we will wait until a cache
chunk has been transmitted. This is currently unavoidable, as we
can't take the GLOCK within a splice actor.

Change-Id: I5b0284d67febccf099710589908fad18b808332c
Reviewed-on: http://gerrit.openafs.org/903
Reviewed-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Derrick Brashear <shadow@dementia.org>
This commit is contained in:
Simon Wilkinson 2009-11-20 15:08:25 +00:00 committed by Derrick Brashear
parent 292ec075d2
commit 34ffc9cd7d
7 changed files with 207 additions and 1 deletions

View File

@ -892,6 +892,7 @@ case $AFS_SYSNAME in *_linux* | *_umlinux*)
LINUX_KMEM_CACHE_INIT
LINUX_HAVE_GRAB_CACHE_PAGE_WRITE_BEGIN
LINUX_HAVE_PAGEVEC_LRU_ADD_FILE
LINUX_HAVE_SPLICE_DIRECT_TO_ACTOR
LINUX_STRUCT_TASK_HAS_CRED
LINUX_STRUCT_PROC_DIR_ENTRY_HAS_OWNER
LINUX_HAVE_KMEM_CACHE_T

View File

@ -0,0 +1,181 @@
/*
* Copyright (c) 2009 Simon Wilkinson. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Linux specific store operations
*
* The idea of these operations is to reduce the number of copies
* that data incurs when passing from the disk cache through to the
* RX layer, and vice versa.
*
* In kernels which support it, we use the splice() operation - in
* older kernels, the filesystem's sendfile() operation is used directly.
* Either way, this means that we can get direct access to the page contents,
* rather than getting a copy.
*/
#include <afsconfig.h>
#include "afs/param.h"
#include <linux/fs.h>
#if defined(HAVE_SPLICE_DIRECT_TO_ACTOR)
# include <linux/splice.h>
#else
# include <linux/pipe_fs_i.h>
#endif
#include "afs/sysincludes.h"
#include "afsincludes.h"
#if defined(HAVE_SPLICE_DIRECT_TO_ACTOR)
/*
 * Splice actor: write the contents of one pipe buffer to the RX call
 * stored in the rxfs_storeVariables that were passed in via sd->u.data.
 *
 * pipe - pipe the buffer belongs to (only handed on to ->confirm)
 * buf  - pipe buffer describing the page, and the offset within that
 *        page, at which the data to send lives
 * sd   - splice descriptor; sd->len is the number of bytes to send and
 *        sd->u.data is the struct rxfs_storeVariables for this store
 *
 * Returns the number of bytes written on success, the non-zero result of
 * the buffer's confirm operation if that fails, or -33 if rx_Write()
 * wrote a different number of bytes than was requested.
 */
static int
afs_linux_splice_actor(struct pipe_inode_info *pipe,
                       struct pipe_buffer *buf,
                       struct splice_desc *sd)
{
    struct rxfs_storeVariables *svar = sd->u.data;
    size_t size;
    int code;

    code = buf->ops->confirm(pipe, buf);
    if (code)
        return code;

    size = sd->len;

    /*
     * The valid data starts buf->offset bytes into the page, not
     * necessarily at the start of it, so the offset must be applied
     * before handing the address to rx.
     *
     * NOTE(review): page_address() is used without kmap(); confirm that
     * cache pages can never be unmapped highmem pages on 32-bit kernels.
     *
     * Eventually, this could be rx_WritePage
     */
    code = rx_Write(svar->call,
                    (char *)page_address(buf->page) + buf->offset, size);
    if (code != size) {
        return -33; /* Can't get a proper rx error out from here */
    }
    return size;
}
/*
 * Direct-splice actor handed to splice_direct_to_actor(): drains the
 * pipe by running afs_linux_splice_actor over it via
 * __splice_from_pipe(), and passes that call's result back.
 */
static int
afs_linux_ds_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
    int spliced;

    spliced = __splice_from_pipe(pipe, sd, afs_linux_splice_actor);
    return spliced;
}
/*
 * Store proc which uses splice to reduce the number of page copies.
 *
 * Opens the cache file backing the chunk and splices tdc->f.chunkBytes
 * bytes of it, starting at offset 0, through afs_linux_ds_actor.
 *
 * ops          - store operations; only ops->status is called here
 * rock         - the struct rxfs_storeVariables for this store; also
 *                passed to the splice actor as its private data
 * tdc          - dcache entry naming the cache inode and chunk length
 * shouldwake   - may be NULL; if it points at a non-zero value and
 *                ops->status reports 0, it is cleared and the vcache
 *                waiter is woken
 * bytesXferred - incremented by the byte count when the splice result
 *                is positive
 *
 * Returns 0 if splice_direct_to_actor() returned a positive count,
 * otherwise returns that (zero or negative) result unchanged.
 */
afs_int32
afs_linux_storeproc(struct storeOps *ops, void *rock, struct dcache *tdc,
                    int *shouldwake, afs_size_t *bytesXferred)
{
    struct rxfs_storeVariables *vars = rock;
    struct file *chunkFile;
    struct splice_desc desc = {
        .u.data = rock,
        .pos = 0,
        .total_len = tdc->f.chunkBytes,
        .len = 0
    };
    int spliced;

    /* The open, splice and close all run between AFS_GUNLOCK/AFS_GLOCK */
    AFS_GUNLOCK();
    chunkFile = afs_linux_raw_open(&tdc->f.inode);
    spliced = splice_direct_to_actor(chunkFile, &desc, afs_linux_ds_actor);
    filp_close(chunkFile, NULL);
    AFS_GLOCK();

    /* If we're being called from a backing request, then wake up that
     * request once the file server says it's happy. Potentially, we
     * should do this each time we rx_Write, but that would mean
     * acquiring the GLOCK in the middle of our actor */
    if (shouldwake != NULL && *shouldwake != 0) {
        if (ops->status(rock) == 0) {
            *shouldwake = 0;
            afs_wakeup(vars->vcache);
        }
    }

    if (spliced <= 0)
        return spliced;

    *bytesXferred += spliced;
    return 0;
}
# else
/*
 * Read actor for the sendfile-based store path: writes up to
 * desc->count bytes from the given page to the RX call held in the
 * rxfs_storeVariables passed via desc->arg.data.
 *
 * desc   - read descriptor; count/written are updated on success and
 *          error is set on failure
 * page   - page holding the data
 * offset - byte offset of the data within the page
 * size   - number of bytes available in the page from that offset
 *
 * Returns the number of bytes consumed. On a short rx_Write() it
 * returns 0 with desc->error set to -33.
 *
 * NOTE(review): the failure is reported through desc->error rather than
 * a negative return because the return value is understood to be
 * treated as a byte count by the kernel's read loop; confirm against
 * the oldest kernel this path must support.
 */
static int
afs_linux_read_actor(read_descriptor_t *desc, struct page *page,
                     unsigned long offset, unsigned long size)
{
    struct rxfs_storeVariables *svar = desc->arg.data;
    unsigned long count = desc->count;
    int code;

    /* Never send more than the caller still wants */
    if (size > count)
        size = count;

    /* Eventually, this could be rx_WritePage */
    code = rx_Write(svar->call, (char *)page_address(page) + offset, size);
    if (code != size) {
        /* Can't get a proper rx error out from here */
        desc->error = -33;
        return 0;
    }

    desc->count = count - size;
    desc->written += size;
    return size;
}
/*
 * Store proc for kernels without splice_direct_to_actor(): feeds the
 * cache file to afs_linux_read_actor through the file's sendfile
 * operation.
 *
 * ops          - store operations; only ops->status is called here
 * rock         - the struct rxfs_storeVariables for this store; also
 *                passed to the read actor as its private data
 * tdc          - dcache entry naming the cache inode and chunk length
 * shouldwake   - may be NULL; if it points at a non-zero value and
 *                ops->status reports 0, it is cleared and the vcache
 *                waiter is woken
 * bytesXferred - incremented by the byte count when the sendfile result
 *                is positive
 *
 * Returns 0 if the sendfile operation returned a positive count,
 * otherwise returns that (zero or negative) result unchanged.
 */
afs_int32
afs_linux_storeproc(struct storeOps *ops, void *rock, struct dcache *tdc,
                    int *shouldwake, afs_size_t *bytesXferred)
{
    struct rxfs_storeVariables *vars = rock;
    struct file *chunkFile;
    loff_t pos = 0;
    int sent;

    /* The open, sendfile and close all run between AFS_GUNLOCK/AFS_GLOCK */
    AFS_GUNLOCK();
    chunkFile = afs_linux_raw_open(&tdc->f.inode);
    sent = chunkFile->f_op->sendfile(chunkFile, &pos, tdc->f.chunkBytes,
                                     afs_linux_read_actor, rock);
    filp_close(chunkFile, NULL);
    AFS_GLOCK();

    /* If we're being called from a backing request, then wake up that
     * request once the file server says it's happy. Potentially, we
     * should do this each time we rx_Write, but that would mean
     * acquiring the GLOCK in the middle of our actor */
    if (shouldwake != NULL && *shouldwake != 0) {
        if (ops->status(rock) == 0) {
            *shouldwake = 0;
            afs_wakeup(vars->vcache);
        }
    }

    if (sent <= 0)
        return sent;

    *bytesXferred += sent;
    return 0;
}
#endif

View File

@ -273,6 +273,9 @@ struct storeOps rxfs_storeUfsOps = {
.padd = rxfs_storePadd,
.close = rxfs_storeClose,
.destroy = rxfs_storeDestroy,
#ifdef AFS_LINUX26_ENV
.storeproc = afs_linux_storeproc
#endif
};
static

View File

@ -610,6 +610,11 @@ extern void osi_ReleaseVM(struct vcache *avc, afs_ucred_t *acred);
#endif
/* LINUX/osi_fetchstore.c */
#ifdef AFS_LINUX26_ENV
/* Linux-specific cache store proc. Arguments, in order: the store
 * operations, the opaque rock handed to them, the dcache chunk to store,
 * an optional wake-up flag, and a running count of bytes transferred.
 * NOTE(review): declared here as returning int while the definition is
 * written with afs_int32 - confirm the two types are identical. */
extern int afs_linux_storeproc(struct storeOps *, void *, struct dcache *,
int *, afs_size_t *);
#endif
/* ARCH/osi_misc.c */
extern void afs_osi_SetTime(osi_timeval_t * atv);

View File

@ -1217,6 +1217,19 @@ AC_DEFUN([LINUX_HAVE_PAGEVEC_LRU_ADD_FILE], [
AC_DEFINE([HAVE_PAGEVEC_LRU_ADD_FILE], 1, [define if your kernel has __pagevec_lru_add_file()])
fi])
dnl LINUX_HAVE_SPLICE_DIRECT_TO_ACTOR
dnl Test-builds a call to splice_direct_to_actor() with <linux/splice.h>
dnl included, using AC_TRY_KBUILD. The yes/no outcome is cached in
dnl ac_cv_linux_splice_direct_to_actor, and HAVE_SPLICE_DIRECT_TO_ACTOR
dnl is defined when the outcome is yes.
AC_DEFUN([LINUX_HAVE_SPLICE_DIRECT_TO_ACTOR], [
AC_MSG_CHECKING([for linux splice_direct_to_actor()])
AC_CACHE_VAL([ac_cv_linux_splice_direct_to_actor], [
AC_TRY_KBUILD(
[#include <linux/splice.h>],
[splice_direct_to_actor(NULL,NULL,NULL);],
ac_cv_linux_splice_direct_to_actor=yes,
ac_cv_linux_splice_direct_to_actor=no)])
AC_MSG_RESULT($ac_cv_linux_splice_direct_to_actor)
if test "x$ac_cv_linux_splice_direct_to_actor" = "xyes"; then
AC_DEFINE([HAVE_SPLICE_DIRECT_TO_ACTOR], 1, [define if your kernel has splice_direct_to_actor()])
fi])
AC_DEFUN([LINUX_STRUCT_TASK_HAS_CRED], [
AC_MSG_CHECKING([if struct task has cred])
AC_CACHE_VAL([ac_cv_linux_struct_task_has_cred], [

View File

@ -517,6 +517,8 @@ osi_timeout.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_timeout.c
$(CRULE_NOOPT)
osi_pagecopy.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_pagecopy.c
$(CRULE_NOOPT)
# osi_fetchstore.o is built from the OS-specific afs source directory using the CRULE_NOOPT rule
osi_fetchstore.o: $(TOP_SRCDIR)/afs/$(MKAFS_OSTYPE)/osi_fetchstore.c
$(CRULE_NOOPT)
clean:
-$(RM) -rf STATIC* MODLOAD* $(AFS_OS_CLEAN)

View File

@ -45,7 +45,8 @@ AFS_OS_OBJS = \
<all>
osi_vnodeops.o \
<linux26 linux_26>
osi_pagecopy.o
osi_pagecopy.o \
osi_fetchstore.o
<all>
AFS_OS_PAGOBJS = \