move cxgb_l2t.[ch] from NIC to TOE

move most offload functionality from NIC to TOE
factor out all socket and inpcb direct access
factor out access to locking in inpcb, pcbinfo, and sockbuf
This commit is contained in:
Kip Macy 2008-04-19 03:22:43 +00:00
parent 3d970c5c0e
commit 46b0a854cc
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=178302
19 changed files with 2461 additions and 1709 deletions

View File

@ -518,7 +518,6 @@ dev/cs/if_cs_isa.c optional cs isa
dev/cs/if_cs_pccard.c optional cs pccard
dev/cxgb/cxgb_main.c optional cxgb pci
dev/cxgb/cxgb_offload.c optional cxgb pci
dev/cxgb/cxgb_l2t.c optional cxgb pci
dev/cxgb/cxgb_lro.c optional cxgb pci
dev/cxgb/cxgb_sge.c optional cxgb pci
dev/cxgb/cxgb_multiq.c optional cxgb pci

View File

@ -1,6 +1,6 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -9,7 +9,7 @@ modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
@ -723,9 +723,10 @@ cxgb_free(struct adapter *sc)
printf("cxgb_free: DEVMAP_BIT not set\n");
} else
printf("not offloading set\n");
#ifdef notyet
if (sc->flags & CXGB_OFLD_INIT)
cxgb_offload_deactivate(sc);
#endif
free(sc->filters, M_DEVBUF);
t3_sge_free(sc);
@ -1732,9 +1733,6 @@ offload_open(struct port_info *pi)
t3_tp_set_offload_mode(adapter, 1);
tdev->lldev = pi->ifp;
err = cxgb_offload_activate(adapter);
if (err)
goto out;
init_port_mtus(adapter);
t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
@ -1743,10 +1741,6 @@ offload_open(struct port_info *pi)
adapter->port[0].ifp->if_mtu : 0xffff);
init_smt(adapter);
/* Call back all registered clients */
cxgb_add_clients(tdev);
out:
/* restore them in case the offload module has changed them */
if (err) {
t3_tp_set_offload_mode(adapter, 0);
@ -1764,8 +1758,6 @@ offload_close(struct t3cdev *tdev)
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
return (0);
/* Call back all registered clients */
cxgb_remove_clients(tdev);
tdev->lldev = NULL;
cxgb_set_dummy_ops(tdev);
t3_tp_set_offload_mode(adapter, 0);

File diff suppressed because it is too large Load Diff

View File

@ -36,13 +36,13 @@ $FreeBSD$
#ifdef CONFIG_DEFINED
#include <common/cxgb_version.h>
#include <cxgb_config.h>
#include <cxgb_l2t.h>
#include <ulp/tom/cxgb_l2t.h>
#include <common/cxgb_tcb.h>
#include <t3cdev.h>
#else
#include <dev/cxgb/common/cxgb_version.h>
#include <dev/cxgb/cxgb_config.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/ulp/tom/cxgb_l2t.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/t3cdev.h>
#endif

File diff suppressed because it is too large Load Diff

View File

@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/file.h>
@ -64,6 +63,9 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
@ -75,7 +77,6 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <vm/vm.h>
@ -93,6 +94,7 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
#include <dev/cxgb/ulp/tom/cxgb_vm.h>
static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
@ -262,13 +264,13 @@ so_should_ddp(const struct toepcb *toep, int last_recv_len)
/*
 * Test whether mbuf m has the M_DDP bit set in m_flags.
 *
 * NOTE(review): this text is a diff rendering, so two forms of the same
 * return statement appear back to back; they differ only in whether the
 * result is the raw masked bits or a normalized 0/1.
 */
static inline int
is_ddp(const struct mbuf *m)
{
/* Yields the masked flag bits themselves (non-zero when M_DDP is set). */
return (m->m_flags & M_DDP);
/* Same test, with the result normalized to 0 or 1. */
return ((m->m_flags & M_DDP) != 0);
}
/*
 * Test whether mbuf m satisfies is_ddp() and also has DDP_BF_PSH set in
 * m->m_pkthdr.csum_flags.
 *
 * NOTE(review): this text is a diff rendering, so two forms of the same
 * return statement appear back to back.
 */
static inline int
is_ddp_psh(const struct mbuf *m)
{
/* Logical AND of the two tests; the result is already 0 or 1. */
return is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH);
/* Same test, parenthesized and explicitly compared against 0. */
return ((is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH)) != 0);
}
static int
@ -398,11 +400,12 @@ t3_sosend(struct socket *so, struct uio *uio)
{
int rv, count, hold_resid, sent, iovcnt;
struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
struct tcpcb *tp = sototcpcb(so);
struct tcpcb *tp = so_sototcpcb(so);
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
struct uio uiotmp;
struct sockbuf *snd;
/*
* Events requiring iteration:
* - number of pages exceeds max hold pages for process or system
@ -418,11 +421,12 @@ t3_sosend(struct socket *so, struct uio *uio)
iovcnt = uio->uio_iovcnt;
iov = uio->uio_iov;
sent = 0;
snd = so_sockbuf_snd(so);
sendmore:
/*
* Make sure we don't exceed the socket buffer
*/
count = min(toep->tp_page_count, (sbspace(&so->so_snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
count = min(toep->tp_page_count, (sockbuf_sbspace(snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, 0);
hold_resid = uiotmp.uio_resid;
if (rv)
@ -455,7 +459,7 @@ sendmore:
}
uio->uio_resid -= m->m_pkthdr.len;
sent += m->m_pkthdr.len;
sbappend(&so->so_snd, m);
sbappend(snd, m);
t3_push_frames(so, TRUE);
iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
}
@ -487,7 +491,7 @@ static int
cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
struct tcpcb *tp = sototcpcb(so);
struct tcpcb *tp = so_sototcpcb(so);
struct toedev *tdev;
int zcopy_thres, zcopy_enabled, rv;
@ -503,13 +507,15 @@ cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
* - blocking socket XXX for now
*
*/
if (tp->t_flags & TF_TOE) {
tdev = TOE_DEV(so);
if (tp && tp->t_flags & TF_TOE) {
struct toepcb *toep = tp->t_toe;
tdev = toep->tp_toedev;
zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
if (uio && (uio->uio_resid > zcopy_thres) &&
(uio->uio_iovcnt < TMP_IOV_MAX) && ((so->so_state & SS_NBIO) == 0)
(uio->uio_iovcnt < TMP_IOV_MAX) && ((so_state_get(so) & SS_NBIO) == 0)
&& zcopy_enabled) {
rv = t3_sosend(so, uio);
if (rv != EAGAIN)
@ -530,8 +536,9 @@ cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
static __inline void
sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
{
#ifdef notyet
SOCKBUF_LOCK_ASSERT(sb);
#endif
/*
* First, update for the new value of nextrecord. If necessary, make
* it the first record.
@ -554,13 +561,12 @@ sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
sb->sb_lastrecord = sb->sb_mb;
}
#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO)
#define IS_NONBLOCKING(so) (so_state_get(so) & SS_NBIO)
static int
t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
{
struct tcpcb *tp = sototcpcb(so);
struct tcpcb *tp = so_sototcpcb(so);
struct toepcb *toep = tp->t_toe;
struct mbuf *m;
uint32_t offset;
@ -568,68 +574,83 @@ t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
int target; /* Read at least this many bytes */
int user_ddp_ok;
struct ddp_state *p;
struct inpcb *inp = sotoinpcb(so);
struct inpcb *inp = so_sotoinpcb(so);
int socket_state, socket_error;
struct sockbuf *rcv;
avail = offset = copied = copied_unacked = 0;
flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
err = sblock(&so->so_rcv, SBLOCKWAIT(flags));
rcv = so_sockbuf_rcv(so);
err = sblock(rcv, SBLOCKWAIT(flags));
p = &toep->tp_ddp_state;
if (err)
return (err);
SOCKBUF_LOCK(&so->so_rcv);
rcv = so_sockbuf_rcv(so);
sockbuf_lock(rcv);
if ((tp->t_flags & TF_TOE) == 0) {
sockbuf_unlock(rcv);
err = EAGAIN;
goto done_unlocked;
}
p->user_ddp_pending = 0;
restart:
if ((tp->t_flags & TF_TOE) == 0) {
sockbuf_unlock(rcv);
err = EAGAIN;
goto done_unlocked;
}
len = uio->uio_resid;
m = so->so_rcv.sb_mb;
target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat;
m = rcv->sb_mb;
target = (flags & MSG_WAITALL) ? len : rcv->sb_lowat;
user_ddp_ok = p->ubuf_ddp_ready;
p->cancel_ubuf = 0;
if (len == 0)
goto done;
#if 0
while (m && m->m_len == 0) {
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
}
#endif
if (m)
goto got_mbuf;
/* empty receive queue */
if (copied >= target && (so->so_rcv.sb_mb == NULL) &&
if (copied >= target && (rcv->sb_mb == NULL) &&
!p->user_ddp_pending)
goto done;
socket_state = so_state_get(so);
socket_error = so_error_get(so);
rcv = so_sockbuf_rcv(so);
if (copied) {
if (so->so_error || tp->t_state == TCPS_CLOSED ||
(so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)))
if (socket_error || tp->t_state == TCPS_CLOSED ||
(socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)))
goto done;
} else {
if (so->so_state & SS_NOFDREF)
if (socket_state & SS_NOFDREF)
goto done;
if (so->so_error) {
err = so->so_error;
so->so_error = 0;
if (socket_error) {
err = socket_error;
socket_error = 0;
goto done;
}
if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
if (rcv->sb_state & SBS_CANTRCVMORE)
goto done;
if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
if (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
goto done;
if (tp->t_state == TCPS_CLOSED) {
err = ENOTCONN;
goto done;
}
}
if (so->so_rcv.sb_mb && !p->user_ddp_pending) {
SOCKBUF_UNLOCK(&so->so_rcv);
if (rcv->sb_mb && !p->user_ddp_pending) {
sockbuf_unlock(rcv);
inp_wlock(inp);
t3_cleanup_rbuf(tp, copied_unacked);
inp_wunlock(inp);
SOCKBUF_LOCK(&so->so_rcv);
sockbuf_lock(rcv);
copied_unacked = 0;
goto restart;
}
@ -637,14 +658,15 @@ restart:
uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
p->ubuf_ddp_ready) {
p->user_ddp_pending =
!t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags, 1, 1);
!t3_overlay_ubuf(toep, rcv, uio,
IS_NONBLOCKING(so), flags, 1, 1);
if (p->user_ddp_pending) {
p->kbuf_posted++;
user_ddp_ok = 0;
}
}
if (p->kbuf[0] && (p->kbuf_posted == 0)) {
t3_post_kbuf(so, 1, IS_NONBLOCKING(so));
t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
p->kbuf_posted++;
}
if (p->user_ddp_pending) {
@ -652,8 +674,7 @@ restart:
if (copied >= target)
user_ddp_ok = 0;
DPRINTF("sbwaiting 1\n");
if ((err = sbwait(&so->so_rcv)) != 0)
if ((err = sbwait(rcv)) != 0)
goto done;
//for timers to work await_ddp_completion(sk, flags, &timeo);
} else if (copied >= target)
@ -662,26 +683,27 @@ restart:
if (copied_unacked) {
int i = 0;
SOCKBUF_UNLOCK(&so->so_rcv);
sockbuf_unlock(rcv);
inp_wlock(inp);
t3_cleanup_rbuf(tp, copied_unacked);
inp_wunlock(inp);
copied_unacked = 0;
if (mp_ncpus > 1)
while (i++ < 200 && so->so_rcv.sb_mb == NULL)
while (i++ < 200 && rcv->sb_mb == NULL)
cpu_spinwait();
SOCKBUF_LOCK(&so->so_rcv);
sockbuf_lock(rcv);
}
if (so->so_rcv.sb_mb)
if (rcv->sb_mb)
goto restart;
DPRINTF("sbwaiting 2 copied=%d target=%d avail=%d so=%p mb=%p cc=%d\n", copied, target, avail, so,
so->so_rcv.sb_mb, so->so_rcv.sb_cc);
if ((err = sbwait(&so->so_rcv)) != 0)
goto done;
if ((err = sbwait(rcv)) != 0)
goto done;
}
goto restart;
got_mbuf:
CTR6(KTR_TOM, "t3_soreceive: ddp=%d m_len=%u resid=%u "
"m_seq=0x%08x copied_seq=0x%08x copied_unacked=%u",
is_ddp(m), m->m_pkthdr.len, len, m->m_seq, toep->tp_copied_seq,
copied_unacked);
KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len, m->m_pkthdr.len));
KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x m->m_len=%d",
m->m_next, m->m_nextpkt, m->m_flags, m->m_len));
@ -690,17 +712,24 @@ got_mbuf:
panic("empty mbuf and NOCOPY not set\n");
CTR0(KTR_TOM, "ddp done notification");
p->user_ddp_pending = 0;
sbdroprecord_locked(&so->so_rcv);
sbdroprecord_locked(rcv);
goto done;
}
offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n",
m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, !!is_ddp(m));
if (is_ddp(m)) {
KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0,
("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x",
offset, toep->tp_copied_seq, copied_unacked, m->m_seq));
offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
} else
offset = 0;
if (offset >= m->m_pkthdr.len)
panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x "
"pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq,
panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x "
"seq 0x%x pktlen %d ddp flags 0x%x", offset,
toep->tp_copied_seq + copied_unacked, m->m_seq,
m->m_pkthdr.len, m->m_ddp_flags);
avail = m->m_pkthdr.len - offset;
@ -709,7 +738,6 @@ got_mbuf:
panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset);
avail = len;
}
CTR4(KTR_TOM, "t3_soreceive: m_len=%u offset=%u len=%u m_seq=0%08x", m->m_pkthdr.len, offset, len, m->m_seq);
#ifdef URGENT_DATA_SUPPORTED
/*
@ -724,7 +752,7 @@ got_mbuf:
if (urg_offset) {
/* stop short of the urgent data */
avail = urg_offset;
} else if ((so->so_options & SO_OOBINLINE) == 0) {
} else if ((so_options_get(so) & SO_OOBINLINE) == 0) {
/* First byte is urgent, skip */
toep->tp_copied_seq++;
offset++;
@ -735,7 +763,7 @@ got_mbuf:
}
}
#endif
if (is_ddp_psh(m) || offset) {
if (is_ddp_psh(m) || offset || (rcv->sb_mb && !is_ddp(m))) {
user_ddp_ok = 0;
#ifdef T3_TRACE
T3_TRACE0(TIDTB(so), "t3_sosend: PSH");
@ -746,7 +774,8 @@ got_mbuf:
uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
p->ubuf_ddp_ready) {
p->user_ddp_pending =
!t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags, 1, 1);
!t3_overlay_ubuf(toep, rcv, uio,
IS_NONBLOCKING(so), flags, 1, 1);
if (p->user_ddp_pending) {
p->kbuf_posted++;
user_ddp_ok = 0;
@ -765,16 +794,23 @@ got_mbuf:
if (__predict_true(!(flags & MSG_TRUNC))) {
int resid = uio->uio_resid;
SOCKBUF_UNLOCK(&so->so_rcv);
sockbuf_unlock(rcv);
if ((err = copy_data(m, offset, avail, uio))) {
if (err)
err = EFAULT;
goto done_unlocked;
}
SOCKBUF_LOCK(&so->so_rcv);
sockbuf_lock(rcv);
if (avail != (resid - uio->uio_resid))
printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
if ((tp->t_flags & TF_TOE) == 0) {
sockbuf_unlock(rcv);
err = EAGAIN;
goto done_unlocked;
}
}
copied += avail;
@ -816,42 +852,45 @@ skip_copy:
while (count > 0) {
count -= m->m_len;
KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
sbfree(&so->so_rcv, m);
so->so_rcv.sb_mb = m_free(m);
m = so->so_rcv.sb_mb;
sbfree(rcv, m);
rcv->sb_mb = m_free(m);
m = rcv->sb_mb;
}
sockbuf_pushsync(&so->so_rcv, nextrecord);
sockbuf_pushsync(rcv, nextrecord);
#if 0
sbdrop_locked(&so->so_rcv, m->m_pkthdr.len);
sbdrop_locked(rcv, m->m_pkthdr.len);
#endif
exitnow = got_psh || nomoredata;
if (copied >= target && (so->so_rcv.sb_mb == NULL) && exitnow)
if (copied >= target && (rcv->sb_mb == NULL) && exitnow)
goto done;
if (copied_unacked > (so->so_rcv.sb_hiwat >> 2)) {
SOCKBUF_UNLOCK(&so->so_rcv);
if (copied_unacked > (rcv->sb_hiwat >> 2)) {
sockbuf_unlock(rcv);
inp_wlock(inp);
t3_cleanup_rbuf(tp, copied_unacked);
inp_wunlock(inp);
copied_unacked = 0;
SOCKBUF_LOCK(&so->so_rcv);
sockbuf_lock(rcv);
}
}
if (len > 0)
goto restart;
done:
if ((tp->t_flags & TF_TOE) == 0) {
sockbuf_unlock(rcv);
err = EAGAIN;
goto done_unlocked;
}
/*
* If we can still receive decide what to do in preparation for the
* next receive. Note that RCV_SHUTDOWN is set if the connection
* transitioned to CLOSE but not if it was in that state to begin with.
*/
if (__predict_true((so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
if (__predict_true((so_state_get(so) & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
if (p->user_ddp_pending) {
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_rcv);
user_ddp_ok = 0;
t3_cancel_ubuf(toep);
if (so->so_rcv.sb_mb) {
t3_cancel_ubuf(toep, rcv);
if (rcv->sb_mb) {
if (copied < 0)
copied = 0;
if (len > 0)
@ -865,11 +904,11 @@ skip_copy:
"chelsio_recvmsg: about to exit, repost kbuf");
#endif
t3_post_kbuf(so, 1, IS_NONBLOCKING(so));
t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
p->kbuf_posted++;
} else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) {
CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid);
if (!t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so),
if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev,
ddp_copy_limit), 0, IS_NONBLOCKING(so)))
p->kbuf_posted = 1;
}
@ -881,14 +920,14 @@ skip_copy:
copied, len, buffers_freed, p ? p->kbuf_posted : -1,
p->user_ddp_pending);
#endif
SOCKBUF_UNLOCK(&so->so_rcv);
sockbuf_unlock(rcv);
done_unlocked:
if (copied_unacked) {
if (copied_unacked && (tp->t_flags & TF_TOE)) {
inp_wlock(inp);
t3_cleanup_rbuf(tp, copied_unacked);
inp_wunlock(inp);
}
sbunlock(&so->so_rcv);
sbunlock(rcv);
return (err);
}
@ -899,8 +938,8 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
{
struct toedev *tdev;
int rv, zcopy_thres, zcopy_enabled, flags;
struct tcpcb *tp = sototcpcb(so);
struct tcpcb *tp = so_sototcpcb(so);
flags = flagsp ? *flagsp &~ MSG_EOR : 0;
/*
@ -916,30 +955,61 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
* - iovcnt is 1
*
*/
if ((tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
&& (uio->uio_iovcnt == 1) && (mp0 == NULL)) {
tdev = TOE_DEV(so);
struct toepcb *toep = tp->t_toe;
tdev = toep->tp_toedev;
zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
zcopy_enabled = TOM_TUNABLE(tdev, ddp);
if ((uio->uio_resid > zcopy_thres) &&
(uio->uio_iovcnt == 1)
&& zcopy_enabled) {
CTR3(KTR_CXGB, "cxgb_soreceive: t_flags=0x%x flags=0x%x uio_resid=%d",
tp->t_flags, flags, uio->uio_resid);
rv = t3_soreceive(so, flagsp, uio);
if (rv != EAGAIN)
return (rv);
else
printf("returned EAGAIN\n");
}
} else if ((tp->t_flags & TF_TOE) && uio && mp0 == NULL)
printf("skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n",
flags, uio->uio_iovcnt, so->so_rcv.sb_state);
} else if (tp && (tp->t_flags & TF_TOE) && uio && mp0 == NULL) {
struct sockbuf *rcv = so_sockbuf_rcv(so);
log(LOG_INFO, "skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n",
flags, uio->uio_iovcnt, rcv->sb_state);
}
return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
}
struct protosw cxgb_protosw;
struct pr_usrreqs cxgb_tcp_usrreqs;
/*
 * Point a socket at the offload-aware protocol switch.
 *
 * On the first call, the socket's current protosw and the pr_usrreqs table
 * it references are copied into the file-level cxgb_protosw and
 * cxgb_tcp_usrreqs.  The copies are then patched so that pr_ctloutput,
 * pru_sosend and pru_soreceive refer to t3_ctloutput, cxgb_sosend and
 * cxgb_soreceive, and so that cxgb_protosw uses cxgb_tcp_usrreqs.
 * Every call installs &cxgb_protosw on the socket via so_protosw_set().
 *
 * The one-time guard is set only after the tables are fully patched.
 * Previously it was never set, so each call re-copied the stock tables over
 * cxgb_protosw/cxgb_tcp_usrreqs while other sockets could already be using
 * them.
 *
 * NOTE(review): no locking is taken here; this assumes callers serialize
 * the first invocation -- confirm against the call sites.
 */
void
t3_install_socket_ops(struct socket *so)
{
	static int copied = 0;
	struct pr_usrreqs *pru;
	struct protosw *psw;

	if (copied == 0) {
		psw = so_protosw_get(so);
		pru = psw->pr_usrreqs;

		/* Start from the socket's existing tables... */
		bcopy(psw, &cxgb_protosw, sizeof(*psw));
		bcopy(pru, &cxgb_tcp_usrreqs, sizeof(*pru));

		/* ...and override only the offload-specific entry points. */
		cxgb_protosw.pr_ctloutput = t3_ctloutput;
		cxgb_protosw.pr_usrreqs = &cxgb_tcp_usrreqs;
		cxgb_tcp_usrreqs.pru_sosend = cxgb_sosend;
		cxgb_tcp_usrreqs.pru_soreceive = cxgb_soreceive;

		/* Tables are complete; do not rebuild them on later calls. */
		copied = 1;
	}
	so_protosw_set(so, &cxgb_protosw);
}

View File

@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <machine/bus.h>
@ -61,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/sys/mbufq.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <vm/vm.h>
@ -90,6 +89,7 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
#include <dev/cxgb/ulp/tom/cxgb_vm.h>
#define MAX_SCHEDULE_TIMEOUT 300
/*
@ -222,13 +222,15 @@ t3_free_ddp_gl(struct ddp_gather_list *gl)
* pods before failing entirely.
*/
static int
alloc_buf1_ppods(struct socket *so, struct ddp_state *p,
alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p,
unsigned long addr, unsigned int len)
{
int err, tag, npages, nppods;
struct tom_data *d = TOM_DATA(TOE_DEV(so));
struct tom_data *d = TOM_DATA(toep->tp_toedev);
#if 0
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
nppods = min(pages2ppods(npages), MAX_PPODS);
nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
@ -243,7 +245,7 @@ alloc_buf1_ppods(struct socket *so, struct ddp_state *p,
p->ubuf_nppods = nppods;
p->ubuf_tag = tag;
#if NUM_DDP_KBUF == 1
t3_set_ddp_tag(so, 1, tag << 6);
t3_set_ddp_tag(toep, 1, tag << 6);
#endif
return (0);
}
@ -255,7 +257,7 @@ alloc_buf1_ppods(struct socket *so, struct ddp_state *p,
#define UBUF_OFFSET 1
static __inline unsigned long
select_ddp_flags(const struct socket *so, int buf_idx,
select_ddp_flags(const struct toepcb *toep, int buf_idx,
int nonblock, int rcv_flags)
{
if (buf_idx == 1) {
@ -266,7 +268,7 @@ select_ddp_flags(const struct socket *so, int buf_idx,
if (nonblock)
return V_TF_DDP_BUF1_FLUSH(1);
return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(TOE_DEV(so),
return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev,
ddp_push_wait));
}
@ -277,7 +279,7 @@ select_ddp_flags(const struct socket *so, int buf_idx,
if (nonblock)
return V_TF_DDP_BUF0_FLUSH(1);
return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(TOE_DEV(so), ddp_push_wait));
return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait));
}
/*
@ -289,21 +291,22 @@ select_ddp_flags(const struct socket *so, int buf_idx,
* needs to be done separately.
*/
static void
t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate,
t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate,
int activate, int nonblock)
{
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
unsigned long flags;
#if 0
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
p->buf_state[bufidx].gl = p->kbuf[bufidx];
p->cur_buf = bufidx;
p->kbuf_idx = bufidx;
flags = select_ddp_flags(so, bufidx, nonblock, 0);
flags = select_ddp_flags(toep, bufidx, nonblock, 0);
if (!bufidx)
t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
@ -342,19 +345,20 @@ t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate,
* The current implementation handles iovecs with only one entry.
*/
static int
setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length)
setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length)
{
int err;
unsigned int len;
struct ddp_gather_list *gl = NULL;
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
struct iovec *iov = uio->uio_iov;
vm_offset_t addr = (vm_offset_t)iov->iov_base - oft;
#ifdef notyet
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
if (__predict_false(p->ubuf_nppods == 0)) {
err = alloc_buf1_ppods(so, p, addr, iov->iov_len + oft);
err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft);
if (err)
return (err);
}
@ -363,7 +367,7 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length)
len -= addr & PAGE_MASK;
if (len > M_TCB_RX_DDP_BUF0_LEN)
len = M_TCB_RX_DDP_BUF0_LEN;
len = min(len, sototcpcb(so)->rcv_wnd - 32768);
len = min(len, toep->tp_tp->rcv_wnd - 32768);
len = min(len, iov->iov_len + oft);
if (len <= p->kbuf[0]->dgl_length) {
@ -378,7 +382,7 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length)
if (p->ubuf)
t3_free_ddp_gl(p->ubuf);
p->ubuf = gl;
t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
gl->dgl_offset, 0);
}
*length = len;
@ -389,26 +393,19 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length)
*
*/
void
t3_cancel_ubuf(struct toepcb *toep)
t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv)
{
struct ddp_state *p = &toep->tp_ddp_state;
int ubuf_pending = t3_ddp_ubuf_pending(toep);
struct socket *so = toeptoso(toep);
int err = 0, count=0;
int err = 0, count = 0;
if (p->ubuf == NULL)
return;
sockbuf_lock_assert(rcv);
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
p->cancel_ubuf = 1;
while (ubuf_pending && !(so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
#ifdef T3_TRACE
T3_TRACE3(TB(p),
"t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
p->get_tcb_count);
#endif
while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) {
CTR3(KTR_TOM,
"t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
@ -417,20 +414,22 @@ t3_cancel_ubuf(struct toepcb *toep)
if (p->get_tcb_count == 0)
t3_cancel_ddpbuf(toep, p->cur_buf);
else
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d so=%p SBS_CANTRCVMORE=%d",
err, p->get_tcb_count, so->so_rcv.sb_timeo, so,
!!(so->so_rcv.sb_state & SBS_CANTRCVMORE));
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d",
err, p->get_tcb_count, rcv->sb_timeo, rcv,
!!(rcv->sb_state & SBS_CANTRCVMORE));
while (p->get_tcb_count && !(so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) {
if (count & 0xfffffff)
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d so=%p count=%d",
err, p->get_tcb_count, so->so_rcv.sb_timeo, so, count);
CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d",
err, p->get_tcb_count, rcv->sb_timeo, rcv, count);
count++;
err = sbwait(&so->so_rcv);
err = sbwait(rcv);
}
ubuf_pending = t3_ddp_ubuf_pending(toep);
}
p->cancel_ubuf = 0;
p->user_ddp_pending = 0;
}
#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \
@ -445,31 +444,34 @@ t3_cancel_ubuf(struct toepcb *toep)
* Post a user buffer as an overlay on top of the current kernel buffer.
*/
int
t3_overlay_ubuf(struct socket *so, const struct uio *uio,
int nonblock, int rcv_flags, int modulate, int post_kbuf)
t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
const struct uio *uio, int nonblock, int rcv_flags,
int modulate, int post_kbuf)
{
int err, len, ubuf_idx;
unsigned long flags;
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
if (p->kbuf[0] == NULL) {
return (EINVAL);
}
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
err = setup_uio_ppods(so, uio, 0, &len);
if (err) {
sockbuf_unlock(rcv);
err = setup_uio_ppods(toep, uio, 0, &len);
sockbuf_lock(rcv);
if (err)
return (err);
}
if ((rcv->sb_state & SBS_CANTRCVMORE) ||
(toep->tp_tp->t_flags & TF_TOE) == 0)
return (EINVAL);
ubuf_idx = p->kbuf_idx;
p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
/* Use existing offset */
/* Don't need to update .gl, user buffer isn't copied. */
p->cur_buf = ubuf_idx;
flags = select_ddp_flags(so, ubuf_idx, nonblock, rcv_flags);
flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags);
if (post_kbuf) {
struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
@ -565,14 +567,13 @@ t3_release_ddp_resources(struct toepcb *toep)
}
void
t3_post_kbuf(struct socket *so, int modulate, int nonblock)
t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock)
{
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
t3_repost_kbuf(so, p->cur_buf, modulate, 1, nonblock);
t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock);
#ifdef T3_TRACE
T3_TRACE1(TIDTB(so),
"t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
@ -586,12 +587,11 @@ t3_post_kbuf(struct socket *so, int modulate, int nonblock)
* open.
*/
int
t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, int nonblock)
t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
{
int i, err = ENOMEM;
static vm_pindex_t color;
unsigned int nppods, kbuf_pages, idx = 0;
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
struct tom_data *d = TOM_DATA(toep->tp_toedev);
@ -599,8 +599,9 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in
if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
return (EINVAL);
#ifdef notyet
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
#endif
kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
nppods = pages2ppods(kbuf_pages);
@ -643,18 +644,18 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in
pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
#endif
t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx],
t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx],
p->kbuf[idx]->dgl_length, 0, 0);
}
cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
t3_repost_kbuf(so, 0, 0, 1, nonblock);
t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
t3_repost_kbuf(toep, 0, 0, 1, nonblock);
t3_set_rcv_coalesce_enable(so,
TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));
t3_set_dack_mss(so, TOM_TUNABLE(TOE_DEV(so), delack)>>1);
t3_set_rcv_coalesce_enable(toep,
TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
#ifdef T3_TRACE
T3_TRACE4(TIDTB(so),
@ -664,7 +665,6 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in
CTR4(KTR_TOM,
"t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
DELAY(100000);
cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
return (0);

View File

@ -77,12 +77,14 @@ void toepcb_hold(struct toepcb *);
void toepcb_release(struct toepcb *);
void toepcb_init(struct toepcb *);
void t3_set_rcv_coalesce_enable(struct socket *so, int on_off);
void t3_set_dack_mss(struct socket *so, int on);
void t3_set_keepalive(struct socket *so, int on_off);
void t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag);
void t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset,
void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off);
void t3_set_dack_mss(struct toepcb *toep, int on);
void t3_set_keepalive(struct toepcb *toep, int on_off);
void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag);
void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
unsigned int len);
int t3_get_tcb(struct socket *so);
int t3_get_tcb(struct toepcb *toep);
int t3_ctloutput(struct socket *so, struct sockopt *sopt);
#endif

View File

@ -540,4 +540,3 @@ t3_free_l2t(struct l2t_data *d)
cxgb_free_mem(d);
}

View File

@ -1,6 +1,6 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without

View File

@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/common/cxgb_ctl_defs.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>

View File

@ -109,13 +109,13 @@ struct ddp_state {
unsigned short kbuf_noinval;
unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
struct ddp_gather_list *ubuf;
int user_ddp_pending;
unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
unsigned int ubuf_tag;
unsigned int ubuf_ddp_ready;
int cancel_ubuf;
int get_tcb_count;
unsigned int kbuf_posted;
int cancel_ubuf;
int user_ddp_pending;
unsigned int kbuf_nppods[NUM_DDP_KBUF];
unsigned int kbuf_tag[NUM_DDP_KBUF];
struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
@ -133,6 +133,7 @@ enum {
};
#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
struct sockbuf;
/*
* Returns 1 if a UBUF DMA buffer might be active.
@ -153,7 +154,7 @@ t3_ddp_ubuf_pending(struct toepcb *toep)
(p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
}
int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl,
int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
unsigned int nppods, unsigned int tag, unsigned int maxoff,
unsigned int pg_off, unsigned int color);
int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
@ -161,13 +162,14 @@ void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
void t3_free_ddp_gl(struct ddp_gather_list *gl);
int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len);
//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
void t3_post_kbuf(struct socket *so, int modulate, int nonblock);
int t3_post_ubuf(struct socket *so, const struct uio *uio, int nonblock,
void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock);
int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
void t3_cancel_ubuf(struct toepcb *toep);
int t3_overlay_ubuf(struct socket *so, const struct uio *uio, int nonblock,
void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv);
int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
const struct uio *uio, int nonblock,
int rcv_flags, int modulate, int post_kbuf);
int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, int nonblock);
int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock);
void t3_cleanup_ddp(struct toepcb *toep);
void t3_release_ddp_resources(struct toepcb *toep);
void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);

View File

@ -0,0 +1,360 @@
/*-
* Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the Chelsio Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* grab bag of accessor routines that will either be moved to netinet
* or removed
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
#include <netinet/tcp_syncache.h>
#include <netinet/toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
/*
 * This file contains code as a short-term staging area before it is moved
 * into sys/netinet/tcp_offload.c
 */
/*
 * Lock wrapper around tcp_twstart() for an offloaded connection.
 *
 * Acquires the tcbinfo write lock and the write lock of the connection's
 * inpcb, calls tcp_twstart(), and then releases only the tcbinfo lock.
 * NOTE(review): the inpcb lock is not released here; presumably
 * tcp_twstart() drops it -- confirm against the netinet implementation.
 */
void
tcp_offload_twstart(struct tcpcb *tp)
{
	struct inpcb *inp;

	INP_INFO_WLOCK(&tcbinfo);
	inp = tp->t_inpcb;
	inp_wlock(inp);
	tcp_twstart(tp);
	INP_INFO_WUNLOCK(&tcbinfo);
}
void
tcp_offload_twstart_disconnect(struct tcpcb *tp)
{
struct socket *so;
INP_INFO_WLOCK(&tcbinfo);
inp_wlock(tp->t_inpcb);
so = tp->t_inpcb->inp_socket;
tcp_twstart(tp);
if (so)
soisdisconnected(so);
INP_INFO_WUNLOCK(&tcbinfo);
}
struct tcpcb *
tcp_offload_close(struct tcpcb *tp)
{
INP_INFO_WLOCK(&tcbinfo);
INP_WLOCK(tp->t_inpcb);
tp = tcp_close(tp);
INP_INFO_WUNLOCK(&tcbinfo);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
return (tp);
}
struct tcpcb *
tcp_offload_drop(struct tcpcb *tp, int error)
{
INP_INFO_WLOCK(&tcbinfo);
INP_WLOCK(tp->t_inpcb);
tp = tcp_drop(tp, error);
INP_INFO_WUNLOCK(&tcbinfo);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
return (tp);
}
void
inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
{
struct inpcb *inp;
INP_INFO_RLOCK(&tcbinfo);
LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
}
INP_INFO_RUNLOCK(&tcbinfo);
}
/*
 * Return the socket attached to inp (inp->inp_socket).
 * Asserts that the caller holds the inpcb write lock.
 */
struct socket *
inp_inpcbtosocket(struct inpcb *inp)
{
	struct socket *sock;

	INP_WLOCK_ASSERT(inp);
	sock = inp->inp_socket;

	return (sock);
}
/*
 * Return inp->inp_ppcb cast to a tcpcb pointer.
 * Asserts that the caller holds the inpcb write lock.
 */
struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp)
{
	struct tcpcb *tp;

	INP_WLOCK_ASSERT(inp);
	tp = (struct tcpcb *)inp->inp_ppcb;

	return (tp);
}
/*
 * Return the inp_ip_tos field of inp.  No lock is taken or asserted.
 */
int
inp_ip_tos_get(const struct inpcb *inp)
{
	int tos = inp->inp_ip_tos;

	return (tos);
}
/*
 * Store val in the inp_ip_tos field of inp.  No lock is taken or asserted.
 */
void
inp_ip_tos_set(struct inpcb *inp, int val)
{
inp->inp_ip_tos = val;
}
/*
 * Copy the connection 4-tuple out of inp.
 *
 * Four bytes each of inp_laddr and inp_faddr are copied into *laddr and
 * *faddr, and inp_lport / inp_fport are stored into *lp and *fp exactly as
 * held in the inpcb.
 * NOTE(review): presumably these are IPv4 addresses and all values are in
 * network byte order -- confirm with callers.
 */
void
inp_4tuple_get(const struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp)
{

	memcpy(laddr, &inp->inp_laddr, sizeof(*laddr));
	memcpy(faddr, &inp->inp_faddr, sizeof(*faddr));
	*lp = inp->inp_lport;
	*fp = inp->inp_fport;
}
void
so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg)
{
TAILQ_FOREACH(so, &so->so_comp, so_list)
func(so, arg);
}
/*
 * Function form of sototcpcb(): return the tcpcb for socket so.
 */
struct tcpcb *
so_sototcpcb(struct socket *so)
{
	struct tcpcb *tp;

	tp = sototcpcb(so);
	return (tp);
}
/*
 * Function form of sotoinpcb(): return the inpcb for socket so.
 */
struct inpcb *
so_sotoinpcb(struct socket *so)
{
	struct inpcb *inp;

	inp = sotoinpcb(so);
	return (inp);
}
/*
 * Return a pointer to the receive socket buffer embedded in so.
 */
struct sockbuf *
so_sockbuf_rcv(struct socket *so)
{
	struct sockbuf *rcv = &so->so_rcv;

	return (rcv);
}
/*
 * Return a pointer to the send socket buffer embedded in so.
 */
struct sockbuf *
so_sockbuf_snd(struct socket *so)
{
	struct sockbuf *snd = &so->so_snd;

	return (snd);
}
/*
 * Return the so_state field of so.  No lock is taken.
 */
int
so_state_get(const struct socket *so)
{
	int state = so->so_state;

	return (state);
}
/*
 * Overwrite the so_state field of so with val.  No lock is taken.
 */
void
so_state_set(struct socket *so, int val)
{
so->so_state = val;
}
/*
 * Return the so_options field of so.  No lock is taken.
 */
int
so_options_get(const struct socket *so)
{
	int options = so->so_options;

	return (options);
}
/*
 * Overwrite the so_options field of so with val.  No lock is taken.
 */
void
so_options_set(struct socket *so, int val)
{
so->so_options = val;
}
int
so_error_get(const struct socket *so)
{
return (so->so_error);
}
/*
 * Overwrite the so_error field of so with val.  No lock is taken.
 */
void
so_error_set(struct socket *so, int val)
{
so->so_error = val;
}
/*
 * Return the so_linger field of so.  No lock is taken.
 */
int
so_linger_get(const struct socket *so)
{
	int linger = so->so_linger;

	return (linger);
}
/*
 * Overwrite the so_linger field of so with val.  No lock is taken.
 */
void
so_linger_set(struct socket *so, int val)
{
so->so_linger = val;
}
/*
 * Return the protocol switch pointer (so_proto) of so.
 */
struct protosw *
so_protosw_get(const struct socket *so)
{
	struct protosw *proto = so->so_proto;

	return (proto);
}
/*
 * Point so's protocol switch (so_proto) at val.  No lock is taken.
 */
void
so_protosw_set(struct socket *so, struct protosw *val)
{
so->so_proto = val;
}
/*
 * Function wrapper for sorwakeup(so).
 */
void
so_sorwakeup(struct socket *so)
{
sorwakeup(so);
}
/*
 * Function wrapper for sowwakeup(so).
 */
void
so_sowwakeup(struct socket *so)
{
sowwakeup(so);
}
/*
 * Function wrapper for sorwakeup_locked(so).
 * NOTE(review): presumably the caller must already hold the receive
 * sockbuf lock, which the callee releases -- confirm.
 */
void
so_sorwakeup_locked(struct socket *so)
{
sorwakeup_locked(so);
}
/*
 * Function wrapper for sowwakeup_locked(so).
 * NOTE(review): presumably the caller must already hold the send sockbuf
 * lock, which the callee releases -- confirm.
 */
void
so_sowwakeup_locked(struct socket *so)
{
sowwakeup_locked(so);
}
/*
 * Function wrapper for SOCK_LOCK(so).
 */
void
so_lock(struct socket *so)
{
SOCK_LOCK(so);
}
/*
 * Function wrapper for SOCK_UNLOCK(so).
 */
void
so_unlock(struct socket *so)
{
SOCK_UNLOCK(so);
}
/*
 * Function wrapper for SOCKBUF_LOCK(sb).
 */
void
sockbuf_lock(struct sockbuf *sb)
{
SOCKBUF_LOCK(sb);
}
/*
 * Function wrapper for SOCKBUF_LOCK_ASSERT(sb).
 */
void
sockbuf_lock_assert(struct sockbuf *sb)
{
SOCKBUF_LOCK_ASSERT(sb);
}
/*
 * Function wrapper for SOCKBUF_UNLOCK(sb).
 */
void
sockbuf_unlock(struct sockbuf *sb)
{
SOCKBUF_UNLOCK(sb);
}
/*
 * Function wrapper for sbspace(sb); the result is returned as an int.
 */
int
sockbuf_sbspace(struct sockbuf *sb)
{
	int space;

	space = sbspace(sb);
	return (space);
}
/*
 * Call syncache_expand() with the tcbinfo write lock held.
 *
 * All arguments are passed through unchanged and the callee's return
 * value is handed back to the caller after the lock is dropped.
 */
int
syncache_offload_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
    struct socket **lsop, struct mbuf *m)
{
	int result;

	INP_INFO_WLOCK(&tcbinfo);
	result = syncache_expand(inc, to, th, lsop, m);
	INP_INFO_WUNLOCK(&tcbinfo);

	return (result);
}

View File

@ -0,0 +1,205 @@
/* $FreeBSD$ */
#ifndef CXGB_TCP_OFFLOAD_H_
#define CXGB_TCP_OFFLOAD_H_
/*
 * Forward declarations for the structures that appear only as pointers in
 * the prototypes in this header.  Declaring them at file scope keeps the
 * header usable regardless of include order; without this a tag first
 * seen inside a parameter list would be scoped to that one prototype.
 */
struct tcpcb;
struct socket;
struct sockbuf;
struct inpcb;
struct protosw;
struct in_conninfo;
struct mbuf;
void tcp_offload_twstart(struct tcpcb *tp);
void tcp_offload_twstart_disconnect(struct tcpcb *tp);
struct tcpcb *tcp_offload_close(struct tcpcb *tp);
struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg);
struct socket *inp_inpcbtosocket(struct inpcb *inp);
struct tcpcb *inp_inpcbtotcpcb(struct inpcb *inp);
int inp_ip_tos_get(const struct inpcb *);
void inp_ip_tos_set(struct inpcb *, int);
void inp_4tuple_get(const struct inpcb *inp, uint32_t *, uint16_t *, uint32_t *, uint16_t *);
struct tcpcb *so_sototcpcb(struct socket *so);
struct inpcb *so_sotoinpcb(struct socket *so);
struct sockbuf *so_sockbuf_snd(struct socket *);
struct sockbuf *so_sockbuf_rcv(struct socket *);
int so_state_get(const struct socket *);
void so_state_set(struct socket *, int);
int so_options_get(const struct socket *);
void so_options_set(struct socket *, int);
int so_error_get(const struct socket *);
void so_error_set(struct socket *, int);
int so_linger_get(const struct socket *);
void so_linger_set(struct socket *, int);
struct protosw *so_protosw_get(const struct socket *);
void so_protosw_set(struct socket *, struct protosw *);
void so_sorwakeup_locked(struct socket *so);
void so_sowwakeup_locked(struct socket *so);
void so_sorwakeup(struct socket *so);
void so_sowwakeup(struct socket *so);
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
void sockbuf_lock(struct sockbuf *);
void sockbuf_lock_assert(struct sockbuf *);
void sockbuf_unlock(struct sockbuf *);
int sockbuf_sbspace(struct sockbuf *);
struct tcphdr;
struct tcpopt;
int syncache_offload_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
#ifndef _SYS_SOCKETVAR_H_
#include <sys/selinfo.h>
#include <sys/sx.h>
/*
 * Default upper bound on the size of a socket buffer.
 */
#define SB_MAX (256*1024) /* default for max chars in sockbuf */
/*
* Constants for sb_flags field of struct sockbuf.
*/
#define SB_WAIT 0x04 /* someone is waiting for data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
#define SB_UPCALL 0x20 /* someone wants an upcall */
#define SB_NOINTR 0x40 /* operations not interruptible */
#define SB_AIO 0x80 /* AIO operations queued */
#define SB_KNOTE 0x100 /* kernel note attached */
#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
/*
 * Socket buffer descriptor.  This definition is only compiled when
 * <sys/socketvar.h> has not been included (it sits inside the
 * #ifndef _SYS_SOCKETVAR_H_ section of this header).
 * NOTE(review): presumably it must stay layout-compatible with the
 * struct sockbuf in <sys/socketvar.h> -- re-check whenever either changes.
 */
struct sockbuf {
struct selinfo sb_sel; /* process selecting read/write */
struct mtx sb_mtx; /* sockbuf lock */
struct sx sb_sx; /* prevent I/O interlacing */
short sb_state; /* (c/d) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (c/d) the mbuf chain */
struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last
* record in socket buffer */
struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */
u_int sb_cc; /* (c/d) actual chars in buffer */
u_int sb_hiwat; /* (c/d) max actual char count */
u_int sb_mbcnt; /* (c/d) chars of mbufs used */
u_int sb_mbmax; /* (c/d) max chars of mbufs to use */
u_int sb_ctl; /* (c/d) non-data chars in buffer */
int sb_lowat; /* (c/d) low water mark */
int sb_timeo; /* (c/d) timeout for read/write */
short sb_flags; /* (c/d) flags, see below */
};
void sbappend(struct sockbuf *sb, struct mbuf *m);
void sbappend_locked(struct sockbuf *sb, struct mbuf *m);
void sbappendstream(struct sockbuf *sb, struct mbuf *m);
void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
void sbflush_locked(struct sockbuf *sb);
int sbwait(struct sockbuf *sb);
int sblock(struct sockbuf *, int);
void sbunlock(struct sockbuf *);
/*
 * Adjust the counters in sb to reflect the allocation of mbuf m:
 * sb_cc grows by m_len (and so does sb_ctl when m is neither MT_DATA nor
 * MT_OOBDATA); sb_mbcnt grows by MSIZE, plus the external buffer size when
 * M_EXT is set.
 * The macro expands to a bare { } block rather than do { } while (0), so
 * take care when using it as the body of an unbraced if/else.
 */
#define sballoc(sb, m) { \
(sb)->sb_cc += (m)->m_len; \
if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
(sb)->sb_ctl += (m)->m_len; \
(sb)->sb_mbcnt += MSIZE; \
if ((m)->m_flags & M_EXT) \
(sb)->sb_mbcnt += (m)->m_ext.ext_size; \
}
/*
 * Adjust the counters in sb to reflect the freeing of mbuf m; the inverse
 * of sballoc() for sb_cc, sb_ctl and sb_mbcnt.
 * In addition, if m is the mbuf sb_sndptr refers to, sb_sndptr and
 * sb_sndptroff are reset; after that check, a non-zero sb_sndptroff is
 * reduced by m_len.
 * The macro expands to a bare { } block rather than do { } while (0), so
 * take care when using it as the body of an unbraced if/else.
 */
#define sbfree(sb, m) { \
(sb)->sb_cc -= (m)->m_len; \
if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
(sb)->sb_ctl -= (m)->m_len; \
(sb)->sb_mbcnt -= MSIZE; \
if ((m)->m_flags & M_EXT) \
(sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
if ((sb)->sb_sndptr == (m)) { \
(sb)->sb_sndptr = NULL; \
(sb)->sb_sndptroff = 0; \
} \
if ((sb)->sb_sndptroff != 0) \
(sb)->sb_sndptroff -= (m)->m_len; \
}
#define SS_NOFDREF 0x0001 /* no file table ref any more */
#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */
#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */
#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */
#define SS_NBIO 0x0100 /* non-blocking ops */
#define SS_ASYNC 0x0200 /* async i/o notify */
#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */
#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */
/*
* Protocols can mark a socket as SS_PROTOREF to indicate that, following
* pru_detach, they still want the socket to persist, and will free it
* themselves when they are done. Protocols should only ever call sofree()
* following setting this flag in pru_detach(), and never otherwise, as
* sofree() bypasses socket reference counting.
*/
#define SS_PROTOREF 0x4000 /* strong protocol reference */
/*
* Socket state bits now stored in the socket buffer state field.
*/
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
#define SBS_RCVATMARK 0x0040 /* at mark on input */
/*
 * Direction and argument bundle describing one [gs]etsockopt request.
 * This definition is only compiled when <sys/socketvar.h> has not been
 * included (it sits inside the #ifndef _SYS_SOCKETVAR_H_ section).
 */
enum sopt_dir { SOPT_GET, SOPT_SET };
struct sockopt {
enum sopt_dir sopt_dir; /* is this a get or a set? */
int sopt_level; /* second arg of [gs]etsockopt */
int sopt_name; /* third arg of [gs]etsockopt */
void *sopt_val; /* fourth arg of [gs]etsockopt */
size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */
struct thread *sopt_td; /* calling thread or null if kernel */
};
int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);
int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len);
void soisconnected(struct socket *so);
void soisconnecting(struct socket *so);
void soisdisconnected(struct socket *so);
void soisdisconnecting(struct socket *so);
void socantrcvmore(struct socket *so);
void socantrcvmore_locked(struct socket *so);
void socantsendmore(struct socket *so);
void socantsendmore_locked(struct socket *so);
#endif /* !_SYS_SOCKETVAR_H_ */
#endif /* CXGB_TCP_OFFLOAD_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -67,7 +67,6 @@ __FBSDID("$FreeBSD$");
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/cxgb_l2t.h>
#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
#include <dev/cxgb/ulp/tom/cxgb_tom.h>
#include <dev/cxgb/ulp/tom/cxgb_defs.h>

View File

@ -6,10 +6,9 @@ CXGB = ${.CURDIR}/../../../dev/cxgb
KMOD= if_cxgb
SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c
SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c cxgb_l2t.c
SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h opt_global.h
SRCS+= uipc_mvec.c cxgb_support.c
SRCS+= cxgb_multiq.c
SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c
SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h
SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c
CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB} -DSMP
CFLAGS+= -DDISABLE_MBUF_IOVEC

View File

@ -5,7 +5,7 @@ TOM = ${.CURDIR}/../../../dev/cxgb/ulp/tom
KMOD= tom
SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
SRCS+= cxgb_ddp.c cxgb_vm.c
SRCS+= cxgb_ddp.c cxgb_vm.c cxgb_l2t.c cxgb_tcp_offload.c
SRCS+= opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h opt_mac.h
SRCS+= opt_tcpdebug.h opt_ddb.h opt_sched.h opt_global.h opt_ktr.h
SRCS+= device_if.h bus_if.h pci_if.h

View File

@ -229,6 +229,12 @@ int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
* Connection is offloaded
*/
#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
/*
* hackish way of allowing this file to also be included by TOE
* which needs to be kept ignorant of socket implementation details
*/
#ifdef _SYS_SOCKETVAR_H_
/*
* The socket has not been marked as "do not offload"
*/
@ -324,7 +330,7 @@ tcp_offload_listen_close(struct tcpcb *tp)
EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
#endif
}
#undef tp_offload
#undef SO_OFFLOADABLE
#endif /* _SYS_SOCKETVAR_H_ */
#undef tp_offload
#endif /* _NETINET_TCP_OFFLOAD_H_ */