From 46b0a854ccd07858a429beb69c992e80a742006d Mon Sep 17 00:00:00 2001 From: Kip Macy Date: Sat, 19 Apr 2008 03:22:43 +0000 Subject: [PATCH] move cxgb_lt2.[ch] from NIC to TOE move most offload functionality from NIC to TOE factor out all socket and inpcb direct access factor out access to locking in incpb, pcbinfo, and sockbuf --- sys/conf/files | 1 - sys/dev/cxgb/cxgb_main.c | 16 +- sys/dev/cxgb/cxgb_offload.c | 1130 ++--------------------- sys/dev/cxgb/cxgb_offload.h | 4 +- sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c | 953 +++++++++++-------- sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c | 258 ++++-- sys/dev/cxgb/ulp/tom/cxgb_ddp.c | 122 +-- sys/dev/cxgb/ulp/tom/cxgb_defs.h | 14 +- sys/dev/cxgb/{ => ulp/tom}/cxgb_l2t.c | 1 - sys/dev/cxgb/{ => ulp/tom}/cxgb_l2t.h | 2 +- sys/dev/cxgb/ulp/tom/cxgb_listen.c | 1 - sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h | 18 +- sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c | 360 ++++++++ sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h | 205 ++++ sys/dev/cxgb/ulp/tom/cxgb_tom.c | 1065 ++++++++++++++++++++- sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c | 1 - sys/modules/cxgb/cxgb/Makefile | 7 +- sys/modules/cxgb/tom/Makefile | 2 +- sys/netinet/tcp_offload.h | 10 +- 19 files changed, 2461 insertions(+), 1709 deletions(-) rename sys/dev/cxgb/{ => ulp/tom}/cxgb_l2t.c (99%) rename sys/dev/cxgb/{ => ulp/tom}/cxgb_l2t.h (99%) create mode 100644 sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c create mode 100644 sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h diff --git a/sys/conf/files b/sys/conf/files index c1ca81e329a3..f2f6f1147b01 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -518,7 +518,6 @@ dev/cs/if_cs_isa.c optional cs isa dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci dev/cxgb/cxgb_offload.c optional cxgb pci -dev/cxgb/cxgb_l2t.c optional cxgb pci dev/cxgb/cxgb_lro.c optional cxgb pci dev/cxgb/cxgb_sge.c optional cxgb pci dev/cxgb/cxgb_multiq.c optional cxgb pci diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index 
5bea6b1b9df7..838ab519b665 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -9,7 +9,7 @@ modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -2. Neither the name of the Chelsio Corporation nor the names of its + 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -723,9 +723,10 @@ cxgb_free(struct adapter *sc) printf("cxgb_free: DEVMAP_BIT not set\n"); } else printf("not offloading set\n"); - +#ifdef notyet if (sc->flags & CXGB_OFLD_INIT) cxgb_offload_deactivate(sc); +#endif free(sc->filters, M_DEVBUF); t3_sge_free(sc); @@ -1732,9 +1733,6 @@ offload_open(struct port_info *pi) t3_tp_set_offload_mode(adapter, 1); tdev->lldev = pi->ifp; - err = cxgb_offload_activate(adapter); - if (err) - goto out; init_port_mtus(adapter); t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd, @@ -1743,10 +1741,6 @@ offload_open(struct port_info *pi) adapter->port[0].ifp->if_mtu : 0xffff); init_smt(adapter); - /* Call back all registered clients */ - cxgb_add_clients(tdev); - -out: /* restore them in case the offload module has changed them */ if (err) { t3_tp_set_offload_mode(adapter, 0); @@ -1764,8 +1758,6 @@ offload_close(struct t3cdev *tdev) if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) return (0); - /* Call back all registered clients */ - cxgb_remove_clients(tdev); tdev->lldev = NULL; cxgb_set_dummy_ops(tdev); t3_tp_set_offload_mode(adapter, 0); diff --git a/sys/dev/cxgb/cxgb_offload.c 
b/sys/dev/cxgb/cxgb_offload.c index 1b6daf111944..68572c242eaa 100644 --- a/sys/dev/cxgb/cxgb_offload.c +++ b/sys/dev/cxgb/cxgb_offload.c @@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$"); #include #endif -#include #include #define VALIDATE_TID 0 @@ -71,14 +70,11 @@ MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio 10 Gigabit Ethernet and services"); TAILQ_HEAD(, cxgb_client) client_list; TAILQ_HEAD(, t3cdev) ofld_dev_list; -TAILQ_HEAD(, adapter) adapter_list; + static struct mtx cxgb_db_lock; -static struct rwlock adapter_list_lock; -static const unsigned int MAX_ATIDS = 64 * 1024; -static const unsigned int ATID_BASE = 0x100000; static int inited = 0; static inline int @@ -89,6 +85,26 @@ offload_activated(struct t3cdev *tdev) return (isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)); } +static inline void +register_tdev(struct t3cdev *tdev) +{ + static int unit; + + mtx_lock(&cxgb_db_lock); + snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++); + TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry); + mtx_unlock(&cxgb_db_lock); +} + +static inline void +unregister_tdev(struct t3cdev *tdev) +{ + mtx_lock(&cxgb_db_lock); + TAILQ_REMOVE(&ofld_dev_list, tdev, entry); + mtx_unlock(&cxgb_db_lock); +} + +#ifdef TOE_ENABLED /** * cxgb_register_client - register an offload client * @client: the client @@ -178,291 +194,24 @@ cxgb_remove_clients(struct t3cdev *tdev) } mtx_unlock(&cxgb_db_lock); } +#endif -static int -is_offloading(struct ifnet *ifp) +/** + * cxgb_ofld_recv - process n received offload packets + * @dev: the offload device + * @m: an array of offload packets + * @n: the number of offload packets + * + * Process an array of ingress offload packets. Each packet is forwarded + * to any active network taps and then passed to the offload device's receive + * method. We optimize passing packets to the receive method by passing + * it the whole array at once except when there are active taps. 
+ */ +int +cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n) { - struct adapter *adapter; - int port; - rw_rlock(&adapter_list_lock); - TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) { - for_each_port(adapter, port) { - if (ifp == adapter->port[port].ifp) { - rw_runlock(&adapter_list_lock); - return 1; - } - } - } - rw_runlock(&adapter_list_lock); - return 0; -} - -static struct ifnet * -get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan) -{ - int i; - - for_each_port(adapter, i) { -#ifdef notyet - const struct vlan_group *grp; -#endif - const struct port_info *p = &adapter->port[i]; - struct ifnet *ifp = p->ifp; - - if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) { -#ifdef notyet - - if (vlan && vlan != EVL_VLID_MASK) { - grp = p->vlan_grp; - dev = grp ? grp->vlan_devices[vlan] : NULL; - } else - while (dev->master) - dev = dev->master; -#endif - return (ifp); - } - } - return (NULL); -} - -static inline void -failover_fixup(adapter_t *adapter, int port) -{ - if (adapter->params.rev == 0) { - struct ifnet *ifp = adapter->port[port].ifp; - struct cmac *mac = &adapter->port[port].mac; - if (!(ifp->if_flags & IFF_UP)) { - /* Failover triggered by the interface ifdown */ - t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset, - F_TXEN); - t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset); - } else { - /* Failover triggered by the interface link down */ - t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0); - t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset); - t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, - F_RXEN); - } - } -} - -static int -cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data) -{ - int ret = 0; - struct ulp_iscsi_info *uiip = data; - - switch (req) { - case ULP_ISCSI_GET_PARAMS: - uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT); - uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT); - uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK); - /* - * On tx, the iscsi pdu has 
to be <= tx page size and has to - * fit into the Tx PM FIFO. - */ - uiip->max_txsz = min(adapter->params.tp.tx_pg_size, - t3_read_reg(adapter, A_PM1_TX_CFG) >> 17); - /* on rx, the iscsi pdu has to be < rx page size and the - whole pdu + cpl headers has to fit into one sge buffer */ - /* also check the max rx data length programmed in TP */ - uiip->max_rxsz = min(uiip->max_rxsz, - ((t3_read_reg(adapter, A_TP_PARA_REG2)) - >> S_MAXRXDATA) & M_MAXRXDATA); - break; - case ULP_ISCSI_SET_PARAMS: - t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask); - break; - default: - ret = (EOPNOTSUPP); - } - return ret; -} - -/* Response queue used for RDMA events. */ -#define ASYNC_NOTIF_RSPQ 0 - -static int -cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data) -{ - int ret = 0; - - switch (req) { - case RDMA_GET_PARAMS: { - struct rdma_info *req = data; - - req->udbell_physbase = rman_get_start(adapter->udbs_res); - req->udbell_len = rman_get_size(adapter->udbs_res); - req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT); - req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT); - req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT); - req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT); - req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT); - req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT); - req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break; - } - case RDMA_CQ_OP: { - struct rdma_cq_op *req = data; - - /* may be called in any context */ - mtx_lock_spin(&adapter->sge.reg_lock); - ret = t3_sge_cqcntxt_op(adapter, req->id, req->op, - req->credits); - mtx_unlock_spin(&adapter->sge.reg_lock); - break; - } - case RDMA_GET_MEM: { - struct ch_mem_range *t = data; - struct mc7 *mem; - - if ((t->addr & 7) || (t->len & 7)) - return (EINVAL); - if (t->mem_id == MEM_CM) - mem = &adapter->cm; - else if (t->mem_id == MEM_PMRX) - mem = &adapter->pmrx; - else if (t->mem_id == MEM_PMTX) - mem = 
&adapter->pmtx; - else - return (EINVAL); - - ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf); - if (ret) - return (ret); - break; - } - case RDMA_CQ_SETUP: { - struct rdma_cq_setup *req = data; - - mtx_lock_spin(&adapter->sge.reg_lock); - ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr, - req->size, ASYNC_NOTIF_RSPQ, - req->ovfl_mode, req->credits, - req->credit_thres); - mtx_unlock_spin(&adapter->sge.reg_lock); - break; - } - case RDMA_CQ_DISABLE: - mtx_lock_spin(&adapter->sge.reg_lock); - ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data); - mtx_unlock_spin(&adapter->sge.reg_lock); - break; - case RDMA_CTRL_QP_SETUP: { - struct rdma_ctrlqp_setup *req = data; - - mtx_lock_spin(&adapter->sge.reg_lock); - ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0, - SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ, - req->base_addr, req->size, - FW_RI_TID_START, 1, 0); - mtx_unlock_spin(&adapter->sge.reg_lock); - break; - } - default: - ret = EOPNOTSUPP; - } - return (ret); -} - -static int -cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data) -{ - struct adapter *adapter = tdev2adap(tdev); - struct tid_range *tid; - struct mtutab *mtup; - struct iff_mac *iffmacp; - struct ddp_params *ddpp; - struct adap_ports *ports; - struct ofld_page_info *rx_page_info; - struct tp_params *tp = &adapter->params.tp; - int port; - - switch (req) { - case GET_MAX_OUTSTANDING_WR: - *(unsigned int *)data = FW_WR_NUM; - break; - case GET_WR_LEN: - *(unsigned int *)data = WR_FLITS; - break; - case GET_TX_MAX_CHUNK: - *(unsigned int *)data = 1 << 20; /* 1MB */ - break; - case GET_TID_RANGE: - tid = data; - tid->num = t3_mc5_size(&adapter->mc5) - - adapter->params.mc5.nroutes - - adapter->params.mc5.nfilters - - adapter->params.mc5.nservers; - tid->base = 0; - break; - case GET_STID_RANGE: - tid = data; - tid->num = adapter->params.mc5.nservers; - tid->base = t3_mc5_size(&adapter->mc5) - tid->num - - adapter->params.mc5.nfilters - - 
adapter->params.mc5.nroutes; - break; - case GET_L2T_CAPACITY: - *(unsigned int *)data = 2048; - break; - case GET_MTUS: - mtup = data; - mtup->size = NMTUS; - mtup->mtus = adapter->params.mtus; - break; - case GET_IFF_FROM_MAC: - iffmacp = data; - iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr, - iffmacp->vlan_tag & EVL_VLID_MASK); - break; - case GET_DDP_PARAMS: - ddpp = data; - ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT); - ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT); - ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK); - break; - case GET_PORTS: - ports = data; - ports->nports = adapter->params.nports; - for_each_port(adapter, port) - ports->lldevs[port] = adapter->port[port].ifp; - break; - case FAILOVER: - port = *(int *)data; - t3_port_failover(adapter, port); - failover_fixup(adapter, port); - break; - case FAILOVER_DONE: - port = *(int *)data; - t3_failover_done(adapter, port); - break; - case FAILOVER_CLEAR: - t3_failover_clear(adapter); - break; - case GET_RX_PAGE_INFO: - rx_page_info = data; - rx_page_info->page_size = tp->rx_pg_size; - rx_page_info->num = tp->rx_num_pgs; - break; - case ULP_ISCSI_GET_PARAMS: - case ULP_ISCSI_SET_PARAMS: - if (!offload_running(adapter)) - return (EAGAIN); - return cxgb_ulp_iscsi_ctl(adapter, req, data); - case RDMA_GET_PARAMS: - case RDMA_CQ_OP: - case RDMA_CQ_SETUP: - case RDMA_CQ_DISABLE: - case RDMA_CTRL_QP_SETUP: - case RDMA_GET_MEM: - if (!offload_running(adapter)) - return (EAGAIN); - return cxgb_rdma_ctl(adapter, req, data); - default: - return (EOPNOTSUPP); - } - return 0; + return dev->recv(dev, m, n); } /* @@ -491,195 +240,6 @@ cxgb_set_dummy_ops(struct t3cdev *dev) dev->arp_update = dummy_neigh_update; } -/* - * Free an active-open TID. 
- */ -void * -cxgb_free_atid(struct t3cdev *tdev, int atid) -{ - struct tid_info *t = &(T3C_DATA(tdev))->tid_maps; - union active_open_entry *p = atid2entry(t, atid); - void *ctx = p->toe_tid.ctx; - - mtx_lock(&t->atid_lock); - p->next = t->afree; - t->afree = p; - t->atids_in_use--; - mtx_unlock(&t->atid_lock); - - return ctx; -} - -/* - * Free a server TID and return it to the free pool. - */ -void -cxgb_free_stid(struct t3cdev *tdev, int stid) -{ - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - union listen_entry *p = stid2entry(t, stid); - - mtx_lock(&t->stid_lock); - p->next = t->sfree; - t->sfree = p; - t->stids_in_use--; - mtx_unlock(&t->stid_lock); -} - -/* - * Free a server TID and return it to the free pool. - */ -void * -cxgb_get_lctx(struct t3cdev *tdev, int stid) -{ - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - union listen_entry *p = stid2entry(t, stid); - - return (p->toe_tid.ctx); -} - -void -cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client, - void *ctx, unsigned int tid) -{ - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - - t->tid_tab[tid].client = client; - t->tid_tab[tid].ctx = ctx; - atomic_add_int(&t->tids_in_use, 1); -} - -/* - * Populate a TID_RELEASE WR. The mbuf must be already propely sized. 
- */ -static inline void -mk_tid_release(struct mbuf *m, unsigned int tid) -{ - struct cpl_tid_release *req; - - m_set_priority(m, CPL_PRIORITY_SETUP); - req = mtod(m, struct cpl_tid_release *); - m->m_pkthdr.len = m->m_len = sizeof(*req); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); -} - -static void -t3_process_tid_release_list(void *data, int pending) -{ - struct mbuf *m; - struct t3cdev *tdev = data; - struct t3c_data *td = T3C_DATA (tdev); - - mtx_lock(&td->tid_release_lock); - while (td->tid_release_list) { - struct toe_tid_entry *p = td->tid_release_list; - - td->tid_release_list = (struct toe_tid_entry *)p->ctx; - mtx_unlock(&td->tid_release_lock); - m = m_get(M_WAIT, MT_DATA); - mk_tid_release(m, p - td->tid_maps.tid_tab); - cxgb_ofld_send(tdev, m); - p->ctx = NULL; - mtx_lock(&td->tid_release_lock); - } - mtx_unlock(&td->tid_release_lock); -} - -/* use ctx as a next pointer in the tid release list */ -void -cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid) -{ - struct t3c_data *td = T3C_DATA (tdev); - struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid]; - - printf("queuing tid release\n"); - - mtx_lock(&td->tid_release_lock); - p->ctx = td->tid_release_list; - td->tid_release_list = p; - - if (!p->ctx) - taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task); - - mtx_unlock(&td->tid_release_lock); -} - -/* - * Remove a tid from the TID table. A client may defer processing its last - * CPL message if it is locked at the time it arrives, and while the message - * sits in the client's backlog the TID may be reused for another connection. - * To handle this we atomically switch the TID association if it still points - * to the original client context. 
- */ -void -cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid) -{ - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - - if (tid >= t->ntids) - panic("tid=%d >= t->ntids=%d", tid, t->ntids); - - if (tdev->type == T3A) - atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx); - else { - struct mbuf *m; - - m = m_get(M_NOWAIT, MT_DATA); - if (__predict_true(m != NULL)) { - mk_tid_release(m, tid); - CTR1(KTR_CXGB, "releasing tid=%u", tid); - - cxgb_ofld_send(tdev, m); - t->tid_tab[tid].ctx = NULL; - } else - cxgb_queue_tid_release(tdev, tid); - } - atomic_add_int(&t->tids_in_use, -1); -} - -int -cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client, - void *ctx) -{ - int atid = -1; - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - - mtx_lock(&t->atid_lock); - if (t->afree) { - union active_open_entry *p = t->afree; - - atid = (p - t->atid_tab) + t->atid_base; - t->afree = p->next; - p->toe_tid.ctx = ctx; - p->toe_tid.client = client; - t->atids_in_use++; - } - mtx_unlock(&t->atid_lock); - return atid; -} - -int -cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client, - void *ctx) -{ - int stid = -1; - struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; - - mtx_lock(&t->stid_lock); - if (t->sfree) { - union listen_entry *p = t->sfree; - - stid = (p - t->stid_tab) + t->stid_base; - t->sfree = p->next; - p->toe_tid.ctx = ctx; - p->toe_tid.client = client; - t->stids_in_use++; - } - mtx_unlock(&t->stid_lock); - return stid; -} - static int do_smt_write_rpl(struct t3cdev *dev, struct mbuf *m) { @@ -719,150 +279,6 @@ do_rte_write_rpl(struct t3cdev *dev, struct mbuf *m) return CPL_RET_BUF_DONE; } -static int -do_act_open_rpl(struct t3cdev *dev, struct mbuf *m) -{ - struct cpl_act_open_rpl *rpl = cplhdr(m); - unsigned int atid = G_TID(ntohl(rpl->atid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid); - if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers && 
- toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) { - return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m, - toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, CPL_ACT_OPEN_RPL); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_stid_rpl(struct t3cdev *dev, struct mbuf *m) -{ - union opcode_tid *p = cplhdr(m); - unsigned int stid = G_TID(ntohl(p->opcode_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, p->opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m) -{ - union opcode_tid *p = cplhdr(m); - unsigned int hwtid; - struct toe_tid_entry *toe_tid; - - DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode); - hwtid = G_TID(ntohl(p->opcode_tid)); - - toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] - (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, p->opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_cr(struct t3cdev *dev, struct mbuf *m) -{ - struct cpl_pass_accept_req *req = cplhdr(m); - unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) { - return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ] - (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - 
dev->name, CPL_PASS_ACCEPT_REQ); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_abort_req_rss(struct t3cdev *dev, struct mbuf *m) -{ - union opcode_tid *p = cplhdr(m); - unsigned int hwtid = G_TID(ntohl(p->opcode_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] - (dev, m, toe_tid->ctx); - } else { - struct cpl_abort_req_rss *req = cplhdr(m); - struct cpl_abort_rpl *rpl; - - struct mbuf *m = m_get(M_NOWAIT, MT_DATA); - if (!m) { - log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n"); - goto out; - } - - m_set_priority(m, CPL_PRIORITY_DATA); -#if 0 - __skb_put(skb, sizeof(struct cpl_abort_rpl)); -#endif - rpl = cplhdr(m); - rpl->wr.wr_hi = - htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req))); - OPCODE_TID(rpl) = - htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req))); - rpl->cmd = req->status; - cxgb_ofld_send(dev, m); - out: - return CPL_RET_BUF_DONE; - } -} - -static int -do_act_establish(struct t3cdev *dev, struct mbuf *m) -{ - struct cpl_act_establish *req; - unsigned int atid; - struct toe_tid_entry *toe_tid; - - req = cplhdr(m); - atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); - toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid); - if (toe_tid && toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[CPL_ACT_ESTABLISH]) { - printf("active establish callback\n"); - - return toe_tid->client->handlers[CPL_ACT_ESTABLISH] - (dev, m, toe_tid->ctx); - } else { - printf("toe_tid=%p\n", toe_tid); - - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, CPL_PASS_ACCEPT_REQ); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - - - static int do_set_tcb_rpl(struct t3cdev *dev, struct mbuf *m) { @@ -891,78 +307,6 @@ do_trace(struct t3cdev *dev, struct mbuf *m) return 0; 
} -static int -do_term(struct t3cdev *dev, struct mbuf *m) -{ - unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff; - unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[opcode]) { - return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } - return (0); -} - -static void -cxgb_arp_update_event(void *unused, struct rtentry *rt0, - uint8_t *enaddr, struct sockaddr *sa) -{ - - if (!is_offloading(rt0->rt_ifp)) - return; - - RT_ADDREF(rt0); - RT_UNLOCK(rt0); - cxgb_neigh_update(rt0, enaddr, sa); - RT_LOCK(rt0); - RT_REMREF(rt0); -} - - -static void -cxgb_redirect_event(void *unused, int event, struct rtentry *rt0, - struct rtentry *rt1, struct sockaddr *sa) -{ - /* - * ignore events on non-offloaded interfaces - */ - if (!is_offloading(rt0->rt_ifp)) - return; - - /* - * Cannot redirect to non-offload device. - */ - if (!is_offloading(rt1->rt_ifp)) { - log(LOG_WARNING, "%s: Redirect to non-offload" - "device ignored.\n", __FUNCTION__); - return; - } - - /* - * avoid LORs by dropping the route lock but keeping a reference - * - */ - RT_ADDREF(rt0); - RT_UNLOCK(rt0); - RT_ADDREF(rt1); - RT_UNLOCK(rt1); - - cxgb_redirect(rt0, rt1, sa); - cxgb_neigh_update(rt1, NULL, sa); - - RT_LOCK(rt0); - RT_REMREF(rt0); - RT_LOCK(rt1); - RT_REMREF(rt1); -} - /* * Process a received packet with an unknown/unexpected CPL opcode. 
*/ @@ -971,6 +315,7 @@ do_bad_cpl(struct t3cdev *dev, struct mbuf *m) { log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name, 0xFF & *mtod(m, uint32_t *)); + kdb_backtrace(); return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG); } @@ -979,20 +324,6 @@ do_bad_cpl(struct t3cdev *dev, struct mbuf *m) */ static cpl_handler_func cpl_handlers[256]; -/* - * Add a new handler to the CPL dispatch table. A NULL handler may be supplied - * to unregister an existing handler. - */ -void -t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h) -{ - if (opcode < NUM_CPL_CMDS) - cpl_handlers[opcode] = h ? h : do_bad_cpl; - else - log(LOG_ERR, "T3C: handler registration for " - "opcode %x failed\n", opcode); -} - /* * T3CDEV's receive method. */ @@ -1024,137 +355,17 @@ process_rx(struct t3cdev *dev, struct mbuf **m, int n) } /* - * Sends an sk_buff to a T3C driver after dealing with any active network taps. + * Add a new handler to the CPL dispatch table. A NULL handler may be supplied + * to unregister an existing handler. */ -int -cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m) -{ - int r; - - r = dev->send(dev, m); - return r; -} - -/** - * cxgb_ofld_recv - process n received offload packets - * @dev: the offload device - * @m: an array of offload packets - * @n: the number of offload packets - * - * Process an array of ingress offload packets. Each packet is forwarded - * to any active network taps and then passed to the offload device's receive - * method. We optimize passing packets to the receive method by passing - * it the whole array at once except when there are active taps. 
- */ -int -cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n) -{ - -#if defined(CONFIG_CHELSIO_T3) - if (likely(!netdev_nit)) - return dev->recv(dev, skb, n); - - for ( ; n; n--, skb++) { - skb[0]->dev = dev->lldev; - dev_queue_xmit_nit(skb[0], dev->lldev); - skb[0]->dev = NULL; - dev->recv(dev, skb, 1); - } - return 0; -#else - return dev->recv(dev, m, n); -#endif -} - void -cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa) +t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h) { - - if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) { - struct t3cdev *tdev = T3CDEV(rt->rt_ifp); - - PANIC_IF(!tdev); - t3_l2t_update(tdev, rt, enaddr, sa); - } -} - -static void -set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e) -{ - struct mbuf *m; - struct cpl_set_tcb_field *req; - - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) { - log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__); - return; - } - - m_set_priority(m, CPL_PRIORITY_CONTROL); - req = mtod(m, struct cpl_set_tcb_field *); - m->m_pkthdr.len = m->m_len = sizeof(*req); - - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_L2T_IX); - req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX)); - req->val = htobe64(V_TCB_L2T_IX(e->idx)); - tdev->send(tdev, m); -} - -void -cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa) -{ - struct ifnet *olddev, *newdev; - struct tid_info *ti; - struct t3cdev *tdev; - u32 tid; - int update_tcb; - struct l2t_entry *e; - struct toe_tid_entry *te; - - olddev = old->rt_ifp; - newdev = new->rt_ifp; - if (!is_offloading(olddev)) - return; - if (!is_offloading(newdev)) { - log(LOG_WARNING, "%s: Redirect to non-offload" - "device ignored.\n", __FUNCTION__); - return; - } - tdev = T3CDEV(olddev); - PANIC_IF(!tdev); - if (tdev != T3CDEV(newdev)) { - 
log(LOG_WARNING, "%s: Redirect to different " - "offload device ignored.\n", __FUNCTION__); - return; - } - - /* Add new L2T entry */ - e = t3_l2t_get(tdev, new, new->rt_ifp, sa); - if (!e) { - log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n", - __FUNCTION__); - return; - } - - /* Walk tid table and notify clients of dst change. */ - ti = &(T3C_DATA (tdev))->tid_maps; - for (tid=0; tid < ti->ntids; tid++) { - te = lookup_tid(ti, tid); - PANIC_IF(!te); - if (te->ctx && te->client && te->client->redirect) { - update_tcb = te->client->redirect(te->ctx, old, new, - e); - if (update_tcb) { - l2t_hold(L2DATA(tdev), e); - set_l2t_ix(tdev, tid, e); - } - } - } - l2t_release(L2DATA(tdev), e); + if (opcode < NUM_CPL_CMDS) + cpl_handlers[opcode] = h ? h : do_bad_cpl; + else + log(LOG_ERR, "T3C: handler registration for " + "opcode %x failed\n", opcode); } /* @@ -1177,196 +388,6 @@ cxgb_free_mem(void *addr) free(addr, M_CXGB); } -/* - * Allocate and initialize the TID tables. Returns 0 on success. - */ -static int -init_tid_tabs(struct tid_info *t, unsigned int ntids, - unsigned int natids, unsigned int nstids, - unsigned int atid_base, unsigned int stid_base) -{ - unsigned long size = ntids * sizeof(*t->tid_tab) + - natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab); - - t->tid_tab = cxgb_alloc_mem(size); - if (!t->tid_tab) - return (ENOMEM); - - t->stid_tab = (union listen_entry *)&t->tid_tab[ntids]; - t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids]; - t->ntids = ntids; - t->nstids = nstids; - t->stid_base = stid_base; - t->sfree = NULL; - t->natids = natids; - t->atid_base = atid_base; - t->afree = NULL; - t->stids_in_use = t->atids_in_use = 0; - atomic_set_int(&t->tids_in_use, 0); - mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF); - mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF); - - /* - * Setup the free lists for stid_tab and atid_tab. 
- */ - if (nstids) { - while (--nstids) - t->stid_tab[nstids - 1].next = &t->stid_tab[nstids]; - t->sfree = t->stid_tab; - } - if (natids) { - while (--natids) - t->atid_tab[natids - 1].next = &t->atid_tab[natids]; - t->afree = t->atid_tab; - } - return 0; -} - -static void -free_tid_maps(struct tid_info *t) -{ - mtx_destroy(&t->stid_lock); - mtx_destroy(&t->atid_lock); - cxgb_free_mem(t->tid_tab); -} - -static inline void -add_adapter(adapter_t *adap) -{ - rw_wlock(&adapter_list_lock); - TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry); - rw_wunlock(&adapter_list_lock); -} - -static inline void -remove_adapter(adapter_t *adap) -{ - rw_wlock(&adapter_list_lock); - TAILQ_REMOVE(&adapter_list, adap, adapter_entry); - rw_wunlock(&adapter_list_lock); -} - -int -cxgb_offload_activate(struct adapter *adapter) -{ - struct t3cdev *dev = &adapter->tdev; - int natids, err; - struct t3c_data *t; - struct tid_range stid_range, tid_range; - struct mtutab mtutab; - unsigned int l2t_capacity; - - t = malloc(sizeof(*t), M_CXGB, M_WAITOK|M_ZERO); - if (!t) - return (ENOMEM); - - dev->adapter = adapter; - - err = (EOPNOTSUPP); - if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 || - dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 || - dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 || - dev->ctl(dev, GET_MTUS, &mtutab) < 0 || - dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 || - dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) { - device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__); - goto out_free; - } - - err = (ENOMEM); - L2DATA(dev) = t3_init_l2t(l2t_capacity); - if (!L2DATA(dev)) { - device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__); - goto out_free; - } - natids = min(tid_range.num / 2, MAX_ATIDS); - err = init_tid_tabs(&t->tid_maps, tid_range.num, natids, - stid_range.num, ATID_BASE, stid_range.base); - if (err) { - device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__); - goto out_free_l2t; - } - - 
t->mtus = mtutab.mtus; - t->nmtus = mtutab.size; - - TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev); - mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF); - t->dev = dev; - - T3C_DATA (dev) = t; - dev->recv = process_rx; - dev->arp_update = t3_l2t_update; -#if 0 - offload_proc_dev_setup(dev); -#endif - /* Register netevent handler once */ - if (TAILQ_EMPTY(&adapter_list)) { -#if defined(CONFIG_CHELSIO_T3_MODULE) - if (prepare_arp_with_t3core()) - log(LOG_ERR, "Unable to set offload capabilities\n"); -#endif - } - CTR1(KTR_CXGB, "adding adapter %p", adapter); - add_adapter(adapter); - device_printf(adapter->dev, "offload started\n"); - adapter->flags |= CXGB_OFLD_INIT; -#if 0 - printf("failing as test\n"); - return (ENOMEM); -#endif - return (0); - -out_free_l2t: - t3_free_l2t(L2DATA(dev)); - L2DATA(dev) = NULL; -out_free: - free(t, M_CXGB); - return (err); - -} - -void -cxgb_offload_deactivate(struct adapter *adapter) -{ - struct t3cdev *tdev = &adapter->tdev; - struct t3c_data *t = T3C_DATA(tdev); - - printf("removing adapter %p\n", adapter); - remove_adapter(adapter); - if (TAILQ_EMPTY(&adapter_list)) { -#if defined(CONFIG_CHELSIO_T3_MODULE) - restore_arp_sans_t3core(); -#endif - } - free_tid_maps(&t->tid_maps); - T3C_DATA(tdev) = NULL; - t3_free_l2t(L2DATA(tdev)); - L2DATA(tdev) = NULL; - mtx_destroy(&t->tid_release_lock); - free(t, M_CXGB); -} - - -static inline void -register_tdev(struct t3cdev *tdev) -{ - static int unit; - - mtx_lock(&cxgb_db_lock); - snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++); - TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry); - mtx_unlock(&cxgb_db_lock); -} - -static inline void -unregister_tdev(struct t3cdev *tdev) -{ - mtx_lock(&cxgb_db_lock); - TAILQ_REMOVE(&ofld_dev_list, tdev, entry); - mtx_unlock(&cxgb_db_lock); -} - static __inline int adap2type(struct adapter *adapter) { @@ -1385,36 +406,28 @@ adap2type(struct adapter *adapter) break; } return type; -} - +} + 
void cxgb_adapter_ofld(struct adapter *adapter) { struct t3cdev *tdev = &adapter->tdev; cxgb_set_dummy_ops(tdev); - tdev->send = t3_offload_tx; - tdev->ctl = cxgb_offload_ctl; tdev->type = adap2type(adapter); - - register_tdev(tdev); -#if 0 - offload_proc_dev_init(tdev); -#endif + tdev->adapter = adapter; + register_tdev(tdev); + } void cxgb_adapter_unofld(struct adapter *adapter) { struct t3cdev *tdev = &adapter->tdev; -#if 0 - offload_proc_dev_cleanup(tdev); - offload_proc_dev_exit(tdev); -#endif + tdev->recv = NULL; tdev->arp_update = NULL; - - unregister_tdev(tdev); + unregister_tdev(tdev); } void @@ -1426,10 +439,9 @@ cxgb_offload_init(void) return; mtx_init(&cxgb_db_lock, "ofld db", NULL, MTX_DEF); - rw_init(&adapter_list_lock, "ofld adap list"); + TAILQ_INIT(&client_list); TAILQ_INIT(&ofld_dev_list); - TAILQ_INIT(&adapter_list); for (i = 0; i < 0x100; ++i) cpl_handlers[i] = do_bad_cpl; @@ -1437,40 +449,10 @@ cxgb_offload_init(void) t3_register_cpl_handler(CPL_SMT_WRITE_RPL, do_smt_write_rpl); t3_register_cpl_handler(CPL_RTE_WRITE_RPL, do_rte_write_rpl); t3_register_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl); - t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl); - t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl); - t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr); - t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl); - t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl); - t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl); - t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl); - t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl); - t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl); - t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl); - t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); - t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl); - t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl); - t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss); - 
t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); + t3_register_cpl_handler(CPL_SET_TCB_RPL, do_set_tcb_rpl); - t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term); - t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl); t3_register_cpl_handler(CPL_TRACE_PKT, do_trace); - t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl); - t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl); - t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl); - t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl); - t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl); - EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event, - NULL, EVENTHANDLER_PRI_ANY); - EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event, - NULL, EVENTHANDLER_PRI_ANY); - -#if 0 - if (offload_proc_init()) - log(LOG_WARNING, "Unable to create /proc/net/cxgb3 dir\n"); -#endif } void @@ -1481,10 +463,6 @@ cxgb_offload_exit(void) return; mtx_destroy(&cxgb_db_lock); - rw_destroy(&adapter_list_lock); -#if 0 - offload_proc_cleanup(); -#endif } MODULE_VERSION(if_cxgb, 1); diff --git a/sys/dev/cxgb/cxgb_offload.h b/sys/dev/cxgb/cxgb_offload.h index 727382583988..b8e0999585b2 100644 --- a/sys/dev/cxgb/cxgb_offload.h +++ b/sys/dev/cxgb/cxgb_offload.h @@ -36,13 +36,13 @@ $FreeBSD$ #ifdef CONFIG_DEFINED #include #include -#include +#include #include #include #else #include #include -#include +#include #include #include #endif diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c index 44a6c98d12d5..e1d78f8e0585 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -72,7 +71,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -85,6 +83,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + /* * For ULP connections HW may add headers, e.g., for digests, that aren't part * of the messages sent by the host but that are part of the TCP payload and @@ -130,12 +130,16 @@ static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) -#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) +#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS) #define VALIDATE_SEQ 0 #define VALIDATE_SOCK(so) #define DEBUG_WR 0 +#define TCP_TIMEWAIT 1 +#define TCP_CLOSE 2 +#define TCP_DROP 3 + extern int tcp_do_autorcvbuf; extern int tcp_do_autosndbuf; extern int tcp_autorcvbuf_max; @@ -149,7 +153,7 @@ static void handle_syncache_event(int event, void *arg); static inline void SBAPPEND(struct sockbuf *sb, struct mbuf *n) { - struct mbuf * m; + struct mbuf *m; m = sb->sb_mb; while (m) { @@ -171,6 +175,7 @@ SBAPPEND(struct sockbuf *sb, struct mbuf *n) } sbappend_locked(sb, n); m = sb->sb_mb; + while (m) { KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", m->m_next, m->m_nextpkt, m->m_flags)); @@ -208,6 +213,7 @@ rtalloc2(struct sockaddr *dst, int report, u_long ignflags) return (rt); } #endif + /* * Determine whether to send a CPL message now or defer it. A message is * deferred if the connection is in SYN_SENT since we don't know the TID yet. 
@@ -255,12 +261,14 @@ mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) static inline void make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct tx_data_wr *req; - + struct sockbuf *snd; + inp_lock_assert(tp->t_inpcb); - + snd = so_sockbuf_snd(so); + req = mtod(m, struct tx_data_wr *); m->m_len = sizeof(*req); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -280,10 +288,12 @@ make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) /* Sendbuffer is in units of 32KB. */ - if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) + if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); - else - req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15)); + else { + req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15)); + } + toep->tp_flags |= TP_DATASENT; } } @@ -293,44 +303,52 @@ make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) int t3_push_frames(struct socket *so, int req_completion) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct mbuf *tail, *m0, *last; struct t3cdev *cdev; struct tom_data *d; - int i, bytes, count, total_bytes; + int state, bytes, count, total_bytes; bus_dma_segment_t segs[TX_MAX_SEGS], *segp; - + struct sockbuf *snd; + if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { DPRINTF("tcp state=%d\n", tp->t_state); return (0); } - if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { + state = so_state_get(so); + + if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { DPRINTF("disconnecting\n"); return (0); } - inp_lock_assert(tp->t_inpcb); - SOCKBUF_LOCK(&so->so_snd); - d = TOM_DATA(TOE_DEV(so)); + + snd = so_sockbuf_snd(so); + sockbuf_lock(snd); + + d = 
TOM_DATA(toep->tp_toedev); cdev = d->cdev; - last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; + + last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb; + total_bytes = 0; DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", - toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last); + toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last); - if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) { + if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) { KASSERT(tail, ("sbdrop error")); last = tail = tail->m_next; } if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); - SOCKBUF_UNLOCK(&so->so_snd); + sockbuf_unlock(snd); + return (0); } @@ -339,7 +357,7 @@ t3_push_frames(struct socket *so, int req_completion) count = bytes = 0; segp = segs; if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { - SOCKBUF_UNLOCK(&so->so_snd); + sockbuf_unlock(snd); return (0); } /* @@ -384,19 +402,19 @@ t3_push_frames(struct socket *so, int req_completion) m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); if (tail) { - so->so_snd.sb_sndptr = tail; + snd->sb_sndptr = tail; toep->tp_m_last = NULL; } else - toep->tp_m_last = so->so_snd.sb_sndptr = last; + toep->tp_m_last = snd->sb_sndptr = last; DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); - so->so_snd.sb_sndptroff += bytes; + snd->sb_sndptroff += bytes; total_bytes += bytes; toep->tp_write_seq += bytes; CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", - toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff); + toep->tp_wr_avail, count, mbuf_wrs[count], tail, snd->sb_sndptr, snd->sb_sndptroff); if (tail) CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); @@ -405,6 +423,10 @@ t3_push_frames(struct socket *so, int req_completion) total_bytes, 
toep->tp_m_last, tp->snd_una); +#ifdef KTR +{ + int i; + i = 0; while (i < count && m_get_sgllen(m0)) { if ((count - i) >= 3) { @@ -425,7 +447,8 @@ t3_push_frames(struct socket *so, int req_completion) } } - +} +#endif /* * remember credits used */ @@ -450,7 +473,7 @@ t3_push_frames(struct socket *so, int req_completion) bytes, count); l2t_send(cdev, m0, toep->tp_l2t); } - SOCKBUF_UNLOCK(&so->so_snd); + sockbuf_unlock(snd); return (total_bytes); } @@ -466,14 +489,14 @@ close_conn(struct socket *so) struct mbuf *m; struct cpl_close_con_req *req; struct tom_data *d; - struct inpcb *inp = sotoinpcb(so); + struct inpcb *inp = so_sotoinpcb(so); struct tcpcb *tp; struct toepcb *toep; unsigned int tid; inp_wlock(inp); - tp = sototcpcb(so); + tp = so_sototcpcb(so); toep = tp->t_toe; if (tp->t_state != TCPS_SYN_SENT) @@ -489,6 +512,9 @@ close_conn(struct socket *so) d = TOM_DATA(toep->tp_toedev); m = m_gethdr_nofail(sizeof(*req)); + m_set_priority(m, CPL_PRIORITY_DATA); + m_set_sgl(m, NULL); + m_set_sgllen(m, 0); toep->tp_flags |= TP_FIN_SENT; req = mtod(m, struct cpl_close_con_req *); @@ -496,12 +522,13 @@ close_conn(struct socket *so) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); req->wr.wr_lo = htonl(V_WR_TID(tid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); - req->rsvd = htonl(toep->tp_write_seq); + req->rsvd = 0; inp_wunlock(inp); /* * XXX - need to defer shutdown while there is still data in the queue * */ + CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid); cxgb_ofld_send(d->cdev, m); } @@ -580,7 +607,7 @@ static void handle_urg_ptr(struct socket *so, uint32_t urg_seq) { #ifdef URGENT_DATA_SUPPORTED - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); urg_seq--; /* initially points past the urgent data, per BSD */ @@ -606,7 +633,7 @@ handle_urg_ptr(struct socket *so, uint32_t urg_seq) static inline int so_no_receive(const struct socket *so) { - return (so->so_state & 
(SS_ISDISCONNECTED|SS_ISDISCONNECTING)); + return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); } /* @@ -616,7 +643,7 @@ static void rx_urg_notify(struct toepcb *toep, struct mbuf *m) { struct cpl_rx_urg_notify *hdr = cplhdr(m); - struct socket *so = toeptoso(toep); + struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); VALIDATE_SOCK(so); @@ -663,39 +690,43 @@ t3_cleanup_rbuf(struct tcpcb *tp, int copied) struct toedev *dev; int dack_mode, must_send, read; u32 thres, credits, dack = 0; + struct sockbuf *rcv; + + so = inp_inpcbtosocket(tp->t_inpcb); + rcv = so_sockbuf_rcv(so); - so = tp->t_inpcb->inp_socket; if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || (tp->t_state == TCPS_FIN_WAIT_2))) { if (copied) { - SOCKBUF_LOCK(&so->so_rcv); + sockbuf_lock(rcv); toep->tp_copied_seq += copied; - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } return; } - inp_lock_assert(tp->t_inpcb); - SOCKBUF_LOCK(&so->so_rcv); + inp_lock_assert(tp->t_inpcb); + + sockbuf_lock(rcv); if (copied) toep->tp_copied_seq += copied; else { - read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; + read = toep->tp_enqueued_bytes - rcv->sb_cc; toep->tp_copied_seq += read; } credits = toep->tp_copied_seq - toep->tp_rcv_wup; - toep->tp_enqueued_bytes = so->so_rcv.sb_cc; - SOCKBUF_UNLOCK(&so->so_rcv); + toep->tp_enqueued_bytes = rcv->sb_cc; + sockbuf_unlock(rcv); - if (credits > so->so_rcv.sb_mbmax) { - printf("copied_seq=%u rcv_wup=%u credits=%u\n", - toep->tp_copied_seq, toep->tp_rcv_wup, credits); - credits = so->so_rcv.sb_mbmax; + if (credits > rcv->sb_mbmax) { + log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", + toep->tp_copied_seq, toep->tp_rcv_wup, credits); + credits = rcv->sb_mbmax; } - /* + /* * XXX this won't accurately reflect credit return - we need * to look at the difference between the amount that has been * put in the recv sockbuf and what is there now @@ -739,7 +770,7 @@ cxgb_toe_disconnect(struct tcpcb *tp) 
DPRINTF("cxgb_toe_disconnect\n"); - so = tp->t_inpcb->inp_socket; + so = inp_inpcbtosocket(tp->t_inpcb); close_conn(so); return (0); } @@ -768,7 +799,7 @@ cxgb_toe_send(struct tcpcb *tp) DPRINTF("cxgb_toe_send\n"); dump_toepcb(tp->t_toe); - so = tp->t_inpcb->inp_socket; + so = inp_inpcbtosocket(tp->t_inpcb); t3_push_frames(so, 1); return (0); } @@ -778,6 +809,7 @@ cxgb_toe_rcvd(struct tcpcb *tp) { inp_lock_assert(tp->t_inpcb); + t3_cleanup_rbuf(tp, 0); return (0); @@ -840,11 +872,10 @@ __set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, } static void -t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) +t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) { struct mbuf *m; - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; + struct tcpcb *tp = toep->tp_tp; if (toep == NULL) return; @@ -863,51 +894,56 @@ t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) * Set one of the t_flags bits in the TCB. */ static void -set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val) +set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val) { - t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); + + t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); } /* * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. */ static void -t3_set_nagle(struct socket *so) +t3_set_nagle(struct toepcb *toep) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = toep->tp_tp; - set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); + set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); } /* * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 
*/ void -t3_set_keepalive(struct socket *so, int on_off) +t3_set_keepalive(struct toepcb *toep, int on_off) { - set_tcb_tflag(so, S_TF_KEEPALIVE, on_off); + + set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off); } void -t3_set_rcv_coalesce_enable(struct socket *so, int on_off) +t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off) { - set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off); + set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off); } void -t3_set_dack_mss(struct socket *so, int on_off) +t3_set_dack_mss(struct toepcb *toep, int on_off) { - set_tcb_tflag(so, S_TF_DACK_MSS, on_off); + + set_tcb_tflag(toep, S_TF_DACK_MSS, on_off); } /* * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. */ static void -t3_set_tos(struct socket *so) +t3_set_tos(struct toepcb *toep) { - t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), - V_TCB_TOS(SO_TOS(so))); + int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb); + + t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), + V_TCB_TOS(tos)); } @@ -928,14 +964,14 @@ t3_set_tos(struct socket *so) 32)) static void -t3_enable_ddp(struct socket *so, int on) +t3_enable_ddp(struct toepcb *toep, int on) { if (on) { - t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), + t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), V_TF_DDP_OFF(0)); } else - t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, + t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1) | TP_DDP_TIMER_WORKAROUND_MASK, V_TF_DDP_OFF(1) | @@ -944,25 +980,25 @@ t3_enable_ddp(struct socket *so, int on) } void -t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color) +t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color) { - t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx, + t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx, V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), tag_color); } void -t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, +t3_set_ddp_buf(struct toepcb *toep, int buf_idx, 
unsigned int offset, unsigned int len) { if (buf_idx == 0) - t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET, + t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET, V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); else - t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET, + t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET, V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | @@ -986,11 +1022,10 @@ t3_set_cong_control(struct socket *so, const char *name) } int -t3_get_tcb(struct socket *so) +t3_get_tcb(struct toepcb *toep) { struct cpl_get_tcb *req; - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; + struct tcpcb *tp = toep->tp_tp; struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) @@ -1005,17 +1040,17 @@ t3_get_tcb(struct socket *so) OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); req->cpuno = htons(toep->tp_qset); req->rsvd = 0; - if (sototcpcb(so)->t_state == TCPS_SYN_SENT) + if (tp->t_state == TCPS_SYN_SENT) mbufq_tail(&toep->out_of_order_queue, m); // defer else - cxgb_ofld_send(T3C_DEV(so), m); + cxgb_ofld_send(TOEP_T3C_DEV(toep), m); return 0; } static inline void -so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid) +so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid) { - struct toepcb *toep = sototoep(so); + toepcb_hold(toep); cxgb_insert_tid(d->cdev, d->client, toep, tid); @@ -1045,7 +1080,7 @@ select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) unsigned int idx; #ifdef notyet - struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt; + struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; #endif if (tp) { tp->t_maxseg = pmtu - 40; @@ -1078,7 +1113,11 @@ t3_release_offload_resources(struct toepcb *toep) struct tcpcb *tp = toep->tp_tp; struct 
toedev *tdev = toep->tp_toedev; struct t3cdev *cdev; + struct socket *so; unsigned int tid = toep->tp_tid; + struct sockbuf *rcv; + + CTR0(KTR_TOM, "t3_release_offload_resources"); if (!tdev) return; @@ -1107,8 +1146,21 @@ t3_release_offload_resources(struct toepcb *toep) toep->tp_tp = NULL; if (tp) { inp_lock_assert(tp->t_inpcb); + so = inp_inpcbtosocket(tp->t_inpcb); + rcv = so_sockbuf_rcv(so); + /* + * cancel any offloaded reads + * + */ + sockbuf_lock(rcv); tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; + if (toep->tp_ddp_state.user_ddp_pending) { + t3_cancel_ubuf(toep, rcv); + toep->tp_ddp_state.user_ddp_pending = 0; + } + so_sorwakeup_locked(so); + } if (toep->tp_state == TCPS_SYN_SENT) { @@ -1128,7 +1180,7 @@ t3_release_offload_resources(struct toepcb *toep) static void install_offload_ops(struct socket *so) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); KASSERT(tp->t_toe != NULL, ("toepcb not set")); @@ -1164,11 +1216,14 @@ select_rcv_wnd(struct toedev *dev, struct socket *so) struct tom_data *d = TOM_DATA(dev); unsigned int wnd; unsigned int max_rcv_wnd; + struct sockbuf *rcv; + rcv = so_sockbuf_rcv(so); + if (tcp_do_autorcvbuf) wnd = tcp_autorcvbuf_max; else - wnd = so->so_rcv.sb_hiwat; + wnd = rcv->sb_hiwat; @@ -1195,18 +1250,24 @@ static inline void init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid, struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev); - - SOCK_LOCK_ASSERT(so); + struct sockbuf *snd, *rcv; - printf("initializing offload socket\n"); +#ifdef notyet + SOCK_LOCK_ASSERT(so); +#endif + + snd = so_sockbuf_snd(so); + rcv = so_sockbuf_rcv(so); + + log(LOG_INFO, "initializing offload socket\n"); /* * We either need to fix push frames to work with sbcompress * or we need to add this */ - so->so_snd.sb_flags |= SB_NOCOALESCE; - so->so_rcv.sb_flags |= 
SB_NOCOALESCE; + snd->sb_flags |= SB_NOCOALESCE; + rcv->sb_flags |= SB_NOCOALESCE; tp->t_toe = toep; toep->tp_tp = tp; @@ -1225,7 +1286,7 @@ init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid, */ tp->rcv_wnd = select_rcv_wnd(dev, so); - toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) && + toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) && tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; toep->tp_qset_idx = 0; @@ -1239,24 +1300,24 @@ init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid, static inline unsigned int calc_opt0h(struct socket *so, int mtu_idx) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); int wscale = select_rcv_wscale(tp->rcv_wnd); - + return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) | - V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS | + V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS | V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx); } static inline unsigned int calc_opt0l(struct socket *so, int ulp_mode) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); unsigned int val; - val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) | + val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ)); - DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val); + DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val); return (val); } @@ -1293,10 +1354,10 @@ mk_act_open_req(struct socket *so, struct mbuf *m, unsigned int atid, const struct l2t_entry *e) { struct cpl_act_open_req *req; - struct inpcb *inp = sotoinpcb(so); - struct tcpcb *tp = intotcpcb(inp); + struct inpcb *inp = so_sotoinpcb(so); + struct tcpcb *tp = inp_inpcbtotcpcb(inp); struct toepcb *toep = tp->t_toe; - struct toedev *tdev = TOE_DEV(so); + struct toedev *tdev = toep->tp_toedev; 
m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep)); @@ -1306,10 +1367,13 @@ mk_act_open_req(struct socket *so, struct mbuf *m, req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); req->wr.wr_lo = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); + inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port); +#if 0 req->local_port = inp->inp_lport; req->peer_port = inp->inp_fport; memcpy(&req->local_ip, &inp->inp_laddr, 4); memcpy(&req->peer_ip, &inp->inp_faddr, 4); +#endif req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode)); @@ -1348,8 +1412,8 @@ fail_act_open(struct toepcb *toep, int errno) t3_release_offload_resources(toep); if (tp) { - inp_lock_assert(tp->t_inpcb); - tcp_drop(tp, errno); + inp_wunlock(tp->t_inpcb); + tcp_offload_drop(tp, errno); } #ifdef notyet @@ -1370,7 +1434,6 @@ active_open_failed(struct toepcb *toep, struct mbuf *m) goto done; inp = toep->tp_tp->t_inpcb; - inp_wlock(inp); /* * Don't handle connection retry for now @@ -1384,10 +1447,13 @@ active_open_failed(struct toepcb *toep, struct mbuf *m) sk_reset_timer(so, &icsk->icsk_retransmit_timer, jiffies + HZ / 2); } else -#endif +#endif + { + inp_wlock(inp); fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status)); - inp_wunlock(inp); -done: + } + + done: m_free(m); } @@ -1432,7 +1498,7 @@ act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) struct toepcb *toep = m_get_toep(m); struct tcpcb *tp = toep->tp_tp; struct inpcb *inp = tp->t_inpcb; - struct socket *so = toeptoso(toep); + struct socket *so; inp_wlock(inp); if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { @@ -1440,8 +1506,8 @@ act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) printf("freeing %p\n", m); m_free(m); - } - inp_wunlock(inp); + } else + inp_wunlock(inp); } #endif /* @@ -1454,7 +1520,7 @@ t3_connect(struct toedev *tdev, 
struct socket *so, struct mbuf *m; struct l2t_entry *e; struct tom_data *d = TOM_DATA(tdev); - struct inpcb *inp = sotoinpcb(so); + struct inpcb *inp = so_sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep; /* allocated by init_offload_socket */ @@ -1478,14 +1544,14 @@ t3_connect(struct toedev *tdev, struct socket *so, m->m_toe.mt_toepcb = tp->t_toe; set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); #endif - SOCK_LOCK(so); + so_lock(so); init_offload_socket(so, tdev, atid, e, rt, toep); install_offload_ops(so); mk_act_open_req(so, m, atid, e); - SOCK_UNLOCK(so); + so_unlock(so); soisconnecting(so); toep = tp->t_toe; @@ -1495,7 +1561,7 @@ t3_connect(struct toedev *tdev, struct socket *so, l2t_send(d->cdev, (struct mbuf *)m, e); if (toep->tp_ulp_mode) - t3_enable_ddp(so, 0); + t3_enable_ddp(toep, 0); return (0); free_tid: @@ -1524,20 +1590,22 @@ t3_send_reset(struct toepcb *toep) struct toedev *tdev = toep->tp_toedev; struct socket *so = NULL; struct mbuf *m; + struct sockbuf *snd; if (tp) { inp_lock_assert(tp->t_inpcb); - so = toeptoso(toep); + so = inp_inpcbtosocket(tp->t_inpcb); } if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || tdev == NULL)) return; toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); - + + snd = so_sockbuf_snd(so); /* Purge the send queue so we don't send anything after an abort. 
*/ if (so) - sbflush(&so->so_snd); + sbflush(snd); if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) mode |= CPL_ABORT_POST_CLOSE_REQ; @@ -1578,10 +1646,12 @@ t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) return (EPERM); - inp = sotoinpcb(so); + inp = so_sotoinpcb(so); + inp_ip_tos_set(inp, optval); +#if 0 inp->inp_ip_tos = optval; - - t3_set_tos(so); +#endif + t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); return (0); } @@ -1611,7 +1681,7 @@ t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) if (copied < 1) return (EINVAL); - tp = sototcpcb(so); + tp = so_sototcpcb(so); /* * XXX I need to revisit this */ @@ -1625,15 +1695,15 @@ t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) int optval, oldval; struct inpcb *inp; struct tcpcb *tp; - + err = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (err) return (err); - inp = sotoinpcb(so); - tp = intotcpcb(inp); + inp = so_sotoinpcb(so); + tp = inp_inpcbtotcpcb(inp); inp_wlock(inp); @@ -1643,16 +1713,17 @@ t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) else tp->t_flags &= ~TF_NODELAY; inp_wunlock(inp); - + + if (oldval != tp->t_flags) - t3_set_nagle(so); + t3_set_nagle(tp->t_toe); } return (0); } -static int +int t3_ctloutput(struct socket *so, struct sockopt *sopt) { int err; @@ -1686,7 +1757,8 @@ static void handle_excess_rx(struct toepcb *toep, struct mbuf *m) { - if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN)) + if (need_rst_on_excess_rx(toep) && + !(toep->tp_flags & TP_ABORT_SHUTDOWN)) t3_send_reset(toep); m_freem(m); } @@ -1704,18 +1776,21 @@ tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) unsigned int ddp_offset; struct socket *so; struct tcpcb *tp; + struct sockbuf *rcv; + int state; uint64_t t; __be64 *tcb; - so = toeptoso(toep); tp = toep->tp_tp; + so = inp_inpcbtosocket(tp->t_inpcb); inp_lock_assert(tp->t_inpcb); - SOCKBUF_LOCK(&so->so_rcv); + rcv = 
so_sockbuf_rcv(so); + sockbuf_lock(rcv); - /* Note that we only accout for CPL_GET_TCB issued by the DDP code. We - * really need a cookie in order to dispatch the RPLs. + /* Note that we only accout for CPL_GET_TCB issued by the DDP code. + * We really need a cookie in order to dispatch the RPLs. */ q->get_tcb_count--; @@ -1725,13 +1800,14 @@ tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) * !q->get_tcb_count and we need to wake it up. */ if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) { - struct socket *so = toeptoso(toep); - + int state = so_state_get(so); + m_freem(m); - if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup_locked(so); + if (__predict_true((state & SS_NOFDREF) == 0)) + so_sorwakeup_locked(so); else - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); + return; } @@ -1753,15 +1829,10 @@ tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) CTR5(KTR_TOM, "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u", q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset); - KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u", + KASSERT(ddp_offset >= m->m_cur_offset, + ("ddp_offset=%u less than cur_offset=%u", ddp_offset, m->m_cur_offset)); -#ifdef T3_TRACE - T3_TRACE3(TIDTB(so), - "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u", - tp->rcv_nxt, q->cur_buf, ddp_offset); -#endif - #if 0 { unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx; @@ -1833,13 +1904,13 @@ tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) q->kbuf_posted--; bsp->flags |= DDP_BF_NODATA; } - SOCKBUF_UNLOCK(&so->so_rcv); - + sockbuf_unlock(rcv); m_free(m); return; } } else { - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); + /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP, * but it got here way late and nobody cares anymore. 
*/ @@ -1852,21 +1923,19 @@ tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) m->m_seq = tp->rcv_nxt; tp->rcv_nxt += m->m_pkthdr.len; tp->t_rcvtime = ticks; -#ifdef T3_TRACE - T3_TRACE3(TB(q), - "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u", - m->m_seq, q->cur_buf, m->m_pkthdr.len); -#endif CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u", m->m_seq, q->cur_buf, m->m_pkthdr.len); - if (m->m_pkthdr.len == 0) + if (m->m_pkthdr.len == 0) { q->user_ddp_pending = 0; - else - SBAPPEND(&so->so_rcv, m); - if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup_locked(so); + m_free(m); + } else + SBAPPEND(rcv, m); + + state = so_state_get(so); + if (__predict_true((state & SS_NOFDREF) == 0)) + so_sorwakeup_locked(so); else - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } /* @@ -1895,17 +1964,21 @@ static void handle_ddp_data(struct toepcb *toep, struct mbuf *m) { struct tcpcb *tp = toep->tp_tp; - struct socket *so = toeptoso(toep); + struct socket *so; struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_rx_data *hdr = cplhdr(m); unsigned int rcv_nxt = ntohl(hdr->seq); - + struct sockbuf *rcv; + if (tp->rcv_nxt == rcv_nxt) return; inp_lock_assert(tp->t_inpcb); - SOCKBUF_LOCK(&so->so_rcv); + so = inp_inpcbtosocket(tp->t_inpcb); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); + q = &toep->tp_ddp_state; bsp = &q->buf_state[q->cur_buf]; KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x", @@ -1920,7 +1993,6 @@ handle_ddp_data(struct toepcb *toep, struct mbuf *m) t3_ddp_error(so, "handle_ddp_data: neg len"); } #endif - m->m_ddp_gl = (unsigned char *)bsp->gl; m->m_flags |= M_DDP; m->m_cur_offset = bsp->cur_offset; @@ -1938,7 +2010,7 @@ handle_ddp_data(struct toepcb *toep, struct mbuf *m) * mode. 
*/ q->ubuf_ddp_ready = 0; - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } /* @@ -1949,10 +2021,14 @@ new_rx_data(struct toepcb *toep, struct mbuf *m) { struct cpl_rx_data *hdr = cplhdr(m); struct tcpcb *tp = toep->tp_tp; - struct socket *so = toeptoso(toep); + struct socket *so; + struct sockbuf *rcv; + int state; int len = be16toh(hdr->len); inp_wlock(tp->t_inpcb); + + so = inp_inpcbtosocket(tp->t_inpcb); if (__predict_false(so_no_receive(so))) { handle_excess_rx(toep, m); @@ -1971,7 +2047,7 @@ new_rx_data(struct toepcb *toep, struct mbuf *m) if (__predict_false(m->m_seq != tp->rcv_nxt)) { log(LOG_ERR, "%s: TID %u: Bad sequence number %u, expected %u\n", - TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq, + toep->tp_toedev->name, toep->tp_tid, m->m_seq, tp->rcv_nxt); m_freem(m); inp_wunlock(tp->t_inpcb); @@ -2004,37 +2080,38 @@ new_rx_data(struct toepcb *toep, struct mbuf *m) tp->rcv_nxt += m->m_pkthdr.len; tp->t_rcvtime = ticks; toep->tp_enqueued_bytes += m->m_pkthdr.len; -#ifdef T3_TRACE - T3_TRACE2(TIDTB(sk), + CTR2(KTR_TOM, "new_rx_data: seq 0x%x len %u", m->m_seq, m->m_pkthdr.len); -#endif inp_wunlock(tp->t_inpcb); - SOCKBUF_LOCK(&so->so_rcv); - if (sb_notify(&so->so_rcv)) - DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len); - - SBAPPEND(&so->so_rcv, m); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); +#if 0 + if (sb_notify(rcv)) + DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len); +#endif + SBAPPEND(rcv, m); #ifdef notyet /* * We're giving too many credits to the card - but disable this check so we can keep on moving :-| * */ - KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1), + KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1), ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d", - so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax)); + so, rcv->sb_cc, rcv->sb_mbmax)); #endif CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d", - so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt); - - if 
(__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup_locked(so); + rcv->sb_cc, rcv->sb_mbcnt); + + state = so_state_get(so); + if (__predict_true((state & SS_NOFDREF) == 0)) + so_sorwakeup_locked(so); else - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } /* @@ -2059,14 +2136,16 @@ new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_rx_data_ddp *hdr; + struct socket *so; unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx; - struct socket *so = toeptoso(toep); int nomoredata = 0; unsigned int delack_mode; - - tp = sototcpcb(so); + struct sockbuf *rcv; + tp = toep->tp_tp; inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(tp->t_inpcb); + if (__predict_false(so_no_receive(so))) { handle_excess_rx(toep, m); @@ -2080,16 +2159,6 @@ new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1; bsp = &q->buf_state[buf_idx]; -#ifdef T3_TRACE - T3_TRACE5(TIDTB(sk), - "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u " - "hdr seq 0x%x len %u offset %u", - tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq), - ntohs(hdr->len), G_DDP_OFFSET(ddp_report)); - T3_TRACE1(TIDTB(sk), - "new_rx_data_ddp: ddp_report 0x%x", - ddp_report); -#endif CTR4(KTR_TOM, "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u " "hdr seq 0x%x len %u", @@ -2130,7 +2199,9 @@ new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) end_offset = G_DDP_OFFSET(ddp_report) + ddp_len; m->m_cur_offset = end_offset - m->m_pkthdr.len; - SOCKBUF_LOCK(&so->so_rcv); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); + m->m_ddp_gl = (unsigned char *)bsp->gl; m->m_flags |= M_DDP; bsp->cur_offset = end_offset; @@ -2146,8 +2217,6 @@ new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next)); - - /* * Bit 0 of flags stores whether the DDP buffer is completed. 
* Note that other parts of the code depend on this being in bit 0. @@ -2174,12 +2243,14 @@ new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) skb_reset_transport_header(skb); tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */ #endif - SBAPPEND(&so->so_rcv, m); - - if ((so->so_state & SS_NOFDREF) == 0) - sorwakeup_locked(so); + SBAPPEND(rcv, m); + + if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) || + (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1)) + || !(m->m_ddp_flags & DDP_BF_NOCOPY)))) + so_sorwakeup_locked(so); else - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ @@ -2214,16 +2285,19 @@ static void process_ddp_complete(struct toepcb *toep, struct mbuf *m) { struct tcpcb *tp = toep->tp_tp; - struct socket *so = toeptoso(toep); + struct socket *so; struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_rx_ddp_complete *hdr; unsigned int ddp_report, buf_idx, when, delack_mode; int nomoredata = 0; - + struct sockbuf *rcv; + inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(tp->t_inpcb); + if (__predict_false(so_no_receive(so))) { - struct inpcb *inp = sotoinpcb(so); + struct inpcb *inp = so_sotoinpcb(so); handle_excess_rx(toep, m); inp_wunlock(inp); @@ -2235,8 +2309,9 @@ process_ddp_complete(struct toepcb *toep, struct mbuf *m) buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1; m->m_pkthdr.csum_data = tp->rcv_nxt; - - SOCKBUF_LOCK(&so->so_rcv); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); + bsp = &q->buf_state[buf_idx]; when = bsp->cur_offset; m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when; @@ -2283,13 +2358,11 @@ process_ddp_complete(struct toepcb *toep, struct mbuf *m) if (nomoredata) m->m_ddp_flags |= DDP_BF_NODATA; - - SBAPPEND(&so->so_rcv, m); - - if ((so->so_state & SS_NOFDREF) == 0) - sorwakeup_locked(so); + SBAPPEND(rcv, m); + if ((so_state_get(so) & SS_NOFDREF) == 0) + so_sorwakeup_locked(so); else - 
SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); } /* @@ -2313,11 +2386,8 @@ do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx) * socket state before calling tcp_time_wait to comply with its expectations. */ static void -enter_timewait(struct socket *so) +enter_timewait(struct tcpcb *tp) { - struct tcpcb *tp = sototcpcb(so); - - inp_lock_assert(tp->t_inpcb); /* * Bump rcv_nxt for the peer FIN. We don't do this at the time we * process peer_close because we don't want to carry the peer FIN in @@ -2325,11 +2395,32 @@ enter_timewait(struct socket *so) * having the FIN in the receive queue we'll confuse facilities such * as SIOCINQ. */ + inp_wlock(tp->t_inpcb); tp->rcv_nxt++; tp->ts_recent_age = 0; /* defeat recycling */ tp->t_srtt = 0; /* defeat tcp_update_metrics */ - tcp_twstart(tp); + inp_wunlock(tp->t_inpcb); + tcp_offload_twstart(tp); +} + +static void +enter_timewait_disconnect(struct tcpcb *tp) +{ + /* + * Bump rcv_nxt for the peer FIN. We don't do this at the time we + * process peer_close because we don't want to carry the peer FIN in + * the socket's receive queue and if we increment rcv_nxt without + * having the FIN in the receive queue we'll confuse facilities such + * as SIOCINQ. 
+ */ + inp_wlock(tp->t_inpcb); + tp->rcv_nxt++; + + tp->ts_recent_age = 0; /* defeat recycling */ + tp->t_srtt = 0; /* defeat tcp_update_metrics */ + inp_wunlock(tp->t_inpcb); + tcp_offload_twstart_disconnect(tp); } /* @@ -2343,16 +2434,18 @@ enter_timewait(struct socket *so) static int handle_peer_close_data(struct socket *so, struct mbuf *m) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct ddp_state *q; struct ddp_buf_state *bsp; struct cpl_peer_close *req = cplhdr(m); unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */ - + struct sockbuf *rcv; + if (tp->rcv_nxt == rcv_nxt) /* no data */ return (0); + CTR0(KTR_TOM, "handle_peer_close_data"); if (__predict_false(so_no_receive(so))) { handle_excess_rx(toep, m); @@ -2368,7 +2461,9 @@ handle_peer_close_data(struct socket *so, struct mbuf *m) inp_lock_assert(tp->t_inpcb); q = &toep->tp_ddp_state; - SOCKBUF_LOCK(&so->so_rcv); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); + bsp = &q->buf_state[q->cur_buf]; m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); @@ -2387,11 +2482,12 @@ handle_peer_close_data(struct socket *so, struct mbuf *m) tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ #endif tp->t_rcvtime = ticks; - SBAPPEND(&so->so_rcv, m); - if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup_locked(so); + SBAPPEND(rcv, m); + if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0)) + so_sorwakeup_locked(so); else - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); + return (1); } @@ -2399,28 +2495,24 @@ handle_peer_close_data(struct socket *so, struct mbuf *m) * Handle a peer FIN. 
*/ static void -do_peer_fin(struct socket *so, struct mbuf *m) +do_peer_fin(struct toepcb *toep, struct mbuf *m) { - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; - int keep = 0; - DPRINTF("do_peer_fin state=%d\n", tp->t_state); + struct socket *so; + struct tcpcb *tp = toep->tp_tp; + int keep, action; -#ifdef T3_TRACE - T3_TRACE0(TIDTB(sk),"do_peer_fin:"); -#endif - - if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { + action = keep = 0; + CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state); + if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { printf("abort_pending set\n"); goto out; } - INP_INFO_WLOCK(&tcbinfo); inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) { keep = handle_peer_close_data(so, m); if (keep < 0) { - INP_INFO_WUNLOCK(&tcbinfo); inp_wunlock(tp->t_inpcb); return; } @@ -2463,23 +2555,29 @@ do_peer_fin(struct socket *so, struct mbuf *m) */ t3_release_offload_resources(toep); if (toep->tp_flags & TP_ABORT_RPL_PENDING) { - tp = tcp_close(tp); + action = TCP_CLOSE; } else { - enter_timewait(so); - tp = NULL; + action = TCP_TIMEWAIT; } break; default: log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n", - TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state); + toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state); } - INP_INFO_WUNLOCK(&tcbinfo); - if (tp) - inp_wunlock(tp->t_inpcb); + inp_wunlock(tp->t_inpcb); - DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags); + DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(rcv), rcv->sb_flags); + + if (action == TCP_TIMEWAIT) { + enter_timewait(tp); + } else if (action == TCP_DROP) { + tcp_offload_drop(tp, 0); + } else if (action == TCP_CLOSE) { + tcp_offload_close(tp); + } + #ifdef notyet /* Do not send POLL_HUP for half duplex close. 
*/ if ((sk->sk_shutdown & SEND_SHUTDOWN) || @@ -2501,40 +2599,43 @@ static int do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx) { struct toepcb *toep = (struct toepcb *)ctx; - struct socket *so = toeptoso(toep); VALIDATE_SOCK(so); - do_peer_fin(so, m); + do_peer_fin(toep, m); return (0); } static void -process_close_con_rpl(struct socket *so, struct mbuf *m) +process_close_con_rpl(struct toepcb *toep, struct mbuf *m) { - struct tcpcb *tp = sototcpcb(so); struct cpl_close_con_rpl *rpl = cplhdr(m); - struct toepcb *toep = tp->t_toe; - + struct tcpcb *tp = toep->tp_tp; + struct socket *so; + int action = 0; + struct sockbuf *rcv; + + inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(tp->t_inpcb); + tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ - DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state, - !!(so->so_state & SS_NOFDREF)); - if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) + if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { + inp_wunlock(tp->t_inpcb); goto out; + } - INP_INFO_WLOCK(&tcbinfo); - inp_wlock(tp->t_inpcb); + CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep, + tp->t_state, !!(so_state_get(so) & SS_NOFDREF)); + switch (tp->t_state) { case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */ t3_release_offload_resources(toep); if (toep->tp_flags & TP_ABORT_RPL_PENDING) { - tp = tcp_close(tp); + action = TCP_CLOSE; } else { - enter_timewait(so); - tp = NULL; - soisdisconnected(so); + action = TCP_TIMEWAIT; } break; case TCPS_LAST_ACK: @@ -2544,7 +2645,7 @@ process_close_con_rpl(struct socket *so, struct mbuf *m) * late, this close_con_rpl is the actual last message. */ t3_release_offload_resources(toep); - tp = tcp_close(tp); + action = TCP_CLOSE; break; case TCPS_FIN_WAIT_1: /* @@ -2558,30 +2659,43 @@ process_close_con_rpl(struct socket *so, struct mbuf *m) * we should release the tp also, and use a * compressed state. 
*/ - if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { + if (so) + rcv = so_sockbuf_rcv(so); + else + break; + + if (rcv->sb_state & SBS_CANTRCVMORE) { int timeout; - - soisdisconnected(so); + + if (so) + soisdisconnected(so); timeout = (tcp_fast_finwait2_recycle) ? tcp_finwait2_timeout : tcp_maxidle; tcp_timer_activate(tp, TT_2MSL, timeout); } tp->t_state = TCPS_FIN_WAIT_2; - if ((so->so_options & SO_LINGER) && so->so_linger == 0 && + if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 && (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) { - tp = tcp_drop(tp, 0); + action = TCP_DROP; } break; default: log(LOG_ERR, "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", - TOE_DEV(so)->tod_name, toep->tp_tid, + toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state); } - INP_INFO_WUNLOCK(&tcbinfo); - if (tp) - inp_wunlock(tp->t_inpcb); + inp_wunlock(tp->t_inpcb); + + + if (action == TCP_TIMEWAIT) { + enter_timewait_disconnect(tp); + } else if (action == TCP_DROP) { + tcp_offload_drop(tp, 0); + } else if (action == TCP_CLOSE) { + tcp_offload_close(tp); + } out: m_freem(m); } @@ -2594,11 +2708,8 @@ do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) { struct toepcb *toep = (struct toepcb *)ctx; - struct socket *so = toeptoso(toep); - VALIDATE_SOCK(so); - - process_close_con_rpl(so, m); + process_close_con_rpl(toep, m); return (0); } @@ -2609,10 +2720,11 @@ do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, * originated the ABORT_REQ. 
*/ static void -process_abort_rpl(struct socket *so, struct mbuf *m) +process_abort_rpl(struct toepcb *toep, struct mbuf *m) { - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; + struct tcpcb *tp = toep->tp_tp; + struct socket *so; + int needclose = 0; #ifdef T3_TRACE T3_TRACE1(TIDTB(sk), @@ -2620,29 +2732,30 @@ process_abort_rpl(struct socket *so, struct mbuf *m) sock_flag(sk, ABORT_RPL_PENDING)); #endif - INP_INFO_WLOCK(&tcbinfo); inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(tp->t_inpcb); if (toep->tp_flags & TP_ABORT_RPL_PENDING) { /* * XXX panic on tcpdrop */ - if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so))) + if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev)) toep->tp_flags |= TP_ABORT_RPL_RCVD; else { toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || - !is_t3a(TOE_DEV(so))) { + !is_t3a(toep->tp_toedev)) { if (toep->tp_flags & TP_ABORT_REQ_RCVD) panic("TP_ABORT_REQ_RCVD set"); t3_release_offload_resources(toep); - tp = tcp_close(tp); + needclose = 1; } } } - if (tp) - inp_wunlock(tp->t_inpcb); - INP_INFO_WUNLOCK(&tcbinfo); + inp_wunlock(tp->t_inpcb); + + if (needclose) + tcp_offload_close(tp); m_free(m); } @@ -2653,7 +2766,6 @@ process_abort_rpl(struct socket *so, struct mbuf *m) static int do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) { - struct socket *so; struct cpl_abort_rpl_rss *rpl = cplhdr(m); struct toepcb *toep; @@ -2682,7 +2794,7 @@ discard: goto discard; if (toep->tp_tp == NULL) { - printf("removing tid for abort\n"); + log(LOG_NOTICE, "removing tid for abort\n"); cxgb_remove_tid(cdev, toep, toep->tp_tid); if (toep->tp_l2t) l2t_release(L2DATA(cdev), toep->tp_l2t); @@ -2691,12 +2803,11 @@ discard: goto discard; } - printf("toep=%p\n", toep); - printf("tp=%p\n", toep->tp_tp); + log(LOG_NOTICE, "toep=%p\n", toep); + log(LOG_NOTICE, "tp=%p\n", toep->tp_tp); - so = toeptoso(toep); /* <- XXX panic */ toepcb_hold(toep); - 
process_abort_rpl(so, m); + process_abort_rpl(toep, m); toepcb_release(toep); return (0); } @@ -2708,7 +2819,7 @@ discard: static int abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); switch (abort_reason) { case CPL_ERR_BAD_SYN: @@ -2817,8 +2928,8 @@ cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) static void do_abort_syn_rcv(struct socket *child, struct socket *parent) { - struct tcpcb *parenttp = sototcpcb(parent); - struct tcpcb *childtp = sototcpcb(child); + struct tcpcb *parenttp = so_sototcpcb(parent); + struct tcpcb *childtp = so_sototcpcb(child); /* * If the server is still open we clean up the child connection, @@ -2827,13 +2938,10 @@ do_abort_syn_rcv(struct socket *child, struct socket *parent) */ if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { cleanup_syn_rcv_conn(child, parent); - INP_INFO_WLOCK(&tcbinfo); inp_wlock(childtp->t_inpcb); t3_release_offload_resources(childtp->t_toe); - childtp = tcp_close(childtp); - INP_INFO_WUNLOCK(&tcbinfo); - if (childtp) - inp_wunlock(childtp->t_inpcb); + inp_wunlock(childtp->t_inpcb); + tcp_offload_close(childtp); } } #endif @@ -2848,7 +2956,7 @@ abort_syn_rcv(struct socket *so, struct mbuf *m) CXGB_UNIMPLEMENTED(); #ifdef notyet struct socket *parent; - struct toedev *tdev = TOE_DEV(so); + struct toedev *tdev = toep->tp_toedev; struct t3cdev *cdev = TOM_DATA(tdev)->cdev; struct socket *oreq = so->so_incomp; struct t3c_tid_entry *t3c_stid; @@ -2861,10 +2969,10 @@ abort_syn_rcv(struct socket *so, struct mbuf *m) t3c_stid = lookup_stid(t, oreq->ts_recent); parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; - SOCK_LOCK(parent); + so_lock(parent); do_abort_syn_rcv(so, parent); send_abort_rpl(m, tdev, CPL_ABORT_NO_RST); - SOCK_UNLOCK(parent); + so_unlock(parent); #endif return (0); } @@ -2874,14 +2982,16 @@ abort_syn_rcv(struct socket *so, struct mbuf *m) * request except that we need to reply 
to it. */ static void -process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev) +process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev) { int rst_status = CPL_ABORT_NO_RST; const struct cpl_abort_req_rss *req = cplhdr(m); - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; - + struct tcpcb *tp = toep->tp_tp; + struct socket *so; + int needclose = 0; + inp_wlock(tp->t_inpcb); + so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) { toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN); m_free(m); @@ -2899,11 +3009,15 @@ process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev) * That will generate an abort_rpl with status 0, wait for it. */ if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) || - (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) { - so->so_error = abort_status_to_errno(so, req->status, + (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) { + int error; + + error = abort_status_to_errno(so, req->status, &rst_status); - if (__predict_true((so->so_state & SS_NOFDREF) == 0)) - sorwakeup(so); + so_error_set(so, error); + + if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0)) + so_sorwakeup(so); /* * SYN_RECV needs special processing. If abort_syn_rcv() * returns 0 is has taken care of the abort. 
@@ -2912,15 +3026,17 @@ process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev) goto skip; t3_release_offload_resources(toep); - tp = tcp_close(tp); + needclose = 1; } - if (tp) - inp_wunlock(tp->t_inpcb); + inp_wunlock(tp->t_inpcb); + + if (needclose) + tcp_offload_close(tp); + send_abort_rpl(m, tdev, rst_status); return; - skip: - inp_wunlock(tp->t_inpcb); + inp_wunlock(tp->t_inpcb); } /* @@ -2931,23 +3047,19 @@ do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) { const struct cpl_abort_req_rss *req = cplhdr(m); struct toepcb *toep = (struct toepcb *)ctx; - struct socket *so; - struct inpcb *inp; if (is_neg_adv_abort(req->status)) { m_free(m); return (0); } - printf("aborting tid=%d\n", toep->tp_tid); + log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid); if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) { cxgb_remove_tid(cdev, toep, toep->tp_tid); toep->tp_flags |= TP_ABORT_REQ_RCVD; - printf("sending abort rpl\n"); send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST); - printf("sent\n"); if (toep->tp_l2t) l2t_release(L2DATA(cdev), toep->tp_l2t); @@ -2962,23 +3074,18 @@ do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) * have a way of doing that yet */ toepcb_release(toep); - printf("abort for unestablished connection :-(\n"); + log(LOG_ERR, "abort for unestablished connection :-(\n"); return (0); } if (toep->tp_tp == NULL) { - printf("disconnected toepcb\n"); + log(LOG_NOTICE, "disconnected toepcb\n"); /* should be freed momentarily */ return (0); } - so = toeptoso(toep); - inp = sotoinpcb(so); - - VALIDATE_SOCK(so); + toepcb_hold(toep); - INP_INFO_WLOCK(&tcbinfo); - process_abort_req(so, m, TOE_DEV(so)); - INP_INFO_WUNLOCK(&tcbinfo); + process_abort_req(toep, m, toep->tp_toedev); toepcb_release(toep); return (0); } @@ -3010,7 +3117,7 @@ handle_pass_open_arp_failure(struct socket *so, struct mbuf *m) struct socket *oreq; struct t3c_tid_entry *t3c_stid; struct tid_info *t; - struct tcpcb *otp, *tp = 
sototcpcb(so); + struct tcpcb *otp, *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; /* @@ -3024,16 +3131,16 @@ handle_pass_open_arp_failure(struct socket *so, struct mbuf *m) } oreq = so->so_incomp; - otp = sototcpcb(oreq); + otp = so_sototcpcb(oreq); cdev = T3C_DEV(so); t = &(T3C_DATA(cdev))->tid_maps; t3c_stid = lookup_stid(t, otp->ts_recent); parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; - SOCK_LOCK(parent); + so_lock(parent); pass_open_abort(so, parent, m); - SOCK_UNLOCK(parent); + so_unlock(parent); #endif } @@ -3127,7 +3234,7 @@ syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, str uint32_t rcv_isn = ntohl(req->rcv_isn); bzero(&to, sizeof(struct tcpopt)); - inp = sotoinpcb(lso); + inp = so_sotoinpcb(lso); /* * Fill out information for entering us into the syncache @@ -3156,8 +3263,6 @@ syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, str to.to_mss = mss; to.to_wscale = wsf; to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? 
TOF_SACKPERM : 0); - INP_INFO_WLOCK(&tcbinfo); - inp_wlock(inp); syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); } @@ -3180,7 +3285,7 @@ process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, unsigned int tid = GET_TID(req); struct tom_data *d = TOM_DATA(tdev); struct t3cdev *cdev = d->cdev; - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *newtoep; struct rtentry *dst; struct sockaddr_in nam; @@ -3269,11 +3374,11 @@ process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, tp->rcv_wnd = select_rcv_wnd(tdev, so); cxgb_insert_tid(cdev, d->client, newtoep, tid); - SOCK_LOCK(so); + so_lock(so); LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); - SOCK_UNLOCK(so); - - newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) && + so_unlock(so); + + newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) && tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; if (newtoep->tp_ulp_mode) { @@ -3346,7 +3451,7 @@ static int do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) { struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx; - struct socket *lso = listen_ctx->lso; + struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */ struct tom_data *d = listen_ctx->tom_data; #if VALIDATE_TID @@ -3391,9 +3496,9 @@ do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) static void assign_rxopt(struct socket *so, unsigned int opt) { - const struct t3c_data *td = T3C_DATA(T3C_DEV(so)); - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; + const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep)); inp_lock_assert(tp->t_inpcb); @@ -3415,12 +3520,19 @@ assign_rxopt(struct socket *so, unsigned int opt) static void make_established(struct socket *so, u32 snd_isn, unsigned int opt) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; assign_rxopt(so, opt); + + /* + *XXXXXXXXXXX + * + */ +#ifdef notyet so->so_proto->pr_ctloutput = t3_ctloutput; +#endif #if 0 inet_sk(sk)->id = tp->write_seq ^ jiffies; @@ -3494,7 +3606,7 @@ syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so ntohl(req->local_ip), ntohs(req->local_port), ntohl(req->peer_ip), ntohs(req->peer_port), mss, wsf, ts, sack); - return syncache_expand(&inc, &to, &th, so, m); + return syncache_offload_expand(&inc, &to, &th, so, m); } @@ -3507,21 +3619,36 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) { struct cpl_pass_establish *req = cplhdr(m); struct toepcb *toep = (struct toepcb *)ctx; - struct tcpcb *tp; + struct tcpcb *tp = toep->tp_tp; struct socket *so, *lso; struct t3c_data *td = T3C_DATA(cdev); + struct sockbuf *snd, *rcv; + // Complete 
socket initialization now that we have the SND_ISN struct toedev *tdev; - so = lso = toeptoso(toep); + tdev = toep->tp_toedev; - SOCK_LOCK(so); - LIST_REMOVE(toep, synq_entry); - SOCK_UNLOCK(so); + inp_wlock(tp->t_inpcb); + + /* + * + * XXX need to add reference while we're manipulating + */ + so = lso = inp_inpcbtosocket(tp->t_inpcb); + + inp_wunlock(tp->t_inpcb); + + snd = so_sockbuf_snd(so); + rcv = so_sockbuf_rcv(so); + + + so_lock(so); + LIST_REMOVE(toep, synq_entry); + so_unlock(so); - INP_INFO_WLOCK(&tcbinfo); if (!syncache_expand_establish_req(req, &so, toep)) { /* * No entry @@ -3535,16 +3662,12 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) CXGB_UNIMPLEMENTED(); } - /* - * XXX workaround for lack of syncache drop - */ - toepcb_release(toep); - - tp = sototcpcb(so); + tp = so_sototcpcb(so); inp_wlock(tp->t_inpcb); - so->so_snd.sb_flags |= SB_NOCOALESCE; - so->so_rcv.sb_flags |= SB_NOCOALESCE; + + snd->sb_flags |= SB_NOCOALESCE; + rcv->sb_flags |= SB_NOCOALESCE; toep->tp_tp = tp; toep->tp_flags = 0; @@ -3565,7 +3688,11 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) */ make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); - INP_INFO_WUNLOCK(&tcbinfo); + + /* + * XXX workaround for lack of syncache drop + */ + toepcb_release(toep); inp_wunlock(tp->t_inpcb); CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid); @@ -3605,15 +3732,14 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) * and send them to the TOE. 
*/ static void -fixup_and_send_ofo(struct socket *so) +fixup_and_send_ofo(struct toepcb *toep) { struct mbuf *m; - struct toedev *tdev = TOE_DEV(so); - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; + struct toedev *tdev = toep->tp_toedev; + struct tcpcb *tp = toep->tp_tp; unsigned int tid = toep->tp_tid; - printf("fixup_and_send_ofo\n"); + log(LOG_NOTICE, "fixup_and_send_ofo\n"); inp_lock_assert(tp->t_inpcb); while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { @@ -3638,7 +3764,7 @@ socket_act_establish(struct socket *so, struct mbuf *m) { struct cpl_act_establish *req = cplhdr(m); u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; if (__predict_false(tp->t_state != TCPS_SYN_SENT)) @@ -3656,13 +3782,13 @@ socket_act_establish(struct socket *so, struct mbuf *m) * defer for lack of a TID. */ if (mbufq_len(&toep->out_of_order_queue)) - fixup_and_send_ofo(so); + fixup_and_send_ofo(toep); - if (__predict_false(so->so_state & SS_NOFDREF)) { + if (__predict_false(so_state_get(so) & SS_NOFDREF)) { /* * XXX does this even make sense? */ - sorwakeup(so); + so_sorwakeup(so); } m_free(m); #ifdef notyet @@ -3708,20 +3834,22 @@ do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) free_atid(cdev, atid); return (0); } - - so = toeptoso(toep); - tdev = TOE_DEV(so); /* blow up here if link was down */ - d = TOM_DATA(tdev); - inp_wlock(tp->t_inpcb); - + + /* + * XXX + */ + so = inp_inpcbtosocket(tp->t_inpcb); + tdev = toep->tp_toedev; /* blow up here if link was down */ + d = TOM_DATA(tdev); + /* * It's OK if the TID is currently in use, the owning socket may have * backlogged its last CPL message(s). Just take it away. 
*/ toep->tp_tid = tid; toep->tp_tp = tp; - so_insert_tid(d, so, tid); + so_insert_tid(d, toep, tid); free_atid(cdev, atid); toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); @@ -3742,15 +3870,16 @@ wr_ack(struct toepcb *toep, struct mbuf *m) { struct tcpcb *tp = toep->tp_tp; struct cpl_wr_ack *hdr = cplhdr(m); - struct socket *so = toeptoso(toep); + struct socket *so; unsigned int credits = ntohs(hdr->credits); u32 snd_una = ntohl(hdr->snd_una); int bytes = 0; + struct sockbuf *snd; CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); inp_wlock(tp->t_inpcb); - + so = inp_inpcbtosocket(tp->t_inpcb); toep->tp_wr_avail += credits; if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; @@ -3765,9 +3894,11 @@ wr_ack(struct toepcb *toep, struct mbuf *m) break; } CTR2(KTR_TOM, - "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len); + "wr_ack: p->credits=%d p->bytes=%d", + p->m_pkthdr.csum_data, p->m_pkthdr.len); + KASSERT(p->m_pkthdr.csum_data != 0, + ("empty request still on list")); - KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list")); if (__predict_false(credits < p->m_pkthdr.csum_data)) { #if DEBUG_WR > 1 @@ -3822,14 +3953,17 @@ wr_ack(struct toepcb *toep, struct mbuf *m) if (tp->snd_una == tp->snd_nxt) toep->tp_flags &= ~TP_TX_WAIT_IDLE; } + + snd = so_sockbuf_snd(so); if (bytes) { CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); - SOCKBUF_LOCK(&so->so_snd); - sbdrop_locked(&so->so_snd, bytes); - sowwakeup_locked(so); + snd = so_sockbuf_snd(so); + sockbuf_lock(snd); + sbdrop_locked(snd, bytes); + so_sowwakeup_locked(so); } - - if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) + + if (snd->sb_sndptroff < snd->sb_cc) t3_push_frames(so, 0); out_free: @@ -3871,11 +4005,24 @@ do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) static void t3_reset_listen_child(struct socket *child) { - struct tcpcb *tp = sototcpcb(child); + struct tcpcb *tp 
= so_sototcpcb(child); t3_send_reset(tp->t_toe); } + +static void +t3_child_disconnect(struct socket *so, void *arg) +{ + struct tcpcb *tp = so_sototcpcb(so); + + if (tp->t_flags & TF_TOE) { + inp_wlock(tp->t_inpcb); + t3_reset_listen_child(so); + inp_wunlock(tp->t_inpcb); + } +} + /* * Disconnect offloaded established but not yet accepted connections sitting * on a server's accept_queue. We just send an ABORT_REQ at this point and @@ -3884,18 +4031,10 @@ t3_reset_listen_child(struct socket *child) void t3_disconnect_acceptq(struct socket *listen_so) { - struct socket *so; - struct tcpcb *tp; - TAILQ_FOREACH(so, &listen_so->so_comp, so_list) { - tp = sototcpcb(so); - - if (tp->t_flags & TF_TOE) { - inp_wlock(tp->t_inpcb); - t3_reset_listen_child(so); - inp_wunlock(tp->t_inpcb); - } - } + so_lock(listen_so); + so_listeners_apply_all(listen_so, t3_child_disconnect, NULL); + so_unlock(listen_so); } /* @@ -3908,7 +4047,7 @@ t3_reset_synq(struct listen_ctx *lctx) { struct toepcb *toep; - SOCK_LOCK(lctx->lso); + so_lock(lctx->lso); while (!LIST_EMPTY(&lctx->synq_head)) { toep = LIST_FIRST(&lctx->synq_head); LIST_REMOVE(toep, synq_entry); @@ -3917,12 +4056,12 @@ t3_reset_synq(struct listen_ctx *lctx) cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); toepcb_release(toep); } - SOCK_UNLOCK(lctx->lso); + so_unlock(lctx->lso); } int -t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, +t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl, unsigned int nppods, unsigned int tag, unsigned int maxoff, unsigned int pg_off, unsigned int color) { @@ -3930,10 +4069,8 @@ t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, struct pagepod *p; struct mbuf *m; struct ulp_mem_io *req; - struct tcpcb *tp = sototcpcb(so); - struct toepcb *toep = tp->t_toe; unsigned int tid = toep->tp_tid; - const struct tom_data *td = TOM_DATA(TOE_DEV(so)); + const struct tom_data *td = TOM_DATA(toep->tp_toedev); unsigned int ppod_addr = tag * PPOD_SIZE + 
td->ddp_llimit; CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", @@ -4024,7 +4161,7 @@ mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command. */ static void -mk_rx_data_ack_ulp(struct socket *so,struct cpl_rx_data_ack *ack, +mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack, unsigned int tid, unsigned int credits) { struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack; @@ -4033,7 +4170,7 @@ mk_rx_data_ack_ulp(struct socket *so,struct cpl_rx_data_ack *ack, txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8)); OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | - V_RX_DACK_MODE(TOM_TUNABLE(TOE_DEV(so), delack)) | + V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) | V_RX_CREDITS(credits)); } @@ -4048,7 +4185,9 @@ t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx) struct cpl_get_tcb *getreq; struct ddp_state *p = &toep->tp_ddp_state; +#if 0 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); +#endif wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) + sizeof(*getreq); m = m_gethdr_nofail(wrlen); @@ -4126,7 +4265,9 @@ t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", bufidx, tag0, tag1, len); +#if 0 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); +#endif wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); m = m_gethdr_nofail(wrlen); m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); @@ -4202,7 +4343,9 @@ t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); +#if 0 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); +#endif wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? 
sizeof(*req) : 0) + (len1 ? sizeof(*req) : 0) + (modulate ? sizeof(struct cpl_rx_data_ack) : 0); @@ -4236,7 +4379,7 @@ t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, ddp_flags); if (modulate) { - mk_rx_data_ack_ulp(toeptoso(toep), + mk_rx_data_ack_ulp(toep, (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, toep->tp_copied_seq - toep->tp_rcv_wup); toep->tp_rcv_wup = toep->tp_copied_seq; diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c index 2f9a92eefa2c..e6cfa59bc926 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c @@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -64,6 +63,9 @@ __FBSDID("$FreeBSD$"); #include #include + +#include + #include #include #include @@ -75,7 +77,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -93,6 +94,7 @@ __FBSDID("$FreeBSD$"); #include #include + static int (*pru_sosend)(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); @@ -262,13 +264,13 @@ so_should_ddp(const struct toepcb *toep, int last_recv_len) static inline int is_ddp(const struct mbuf *m) { - return (m->m_flags & M_DDP); + return ((m->m_flags & M_DDP) != 0); } static inline int is_ddp_psh(const struct mbuf *m) { - return is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH); + return ((is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH)) != 0); } static int @@ -398,11 +400,12 @@ t3_sosend(struct socket *so, struct uio *uio) { int rv, count, hold_resid, sent, iovcnt; struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov; - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct mbuf *m; struct uio uiotmp; - + struct sockbuf *snd; + /* * Events requiring iteration: * - number of pages exceeds max hold pages for process or system @@ -418,11 +421,12 
@@ t3_sosend(struct socket *so, struct uio *uio) iovcnt = uio->uio_iovcnt; iov = uio->uio_iov; sent = 0; + snd = so_sockbuf_snd(so); sendmore: /* * Make sure we don't exceed the socket buffer */ - count = min(toep->tp_page_count, (sbspace(&so->so_snd) >> PAGE_SHIFT) + 2*PAGE_SIZE); + count = min(toep->tp_page_count, (sockbuf_sbspace(snd) >> PAGE_SHIFT) + 2*PAGE_SIZE); rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, 0); hold_resid = uiotmp.uio_resid; if (rv) @@ -455,7 +459,7 @@ sendmore: } uio->uio_resid -= m->m_pkthdr.len; sent += m->m_pkthdr.len; - sbappend(&so->so_snd, m); + sbappend(snd, m); t3_push_frames(so, TRUE); iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid); } @@ -487,7 +491,7 @@ static int cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toedev *tdev; int zcopy_thres, zcopy_enabled, rv; @@ -503,13 +507,15 @@ cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * - blocking socket XXX for now * */ - if (tp->t_flags & TF_TOE) { - tdev = TOE_DEV(so); + if (tp && tp->t_flags & TF_TOE) { + struct toepcb *toep = tp->t_toe; + + tdev = toep->tp_toedev; zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres); zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled); if (uio && (uio->uio_resid > zcopy_thres) && - (uio->uio_iovcnt < TMP_IOV_MAX) && ((so->so_state & SS_NBIO) == 0) + (uio->uio_iovcnt < TMP_IOV_MAX) && ((so_state_get(so) & SS_NBIO) == 0) && zcopy_enabled) { rv = t3_sosend(so, uio); if (rv != EAGAIN) @@ -530,8 +536,9 @@ cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { - +#ifdef notyet SOCKBUF_LOCK_ASSERT(sb); +#endif /* * First, update for the new value of nextrecord. If necessary, make * it the first record. 
@@ -554,13 +561,12 @@ sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) sb->sb_lastrecord = sb->sb_mb; } -#define IS_NONBLOCKING(so) ((so)->so_state & SS_NBIO) - +#define IS_NONBLOCKING(so) (so_state_get(so) & SS_NBIO) static int t3_soreceive(struct socket *so, int *flagsp, struct uio *uio) { - struct tcpcb *tp = sototcpcb(so); + struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct mbuf *m; uint32_t offset; @@ -568,68 +574,83 @@ t3_soreceive(struct socket *so, int *flagsp, struct uio *uio) int target; /* Read at least this many bytes */ int user_ddp_ok; struct ddp_state *p; - struct inpcb *inp = sotoinpcb(so); - + struct inpcb *inp = so_sotoinpcb(so); + int socket_state, socket_error; + struct sockbuf *rcv; + avail = offset = copied = copied_unacked = 0; flags = flagsp ? (*flagsp &~ MSG_EOR) : 0; - err = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + rcv = so_sockbuf_rcv(so); + + err = sblock(rcv, SBLOCKWAIT(flags)); p = &toep->tp_ddp_state; if (err) return (err); - SOCKBUF_LOCK(&so->so_rcv); + rcv = so_sockbuf_rcv(so); + sockbuf_lock(rcv); + if ((tp->t_flags & TF_TOE) == 0) { + sockbuf_unlock(rcv); + err = EAGAIN; + goto done_unlocked; + } + p->user_ddp_pending = 0; restart: + if ((tp->t_flags & TF_TOE) == 0) { + sockbuf_unlock(rcv); + err = EAGAIN; + goto done_unlocked; + } + len = uio->uio_resid; - m = so->so_rcv.sb_mb; - target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat; + m = rcv->sb_mb; + target = (flags & MSG_WAITALL) ? 
len : rcv->sb_lowat; user_ddp_ok = p->ubuf_ddp_ready; p->cancel_ubuf = 0; - + if (len == 0) goto done; -#if 0 - while (m && m->m_len == 0) { - so->so_rcv.sb_mb = m_free(m); - m = so->so_rcv.sb_mb; - } -#endif if (m) goto got_mbuf; /* empty receive queue */ - if (copied >= target && (so->so_rcv.sb_mb == NULL) && + if (copied >= target && (rcv->sb_mb == NULL) && !p->user_ddp_pending) goto done; + socket_state = so_state_get(so); + socket_error = so_error_get(so); + rcv = so_sockbuf_rcv(so); + if (copied) { - if (so->so_error || tp->t_state == TCPS_CLOSED || - (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))) + if (socket_error || tp->t_state == TCPS_CLOSED || + (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))) goto done; } else { - if (so->so_state & SS_NOFDREF) + if (socket_state & SS_NOFDREF) goto done; - if (so->so_error) { - err = so->so_error; - so->so_error = 0; + if (socket_error) { + err = socket_error; + socket_error = 0; goto done; } - if (so->so_rcv.sb_state & SBS_CANTRCVMORE) + if (rcv->sb_state & SBS_CANTRCVMORE) goto done; - if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) + if (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) goto done; if (tp->t_state == TCPS_CLOSED) { err = ENOTCONN; goto done; } } - if (so->so_rcv.sb_mb && !p->user_ddp_pending) { - SOCKBUF_UNLOCK(&so->so_rcv); + if (rcv->sb_mb && !p->user_ddp_pending) { + sockbuf_unlock(rcv); inp_wlock(inp); t3_cleanup_rbuf(tp, copied_unacked); inp_wunlock(inp); - SOCKBUF_LOCK(&so->so_rcv); + sockbuf_lock(rcv); copied_unacked = 0; goto restart; } @@ -637,14 +658,15 @@ restart: uio->uio_iov->iov_len > p->kbuf[0]->dgl_length && p->ubuf_ddp_ready) { p->user_ddp_pending = - !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags, 1, 1); + !t3_overlay_ubuf(toep, rcv, uio, + IS_NONBLOCKING(so), flags, 1, 1); if (p->user_ddp_pending) { p->kbuf_posted++; user_ddp_ok = 0; } } if (p->kbuf[0] && (p->kbuf_posted == 0)) { - t3_post_kbuf(so, 1, IS_NONBLOCKING(so)); + t3_post_kbuf(toep, 
1, IS_NONBLOCKING(so)); p->kbuf_posted++; } if (p->user_ddp_pending) { @@ -652,8 +674,7 @@ restart: if (copied >= target) user_ddp_ok = 0; - DPRINTF("sbwaiting 1\n"); - if ((err = sbwait(&so->so_rcv)) != 0) + if ((err = sbwait(rcv)) != 0) goto done; //for timers to work await_ddp_completion(sk, flags, &timeo); } else if (copied >= target) @@ -662,26 +683,27 @@ restart: if (copied_unacked) { int i = 0; - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); inp_wlock(inp); t3_cleanup_rbuf(tp, copied_unacked); inp_wunlock(inp); copied_unacked = 0; if (mp_ncpus > 1) - while (i++ < 200 && so->so_rcv.sb_mb == NULL) + while (i++ < 200 && rcv->sb_mb == NULL) cpu_spinwait(); - SOCKBUF_LOCK(&so->so_rcv); + sockbuf_lock(rcv); } - - if (so->so_rcv.sb_mb) + if (rcv->sb_mb) goto restart; - DPRINTF("sbwaiting 2 copied=%d target=%d avail=%d so=%p mb=%p cc=%d\n", copied, target, avail, so, - so->so_rcv.sb_mb, so->so_rcv.sb_cc); - if ((err = sbwait(&so->so_rcv)) != 0) - goto done; + if ((err = sbwait(rcv)) != 0) + goto done; } goto restart; got_mbuf: + CTR6(KTR_TOM, "t3_soreceive: ddp=%d m_len=%u resid=%u " + "m_seq=0x%08x copied_seq=0x%08x copied_unacked=%u", + is_ddp(m), m->m_pkthdr.len, len, m->m_seq, toep->tp_copied_seq, + copied_unacked); KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len, m->m_pkthdr.len)); KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x m->m_len=%d", m->m_next, m->m_nextpkt, m->m_flags, m->m_len)); @@ -690,17 +712,24 @@ got_mbuf: panic("empty mbuf and NOCOPY not set\n"); CTR0(KTR_TOM, "ddp done notification"); p->user_ddp_pending = 0; - sbdroprecord_locked(&so->so_rcv); + sbdroprecord_locked(rcv); goto done; } - - offset = toep->tp_copied_seq + copied_unacked - m->m_seq; - DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d 
is_ddp(m)=%d\n", - m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, !!is_ddp(m)); + + if (is_ddp(m)) { + KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0, + ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x", + offset, toep->tp_copied_seq, copied_unacked, m->m_seq)); + + offset = toep->tp_copied_seq + copied_unacked - m->m_seq; + } else + offset = 0; + if (offset >= m->m_pkthdr.len) - panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x " - "pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq, + panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x " + "seq 0x%x pktlen %d ddp flags 0x%x", offset, + toep->tp_copied_seq + copied_unacked, m->m_seq, m->m_pkthdr.len, m->m_ddp_flags); avail = m->m_pkthdr.len - offset; @@ -709,7 +738,6 @@ got_mbuf: panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset); avail = len; } - CTR4(KTR_TOM, "t3_soreceive: m_len=%u offset=%u len=%u m_seq=0%08x", m->m_pkthdr.len, offset, len, m->m_seq); #ifdef URGENT_DATA_SUPPORTED /* @@ -724,7 +752,7 @@ got_mbuf: if (urg_offset) { /* stop short of the urgent data */ avail = urg_offset; - } else if ((so->so_options & SO_OOBINLINE) == 0) { + } else if ((so_options_get(so) & SO_OOBINLINE) == 0) { /* First byte is urgent, skip */ toep->tp_copied_seq++; offset++; @@ -735,7 +763,7 @@ got_mbuf: } } #endif - if (is_ddp_psh(m) || offset) { + if (is_ddp_psh(m) || offset || (rcv->sb_mb && !is_ddp(m))) { user_ddp_ok = 0; #ifdef T3_TRACE T3_TRACE0(TIDTB(so), "t3_sosend: PSH"); @@ -746,7 +774,8 @@ got_mbuf: uio->uio_iov->iov_len > p->kbuf[0]->dgl_length && p->ubuf_ddp_ready) { p->user_ddp_pending = - !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags, 1, 1); + !t3_overlay_ubuf(toep, rcv, uio, + IS_NONBLOCKING(so), flags, 1, 1); if (p->user_ddp_pending) { p->kbuf_posted++; user_ddp_ok = 0; @@ -765,16 +794,23 @@ got_mbuf: if 
(__predict_true(!(flags & MSG_TRUNC))) { int resid = uio->uio_resid; - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); if ((err = copy_data(m, offset, avail, uio))) { if (err) err = EFAULT; goto done_unlocked; } - SOCKBUF_LOCK(&so->so_rcv); + + sockbuf_lock(rcv); if (avail != (resid - uio->uio_resid)) printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n", avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m)); + + if ((tp->t_flags & TF_TOE) == 0) { + sockbuf_unlock(rcv); + err = EAGAIN; + goto done_unlocked; + } } copied += avail; @@ -816,42 +852,45 @@ skip_copy: while (count > 0) { count -= m->m_len; KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); - sbfree(&so->so_rcv, m); - so->so_rcv.sb_mb = m_free(m); - m = so->so_rcv.sb_mb; + sbfree(rcv, m); + rcv->sb_mb = m_free(m); + m = rcv->sb_mb; } - sockbuf_pushsync(&so->so_rcv, nextrecord); + sockbuf_pushsync(rcv, nextrecord); #if 0 - sbdrop_locked(&so->so_rcv, m->m_pkthdr.len); + sbdrop_locked(rcv, m->m_pkthdr.len); #endif exitnow = got_psh || nomoredata; - if (copied >= target && (so->so_rcv.sb_mb == NULL) && exitnow) + if (copied >= target && (rcv->sb_mb == NULL) && exitnow) goto done; - if (copied_unacked > (so->so_rcv.sb_hiwat >> 2)) { - SOCKBUF_UNLOCK(&so->so_rcv); + if (copied_unacked > (rcv->sb_hiwat >> 2)) { + sockbuf_unlock(rcv); inp_wlock(inp); t3_cleanup_rbuf(tp, copied_unacked); inp_wunlock(inp); copied_unacked = 0; - SOCKBUF_LOCK(&so->so_rcv); + sockbuf_lock(rcv); } } if (len > 0) goto restart; done: + if ((tp->t_flags & TF_TOE) == 0) { + sockbuf_unlock(rcv); + err = EAGAIN; + goto done_unlocked; + } /* * If we can still receive decide what to do in preparation for the * next receive. 
Note that RCV_SHUTDOWN is set if the connection * transitioned to CLOSE but not if it was in that state to begin with. */ - if (__predict_true((so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) { + if (__predict_true((so_state_get(so) & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) { if (p->user_ddp_pending) { - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_rcv); user_ddp_ok = 0; - t3_cancel_ubuf(toep); - if (so->so_rcv.sb_mb) { + t3_cancel_ubuf(toep, rcv); + if (rcv->sb_mb) { if (copied < 0) copied = 0; if (len > 0) @@ -865,11 +904,11 @@ skip_copy: "chelsio_recvmsg: about to exit, repost kbuf"); #endif - t3_post_kbuf(so, 1, IS_NONBLOCKING(so)); + t3_post_kbuf(toep, 1, IS_NONBLOCKING(so)); p->kbuf_posted++; } else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) { CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid); - if (!t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so), + if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit), 0, IS_NONBLOCKING(so))) p->kbuf_posted = 1; } @@ -881,14 +920,14 @@ skip_copy: copied, len, buffers_freed, p ? p->kbuf_posted : -1, p->user_ddp_pending); #endif - SOCKBUF_UNLOCK(&so->so_rcv); + sockbuf_unlock(rcv); done_unlocked: - if (copied_unacked) { + if (copied_unacked && (tp->t_flags & TF_TOE)) { inp_wlock(inp); t3_cleanup_rbuf(tp, copied_unacked); inp_wunlock(inp); } - sbunlock(&so->so_rcv); + sbunlock(rcv); return (err); } @@ -899,8 +938,8 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, { struct toedev *tdev; int rv, zcopy_thres, zcopy_enabled, flags; - struct tcpcb *tp = sototcpcb(so); - + struct tcpcb *tp = so_sototcpcb(so); + flags = flagsp ? 
*flagsp &~ MSG_EOR : 0; /* @@ -916,30 +955,61 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, * - iovcnt is 1 * */ - - if ((tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0) + if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0) && (uio->uio_iovcnt == 1) && (mp0 == NULL)) { - tdev = TOE_DEV(so); + struct toepcb *toep = tp->t_toe; + + tdev = toep->tp_toedev; zcopy_thres = TOM_TUNABLE(tdev, ddp_thres); zcopy_enabled = TOM_TUNABLE(tdev, ddp); if ((uio->uio_resid > zcopy_thres) && (uio->uio_iovcnt == 1) && zcopy_enabled) { + CTR3(KTR_CXGB, "cxgb_soreceive: t_flags=0x%x flags=0x%x uio_resid=%d", + tp->t_flags, flags, uio->uio_resid); rv = t3_soreceive(so, flagsp, uio); if (rv != EAGAIN) return (rv); else printf("returned EAGAIN\n"); } - } else if ((tp->t_flags & TF_TOE) && uio && mp0 == NULL) - printf("skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n", - flags, uio->uio_iovcnt, so->so_rcv.sb_state); + } else if (tp && (tp->t_flags & TF_TOE) && uio && mp0 == NULL) { + struct sockbuf *rcv = so_sockbuf_rcv(so); + + log(LOG_INFO, "skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n", + flags, uio->uio_iovcnt, rcv->sb_state); + } + return pru_soreceive(so, psa, uio, mp0, controlp, flagsp); } +struct protosw cxgb_protosw; +struct pr_usrreqs cxgb_tcp_usrreqs; + + void t3_install_socket_ops(struct socket *so) { + static int copied = 0; + struct pr_usrreqs *pru; + struct protosw *psw; + + if (copied == 0) { + psw = so_protosw_get(so); + pru = psw->pr_usrreqs; + + bcopy(psw, &cxgb_protosw, sizeof(*psw)); + bcopy(pru, &cxgb_tcp_usrreqs, sizeof(*pru)); + + cxgb_protosw.pr_ctloutput = t3_ctloutput; + cxgb_protosw.pr_usrreqs = &cxgb_tcp_usrreqs; + cxgb_tcp_usrreqs.pru_sosend = cxgb_sosend; + cxgb_tcp_usrreqs.pru_soreceive = cxgb_soreceive; + } + so_protosw_set(so, &cxgb_protosw); + +#if 0 so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend; 
so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive; +#endif } diff --git a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c index 81c73164ba0b..498b40956934 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c @@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -61,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -90,6 +89,7 @@ __FBSDID("$FreeBSD$"); #include #include + #define MAX_SCHEDULE_TIMEOUT 300 /* @@ -222,13 +222,15 @@ t3_free_ddp_gl(struct ddp_gather_list *gl) * pods before failing entirely. */ static int -alloc_buf1_ppods(struct socket *so, struct ddp_state *p, +alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p, unsigned long addr, unsigned int len) { int err, tag, npages, nppods; - struct tom_data *d = TOM_DATA(TOE_DEV(so)); + struct tom_data *d = TOM_DATA(toep->tp_toedev); +#if 0 SOCKBUF_LOCK_ASSERT(&so->so_rcv); +#endif npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; nppods = min(pages2ppods(npages), MAX_PPODS); nppods = roundup2(nppods, PPOD_CLUSTER_SIZE); @@ -243,7 +245,7 @@ alloc_buf1_ppods(struct socket *so, struct ddp_state *p, p->ubuf_nppods = nppods; p->ubuf_tag = tag; #if NUM_DDP_KBUF == 1 - t3_set_ddp_tag(so, 1, tag << 6); + t3_set_ddp_tag(toep, 1, tag << 6); #endif return (0); } @@ -255,7 +257,7 @@ alloc_buf1_ppods(struct socket *so, struct ddp_state *p, #define UBUF_OFFSET 1 static __inline unsigned long -select_ddp_flags(const struct socket *so, int buf_idx, +select_ddp_flags(const struct toepcb *toep, int buf_idx, int nonblock, int rcv_flags) { if (buf_idx == 1) { @@ -266,7 +268,7 @@ select_ddp_flags(const struct socket *so, int buf_idx, if (nonblock) return V_TF_DDP_BUF1_FLUSH(1); - return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(TOE_DEV(so), + return 
V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait)); } @@ -277,7 +279,7 @@ select_ddp_flags(const struct socket *so, int buf_idx, if (nonblock) return V_TF_DDP_BUF0_FLUSH(1); - return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(TOE_DEV(so), ddp_push_wait)); + return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait)); } /* @@ -289,21 +291,22 @@ select_ddp_flags(const struct socket *so, int buf_idx, * needs to be done separately. */ static void -t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate, +t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate, int activate, int nonblock) { - struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; unsigned long flags; - + +#if 0 SOCKBUF_LOCK_ASSERT(&so->so_rcv); +#endif p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset; p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0; p->buf_state[bufidx].gl = p->kbuf[bufidx]; p->cur_buf = bufidx; p->kbuf_idx = bufidx; - flags = select_ddp_flags(so, bufidx, nonblock, 0); + flags = select_ddp_flags(toep, bufidx, nonblock, 0); if (!bufidx) t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags | V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) | @@ -342,19 +345,20 @@ t3_repost_kbuf(struct socket *so, unsigned int bufidx, int modulate, * The current implementation handles iovecs with only one entry. 
*/ static int -setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length) +setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length) { int err; unsigned int len; struct ddp_gather_list *gl = NULL; - struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; struct iovec *iov = uio->uio_iov; vm_offset_t addr = (vm_offset_t)iov->iov_base - oft; +#ifdef notyet SOCKBUF_LOCK_ASSERT(&so->so_rcv); +#endif if (__predict_false(p->ubuf_nppods == 0)) { - err = alloc_buf1_ppods(so, p, addr, iov->iov_len + oft); + err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft); if (err) return (err); } @@ -363,7 +367,7 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length) len -= addr & PAGE_MASK; if (len > M_TCB_RX_DDP_BUF0_LEN) len = M_TCB_RX_DDP_BUF0_LEN; - len = min(len, sototcpcb(so)->rcv_wnd - 32768); + len = min(len, toep->tp_tp->rcv_wnd - 32768); len = min(len, iov->iov_len + oft); if (len <= p->kbuf[0]->dgl_length) { @@ -378,7 +382,7 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length) if (p->ubuf) t3_free_ddp_gl(p->ubuf); p->ubuf = gl; - t3_setup_ppods(so, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len, + t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len, gl->dgl_offset, 0); } *length = len; @@ -389,26 +393,19 @@ setup_uio_ppods(struct socket *so, const struct uio *uio, int oft, int *length) * */ void -t3_cancel_ubuf(struct toepcb *toep) +t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv) { struct ddp_state *p = &toep->tp_ddp_state; int ubuf_pending = t3_ddp_ubuf_pending(toep); - struct socket *so = toeptoso(toep); - int err = 0, count=0; + int err = 0, count = 0; if (p->ubuf == NULL) return; + + sockbuf_lock_assert(rcv); - SOCKBUF_LOCK_ASSERT(&so->so_rcv); p->cancel_ubuf = 1; - while (ubuf_pending && !(so->so_rcv.sb_state & SBS_CANTRCVMORE)) { -#ifdef T3_TRACE - T3_TRACE3(TB(p), - "t3_cancel_ubuf: flags0 0x%x flags1 
0x%x get_tcb_count %d", - p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), - p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), - p->get_tcb_count); -#endif + while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_TOM, "t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d", p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY), @@ -417,20 +414,22 @@ t3_cancel_ubuf(struct toepcb *toep) if (p->get_tcb_count == 0) t3_cancel_ddpbuf(toep, p->cur_buf); else - CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d so=%p SBS_CANTRCVMORE=%d", - err, p->get_tcb_count, so->so_rcv.sb_timeo, so, - !!(so->so_rcv.sb_state & SBS_CANTRCVMORE)); + CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d", + err, p->get_tcb_count, rcv->sb_timeo, rcv, + !!(rcv->sb_state & SBS_CANTRCVMORE)); - while (p->get_tcb_count && !(so->so_rcv.sb_state & SBS_CANTRCVMORE)) { + while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) { if (count & 0xfffffff) - CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d so=%p count=%d", - err, p->get_tcb_count, so->so_rcv.sb_timeo, so, count); + CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d", + err, p->get_tcb_count, rcv->sb_timeo, rcv, count); count++; - err = sbwait(&so->so_rcv); + err = sbwait(rcv); } ubuf_pending = t3_ddp_ubuf_pending(toep); } p->cancel_ubuf = 0; + p->user_ddp_pending = 0; + } #define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \ @@ -445,31 +444,34 @@ t3_cancel_ubuf(struct toepcb *toep) * Post a user buffer as an overlay on top of the current kernel buffer. 
*/ int -t3_overlay_ubuf(struct socket *so, const struct uio *uio, - int nonblock, int rcv_flags, int modulate, int post_kbuf) +t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv, + const struct uio *uio, int nonblock, int rcv_flags, + int modulate, int post_kbuf) { int err, len, ubuf_idx; unsigned long flags; - struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; if (p->kbuf[0] == NULL) { return (EINVAL); } - - SOCKBUF_LOCK_ASSERT(&so->so_rcv); - err = setup_uio_ppods(so, uio, 0, &len); - if (err) { + sockbuf_unlock(rcv); + err = setup_uio_ppods(toep, uio, 0, &len); + sockbuf_lock(rcv); + if (err) return (err); - } + if ((rcv->sb_state & SBS_CANTRCVMORE) || + (toep->tp_tp->t_flags & TF_TOE) == 0) + return (EINVAL); + ubuf_idx = p->kbuf_idx; p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP; /* Use existing offset */ /* Don't need to update .gl, user buffer isn't copied. */ p->cur_buf = ubuf_idx; - flags = select_ddp_flags(so, ubuf_idx, nonblock, rcv_flags); + flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags); if (post_kbuf) { struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1]; @@ -565,14 +567,13 @@ t3_release_ddp_resources(struct toepcb *toep) } void -t3_post_kbuf(struct socket *so, int modulate, int nonblock) +t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock) { - struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; - t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6); - t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length); - t3_repost_kbuf(so, p->cur_buf, modulate, 1, nonblock); + t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6); + t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length); + t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock); #ifdef T3_TRACE T3_TRACE1(TIDTB(so), "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf); @@ -586,12 +587,11 @@ t3_post_kbuf(struct socket *so, int modulate, int nonblock) * 
open. */ int -t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, int nonblock) +t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock) { int i, err = ENOMEM; static vm_pindex_t color; unsigned int nppods, kbuf_pages, idx = 0; - struct toepcb *toep = sototcpcb(so)->t_toe; struct ddp_state *p = &toep->tp_ddp_state; struct tom_data *d = TOM_DATA(toep->tp_toedev); @@ -599,8 +599,9 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN) return (EINVAL); +#ifdef notyet SOCKBUF_LOCK_ASSERT(&so->so_rcv); - +#endif kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; nppods = pages2ppods(kbuf_pages); @@ -643,18 +644,18 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in pci_map_page(p->pdev, p->kbuf[idx]->pages[i], 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); #endif - t3_setup_ppods(so, p->kbuf[idx], nppods, p->kbuf_tag[idx], + t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx], p->kbuf[idx]->dgl_length, 0, 0); } cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid); - t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6); - t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length); - t3_repost_kbuf(so, 0, 0, 1, nonblock); + t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6); + t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length); + t3_repost_kbuf(toep, 0, 0, 1, nonblock); - t3_set_rcv_coalesce_enable(so, - TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce)); - t3_set_dack_mss(so, TOM_TUNABLE(TOE_DEV(so), delack)>>1); + t3_set_rcv_coalesce_enable(toep, + TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce)); + t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1); #ifdef T3_TRACE T3_TRACE4(TIDTB(so), @@ -664,7 +665,6 @@ t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, in CTR4(KTR_TOM, "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d", kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]); - DELAY(100000); 
cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid); return (0); diff --git a/sys/dev/cxgb/ulp/tom/cxgb_defs.h b/sys/dev/cxgb/ulp/tom/cxgb_defs.h index 2800a853e885..8c14f5ae89c8 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_defs.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_defs.h @@ -77,12 +77,14 @@ void toepcb_hold(struct toepcb *); void toepcb_release(struct toepcb *); void toepcb_init(struct toepcb *); -void t3_set_rcv_coalesce_enable(struct socket *so, int on_off); -void t3_set_dack_mss(struct socket *so, int on); -void t3_set_keepalive(struct socket *so, int on_off); -void t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag); -void t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, +void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off); +void t3_set_dack_mss(struct toepcb *toep, int on); +void t3_set_keepalive(struct toepcb *toep, int on_off); +void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag); +void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset, unsigned int len); -int t3_get_tcb(struct socket *so); +int t3_get_tcb(struct toepcb *toep); + +int t3_ctloutput(struct socket *so, struct sockopt *sopt); #endif diff --git a/sys/dev/cxgb/cxgb_l2t.c b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c similarity index 99% rename from sys/dev/cxgb/cxgb_l2t.c rename to sys/dev/cxgb/ulp/tom/cxgb_l2t.c index 43d09f2510e3..ab5fbe740114 100644 --- a/sys/dev/cxgb/cxgb_l2t.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c @@ -540,4 +540,3 @@ t3_free_l2t(struct l2t_data *d) cxgb_free_mem(d); } - diff --git a/sys/dev/cxgb/cxgb_l2t.h b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h similarity index 99% rename from sys/dev/cxgb/cxgb_l2t.h rename to sys/dev/cxgb/ulp/tom/cxgb_l2t.h index 954d02ad1a68..3575f6fa98b1 100644 --- a/sys/dev/cxgb/cxgb_l2t.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. 
All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/cxgb/ulp/tom/cxgb_listen.c b/sys/dev/cxgb/ulp/tom/cxgb_listen.c index acbad6f26318..1d15cf292dcd 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_listen.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_listen.c @@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h b/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h index e37c9b1240d7..2cbfa7b38b28 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h @@ -109,13 +109,13 @@ struct ddp_state { unsigned short kbuf_noinval; unsigned short kbuf_idx; /* which HW buffer is used for kbuf */ struct ddp_gather_list *ubuf; + int user_ddp_pending; unsigned int ubuf_nppods; /* # of page pods for buffer 1 */ unsigned int ubuf_tag; unsigned int ubuf_ddp_ready; + int cancel_ubuf; int get_tcb_count; unsigned int kbuf_posted; - int cancel_ubuf; - int user_ddp_pending; unsigned int kbuf_nppods[NUM_DDP_KBUF]; unsigned int kbuf_tag[NUM_DDP_KBUF]; struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */ @@ -133,6 +133,7 @@ enum { }; #include +struct sockbuf; /* * Returns 1 if a UBUF DMA buffer might be active. 
@@ -153,7 +154,7 @@ t3_ddp_ubuf_pending(struct toepcb *toep) (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)); } -int t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, +int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl, unsigned int nppods, unsigned int tag, unsigned int maxoff, unsigned int pg_off, unsigned int color); int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag); @@ -161,13 +162,14 @@ void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n); void t3_free_ddp_gl(struct ddp_gather_list *gl); int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len); //void t3_repost_kbuf(struct socket *so, int modulate, int activate); -void t3_post_kbuf(struct socket *so, int modulate, int nonblock); -int t3_post_ubuf(struct socket *so, const struct uio *uio, int nonblock, +void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock); +int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock, int rcv_flags, int modulate, int post_kbuf); -void t3_cancel_ubuf(struct toepcb *toep); -int t3_overlay_ubuf(struct socket *so, const struct uio *uio, int nonblock, +void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv); +int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv, + const struct uio *uio, int nonblock, int rcv_flags, int modulate, int post_kbuf); -int t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall, int nonblock); +int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock); void t3_cleanup_ddp(struct toepcb *toep); void t3_release_ddp_resources(struct toepcb *toep); void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx); diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c new file mode 100644 index 000000000000..1d9684291e16 --- /dev/null +++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c @@ -0,0 +1,360 @@ +/*- + * Copyright (c) 
2007, Chelsio Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of the Chelsio Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * grab bag of accessor routines that will either be moved to netinet + * or removed + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +/* + * This file contains code as a short-term staging area before it is moved in + * to sys/netinet/tcp_offload.c + */ + +void +tcp_offload_twstart(struct tcpcb *tp) +{ + + INP_INFO_WLOCK(&tcbinfo); + inp_wlock(tp->t_inpcb); + tcp_twstart(tp); + INP_INFO_WUNLOCK(&tcbinfo); +} + +void +tcp_offload_twstart_disconnect(struct tcpcb *tp) +{ + struct socket *so; + + INP_INFO_WLOCK(&tcbinfo); + inp_wlock(tp->t_inpcb); + so = tp->t_inpcb->inp_socket; + tcp_twstart(tp); + if (so) + soisdisconnected(so); + INP_INFO_WUNLOCK(&tcbinfo); +} + +struct tcpcb * +tcp_offload_close(struct tcpcb *tp) +{ + + INP_INFO_WLOCK(&tcbinfo); + INP_WLOCK(tp->t_inpcb); + tp = tcp_close(tp); + INP_INFO_WUNLOCK(&tcbinfo); + if (tp) + INP_WUNLOCK(tp->t_inpcb); + + return (tp); +} + +struct tcpcb * +tcp_offload_drop(struct tcpcb *tp, int error) +{ + + INP_INFO_WLOCK(&tcbinfo); + INP_WLOCK(tp->t_inpcb); + tp = tcp_drop(tp, error); + INP_INFO_WUNLOCK(&tcbinfo); + if (tp) + INP_WUNLOCK(tp->t_inpcb); + + return (tp); +} + +void +inp_apply_all(void (*func)(struct inpcb *, void *), void *arg) +{ + struct inpcb *inp; + + INP_INFO_RLOCK(&tcbinfo); + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { + INP_WLOCK(inp); + func(inp, arg); + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK(&tcbinfo); +} + +struct socket * +inp_inpcbtosocket(struct inpcb *inp) +{ + + INP_WLOCK_ASSERT(inp); + return (inp->inp_socket); +} + +struct tcpcb * +inp_inpcbtotcpcb(struct inpcb *inp) +{ + + INP_WLOCK_ASSERT(inp); + return ((struct tcpcb *)inp->inp_ppcb); +} + +int +inp_ip_tos_get(const struct inpcb *inp) +{ + + return (inp->inp_ip_tos); +} + +void +inp_ip_tos_set(struct 
inpcb *inp, int val) +{ + + inp->inp_ip_tos = val; +} + +void +inp_4tuple_get(const struct inpcb *inp, uint32_t *laddr, uint16_t *lp, uint32_t *faddr, uint16_t *fp) +{ + + memcpy(laddr, &inp->inp_laddr, 4); + memcpy(faddr, &inp->inp_faddr, 4); + + *lp = inp->inp_lport; + *fp = inp->inp_fport; +} + +void +so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg) +{ + + TAILQ_FOREACH(so, &so->so_comp, so_list) + func(so, arg); +} + +struct tcpcb * +so_sototcpcb(struct socket *so) +{ + + return (sototcpcb(so)); +} + +struct inpcb * +so_sotoinpcb(struct socket *so) +{ + + return (sotoinpcb(so)); +} + +struct sockbuf * +so_sockbuf_rcv(struct socket *so) +{ + + return (&so->so_rcv); +} + +struct sockbuf * +so_sockbuf_snd(struct socket *so) +{ + + return (&so->so_snd); +} + +int +so_state_get(const struct socket *so) +{ + + return (so->so_state); +} + +void +so_state_set(struct socket *so, int val) +{ + + so->so_state = val; +} + +int +so_options_get(const struct socket *so) +{ + + return (so->so_options); +} + +void +so_options_set(struct socket *so, int val) +{ + + so->so_options = val; +} + +int +so_error_get(const struct socket *so) +{ + + return (so->so_error); +} + +void +so_error_set(struct socket *so, int val) +{ + + so->so_error = val; +} + +int +so_linger_get(const struct socket *so) +{ + + return (so->so_linger); +} + +void +so_linger_set(struct socket *so, int val) +{ + + so->so_linger = val; +} + +struct protosw * +so_protosw_get(const struct socket *so) +{ + + return (so->so_proto); +} + +void +so_protosw_set(struct socket *so, struct protosw *val) +{ + + so->so_proto = val; +} + +void +so_sorwakeup(struct socket *so) +{ + + sorwakeup(so); +} + +void +so_sowwakeup(struct socket *so) +{ + + sowwakeup(so); +} + +void +so_sorwakeup_locked(struct socket *so) +{ + + sorwakeup_locked(so); +} + +void +so_sowwakeup_locked(struct socket *so) +{ + + sowwakeup_locked(so); +} + +void +so_lock(struct socket *so) +{ + SOCK_LOCK(so); +} + 
+void +so_unlock(struct socket *so) +{ + SOCK_UNLOCK(so); +} + +void +sockbuf_lock(struct sockbuf *sb) +{ + + SOCKBUF_LOCK(sb); +} + +void +sockbuf_lock_assert(struct sockbuf *sb) +{ + + SOCKBUF_LOCK_ASSERT(sb); +} + +void +sockbuf_unlock(struct sockbuf *sb) +{ + + SOCKBUF_UNLOCK(sb); +} + +int +sockbuf_sbspace(struct sockbuf *sb) +{ + + return (sbspace(sb)); +} + +int +syncache_offload_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, + struct socket **lsop, struct mbuf *m) +{ + int rc; + + INP_INFO_WLOCK(&tcbinfo); + rc = syncache_expand(inc, to, th, lsop, m); + INP_INFO_WUNLOCK(&tcbinfo); + + return (rc); +} diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h new file mode 100644 index 000000000000..a1a4bf356361 --- /dev/null +++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h @@ -0,0 +1,205 @@ +/* $FreeBSD$ */ + +#ifndef CXGB_TCP_OFFLOAD_H_ +#define CXGB_TCP_OFFLOAD_H_ + +struct tcpcb; +struct socket; +struct sockbuf; + +void tcp_offload_twstart(struct tcpcb *tp); +void tcp_offload_twstart_disconnect(struct tcpcb *tp); +struct tcpcb *tcp_offload_close(struct tcpcb *tp); +struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error); + +void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg); +struct socket *inp_inpcbtosocket(struct inpcb *inp); +struct tcpcb *inp_inpcbtotcpcb(struct inpcb *inp); + +int inp_ip_tos_get(const struct inpcb *); +void inp_ip_tos_set(struct inpcb *, int); +void inp_4tuple_get(const struct inpcb *inp, uint32_t *, uint16_t *, uint32_t *, uint16_t *); + +struct tcpcb *so_sototcpcb(struct socket *so); +struct inpcb *so_sotoinpcb(struct socket *so); +struct sockbuf *so_sockbuf_snd(struct socket *); +struct sockbuf *so_sockbuf_rcv(struct socket *); + +int so_state_get(const struct socket *); +void so_state_set(struct socket *, int); + +int so_options_get(const struct socket *); +void so_options_set(struct socket *, int); + +int so_error_get(const struct socket *); +void 
so_error_set(struct socket *, int); + +int so_linger_get(const struct socket *); +void so_linger_set(struct socket *, int); + +struct protosw *so_protosw_get(const struct socket *); +void so_protosw_set(struct socket *, struct protosw *); + +void so_sorwakeup_locked(struct socket *so); +void so_sowwakeup_locked(struct socket *so); + +void so_sorwakeup(struct socket *so); +void so_sowwakeup(struct socket *so); + +void so_lock(struct socket *so); +void so_unlock(struct socket *so); + +void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg); + + +void sockbuf_lock(struct sockbuf *); +void sockbuf_lock_assert(struct sockbuf *); +void sockbuf_unlock(struct sockbuf *); +int sockbuf_sbspace(struct sockbuf *); + +struct tcphdr; +struct tcpopt; + +int syncache_offload_expand(struct in_conninfo *, struct tcpopt *, + struct tcphdr *, struct socket **, struct mbuf *); + +#ifndef _SYS_SOCKETVAR_H_ +#include +#include + +/* + * Constants for sb_flags field of struct sockbuf. + */ +#define SB_MAX (256*1024) /* default for max chars in sockbuf */ +/* + * Constants for sb_flags field of struct sockbuf. 
+ */ +#define SB_WAIT 0x04 /* someone is waiting for data/space */ +#define SB_SEL 0x08 /* someone is selecting */ +#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ +#define SB_UPCALL 0x20 /* someone wants an upcall */ +#define SB_NOINTR 0x40 /* operations not interruptible */ +#define SB_AIO 0x80 /* AIO operations queued */ +#define SB_KNOTE 0x100 /* kernel note attached */ +#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ +#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ + + +struct sockbuf { + struct selinfo sb_sel; /* process selecting read/write */ + struct mtx sb_mtx; /* sockbuf lock */ + struct sx sb_sx; /* prevent I/O interlacing */ + short sb_state; /* (c/d) socket state on sockbuf */ +#define sb_startzero sb_mb + struct mbuf *sb_mb; /* (c/d) the mbuf chain */ + struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */ + struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last + * record in socket buffer */ + struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */ + u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */ + u_int sb_cc; /* (c/d) actual chars in buffer */ + u_int sb_hiwat; /* (c/d) max actual char count */ + u_int sb_mbcnt; /* (c/d) chars of mbufs used */ + u_int sb_mbmax; /* (c/d) max chars of mbufs to use */ + u_int sb_ctl; /* (c/d) non-data chars in buffer */ + int sb_lowat; /* (c/d) low water mark */ + int sb_timeo; /* (c/d) timeout for read/write */ + short sb_flags; /* (c/d) flags, see below */ +}; + +void sbappend(struct sockbuf *sb, struct mbuf *m); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m); +void sbappendstream(struct sockbuf *sb, struct mbuf *m); +void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m); +void sbdrop(struct sockbuf *sb, int len); +void sbdrop_locked(struct sockbuf *sb, int len); +void sbdroprecord(struct sockbuf *sb); +void sbdroprecord_locked(struct sockbuf *sb); +void sbflush(struct sockbuf *sb); +void sbflush_locked(struct 
sockbuf *sb); +int sbwait(struct sockbuf *sb); +int sblock(struct sockbuf *, int); +void sbunlock(struct sockbuf *); + + + +/* adjust counters in sb reflecting allocation of m */ +#define sballoc(sb, m) { \ + (sb)->sb_cc += (m)->m_len; \ + if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ + (sb)->sb_ctl += (m)->m_len; \ + (sb)->sb_mbcnt += MSIZE; \ + if ((m)->m_flags & M_EXT) \ + (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ +} + +/* adjust counters in sb reflecting freeing of m */ +#define sbfree(sb, m) { \ + (sb)->sb_cc -= (m)->m_len; \ + if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ + (sb)->sb_ctl -= (m)->m_len; \ + (sb)->sb_mbcnt -= MSIZE; \ + if ((m)->m_flags & M_EXT) \ + (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ + if ((sb)->sb_sndptr == (m)) { \ + (sb)->sb_sndptr = NULL; \ + (sb)->sb_sndptroff = 0; \ + } \ + if ((sb)->sb_sndptroff != 0) \ + (sb)->sb_sndptroff -= (m)->m_len; \ +} + +#define SS_NOFDREF 0x0001 /* no file table ref any more */ +#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ +#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ +#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ +#define SS_NBIO 0x0100 /* non-blocking ops */ +#define SS_ASYNC 0x0200 /* async i/o notify */ +#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ +#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ +/* + * Protocols can mark a socket as SS_PROTOREF to indicate that, following + * pru_detach, they still want the socket to persist, and will free it + * themselves when they are done. Protocols should only ever call sofree() + * following setting this flag in pru_detach(), and never otherwise, as + * sofree() bypasses socket reference counting. + */ +#define SS_PROTOREF 0x4000 /* strong protocol reference */ + +/* + * Socket state bits now stored in the socket buffer state field. 
+ */ +#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ +#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ +#define SBS_RCVATMARK 0x0040 /* at mark on input */ + + + +enum sopt_dir { SOPT_GET, SOPT_SET }; +struct sockopt { + enum sopt_dir sopt_dir; /* is this a get or a set? */ + int sopt_level; /* second arg of [gs]etsockopt */ + int sopt_name; /* third arg of [gs]etsockopt */ + void *sopt_val; /* fourth arg of [gs]etsockopt */ + size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ + struct thread *sopt_td; /* calling thread or null if kernel */ +}; + + +int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); +int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); + + +void soisconnected(struct socket *so); +void soisconnecting(struct socket *so); +void soisdisconnected(struct socket *so); +void soisdisconnecting(struct socket *so); +void socantrcvmore(struct socket *so); +void socantrcvmore_locked(struct socket *so); +void socantsendmore(struct socket *so); +void socantsendmore_locked(struct socket *so); + +#endif /* !NET_CORE */ + + +#endif /* CXGB_TCP_OFFLOAD_H_ */ diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom.c b/sys/dev/cxgb/ulp/tom/cxgb_tom.c index 4f80fefb82b8..751b1cd0b051 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_tom.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_tom.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include @@ -60,12 +59,23 @@ __FBSDID("$FreeBSD$"); #include #include + +#include #include #include #include #include + +#ifdef CONFIG_DEFINED +#include +#else +#include +#endif + +#include #include + #include #include #include @@ -73,7 +83,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -82,17 +91,27 @@ __FBSDID("$FreeBSD$"); #include + + + static int activated = 1; TUNABLE_INT("hw.t3toe.activated", &activated); SYSCTL_NODE(_hw, OID_AUTO, t3toe, CTLFLAG_RD, 0, "T3 toe driver parameters"); 
SYSCTL_UINT(_hw_t3toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0, "enable TOE at init time"); + +TAILQ_HEAD(, adapter) adapter_list; +static struct rwlock adapter_list_lock; + static TAILQ_HEAD(, tom_data) cxgb_list; static struct mtx cxgb_list_lock; +static const unsigned int MAX_ATIDS = 64 * 1024; +static const unsigned int ATID_BASE = 0x100000; static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry); static void cxgb_register_listeners(void); +static void t3c_tom_add(struct t3cdev *cdev); /* * Handlers for each CPL opcode @@ -117,6 +136,7 @@ static struct tom_info t3_tom_info = { struct cxgb_client t3c_tom_client = { .name = "tom_cxgb3", + .add = t3c_tom_add, .remove = NULL, .handlers = tom_cpl_handlers, .redirect = NULL @@ -144,7 +164,7 @@ toepcb_alloc(void) { struct toepcb *toep; - toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT|M_ZERO); + toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO); if (toep == NULL) return (NULL); @@ -170,18 +190,19 @@ void toepcb_release(struct toepcb *toep) { if (toep->tp_refcount == 1) { - free(toep, M_DEVBUF); + free(toep, M_CXGB); return; } atomic_add_acq_int(&toep->tp_refcount, -1); } + /* * Add a T3 offload device to the list of devices we are managing. */ static void t3cdev_add(struct tom_data *t) -{ +{ mtx_lock(&cxgb_list_lock); TAILQ_INSERT_TAIL(&cxgb_list, t, entry); mtx_unlock(&cxgb_list_lock); @@ -206,6 +227,483 @@ cdev2type(struct t3cdev *cdev) return (type); } +/* + * Allocate and initialize the TID tables. Returns 0 on success. 
+ */ +static int +init_tid_tabs(struct tid_info *t, unsigned int ntids, + unsigned int natids, unsigned int nstids, + unsigned int atid_base, unsigned int stid_base) +{ + unsigned long size = ntids * sizeof(*t->tid_tab) + + natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab); + + t->tid_tab = cxgb_alloc_mem(size); + if (!t->tid_tab) + return (ENOMEM); + + t->stid_tab = (union listen_entry *)&t->tid_tab[ntids]; + t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids]; + t->ntids = ntids; + t->nstids = nstids; + t->stid_base = stid_base; + t->sfree = NULL; + t->natids = natids; + t->atid_base = atid_base; + t->afree = NULL; + t->stids_in_use = t->atids_in_use = 0; + atomic_set_int(&t->tids_in_use, 0); + mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF); + mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF); + + /* + * Setup the free lists for stid_tab and atid_tab. + */ + if (nstids) { + while (--nstids) + t->stid_tab[nstids - 1].next = &t->stid_tab[nstids]; + t->sfree = t->stid_tab; + } + if (natids) { + while (--natids) + t->atid_tab[natids - 1].next = &t->atid_tab[natids]; + t->afree = t->atid_tab; + } + return 0; +} + +static void +free_tid_maps(struct tid_info *t) +{ + mtx_destroy(&t->stid_lock); + mtx_destroy(&t->atid_lock); + cxgb_free_mem(t->tid_tab); +} + +static inline void +add_adapter(adapter_t *adap) +{ + rw_wlock(&adapter_list_lock); + TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry); + rw_wunlock(&adapter_list_lock); +} + +static inline void +remove_adapter(adapter_t *adap) +{ + rw_wlock(&adapter_list_lock); + TAILQ_REMOVE(&adapter_list, adap, adapter_entry); + rw_wunlock(&adapter_list_lock); +} + +/* + * Populate a TID_RELEASE WR. The mbuf must be already properly sized.
+ */ +static inline void +mk_tid_release(struct mbuf *m, unsigned int tid) +{ + struct cpl_tid_release *req; + + m_set_priority(m, CPL_PRIORITY_SETUP); + req = mtod(m, struct cpl_tid_release *); + m->m_pkthdr.len = m->m_len = sizeof(*req); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); +} + +static void +t3_process_tid_release_list(void *data, int pending) +{ + struct mbuf *m; + struct t3cdev *tdev = data; + struct t3c_data *td = T3C_DATA (tdev); + + mtx_lock(&td->tid_release_lock); + while (td->tid_release_list) { + struct toe_tid_entry *p = td->tid_release_list; + + td->tid_release_list = (struct toe_tid_entry *)p->ctx; + mtx_unlock(&td->tid_release_lock); + m = m_get(M_WAIT, MT_DATA); + mk_tid_release(m, p - td->tid_maps.tid_tab); + cxgb_ofld_send(tdev, m); + p->ctx = NULL; + mtx_lock(&td->tid_release_lock); + } + mtx_unlock(&td->tid_release_lock); +} + +int +cxgb_offload_activate(struct adapter *adapter) +{ + struct t3cdev *dev = &adapter->tdev; + int natids, err; + struct t3c_data *t; + struct tid_range stid_range, tid_range; + struct mtutab mtutab; + unsigned int l2t_capacity; + + t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO); + if (!t) + return (ENOMEM); + dev->adapter = adapter; + + err = (EOPNOTSUPP); + if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 || + dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 || + dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 || + dev->ctl(dev, GET_MTUS, &mtutab) < 0 || + dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 || + dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) { + device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__); + goto out_free; + } + + err = (ENOMEM); + L2DATA(dev) = t3_init_l2t(l2t_capacity); + if (!L2DATA(dev)) { + device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__); + goto out_free; + } + natids = min(tid_range.num / 2, MAX_ATIDS); + err = init_tid_tabs(&t->tid_maps, tid_range.num, 
natids, + stid_range.num, ATID_BASE, stid_range.base); + if (err) { + device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__); + goto out_free_l2t; + } + + t->mtus = mtutab.mtus; + t->nmtus = mtutab.size; + + TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev); + mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF); + t->dev = dev; + + T3C_DATA (dev) = t; + dev->recv = process_rx; + dev->arp_update = t3_l2t_update; + /* Register netevent handler once */ + if (TAILQ_EMPTY(&adapter_list)) { +#if defined(CONFIG_CHELSIO_T3_MODULE) + if (prepare_arp_with_t3core()) + log(LOG_ERR, "Unable to set offload capabilities\n"); +#endif + } + CTR1(KTR_CXGB, "adding adapter %p", adapter); + add_adapter(adapter); + device_printf(adapter->dev, "offload started\n"); + adapter->flags |= CXGB_OFLD_INIT; + return (0); + +out_free_l2t: + t3_free_l2t(L2DATA(dev)); + L2DATA(dev) = NULL; +out_free: + free(t, M_CXGB); + return (err); +} + +void +cxgb_offload_deactivate(struct adapter *adapter) +{ + struct t3cdev *tdev = &adapter->tdev; + struct t3c_data *t = T3C_DATA(tdev); + + printf("removing adapter %p\n", adapter); + remove_adapter(adapter); + if (TAILQ_EMPTY(&adapter_list)) { +#if defined(CONFIG_CHELSIO_T3_MODULE) + restore_arp_sans_t3core(); +#endif + } + free_tid_maps(&t->tid_maps); + T3C_DATA(tdev) = NULL; + t3_free_l2t(L2DATA(tdev)); + L2DATA(tdev) = NULL; + mtx_destroy(&t->tid_release_lock); + free(t, M_CXGB); +} + +/* + * Sends an mbuf to a T3C driver after dealing with any active network taps.
+ */ +int +cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m) +{ + int r; + + r = dev->send(dev, m); + return r; +} + +static struct ifnet * +get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan) +{ + int i; + + for_each_port(adapter, i) { +#ifdef notyet + const struct vlan_group *grp; +#endif + const struct port_info *p = &adapter->port[i]; + struct ifnet *ifp = p->ifp; + + if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) { +#ifdef notyet + + if (vlan && vlan != EVL_VLID_MASK) { + grp = p->vlan_grp; + dev = grp ? grp->vlan_devices[vlan] : NULL; + } else + while (dev->master) + dev = dev->master; +#endif + return (ifp); + } + } + return (NULL); +} + +static inline void +failover_fixup(adapter_t *adapter, int port) +{ + if (adapter->params.rev == 0) { + struct ifnet *ifp = adapter->port[port].ifp; + struct cmac *mac = &adapter->port[port].mac; + if (!(ifp->if_flags & IFF_UP)) { + /* Failover triggered by the interface ifdown */ + t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset, + F_TXEN); + t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset); + } else { + /* Failover triggered by the interface link down */ + t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0); + t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset); + t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, + F_RXEN); + } + } +} + +static int +cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data) +{ + int ret = 0; + struct ulp_iscsi_info *uiip = data; + + switch (req) { + case ULP_ISCSI_GET_PARAMS: + uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT); + uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT); + uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK); + /* + * On tx, the iscsi pdu has to be <= tx page size and has to + * fit into the Tx PM FIFO. 
+ */ + uiip->max_txsz = min(adapter->params.tp.tx_pg_size, + t3_read_reg(adapter, A_PM1_TX_CFG) >> 17); + /* on rx, the iscsi pdu has to be < rx page size and the + whole pdu + cpl headers has to fit into one sge buffer */ + /* also check the max rx data length programmed in TP */ + uiip->max_rxsz = min(uiip->max_rxsz, + ((t3_read_reg(adapter, A_TP_PARA_REG2)) + >> S_MAXRXDATA) & M_MAXRXDATA); + break; + case ULP_ISCSI_SET_PARAMS: + t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask); + break; + default: + ret = (EOPNOTSUPP); + } + return ret; +} + +/* Response queue used for RDMA events. */ +#define ASYNC_NOTIF_RSPQ 0 + +static int +cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data) +{ + int ret = 0; + + switch (req) { + case RDMA_GET_PARAMS: { + struct rdma_info *req = data; + + req->udbell_physbase = rman_get_start(adapter->udbs_res); + req->udbell_len = rman_get_size(adapter->udbs_res); + req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT); + req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT); + req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT); + req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT); + req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT); + req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT); + req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break; + } + case RDMA_CQ_OP: { + struct rdma_cq_op *req = data; + + /* may be called in any context */ + mtx_lock_spin(&adapter->sge.reg_lock); + ret = t3_sge_cqcntxt_op(adapter, req->id, req->op, + req->credits); + mtx_unlock_spin(&adapter->sge.reg_lock); + break; + } + case RDMA_GET_MEM: { + struct ch_mem_range *t = data; + struct mc7 *mem; + + if ((t->addr & 7) || (t->len & 7)) + return (EINVAL); + if (t->mem_id == MEM_CM) + mem = &adapter->cm; + else if (t->mem_id == MEM_PMRX) + mem = &adapter->pmrx; + else if (t->mem_id == MEM_PMTX) + mem = &adapter->pmtx; + else + return (EINVAL); + + ret = 
t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf); + if (ret) + return (ret); + break; + } + case RDMA_CQ_SETUP: { + struct rdma_cq_setup *req = data; + + mtx_lock_spin(&adapter->sge.reg_lock); + ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr, + req->size, ASYNC_NOTIF_RSPQ, + req->ovfl_mode, req->credits, + req->credit_thres); + mtx_unlock_spin(&adapter->sge.reg_lock); + break; + } + case RDMA_CQ_DISABLE: + mtx_lock_spin(&adapter->sge.reg_lock); + ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data); + mtx_unlock_spin(&adapter->sge.reg_lock); + break; + case RDMA_CTRL_QP_SETUP: { + struct rdma_ctrlqp_setup *req = data; + + mtx_lock_spin(&adapter->sge.reg_lock); + ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0, + SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ, + req->base_addr, req->size, + FW_RI_TID_START, 1, 0); + mtx_unlock_spin(&adapter->sge.reg_lock); + break; + } + default: + ret = EOPNOTSUPP; + } + return (ret); +} + +static int +cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data) +{ + struct adapter *adapter = tdev2adap(tdev); + struct tid_range *tid; + struct mtutab *mtup; + struct iff_mac *iffmacp; + struct ddp_params *ddpp; + struct adap_ports *ports; + struct ofld_page_info *rx_page_info; + struct tp_params *tp = &adapter->params.tp; + int port; + + switch (req) { + case GET_MAX_OUTSTANDING_WR: + *(unsigned int *)data = FW_WR_NUM; + break; + case GET_WR_LEN: + *(unsigned int *)data = WR_FLITS; + break; + case GET_TX_MAX_CHUNK: + *(unsigned int *)data = 1 << 20; /* 1MB */ + break; + case GET_TID_RANGE: + tid = data; + tid->num = t3_mc5_size(&adapter->mc5) - + adapter->params.mc5.nroutes - + adapter->params.mc5.nfilters - + adapter->params.mc5.nservers; + tid->base = 0; + break; + case GET_STID_RANGE: + tid = data; + tid->num = adapter->params.mc5.nservers; + tid->base = t3_mc5_size(&adapter->mc5) - tid->num - + adapter->params.mc5.nfilters - + adapter->params.mc5.nroutes; + break; + case GET_L2T_CAPACITY: + *(unsigned 
int *)data = 2048; + break; + case GET_MTUS: + mtup = data; + mtup->size = NMTUS; + mtup->mtus = adapter->params.mtus; + break; + case GET_IFF_FROM_MAC: + iffmacp = data; + iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr, + iffmacp->vlan_tag & EVL_VLID_MASK); + break; + case GET_DDP_PARAMS: + ddpp = data; + ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT); + ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT); + ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK); + break; + case GET_PORTS: + ports = data; + ports->nports = adapter->params.nports; + for_each_port(adapter, port) + ports->lldevs[port] = adapter->port[port].ifp; + break; + case FAILOVER: + port = *(int *)data; + t3_port_failover(adapter, port); + failover_fixup(adapter, port); + break; + case FAILOVER_DONE: + port = *(int *)data; + t3_failover_done(adapter, port); + break; + case FAILOVER_CLEAR: + t3_failover_clear(adapter); + break; + case GET_RX_PAGE_INFO: + rx_page_info = data; + rx_page_info->page_size = tp->rx_pg_size; + rx_page_info->num = tp->rx_num_pgs; + break; + case ULP_ISCSI_GET_PARAMS: + case ULP_ISCSI_SET_PARAMS: + if (!offload_running(adapter)) + return (EAGAIN); + return cxgb_ulp_iscsi_ctl(adapter, req, data); + case RDMA_GET_PARAMS: + case RDMA_CQ_OP: + case RDMA_CQ_SETUP: + case RDMA_CQ_DISABLE: + case RDMA_CTRL_QP_SETUP: + case RDMA_GET_MEM: + if (!offload_running(adapter)) + return (EAGAIN); + return cxgb_rdma_ctl(adapter, req, data); + default: + return (EOPNOTSUPP); + } + return 0; +} + /* * Allocate a TOM data structure, * initialize its cpl_handlers @@ -224,6 +722,9 @@ t3c_tom_add(struct t3cdev *cdev) if (t == NULL) return; + cdev->send = t3_offload_tx; + cdev->ctl = cxgb_offload_ctl; + if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0) goto out_free_tom; @@ -242,7 +743,7 @@ t3c_tom_add(struct t3cdev *cdev) tdev = &t->tdev; tdev->tod_ttid = cdev2type(cdev); tdev->tod_lldev = cdev->lldev; - + if (register_toedev(tdev, "toe%d")) { printf("unable to 
register offload device"); goto out_free_all; @@ -263,6 +764,8 @@ t3c_tom_add(struct t3cdev *cdev) t3cdev_add(t); /* Activate TCP offload device */ + cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter); + activate_offload(tdev); cxgb_register_listeners(); return; @@ -276,6 +779,165 @@ out_free_tom: return; } + + +static int +do_act_open_rpl(struct t3cdev *dev, struct mbuf *m) +{ + struct cpl_act_open_rpl *rpl = cplhdr(m); + unsigned int atid = G_TID(ntohl(rpl->atid)); + struct toe_tid_entry *toe_tid; + + toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid); + if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers && + toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) { + return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m, + toe_tid->ctx); + } else { + log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", + dev->name, CPL_ACT_OPEN_RPL); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } +} + +static int +do_stid_rpl(struct t3cdev *dev, struct mbuf *m) +{ + union opcode_tid *p = cplhdr(m); + unsigned int stid = G_TID(ntohl(p->opcode_tid)); + struct toe_tid_entry *toe_tid; + + toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid); + if (toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[p->opcode]) { + return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx); + } else { + log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", + dev->name, p->opcode); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } +} + +static int +do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m) +{ + union opcode_tid *p = cplhdr(m); + unsigned int hwtid; + struct toe_tid_entry *toe_tid; + + DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode); + hwtid = G_TID(ntohl(p->opcode_tid)); + + toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); + if (toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[p->opcode]) { + return toe_tid->client->handlers[p->opcode] + (dev, m, toe_tid->ctx); + } else { + log(LOG_ERR, "%s: received 
clientless CPL command 0x%x\n", + dev->name, p->opcode); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } +} + +static int +do_cr(struct t3cdev *dev, struct mbuf *m) +{ + struct cpl_pass_accept_req *req = cplhdr(m); + unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); + struct toe_tid_entry *toe_tid; + + toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid); + if (toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) { + return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ] + (dev, m, toe_tid->ctx); + } else { + log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", + dev->name, CPL_PASS_ACCEPT_REQ); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } +} + +static int +do_abort_req_rss(struct t3cdev *dev, struct mbuf *m) +{ + union opcode_tid *p = cplhdr(m); + unsigned int hwtid = G_TID(ntohl(p->opcode_tid)); + struct toe_tid_entry *toe_tid; + + toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); + if (toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[p->opcode]) { + return toe_tid->client->handlers[p->opcode] + (dev, m, toe_tid->ctx); + } else { + struct cpl_abort_req_rss *req = cplhdr(m); + struct cpl_abort_rpl *rpl; + + struct mbuf *m = m_get(M_NOWAIT, MT_DATA); + if (!m) { + log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n"); + goto out; + } + + m_set_priority(m, CPL_PRIORITY_DATA); + rpl = cplhdr(m); + rpl->wr.wr_hi = + htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); + rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req))); + OPCODE_TID(rpl) = + htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req))); + rpl->cmd = req->status; + cxgb_ofld_send(dev, m); + out: + return (CPL_RET_BUF_DONE); + } +} + +static int +do_act_establish(struct t3cdev *dev, struct mbuf *m) +{ + struct cpl_act_establish *req; + unsigned int atid; + struct toe_tid_entry *toe_tid; + + req = cplhdr(m); + atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); + toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid); + if 
(toe_tid && toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[CPL_ACT_ESTABLISH]) { + + return toe_tid->client->handlers[CPL_ACT_ESTABLISH] + (dev, m, toe_tid->ctx); + } else { + + log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", + dev->name, CPL_PASS_ACCEPT_REQ); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } +} + + +static int +do_term(struct t3cdev *dev, struct mbuf *m) +{ + unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff; + unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data)); + struct toe_tid_entry *toe_tid; + + toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid); + if (toe_tid && toe_tid->ctx && toe_tid->client->handlers && + toe_tid->client->handlers[opcode]) { + return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx); + } else { + log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", + dev->name, opcode); + return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; + } + return (0); +} + /* * Process a received packet with an unknown/unexpected CPL opcode. */ @@ -287,7 +949,6 @@ do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG); } - /* * Add a new handler to the CPL dispatch table. A NULL handler may be supplied * to unregister an existing handler. @@ -295,7 +956,7 @@ do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h) { - if (opcode < 256) + if (opcode < UCHAR_MAX) tom_cpl_handlers[opcode] = h ? 
h : do_bad_cpl; else log(LOG_ERR, "Chelsio T3 TOM: handler registration for " @@ -314,7 +975,7 @@ can_offload(struct toedev *dev, struct socket *so) struct t3cdev *cdev = T3CDEV(dev->tod_lldev); struct tid_info *t = &(T3C_DATA(cdev))->tid_maps; - return sotoinpcb(so)->inp_depend4.inp4_options == NULL && + return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL && tomd->conf.activated && (tomd->conf.max_conn < 0 || atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn); @@ -332,6 +993,322 @@ tom_ctl(struct toedev *dev, unsigned int req, void *data) return (EOPNOTSUPP); } +/* + * Free an active-open TID. + */ +void * +cxgb_free_atid(struct t3cdev *tdev, int atid) +{ + struct tid_info *t = &(T3C_DATA(tdev))->tid_maps; + union active_open_entry *p = atid2entry(t, atid); + void *ctx = p->toe_tid.ctx; + + mtx_lock(&t->atid_lock); + p->next = t->afree; + t->afree = p; + t->atids_in_use--; + mtx_unlock(&t->atid_lock); + + return ctx; +} + +/* + * Free a server TID and return it to the free pool. + */ +void +cxgb_free_stid(struct t3cdev *tdev, int stid) +{ + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + union listen_entry *p = stid2entry(t, stid); + + mtx_lock(&t->stid_lock); + p->next = t->sfree; + t->sfree = p; + t->stids_in_use--; + mtx_unlock(&t->stid_lock); +} + +/* + * Free a server TID and return it to the free pool. 
+ */ +void * +cxgb_get_lctx(struct t3cdev *tdev, int stid) +{ + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + union listen_entry *p = stid2entry(t, stid); + + return (p->toe_tid.ctx); +} + +void +cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client, + void *ctx, unsigned int tid) +{ + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + + t->tid_tab[tid].client = client; + t->tid_tab[tid].ctx = ctx; + atomic_add_int(&t->tids_in_use, 1); +} + +/* use ctx as a next pointer in the tid release list */ +void +cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid) +{ + struct t3c_data *td = T3C_DATA (tdev); + struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid]; + + CTR0(KTR_TOM, "queuing tid release\n"); + + mtx_lock(&td->tid_release_lock); + p->ctx = td->tid_release_list; + td->tid_release_list = p; + + if (!p->ctx) + taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task); + + mtx_unlock(&td->tid_release_lock); +} + +/* + * Remove a tid from the TID table. A client may defer processing its last + * CPL message if it is locked at the time it arrives, and while the message + * sits in the client's backlog the TID may be reused for another connection. + * To handle this we atomically switch the TID association if it still points + * to the original client context. 
+ */ +void +cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid) +{ + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + + if (tid >= t->ntids) + panic("tid=%d >= t->ntids=%d", tid, t->ntids); + + if (tdev->type == T3A) + atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx); + else { + struct mbuf *m; + + m = m_get(M_NOWAIT, MT_DATA); + if (__predict_true(m != NULL)) { + mk_tid_release(m, tid); + CTR1(KTR_CXGB, "releasing tid=%u", tid); + + cxgb_ofld_send(tdev, m); + t->tid_tab[tid].ctx = NULL; + } else + cxgb_queue_tid_release(tdev, tid); + } + atomic_add_int(&t->tids_in_use, -1); +} + +int +cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client, + void *ctx) +{ + int atid = -1; + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + + mtx_lock(&t->atid_lock); + if (t->afree) { + union active_open_entry *p = t->afree; + + atid = (p - t->atid_tab) + t->atid_base; + t->afree = p->next; + p->toe_tid.ctx = ctx; + p->toe_tid.client = client; + t->atids_in_use++; + } + mtx_unlock(&t->atid_lock); + return atid; +} + +int +cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client, + void *ctx) +{ + int stid = -1; + struct tid_info *t = &(T3C_DATA (tdev))->tid_maps; + + mtx_lock(&t->stid_lock); + if (t->sfree) { + union listen_entry *p = t->sfree; + + stid = (p - t->stid_tab) + t->stid_base; + t->sfree = p->next; + p->toe_tid.ctx = ctx; + p->toe_tid.client = client; + t->stids_in_use++; + } + mtx_unlock(&t->stid_lock); + return stid; +} + + +static int +is_offloading(struct ifnet *ifp) +{ + struct adapter *adapter; + int port; + + rw_rlock(&adapter_list_lock); + TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) { + for_each_port(adapter, port) { + if (ifp == adapter->port[port].ifp) { + rw_runlock(&adapter_list_lock); + return 1; + } + } + } + rw_runlock(&adapter_list_lock); + return 0; +} + + +static void +cxgb_arp_update_event(void *unused, struct rtentry *rt0, + uint8_t *enaddr, struct sockaddr *sa) +{ + + if 
(!is_offloading(rt0->rt_ifp)) + return; + + RT_ADDREF(rt0); + RT_UNLOCK(rt0); + cxgb_neigh_update(rt0, enaddr, sa); + RT_LOCK(rt0); + RT_REMREF(rt0); +} + +static void +cxgb_redirect_event(void *unused, int event, struct rtentry *rt0, + struct rtentry *rt1, struct sockaddr *sa) +{ + /* + * ignore events on non-offloaded interfaces + */ + if (!is_offloading(rt0->rt_ifp)) + return; + + /* + * Cannot redirect to non-offload device. + */ + if (!is_offloading(rt1->rt_ifp)) { + log(LOG_WARNING, "%s: Redirect to non-offload" + "device ignored.\n", __FUNCTION__); + return; + } + + /* + * avoid LORs by dropping the route lock but keeping a reference + * + */ + RT_ADDREF(rt0); + RT_UNLOCK(rt0); + RT_ADDREF(rt1); + RT_UNLOCK(rt1); + + cxgb_redirect(rt0, rt1, sa); + cxgb_neigh_update(rt1, NULL, sa); + + RT_LOCK(rt0); + RT_REMREF(rt0); + RT_LOCK(rt1); + RT_REMREF(rt1); +} + +void +cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa) +{ + + if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) { + struct t3cdev *tdev = T3CDEV(rt->rt_ifp); + + PANIC_IF(!tdev); + t3_l2t_update(tdev, rt, enaddr, sa); + } +} + +static void +set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e) +{ + struct mbuf *m; + struct cpl_set_tcb_field *req; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (!m) { + log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__); + return; + } + + m_set_priority(m, CPL_PRIORITY_CONTROL); + req = mtod(m, struct cpl_set_tcb_field *); + m->m_pkthdr.len = m->m_len = sizeof(*req); + + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_L2T_IX); + req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX)); + req->val = htobe64(V_TCB_L2T_IX(e->idx)); + tdev->send(tdev, m); +} + +void +cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa) +{ + struct ifnet *olddev, *newdev; + struct 
tid_info *ti; + struct t3cdev *tdev; + u32 tid; + int update_tcb; + struct l2t_entry *e; + struct toe_tid_entry *te; + + olddev = old->rt_ifp; + newdev = new->rt_ifp; + if (!is_offloading(olddev)) + return; + if (!is_offloading(newdev)) { + log(LOG_WARNING, "%s: Redirect to non-offload" + "device ignored.\n", __FUNCTION__); + return; + } + tdev = T3CDEV(olddev); + PANIC_IF(!tdev); + if (tdev != T3CDEV(newdev)) { + log(LOG_WARNING, "%s: Redirect to different " + "offload device ignored.\n", __FUNCTION__); + return; + } + + /* Add new L2T entry */ + e = t3_l2t_get(tdev, new, new->rt_ifp, sa); + if (!e) { + log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n", + __FUNCTION__); + return; + } + + /* Walk tid table and notify clients of dst change. */ + ti = &(T3C_DATA (tdev))->tid_maps; + for (tid=0; tid < ti->ntids; tid++) { + te = lookup_tid(ti, tid); + PANIC_IF(!te); + if (te->ctx && te->client && te->client->redirect) { + update_tcb = te->client->redirect(te->ctx, old, new, + e); + if (update_tcb) { + l2t_hold(L2DATA(tdev), e); + set_l2t_ix(tdev, tid, e); + } + } + } + l2t_release(L2DATA(tdev), e); +} + /* * Initialize the CPL dispatch table. 
*/ @@ -355,11 +1332,6 @@ t3_toe_attach(struct toedev *dev, const struct offload_id *entry) struct ofld_page_info rx_page_info; int err; -#if 0 - skb_queue_head_init(&t->deferq); - T3_INIT_WORK(&t->deferq_task, process_deferq, t); - spin_lock_init(&t->listen_lock); -#endif t3_init_tunables(t); mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF); CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry); @@ -370,9 +1342,6 @@ t3_toe_attach(struct toedev *dev, const struct offload_id *entry) dev->tod_connect = t3_connect; dev->tod_ctl = tom_ctl; #if 0 -#ifndef NETEVENT - dev->tod_neigh_update = tom_neigh_update; -#endif dev->tod_failover = t3_failover; #endif err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp); @@ -389,7 +1358,7 @@ t3_toe_attach(struct toedev *dev, const struct offload_id *entry) t->rx_page_size = rx_page_info.page_size; /* OK if this fails, we just can't do DDP */ t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE; - t->ppod_map = malloc(t->nppods, M_DEVBUF, M_WAITOK|M_ZERO); + t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO); mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF); @@ -401,7 +1370,7 @@ t3_toe_attach(struct toedev *dev, const struct offload_id *entry) static void cxgb_toe_listen_start(void *unused, struct tcpcb *tp) { - struct socket *so = tp->t_inpcb->inp_socket; + struct socket *so = inp_inpcbtosocket(tp->t_inpcb); struct tom_data *p; mtx_lock(&cxgb_list_lock); @@ -414,7 +1383,7 @@ cxgb_toe_listen_start(void *unused, struct tcpcb *tp) static void cxgb_toe_listen_stop(void *unused, struct tcpcb *tp) { - struct socket *so = tp->t_inpcb->inp_socket; + struct socket *so = inp_inpcbtosocket(tp->t_inpcb); struct tom_data *p; mtx_lock(&cxgb_list_lock); @@ -425,20 +1394,20 @@ cxgb_toe_listen_stop(void *unused, struct tcpcb *tp) mtx_unlock(&cxgb_list_lock); } +static void +cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg) +{ + struct tcpcb *tp = intotcpcb(inp); + + if (tp->t_state == TCPS_LISTEN) + 
cxgb_toe_listen_start(NULL, tp); +} + static void cxgb_register_listeners(void) { - struct inpcb *inp; - struct tcpcb *tp; - - INP_INFO_RLOCK(&tcbinfo); - LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { - tp = intotcpcb(inp); - if (tp->t_state == TCPS_LISTEN) - cxgb_toe_listen_start(NULL, tp); - } - INP_INFO_RUNLOCK(&tcbinfo); + inp_apply_all(cxgb_toe_listen_start_handler, NULL); } static int @@ -459,9 +1428,14 @@ t3_tom_init(void) "Unable to register Chelsio T3 TCP offload module.\n"); return -1; } - INP_INFO_WLOCK(&tcbinfo); - INP_INFO_WUNLOCK(&tcbinfo); + rw_init(&adapter_list_lock, "ofld adap list"); + TAILQ_INIT(&adapter_list); + EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event, + NULL, EVENTHANDLER_PRI_ANY); + EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event, + NULL, EVENTHANDLER_PRI_ANY); + mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF); listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start, cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY); @@ -469,9 +1443,34 @@ t3_tom_init(void) cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY); TAILQ_INIT(&cxgb_list); + + + t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl); + t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl); + t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr); + t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl); + t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl); + t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl); + t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl); + t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl); + t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl); + t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl); + t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); + t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl); + t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl); + t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss); + 
t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); + t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term); + t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl); + t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl); + t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl); + t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl); + t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl); + t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl); + /* Register to offloading devices */ - t3c_tom_client.add = t3c_tom_add; cxgb_register_client(&t3c_tom_client); + return (0); } diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c b/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c index b4ff748cea52..1490bfbdc29b 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c @@ -67,7 +67,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include diff --git a/sys/modules/cxgb/cxgb/Makefile b/sys/modules/cxgb/cxgb/Makefile index 3638d2e6e2ed..a76e0162a896 100644 --- a/sys/modules/cxgb/cxgb/Makefile +++ b/sys/modules/cxgb/cxgb/Makefile @@ -6,10 +6,9 @@ CXGB = ${.CURDIR}/../../../dev/cxgb KMOD= if_cxgb SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c -SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c cxgb_l2t.c -SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h opt_global.h -SRCS+= uipc_mvec.c cxgb_support.c -SRCS+= cxgb_multiq.c +SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c +SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h +SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB} -DSMP CFLAGS+= -DDISABLE_MBUF_IOVEC diff --git a/sys/modules/cxgb/tom/Makefile b/sys/modules/cxgb/tom/Makefile index 7134386e1616..2417edf1fc40 100644 --- a/sys/modules/cxgb/tom/Makefile +++ b/sys/modules/cxgb/tom/Makefile @@ -5,7 +5,7 @@ TOM = 
${.CURDIR}/../../../dev/cxgb/ulp/tom KMOD= tom SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c -SRCS+= cxgb_ddp.c cxgb_vm.c +SRCS+= cxgb_ddp.c cxgb_vm.c cxgb_l2t.c cxgb_tcp_offload.c SRCS+= opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h opt_mac.h SRCS+= opt_tcpdebug.h opt_ddb.h opt_sched.h opt_global.h opt_ktr.h SRCS+= device_if.h bus_if.h pci_if.h diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h index 21ecba485fc0..7626d9b6cd8c 100644 --- a/sys/netinet/tcp_offload.h +++ b/sys/netinet/tcp_offload.h @@ -229,6 +229,12 @@ int tcp_offload_connect(struct socket *so, struct sockaddr *nam); * Connection is offloaded */ #define tp_offload(tp) ((tp)->t_flags & TF_TOE) + +/* + * hackish way of allowing this file to also be included by TOE + * which needs to be kept ignorant of socket implementation details + */ +#ifdef _SYS_SOCKETVAR_H_ /* * The socket has not been marked as "do not offload" */ @@ -324,7 +330,7 @@ tcp_offload_listen_close(struct tcpcb *tp) EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); #endif } - -#undef tp_offload #undef SO_OFFLOADABLE +#endif /* _SYS_SOCKETVAR_H_ */ +#undef tp_offload #endif /* _NETINET_TCP_OFFLOAD_H_ */