Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser', 'misc', 'mlx4' and 'nes' into for-next

This commit is contained in:
Roland Dreier 2009-12-15 23:39:25 -08:00
38 changed files with 1077 additions and 906 deletions

View file

@ -36,11 +36,11 @@ Datagram vs Connected modes
fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
In connected mode, the IB RC (Reliable Connected) transport is used.
Connected mode is to takes advantage of the connected nature of the
IB transport and allows an MTU up to the maximal IP packet size of
64K, which reduces the number of IP packets needed for handling
large UDP datagrams, TCP segments, etc and increases the performance
for large messages.
Connected mode takes advantage of the connected nature of the IB
transport and allows an MTU up to the maximal IP packet size of 64K,
which reduces the number of IP packets needed for handling large UDP
datagrams, TCP segments, etc and increases the performance for large
messages.
In connected mode, the interface's UD QP is still used for multicast
and communication with peers that don't support connected mode. In

View file

@ -36,7 +36,6 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@ -92,22 +91,12 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
switch (dev->type) {
case ARPHRD_INFINIBAND:
dev_addr->dev_type = RDMA_NODE_IB_CA;
break;
case ARPHRD_ETHER:
dev_addr->dev_type = RDMA_NODE_RNIC;
break;
default:
return -EADDRNOTAVAIL;
}
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->src_dev = dev;
dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
@ -117,6 +106,15 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
if (dev_addr->bound_dev_if) {
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (!dev)
return -ENODEV;
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
return ret;
}
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@ -131,6 +129,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
@ -139,6 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
}
read_unlock(&dev_base_lock);
break;
#endif
}
@ -176,48 +176,9 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
static void addr_send_arp(struct sockaddr *dst_in)
{
struct rtable *rt;
struct flowi fl;
memset(&fl, 0, sizeof fl);
switch (dst_in->sa_family) {
case AF_INET:
fl.nl_u.ip4_u.daddr =
((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
if (ip_route_output_key(&init_net, &rt, &fl))
return;
neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct dst_entry *dst;
fl.nl_u.ip6_u.daddr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return;
neigh_event_send(dst->neighbour, NULL);
dst_release(dst);
break;
}
#endif
}
}
static int addr4_resolve_remote(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@ -229,10 +190,22 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
fl.oif = addr->bound_dev_if;
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = rt->rt_src;
if (rt->idev->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
@ -240,21 +213,14 @@ static int addr4_resolve_remote(struct sockaddr_in *src_in,
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
if (!neigh) {
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
if (neigh)
goto release;
goto put;
}
if (!(neigh->nud_state & NUD_VALID)) {
ret = -ENODATA;
goto release;
}
if (!src_ip) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = rt->rt_src;
}
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
@ -265,52 +231,77 @@ out:
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
int ret = -ENODATA;
int ret;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return ret;
if ((ret = dst->error))
goto put;
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
} else {
neigh = dst->neighbour;
if (neigh && (neigh->nud_state & NUD_VALID))
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
if (ipv6_addr_any(&fl.fl6_src)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
&fl.fl6_dst, 0, &fl.fl6_src);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
goto put;
}
neigh = dst->neighbour;
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(dst->neighbour, NULL);
ret = -ENODATA;
goto put;
}
ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
static int addr_resolve_remote(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
return addr4_resolve_remote((struct sockaddr_in *) src_in,
return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@ -327,8 +318,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_remote(src_in, dst_in,
req->addr);
req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@ -352,82 +342,6 @@ static void process_req(struct work_struct *work)
}
}
static int addr_resolve_local(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
struct net_device *dev;
int ret;
switch (dst_in->sa_family) {
case AF_INET:
{
__be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
__be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
dev = ip_dev_find(&init_net, dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
if (ipv4_is_zeronet(src_ip)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv4_is_loopback(src_ip)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
dev_put(dev);
break;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct in6_addr *a;
for_each_netdev(&init_net, dev)
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) dst_in)->sin6_addr,
dev, 1))
break;
if (!dev)
return -EADDRNOTAVAIL;
a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
if (ipv6_addr_any(a)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in6 *) src_in)->sin6_addr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv6_addr_loopback(a)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
break;
}
#endif
default:
ret = -EADDRNOTAVAIL;
break;
}
return ret;
}
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@ -443,22 +357,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
if (src_addr)
memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
if (src_addr) {
if (src_addr->sa_family != dst_addr->sa_family) {
ret = -EINVAL;
goto err;
}
memcpy(src_in, src_addr, ip_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_local(src_in, dst_in, addr);
if (req->status == -EADDRNOTAVAIL)
req->status = addr_resolve_remote(src_in, dst_in, addr);
req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@ -467,15 +387,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
kfree(req);
break;
goto err;
}
return ret;
err:
kfree(req);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);

View file

@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
union ib_gid gid;
int ret = -ENODEV;
switch (rdma_node_get_transport(dev_addr->dev_type)) {
case RDMA_TRANSPORT_IB:
ib_addr_get_sgid(dev_addr, &gid);
break;
case RDMA_TRANSPORT_IWARP:
iw_addr_get_sgid(dev_addr, &gid);
break;
default:
return -ENODEV;
}
rdma_addr_get_sgid(dev_addr, &gid);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto destroy_id;
if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
} else {
ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
&rt->addr.dev_addr);
if (ret)
goto destroy_id;
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@ -1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
cma_save_net_info(&id->route.addr, &listen_id->route.addr,
ip_ver, port, src, dst);
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
&id->route.addr.dev_addr);
if (ret)
goto err;
}
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->state = CMA_CONNECT;
@ -1474,15 +1472,6 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
{
struct sockaddr_storage addr_in;
memset(&addr_in, 0, sizeof addr_in);
addr_in.ss_family = af;
return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
struct rdma_id_private *id_priv;
@ -1490,7 +1479,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->state == CMA_IDLE) {
ret = cma_bind_any(id, AF_INET);
((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
if (ret)
return ret;
}
@ -1565,8 +1555,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
struct sockaddr_in6 *sin6;
memset(&path_rec, 0, sizeof path_rec);
ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
@ -1781,7 +1771,11 @@ port_found:
if (ret)
goto out;
ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
id_priv->id.route.addr.dev_addr.dev_type =
(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
ARPHRD_INFINIBAND : ARPHRD_ETHER;
rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
@ -1839,7 +1833,7 @@ out:
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
struct cma_work *work;
struct sockaddr_in *src_in, *dst_in;
struct sockaddr *src, *dst;
union ib_gid gid;
int ret;
@ -1853,14 +1847,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
goto err;
}
ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
if (cma_zero_addr(src)) {
dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
if ((src->sa_family = dst->sa_family) == AF_INET) {
((struct sockaddr_in *) src)->sin_addr.s_addr =
((struct sockaddr_in *) dst)->sin_addr.s_addr;
} else {
ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
}
}
work->id = id_priv;
@ -1878,10 +1877,14 @@ err:
static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr)
{
if (src_addr && src_addr->sa_family)
return rdma_bind_addr(id, src_addr);
else
return cma_bind_any(id, dst_addr->sa_family);
if (!src_addr || !src_addr->sa_family) {
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
}
}
return rdma_bind_addr(id, src_addr);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@ -2077,6 +2080,25 @@ static int cma_get_port(struct rdma_id_private *id_priv)
return ret;
}
static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
struct sockaddr *addr)
{
#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE)
struct sockaddr_in6 *sin6;
if (addr->sa_family != AF_INET6)
return 0;
sin6 = (struct sockaddr_in6 *) addr;
if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
#endif
return 0;
}
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
@ -2089,7 +2111,13 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
return -EINVAL;
if (!cma_any_addr(addr)) {
ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
if (ret)
goto err1;
if (cma_loopback_addr(addr)) {
ret = cma_bind_loopback(id_priv);
} else if (!cma_zero_addr(addr)) {
ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
@ -2108,7 +2136,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
if (!cma_any_addr(addr)) {
if (id_priv->cma_dev) {
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
mutex_unlock(&lock);
@ -2687,10 +2715,15 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
if (cma_any_addr(addr)) {
memset(mgid, 0, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6) &&
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6)) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
*mgid = *(union ib_gid *) (mc_map + 4);
} else {
ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
@ -2716,7 +2749,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
if (id_priv->id.ps == RDMA_PS_UDP)
rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
ib_addr_get_sgid(dev_addr, &rec.port_gid);
rdma_addr_get_sgid(dev_addr, &rec.port_gid);
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = 1;
@ -2815,7 +2848,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
dev_addr = &id_priv->id.route.addr.dev_addr;
if ((dev_addr->src_dev == ndev) &&
if ((dev_addr->bound_dev_if == ndev->ifindex) &&
memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
ndev->name, &id_priv->id);

View file

@ -604,6 +604,12 @@ retry:
return ret ? ret : id;
}
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)

View file

@ -43,6 +43,7 @@
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@ -562,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
ib_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
ib_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
rdma_addr_get_dgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].dgid);
rdma_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
break;
case 2:
@ -812,6 +813,51 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
return ret;
}
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
struct ib_sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
if (optlen % sizeof(*path_data))
return -EINVAL;
for (; optlen; optlen -= sizeof(*path_data), path_data++) {
if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL))
break;
}
if (!optlen)
return -EINVAL;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
if (ret)
return ret;
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
return ucma_event_handler(ctx->cm_id, &event);
}
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
void *optval, size_t optlen)
{
int ret;
switch (optname) {
case RDMA_OPTION_IB_PATH:
ret = ucma_set_ib_path(ctx, optval, optlen);
break;
default:
ret = -ENOSYS;
}
return ret;
}
static int ucma_set_option_level(struct ucma_context *ctx, int level,
int optname, void *optval, size_t optlen)
{
@ -821,6 +867,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
case RDMA_OPTION_ID:
ret = ucma_set_option_id(ctx, optname, optval, optlen);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
break;
default:
ret = -ENOSYS;
}

View file

@ -285,7 +285,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(file->ucontext);
ret = PTR_ERR(ucontext);
goto err;
}

View file

@ -365,18 +365,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
if (num_wrs <= 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -ENOMEM;
err = -ENOMEM;
goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@ -428,10 +429,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
if (err) {
*bad_wr = wr;
if (err)
break;
}
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@ -454,6 +453,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}
@ -471,18 +474,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -EINVAL;
goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -EINVAL;
err = -ENOMEM;
goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
*bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@ -494,10 +498,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
if (err) {
*bad_wr = wr;
if (err)
break;
}
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@ -511,6 +515,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
spin_unlock_irqrestore(&qhp->lock, flag);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
*bad_wr = wr;
return err;
}

View file

@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;

View file

@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
unsigned long flags;
u64 h_ret;
spin_lock_irqsave(&eq->spinlock, flags);
ibmebus_free_irq(eq->ist, (void *)shca);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
spin_lock_irqsave(&shca_list_lock, flags);
eq->is_initialized = 0;
spin_unlock_irqrestore(&shca_list_lock, flags);
spin_unlock_irqrestore(&eq->spinlock, flags);
tasklet_kill(&eq->interrupt_task);
h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");

View file

@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
static DEFINE_SPINLOCK(shca_list_lock);
DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;

View file

@ -400,7 +400,6 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
@ -412,8 +411,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
@ -433,8 +430,6 @@ static inline int post_one_send(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
if (bad_send_wr)
*bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@ -448,7 +443,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@ -457,7 +451,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
return -EINVAL;
ret = -EINVAL;
goto out;
}
/* LOCK the QUEUE */
@ -476,24 +471,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
for (cur_send_wr = send_wr; cur_send_wr != NULL;
cur_send_wr = cur_send_wr->next) {
ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
while (send_wr) {
ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
/* if one or more WQEs were successful, don't fail */
if (wqe_cnt)
ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
} /* eof for cur_send_wr */
send_wr = send_wr->next;
}
post_send_exit0:
iosync(); /* serialize GAL register access */
@ -503,6 +495,10 @@ post_send_exit0:
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
out:
if (ret)
*bad_send_wr = send_wr;
return ret;
}
@ -511,7 +507,6 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
@ -522,27 +517,23 @@ static int internal_post_recv(struct ehca_qp *my_qp,
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
return -ENODEV;
ret = -ENODEV;
goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
/* loop processes list of send reqs */
for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
cur_recv_wr = cur_recv_wr->next) {
/* loop processes list of recv reqs */
while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
if (bad_recv_wr)
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -ENOMEM;
ehca_err(dev, "Too many posted WQEs "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
/*
@ -552,7 +543,7 @@ static int internal_post_recv(struct ehca_qp *my_qp,
rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
/* write a RECV WQE into the QUEUE */
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
rq_map_idx);
/*
* if something failed,
@ -560,22 +551,20 @@ static int internal_post_recv(struct ehca_qp *my_qp,
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
}
ret = -EINVAL;
ehca_err(dev, "Could not write WQE "
"qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
qmap_entry = &my_qp->rq_map.map[rq_map_idx];
qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
qmap_entry->reported = 0;
qmap_entry->cqe_req = 1;
wqe_cnt++;
} /* eof for cur_recv_wr */
recv_wr = recv_wr->next;
} /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@ -584,6 +573,11 @@ post_recv_exit0:
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
out:
if (ret)
*bad_recv_wr = recv_wr;
return ret;
}
@ -597,6 +591,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
*bad_recv_wr = recv_wr;
return -EINVAL;
}

View file

@ -39,6 +39,7 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@ -1697,7 +1698,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
unsigned end, cnt = 0, next;
unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
@ -1748,12 +1749,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
next = find_first_bit(dd->ipath_pioavailkernel, end);
while (next < end) {
cnt++;
next = find_next_bit(dd->ipath_pioavailkernel, end,
next + 1);
}
cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);

View file

@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz)
if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;

View file

@ -54,7 +54,8 @@ enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
MLX4_IB_UD_HEADER_SIZE = 72
MLX4_IB_UD_HEADER_SIZE = 72,
MLX4_IB_LSO_HEADER_SPARE = 128,
};
struct mlx4_ib_sqp {
@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
};
enum {
MLX4_IB_MIN_SQ_STRIDE = 6
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
static const __be32 mlx4_ib_opcode[] = {
@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
case IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
((flags & MLX4_IB_QP_LSO) ? 64 : 0);
((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
@ -897,7 +899,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
context->flags |= cpu_to_be32(1 << 8); /* DE? */
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@ -1467,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz)
__be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
/*
* This is a temporary limitation and will be removed in
* a forthcoming FW release:
*/
if (unlikely(halign > 64))
return -EINVAL;
if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
*blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@ -1522,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
__be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@ -1530,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
@ -1616,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;