[Devel] [PATCH 2/4] IB/mlx4: move iboe field aside from mlx4_ib_device

Jan Dakinevich jan.dakinevich at virtuozzo.com
Wed Sep 5 16:26:41 MSK 2018


On Wed, 5 Sep 2018 15:10:14 +0300
Konstantin Khorenko <khorenko at virtuozzo.com> wrote:

> 1) did you send this patchset to mainstream as well? Link?
> 

Not yet: first, the patchset has to pass internal review.

> 2) see below
> 
> On 09/03/2018 05:02 PM, Jan Dakinevich wrote:
> > This is the 1st patch of 3 of the work for decreasing size
> > of mlx4_ib_device.
> >
> > The field takes about 8K and could be safely allocated with
> > kvzalloc.
> >
> > https://jira.sw.ru/browse/HCI-129
> > Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
> > ---
> >  drivers/infiniband/hw/mlx4/main.c    | 65
> > ++++++++++++++++++++++--------------
> > drivers/infiniband/hw/mlx4/mlx4_ib.h |  3 +-
> > drivers/infiniband/hw/mlx4/qp.c      |  4 +-- 3 files changed, 44
> > insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mlx4/main.c
> > b/drivers/infiniband/hw/mlx4/main.c index 0bdbb64..63f9922 100644
> > --- a/drivers/infiniband/hw/mlx4/main.c
> > +++ b/drivers/infiniband/hw/mlx4/main.c
> > @@ -253,7 +253,7 @@ static int mlx4_ib_add_gid(struct ib_device
> > *device, void **context)
> >  {
> >  	struct mlx4_ib_dev *ibdev = to_mdev(device);
> > -	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
> > +	struct mlx4_ib_iboe *iboe = ibdev->iboe;
> >  	struct mlx4_port_gid_table   *port_gid_table;
> >  	int free = -1, found = -1;
> >  	int ret = 0;
> > @@ -331,7 +331,7 @@ static int mlx4_ib_del_gid(struct ib_device
> > *device, {
> >  	struct gid_cache_context *ctx = *context;
> >  	struct mlx4_ib_dev *ibdev = to_mdev(device);
> > -	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
> > +	struct mlx4_ib_iboe *iboe = ibdev->iboe;
> >  	struct mlx4_port_gid_table   *port_gid_table;
> >  	int ret = 0;
> >  	int hw_update = 0;
> > @@ -379,7 +379,7 @@ static int mlx4_ib_del_gid(struct ib_device
> > *device, int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev
> > *ibdev, u8 port_num, int index)
> >  {
> > -	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
> > +	struct mlx4_ib_iboe *iboe = ibdev->iboe;
> >  	struct gid_cache_context *ctx = NULL;
> >  	union ib_gid gid;
> >  	struct mlx4_port_gid_table   *port_gid_table;
> > @@ -716,7 +716,7 @@ static int eth_link_query_port(struct ib_device
> > *ibdev, u8 port, {
> >
> >  	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
> > -	struct mlx4_ib_iboe *iboe = &mdev->iboe;
> > +	struct mlx4_ib_iboe *iboe = mdev->iboe;
> >  	struct net_device *ndev;
> >  	enum ib_mtu tmp;
> >  	struct mlx4_cmd_mailbox *mailbox;
> > @@ -1444,11 +1444,11 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev
> > *mdev, struct mlx4_ib_qp *mqp, if (!mqp->port)
> >  		return 0;
> >
> > -	spin_lock_bh(&mdev->iboe.lock);
> > -	ndev = mdev->iboe.netdevs[mqp->port - 1];
> > +	spin_lock_bh(&mdev->iboe->lock);
> > +	ndev = mdev->iboe->netdevs[mqp->port - 1];
> >  	if (ndev)
> >  		dev_hold(ndev);
> > -	spin_unlock_bh(&mdev->iboe.lock);
> > +	spin_unlock_bh(&mdev->iboe->lock);
> >
> >  	if (ndev) {
> >  		ret = 1;
> > @@ -2101,11 +2101,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp
> > *ibqp, union ib_gid *gid, u16 lid) mutex_lock(&mqp->mutex);
> >  	ge = find_gid_entry(mqp, gid->raw);
> >  	if (ge) {
> > -		spin_lock_bh(&mdev->iboe.lock);
> > -		ndev = ge->added ? mdev->iboe.netdevs[ge->port -
> > 1] : NULL;
> > +		spin_lock_bh(&mdev->iboe->lock);
> > +		ndev = ge->added ? mdev->iboe->netdevs[ge->port -
> > 1] : NULL; if (ndev)
> >  			dev_hold(ndev);
> > -		spin_unlock_bh(&mdev->iboe.lock);
> > +		spin_unlock_bh(&mdev->iboe->lock);
> >  		if (ndev)
> >  			dev_put(ndev);
> >  		list_del(&ge->list);
> > @@ -2396,7 +2396,7 @@ static void mlx4_ib_update_qps(struct
> > mlx4_ib_dev *ibdev, new_smac = mlx4_mac_to_u64(dev->dev_addr);
> >  	read_unlock(&dev_base_lock);
> >
> > -	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
> > +	atomic64_set(&ibdev->iboe->mac[port - 1], new_smac);
> >
> >  	/* no need for update QP1 and mac registration in
> > non-SRIOV */ if (!mlx4_is_mfunc(ibdev->dev))
> > @@ -2452,7 +2452,7 @@ static void mlx4_ib_scan_netdevs(struct
> > mlx4_ib_dev *ibdev,
> >
> >  	ASSERT_RTNL();
> >
> > -	iboe = &ibdev->iboe;
> > +	iboe = ibdev->iboe;
> >
> >  	spin_lock_bh(&iboe->lock);
> >  	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
> > @@ -2476,13 +2476,13 @@ static int mlx4_ib_netdev_event(struct
> > notifier_block *this, unsigned long event, void *ptr)
> >  {
> >  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
> > -	struct mlx4_ib_dev *ibdev;
> > +	struct mlx4_ib_iboe *iboe;
> >
> >  	if (!net_eq(dev_net(dev), &init_net))
> >  		return NOTIFY_DONE;
> >
> > -	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
> > -	mlx4_ib_scan_netdevs(ibdev, dev, event);
> > +	iboe = container_of(this, struct mlx4_ib_iboe, nb);
> > +	mlx4_ib_scan_netdevs(iboe->parent, dev, event);
> >
> >  	return NOTIFY_DONE;
> >  }
> > @@ -2612,6 +2612,14 @@ static void get_fw_ver_str(struct ib_device
> > *device, char *str) (int) dev->dev->caps.fw_ver & 0xffff);
> >  }
> >
> > +static void mlx4_ib_release(struct ib_device *device)
> > +{
> > +	struct mlx4_ib_dev *ibdev = container_of(device, struct
> > mlx4_ib_dev,
> > +						 ib_dev);
> > +
> > +	kvfree(ibdev->iboe);
> > +}
> > +
> >  static void *mlx4_ib_add(struct mlx4_dev *dev)
> >  {
> >  	struct mlx4_ib_dev *ibdev;
> > @@ -2642,7 +2650,14 @@ static void *mlx4_ib_add(struct mlx4_dev
> > *dev) return NULL;
> >  	}
> >
> > -	iboe = &ibdev->iboe;
> > +	ibdev->ib_dev.release		= mlx4_ib_release;
> > +
> > +	ibdev->iboe = kvzalloc(sizeof(struct mlx4_ib_iboe),
> > GFP_KERNEL);
> > +	if (!ibdev->iboe)
> > +		goto err_dealloc;
> > +
> > +	ibdev->iboe->parent = ibdev;
> > +	iboe = ibdev->iboe;
> 
> 2) It's a good practice to free allocated resources on errors right
> in the function where resources are allocated - like all other
> resources in this function.
> 

These resources are deallocated in the same way as the memory for ibdev:
they were previously part of ibdev and now, despite being allocated
separately, they should have the same lifetime as ibdev.

Deallocation happens in the err_dealloc error path, where
ib_dealloc_device is called to drop the reference count:

mlx4_ib_add (goto err_dealloc)
 ib_dealloc_device
  kobject_put
   kref_put
    kref_sub
     kobject_release
      kobject_cleanup
       device_release
        ib_device_release
         mlx4_ib_release
          kvfree(iboe)
          kfree(pkeys)
          kfree(sriov)
         kfree(ibdev)

That is why the ->release() callback must be assigned before the
subparts are allocated: any later failure that jumps to err_dealloc
then frees them through ib_dealloc_device.

> Please handle error cases in the same way in all 3 patches.
> 
> >
> >  	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
> >  		goto err_dealloc;
> > @@ -2965,10 +2980,10 @@ static void *mlx4_ib_add(struct mlx4_dev
> > *dev) return ibdev;
> >
> >  err_notif:
> > -	if (ibdev->iboe.nb.notifier_call) {
> > -		if
> > (unregister_netdevice_notifier_rh(&ibdev->iboe.nb))
> > +	if (ibdev->iboe->nb.notifier_call) {
> > +		if
> > (unregister_netdevice_notifier_rh(&ibdev->iboe->nb))
> > pr_warn("failure unregistering notifier\n");
> > -		ibdev->iboe.nb.notifier_call = NULL;
> > +		ibdev->iboe->nb.notifier_call = NULL;
> >  	}
> >  	flush_workqueue(wq);
> >
> > @@ -3088,10 +3103,10 @@ static void mlx4_ib_remove(struct mlx4_dev
> > *dev, void *ibdev_ptr) mlx4_ib_mad_cleanup(ibdev);
> >  	ib_unregister_device(&ibdev->ib_dev);
> >  	mlx4_ib_diag_cleanup(ibdev);
> > -	if (ibdev->iboe.nb.notifier_call) {
> > -		if
> > (unregister_netdevice_notifier_rh(&ibdev->iboe.nb))
> > +	if (ibdev->iboe->nb.notifier_call) {
> > +		if
> > (unregister_netdevice_notifier_rh(&ibdev->iboe->nb))
> > pr_warn("failure unregistering notifier\n");
> > -		ibdev->iboe.nb.notifier_call = NULL;
> > +		ibdev->iboe->nb.notifier_call = NULL;
> >  	}
> >
> >  	if (ibdev->steering_support ==
> > MLX4_STEERING_MODE_DEVICE_MANAGED) { @@ -3235,9 +3250,9 @@ static
> > void handle_bonded_port_state_event(struct work_struct *work)
> > struct ib_event ibev;
> >
> >  	kfree(ew);
> > -	spin_lock_bh(&ibdev->iboe.lock);
> > +	spin_lock_bh(&ibdev->iboe->lock);
> >  	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
> > -		struct net_device *curr_netdev =
> > ibdev->iboe.netdevs[i];
> > +		struct net_device *curr_netdev =
> > ibdev->iboe->netdevs[i]; enum ib_port_state curr_port_state;
> >
> >  		if (!curr_netdev)
> > @@ -3251,7 +3266,7 @@ static void
> > handle_bonded_port_state_event(struct work_struct *work)
> > bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
> > curr_port_state : IB_PORT_ACTIVE; }
> > -	spin_unlock_bh(&ibdev->iboe.lock);
> > +	spin_unlock_bh(&ibdev->iboe->lock);
> >
> >  	ibev.device = &ibdev->ib_dev;
> >  	ibev.element.port_num = 1;
> > diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h
> > b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 719dae3..37c4d878
> > 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
> > +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
> > @@ -523,6 +523,7 @@ struct mlx4_ib_iboe {
> >  	atomic64_t		mac[MLX4_MAX_PORTS];
> >  	struct notifier_block 	nb;
> >  	struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
> > +	struct mlx4_ib_dev     *parent;
> >  };
> >
> >  struct pkey_mgt {
> > @@ -599,7 +600,7 @@ struct mlx4_ib_dev {
> >
> >  	struct mutex		cap_mask_mutex;
> >  	bool			ib_active;
> > -	struct mlx4_ib_iboe	iboe;
> > +	struct mlx4_ib_iboe    *iboe;
> >  	struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
> >  	int		       *eq_table;
> >  	struct kobject	       *iov_parent;
> > diff --git a/drivers/infiniband/hw/mlx4/qp.c
> > b/drivers/infiniband/hw/mlx4/qp.c index bd9ec5f..faf0f86 100644
> > --- a/drivers/infiniband/hw/mlx4/qp.c
> > +++ b/drivers/infiniband/hw/mlx4/qp.c
> > @@ -1972,7 +1972,7 @@ static int handle_eth_ud_smac_index(struct
> > mlx4_ib_dev *dev, u64 u64_mac;
> >  	int smac_index;
> >
> > -	u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);
> > +	u64_mac = atomic64_read(&dev->iboe->mac[qp->port - 1]);
> >
> >  	context->pri_path.sched_queue =
> > MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); if
> > (!qp->pri.smac && !qp->pri.smac_port) { @@ -3042,7 +3042,7 @@
> > static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8
> > port_num, int index, union ib_gid *gid, enum ib_gid_type *gid_type)
> >  {
> > -	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
> > +	struct mlx4_ib_iboe *iboe = ibdev->iboe;
> >  	struct mlx4_port_gid_table *port_gid_table;
> >  	unsigned long flags;
> >
> >



-- 
Best regards
Jan Dakinevich


More information about the Devel mailing list