[Devel] [PATCH RH9 02/10] drivers/vhost: use array to store workers

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Wed Sep 7 16:52:10 MSK 2022

On 9/2/22 17:20, Konstantin Khorenko wrote:
> On 16.08.2022 09:43, Andrey Zhadchenko wrote:
>> We want to support several vhost workers. The first step is to
>> rework vhost to use array of workers rather than single pointer.
>> Update creation and cleanup routines.
>>
>> https://jira.sw.ru/browse/PSBM-139414
>> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
>> ---
>>   drivers/vhost/vhost.c | 75 +++++++++++++++++++++++++++++++------------
>>   drivers/vhost/vhost.h | 10 +++++-
>>   2 files changed, 63 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index a0bfc77c6a43..968601325a37 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -231,11 +231,24 @@ void vhost_poll_stop(struct vhost_poll *poll)
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_poll_stop);
>> +static void vhost_work_queue_at_worker(struct vhost_worker *w,
>> +                       struct vhost_work *work)
>> +{
>> +    if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
>> +        /* We can only add the work to the list after we're
>> +         * sure it was not in the list.
>> +         * test_and_set_bit() implies a memory barrier.
>> +         */
>> +        llist_add(&work->node, &w->work_list);
>> +        wake_up_process(w->worker);
>> +    }
>> +}
>> +
>>   void vhost_work_dev_flush(struct vhost_dev *dev)
>>   {
>>       struct vhost_flush_struct flush;
>> -    if (dev->worker) {
>> +    if (dev->workers[0].worker) {
>>           init_completion(&flush.wait_event);
>>           vhost_work_init(&flush.work, vhost_flush_work);
>> @@ -255,17 +268,12 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush);
>>   void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>>   {
>> -    if (!dev->worker)
>> +    struct vhost_worker *w = &dev->workers[0];
>> +
>> +    if (!w->worker)
>>           return;
>> -    if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
>> -        /* We can only add the work to the list after we're
>> -         * sure it was not in the list.
>> -         * test_and_set_bit() implies a memory barrier.
>> -         */
>> -        llist_add(&work->node, &dev->work_list);
>> -        wake_up_process(dev->worker);
>> -    }
>> +    vhost_work_queue_at_worker(w, work);
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_work_queue);
>> @@ -341,9 +349,29 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>       __vhost_vq_meta_reset(vq);
>>   }
>> +static void vhost_worker_reset(struct vhost_worker *w)
>> +{
>> +    init_llist_head(&w->work_list);
>> +    w->worker = NULL;
>> +}
>> +
>> +void vhost_cleanup_workers(struct vhost_dev *dev)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < dev->nworkers; ++i) {
>> +        WARN_ON(!llist_empty(&dev->workers[i].work_list));
>> +        kthread_stop(dev->workers[i].worker);
>> +        vhost_worker_reset(&dev->workers[i]);
>> +    }
>> +
>> +    dev->nworkers = 0;
>> +}
>> +
>>   static int vhost_worker(void *data)
>>   {
>> -    struct vhost_dev *dev = data;
>> +    struct vhost_worker *w = data;
>> +    struct vhost_dev *dev = w->dev;
>>       struct vhost_work *work, *work_next;
>>       struct llist_node *node;
>> @@ -358,7 +386,7 @@ static int vhost_worker(void *data)
>>               break;
>>           }
>> -        node = llist_del_all(&dev->work_list);
>> +        node = llist_del_all(&w->work_list);
>>           if (!node)
>>               schedule();
>> @@ -481,7 +509,6 @@ void vhost_dev_init(struct vhost_dev *dev,
>>       dev->umem = NULL;
>>       dev->iotlb = NULL;
>>       dev->mm = NULL;
>> -    dev->worker = NULL;
>>       dev->iov_limit = iov_limit;
>>       dev->weight = weight;
>>       dev->byte_weight = byte_weight;
>> @@ -493,6 +520,11 @@ void vhost_dev_init(struct vhost_dev *dev,
>>       INIT_LIST_HEAD(&dev->pending_list);
>>       spin_lock_init(&dev->iotlb_lock);
>> +    dev->nworkers = 0;
>> +    for (i = 0; i < VHOST_MAX_WORKERS; ++i) {
>> +        dev->workers[i].dev = dev;
>> +        vhost_worker_reset(&dev->workers[i]);
>> +    }
>>       for (i = 0; i < dev->nvqs; ++i) {
>>           vq = dev->vqs[i];
>> @@ -602,7 +634,8 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>               goto err_worker;
>>           }
>> -        dev->worker = worker;
>> +        dev->workers[0].worker = worker;
>> +        dev->nworkers = 1;
>>           wake_up_process(worker); /* avoid contributing to loadavg */
>>           err = vhost_attach_cgroups(dev);
>> @@ -616,9 +649,10 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>       return 0;
>>   err_cgroup:
>> -    if (dev->worker) {
>> -        kthread_stop(dev->worker);
>> -        dev->worker = NULL;
>> +    dev->nworkers = 0;
>> +    if (dev->workers[0].worker) {
>> +        kthread_stop(dev->workers[0].worker);
>> +        dev->workers[0].worker = NULL;
> 
> This place is a bit out of sync:
> here we assume there can be only 1 worker,
> but vhost_cleanup_workers() and vhost_dev_init() already loop over
> dev->nworkers in this patch.
> 
> Technically there is no problem here, the code is correct, just not
> polished.

I can make a separate patch if you insist.
My idea was that once we convert the single worker into an array of
workers, the cleanup/init paths must be prepared for any number of
workers, regardless of the fact that some functions only set one
worker for the time being.
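
For illustration, that separate patch could make the error path reuse
the loop-based helper; a minimal sketch (assuming nworkers is already
1 when err_cgroup is reached, since vhost_dev_set_owner() sets it
right after spawning the worker in this patch):

err_cgroup:
	/* Sketch only: let the helper stop all started workers and
	 * reset nworkers to 0, so the error path stays in sync with
	 * vhost_cleanup_workers() / vhost_dev_init() instead of
	 * open-coding worker 0.
	 */
	vhost_cleanup_workers(dev);
err_worker:
	vhost_detach_mm(dev);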

> 
>>       }
>>   err_worker:
>>       vhost_detach_mm(dev);
>> @@ -701,6 +735,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>>               eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
>>           vhost_vq_reset(dev, dev->vqs[i]);
>>       }
>> +
>>       vhost_dev_free_iovecs(dev);
>>       if (dev->log_ctx)
>>           eventfd_ctx_put(dev->log_ctx);
>> @@ -712,10 +747,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>>       dev->iotlb = NULL;
>>       vhost_clear_msg(dev);
>>       wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
>> -    WARN_ON(!llist_empty(&dev->work_list));
>> -    if (dev->worker) {
>> -        kthread_stop(dev->worker);
>> -        dev->worker = NULL;
>> +    if (dev->use_worker) {
>> +        vhost_cleanup_workers(dev);
>>           dev->kcov_handle = 0;
>>       }
>>       vhost_detach_mm(dev);
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index 638bb640d6b4..634ea828cbba 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -25,6 +25,13 @@ struct vhost_work {
>>       unsigned long        flags;
>>   };
>> +#define VHOST_MAX_WORKERS 4
>> +struct vhost_worker {
>> +    struct task_struct *worker;
>> +    struct llist_head work_list;
>> +    struct vhost_dev *dev;
>> +};
>> +
>>   /* Poll a file (eventfd or socket) */
>>   /* Note: there's nothing vhost specific about this structure. */
>>   struct vhost_poll {
>> @@ -149,7 +156,8 @@ struct vhost_dev {
>>       int nvqs;
>>       struct eventfd_ctx *log_ctx;
>>       struct llist_head work_list;
>> -    struct task_struct *worker;
>> +    struct vhost_worker workers[VHOST_MAX_WORKERS];
>> +    int nworkers;
>>       struct vhost_iotlb *umem;
>>       struct vhost_iotlb *iotlb;
>>       spinlock_t iotlb_lock;

