[Devel] Re: [PATCH] Rewrite proc seq operations via seq_list_xxx ones
Mathieu Desnoyers
mathieu.desnoyers at polymtl.ca
Wed Oct 10 08:10:49 PDT 2007
* Pavel Emelyanov (xemul at openvz.org) wrote:
> Mathieu Desnoyers wrote:
> > * Pavel Emelyanov (xemul at openvz.org) wrote:
> >> Some time ago I posted a set of patches that consolidated
> >> the seq files, walking the list_head-s. This set was merged
> >> in the mm tree. /proc/diskstats and /proc/partitions files
> >> were included in this set, but a bit later this part was
> >> dropped because of conflicts with some other changes.
> >>
> >> Here's the fixed version against the latest git block repo.
> >>
> >
> > Hi Pavel,
> >
> > You might want to try implementing this patch on top of the sorted seq
> > list patch I proposed there. It deals with a race between proc file read
> > and list modification between iterations.
>
> AFAIS there's no race - everything is protected with the mutex.
>
> OTOH, If you mean the problem that the list can change while the
> seq file reallocates its buffers, then this problem exists for all
> the kernel and I don't think it's a good idea to stick to the block
> proc files only.
>
The problem is that the list can change between two consecutive reads.
And yes, all proc files using lists are affected, but changing every
proc file users is a quite large task.
A problematic sequence would be:
1st procfs read()
*_start()
- mutex down
- seq list start
*_next()
- seq list next
*_stop()
- mutex up
end of 1st procfs read()
mutex down
-> Removing a list entry
mutex up
2nd procfs read()
*_start()
- mutex down
... we now skip a list entry depending on the relative position of our
iterator and the deletion.
The mutex is taken/released at each and every read, therefore not
protecting against modifications across reads.
Mathieu
> > http://lkml.org/lkml/2007/9/6/175
> >
> > See this patch for a user implementation example and test code
> > (/proc/modules) :
> >
> > http://lkml.org/lkml/2007/9/6/176
> >
> > Mathieu
> >
> >> Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
> >>
> >> ---
> >>
> >> diff --git a/block/genhd.c b/block/genhd.c
> >> index 3af1e7a..27f7061 100644
> >> --- a/block/genhd.c
> >> +++ b/block/genhd.c
> >> @@ -264,39 +264,34 @@ void __init printk_all_partitions(void)
> >>
> >> #ifdef CONFIG_PROC_FS
> >> /* iterator */
> >> -static void *part_start(struct seq_file *part, loff_t *pos)
> >> +static void *disk_start(struct seq_file *part, loff_t *pos)
> >> {
> >> - struct list_head *p;
> >> - loff_t l = *pos;
> >> -
> >> mutex_lock(&block_subsys_lock);
> >> - list_for_each(p, &block_subsys.list)
> >> - if (!l--)
> >> - return list_entry(p, struct gendisk, kobj.entry);
> >> - return NULL;
> >> + return seq_list_start_head(&block_subsys.list, *pos);
> >> }
> >>
> >> -static void *part_next(struct seq_file *part, void *v, loff_t *pos)
> >> +static void *disk_next(struct seq_file *part, void *v, loff_t *pos)
> >> {
> >> - struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
> >> - ++*pos;
> >> - return p==&block_subsys.list ? NULL :
> >> - list_entry(p, struct gendisk, kobj.entry);
> >> + return seq_list_next(v, &block_subsys.list, pos);
> >> }
> >>
> >> -static void part_stop(struct seq_file *part, void *v)
> >> +static void disk_stop(struct seq_file *part, void *v)
> >> {
> >> mutex_unlock(&block_subsys_lock);
> >> }
> >>
> >> static int show_partition(struct seq_file *part, void *v)
> >> {
> >> - struct gendisk *sgp = v;
> >> + struct gendisk *sgp;
> >> int n;
> >> char buf[BDEVNAME_SIZE];
> >>
> >> - if (&sgp->kobj.entry == block_subsys.list.next)
> >> + if (v == &block_subsys.list) {
> >> seq_puts(part, "major minor #blocks name\n\n");
> >> + return 0;
> >> + }
> >> +
> >> + sgp = list_entry(v, struct gendisk, kobj.entry);
> >>
> >> /* Don't show non-partitionable removeable devices or empty devices */
> >> if (!get_capacity(sgp) ||
> >> @@ -325,9 +320,9 @@ static int show_partition(struct seq_fil
> >> }
> >>
> >> struct seq_operations partitions_op = {
> >> - .start =part_start,
> >> - .next = part_next,
> >> - .stop = part_stop,
> >> + .start =disk_start,
> >> + .next = disk_next,
> >> + .stop = disk_stop,
> >> .show = show_partition
> >> };
> >> #endif
> >> @@ -616,44 +611,22 @@ decl_subsys(block, &ktype_block, &block_
> >> */
> >>
> >> /* iterator */
> >> -static void *diskstats_start(struct seq_file *part, loff_t *pos)
> >> -{
> >> - loff_t k = *pos;
> >> - struct list_head *p;
> >> -
> >> - mutex_lock(&block_subsys_lock);
> >> - list_for_each(p, &block_subsys.list)
> >> - if (!k--)
> >> - return list_entry(p, struct gendisk, kobj.entry);
> >> - return NULL;
> >> -}
> >> -
> >> -static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
> >> -{
> >> - struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
> >> - ++*pos;
> >> - return p==&block_subsys.list ? NULL :
> >> - list_entry(p, struct gendisk, kobj.entry);
> >> -}
> >> -
> >> -static void diskstats_stop(struct seq_file *part, void *v)
> >> -{
> >> - mutex_unlock(&block_subsys_lock);
> >> -}
> >> -
> >> static int diskstats_show(struct seq_file *s, void *v)
> >> {
> >> - struct gendisk *gp = v;
> >> + struct gendisk *gp;
> >> char buf[BDEVNAME_SIZE];
> >> int n = 0;
> >>
> >> + if (v == &block_subsys.list)
> >> /*
> >> - if (&sgp->kobj.entry == block_subsys.kset.list.next)
> >> seq_puts(s, "major minor name"
> >> " rio rmerge rsect ruse wio wmerge "
> >> "wsect wuse running use aveq"
> >> "\n\n");
> >> */
> >> + return 0;
> >> +
> >> + gp = list_entry(v, struct gendisk, kobj.entry);
> >>
> >> preempt_disable();
> >> disk_round_stats(gp);
> >> @@ -686,9 +659,9 @@ static int diskstats_show(struct seq_fil
> >> }
> >>
> >> struct seq_operations diskstats_op = {
> >> - .start = diskstats_start,
> >> - .next = diskstats_next,
> >> - .stop = diskstats_stop,
> >> + .start = disk_start,
> >> + .next = disk_next,
> >> + .stop = disk_stop,
> >> .show = diskstats_show
> >> };
> >>
> >> -
> >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >> the body of a message to majordomo at vger.kernel.org
> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >> Please read the FAQ at http://www.tux.org/lkml/
> >>
> >
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
More information about the Devel
mailing list