[Devel] [PATCH RHEL7 COMMIT] fuse kio pcs: implement logging and prometheus statistics collection v2
Konstantin Khorenko
khorenko at virtuozzo.com
Tue Feb 27 14:09:42 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-693.17.1.vz7.45.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.17.1.vz7.45.4
------>
commit d457c1a005433fd84fa4552d4abf09c0ef11adb0
Author: Dmitry Monakhov <dmonakhov at openvz.org>
Date: Tue Feb 27 14:09:42 2018 +0300
fuse kio pcs: implement logging and prometheus statistics collection v2
Implement core tracing and latency statistics similar to vstorage
userspace fuse daemon, export via relayfs, collecting logic implemented
in userspace fuse daemon, added vstorage-7.7.135 (commit: d3a7b85)
New sysfs dentries:
- loglevel config /sys/fs/fuse/connections/$CONN_ID/loglevel
- relayfs entry /sys/kernel/debug/fuse/$CONN_ID/prometheus
https://jira.sw.ru/browse/PSBM-81741
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
fs/fuse/control.c | 48 +++++-
fs/fuse/fuse_i.h | 6 +
fs/fuse/kio/pcs/fuse_io.c | 5 +
fs/fuse/kio/pcs/fuse_ktrace.h | 45 ++++++
fs/fuse/kio/pcs/fuse_ktrace_prot.h | 43 ++++++
fs/fuse/kio/pcs/fuse_prometheus.h | 11 ++
fs/fuse/kio/pcs/fuse_prometheus_prot.h | 23 +++
fs/fuse/kio/pcs/pcs_cs.c | 63 +++++++-
fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 263 +++++++++++++++++++++++++++++++++
fs/fuse/kio/pcs/pcs_map.c | 22 +--
fs/fuse/kio/pcs/pcs_req.c | 3 +-
fs/fuse/kio/pcs/pcs_req.h | 4 +-
12 files changed, 521 insertions(+), 15 deletions(-)
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 1461e587120b..5f74b50d6ce8 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -171,6 +171,42 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
return ret;
}
+/*
+ * Read handler for the per-connection "loglevel" control file: reports the
+ * connection's current ktrace_level via fuse_conn_limit_read().
+ * Returns 0 when fuse_ctl_file_conn_get() yields no connection.
+ */
+static ssize_t fuse_conn_loglevel_read(struct file *file,
+				       char __user *buf, size_t len,
+				       loff_t *ppos)
+{
+	struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
+	unsigned level;
+
+	if (conn == NULL)
+		return 0;
+
+	/* Snapshot the level, then drop the reference before formatting it. */
+	level = conn->ktrace_level;
+	fuse_conn_put(conn);
+
+	return fuse_conn_limit_read(file, buf, len, ppos, level);
+}
+
+/*
+ * Write handler for the per-connection "loglevel" control file. The value is
+ * parsed by fuse_conn_limit_write() (called with a limit argument of 16); if
+ * that returns a positive result the value is stored in the connection's
+ * ktrace_level. The return value of fuse_conn_limit_write() is passed through.
+ */
+static ssize_t fuse_conn_loglevel_write(struct file *file,
+					const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	unsigned uninitialized_var(level);
+	struct fuse_conn *conn;
+	ssize_t ret;
+
+	ret = fuse_conn_limit_write(file, buf, count, ppos, &level, 16);
+	if (ret <= 0)
+		return ret;
+
+	conn = fuse_ctl_file_conn_get(file);
+	if (conn) {
+		conn->ktrace_level = level;
+		fuse_conn_put(conn);
+	}
+
+	return ret;
+}
+
static const struct file_operations fuse_ctl_abort_ops = {
.open = nonseekable_open,
.write = fuse_conn_abort_write,
@@ -197,6 +233,13 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = {
.llseek = no_llseek,
};
+/* File operations for the non-seekable per-connection "loglevel" file. */
+static const struct file_operations fuse_conn_loglevel_ops = {
+	.llseek = no_llseek,
+	.open = nonseekable_open,
+	.write = fuse_conn_loglevel_write,
+	.read = fuse_conn_loglevel_read,
+};
+
struct fuse_conn_priv {
struct fuse_conn *conn;
struct list_head *req_list;
@@ -534,7 +577,10 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
&fuse_conn_files_ops) ||
!fuse_ctl_add_dentry(parent, fc, "conn_info",
S_IFREG | 0600, 1, NULL,
- &fuse_conn_info_ops)
+ &fuse_conn_info_ops) ||
+ !fuse_ctl_add_dentry(parent, fc, "loglevel",
+ S_IFREG | 0600, 1, NULL,
+ &fuse_conn_loglevel_ops)
)
goto err;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a4cff705689c..f704fd17905b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -764,6 +764,9 @@ struct fuse_conn {
struct fuse_kio_ops *op;
void *ctx;
} kio;
+
+ int ktrace_level;
+ struct fuse_ktrace * ktrace;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -1082,6 +1085,9 @@ struct fuse_req *fuse_generic_request_alloc(struct fuse_conn *fc,
struct kmem_cache *cachep,
unsigned npages, gfp_t flags);
+void fuse_stat_account(struct fuse_conn * fc, int op, ktime_t val);
+
+
int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index 5884fe25a20f..b1a490d058e1 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -13,6 +13,7 @@
#include "pcs_cs.h"
#include "pcs_cluster.h"
#include "log.h"
+#include "fuse_prometheus.h"
#include "../../fuse_i.h"
@@ -36,6 +37,7 @@ static void on_read_done(struct pcs_fuse_req *r, size_t size)
struct pcs_fuse_cluster *pfc = cl_from_req(r);
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
+ fuse_stat_account(pfc->fc, KFUSE_OP_READ, ktime_sub(ktime_get(), r->exec.ireq.ts));
r->req.out.args[0].size = size;
inode_dio_end(r->req.io_inode);
request_end(pfc->fc, &r->req);
@@ -46,6 +48,7 @@ static void on_sync_done(struct pcs_fuse_req *r)
struct pcs_fuse_cluster *pfc = cl_from_req(r);
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
+ fuse_stat_account(pfc->fc, KFUSE_OP_FSYNC, ktime_sub(ktime_get(), r->exec.ireq.ts));
request_end(pfc->fc, &r->req);
}
@@ -57,6 +60,7 @@ static void on_write_done(struct pcs_fuse_req *r, off_t pos, size_t size)
out->size = size;
DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
+ fuse_stat_account(pfc->fc, KFUSE_OP_WRITE, ktime_sub(ktime_get(), r->exec.ireq.ts));
inode_dio_end(r->req.io_inode);
request_end(pfc->fc, &r->req);
}
@@ -126,6 +130,7 @@ static void prepare_io_(struct pcs_fuse_req *r, unsigned short type, off_t offse
/* Initialize internal request structure */
ireq->type = PCS_IREQ_API;
+ ireq->ts = ktime_get();
ireq->apireq.req = &r->exec.io.req;
ireq->complete_cb = intreq_complete;
ireq->completion_data.parent = 0;
diff --git a/fs/fuse/kio/pcs/fuse_ktrace.h b/fs/fuse/kio/pcs/fuse_ktrace.h
new file mode 100644
index 000000000000..7f8a50a9cc1f
--- /dev/null
+++ b/fs/fuse/kio/pcs/fuse_ktrace.h
@@ -0,0 +1,45 @@
+#ifndef _FUSE_KTRACE_H_
+#define _FUSE_KTRACE_H_ 1
+
+#include "fuse_ktrace_prot.h"
+#include <linux/relay.h>
+
+/*
+ * Per-connection tracing state: a relay channel for trace records plus
+ * per-cpu latency histograms exported through debugfs.
+ */
+struct fuse_ktrace
+{
+ /* Reference count; the object is freed when it drops to zero. */
+ atomic_t refcnt;
+ /* Relay channel that trace records are reserved from. */
+ struct rchan *rchan;
+ /* debugfs directory holding this connection's trace files. */
+ struct dentry *dir;
+ /* Per-cpu count of records dropped because relay_reserve() failed. */
+ unsigned long __percpu *ovfl;
+ /* debugfs "prometheus" file that exposes the summed histograms. */
+ struct dentry *prometheus_dentry;
+ /* Per-cpu pointer to that CPU's histogram page; may be NULL. */
+ struct kfuse_histogram * __percpu *prometheus_hist;
+};
+
+/*
+ * Reserve a trace record with @len payload bytes in @tr's relay channel and
+ * fill in its header.
+ *
+ * @tr:   trace context (relay channel plus per-cpu drop counters)
+ * @type: record type stored in the header (FUSE_KTRACE_*)
+ * @len:  payload length in bytes, not counting struct fuse_trace_hdr
+ *
+ * Returns a pointer to the record header (the payload follows it), or NULL
+ * if relay_reserve() could not provide space.
+ *
+ * Preemption is disabled on entry and is NOT re-enabled on either return
+ * path: the caller must always pair this with FUSE_TRACE_COMMIT(), even
+ * when NULL is returned.
+ */
+static inline void * fuse_trace_prepare(struct fuse_ktrace * tr, int type, int len)
+{
+	struct fuse_trace_hdr * t;
+	unsigned long * ovfl;
+
+	preempt_disable();
+	ovfl = per_cpu_ptr(tr->ovfl, smp_processor_id());
+
+	t = relay_reserve(tr->rchan, sizeof(*t) + len);
+	if (!t) {
+		/*
+		 * Count the dropped record. The count is reported through the
+		 * 16-bit ovfl header field, so saturate at 65535 rather than
+		 * letting the truncated value wrap back to "nothing dropped".
+		 */
+		if (*ovfl < 65535)
+			++(*ovfl);
+		return NULL;
+	}
+
+	t->magic = FUSE_TRACE_MAGIC;
+	t->type = type;
+	t->pdu_len = len;
+	/* Report drops seen on this CPU since the last record, then reset. */
+	if ((t->ovfl = *ovfl) != 0)
+		*ovfl = 0;
+	/* Timestamp in microseconds. */
+	t->time = ktime_to_ns(ktime_get()) / 1000;
+
+	return t;
+}
+
+/* COMMIT re-enables the preemption that PREPARE left disabled. */
+#define FUSE_TRACE_PREPARE(tr, type, len) fuse_trace_prepare((tr), (type), (len))
+#define FUSE_TRACE_COMMIT(tr) preempt_enable()
+
+#endif /* _FUSE_KTRACE_H_ */
diff --git a/fs/fuse/kio/pcs/fuse_ktrace_prot.h b/fs/fuse/kio/pcs/fuse_ktrace_prot.h
new file mode 100644
index 000000000000..19b25ed0df6a
--- /dev/null
+++ b/fs/fuse/kio/pcs/fuse_ktrace_prot.h
@@ -0,0 +1,43 @@
+#ifndef _FUSE_KTRACE_PROT_H_
+#define _FUSE_KTRACE_PROT_H_ 1
+
+#define FUSE_TRACE_MAGIC 0xf59c
+#define FUSE_KTRACE_SIZE (512 * 1024)
+#define FUSE_KTRACE_NR (4)
+
+/* Header written in front of every record in the trace relay channel. */
+struct fuse_trace_hdr
+{
+ /* Always FUSE_TRACE_MAGIC. */
+ __u16 magic;
+ /* Record type, one of FUSE_KTRACE_*. */
+ __u16 type;
+ /* Length in bytes of the payload that follows this header. */
+ __u16 pdu_len;
+ /* Records dropped on the writing CPU since its previous record. */
+ __u16 ovfl;
+ /* Time the record was reserved, in microseconds (ktime_get() based). */
+ __u64 time;
+};
+
+#define FUSE_KTRACE_STRING 1
+#define FUSE_KTRACE_IOTIMES 2
+
+/*
+ * Payload of a FUSE_KTRACE_IOTIMES record; it is followed by `cses`
+ * entries of struct fuse_tr_iotimes_cs.
+ */
+struct fuse_tr_iotimes_hdr
+{
+ /* Chunk value taken from the request (iochunk.chunk). */
+ __u64 chunk;
+ /* chunk + in-chunk offset; 0 for sync responses. */
+ __u64 offset;
+ /* I/O size from the request; 0 for sync responses. */
+ __u64 size;
+ /* Request timestamp (ireq->ts), in microseconds. */
+ __u64 start_time;
+ /* Microseconds between ireq->ts and ireq->ts_sent. */
+ __u32 local_delay;
+ /* Microseconds between ireq->ts_sent and the record's time stamp. */
+ __u32 lat;
+ /* File id taken from the request's dentry attributes. */
+ __u64 ino;
+ /* Message type of the chunk-server response. */
+ __u16 type;
+ /* Number of fuse_tr_iotimes_cs entries following this header. */
+ __u8 cses;
+ /* Padding; not written by cs_log_io_times(). */
+ __u8 __pad;
+ __u32 __pad1;
+};
+
+/*
+ * Per-chunk-server timing entry of a FUSE_KTRACE_IOTIMES record. The
+ * misc, ts_net and ts_io values are copied verbatim from the sync info
+ * carried in the chunk-server response.
+ */
+struct fuse_tr_iotimes_cs
+{
+ /* Id of the chunk server this entry describes. */
+ __u64 csid;
+ __u64 misc;
+ __u32 ts_net;
+ __u32 ts_io;
+};
+
+#endif /* _FUSE_KTRACE_PROT_H_ */
diff --git a/fs/fuse/kio/pcs/fuse_prometheus.h b/fs/fuse/kio/pcs/fuse_prometheus.h
new file mode 100644
index 000000000000..0d9f4071818e
--- /dev/null
+++ b/fs/fuse/kio/pcs/fuse_prometheus.h
@@ -0,0 +1,11 @@
+#ifndef __FUSE_PROMETHEUS_H__
+#define __FUSE_PROMETHEUS_H__ 1
+
+#include "fuse_prometheus_prot.h"
+
+/*
+ * Holder for a per-cpu histogram.
+ * NOTE(review): no user of this struct is visible in this patch - confirm
+ * it is still needed.
+ */
+struct fuse_prometheus_data
+{
+ struct kfuse_histogram __percpu *histo;
+};
+
+#endif /* __FUSE_PROMETHEUS_H__ */
diff --git a/fs/fuse/kio/pcs/fuse_prometheus_prot.h b/fs/fuse/kio/pcs/fuse_prometheus_prot.h
new file mode 100644
index 000000000000..254fae3ad1eb
--- /dev/null
+++ b/fs/fuse/kio/pcs/fuse_prometheus_prot.h
@@ -0,0 +1,23 @@
+#ifndef __FUSE_PROMETHEUS_PROT__
+#define __FUSE_PROMETHEUS_PROT__ 1
+
+#define KFUSE_OP_READ 0
+#define KFUSE_OP_WRITE 1
+#define KFUSE_OP_FSYNC 2
+#define KFUSE_OP_FALLOCATE 3
+#define KFUSE_OP_MAX 4
+
+#define KFUSE_PROM_MAX (9*5 + 1)
+
+/* One histogram cell. */
+struct kfuse_stat_rec
+{
+ /* Sum of the latencies accounted to this cell, in microseconds. */
+ u64 value;
+ /* Number of samples accounted to this cell. */
+ u64 count;
+};
+
+/*
+ * Latency histogram for all operation types. For each KFUSE_OP_* row,
+ * cells 0 .. KFUSE_PROM_MAX-1 are latency buckets and the extra cell at
+ * index KFUSE_PROM_MAX accumulates the total over all samples of that op.
+ */
+struct kfuse_histogram
+{
+ struct kfuse_stat_rec buckets[KFUSE_OP_MAX][KFUSE_PROM_MAX+1];
+};
+
+#endif /* __FUSE_PROMETHEUS_PROT__ */
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index 0f7463e8f13a..d6324d9a90f4 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -15,6 +15,7 @@
#include "pcs_cluster.h"
#include "pcs_ioctl.h"
#include "log.h"
+#include "fuse_ktrace.h"
/* Lock order: cs->lock -> css->lock (lru, hash, bl_list) */
@@ -242,6 +243,64 @@ void cs_set_io_times_logger(void (*logger)(struct pcs_int_request *ireq, struct
io_times_logger_ctx = ctx;
}
+/*
+ * Emit a FUSE_KTRACE_IOTIMES trace record for a chunk-server response:
+ * one fuse_tr_iotimes_hdr followed by one fuse_tr_iotimes_cs for the
+ * responding server and, for non-read responses, one more per trailing
+ * pcs_cs_sync_resp record in the message.
+ *
+ * Does nothing unless the connection has a trace context and its
+ * ktrace_level is at least LOG_TRACE. @max_iolat is not used here.
+ */
+void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsigned int max_iolat)
+{
+ /* Ugly. Need to move fc ref to get rid of pcs_cluster_core */
+ struct fuse_conn * fc = container_of(ireq->cc, struct pcs_fuse_cluster, cc)->fc;
+ if (fc->ktrace && fc->ktrace_level >= LOG_TRACE) {
+ struct pcs_cs_iohdr * h = (struct pcs_cs_iohdr *)msg_inline_head(resp);
+ /* Number of per-CS entries; starts at 1 for the responding CS. */
+ int n = 1;
+ struct fuse_trace_hdr * t;
+
+ /* First pass: count the sync records that fit within hdr.len. */
+ if (h->hdr.type != PCS_CS_READ_RESP) {
+ struct pcs_cs_sync_resp * srec;
+
+ for (srec = (struct pcs_cs_sync_resp*)(h + 1);
+ (void*)(srec + 1) <= (void*)h + h->hdr.len;
+ srec++)
+ n++;
+ }
+
+ t = FUSE_TRACE_PREPARE(fc->ktrace, FUSE_KTRACE_IOTIMES, sizeof(struct fuse_tr_iotimes_hdr) +
+ n*sizeof(struct fuse_tr_iotimes_cs));
+ if (t) {
+ /* Payload layout: iotimes header, then the per-CS entries. */
+ struct fuse_tr_iotimes_hdr * th = (struct fuse_tr_iotimes_hdr *)(t + 1);
+ struct fuse_tr_iotimes_cs * ch = (struct fuse_tr_iotimes_cs *)(th + 1);
+
+ th->chunk = ireq->iochunk.chunk;
+ th->offset = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.chunk + ireq->iochunk.offset : 0;
+ th->size = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.size : 0;
+ th->start_time = ktime_to_us(ireq->ts);
+ th->local_delay = ktime_to_us(ktime_sub(ireq->ts_sent, ireq->ts));
+ /* t->time is in microseconds, as is ktime_to_us(). */
+ th->lat = t->time - ktime_to_us(ireq->ts_sent);
+ th->ino = ireq->dentry->fileinfo.attr.id;
+ th->type = h->hdr.type;
+ th->cses = 1;
+
+ /* Entry for the chunk server that sent this response. */
+ ch->csid = resp->rpc->peer_id.val;
+ ch->misc = h->sync.misc;
+ ch->ts_net = h->sync.ts_net;
+ ch->ts_io = h->sync.ts_io;
+ ch++;
+
+ /* Second pass: same walk as above, now copying each record out. */
+ if (h->hdr.type != PCS_CS_READ_RESP) {
+ struct pcs_cs_sync_resp * srec;
+
+ for (srec = (struct pcs_cs_sync_resp*)(h + 1);
+ (void*)(srec + 1) <= (void*)h + h->hdr.len;
+ srec++) {
+ ch->csid = srec->cs_id.val;
+ ch->misc = srec->sync.misc;
+ ch->ts_net = srec->sync.ts_net;
+ ch->ts_io = srec->sync.ts_io;
+ ch++;
+ th->cses++;
+ }
+ }
+ }
+ /*
+ * Runs even when PREPARE returned NULL: PREPARE leaves preemption
+ * disabled on both of its return paths.
+ */
+ FUSE_TRACE_COMMIT(fc->ktrace);
+ }
+}
void pcs_cs_update_stat(struct pcs_cs *cs, u32 iolat, u32 netlat, int op_type)
{
@@ -443,7 +502,7 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
msg->timeout = csl->write_timeout;
else
msg->timeout = csl->read_timeout;
- ireq->ts_sent = jiffies;
+ ireq->ts_sent = ktime_get();
ireq->wait_origin.val = 0;
@@ -525,7 +584,7 @@ static void cs_keep_waiting(struct pcs_rpc *ep, struct pcs_msg *req, struct pcs_
who = lookup_and_lock_cs(cs->css, &h->xid.origin);
if (who) {
struct pcs_int_request *ireq = req->private2;
- abs_time_t lat = ((jiffies - ireq->ts_sent) * 1000) / HZ;
+ abs_time_t lat = ktime_to_ms(ktime_sub(ktime_get(), ireq->ts_sent));
if (ireq)
ireq->wait_origin = h->xid.origin;
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 29d62faa8612..35b309e392c8 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -19,15 +19,22 @@
#include <linux/delay.h>
#include <linux/socket.h>
#include <linux/net.h>
+#include <linux/debugfs.h>
#include "pcs_ioctl.h"
#include "pcs_cluster.h"
#include "pcs_rpc.h"
+#include "fuse_ktrace.h"
+#include "fuse_prometheus.h"
+
+static int fuse_ktrace_setup(struct fuse_conn * fc);
+static int fuse_ktrace_remove(struct fuse_conn *fc);
static struct kmem_cache *pcs_fuse_req_cachep;
static struct kmem_cache *pcs_ireq_cachep;
static struct workqueue_struct *pcs_wq;
static struct fuse_kio_ops kio_pcs_ops;
+static struct dentry *fuse_trace_root;
static void process_pcs_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
@@ -58,6 +65,10 @@ static void process_pcs_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->kio.ctx = pfc;
printk("FUSE: kio_pcs: cl: " CLUSTER_ID_FMT ", clientid: " NODE_FMT "\n",
CLUSTER_ID_ARGS(info->cluster_id), NODE_ARGS(info->node_id));
+
+ fuse_ktrace_setup(fc);
+ fc->ktrace_level = LOG_TRACE;
+
out:
kfree(info);
/* We are called from process_init_reply before connection
@@ -111,6 +122,9 @@ int kpcs_conn_init(struct fuse_conn *fc)
void kpcs_conn_fini(struct fuse_conn *fc)
{
+ if (fc->ktrace)
+ fuse_ktrace_remove(fc);
+
if (!fc->kio.ctx)
return;
@@ -937,6 +951,249 @@ static int kpcs_req_send(struct fuse_conn* fc, struct fuse_req *req, bool bg, bo
return 0;
}
+/* Release every per-cpu histogram page, then the per-cpu pointer array. */
+static void fuse_trace_free_hist(struct kfuse_histogram * __percpu *hist)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct kfuse_histogram *page = *per_cpu_ptr(hist, cpu);
+
+		if (page)
+			free_page((unsigned long)page);
+	}
+	free_percpu(hist);
+}
+
+/*
+ * Destroy a trace context: close the relay channel, free the drop counters,
+ * remove the "prometheus" file and its histograms, remove the debugfs
+ * directory and finally free @tr itself.
+ */
+static void fuse_trace_free(struct fuse_ktrace *tr)
+{
+	relay_close(tr->rchan);
+	free_percpu(tr->ovfl);
+
+	if (tr->prometheus_dentry)
+		debugfs_remove(tr->prometheus_dentry);
+	if (tr->prometheus_hist)
+		fuse_trace_free_hist(tr->prometheus_hist);
+
+	debugfs_remove(tr->dir);
+	kfree(tr);
+}
+
+/*
+ * Detach the trace context from @fc and drop the connection's reference to
+ * it; the context is freed here if that was the last reference.
+ * Returns 0, or -EINVAL if @fc had no trace context.
+ */
+static int fuse_ktrace_remove(struct fuse_conn *fc)
+{
+	struct fuse_ktrace *detached = xchg(&fc->ktrace, NULL);
+
+	if (detached == NULL)
+		return -EINVAL;
+
+	if (atomic_dec_and_test(&detached->refcnt))
+		fuse_trace_free(detached);
+
+	return 0;
+}
+
+/* relay subbuf_start hook: returns 0 when the buffer is full, 1 otherwise. */
+static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+				 void *prev_subbuf, size_t prev_padding)
+{
+	if (relay_buf_full(buf))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * relay create_buf_file hook: expose the channel buffer as a debugfs file
+ * served by relay_file_operations. @is_global is left untouched.
+ */
+static struct dentry * create_buf_file_callback(const char *filename,
+						struct dentry *parent,
+						umode_t mode,
+						struct rchan_buf *buf,
+						int *is_global)
+{
+	struct dentry *entry;
+
+	entry = debugfs_create_file(filename, mode, parent, buf,
+				    &relay_file_operations);
+	return entry;
+}
+
+/* relay remove_buf_file hook: drop the debugfs file; always reports 0. */
+static int remove_buf_file_callback(struct dentry *dentry)
+{
+	const int ok = 0;
+
+	debugfs_remove(dentry);
+	return ok;
+}
+
+
+/* Hooks handed to relay_open() for the per-connection trace channel. */
+static struct rchan_callbacks relay_callbacks = {
+	.create_buf_file = create_buf_file_callback,
+	.remove_buf_file = remove_buf_file_callback,
+	.subbuf_start = subbuf_start_callback,
+};
+
+/*
+ * Map a latency in microseconds to its histogram bucket index.
+ * Each decade from 100us up to 10s is split into steps of one tenth of the
+ * decade's upper bound; everything from 10s up lands in the last bucket
+ * (9*5).
+ */
+static inline unsigned int kfuse_lat_bucket(unsigned long long lat)
+{
+	if (lat < 1000)
+		return lat/100;
+	if (lat < 10000)
+		return 9*1 + (lat/1000);
+	if (lat < 100000)
+		return 9*2 + (lat/10000);
+	if (lat < 1000000)
+		return 9*3 + (lat/100000);
+	if (lat < 10000000)
+		return 9*4 + (lat/1000000);
+	return 9*5;
+}
+
+/*
+ * Account one completed operation in the current CPU's latency histogram.
+ *
+ * @fc:  connection whose trace context (if any) receives the sample
+ * @op:  operation type, one of KFUSE_OP_* (must be below KFUSE_OP_MAX)
+ * @val: elapsed time of the operation
+ *
+ * The sample is added both to its latency bucket and to the per-op total
+ * kept at index KFUSE_PROM_MAX. Does nothing when the connection has no
+ * trace context or no histogram storage.
+ */
+void fuse_stat_account(struct fuse_conn * fc, int op, ktime_t val)
+{
+	struct fuse_ktrace * tr = fc->ktrace;
+	struct kfuse_histogram * hist;
+
+	BUG_ON(op < 0 || op >= KFUSE_OP_MAX);
+
+	/*
+	 * prometheus_hist stays NULL when its per-cpu allocation failed.
+	 * per_cpu_ptr() on a NULL base does not yield NULL, so the base itself
+	 * must be tested before anything derived from it is dereferenced.
+	 */
+	if (!tr || !tr->prometheus_hist)
+		return;
+
+	/* get_cpu() pins us to this CPU until put_cpu(). */
+	hist = *per_cpu_ptr(tr->prometheus_hist, get_cpu());
+	if (hist) {
+		struct kfuse_stat_rec * buckets = hist->buckets[op];
+		struct kfuse_stat_rec * bucket;
+		unsigned long long lat = ktime_to_ns(val)/1000;
+
+		bucket = buckets + kfuse_lat_bucket(lat);
+		bucket->value += lat;
+		bucket->count++;
+		buckets[KFUSE_PROM_MAX].value += lat;
+		buckets[KFUSE_PROM_MAX].count++;
+	}
+	put_cpu();
+}
+
+/*
+ * Open handler for the debugfs "prometheus" file. On success the file holds
+ * a reference on the trace context (dropped again in the release handler)
+ * and filp->private_data points at it.
+ * Returns 0 or the error from generic_file_open().
+ */
+static int prometheus_file_open(struct inode *inode, struct file *filp)
+{
+	struct fuse_ktrace * tr = inode->i_private;
+	int err;
+
+	/*
+	 * Run the generic checks first: if open fails, release is never
+	 * called, so a reference taken beforehand would be leaked.
+	 */
+	err = generic_file_open(inode, filp);
+	if (err)
+		return err;
+
+	atomic_inc(&tr->refcnt);
+	filp->private_data = tr;
+
+	return 0;
+}
+
+/*
+ * Release handler for the "prometheus" file: drops the reference taken at
+ * open time and frees the trace context if it was the last one.
+ */
+static int prometheus_file_release(struct inode *inode, struct file *filp)
+{
+	struct fuse_ktrace * tr = inode->i_private;
+	int last = atomic_dec_and_test(&tr->refcnt);
+
+	if (last)
+		fuse_trace_free(tr);
+
+	return 0;
+}
+
+/*
+ * Read handler for the "prometheus" file: sums the per-cpu histograms into
+ * a scratch page and copies the requested window of that sum to user space.
+ *
+ * Returns the number of bytes copied, 0 at end of file, -EINVAL if the
+ * trace context has no histogram storage, -ENOMEM if the scratch page
+ * cannot be allocated, or -EFAULT if the copy to user space fails.
+ */
+static ssize_t prometheus_file_read(struct file *filp,
+				    char __user *buffer,
+				    size_t count,
+				    loff_t *ppos)
+{
+	/*
+	 * NOTE(review): this limit is smaller than sizeof(struct
+	 * kfuse_histogram), whose rows have KFUSE_PROM_MAX+1 cells, so the
+	 * tail of the last op's row is never returned. Kept as is because
+	 * user space may depend on the current file length - confirm.
+	 */
+	const size_t limit = KFUSE_PROM_MAX*KFUSE_OP_MAX*sizeof(struct kfuse_stat_rec);
+	struct fuse_ktrace * tr = filp->private_data;
+	struct kfuse_histogram * hist;
+	ssize_t ret;
+	int cpu;
+
+	if (*ppos >= limit)
+		return 0;
+	/* Clamp without computing *ppos + count, which could wrap. */
+	if (count > limit - *ppos)
+		count = limit - *ppos;
+
+	/* Checked before allocating so this early exit cannot leak the page. */
+	if (!tr->prometheus_hist)
+		return -EINVAL;
+
+	hist = (void*)get_zeroed_page(GFP_KERNEL);
+	if (!hist)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct kfuse_histogram * src = *per_cpu_ptr(tr->prometheus_hist, cpu);
+		int i, k;
+
+		/* A CPU's page may be missing if its allocation failed. */
+		if (!src)
+			continue;
+
+		for (i = 0; i < KFUSE_OP_MAX; i++) {
+			for (k = 0; k < KFUSE_PROM_MAX + 1; k++) {
+				hist->buckets[i][k].value += src->buckets[i][k].value;
+				hist->buckets[i][k].count += src->buckets[i][k].count;
+			}
+		}
+	}
+
+	if (copy_to_user(buffer, (char*)hist + *ppos, count)) {
+		ret = -EFAULT;
+	} else {
+		*ppos += count;
+		ret = count;
+	}
+
+	free_page((unsigned long)hist);
+	return ret;
+}
+
+/* File operations of the debugfs "prometheus" histogram file. */
+const struct file_operations prometheus_file_operations = {
+	.release = prometheus_file_release,
+	.read = prometheus_file_read,
+	.open = prometheus_file_open,
+};
+
+/*
+ * Create the tracing context for @fc and publish it in fc->ktrace.
+ *
+ * Creates a debugfs directory named after fc->dev under fuse_trace_root,
+ * opens the "trace" relay channel in it, adds the "prometheus" file and
+ * allocates one zeroed histogram page per possible CPU. Failure to set up
+ * the prometheus file or the histograms does not fail the setup.
+ *
+ * Returns 0 on success, -ENOENT if there is no trace root or the debugfs
+ * directory or relay channel cannot be created, -ENOMEM on allocation
+ * failure, -EBUSY if @fc already has a trace context.
+ */
+static int fuse_ktrace_setup(struct fuse_conn * fc)
+{
+	int ret;
+	struct fuse_ktrace * tr;
+	struct fuse_ktrace * old_tr;
+	struct dentry * dir;
+	struct kfuse_histogram * __percpu * hist;
+	char name[16];
+
+	if (!fuse_trace_root)
+		return -ENOENT;
+
+	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+	if (!tr)
+		return -ENOMEM;
+
+	/*
+	 * Take the initial reference right away. The error path drops it with
+	 * atomic_dec_and_test(); with the count still at zero that decrement
+	 * would go negative and the partially built context would be leaked.
+	 */
+	atomic_set(&tr->refcnt, 1);
+
+	ret = -ENOMEM;
+	tr->ovfl = alloc_percpu(unsigned long);
+	if (!tr->ovfl)
+		goto err;
+
+	ret = -ENOENT;
+
+	snprintf(name, sizeof(name), "%u", fc->dev);
+
+	dir = debugfs_create_dir(name, fuse_trace_root);
+	if (!dir)
+		goto err;
+
+	tr->dir = dir;
+	tr->rchan = relay_open("trace", dir, FUSE_KTRACE_SIZE,
+			       FUSE_KTRACE_NR, &relay_callbacks, tr);
+	if (!tr->rchan)
+		goto err;
+
+	tr->prometheus_dentry = debugfs_create_file("prometheus", S_IFREG|0444, dir, tr,
+						    &prometheus_file_operations);
+	hist = (void*)alloc_percpu(void *);
+	if (hist) {
+		int cpu;
+
+		/* Each CPU's histogram lives in a single page. */
+		BUILD_BUG_ON(sizeof(struct kfuse_histogram) > PAGE_SIZE);
+
+		for_each_possible_cpu(cpu) {
+			struct kfuse_histogram ** histp;
+			histp = per_cpu_ptr(hist, cpu);
+			*histp = (void*)get_zeroed_page(GFP_KERNEL);
+		}
+		tr->prometheus_hist = hist;
+	}
+
+	/* Publish; if a context was already installed, put it back and bail. */
+	ret = -EBUSY;
+	old_tr = xchg(&fc->ktrace, tr);
+	if (old_tr) {
+		(void) xchg(&fc->ktrace, old_tr);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	/*
+	 * Drops the initial reference. fuse_trace_free() is handed NULL for
+	 * whatever was not set up yet; relay_close(), free_percpu() and
+	 * debugfs_remove() all accept NULL.
+	 */
+	if (atomic_dec_and_test(&tr->refcnt))
+		fuse_trace_free(tr);
+	return ret;
+}
+
static struct fuse_kio_ops kio_pcs_ops = {
.name = "pcs",
@@ -974,6 +1231,9 @@ static int __init kpcs_mod_init(void)
if(fuse_register_kio(&kio_pcs_ops))
goto free_wq;
+
+ fuse_trace_root = debugfs_create_dir("fuse", NULL);
+
printk("%s fuse_c:%p ireq_c:%p pcs_wq:%p\n", __FUNCTION__,
pcs_fuse_req_cachep, pcs_ireq_cachep, pcs_wq);
@@ -989,6 +1249,9 @@ static int __init kpcs_mod_init(void)
static void __exit kpcs_mod_exit(void)
{
+ if (fuse_trace_root)
+ debugfs_remove(fuse_trace_root);
+
fuse_unregister_kio(&kio_pcs_ops);
destroy_workqueue(pcs_wq);
kmem_cache_destroy(pcs_ireq_cachep);
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 98b8357c5c2b..7f877a06d63f 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -1316,7 +1316,7 @@ static int worth_to_grow(struct pcs_int_request *ireq, struct pcs_cs * cs)
if (ireq->type == PCS_IREQ_FLUSH)
return 0;
- return jiffies < ireq->ts_sent + cc_from_csset(cs->css)->netlat_cutoff;
+ return ktime_to_ms(ktime_sub(ktime_get(), ireq->ts_sent)) + cc_from_csset(cs->css)->netlat_cutoff;
}
static void pcs_cs_deaccount(struct pcs_int_request *ireq, struct pcs_cs * cs, int error)
@@ -2107,7 +2107,7 @@ static int pcs_cslist_submit_flush(struct pcs_int_request *ireq, struct pcs_cs_l
BUG_ON(ireq->flushreq.csl);
cslist_get(csl);
ireq->flushreq.csl = csl;
- ireq->ts_sent = jiffies;
+ ireq->ts_sent = ktime_get();
ireq->wait_origin.val = 0;
msg = ireq->flushreq.msg;
@@ -2508,11 +2508,16 @@ static void update_net_latency(struct pcs_cs_list * csl, PCS_NODE_ID_T id,
}
}
-static inline u32 calc_latency(abs_time_t start)
+static inline u32 calc_latency(ktime_t start)
{
- abs_time_t now = jiffies;
- u64 elapsed = (now > start)? now - start: 0;
- return elapsed > ~0U ? ~0U : elapsed;
+ ktime_t now = ktime_get();
+
+ if (ktime_compare(now, start) > 0) {
+ u64 elapsed = ktime_to_ms(ktime_sub(now, start));
+ return elapsed > ~0U ? ~0U : elapsed;
+ } else {
+ return 0;
+ }
}
static int commit_sync_info(struct pcs_int_request *req,
@@ -2550,8 +2555,7 @@ static int commit_sync_info(struct pcs_int_request *req,
max_iolat = srec->sync.ts_io;
}
}
- //// temproraly disable logging
- ////cs_log_io_times(req, resp, max_iolat);
+ cs_log_io_times(req, resp, max_iolat);
evaluate_dirty_status(m);
return err;
@@ -2871,7 +2875,7 @@ static int prepare_map_flush_ireq(struct pcs_map_entry *m, struct pcs_int_reques
}
prepare_map_flush_msg(m, sreq, msg);
sreq->type = PCS_IREQ_FLUSH;
- sreq->ts = jiffies;
+ sreq->ts = ktime_get();
sreq->completion_data.parent = NULL;
sreq->flushreq.map = m;
sreq->flushreq.csl = NULL;
diff --git a/fs/fuse/kio/pcs/pcs_req.c b/fs/fuse/kio/pcs/pcs_req.c
index 117e050691d9..1794990ada15 100644
--- a/fs/fuse/kio/pcs/pcs_req.c
+++ b/fs/fuse/kio/pcs/pcs_req.c
@@ -23,7 +23,8 @@ static void __ireq_init(struct pcs_dentry_info *di, struct pcs_cluster_core *cc,
{
memset(ireq, 0, sizeof(*ireq));
ireq->cc = cc;
- ireq->ts = ireq->create_ts = jiffies;
+ ireq->ts = ktime_get();
+ ireq->create_ts = jiffies;
setup_timer(&ireq->timer, ireq_timer_handler, (unsigned long)ireq);
INIT_HLIST_HEAD(&ireq->completion_data.child_list);
spin_lock_init(&ireq->completion_data.child_lock);
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index c8481a48413a..9954f5e7104b 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -61,8 +61,8 @@ struct pcs_int_request
atomic_t iocount;
int qdepth;
- abs_time_t ts;
- abs_time_t ts_sent;
+ ktime_t ts;
+ ktime_t ts_sent;
PCS_NODE_ID_T wait_origin;
struct {
More information about the Devel
mailing list