[Devel] [PATCH RHEL7 COMMIT] fuse/kio_pcs: fallocate support at low pcs protocol level
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Apr 27 12:07:53 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.47.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.46.7
------>
commit 8793cbd3ace5794165f687c3b1a678dc9754ec64
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date: Fri Apr 27 12:07:53 2018 +0300
fuse/kio_pcs: fallocate support at low pcs protocol level
It is not yet hooked up to the fuse protocol.
It is a direct backport of the following commits from user-space vstorage:
35006c3f8e230e1fe49b653947fa527b3deb7586
dcd256a49de5d9f0e54dfbc9bf7b15eab1b01c5f (partial)
36723c1c49a872d8ea4968ab11fa91643f6aa61c
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
fs/fuse/kio/pcs/pcs_client_types.h | 15 ++++++++++++
fs/fuse/kio/pcs/pcs_cluster.c | 2 +-
fs/fuse/kio/pcs/pcs_cs.c | 49 ++++++++++++++++++++++++++------------
fs/fuse/kio/pcs/pcs_cs.h | 3 +++
fs/fuse/kio/pcs/pcs_cs_prot.h | 30 ++++++++++++++++++++++-
fs/fuse/kio/pcs/pcs_map.c | 32 +++++++++++++++----------
fs/fuse/kio/pcs/pcs_req.c | 2 +-
fs/fuse/kio/pcs/pcs_req.h | 9 ++++---
8 files changed, 106 insertions(+), 36 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_client_types.h b/fs/fuse/kio/pcs/pcs_client_types.h
index 0be1caff7b46..a060ed58f87d 100644
--- a/fs/fuse/kio/pcs/pcs_client_types.h
+++ b/fs/fuse/kio/pcs/pcs_client_types.h
@@ -110,8 +110,23 @@ enum {
PCS_REQ_T_READ = 0,
PCS_REQ_T_WRITE = 1,
PCS_REQ_T_SYNC = 2,
+ PCS_REQ_T_WRITE_HOLE = 3,
+ PCS_REQ_T_WRITE_ZERO = 4,
+ PCS_REQ_T_FIEMAP = 5,
+ PCS_REQ_T_MAX = 6,
};
+static inline int pcs_req_direction(int reqno) /* Classify a PCS_REQ_T_* code: returns 0 for READ and FIEMAP, 1 for everything else. */
+{
+ switch (reqno) {
+ case PCS_REQ_T_READ:
+ case PCS_REQ_T_FIEMAP:
+ return 0; /* callers in this patch use 0 to pick the read submit path and the read timeout */
+ default:
+ return 1; /* WRITE, SYNC, WRITE_HOLE, WRITE_ZERO, and any value not listed above */
+ }
+}
+
/* Request flags */
#define PCS_REQ_F_ERROR 2
#define PCS_REQ_F_NOSPACE 4
diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index 7a9af9683e5e..8514e5ed06ce 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -138,7 +138,7 @@ static noinline void __pcs_cc_process_ireq_rw(struct pcs_int_request *ireq)
sreq->type = PCS_IREQ_IOCHUNK;
sreq->iochunk.map = NULL;
sreq->iochunk.flow = pcs_flow_get(fl);
- sreq->iochunk.direction = ireq->apireq.req->type;
+ sreq->iochunk.cmd = ireq->apireq.req->type;
sreq->iochunk.cs_index = 0;
sreq->iochunk.chunk = chunk;
sreq->iochunk.offset = rpos % di->fileinfo.sys.chunk_size;
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index 2752658bedab..1fecaeaff16b 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -239,14 +239,14 @@ void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsig
/* Ugly. Need to move fc ref to get rid of pcs_cluster_core */
struct fuse_conn * fc = container_of(ireq->cc, struct pcs_fuse_cluster, cc)->fc;
struct pcs_cs_iohdr * h = (struct pcs_cs_iohdr *)msg_inline_head(resp);
- int reqt = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.direction : PCS_REQ_T_SYNC;
+ int reqt = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.cmd : PCS_REQ_T_SYNC;
fuse_stat_account(fc, reqt, ktime_sub(ktime_get(), ireq->ts_sent));
if (fc->ktrace && fc->ktrace_level >= LOG_TRACE) {
int n = 1;
struct fuse_trace_hdr * t;
- if (h->hdr.type != PCS_CS_READ_RESP) {
+ if (h->hdr.type != PCS_CS_READ_RESP && h->hdr.type != PCS_CS_FIEMAP_RESP) {
struct pcs_cs_sync_resp * srec;
for (srec = (struct pcs_cs_sync_resp*)(h + 1);
@@ -277,7 +277,7 @@ void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsig
ch->ts_io = h->sync.ts_io;
ch++;
- if (h->hdr.type != PCS_CS_READ_RESP) {
+ if (h->hdr.type != PCS_CS_READ_RESP && h->hdr.type != PCS_CS_FIEMAP_RESP) {
struct pcs_cs_sync_resp * srec;
for (srec = (struct pcs_cs_sync_resp*)(h + 1);
@@ -326,8 +326,9 @@ static void cs_response_done(struct pcs_msg *msg)
pcs_map_verify_sync_state(ireq->dentry, ireq, msg);
} else {
- TRACE(XID_FMT " IO error %d %lu : %llu:%u+%u\n", XID_ARGS(ireq->iochunk.hbuf.hdr.xid), msg->error.value, msg->error.remote ? (unsigned long)msg->error.offender.val : 0UL,
- (unsigned long long)ireq->iochunk.chunk, (unsigned)ireq->iochunk.offset, ireq->iochunk.size);
+ TRACE(XID_FMT " IO error %d %lu : %llu:%u+%u\n", XID_ARGS(ireq->iochunk.hbuf.hdr.xid),
+ msg->error.value, msg->error.remote ? (unsigned long)msg->error.offender.val : 0UL,
+ (unsigned long long)ireq->iochunk.chunk, (unsigned)ireq->iochunk.offset, (unsigned)ireq->iochunk.size);
}
pcs_copy_error_cond(&ireq->error, &msg->error);
@@ -355,7 +356,7 @@ static void cs_get_read_response_iter(struct pcs_msg *msg, int offset, struct io
pcs_api_iorequest_t *ar = parent->apireq.req;
/* Read directly to memory given by user */
- BUG_ON(ireq->iochunk.direction != PCS_REQ_T_READ);
+ BUG_ON(ireq->iochunk.cmd != PCS_REQ_T_READ && ireq->iochunk.cmd != PCS_REQ_T_FIEMAP);
offset -= (unsigned int)sizeof(struct pcs_cs_iohdr);
ar->get_iter(ar->datasource, ireq->iochunk.dio_offset, it);
@@ -387,7 +388,7 @@ static struct pcs_msg *cs_get_hdr(struct pcs_rpc *ep, struct pcs_rpc_hdr *h)
if (!RPC_IS_RESPONSE(h->type))
return NULL;
- if (h->type != PCS_CS_READ_RESP)
+ if (h->type != PCS_CS_READ_RESP && h->type != PCS_CS_FIEMAP_RESP)
return NULL;
/* The goal is to avoid allocation new msg and reuse one inlined in ireq */
@@ -397,7 +398,7 @@ static struct pcs_msg *cs_get_hdr(struct pcs_rpc *ep, struct pcs_rpc_hdr *h)
return NULL;
req_h = (struct pcs_rpc_hdr *)msg_inline_head(msg);
- if (req_h->type != PCS_CS_READ_REQ)
+ if (req_h->type != (h->type & ~PCS_RPC_DIRECTION))
return NULL;
resp = pcs_rpc_alloc_input_msg(ep, sizeof(struct pcs_cs_iohdr));
@@ -430,7 +431,7 @@ static void cs_get_data(struct pcs_msg *msg, int offset, struct iov_iter *it)
if (parent->type == PCS_IREQ_API) {
pcs_api_iorequest_t *ar = parent->apireq.req;
- BUG_ON(ireq->iochunk.direction != PCS_REQ_T_WRITE);
+ BUG_ON(ireq->iochunk.cmd != PCS_REQ_T_WRITE);
offset -= (unsigned int)sizeof(struct pcs_cs_iohdr);
ar->get_iter(ar->datasource, ireq->iochunk.dio_offset, it);
@@ -466,14 +467,32 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
msg->private2 = ireq;
ioh = &ireq->iochunk.hbuf;
- ioh->hdr.len = sizeof(struct pcs_cs_iohdr) +
- (ireq->iochunk.direction ? ireq->iochunk.size : 0);
- ioh->hdr.type = ireq->iochunk.direction ? PCS_CS_WRITE_REQ : PCS_CS_READ_REQ;
+ ioh->hdr.len = sizeof(struct pcs_cs_iohdr);
+ switch (ireq->iochunk.cmd) {
+ case PCS_REQ_T_READ:
+ ioh->hdr.type = PCS_CS_READ_REQ;
+ break;
+ case PCS_REQ_T_WRITE:
+ ioh->hdr.type = PCS_CS_WRITE_REQ;
+ ioh->hdr.len += ireq->iochunk.size;
+ break;
+ case PCS_REQ_T_WRITE_HOLE:
+ ioh->hdr.type = PCS_CS_WRITE_HOLE_REQ;
+ break;
+ case PCS_REQ_T_WRITE_ZERO:
+ ioh->hdr.type = PCS_CS_WRITE_ZERO_REQ;
+ break;
+ case PCS_REQ_T_FIEMAP:
+ ioh->hdr.type = PCS_CS_FIEMAP_REQ;
+ break;
+ }
pcs_rpc_get_new_xid(&cc_from_cs(cs)->eng, &ioh->hdr.xid);
ioh->offset = ireq->iochunk.offset;
ioh->size = ireq->iochunk.size;
ioh->iocontext = (u32)ireq->dentry->fileinfo.attr.id;
ioh->_reserved = 0;
+ if (ireq->iochunk.cmd == PCS_REQ_T_FIEMAP)
+ ioh->fiemap_count = PCS_FIEMAP_CHUNK_COUNT;
memset(&ioh->sync, 0, sizeof(ioh->sync));
if (ireq->flags & IREQ_F_SEQ)
@@ -492,7 +511,7 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
*/
BUG_ON(ireq->iochunk.map->state & PCS_MAP_DEAD);
ioh->map_version = csl->version;
- if (ireq->iochunk.direction)
+ if (pcs_req_direction(ireq->iochunk.cmd))
msg->timeout = csl->write_timeout;
else
msg->timeout = csl->read_timeout;
@@ -504,7 +523,7 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
msg, ireq,
(unsigned long long)ireq->iochunk.chunk,
(unsigned)ireq->iochunk.offset,
- ireq->iochunk.size);
+ (unsigned)ireq->iochunk.size);
/* TODO reanable ratelimiting */
#if 0
@@ -596,7 +615,7 @@ static void cs_keep_waiting(struct pcs_rpc *ep, struct pcs_msg *req, struct pcs_
who->cwr_state = 1;
}
cs_update_io_latency(who, lat);
- if (ireq && ireq->type == PCS_IREQ_IOCHUNK && ireq->iochunk.direction == 0) {
+ if (ireq && ireq->type == PCS_IREQ_IOCHUNK && !pcs_req_direction(ireq->iochunk.cmd)) {
/* Force CS reselection */
pcs_map_force_reselect(who);
diff --git a/fs/fuse/kio/pcs/pcs_cs.h b/fs/fuse/kio/pcs/pcs_cs.h
index c04317e4a9a9..f46b31f2633d 100644
--- a/fs/fuse/kio/pcs/pcs_cs.h
+++ b/fs/fuse/kio/pcs/pcs_cs.h
@@ -25,6 +25,9 @@ struct pcs_map_entry;
#define PCS_CS_BLACKLIST_TIMER (10*HZ)
+#define PCS_FIEMAP_BUFSIZE (128*1024)
+#define PCS_FIEMAP_CHUNK_COUNT (PCS_FIEMAP_BUFSIZE/sizeof(struct pcs_cs_fiemap_rec))
+
enum {
CS_SF_LOCAL,
CS_SF_LOCAL_SOCK,
diff --git a/fs/fuse/kio/pcs/pcs_cs_prot.h b/fs/fuse/kio/pcs/pcs_cs_prot.h
index f6b1c7f0dedf..7de7dd1bea6b 100644
--- a/fs/fuse/kio/pcs/pcs_cs_prot.h
+++ b/fs/fuse/kio/pcs/pcs_cs_prot.h
@@ -4,6 +4,7 @@
#include "pcs_rpc_prot.h"
#define PCS_CS_FLUSH_WEIGHT (128*1024)
+#define PCS_CS_HOLE_WEIGHT (4096)
struct pcs_cs_sync_data
{
@@ -34,6 +35,17 @@ struct pcs_cs_sync_resp {
struct pcs_cs_sync_data sync;
} __attribute__((aligned(8)));
+struct pcs_cs_fiemap_rec /* Record whose size determines PCS_FIEMAP_CHUNK_COUNT; presumably one extent of a FIEMAP reply - TODO confirm against the CS protocol. */
+{
+ u32 offset;
+ u32 size;
+ u32 flags; /* presumably a bitmask of the PCS_CS_FIEMAP_FL_* values defined below - TODO confirm */
+#define PCS_CS_FIEMAP_FL_OVFL 1
+#define PCS_CS_FIEMAP_FL_ZERO 2
+#define PCS_CS_FIEMAP_FL_CACHE 4
+ u32 _reserved;
+} __attribute__((aligned(8)));
+
struct pcs_cs_iohdr {
struct pcs_rpc_hdr hdr;
@@ -42,7 +54,11 @@ struct pcs_cs_iohdr {
u64 offset;
u32 size;
u32 iocontext;
- u64 _reserved; /* For future extensions */
+ union {
+ u64 _reserved; /* For future extensions */
+ u64 hole_mask; /* Used only in REPLICATEX responses */
+ u32 fiemap_count; /* Used only in FIEMAP request, limit on number of extents to return */
+ };
struct pcs_cs_sync_data sync; /* Filled in all requests and responses */
struct pcs_cs_sync_resp sync_resp[0]; /* Used only in response to write/sync */
} __attribute__((aligned(8)));
@@ -74,6 +90,18 @@ struct pcs_cs_cong_notification {
#define PCS_CS_CONG_NOTIFY (PCS_RPC_CS_CLIENT_BASE + 10)
+#define PCS_CS_WRITE_ZERO_REQ (PCS_RPC_CS_CLIENT_BASE + 12)
+#define PCS_CS_WRITE_ZERO_RESP (PCS_CS_WRITE_ZERO_REQ|PCS_RPC_DIRECTION)
+
+#define PCS_CS_WRITE_HOLE_REQ (PCS_RPC_CS_CLIENT_BASE + 14)
+#define PCS_CS_WRITE_HOLE_RESP (PCS_CS_WRITE_HOLE_REQ|PCS_RPC_DIRECTION)
+
+#define PCS_CS_REPLICATEX_REQ (PCS_RPC_CS_CLIENT_BASE + 16)
+#define PCS_CS_REPLICATEX_RESP (PCS_CS_REPLICATEX_REQ|PCS_RPC_DIRECTION)
+
+#define PCS_CS_FIEMAP_REQ (PCS_RPC_CS_CLIENT_BASE + 18)
+#define PCS_CS_FIEMAP_RESP (PCS_CS_FIEMAP_REQ|PCS_RPC_DIRECTION)
+
////////////////////////////////////////////
//// from pcs_mds_cs_prot.h
//// required for PCS_CS_MAP_PROP_REQ/ping to work
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 7f877a06d63f..65b71c69ec8d 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -374,7 +374,7 @@ unsigned long map_gc(struct pcs_map_set *maps)
static inline int is_dirtying(struct pcs_map_entry * map, struct pcs_int_request *ireq)
{
- if (!ireq->iochunk.direction)
+ if (!pcs_req_direction(ireq->iochunk.cmd))
return 0;
/* Was not dirty? */
@@ -1235,7 +1235,7 @@ static void map_replicating(struct pcs_int_request *ireq)
struct pcs_cs_list * csl = ireq->iochunk.csl;
int read_idx = READ_ONCE(csl->read_index);
- BUG_ON(ireq->iochunk.direction);
+ BUG_ON(pcs_req_direction(ireq->iochunk.cmd));
if (csl == NULL || csl->map == NULL)
return;
@@ -1269,7 +1269,7 @@ static void map_read_error(struct pcs_int_request *ireq)
struct pcs_cs_list * csl = ireq->iochunk.csl;
struct pcs_cs * cs;
- BUG_ON(ireq->iochunk.direction);
+ BUG_ON(pcs_req_direction(ireq->iochunk.cmd));
if (csl == NULL || csl->map == NULL || (csl->map->state & PCS_MAP_ERROR))
return;
@@ -1324,9 +1324,13 @@ static void pcs_cs_deaccount(struct pcs_int_request *ireq, struct pcs_cs * cs, i
unsigned int cost;
spin_lock(&cs->lock);
- if (ireq->type == PCS_IREQ_IOCHUNK)
- cost = (ireq->flags & IREQ_F_RND_WEIGHT) ? 512*1024 : cong_roundup(ireq->iochunk.size);
- else
+ if (ireq->type == PCS_IREQ_IOCHUNK) {
+ if (ireq->iochunk.cmd == PCS_REQ_T_WRITE_HOLE ||
+ ireq->iochunk.cmd == PCS_REQ_T_WRITE_ZERO)
+ cost = PCS_CS_HOLE_WEIGHT;
+ else
+ cost = (ireq->flags & IREQ_F_RND_WEIGHT) ? 512*1024 : cong_roundup(ireq->iochunk.size);
+ } else
cost = PCS_CS_FLUSH_WEIGHT;
if (!error) {
@@ -1512,7 +1516,7 @@ void pcs_deaccount_ireq(struct pcs_int_request *ireq, pcs_error_t * err)
}
}
- if (ireq->type == PCS_IREQ_FLUSH || (ireq->iochunk.direction && !(ireq->flags & IREQ_F_MAPPED))) {
+ if (ireq->type == PCS_IREQ_FLUSH || (pcs_req_direction(ireq->iochunk.cmd) && !(ireq->flags & IREQ_F_MAPPED))) {
int i;
int requeue = 0;
@@ -1574,7 +1578,7 @@ void map_notify_soft_error(struct pcs_int_request *ireq)
err = ireq->error;
- if (!ireq->iochunk.direction &&
+ if (!pcs_req_direction(ireq->iochunk.cmd) &&
pcs_if_error(&err) &&
err.remote &&
err.value != PCS_ERR_CSD_STALE_MAP &&
@@ -1768,7 +1772,7 @@ pcs_ireq_split(struct pcs_int_request *ireq, unsigned int iochunk, int noalign)
if (sreq->iochunk.map)
__pcs_map_get(sreq->iochunk.map);
sreq->iochunk.flow = pcs_flow_get(ireq->iochunk.flow);
- sreq->iochunk.direction = ireq->iochunk.direction;
+ sreq->iochunk.cmd = ireq->iochunk.cmd;
sreq->iochunk.role = ireq->iochunk.role;
sreq->iochunk.cs_index = ireq->iochunk.cs_index;
sreq->iochunk.chunk = ireq->iochunk.chunk;
@@ -1885,7 +1889,7 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
struct pcs_int_request * sreq = ireq;
unsigned int weight;
- if (ireq->iochunk.size > iochunk) {
+ if (ireq->iochunk.size > iochunk && ireq->iochunk.cmd == PCS_REQ_T_READ) { /* Split only plain reads. This path is reached only for READ/FIEMAP, so testing for WRITE here would never split; FIEMAP stays unsplit. */
sreq = pcs_ireq_split(ireq, iochunk, 0);
if (sreq == NULL) {
@@ -2008,7 +2012,9 @@ static int pcs_cslist_submit_write(struct pcs_int_request *ireq, struct pcs_cs_l
sreq->flags &= ~(IREQ_F_RND_WEIGHT | IREQ_F_SEQ);
BUG_ON(sreq->flags & IREQ_F_SEQ_READ);
- if (pcs_flow_sequential(sreq->iochunk.flow)) {
+ if (ireq->iochunk.cmd != PCS_REQ_T_WRITE) {
+ weight = PCS_CS_HOLE_WEIGHT;
+ } else if (pcs_flow_sequential(sreq->iochunk.flow)) {
weight = cong_roundup(sreq->iochunk.size);
sreq->flags |= IREQ_F_SEQ;
} else if (!(get_io_tweaks(ireq->cc) & PCS_TWEAK_USE_FLOW_WEIGHT) ||
@@ -2137,7 +2143,7 @@ int pcs_cslist_submit(struct pcs_int_request *ireq, struct pcs_cs_list *csl, int
if (ireq->type == PCS_IREQ_FLUSH) {
return pcs_cslist_submit_flush(ireq, csl, requeue);
- } else if (!ireq->iochunk.direction) {
+ } else if (!pcs_req_direction(ireq->iochunk.cmd)) {
return pcs_cslist_submit_read(ireq, csl, requeue);
} else if (ireq->flags & IREQ_F_MAPPED) {
BUG();
@@ -2166,7 +2172,7 @@ void map_submit(struct pcs_map_entry * m, struct pcs_int_request *ireq, int requ
BUG_ON(pcs_if_error(&ireq->error));
- direction = (ireq->type != PCS_IREQ_FLUSH ? ireq->iochunk.direction : 1);
+ direction = (ireq->type != PCS_IREQ_FLUSH ? pcs_req_direction(ireq->iochunk.cmd) : 1);
do {
struct pcs_cs_list *csl = NULL;
diff --git a/fs/fuse/kio/pcs/pcs_req.c b/fs/fuse/kio/pcs/pcs_req.c
index 1794990ada15..f1c364fe5f00 100644
--- a/fs/fuse/kio/pcs/pcs_req.c
+++ b/fs/fuse/kio/pcs/pcs_req.c
@@ -87,7 +87,7 @@ void ireq_handle_hole(struct pcs_int_request *ireq)
pcs_api_iorequest_t * ar = ireq->completion_data.parent->apireq.req;
BUG_ON(ireq->type != PCS_IREQ_IOCHUNK);
- BUG_ON(ireq->iochunk.direction);
+ BUG_ON(pcs_req_direction(ireq->iochunk.cmd));
len = ireq->iochunk.size;
offset = 0;
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index 9954f5e7104b..bf603e0af414 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -90,13 +90,12 @@ struct pcs_int_request
struct {
struct pcs_map_entry *map;
//// Temproraly disable flow
- struct pcs_flow_node *flow;
- ////struct pcs_splice_buf *splice_rbuf;
- u8 direction;
+ struct pcs_flow_node *flow;
+ u8 cmd;
u8 role;
short cs_index;
- unsigned int size;
- unsigned int dio_offset;
+ u64 size;
+ u64 dio_offset;
u64 chunk;
u64 offset;
struct pcs_cs_list *csl;
More information about the Devel
mailing list