[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio: align CS messages to 512 bytes
Konstantin Khorenko
khorenko at virtuozzo.com
Mon May 25 15:56:37 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.8.2.vz7.161.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.8.2.vz7.161.1
------>
commit 94fa9d799079e071295ec87761f6df55b94f60b3
Author: Ildar Ismagilov <Ildar.Ismagilov at acronis.com>
Date: Mon May 25 15:56:37 2020 +0300
fs/fuse kio: align CS messages to 512 bytes
CS now receives client messages in batches into large contiguous buffers.
The csd_aio and csd_next modes require data buffers to be 512-byte aligned
so that they can be used for O_DIRECT. This means that every message
(including the message header) must now be aligned.
Message alignment is used only if the storage version is greater than or
equal to PCS_CS_MSG_ALIGNED_VERSION.
https://pmc.acronis.com/browse/VSTOR-33830
Signed-off-by: Ildar Ismagilov <ildar.ismagilov at virtuozzo.com>
Acked-by: Andrey Zaitsev <azaitsev at virtuozzo.com>
Acked-by: Alexey Kuznetsov <kuznet at acronis.com>
---
fs/fuse/kio/pcs/fuse_stat.c | 1 +
fs/fuse/kio/pcs/pcs_cs.c | 86 +++++++++++++++++++++++++++++++++++-----
fs/fuse/kio/pcs/pcs_cs.h | 3 ++
fs/fuse/kio/pcs/pcs_cs_prot.h | 9 +++++
fs/fuse/kio/pcs/pcs_map.c | 15 ++++---
fs/fuse/kio/pcs/pcs_prot_types.h | 1 +
fs/fuse/kio/pcs/pcs_req.h | 2 +
7 files changed, 100 insertions(+), 17 deletions(-)
diff --git a/fs/fuse/kio/pcs/fuse_stat.c b/fs/fuse/kio/pcs/fuse_stat.c
index 1bcffa9641a36..47e7f1c404eb5 100644
--- a/fs/fuse/kio/pcs/fuse_stat.c
+++ b/fs/fuse/kio/pcs/fuse_stat.c
@@ -725,6 +725,7 @@ struct fuse_val_stat *req_stat_entry(struct pcs_fuse_io_stat *io, u32 type)
return &io->read_bytes;
case PCS_CS_WRITE_SYNC_RESP:
case PCS_CS_WRITE_RESP:
+ case PCS_CS_WRITE_AL_RESP:
return &io->write_bytes;
case PCS_CS_SYNC_RESP:
return &io->flush_cnt;
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index 9487e614bce8b..abf714620b9c9 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -82,6 +82,31 @@ static int pcs_cs_percpu_stat_alloc(struct pcs_cs *cs)
return -ENOMEM;
}
+u32 pcs_cs_msg_size(u32 size, u32 storage_version) /* Returns the on-wire size to use for a CS message of 'size' bytes. */
+{
+ if (pcs_cs_use_aligned_io(storage_version)) /* padding applies only when the storage version enables aligned messages */
+ size = ALIGN(size, PCS_CS_MSG_ALIGNMENT); /* round up to a multiple of PCS_CS_MSG_ALIGNMENT (512) */
+
+ return size; /* unchanged for older storage versions */
+}
+
+struct pcs_msg* pcs_alloc_cs_msg(u32 type, u32 size, u32 storage_version) /* Allocates a zeroed CS output message with its RPC header filled in; returns NULL if allocation fails. */
+{
+ struct pcs_msg* msg;
+ struct pcs_rpc_hdr* h;
+
+ msg = pcs_rpc_alloc_output_msg(pcs_cs_msg_size(size, storage_version)); /* request the (possibly padded) size, not the raw 'size' */
+ if (!msg)
+ return NULL;
+
+ h = (struct pcs_rpc_hdr*)msg_inline_head(msg); /* the RPC header sits at the start of the inline buffer */
+ memset(h, 0, msg->size); /* clears msg->size bytes; presumably this equals the padded size requested above - TODO confirm */
+ h->len = msg->size; /* advertised length is taken from msg->size, so it includes any padding */
+ h->type = type;
+
+ return msg; /* caller fills in the type-specific fields after the header */
+}
+
static void pcs_cs_percpu_stat_free(struct pcs_cs *cs)
{
free_percpu(cs->stat.sync_ops_rate);
@@ -354,6 +379,7 @@ void pcs_cs_update_stat(struct pcs_cs *cs, u32 iolat, u32 netlat, int op_type)
switch (op_type) {
case PCS_CS_WRITE_SYNC_RESP:
case PCS_CS_WRITE_RESP:
+ case PCS_CS_WRITE_AL_RESP:
this_cpu_inc(cs->stat.write_ops_rate->total);
break;
case PCS_CS_READ_RESP:
@@ -549,6 +575,40 @@ static void cs_get_data(struct pcs_msg *msg, int offset, struct iov_iter *it)
}
}
+static void cs_get_data_aligned(struct pcs_msg *msg, int offset, struct iov_iter *it) /* get_iter variant for aligned messages: maps 'offset' in the padded message to header, header padding, payload or tail padding. */
+{
+ struct pcs_int_request * ireq = ireq_from_msg(msg);
+ int storage_version = atomic_read(&ireq->cc->storage_version);
+ unsigned hdrsize = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr), /* header size after alignment padding */
+ storage_version);
+ unsigned padding;
+
+ if (offset < sizeof(struct pcs_cs_iohdr)) { /* region 1: the real header, served by cs_get_data() with the offset as is */
+ cs_get_data(msg, offset, it);
+ return;
+ }
+
+ if (offset < hdrsize) { /* region 2: padding between the header and the payload */
+ BUILD_BUG_ON(sizeof(ireq->cc->nilbuffer) < PCS_CS_MSG_ALIGNMENT); /* nilbuffer must be able to cover a full alignment unit */
+ iov_iter_init_plain(it, ireq->cc->nilbuffer, hdrsize - offset, 0); /* rest of the header padding comes from nilbuffer (presumably zero-filled - TODO confirm) */
+ return;
+ }
+
+ if (offset < hdrsize + ireq->iochunk.size) { /* region 3: the payload itself */
+ /* cs_get_data() does not know about header padding, so fixup the offset */
+ offset -= hdrsize - sizeof(struct pcs_cs_iohdr);
+ cs_get_data(msg, offset, it);
+ return;
+ }
+
+ padding = pcs_cs_msg_size(ireq->iochunk.size, storage_version) - /* region 4: bytes needed to pad the payload up to the alignment */
+ ireq->iochunk.size;
+ BUG_ON(offset >= hdrsize + ireq->iochunk.size + padding); /* an offset at or past the end of the padded message is a caller bug */
+
+ iov_iter_init_plain(it, ireq->cc->nilbuffer, /* remaining tail padding is also served from nilbuffer */
+ hdrsize + ireq->iochunk.size + padding - offset, 0);
+}
+
static void cs_sent(struct pcs_msg *msg)
{
msg->done = cs_response_done;
@@ -565,6 +625,8 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
struct pcs_cs_iohdr *ioh;
struct pcs_cs_list *csl = ireq->iochunk.csl;
struct pcs_map_entry *map = ireq->iochunk.map; /* ireq keeps reference to map */
+ int storage_version = atomic_read(&ireq->cc->storage_version);
+ int aligned_msg;
msg->private = cs;
@@ -572,15 +634,21 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
msg->private2 = ireq;
ioh = &ireq->iochunk.hbuf;
- ioh->hdr.len = sizeof(struct pcs_cs_iohdr);
+ ioh->hdr.len = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr),
+ storage_version);
+ aligned_msg = pcs_cs_use_aligned_io(storage_version);
switch (ireq->iochunk.cmd) {
case PCS_REQ_T_READ:
ioh->hdr.type = PCS_CS_READ_REQ;
break;
case PCS_REQ_T_WRITE:
- ioh->hdr.type = (ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) ?
- PCS_CS_WRITE_SYNC_REQ : PCS_CS_WRITE_REQ;
- ioh->hdr.len += ireq->iochunk.size;
+ if (aligned_msg)
+ ioh->hdr.type = PCS_CS_WRITE_AL_REQ;
+ else
+ ioh->hdr.type = (ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) ?
+ PCS_CS_WRITE_SYNC_REQ : PCS_CS_WRITE_REQ;
+ ioh->hdr.len = pcs_cs_msg_size(ioh->hdr.len + ireq->iochunk.size,
+ storage_version);
break;
case PCS_REQ_T_WRITE_HOLE:
ioh->hdr.type = PCS_CS_WRITE_HOLE_REQ;
@@ -611,7 +679,7 @@ void pcs_cs_submit(struct pcs_cs *cs, struct pcs_int_request *ireq)
msg->rpc = NULL;
pcs_clear_error(&msg->error);
msg->done = cs_sent;
- msg->get_iter = cs_get_data;
+ msg->get_iter = aligned_msg ? cs_get_data_aligned : cs_get_data;
if ((map->state & PCS_MAP_DEAD) || (map->cs_list != csl)) {
ireq->error.value = PCS_ERR_CSD_STALE_MAP;
@@ -1085,17 +1153,13 @@ static struct pcs_msg *cs_prep_probe(struct pcs_cs *cs)
struct pcs_msg *msg;
struct pcs_cs_map_prop *m;
unsigned int msg_sz = offsetof(struct pcs_cs_map_prop, nodes) + sizeof(struct pcs_cs_node_desc);
+ int storage_version = atomic_read(&cc_from_csset(cs->css)->storage_version);
-
- msg = pcs_rpc_alloc_output_msg(msg_sz);
+ msg = pcs_alloc_cs_msg(PCS_CS_MAP_PROP_REQ, msg_sz, storage_version);
if (!msg)
return NULL;
m = (struct pcs_cs_map_prop *)msg_inline_head(msg);
- memset(m, 0, msg_sz);
-
- m->hdr.h.type = PCS_CS_MAP_PROP_REQ;
- m->hdr.h.len = msg_sz;
m->flags = CS_MAPF_PING;
m->nnodes = 1;
diff --git a/fs/fuse/kio/pcs/pcs_cs.h b/fs/fuse/kio/pcs/pcs_cs.h
index 5a7bee151be8d..81743fd8a3e11 100644
--- a/fs/fuse/kio/pcs/pcs_cs.h
+++ b/fs/fuse/kio/pcs/pcs_cs.h
@@ -201,4 +201,7 @@ static inline bool cs_is_blacklisted(struct pcs_cs *cs)
void pcs_cs_set_stat_up(struct pcs_cs_set *set);
+u32 pcs_cs_msg_size(u32 size, u32 storage_version);
+struct pcs_msg* pcs_alloc_cs_msg(u32 type, u32 size, u32 storage_version);
+
#endif /* _PCS_CS_H_ */
diff --git a/fs/fuse/kio/pcs/pcs_cs_prot.h b/fs/fuse/kio/pcs/pcs_cs_prot.h
index 8ca6cbabf7418..12ffbf94cb2e7 100644
--- a/fs/fuse/kio/pcs/pcs_cs_prot.h
+++ b/fs/fuse/kio/pcs/pcs_cs_prot.h
@@ -6,6 +6,8 @@
#define PCS_CS_FLUSH_WEIGHT (128*1024)
#define PCS_CS_HOLE_WEIGHT (4096)
+#define PCS_CS_MSG_ALIGNMENT (512ULL)
+
struct pcs_cs_sync_data
{
PCS_INTEGRITY_SEQ_T integrity_seq; /* Invariant. Changed only on CS host crash */
@@ -67,6 +69,10 @@ struct pcs_cs_iohdr {
struct pcs_cs_sync_resp sync_resp[0]; /* Used only in response to write/sync */
} __attribute__((aligned(8)));
+static inline int pcs_cs_use_aligned_io(u32 storage_version) /* Tells whether CS messages must be aligned for the given storage version. */
+{
+ return (storage_version >= PCS_CS_MSG_ALIGNED_VERSION); /* non-zero once the version reaches PCS_CS_MSG_ALIGNED_VERSION */
+}
/* Maximal message size. Actually, random */
#define PCS_CS_MSG_MAX_SIZE (1024*1024 + sizeof(struct pcs_cs_iohdr))
@@ -86,6 +92,9 @@ struct pcs_cs_iohdr {
#define PCS_CS_WRITE_SYNC_REQ (PCS_RPC_CS_CLIENT_BASE + 8)
#define PCS_CS_WRITE_SYNC_RESP (PCS_CS_WRITE_SYNC_REQ|PCS_RPC_DIRECTION)
+#define PCS_CS_WRITE_AL_REQ (PCS_RPC_CS_CLIENT_BASE + 20)
+#define PCS_CS_WRITE_AL_RESP (PCS_CS_WRITE_AL_REQ|PCS_RPC_DIRECTION)
+
struct pcs_cs_cong_notification {
struct pcs_rpc_hdr hdr;
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index d70ef8fea70e8..89caac4284a1d 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -2575,6 +2575,7 @@ static int commit_cs_record(struct pcs_map_entry * m, struct pcs_cs_record * rec
BUG_ON(srec->dirty_integrity && srec->dirty_integrity != sync->integrity_seq);
dirtify = (op_type == PCS_CS_WRITE_SYNC_RESP || op_type == PCS_CS_WRITE_RESP ||
+ op_type == PCS_CS_WRITE_AL_RESP ||
op_type == PCS_CS_WRITE_HOLE_RESP || op_type == PCS_CS_WRITE_ZERO_RESP);
/* The following looks scary, could be more clear.
* The goal is to update sync seq numbers:
@@ -2926,17 +2927,15 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque
{
struct pcs_cs_iohdr * ioh;
struct pcs_cs_sync_resp * arr;
+ unsigned varsize = 0;
assert_spin_locked(&m->lock);
ioh = (struct pcs_cs_iohdr *)msg->_inline_buffer;
arr = (struct pcs_cs_sync_resp *)(ioh + 1);
- ioh->hdr.len = sizeof(struct pcs_cs_iohdr);
- ioh->hdr.type = PCS_CS_SYNC_REQ;
memset(&ioh->sync, 0, sizeof(ioh->sync));
ioh->offset = 0;
- ioh->size = 0;
ioh->_reserved = 0;
ioh->sync.misc = PCS_CS_IO_SEQ;
@@ -2959,7 +2958,7 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque
arr->sync.ts_io = 0;
arr->sync.ts_net = 0;
arr->sync._reserved = 0;
- ioh->hdr.len += sizeof(struct pcs_cs_sync_resp);
+ varsize += sizeof(struct pcs_cs_sync_resp);
FUSE_KLOG(cc_from_maps(m->maps)->fc, LOG_DEBUG5, "fill sync "NODE_FMT" [%d,%d,%d,%d]", NODE_ARGS(arr->cs_id),
arr->sync.integrity_seq, arr->sync.sync_epoch,
arr->sync.sync_dirty, arr->sync.sync_current);
@@ -2967,6 +2966,9 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque
}
}
}
+ ioh->size = varsize;
+ ioh->hdr.len = pcs_cs_msg_size(sizeof(struct pcs_cs_iohdr) + varsize,
+ atomic_read(&cc_from_map(m)->storage_version));
msg->size = ioh->hdr.len;
msg->private = sreq;
msg->done = sync_done;
@@ -3019,8 +3021,9 @@ static int prepare_map_flush_ireq(struct pcs_map_entry *m,
if (!sreq)
goto err_cslist;
- msg = pcs_rpc_alloc_output_msg(sizeof(struct pcs_cs_iohdr) +
- cslist->nsrv * sizeof(struct pcs_cs_sync_resp));
+ msg = pcs_alloc_cs_msg(PCS_CS_SYNC_REQ, sizeof(struct pcs_cs_iohdr) +
+ cslist->nsrv * sizeof(struct pcs_cs_sync_resp),
+ atomic_read(&cc_from_map(m)->storage_version));
if (!msg)
goto err_ireq;
diff --git a/fs/fuse/kio/pcs/pcs_prot_types.h b/fs/fuse/kio/pcs/pcs_prot_types.h
index 638b076674678..d48cfc4f0470d 100644
--- a/fs/fuse/kio/pcs/pcs_prot_types.h
+++ b/fs/fuse/kio/pcs/pcs_prot_types.h
@@ -15,6 +15,7 @@
#define PCS_VERSION_UNKNOWN 0
#define PCS_VZ7_VERSION 100
+#define PCS_CS_MSG_ALIGNED_VERSION 134
/* milliseconds since Jan 1970 */
typedef u64 PCS_FILETIME_T;
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index 33f0fe9e7cb55..722175a1132f6 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -239,6 +239,8 @@ struct pcs_cluster_core
char cluster_name[NAME_MAX];
atomic_t storage_version;
+
+ char nilbuffer[PCS_CS_MSG_ALIGNMENT];
};
static inline struct pcs_cluster_core *cc_from_csset(struct pcs_cs_set * css)
More information about the Devel
mailing list