about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-08-31 15:36:41 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-08-31 15:36:41 -0700
commit 99d99825fc075fd24b60cc9cf0fb1e20b9c16b0f (patch)
tree 10993069e3382b3f658253664f36a8b46ad29e25
parent f35d1706159e015848ec7421e91b44b614c02dc2 (diff)
parent c4a123d2e8c4dc91d581ee7d05c0cd51a0273fab (diff)
download linux-99d99825fc075fd24b60cc9cf0fb1e20b9c16b0f.tar.gz
Merge tag 'nfs-for-6.6-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
 "New Features:
   - Enable the NFS v4.2 READ_PLUS operation by default

  Stable Fixes:
   - NFSv4/pnfs: minor fix for cleanup path in nfs4_get_device_info
   - NFS: Fix a potential data corruption

  Bugfixes:
   - Fix various READ_PLUS issues including:
      - smatch warnings
      - xdr size calculations
      - scratch buffer handling
      - 32bit / highmem xdr page handling
   - Fix checkpatch errors in file.c
   - Fix redundant readdir request after an EOF
   - Fix handling of COPY ERR_OFFLOAD_NO_REQ
   - Fix assignment of xprtdata.cred

  Cleanups:
   - Remove unused xprtrdma function declarations
   - Clean up an integer overflow check to avoid a warning
   - Clean up #includes in dns_resolve.c
   - Clean up nfs4_get_device_info so we don't pass a NULL pointer
     to __free_page()
   - Clean up sunrpc TCP socket timeout configuration
   - Guard against READDIR loops when entry names are too long
   - Use EXCHGID4_FLAG_USE_PNFS_DS for DS servers"

* tag 'nfs-for-6.6-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (22 commits)
  pNFS: Fix assignment of xprtdata.cred
  NFSv4.2: fix handling of COPY ERR_OFFLOAD_NO_REQ
  NFS: Guard against READDIR loop when entry names exceed MAXNAMELEN
  NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server
  NFS/pNFS: Set the connect timeout for the pNFS flexfiles driver
  SUNRPC: Don't override connect timeouts in rpc_clnt_add_xprt()
  SUNRPC: Allow specification of TCP client connect timeout at setup
  SUNRPC: Refactor and simplify connect timeout
  SUNRPC: Set the TCP_SYNCNT to match the socket timeout
  NFS: Fix a potential data corruption
  nfs: fix redundant readdir request after get eof
  nfs/blocklayout: Use the passed in gfp flags
  filemap: Fix errors in file.c
  NFSv4/pnfs: minor fix for cleanup path in nfs4_get_device_info
  NFS: Move common includes outside ifdef
  SUNRPC: clean up integer overflow check
  xprtrdma: Remove unused function declaration rpcrdma_bc_post_recv()
  NFS: Enable the READ_PLUS operation by default
  SUNRPC: kmap() the xdr pages during decode
  NFSv4.2: Rework scratch handling for READ_PLUS (again)
  ...
-rw-r--r-- fs/nfs/Kconfig 6
-rw-r--r-- fs/nfs/blocklayout/dev.c 4
-rw-r--r-- fs/nfs/client.c 2
-rw-r--r-- fs/nfs/dir.c 15
-rw-r--r-- fs/nfs/direct.c 20
-rw-r--r-- fs/nfs/dns_resolve.c 12
-rw-r--r-- fs/nfs/file.c 2
-rw-r--r-- fs/nfs/internal.h 3
-rw-r--r-- fs/nfs/nfs2xdr.c 2
-rw-r--r-- fs/nfs/nfs3client.c 3
-rw-r--r-- fs/nfs/nfs3xdr.c 2
-rw-r--r-- fs/nfs/nfs42.h 1
-rw-r--r-- fs/nfs/nfs42proc.c 5
-rw-r--r-- fs/nfs/nfs42xdr.c 17
-rw-r--r-- fs/nfs/nfs4client.c 3
-rw-r--r-- fs/nfs/nfs4proc.c 17
-rw-r--r-- fs/nfs/pnfs_dev.c 2
-rw-r--r-- fs/nfs/pnfs_nfs.c 5
-rw-r--r-- fs/nfs/read.c 10
-rw-r--r-- include/linux/sunrpc/clnt.h 2
-rw-r--r-- include/linux/sunrpc/xdr.h 6
-rw-r--r-- include/linux/sunrpc/xprt.h 2
-rw-r--r-- net/sunrpc/clnt.c 8
-rw-r--r-- net/sunrpc/svc.c 2
-rw-r--r-- net/sunrpc/xdr.c 27
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 1
-rw-r--r-- net/sunrpc/xprtsock.c 55
27 files changed, 171 insertions, 63 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index b6fc169be1b16e..7df2503cef6c30 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -209,8 +209,6 @@ config NFS_DISABLE_UDP_SUPPORT
config NFS_V4_2_READ_PLUS
bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
depends on NFS_V4_2
- default n
+ default y
help
- This is intended for developers only. The READ_PLUS operation has
- been shown to have issues under specific conditions and should not
- be used in production.
+ Choose Y here to enable use of the NFS v4.2 READ_PLUS operation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 70f5563a8e81c7..65cbb5607a5fc4 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -404,7 +404,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->concat.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
@@ -433,7 +433,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->stripe.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e4c5f193ed5e8d..44eca51b28085d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -517,6 +517,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.authflavor = flavor,
.cred = cl_init->cred,
.xprtsec = cl_init->xprtsec,
+ .connect_timeout = cl_init->connect_timeout,
+ .reconnect_timeout = cl_init->reconnect_timeout,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8f3112e71a6a62..e6a51fd94fea87 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1089,6 +1089,17 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
+ /*
+ * nfs_readdir_handle_cache_misses return force clear at
+ * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for
+ * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1
+ * entries need be emitted here.
+ */
+ if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) {
+ desc->eob = true;
+ break;
+ }
+
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->name, ent->name_len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
@@ -1107,10 +1118,6 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos++;
- if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) {
- desc->eob = true;
- break;
- }
}
if (array->folio_is_eof)
desc->eof = !desc->eob;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aaffaaa336cc55..47d892a1d363d9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -472,13 +472,31 @@ out:
return result;
}
+static void nfs_direct_add_page_head(struct list_head *list,
+ struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
+ return;
+ if (!list_empty(&head->wb_list)) {
+ nfs_unlock_request(head);
+ return;
+ }
+ list_add(&head->wb_list, list);
+ kref_get(&head->wb_kref);
+ kref_get(&head->wb_kref);
+}
+
static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
{
struct nfs_page *req, *subreq;
list_for_each_entry(req, list, wb_list) {
- if (req->wb_head != req)
+ if (req->wb_head != req) {
+ nfs_direct_add_page_head(&req->wb_list, req);
continue;
+ }
subreq = req->wb_this_page;
if (subreq == req)
continue;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 6603b5cee029c0..714975e5c0dbd6 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -7,14 +7,16 @@
* Resolves DNS hostnames into valid ip addresses
*/
-#ifdef CONFIG_NFS_USE_KERNEL_DNS
-
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
-#include <linux/dns_resolver.h>
+
#include "dns_resolve.h"
+#ifdef CONFIG_NFS_USE_KERNEL_DNS
+
+#include <linux/dns_resolver.h>
+
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
struct sockaddr_storage *ss, size_t salen)
{
@@ -35,7 +37,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#else
-#include <linux/module.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/kmod.h>
@@ -43,15 +44,12 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#include <linux/socket.h>
#include <linux/seq_file.h>
#include <linux/inet.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/nfs_fs.h>
#include "nfs4_fs.h"
-#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 79b1b3fcd3fcf6..3f9768810427df 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -200,7 +200,7 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe
EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
-nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
+nfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
int status;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 913c09806c7f5a..9c9cf764f6000d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -82,6 +82,8 @@ struct nfs_client_initdata {
const struct rpc_timeout *timeparms;
const struct cred *cred;
struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
/*
@@ -493,6 +495,7 @@ extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
+extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size);
extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
struct nfs_open_context *ctx,
struct folio *folio);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 05c3b4b2b3dd8c..c190938142960e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_filename_inline(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
/*
* The type (size and byte order) of nfscookie isn't defined in
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index eff3802c5e0351..674c012868b1a2 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -86,6 +86,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
+ unsigned long connect_timeout = ds_timeo * (ds_retrans + 1) * HZ / 10;
struct nfs_client *mds_clp = mds_srv->nfs_client;
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
@@ -98,6 +99,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
.xprtsec = mds_clp->cl_xprtsec,
+ .connect_timeout = connect_timeout,
+ .reconnect_timeout = connect_timeout,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 3b0b650c9c5ab2..60f032be805ae5 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1991,7 +1991,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_inline_filename3(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return -EAGAIN;
+ return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN;
error = decode_cookie3(xdr, &new_cookie);
if (unlikely(error))
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 0fe5aacbcfdf1a..b59876b01a1e3c 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@
* more? Need to consider not to pre-alloc too much for a compound.
*/
#define PNFS_LAYOUTSTATS_MAXDEV (4)
+#define READ_PLUS_SCRATCH_SIZE (16)
/* nfs4.2proc.c */
#ifdef CONFIG_NFS_V4_2
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 49f78e23b34c0b..063e00aff87edb 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -471,8 +471,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
continue;
}
break;
- } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
- args.sync = true;
+ } else if (err == -NFS4ERR_OFFLOAD_NO_REQS &&
+ args.sync != res.synchronous) {
+ args.sync = res.synchronous;
dst_exception.retry = 1;
continue;
} else if ((err == -ESTALE ||
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 95234208dc9ee1..9e3ae53e220583 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -54,10 +54,16 @@
(1 /* data_content4 */ + \
2 /* data_info4.di_offset */ + \
1 /* data_info4.di_length */)
+#define NFS42_READ_PLUS_HOLE_SEGMENT_SIZE \
+ (1 /* data_content4 */ + \
+ 2 /* data_info4.di_offset */ + \
+ 2 /* data_info4.di_length */)
+#define READ_PLUS_SEGMENT_SIZE_DIFF (NFS42_READ_PLUS_HOLE_SEGMENT_SIZE - \
+ NFS42_READ_PLUS_DATA_SEGMENT_SIZE)
#define decode_read_plus_maxsz (op_decode_hdr_maxsz + \
1 /* rpr_eof */ + \
1 /* rpr_contents count */ + \
- NFS42_READ_PLUS_DATA_SEGMENT_SIZE)
+ NFS42_READ_PLUS_HOLE_SEGMENT_SIZE)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -617,8 +623,8 @@ static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
encode_putfh(xdr, args->fh, &hdr);
encode_read_plus(xdr, args, &hdr);
- rpc_prepare_reply_pages(req, args->pages, args->pgbase,
- args->count, hdr.replen);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count,
+ hdr.replen - READ_PLUS_SEGMENT_SIZE_DIFF);
encode_nops(&hdr);
}
@@ -1056,13 +1062,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
res->eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
if (segments == 0)
- return status;
+ return 0;
segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL);
if (!segs)
return -ENOMEM;
- status = -EIO;
for (i = 0; i < segments; i++) {
status = decode_read_plus_segment(xdr, &segs[i]);
if (status < 0)
@@ -1428,7 +1433,7 @@ static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
struct compound_hdr hdr;
int status;
- xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch));
+ xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE);
status = decode_compound_hdr(xdr, &hdr);
if (status)
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d9114a754db73c..27fb25567ce755 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -232,6 +232,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+ if (test_bit(NFS_CS_DS, &cl_init->init_flags))
+ __set_bit(NFS_CS_DS, &clp->cl_flags);
/*
* Set up the connection to the server before we add add to the
* global list.
@@ -1007,6 +1009,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ __set_bit(NFS_CS_DS, &cl_init.init_flags);
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d57aaf0cc577fe..794343790ea8bb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5438,18 +5438,8 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
return false;
}
-static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr)
-{
- if (hdr->res.scratch) {
- kfree(hdr->res.scratch);
- hdr->res.scratch = NULL;
- }
-}
-
static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- nfs4_read_plus_scratch_free(hdr);
-
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
@@ -5469,8 +5459,7 @@ static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr,
/* Note: We don't use READ_PLUS with pNFS yet */
if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) {
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
- hdr->res.scratch = kmalloc(32, GFP_KERNEL);
- return hdr->res.scratch != NULL;
+ return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE);
}
return false;
}
@@ -8798,6 +8787,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
+ if (test_bit(NFS_CS_DS, &clp->cl_flags))
+ calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
@@ -8875,6 +8866,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
/* Save the EXCHANGE_ID verifier session trunk tests */
memcpy(clp->cl_confirm.data, argp->verifier.data,
sizeof(clp->cl_confirm.data));
+ if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS)
+ set_bit(NFS_CS_DS, &clp->cl_flags);
out:
trace_nfs4_exchange_id(clp, status);
rpc_put_task(task);
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index ddbbf4fcda867c..178001c90156fd 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -154,7 +154,7 @@ nfs4_get_device_info(struct nfs_server *server,
set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
out_free_pages:
- for (i = 0; i < max_pages; i++)
+ while (--i >= 0)
__free_page(pages[i]);
kfree(pages);
out_free_pdev:
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index a0112ad4937aa2..afd23910f3bffc 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -852,6 +852,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
{
struct nfs_client *clp = ERR_PTR(-EIO);
struct nfs4_pnfs_ds_addr *da;
+ unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
int status = 0;
dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
@@ -870,6 +871,8 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
+ .connect_timeout = connect_timeout,
+ .reconnect_timeout = connect_timeout,
};
if (da->da_transport != clp->cl_proto)
@@ -943,7 +946,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
* Test this address for session trunking and
* add as an alias
*/
- xprtdata.cred = nfs4_get_clid_cred(clp),
+ xprtdata.cred = nfs4_get_clid_cred(clp);
rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
rpc_clnt_setup_test_and_add_xprt,
&rpcdata);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f71eeee67e201d..7dc21a48e3e7b6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -47,6 +47,8 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void)
static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
+ if (rhdr->res.scratch != NULL)
+ kfree(rhdr->res.scratch);
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
@@ -108,6 +110,14 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size)
+{
+ WARN_ON(hdr->res.scratch != NULL);
+ hdr->res.scratch = kmalloc(size, GFP_KERNEL);
+ return hdr->res.scratch != NULL;
+}
+EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch);
+
static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct folio *folio = nfs_page_to_folio(req);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 4f41d839face4d..af7358277f1c34 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -148,6 +148,8 @@ struct rpc_create_args {
const struct cred *cred;
unsigned int max_connect;
struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
struct rpc_add_xprt_test {
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 42f9d7eb9a1a3e..5b4fb3c791bc26 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -226,6 +226,7 @@ struct xdr_stream {
struct kvec *iov; /* pointer to the current kvec */
struct kvec scratch; /* Scratch buffer */
struct page **page_ptr; /* pointer to the current page */
+ void *page_kaddr; /* kmapped address of the current page */
unsigned int nwords; /* Remaining decode buffer length */
struct rpc_rqst *rqst; /* For debugging */
@@ -257,6 +258,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
+extern void xdr_finish_decode(struct xdr_stream *xdr);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
@@ -777,9 +779,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
return -EBADMSG;
- if (len > SIZE_MAX / sizeof(*p))
- return -EBADMSG;
- p = xdr_inline_decode(xdr, len * sizeof(*p));
+ p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p)));
if (unlikely(!p))
return -EBADMSG;
if (array == NULL)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b52411bcfe4e7c..4ecc89301eb74d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -351,6 +351,8 @@ struct xprt_create {
struct rpc_xprt_switch *bc_xps;
unsigned int flags;
struct xprtsec_parms xprtsec;
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
};
struct xprt_class {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7c697af3762f6..8d75290f1a31d2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -534,6 +534,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.servername = args->servername,
.bc_xprt = args->bc_xprt,
.xprtsec = args->xprtsec,
+ .connect_timeout = args->connect_timeout,
+ .reconnect_timeout = args->reconnect_timeout,
};
char servername[48];
struct rpc_clnt *clnt;
@@ -2602,6 +2604,7 @@ out:
case 0:
task->tk_action = rpc_exit_task;
task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+ xdr_finish_decode(&xdr);
return;
case -EAGAIN:
task->tk_status = 0;
@@ -3069,6 +3072,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
+
+ if (xprtargs->connect_timeout)
+ connect_timeout = xprtargs->connect_timeout;
+ if (xprtargs->reconnect_timeout)
+ reconnect_timeout = xprtargs->reconnect_timeout;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index dc21e6c732db33..812fda9d45dd63 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1394,6 +1394,8 @@ svc_process_common(struct svc_rqst *rqstp)
rc = process.dispatch(rqstp);
if (procp->pc_release)
procp->pc_release(rqstp);
+ xdr_finish_decode(xdr);
+
if (!rc)
goto dropit;
if (rqstp->rq_auth_stat != rpc_auth_ok)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 358e6de91775e4..62e07c330a66ff 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1338,6 +1338,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr,
return xdr_set_iov(xdr, buf->tail, base, len);
}
+static void xdr_stream_unmap_current_page(struct xdr_stream *xdr)
+{
+ if (xdr->page_kaddr) {
+ kunmap_local(xdr->page_kaddr);
+ xdr->page_kaddr = NULL;
+ }
+}
+
static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
unsigned int base, unsigned int len)
{
@@ -1355,12 +1363,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
if (len > maxlen)
len = maxlen;
+ xdr_stream_unmap_current_page(xdr);
xdr_stream_page_set_pos(xdr, base);
base += xdr->buf->page_base;
pgnr = base >> PAGE_SHIFT;
xdr->page_ptr = &xdr->buf->pages[pgnr];
- kaddr = page_address(*xdr->page_ptr);
+
+ if (PageHighMem(*xdr->page_ptr)) {
+ xdr->page_kaddr = kmap_local_page(*xdr->page_ptr);
+ kaddr = xdr->page_kaddr;
+ } else
+ kaddr = page_address(*xdr->page_ptr);
pgoff = base & ~PAGE_MASK;
xdr->p = (__be32*)(kaddr + pgoff);
@@ -1414,6 +1428,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
xdr->buf = buf;
+ xdr->page_kaddr = NULL;
xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
@@ -1446,6 +1461,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+/**
+ * xdr_finish_decode - Clean up the xdr_stream after decoding data.
+ * @xdr: pointer to xdr_stream struct
+ */
+void xdr_finish_decode(struct xdr_stream *xdr)
+{
+ xdr_stream_unmap_current_page(xdr);
+}
+EXPORT_SYMBOL(xdr_finish_decode);
+
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
unsigned int nwords = XDR_QUADLEN(nbytes);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5e5ff6784ef5ff..da409450dfc05c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -593,7 +593,6 @@ void xprt_rdma_cleanup(void);
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
-int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 268a2cc61acd14..71cd916e384f17 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2237,9 +2237,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct net *net = sock_net(sock->sk);
+ unsigned long connect_timeout;
+ unsigned long syn_retries;
unsigned int keepidle;
unsigned int keepcnt;
unsigned int timeo;
+ unsigned long t;
spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
@@ -2257,6 +2261,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP user timeout (see RFC5482) */
tcp_sock_set_user_timeout(sock->sk, timeo);
+
+ /* Connect timeout */
+ connect_timeout = max_t(unsigned long,
+ DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
+ syn_retries = max_t(unsigned long,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
+ for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
+ ;
+ if (t <= syn_retries)
+ tcp_sock_set_syncnt(sock->sk, t - 1);
+}
+
+static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout)
+{
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ memcpy(&to, xprt->timeout, sizeof(to));
+ /* Arbitrary lower limit */
+ initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ to.to_retries = 0;
+ memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
+ xprt->timeout = &transport->tcp_timeout;
+ xprt->connect_timeout = connect_timeout;
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@@ -2264,25 +2297,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long reconnect_timeout)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct rpc_timeout to;
- unsigned long initval;
spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
- if (connect_timeout < xprt->connect_timeout) {
- memcpy(&to, xprt->timeout, sizeof(to));
- initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
- /* Arbitrary lower limit */
- if (initval < XS_TCP_INIT_REEST_TO << 1)
- initval = XS_TCP_INIT_REEST_TO << 1;
- to.to_initval = initval;
- to.to_maxval = initval;
- memcpy(&transport->tcp_timeout, &to,
- sizeof(transport->tcp_timeout));
- xprt->timeout = &transport->tcp_timeout;
- xprt->connect_timeout = connect_timeout;
- }
+ if (connect_timeout < xprt->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
spin_unlock(&xprt->transport_lock);
}
@@ -3335,8 +3355,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ if (args->reconnect_timeout)
+ xprt->max_reconnect_timeout = args->reconnect_timeout;
+
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
+ if (args->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);