Re: [PATCH] Retrying on failed server close

From: Steve French
Date: Fri Mar 22 2024 - 10:48:03 EST


Do you know a repro scenario where you can get the server to return
EAGAIN or EBUSY?

SInce close is also issued from other paths than the one you issued
retries from (_cifsFileInfo_put) - are there other cases we should be
retrying? e.g. error paths in do_create and atomic_open, cifs_open,
cifs_close_dir, find_cifs_entry

Also do you know a scenario where we can repro the negative total open
files count?

On Fri, Mar 22, 2024 at 2:33 AM Ritvik Budhiraja
<budhirajaritviksmb@xxxxxxxxx> wrote:
>
> Attaching the updated patch
>
>
> On Fri, 15 Mar 2024 at 01:12, Ritvik Budhiraja <budhirajaritviksmb@xxxxxxxxx> wrote:
>>
>> In the current implementation, CIFS close sends a close to the server
>> and does not check for the success of the server close. This patch adds
>> functionality to check for server close return status and retries
>> in case of an EBUSY or EAGAIN error
>>
>> Signed-off-by: Ritvik Budhiraja <rbudhiraja@xxxxxxxxxxxxx>
>> ---
>> fs/smb/client/cifsfs.c | 11 +++++++
>> fs/smb/client/cifsglob.h | 7 +++--
>> fs/smb/client/file.c | 63 ++++++++++++++++++++++++++++++++++++----
>> fs/smb/client/smb1ops.c | 4 +--
>> fs/smb/client/smb2ops.c | 9 +++---
>> 5 files changed, 80 insertions(+), 14 deletions(-)
>>
>> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
>> index fb368b191eef..e4b2ded86fce 100644
>> --- a/fs/smb/client/cifsfs.c
>> +++ b/fs/smb/client/cifsfs.c
>> @@ -160,6 +160,7 @@ struct workqueue_struct *decrypt_wq;
>> struct workqueue_struct *fileinfo_put_wq;
>> struct workqueue_struct *cifsoplockd_wq;
>> struct workqueue_struct *deferredclose_wq;
>> +struct workqueue_struct *serverclose_wq;
>> __u32 cifs_lock_secret;
>>
>> /*
>> @@ -1890,6 +1891,13 @@ init_cifs(void)
>> goto out_destroy_cifsoplockd_wq;
>> }
>>
>> + serverclose_wq = alloc_workqueue("serverclose",
>> + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
>> + if (!serverclose_wq) {
>> + rc = -ENOMEM;
>> + goto out_destroy_serverclose_wq;
>> + }
>> +
>> rc = cifs_init_inodecache();
>> if (rc)
>> goto out_destroy_deferredclose_wq;
>> @@ -1964,6 +1972,8 @@ init_cifs(void)
>> destroy_workqueue(decrypt_wq);
>> out_destroy_cifsiod_wq:
>> destroy_workqueue(cifsiod_wq);
>> +out_destroy_serverclose_wq:
>> + destroy_workqueue(serverclose_wq);
>> out_clean_proc:
>> cifs_proc_clean();
>> return rc;
>> @@ -1993,6 +2003,7 @@ exit_cifs(void)
>> destroy_workqueue(cifsoplockd_wq);
>> destroy_workqueue(decrypt_wq);
>> destroy_workqueue(fileinfo_put_wq);
>> + destroy_workqueue(serverclose_wq);
>> destroy_workqueue(cifsiod_wq);
>> cifs_proc_clean();
>> }
>> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
>> index 53c75cfb33ab..c99bc3b3ff56 100644
>> --- a/fs/smb/client/cifsglob.h
>> +++ b/fs/smb/client/cifsglob.h
>> @@ -429,10 +429,10 @@ struct smb_version_operations {
>> /* set fid protocol-specific info */
>> void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
>> /* close a file */
>> - void (*close)(const unsigned int, struct cifs_tcon *,
>> + int (*close)(const unsigned int, struct cifs_tcon *,
>> struct cifs_fid *);
>> /* close a file, returning file attributes and timestamps */
>> - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
>> + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
>> struct cifsFileInfo *pfile_info);
>> /* send a flush request to the server */
>> int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
>> @@ -1420,6 +1420,7 @@ struct cifsFileInfo {
>> bool invalidHandle:1; /* file closed via session abend */
>> bool swapfile:1;
>> bool oplock_break_cancelled:1;
>> + bool offload:1; /* offload final part of _put to a wq */
>> unsigned int oplock_epoch; /* epoch from the lease break */
>> __u32 oplock_level; /* oplock/lease level from the lease break */
>> int count;
>> @@ -1428,6 +1429,7 @@ struct cifsFileInfo {
>> struct cifs_search_info srch_inf;
>> struct work_struct oplock_break; /* work for oplock breaks */
>> struct work_struct put; /* work for the final part of _put */
>> + struct work_struct serverclose; /* work for serverclose */
>> struct delayed_work deferred;
>> bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
>> char *symlink_target;
>> @@ -2085,6 +2087,7 @@ extern struct workqueue_struct *decrypt_wq;
>> extern struct workqueue_struct *fileinfo_put_wq;
>> extern struct workqueue_struct *cifsoplockd_wq;
>> extern struct workqueue_struct *deferredclose_wq;
>> +extern struct workqueue_struct *serverclose_wq;
>> extern __u32 cifs_lock_secret;
>>
>> extern mempool_t *cifs_mid_poolp;
>> diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
>> index c3b8e7091a4d..c1379ec27dcd 100644
>> --- a/fs/smb/client/file.c
>> +++ b/fs/smb/client/file.c
>> @@ -445,6 +445,7 @@ cifs_down_write(struct rw_semaphore *sem)
>> }
>>
>> static void cifsFileInfo_put_work(struct work_struct *work);
>> +void serverclose_work(struct work_struct *work);
>>
>> struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
>> struct tcon_link *tlink, __u32 oplock,
>> @@ -491,6 +492,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
>> cfile->tlink = cifs_get_tlink(tlink);
>> INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
>> INIT_WORK(&cfile->put, cifsFileInfo_put_work);
>> + INIT_WORK(&cfile->serverclose, serverclose_work);
>> INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
>> mutex_init(&cfile->fh_mutex);
>> spin_lock_init(&cfile->file_info_lock);
>> @@ -582,6 +584,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
>> cifsFileInfo_put_final(cifs_file);
>> }
>>
>> +void serverclose_work(struct work_struct *work)
>> +{
>> + struct cifsFileInfo *cifs_file = container_of(work,
>> + struct cifsFileInfo, serverclose);
>> +
>> + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
>> +
>> + struct TCP_Server_Info *server = tcon->ses->server;
>> + int rc;
>> + int retries = 0;
>> + int MAX_RETRIES = 4;
>> +
>> + do {
>> + if (server->ops->close_getattr)
>> + rc = server->ops->close_getattr(0, tcon, cifs_file);
>> + else if (server->ops->close)
>> + rc = server->ops->close(0, tcon, &cifs_file->fid);
>> +
>> + if (rc == -EBUSY || rc == -EAGAIN) {
>> + retries++;
>> + msleep(250);
>> + }
>> + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
>> + );
>> +
>> + if (retries == MAX_RETRIES)
>> + printk(KERN_WARNING "[CIFS_CLOSE] Serverclose failed %d times, giving up\n", MAX_RETRIES);
>> +
>> + if (cifs_file->offload)
>> + queue_work(fileinfo_put_wq, &cifs_file->put);
>> + else
>> + cifsFileInfo_put_final(cifs_file);
>> +}
>> +
>> /**
>> * cifsFileInfo_put - release a reference of file priv data
>> *
>> @@ -622,10 +658,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
>> struct cifs_fid fid = {};
>> struct cifs_pending_open open;
>> bool oplock_break_cancelled;
>> + bool serverclose_offloaded = false;
>>
>> spin_lock(&tcon->open_file_lock);
>> spin_lock(&cifsi->open_file_lock);
>> spin_lock(&cifs_file->file_info_lock);
>> +
>> + cifs_file->offload = offload;
>> if (--cifs_file->count > 0) {
>> spin_unlock(&cifs_file->file_info_lock);
>> spin_unlock(&cifsi->open_file_lock);
>> @@ -667,13 +706,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
>> if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
>> struct TCP_Server_Info *server = tcon->ses->server;
>> unsigned int xid;
>> + int rc;
>>
>> xid = get_xid();
>> if (server->ops->close_getattr)
>> - server->ops->close_getattr(xid, tcon, cifs_file);
>> + rc = server->ops->close_getattr(xid, tcon, cifs_file);
>> else if (server->ops->close)
>> - server->ops->close(xid, tcon, &cifs_file->fid);
>> + rc = server->ops->close(xid, tcon, &cifs_file->fid);
>> _free_xid(xid);
>> +
>> + if (rc == -EBUSY || rc == -EAGAIN) {
>> + // Server close failed, hence offloading it as an async op
>> + queue_work(serverclose_wq, &cifs_file->serverclose);
>> + serverclose_offloaded = true;
>> + }
>> }
>>
>> if (oplock_break_cancelled)
>> @@ -681,10 +727,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
>>
>> cifs_del_pending_open(&open);
>>
>> - if (offload)
>> - queue_work(fileinfo_put_wq, &cifs_file->put);
>> - else
>> - cifsFileInfo_put_final(cifs_file);
>> + // if serverclose has been offloaded to wq (on failure), it will
>> + // handle offloading put as well. If serverclose not offloaded,
>> + // we need to handle offloading put here.
>> + if (!serverclose_offloaded) {
>> + if (offload)
>> + queue_work(fileinfo_put_wq, &cifs_file->put);
>> + else
>> + cifsFileInfo_put_final(cifs_file);
>> + }
>> }
>>
>> int cifs_open(struct inode *inode, struct file *file)
>> diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
>> index a9eaba8083b0..212ec6f66ec6 100644
>> --- a/fs/smb/client/smb1ops.c
>> +++ b/fs/smb/client/smb1ops.c
>> @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
>> cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
>> }
>>
>> -static void
>> +static int
>> cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
>> struct cifs_fid *fid)
>> {
>> - CIFSSMBClose(xid, tcon, fid->netfid);
>> + return CIFSSMBClose(xid, tcon, fid->netfid);
>> }
>>
>> static int
>> diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
>> index 4695433fcf39..1dcd4944958f 100644
>> --- a/fs/smb/client/smb2ops.c
>> +++ b/fs/smb/client/smb2ops.c
>> @@ -1411,14 +1411,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
>> memcpy(cfile->fid.create_guid, fid->create_guid, 16);
>> }
>>
>> -static void
>> +static int
>> smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
>> struct cifs_fid *fid)
>> {
>> - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
>> + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
>> }
>>
>> -static void
>> +static int
>> smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
>> struct cifsFileInfo *cfile)
>> {
>> @@ -1429,7 +1429,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
>> rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
>> cfile->fid.volatile_fid, &file_inf);
>> if (rc)
>> - return;
>> + return rc;
>>
>> inode = d_inode(cfile->dentry);
>>
>> @@ -1458,6 +1458,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
>>
>> /* End of file and Attributes should not have to be updated on close */
>> spin_unlock(&inode->i_lock);
>> + return rc;
>> }
>>
>> static int
>> --
>> 2.34.1
>>


--
Thanks,

Steve