From e7fd41792fc0ee52a05fcaac87511f118328d147 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Wed, 18 Jan 2006 09:30:29 +0000
Subject: [DLM] The core of the DLM for GFS2/CLVM

This is the core of the distributed lock manager which is required
to use GFS2 as a cluster filesystem. It is also used by CLVM and can
be used as a standalone lock manager independently of either of these
two projects.

It implements VAX-style locking modes.

Signed-off-by: David Teigland
Signed-off-by: Steve Whitehouse
---
 fs/dlm/lock.c | 3610 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 3610 insertions(+)
 create mode 100644 fs/dlm/lock.c

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
new file mode 100644
index 00000000000000..81efb361f95df9
--- /dev/null
+++ b/fs/dlm/lock.c
@@ -0,0 +1,3610 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/* Central locking logic has four stages:
+
+   dlm_lock()
+   dlm_unlock()
+
+   request_lock(ls, lkb)
+   convert_lock(ls, lkb)
+   unlock_lock(ls, lkb)
+   cancel_lock(ls, lkb)
+
+   _request_lock(r, lkb)
+   _convert_lock(r, lkb)
+   _unlock_lock(r, lkb)
+   _cancel_lock(r, lkb)
+
+   do_request(r, lkb)
+   do_convert(r, lkb)
+   do_unlock(r, lkb)
+   do_cancel(r, lkb)
+
+   Stage 1 (lock, unlock) is mainly about checking input args and
+   splitting into one of the four main operations:
+
+   dlm_lock          = request_lock
+   dlm_lock+CONVERT  = convert_lock
+   dlm_unlock        = unlock_lock
+   dlm_unlock+CANCEL = cancel_lock
+
+   Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is
+   provided to the next stage.
+
+   Stage 3, _xxxx_lock(), determines if the operation is local or remote.
+   When remote, it calls send_xxxx(), when local it calls do_xxxx().
+
+   Stage 4, do_xxxx(), is the guts of the operation.  It manipulates the
+   given rsb and lkb and queues callbacks.
+
+   For remote operations, send_xxxx() results in the corresponding do_xxxx()
+   function being executed on the remote node.
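+
+   As an illustrative sketch, a new request for a remotely mastered
+   resource passes through the stages as:
+
+   dlm_lock(ls, mode, lksb, flags, name, namelen, ...)    stage 1
+     request_lock(ls, lkb, name, namelen, args)           stage 2
+       _request_lock(r, lkb)                              stage 3
+         send_request(r, lkb)       stage 4, do_request(), then runs
+                                    on the master via receive_request()
+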
+   The connecting send/receive calls on local (L) and remote (R) nodes:
+
+   L: send_xxxx()              ->  R: receive_xxxx()
+                                   R: do_xxxx()
+   L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
+*/
+
+#include "dlm_internal.h"
+#include "memory.h"
+#include "lowcomms.h"
+#include "requestqueue.h"
+#include "util.h"
+#include "dir.h"
+#include "member.h"
+#include "lockspace.h"
+#include "ast.h"
+#include "lock.h"
+#include "rcom.h"
+#include "recover.h"
+#include "lvb_table.h"
+#include "config.h"
+
+static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
+static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int send_remove(struct dlm_rsb *r);
+static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
+				    struct dlm_message *ms);
+static int receive_extralen(struct dlm_message *ms);
+
+/*
+ * Lock compatibility matrix - thanks Steve
+ * UN = Unlocked state. Not really a state, used as a flag
+ * PD = Padding. Used to make the matrix a nice power of two in size
+ * Other states are the same as the VMS DLM.
+ * Usage: matrix[grmode+1][rqmode+1]  (although m[rq+1][gr+1] is the same)
+ */
+
+static const int __dlm_compat_matrix[8][8] = {
+      /* UN NL CR CW PR PW EX PD */
+        {1, 1, 1, 1, 1, 1, 1, 0},       /* UN */
+        {1, 1, 1, 1, 1, 1, 1, 0},       /* NL */
+        {1, 1, 1, 1, 1, 1, 0, 0},       /* CR */
+        {1, 1, 1, 1, 0, 0, 0, 0},       /* CW */
+        {1, 1, 1, 0, 1, 0, 0, 0},       /* PR */
+        {1, 1, 1, 0, 0, 0, 0, 0},       /* PW */
+        {1, 1, 0, 0, 0, 0, 0, 0},       /* EX */
+        {0, 0, 0, 0, 0, 0, 0, 0}        /* PD */
+};
+
+/*
+ * This defines the direction of transfer of LVB data.
+ * Granted mode is the row; requested mode is the column.
+ * Usage: matrix[grmode+1][rqmode+1]
+ * 1 = LVB is returned to the caller
+ * 0 = LVB is written to the resource
+ * -1 = nothing happens to the LVB
+ */
+
+const int dlm_lvb_operations[8][8] = {
+        /* UN   NL  CR  CW  PR  PW  EX  PD*/
+        {  -1,  1,  1,  1,  1,  1,  1, -1 }, /* UN */
+        {  -1,  1,  1,  1,  1,  1,  1,  0 }, /* NL */
+        {  -1, -1,  1,  1,  1,  1,  1,  0 }, /* CR */
+        {  -1, -1, -1,  1,  1,  1,  1,  0 }, /* CW */
+        {  -1, -1, -1, -1,  1,  1,  1,  0 }, /* PR */
+        {  -1,  0,  0,  0,  0,  0,  1,  0 }, /* PW */
+        {  -1,  0,  0,  0,  0,  0,  0,  0 }, /* EX */
+        {  -1,  0,  0,  0,  0,  0,  0,  0 }  /* PD */
+};
+EXPORT_SYMBOL_GPL(dlm_lvb_operations);
+
+#define modes_compat(gr, rq) \
+	__dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]
+
+int dlm_modes_compat(int mode1, int mode2)
+{
+	return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
+}
+
+/*
+ * Compatibility matrix for conversions with QUECVT set.
+ * Granted mode is the row; requested mode is the column.
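+ * A 1 entry means the conversion may legitimately use QUECVT;
+ * validate_lock_args() returns -EINVAL for a QUECVT conversion whose
+ * entry is 0 (e.g. any conversion down from EX).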
+ * Usage: matrix[grmode+1][rqmode+1] + */ + +static const int __quecvt_compat_matrix[8][8] = { + /* UN NL CR CW PR PW EX PD */ + {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ + {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ + {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ + {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ + {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ + {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ + {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ +}; + +static void dlm_print_lkb(struct dlm_lkb *lkb) +{ + printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" + " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", + lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, + lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, + lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); +} + +void dlm_print_rsb(struct dlm_rsb *r) +{ + printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", + r->res_nodeid, r->res_flags, r->res_first_lkid, + r->res_recover_locks_count, r->res_name); +} + +/* Threads cannot use the lockspace while it's being recovered */ + +static inline void lock_recovery(struct dlm_ls *ls) +{ + down_read(&ls->ls_in_recovery); +} + +static inline void unlock_recovery(struct dlm_ls *ls) +{ + up_read(&ls->ls_in_recovery); +} + +static inline int lock_recovery_try(struct dlm_ls *ls) +{ + return down_read_trylock(&ls->ls_in_recovery); +} + +static inline int can_be_queued(struct dlm_lkb *lkb) +{ + return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE); +} + +static inline int force_blocking_asts(struct dlm_lkb *lkb) +{ + return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST); +} + +static inline int is_demoted(struct dlm_lkb *lkb) +{ + return (lkb->lkb_sbflags & DLM_SBF_DEMOTED); +} + +static inline int is_remote(struct dlm_rsb *r) +{ + DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r);); + return !!r->res_nodeid; +} + +static inline int is_process_copy(struct dlm_lkb *lkb) +{ + return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY)); +} + +static inline int is_master_copy(struct dlm_lkb *lkb) +{ + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); + return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 
TRUE : FALSE; +} + +static inline int middle_conversion(struct dlm_lkb *lkb) +{ + if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) || + (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW)) + return TRUE; + return FALSE; +} + +static inline int down_conversion(struct dlm_lkb *lkb) +{ + return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode); +} + +static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + if (is_master_copy(lkb)) + return; + + DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); + + lkb->lkb_lksb->sb_status = rv; + lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; + + dlm_add_ast(lkb, AST_COMP); +} + +static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) +{ + if (is_master_copy(lkb)) + send_bast(r, lkb, rqmode); + else { + lkb->lkb_bastmode = rqmode; + dlm_add_ast(lkb, AST_BAST); + } +} + +/* + * Basic operations on rsb's and lkb's + */ + +static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) +{ + struct dlm_rsb *r; + + r = allocate_rsb(ls, len); + if (!r) + return NULL; + + r->res_ls = ls; + r->res_length = len; + memcpy(r->res_name, name, len); + init_MUTEX(&r->res_sem); + + INIT_LIST_HEAD(&r->res_lookup); + INIT_LIST_HEAD(&r->res_grantqueue); + INIT_LIST_HEAD(&r->res_convertqueue); + INIT_LIST_HEAD(&r->res_waitqueue); + INIT_LIST_HEAD(&r->res_root_list); + INIT_LIST_HEAD(&r->res_recover_list); + + return r; +} + +static int search_rsb_list(struct list_head *head, char *name, int len, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r; + int error = 0; + + list_for_each_entry(r, head, res_hashchain) { + if (len == r->res_length && !memcmp(name, r->res_name, len)) + goto found; + } + return -ENOENT; + + found: + if (r->res_nodeid && (flags & R_MASTER)) + error = -ENOTBLK; + *r_ret = r; + return error; +} + +static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + struct dlm_rsb *r; + int error; + + error = search_rsb_list(&ls->ls_rsbtbl[b].list, name, len, flags, &r); + if (!error) { + kref_get(&r->res_ref); + goto out; + } + error = search_rsb_list(&ls->ls_rsbtbl[b].toss, name, len, flags, &r); + if (error) + goto out; + + list_move(&r->res_hashchain, &ls->ls_rsbtbl[b].list); + + if (dlm_no_directory(ls)) + goto out; + + if (r->res_nodeid == -1) { + rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = 0; + } else if (r->res_nodeid > 0) { + rsb_set_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = 0; + } else { + DLM_ASSERT(r->res_nodeid == 0, dlm_print_rsb(r);); + DLM_ASSERT(!rsb_flag(r, RSB_MASTER_UNCERTAIN),); + } + out: + *r_ret = r; + return error; +} + +static int search_rsb(struct dlm_ls *ls, char *name, int len, int b, + unsigned int flags, struct dlm_rsb **r_ret) +{ + int error; + write_lock(&ls->ls_rsbtbl[b].lock); + error = _search_rsb(ls, name, len, b, flags, r_ret); + write_unlock(&ls->ls_rsbtbl[b].lock); + return error; +} + +/* + * Find rsb in rsbtbl and potentially create/add one + * + * Delaying the release of rsb's has a similar benefit to applications keeping + * NL locks on an rsb, but without the guarantee that the cached master value + * will still be valid when the rsb is reused. Apps aren't always smart enough + * to keep NL locks on an rsb that they may lock again shortly; this can lead + * to excessive master lookups and removals if we don't delay the release. + * + * Searching for an rsb means looking through both the normal list and toss + * list. 
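+ * (Both lists hang off the same hash bucket: find_rsb() below picks it
+ * with hash = jhash(name, namelen, 0) and bucket = hash &
+ * (ls->ls_rsbtbl_size - 1), so a given name is only ever searched for
+ * in one bucket's pair of lists.)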
+ * When found on the toss list the rsb is moved to the normal list with
+ * ref count of 1; when found on normal list the ref count is incremented.
+ */

+static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
+		    unsigned int flags, struct dlm_rsb **r_ret)
+{
+	struct dlm_rsb *r, *tmp;
+	uint32_t hash, bucket;
+	int error = 0;
+
+	if (dlm_no_directory(ls))
+		flags |= R_CREATE;
+
+	hash = jhash(name, namelen, 0);
+	bucket = hash & (ls->ls_rsbtbl_size - 1);
+
+	error = search_rsb(ls, name, namelen, bucket, flags, &r);
+	if (!error)
+		goto out;
+
+	if (error == -ENOENT && !(flags & R_CREATE))
+		goto out;
+
+	/* the rsb was found but wasn't a master copy */
+	if (error == -ENOTBLK)
+		goto out;
+
+	error = -ENOMEM;
+	r = create_rsb(ls, name, namelen);
+	if (!r)
+		goto out;
+
+	r->res_hash = hash;
+	r->res_bucket = bucket;
+	r->res_nodeid = -1;
+	kref_init(&r->res_ref);
+
+	/* With no directory, the master can be set immediately */
+	if (dlm_no_directory(ls)) {
+		int nodeid = dlm_dir_nodeid(r);
+		if (nodeid == dlm_our_nodeid())
+			nodeid = 0;
+		r->res_nodeid = nodeid;
+	}
+
+	write_lock(&ls->ls_rsbtbl[bucket].lock);
+	error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
+	if (!error) {
+		write_unlock(&ls->ls_rsbtbl[bucket].lock);
+		free_rsb(r);
+		r = tmp;
+		goto out;
+	}
+	list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
+	write_unlock(&ls->ls_rsbtbl[bucket].lock);
+	error = 0;
+ out:
+	*r_ret = r;
+	return error;
+}
+
+int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
+		 unsigned int flags, struct dlm_rsb **r_ret)
+{
+	return find_rsb(ls, name, namelen, flags, r_ret);
+}
+
+/* This is only called to add a reference when the code already holds
+   a valid reference to the rsb, so there's no need for locking. */
+
+static inline void hold_rsb(struct dlm_rsb *r)
+{
+	kref_get(&r->res_ref);
+}
+
+void dlm_hold_rsb(struct dlm_rsb *r)
+{
+	hold_rsb(r);
+}
+
+static void toss_rsb(struct kref *kref)
+{
+	struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
+	struct dlm_ls *ls = r->res_ls;
+
+	DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
+	kref_init(&r->res_ref);
+	list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss);
+	r->res_toss_time = jiffies;
+	if (r->res_lvbptr) {
+		free_lvb(r->res_lvbptr);
+		r->res_lvbptr = NULL;
+	}
+}
+
+/* When all references to the rsb are gone it's transferred to
+   the tossed list for later disposal. */
+
+static void put_rsb(struct dlm_rsb *r)
+{
+	struct dlm_ls *ls = r->res_ls;
+	uint32_t bucket = r->res_bucket;
+
+	write_lock(&ls->ls_rsbtbl[bucket].lock);
+	kref_put(&r->res_ref, toss_rsb);
+	write_unlock(&ls->ls_rsbtbl[bucket].lock);
+}
+
+void dlm_put_rsb(struct dlm_rsb *r)
+{
+	put_rsb(r);
+}
+
+/* See comment for unhold_lkb */
+
+static void unhold_rsb(struct dlm_rsb *r)
+{
+	int rv;
+	rv = kref_put(&r->res_ref, toss_rsb);
+	DLM_ASSERT(!rv, dlm_print_rsb(r););
+}
+
+static void kill_rsb(struct kref *kref)
+{
+	struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
+
+	/* All work is done after the return from kref_put() so we
+	   can release the write_lock before the remove and free. */
+
+	DLM_ASSERT(list_empty(&r->res_lookup),);
+	DLM_ASSERT(list_empty(&r->res_grantqueue),);
+	DLM_ASSERT(list_empty(&r->res_convertqueue),);
+	DLM_ASSERT(list_empty(&r->res_waitqueue),);
+	DLM_ASSERT(list_empty(&r->res_root_list),);
+	DLM_ASSERT(list_empty(&r->res_recover_list),);
+}
+
+/* Attaching/detaching lkb's from rsb's is for rsb reference counting.
+   The rsb must exist as long as any lkb's for it do.
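+   For example: request_lock() calls attach_lkb(r, lkb) right after
+   find_rsb(), and the rsb reference taken there is dropped again by
+   detach_lkb() from put_lkb() only when the last lkb reference is put.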
*/ + +static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + hold_rsb(r); + lkb->lkb_resource = r; +} + +static void detach_lkb(struct dlm_lkb *lkb) +{ + if (lkb->lkb_resource) { + put_rsb(lkb->lkb_resource); + lkb->lkb_resource = NULL; + } +} + +static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) +{ + struct dlm_lkb *lkb, *tmp; + uint32_t lkid = 0; + uint16_t bucket; + + lkb = allocate_lkb(ls); + if (!lkb) + return -ENOMEM; + + lkb->lkb_nodeid = -1; + lkb->lkb_grmode = DLM_LOCK_IV; + kref_init(&lkb->lkb_ref); + + get_random_bytes(&bucket, sizeof(bucket)); + bucket &= (ls->ls_lkbtbl_size - 1); + + write_lock(&ls->ls_lkbtbl[bucket].lock); + + /* counter can roll over so we must verify lkid is not in use */ + + while (lkid == 0) { + lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); + + list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, + lkb_idtbl_list) { + if (tmp->lkb_id != lkid) + continue; + lkid = 0; + break; + } + } + + lkb->lkb_id = lkid; + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + + *lkb_ret = lkb; + return 0; +} + +static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid) +{ + uint16_t bucket = lkid & 0xFFFF; + struct dlm_lkb *lkb; + + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) { + if (lkb->lkb_id == lkid) + return lkb; + } + return NULL; +} + +static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) +{ + struct dlm_lkb *lkb; + uint16_t bucket = lkid & 0xFFFF; + + if (bucket >= ls->ls_lkbtbl_size) + return -EBADSLT; + + read_lock(&ls->ls_lkbtbl[bucket].lock); + lkb = __find_lkb(ls, lkid); + if (lkb) + kref_get(&lkb->lkb_ref); + read_unlock(&ls->ls_lkbtbl[bucket].lock); + + *lkb_ret = lkb; + return lkb ? 0 : -ENOENT; +} + +static void kill_lkb(struct kref *kref) +{ + struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref); + + /* All work is done after the return from kref_put() so we + can release the write_lock before the detach_lkb */ + + DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); +} + +static int put_lkb(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + uint16_t bucket = lkb->lkb_id & 0xFFFF; + + write_lock(&ls->ls_lkbtbl[bucket].lock); + if (kref_put(&lkb->lkb_ref, kill_lkb)) { + list_del(&lkb->lkb_idtbl_list); + write_unlock(&ls->ls_lkbtbl[bucket].lock); + + detach_lkb(lkb); + + /* for local/process lkbs, lvbptr points to caller's lksb */ + if (lkb->lkb_lvbptr && is_master_copy(lkb)) + free_lvb(lkb->lkb_lvbptr); + if (lkb->lkb_range) + free_range(lkb->lkb_range); + free_lkb(lkb); + return 1; + } else { + write_unlock(&ls->ls_lkbtbl[bucket].lock); + return 0; + } +} + +int dlm_put_lkb(struct dlm_lkb *lkb) +{ + return put_lkb(lkb); +} + +/* This is only called to add a reference when the code already holds + a valid reference to the lkb, so there's no need for locking. */ + +static inline void hold_lkb(struct dlm_lkb *lkb) +{ + kref_get(&lkb->lkb_ref); +} + +/* This is called when we need to remove a reference and are certain + it's not the last ref. e.g. del_lkb is always called between a + find_lkb/put_lkb and is always the inverse of a previous add_lkb. 
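+   (add_lkb() takes its own kref_get() on the lkb for the queue it puts
+   it on, so the reference dropped here is never the last one.)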
+ put_lkb would work fine, but would involve unnecessary locking */ + +static inline void unhold_lkb(struct dlm_lkb *lkb) +{ + int rv; + rv = kref_put(&lkb->lkb_ref, kill_lkb); + DLM_ASSERT(!rv, dlm_print_lkb(lkb);); +} + +static void lkb_add_ordered(struct list_head *new, struct list_head *head, + int mode) +{ + struct dlm_lkb *lkb = NULL; + + list_for_each_entry(lkb, head, lkb_statequeue) + if (lkb->lkb_rqmode < mode) + break; + + if (!lkb) + list_add_tail(new, head); + else + __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); +} + +/* add/remove lkb to rsb's grant/convert/wait queue */ + +static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status) +{ + kref_get(&lkb->lkb_ref); + + DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); + + lkb->lkb_status = status; + + switch (status) { + case DLM_LKSTS_WAITING: + if (lkb->lkb_exflags & DLM_LKF_HEADQUE) + list_add(&lkb->lkb_statequeue, &r->res_waitqueue); + else + list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); + break; + case DLM_LKSTS_GRANTED: + /* convention says granted locks kept in order of grmode */ + lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, + lkb->lkb_grmode); + break; + case DLM_LKSTS_CONVERT: + if (lkb->lkb_exflags & DLM_LKF_HEADQUE) + list_add(&lkb->lkb_statequeue, &r->res_convertqueue); + else + list_add_tail(&lkb->lkb_statequeue, + &r->res_convertqueue); + break; + default: + DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status);); + } +} + +static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + lkb->lkb_status = 0; + list_del(&lkb->lkb_statequeue); + unhold_lkb(lkb); +} + +static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts) +{ + hold_lkb(lkb); + del_lkb(r, lkb); + add_lkb(r, lkb, sts); + unhold_lkb(lkb); +} + +/* add/remove lkb from global waiters list of lkb's waiting for + a reply from a remote node */ + +static void add_to_waiters(struct dlm_lkb *lkb, int mstype) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + + down(&ls->ls_waiters_sem); + if (lkb->lkb_wait_type) { + log_print("add_to_waiters error %d", lkb->lkb_wait_type); + goto out; + } + lkb->lkb_wait_type = mstype; + kref_get(&lkb->lkb_ref); + list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); + out: + up(&ls->ls_waiters_sem); +} + +static int _remove_from_waiters(struct dlm_lkb *lkb) +{ + int error = 0; + + if (!lkb->lkb_wait_type) { + log_print("remove_from_waiters error"); + error = -EINVAL; + goto out; + } + lkb->lkb_wait_type = 0; + list_del(&lkb->lkb_wait_reply); + unhold_lkb(lkb); + out: + return error; +} + +static int remove_from_waiters(struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = lkb->lkb_resource->res_ls; + int error; + + down(&ls->ls_waiters_sem); + error = _remove_from_waiters(lkb); + up(&ls->ls_waiters_sem); + return error; +} + +static void dir_remove(struct dlm_rsb *r) +{ + int to_nodeid; + + if (dlm_no_directory(r->res_ls)) + return; + + to_nodeid = dlm_dir_nodeid(r); + if (to_nodeid != dlm_our_nodeid()) + send_remove(r); + else + dlm_dir_remove_entry(r->res_ls, to_nodeid, + r->res_name, r->res_length); +} + +/* FIXME: shouldn't this be able to exit as soon as one non-due rsb is + found since they are in order of newest to oldest? 
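+   A sketch of that early exit, assuming the toss list really does stay
+   ordered newest to oldest, would turn the continue in the scan below
+   into a break:
+
+	if (!time_after_eq(jiffies, r->res_toss_time +
+			   dlm_config.toss_secs * HZ))
+		break;	/* all remaining entries are newer still */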
*/ + +static int shrink_bucket(struct dlm_ls *ls, int b) +{ + struct dlm_rsb *r; + int count = 0, found; + + for (;;) { + found = FALSE; + write_lock(&ls->ls_rsbtbl[b].lock); + list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, + res_hashchain) { + if (!time_after_eq(jiffies, r->res_toss_time + + dlm_config.toss_secs * HZ)) + continue; + found = TRUE; + break; + } + + if (!found) { + write_unlock(&ls->ls_rsbtbl[b].lock); + break; + } + + if (kref_put(&r->res_ref, kill_rsb)) { + list_del(&r->res_hashchain); + write_unlock(&ls->ls_rsbtbl[b].lock); + + if (is_master(r)) + dir_remove(r); + free_rsb(r); + count++; + } else { + write_unlock(&ls->ls_rsbtbl[b].lock); + log_error(ls, "tossed rsb in use %s", r->res_name); + } + } + + return count; +} + +void dlm_scan_rsbs(struct dlm_ls *ls) +{ + int i; + + if (dlm_locking_stopped(ls)) + return; + + for (i = 0; i < ls->ls_rsbtbl_size; i++) { + shrink_bucket(ls, i); + cond_resched(); + } +} + +/* lkb is master or local copy */ + +static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int b, len = r->res_ls->ls_lvblen; + + /* b=1 lvb returned to caller + b=0 lvb written to rsb or invalidated + b=-1 do nothing */ + + b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; + + if (b == 1) { + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + return; + + memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len); + lkb->lkb_lvbseq = r->res_lvbseq; + + } else if (b == 0) { + if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { + rsb_set_flag(r, RSB_VALNOTVALID); + return; + } + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + r->res_lvbptr = allocate_lvb(r->res_ls); + + if (!r->res_lvbptr) + return; + + memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len); + r->res_lvbseq++; + lkb->lkb_lvbseq = r->res_lvbseq; + rsb_clear_flag(r, RSB_VALNOTVALID); + } + + if (rsb_flag(r, RSB_VALNOTVALID)) + lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID; +} + +static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + if (lkb->lkb_grmode < DLM_LOCK_PW) + return; + + if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) { + rsb_set_flag(r, RSB_VALNOTVALID); + return; + } + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + if (!r->res_lvbptr) + r->res_lvbptr = allocate_lvb(r->res_ls); + + if (!r->res_lvbptr) + return; + + memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); + r->res_lvbseq++; + rsb_clear_flag(r, RSB_VALNOTVALID); +} + +/* lkb is process copy (pc) */ + +static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + int b; + + if (!lkb->lkb_lvbptr) + return; + + if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) + return; + + b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; + if (b == 1) { + int len = receive_extralen(ms); + memcpy(lkb->lkb_lvbptr, ms->m_extra, len); + lkb->lkb_lvbseq = ms->m_lvbseq; + } +} + +/* Manipulate lkb's on rsb's convert/granted/waiting queues + remove_lock -- used for unlock, removes lkb from granted + revert_lock -- used for cancel, moves lkb from convert to granted + grant_lock -- used for request and convert, adds lkb to granted or + moves lkb from convert or waiting to granted + + Each of these is used for master or local copy lkb's. There is + also a _pc() variation used to make the corresponding change on + a process copy (pc) lkb. 
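+   For example, a remote unlock reaching the master runs
+   receive_unlock() -> do_unlock() -> remove_lock(), and the requesting
+   node later applies the same change to its process copy via
+   remove_lock_pc() when the unlock reply arrives.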
*/ + +static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + del_lkb(r, lkb); + lkb->lkb_grmode = DLM_LOCK_IV; + /* this unhold undoes the original ref from create_lkb() + so this leads to the lkb being freed */ + unhold_lkb(lkb); +} + +static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + set_lvb_unlock(r, lkb); + _remove_lock(r, lkb); +} + +static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + _remove_lock(r, lkb); +} + +static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + lkb->lkb_rqmode = DLM_LOCK_IV; + + switch (lkb->lkb_status) { + case DLM_LKSTS_CONVERT: + move_lkb(r, lkb, DLM_LKSTS_GRANTED); + break; + case DLM_LKSTS_WAITING: + del_lkb(r, lkb); + lkb->lkb_grmode = DLM_LOCK_IV; + /* this unhold undoes the original ref from create_lkb() + so this leads to the lkb being freed */ + unhold_lkb(lkb); + break; + default: + log_print("invalid status for revert %d", lkb->lkb_status); + } +} + +static void revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + revert_lock(r, lkb); +} + +static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + if (lkb->lkb_grmode != lkb->lkb_rqmode) { + lkb->lkb_grmode = lkb->lkb_rqmode; + if (lkb->lkb_status) + move_lkb(r, lkb, DLM_LKSTS_GRANTED); + else + add_lkb(r, lkb, DLM_LKSTS_GRANTED); + } + + lkb->lkb_rqmode = DLM_LOCK_IV; + + if (lkb->lkb_range) { + lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; + lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; + } +} + +static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + set_lvb_lock(r, lkb); + _grant_lock(r, lkb); + lkb->lkb_highbast = 0; +} + +static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + set_lvb_lock_pc(r, lkb, ms); + _grant_lock(r, lkb); +} + +/* called by grant_pending_locks() which means an async grant message must + be sent to the requesting node in addition to granting the lock if the + lkb belongs to a remote node. */ + +static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + grant_lock(r, lkb); + if (is_master_copy(lkb)) + send_grant(r, lkb); + else + queue_cast(r, lkb, 0); +} + +static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) +{ + struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, + lkb_statequeue); + if (lkb->lkb_id == first->lkb_id) + return TRUE; + + return FALSE; +} + +/* Return 1 if the locks' ranges overlap. If the lkb has no range then it is + assumed to cover 0-ffffffff.ffffffff */ + +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) +{ + if (!lkb1->lkb_range || !lkb2->lkb_range) + return TRUE; + + if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || + lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) + return FALSE; + + return TRUE; +} + +/* Check if the given lkb conflicts with another lkb on the queue. */ + +static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) +{ + struct dlm_lkb *this; + + list_for_each_entry(this, head, lkb_statequeue) { + if (this == lkb) + continue; + if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) + return TRUE; + } + return FALSE; +} + +/* + * "A conversion deadlock arises with a pair of lock requests in the converting + * queue for one resource. The granted mode of each lock blocks the requested + * mode of the other lock." 
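+ *
+ * Part 1, handled by the pairwise scan in conversion_deadlock_detect()
+ * below, is the quoted case itself: two converting locks whose granted
+ * and requested modes block each other, caught by checking
+ * !modes_compat(this, lkb) && !modes_compat(lkb, this).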
+ *
+ * Part 2: if the granted mode of lkb is preventing the first lkb in the
+ * convert queue from being granted, then demote lkb (set grmode to NL).
+ * This second form requires that we check for conv-deadlk even when
+ * now == 0 in _can_be_granted().
+ *
+ * Example:
+ * Granted Queue: empty
+ * Convert Queue: NL->EX (first lock)
+ *                PR->EX (second lock)
+ *
+ * The first lock can't be granted because of the granted mode of the second
+ * lock and the second lock can't be granted because it's not first in the
+ * list.  We demote the granted mode of the second lock (the lkb passed to this
+ * function).
+ *
+ * After the resolution, the "grant pending" function needs to go back and try
+ * to grant locks on the convert queue again since the first lock can now be
+ * granted.
+ */
+
+static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+{
+	struct dlm_lkb *this, *first = NULL, *self = NULL;
+
+	list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
+		if (!first)
+			first = this;
+		if (this == lkb) {
+			self = lkb;
+			continue;
+		}
+
+		if (!ranges_overlap(lkb, this))
+			continue;
+
+		if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
+			return TRUE;
+	}
+
+	/* if lkb is on the convert queue and is preventing the first
+	   from being granted, then there's deadlock and we demote lkb.
+	   multiple converting locks may need to do this before the first
+	   converting lock can be granted. */
+
+	if (self && self != first) {
+		if (!modes_compat(lkb, first) &&
+		    !queue_conflict(&rsb->res_grantqueue, first))
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Return 1 if the lock can be granted, 0 otherwise.
+ * Also detect and resolve conversion deadlocks.
+ *
+ * lkb is the lock to be granted
+ *
+ * now is 1 if the function is being called in the context of the
+ * immediate request, it is 0 if called later, after the lock has been
+ * queued.
+ *
+ * References are from chapter 6 of "VAXcluster Principles" by Roy Davis
+ */
+
+static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+{
+	int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+	/*
+	 * 6-10: Version 5.4 introduced an option to address the phenomenon of
+	 * a new request for a NL mode lock being blocked.
+	 *
+	 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
+	 * request, then it would be granted.  In essence, the use of this flag
+	 * tells the Lock Manager to expedite this request by not considering
+	 * what may be in the CONVERTING or WAITING queues...  As of this
+	 * writing, the EXPEDITE flag can be used only with new requests for NL
+	 * mode locks.  This flag is not valid for conversion requests.
+	 *
+	 * A shortcut.  Earlier checks return an error if EXPEDITE is used in a
+	 * conversion or used with a non-NL requested mode.  We also know an
+	 * EXPEDITE request is always granted immediately, so now must always
+	 * be 1.  The full condition to grant an expedite request: (now &&
+	 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
+	 * therefore be shortened to just checking the flag.
+	 */
+
+	if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
+		return TRUE;
+
+	/*
+	 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
+	 * added to the remaining conditions.
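+	 *
+	 * (Worked example: a brand new PR request, now=1 and !conv, made
+	 * while another node holds EX hits this queue_conflict() check and
+	 * falls through to "out"; the caller, do_request(), then either
+	 * queues it with -EINPROGRESS or fails it with -EAGAIN if
+	 * DLM_LKF_NOQUEUE was set.)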
+	 */
+
+	if (queue_conflict(&r->res_grantqueue, lkb))
+		goto out;
+
+	/*
+	 * 6-3: By default, a conversion request is immediately granted if the
+	 * requested mode is compatible with the modes of all other granted
+	 * locks
+	 */
+
+	if (queue_conflict(&r->res_convertqueue, lkb))
+		goto out;
+
+	/*
+	 * 6-5: But the default algorithm for deciding whether to grant or
+	 * queue conversion requests does not by itself guarantee that such
+	 * requests are serviced on a "first come first serve" basis.  This, in
+	 * turn, can lead to a phenomenon known as "indefinite postponement".
+	 *
+	 * 6-7: This issue is dealt with by using the optional QUECVT flag with
+	 * the system service employed to request a lock conversion.  This flag
+	 * forces certain conversion requests to be queued, even if they are
+	 * compatible with the granted modes of other locks on the same
+	 * resource.  Thus, the use of this flag results in conversion requests
+	 * being ordered on a "first come first serve" basis.
+	 *
+	 * DCT: This condition is all about new conversions being able to occur
+	 * "in place" while the lock remains on the granted queue (assuming
+	 * nothing else conflicts.)  IOW if QUECVT isn't set, a conversion
+	 * doesn't _have_ to go onto the convert queue where it's processed in
+	 * order.  The "now" variable is necessary to distinguish converts
+	 * being received and processed for the first time now, because once a
+	 * convert is moved to the conversion queue the condition below applies
+	 * requiring fifo granting.
+	 */
+
+	if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
+		return TRUE;
+
+	/*
+	 * When using range locks the NOORDER flag is set to avoid the standard
+	 * vms rules on grant order.
+	 */
+
+	if (lkb->lkb_exflags & DLM_LKF_NOORDER)
+		return TRUE;
+
+	/*
+	 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
+	 * granted until all other conversion requests ahead of it are granted
+	 * and/or canceled.
+	 */
+
+	if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
+		return TRUE;
+
+	/*
+	 * 6-4: By default, a new request is immediately granted only if all
+	 * three of the following conditions are satisfied when the request is
+	 * issued:
+	 * - The queue of ungranted conversion requests for the resource is
+	 *   empty.
+	 * - The queue of ungranted new requests for the resource is empty.
+	 * - The mode of the new request is compatible with the most
+	 *   restrictive mode of all granted locks on the resource.
+	 */
+
+	if (now && !conv && list_empty(&r->res_convertqueue) &&
+	    list_empty(&r->res_waitqueue))
+		return TRUE;
+
+	/*
+	 * 6-4: Once a lock request is in the queue of ungranted new requests,
+	 * it cannot be granted until the queue of ungranted conversion
+	 * requests is empty, all ungranted new requests ahead of it are
+	 * granted and/or canceled, and it is compatible with the granted mode
+	 * of the most restrictive lock granted on the resource.
+	 */
+
+	if (!now && !conv && list_empty(&r->res_convertqueue) &&
+	    first_in_list(lkb, &r->res_waitqueue))
+		return TRUE;
+
+ out:
+	/*
+	 * The following, enabled by CONVDEADLK, departs from VMS.
+	 */
+
+	if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
+	    conversion_deadlock_detect(r, lkb)) {
+		lkb->lkb_grmode = DLM_LOCK_NL;
+		lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+	}
+
+	return FALSE;
+}
+
+/*
+ * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
+ * simple way to provide a big optimization to applications that can use them.
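+ *
+ * For example, a DLM_LOCK_CW request with DLM_LKF_ALTPR set is retried
+ * as DLM_LOCK_PR if CW cannot be granted; when the alternate mode is
+ * granted, DLM_SBF_ALTMODE in the lksb flags tells the caller which
+ * mode it actually holds.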
+ */ + +static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) +{ + uint32_t flags = lkb->lkb_exflags; + int rv; + int8_t alt = 0, rqmode = lkb->lkb_rqmode; + + rv = _can_be_granted(r, lkb, now); + if (rv) + goto out; + + if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) + goto out; + + if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) + alt = DLM_LOCK_PR; + else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) + alt = DLM_LOCK_CW; + + if (alt) { + lkb->lkb_rqmode = alt; + rv = _can_be_granted(r, lkb, now); + if (rv) + lkb->lkb_sbflags |= DLM_SBF_ALTMODE; + else + lkb->lkb_rqmode = rqmode; + } + out: + return rv; +} + +static int grant_pending_convert(struct dlm_rsb *r, int high) +{ + struct dlm_lkb *lkb, *s; + int hi, demoted, quit, grant_restart, demote_restart; + + quit = 0; + restart: + grant_restart = 0; + demote_restart = 0; + hi = DLM_LOCK_IV; + + list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { + demoted = is_demoted(lkb); + if (can_be_granted(r, lkb, FALSE)) { + grant_lock_pending(r, lkb); + grant_restart = 1; + } else { + hi = max_t(int, lkb->lkb_rqmode, hi); + if (!demoted && is_demoted(lkb)) + demote_restart = 1; + } + } + + if (grant_restart) + goto restart; + if (demote_restart && !quit) { + quit = 1; + goto restart; + } + + return max_t(int, high, hi); +} + +static int grant_pending_wait(struct dlm_rsb *r, int high) +{ + struct dlm_lkb *lkb, *s; + + list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { + if (can_be_granted(r, lkb, FALSE)) + grant_lock_pending(r, lkb); + else + high = max_t(int, lkb->lkb_rqmode, high); + } + + return high; +} + +static void grant_pending_locks(struct dlm_rsb *r) +{ + struct dlm_lkb *lkb, *s; + int high = DLM_LOCK_IV; + + DLM_ASSERT(is_master(r), dlm_print_rsb(r);); + + high = grant_pending_convert(r, high); + high = grant_pending_wait(r, high); + + if (high == DLM_LOCK_IV) + return; + + /* + * If there are locks left on the wait/convert queue then send blocking + * ASTs to granted locks based on the largest requested mode (high) + * found above. This can generate spurious blocking ASTs for range + * locks. FIXME: highbast < high comparison not valid for PR/CW. + */ + + list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { + if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) && + !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { + queue_bast(r, lkb, high); + lkb->lkb_highbast = high; + } + } +} + +static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, + struct dlm_lkb *lkb) +{ + struct dlm_lkb *gr; + + list_for_each_entry(gr, head, lkb_statequeue) { + if (gr->lkb_bastaddr && + gr->lkb_highbast < lkb->lkb_rqmode && + ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { + queue_bast(r, gr, lkb->lkb_rqmode); + gr->lkb_highbast = lkb->lkb_rqmode; + } + } +} + +static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + send_bast_queue(r, &r->res_grantqueue, lkb); +} + +static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + send_bast_queue(r, &r->res_grantqueue, lkb); + send_bast_queue(r, &r->res_convertqueue, lkb); +} + +/* set_master(r, lkb) -- set the master nodeid of a resource + + The purpose of this function is to set the nodeid field in the given + lkb using the nodeid field in the given rsb. If the rsb's nodeid is + known, it can just be copied to the lkb and the function will return + 0. If the rsb's nodeid is _not_ known, it needs to be looked up + before it can be copied to the lkb. 
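+
+   (res_nodeid encoding, relied on below: 0 means this node is the
+   master, a positive value is the master's nodeid, and -1 means the
+   master is not yet known.)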
+ + When the rsb nodeid is being looked up remotely, the initial lkb + causing the lookup is kept on the ls_waiters list waiting for the + lookup reply. Other lkb's waiting for the same rsb lookup are kept + on the rsb's res_lookup list until the master is verified. + + Return values: + 0: nodeid is set in rsb/lkb and the caller should go ahead and use it + 1: the rsb master is not available and the lkb has been placed on + a wait queue +*/ + +static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_ls *ls = r->res_ls; + int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid(); + + if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) { + rsb_clear_flag(r, RSB_MASTER_UNCERTAIN); + r->res_first_lkid = lkb->lkb_id; + lkb->lkb_nodeid = r->res_nodeid; + return 0; + } + + if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) { + list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup); + return 1; + } + + if (r->res_nodeid == 0) { + lkb->lkb_nodeid = 0; + return 0; + } + + if (r->res_nodeid > 0) { + lkb->lkb_nodeid = r->res_nodeid; + return 0; + } + + DLM_ASSERT(r->res_nodeid == -1, dlm_print_rsb(r);); + + dir_nodeid = dlm_dir_nodeid(r); + + if (dir_nodeid != our_nodeid) { + r->res_first_lkid = lkb->lkb_id; + send_lookup(r, lkb); + return 1; + } + + for (;;) { + /* It's possible for dlm_scand to remove an old rsb for + this same resource from the toss list, us to create + a new one, look up the master locally, and find it + already exists just before dlm_scand does the + dir_remove() on the previous rsb. */ + + error = dlm_dir_lookup(ls, our_nodeid, r->res_name, + r->res_length, &ret_nodeid); + if (!error) + break; + log_debug(ls, "dir_lookup error %d %s", error, r->res_name); + schedule(); + } + + if (ret_nodeid == our_nodeid) { + r->res_first_lkid = 0; + r->res_nodeid = 0; + lkb->lkb_nodeid = 0; + } else { + r->res_first_lkid = lkb->lkb_id; + r->res_nodeid = ret_nodeid; + lkb->lkb_nodeid = ret_nodeid; + } + return 0; +} + +static void process_lookup_list(struct dlm_rsb *r) +{ + struct dlm_lkb *lkb, *safe; + + list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) { + list_del(&lkb->lkb_rsb_lookup); + _request_lock(r, lkb); + schedule(); + } +} + +/* confirm_master -- confirm (or deny) an rsb's master nodeid */ + +static void confirm_master(struct dlm_rsb *r, int error) +{ + struct dlm_lkb *lkb; + + if (!r->res_first_lkid) + return; + + switch (error) { + case 0: + case -EINPROGRESS: + r->res_first_lkid = 0; + process_lookup_list(r); + break; + + case -EAGAIN: + /* the remote master didn't queue our NOQUEUE request; + make a waiting lkb the first_lkid */ + + r->res_first_lkid = 0; + + if (!list_empty(&r->res_lookup)) { + lkb = list_entry(r->res_lookup.next, struct dlm_lkb, + lkb_rsb_lookup); + list_del(&lkb->lkb_rsb_lookup); + r->res_first_lkid = lkb->lkb_id; + _request_lock(r, lkb); + } else + r->res_nodeid = -1; + break; + + default: + log_error(r->res_ls, "confirm_master unknown error %d", error); + } +} + +static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, + int namelen, uint32_t parent_lkid, void *ast, + void *astarg, void *bast, struct dlm_range *range, + struct dlm_args *args) +{ + int rv = -EINVAL; + + /* check for invalid arg usage */ + + if (mode < 0 || mode > DLM_LOCK_EX) + goto out; + + if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN)) + goto out; + + if (flags & DLM_LKF_CANCEL) + goto out; + + if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) + goto out; + + if (flags & DLM_LKF_CONVDEADLK && !(flags & 
DLM_LKF_CONVERT)) + goto out; + + if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) + goto out; + + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) + goto out; + + if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL) + goto out; + + if (!ast || !lksb) + goto out; + + if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) + goto out; + + /* parent/child locks not yet supported */ + if (parent_lkid) + goto out; + + if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) + goto out; + + /* these args will be copied to the lkb in validate_lock_args, + it cannot be done now because when converting locks, fields in + an active lkb cannot be modified before locking the rsb */ + + args->flags = flags; + args->astaddr = ast; + args->astparam = (long) astarg; + args->bastaddr = bast; + args->mode = mode; + args->lksb = lksb; + args->range = range; + rv = 0; + out: + return rv; +} + +static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args) +{ + if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK | + DLM_LKF_FORCEUNLOCK)) + return -EINVAL; + + args->flags = flags; + args->astparam = (long) astarg; + return 0; +} + +static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + int rv = -EINVAL; + + if (args->flags & DLM_LKF_CONVERT) { + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_QUECVT && + !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1]) + goto out; + + rv = -EBUSY; + if (lkb->lkb_status != DLM_LKSTS_GRANTED) + goto out; + + if (lkb->lkb_wait_type) + goto out; + } + + lkb->lkb_exflags = args->flags; + lkb->lkb_sbflags = 0; + lkb->lkb_astaddr = args->astaddr; + lkb->lkb_astparam = args->astparam; + lkb->lkb_bastaddr = args->bastaddr; + lkb->lkb_rqmode = args->mode; + lkb->lkb_lksb = args->lksb; + lkb->lkb_lvbptr = args->lksb->sb_lvbptr; + lkb->lkb_ownpid = (int) current->pid; + + rv = 0; + if (!args->range) + goto out; + + if (!lkb->lkb_range) { + rv = -ENOMEM; + lkb->lkb_range = allocate_range(ls); + if (!lkb->lkb_range) + goto out; + /* This is needed for conversions that contain ranges + where the original lock didn't but it's harmless for + new locks too. */ + lkb->lkb_range[GR_RANGE_START] = 0LL; + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; + } + + lkb->lkb_range[RQ_RANGE_START] = args->range->ra_start; + lkb->lkb_range[RQ_RANGE_END] = args->range->ra_end; + lkb->lkb_flags |= DLM_IFL_RANGE; + rv = 0; + out: + return rv; +} + +static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args) +{ + int rv = -EINVAL; + + if (lkb->lkb_flags & DLM_IFL_MSTCPY) + goto out; + + if (args->flags & DLM_LKF_FORCEUNLOCK) + goto out_ok; + + if (args->flags & DLM_LKF_CANCEL && + lkb->lkb_status == DLM_LKSTS_GRANTED) + goto out; + + if (!(args->flags & DLM_LKF_CANCEL) && + lkb->lkb_status != DLM_LKSTS_GRANTED) + goto out; + + rv = -EBUSY; + if (lkb->lkb_wait_type) + goto out; + + out_ok: + lkb->lkb_exflags = args->flags; + lkb->lkb_sbflags = 0; + lkb->lkb_astparam = args->astparam; + + rv = 0; + out: + return rv; +} + +/* + * Four stage 4 varieties: + * do_request(), do_convert(), do_unlock(), do_cancel() + * These are called on the master node for the given lock and + * from the central locking logic. 
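+ *
+ * Their return values double as the status reported to the caller:
+ * 0 means granted now, -EINPROGRESS means queued (a completion ast
+ * follows later), -EAGAIN means refused because DLM_LKF_NOQUEUE was
+ * set, and do_unlock()/do_cancel() return -DLM_EUNLOCK/-DLM_ECANCEL,
+ * which dlm_unlock() maps back to 0 for the caller.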
+ */ + +static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error = 0; + + if (can_be_granted(r, lkb, TRUE)) { + grant_lock(r, lkb); + queue_cast(r, lkb, 0); + goto out; + } + + if (can_be_queued(lkb)) { + error = -EINPROGRESS; + add_lkb(r, lkb, DLM_LKSTS_WAITING); + send_blocking_asts(r, lkb); + goto out; + } + + error = -EAGAIN; + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + queue_cast(r, lkb, -EAGAIN); + + out: + return error; +} + +static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error = 0; + + /* changing an existing lock may allow others to be granted */ + + if (can_be_granted(r, lkb, TRUE)) { + grant_lock(r, lkb); + queue_cast(r, lkb, 0); + grant_pending_locks(r); + goto out; + } + + if (can_be_queued(lkb)) { + if (is_demoted(lkb)) + grant_pending_locks(r); + error = -EINPROGRESS; + del_lkb(r, lkb); + add_lkb(r, lkb, DLM_LKSTS_CONVERT); + send_blocking_asts(r, lkb); + goto out; + } + + error = -EAGAIN; + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + queue_cast(r, lkb, -EAGAIN); + + out: + return error; +} + +static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + remove_lock(r, lkb); + queue_cast(r, lkb, -DLM_EUNLOCK); + grant_pending_locks(r); + return -DLM_EUNLOCK; +} + +static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + revert_lock(r, lkb); + queue_cast(r, lkb, -DLM_ECANCEL); + grant_pending_locks(r); + return -DLM_ECANCEL; +} + +/* + * Four stage 3 varieties: + * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() + */ + +/* add a new lkb to a possibly new rsb, called by requesting process */ + +static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + /* set_master: sets lkb nodeid from r */ + + error = set_master(r, lkb); + if (error < 0) + goto out; + if (error) { + error = 0; + goto out; + } + + if (is_remote(r)) + /* receive_request() calls do_request() on remote node */ + error = send_request(r, lkb); + else + error = do_request(r, lkb); + out: + return error; +} + +/* change some property of an existing lkb, e.g. 
mode, range */ + +static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_convert() calls do_convert() on remote node */ + error = send_convert(r, lkb); + else + error = do_convert(r, lkb); + + return error; +} + +/* remove an existing lkb from the granted queue */ + +static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_unlock() calls do_unlock() on remote node */ + error = send_unlock(r, lkb); + else + error = do_unlock(r, lkb); + + return error; +} + +/* remove an existing lkb from the convert or wait queue */ + +static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + if (is_remote(r)) + /* receive_cancel() calls do_cancel() on remote node */ + error = send_cancel(r, lkb); + else + error = do_cancel(r, lkb); + + return error; +} + +/* + * Four stage 2 varieties: + * request_lock(), convert_lock(), unlock_lock(), cancel_lock() + */ + +static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name, + int len, struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + error = validate_lock_args(ls, lkb, args); + if (error) + goto out; + + error = find_rsb(ls, name, len, R_CREATE, &r); + if (error) + goto out; + + lock_rsb(r); + + attach_lkb(r, lkb); + lkb->lkb_lksb->sb_lkid = lkb->lkb_id; + + error = _request_lock(r, lkb); + + unlock_rsb(r); + put_rsb(r); + + out: + return error; +} + +static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_lock_args(ls, lkb, args); + if (error) + goto out; + + error = _convert_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_unlock_args(lkb, args); + if (error) + goto out; + + error = _unlock_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_args *args) +{ + struct dlm_rsb *r; + int error; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + error = validate_unlock_args(lkb, args); + if (error) + goto out; + + error = _cancel_lock(r, lkb); + out: + unlock_rsb(r); + put_rsb(r); + return error; +} + +/* + * Two stage 1 varieties: dlm_lock() and dlm_unlock() + */ + +int dlm_lock(dlm_lockspace_t *lockspace, + int mode, + struct dlm_lksb *lksb, + uint32_t flags, + void *name, + unsigned int namelen, + uint32_t parent_lkid, + void (*ast) (void *astarg), + void *astarg, + void (*bast) (void *astarg, int mode), + struct dlm_range *range) +{ + struct dlm_ls *ls; + struct dlm_lkb *lkb; + struct dlm_args args; + int error, convert = flags & DLM_LKF_CONVERT; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + lock_recovery(ls); + + if (convert) + error = find_lkb(ls, lksb->sb_lkid, &lkb); + else + error = create_lkb(ls, &lkb); + + if (error) + goto out; + + error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, + astarg, bast, range, &args); + if (error) + goto out_put; + + if (convert) + error = convert_lock(ls, lkb, &args); + else + error = request_lock(ls, lkb, name, namelen, &args); + + if (error == -EINPROGRESS) + error = 0; + out_put: + if (convert || error) + put_lkb(lkb); + if 
(error == -EAGAIN) + error = 0; + out: + unlock_recovery(ls); + dlm_put_lockspace(ls); + return error; +} + +int dlm_unlock(dlm_lockspace_t *lockspace, + uint32_t lkid, + uint32_t flags, + struct dlm_lksb *lksb, + void *astarg) +{ + struct dlm_ls *ls; + struct dlm_lkb *lkb; + struct dlm_args args; + int error; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + error = set_unlock_args(flags, astarg, &args); + if (error) + goto out_put; + + if (flags & DLM_LKF_CANCEL) + error = cancel_lock(ls, lkb, &args); + else + error = unlock_lock(ls, lkb, &args); + + if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) + error = 0; + out_put: + put_lkb(lkb); + out: + unlock_recovery(ls); + dlm_put_lockspace(ls); + return error; +} + +/* + * send/receive routines for remote operations and replies + * + * send_args + * send_common + * send_request receive_request + * send_convert receive_convert + * send_unlock receive_unlock + * send_cancel receive_cancel + * send_grant receive_grant + * send_bast receive_bast + * send_lookup receive_lookup + * send_remove receive_remove + * + * send_common_reply + * receive_request_reply send_request_reply + * receive_convert_reply send_convert_reply + * receive_unlock_reply send_unlock_reply + * receive_cancel_reply send_cancel_reply + * receive_lookup_reply send_lookup_reply + */ + +static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb, + int to_nodeid, int mstype, + struct dlm_message **ms_ret, + struct dlm_mhandle **mh_ret) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + char *mb; + int mb_len = sizeof(struct dlm_message); + + switch (mstype) { + case DLM_MSG_REQUEST: + case DLM_MSG_LOOKUP: + case DLM_MSG_REMOVE: + mb_len += r->res_length; + break; + case DLM_MSG_CONVERT: + case DLM_MSG_UNLOCK: + case DLM_MSG_REQUEST_REPLY: + case DLM_MSG_CONVERT_REPLY: + case DLM_MSG_GRANT: + if (lkb && lkb->lkb_lvbptr) + mb_len += r->res_ls->ls_lvblen; + break; + } + + /* get_buffer gives us a message handle (mh) that we need to + pass into lowcomms_commit and a message buffer (mb) that we + write our data into */ + + mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); + if (!mh) + return -ENOBUFS; + + memset(mb, 0, mb_len); + + ms = (struct dlm_message *) mb; + + ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR); + ms->m_header.h_lockspace = r->res_ls->ls_global_id; + ms->m_header.h_nodeid = dlm_our_nodeid(); + ms->m_header.h_length = mb_len; + ms->m_header.h_cmd = DLM_MSG; + + ms->m_type = mstype; + + *mh_ret = mh; + *ms_ret = ms; + return 0; +} + +/* further lowcomms enhancements or alternate implementations may make + the return value from this function useful at some point */ + +static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms) +{ + dlm_message_out(ms); + dlm_lowcomms_commit_buffer(mh); + return 0; +} + +static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + ms->m_nodeid = lkb->lkb_nodeid; + ms->m_pid = lkb->lkb_ownpid; + ms->m_lkid = lkb->lkb_id; + ms->m_remid = lkb->lkb_remid; + ms->m_exflags = lkb->lkb_exflags; + ms->m_sbflags = lkb->lkb_sbflags; + ms->m_flags = lkb->lkb_flags; + ms->m_lvbseq = lkb->lkb_lvbseq; + ms->m_status = lkb->lkb_status; + ms->m_grmode = lkb->lkb_grmode; + ms->m_rqmode = lkb->lkb_rqmode; + ms->m_hash = r->res_hash; + + /* m_result and m_bastmode are set from function args, + not from lkb fields */ + + if (lkb->lkb_bastaddr) + 
ms->m_asts |= AST_BAST; + if (lkb->lkb_astaddr) + ms->m_asts |= AST_COMP; + + if (lkb->lkb_range) { + ms->m_range[0] = lkb->lkb_range[RQ_RANGE_START]; + ms->m_range[1] = lkb->lkb_range[RQ_RANGE_END]; + } + + if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) + memcpy(ms->m_extra, r->res_name, r->res_length); + + else if (lkb->lkb_lvbptr) + memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen); + +} + +static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + add_to_waiters(lkb, mstype); + + to_nodeid = r->res_nodeid; + + error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); + if (error) + goto fail; + + send_args(r, lkb, ms); + + error = send_message(mh, ms); + if (error) + goto fail; + return 0; + + fail: + remove_from_waiters(lkb); + return error; +} + +static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_REQUEST); +} + +static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + int error; + + error = send_common(r, lkb, DLM_MSG_CONVERT); + + /* down conversions go without a reply from the master */ + if (!error && down_conversion(lkb)) { + remove_from_waiters(lkb); + r->res_ls->ls_stub_ms.m_result = 0; + __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); + } + + return error; +} + +/* FIXME: if this lkb is the only lock we hold on the rsb, then set + MASTER_UNCERTAIN to force the next request on the rsb to confirm + that the master is still correct. */ + +static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_UNLOCK); +} + +static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + return send_common(r, lkb, DLM_MSG_CANCEL); +} + +static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_result = 0; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_bastmode = mode; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + add_to_waiters(lkb, DLM_MSG_LOOKUP); + + to_nodeid = dlm_dir_nodeid(r); + + error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); + if (error) + goto fail; + + send_args(r, lkb, ms); + + error = send_message(mh, ms); + if (error) + goto fail; + return 0; + + fail: + remove_from_waiters(lkb); + return error; +} + +static int send_remove(struct dlm_rsb *r) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = dlm_dir_nodeid(r); + + error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh); + if (error) + goto out; + + memcpy(ms->m_extra, r->res_name, r->res_length); + ms->m_hash = r->res_hash; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, + int mstype, int 
rv) +{ + struct dlm_message *ms; + struct dlm_mhandle *mh; + int to_nodeid, error; + + to_nodeid = lkb->lkb_nodeid; + + error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); + if (error) + goto out; + + send_args(r, lkb, ms); + + ms->m_result = rv; + + error = send_message(mh, ms); + out: + return error; +} + +static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv); +} + +static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv); +} + +static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv); +} + +static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) +{ + return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv); +} + +static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in, + int ret_nodeid, int rv) +{ + struct dlm_rsb *r = &ls->ls_stub_rsb; + struct dlm_message *ms; + struct dlm_mhandle *mh; + int error, nodeid = ms_in->m_header.h_nodeid; + + error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh); + if (error) + goto out; + + ms->m_lkid = ms_in->m_lkid; + ms->m_result = rv; + ms->m_nodeid = ret_nodeid; + + error = send_message(mh, ms); + out: + return error; +} + +/* which args we save from a received message depends heavily on the type + of message, unlike the send side where we can safely send everything about + the lkb for any type of message */ + +static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + lkb->lkb_exflags = ms->m_exflags; + lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | + (ms->m_flags & 0x0000FFFF); +} + +static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + lkb->lkb_sbflags = ms->m_sbflags; + lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | + (ms->m_flags & 0x0000FFFF); +} + +static int receive_extralen(struct dlm_message *ms) +{ + return (ms->m_header.h_length - sizeof(struct dlm_message)); +} + +static int receive_range(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (lkb->lkb_flags & DLM_IFL_RANGE) { + if (!lkb->lkb_range) + lkb->lkb_range = allocate_range(ls); + if (!lkb->lkb_range) + return -ENOMEM; + lkb->lkb_range[RQ_RANGE_START] = ms->m_range[0]; + lkb->lkb_range[RQ_RANGE_END] = ms->m_range[1]; + } + return 0; +} + +static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + int len; + + if (lkb->lkb_exflags & DLM_LKF_VALBLK) { + if (!lkb->lkb_lvbptr) + lkb->lkb_lvbptr = allocate_lvb(ls); + if (!lkb->lkb_lvbptr) + return -ENOMEM; + len = receive_extralen(ms); + memcpy(lkb->lkb_lvbptr, ms->m_extra, len); + } + return 0; +} + +static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + lkb->lkb_nodeid = ms->m_header.h_nodeid; + lkb->lkb_ownpid = ms->m_pid; + lkb->lkb_remid = ms->m_lkid; + lkb->lkb_grmode = DLM_LOCK_IV; + lkb->lkb_rqmode = ms->m_rqmode; + lkb->lkb_bastaddr = (void *) (long) (ms->m_asts & AST_BAST); + lkb->lkb_astaddr = (void *) (long) (ms->m_asts & AST_COMP); + + DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); + + if (receive_range(ls, lkb, ms)) + return -ENOMEM; + + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + + return 0; +} + +static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (lkb->lkb_nodeid != 
ms->m_header.h_nodeid) { + log_error(ls, "convert_args nodeid %d %d lkid %x %x", + lkb->lkb_nodeid, ms->m_header.h_nodeid, + lkb->lkb_id, lkb->lkb_remid); + return -EINVAL; + } + + if (!is_master_copy(lkb)) + return -EINVAL; + + if (lkb->lkb_status != DLM_LKSTS_GRANTED) + return -EBUSY; + + if (receive_range(ls, lkb, ms)) + return -ENOMEM; + if (lkb->lkb_range) { + lkb->lkb_range[GR_RANGE_START] = 0LL; + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; + } + + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + + lkb->lkb_rqmode = ms->m_rqmode; + lkb->lkb_lvbseq = ms->m_lvbseq; + + return 0; +} + +static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + if (!is_master_copy(lkb)) + return -EINVAL; + if (receive_lvb(ls, lkb, ms)) + return -ENOMEM; + return 0; +} + +/* We fill in the stub-lkb fields with the info that send_xxxx_reply() + uses to send a reply and that the remote end uses to process the reply. */ + +static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb = &ls->ls_stub_lkb; + lkb->lkb_nodeid = ms->m_header.h_nodeid; + lkb->lkb_remid = ms->m_lkid; +} + +static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error, namelen; + + error = create_lkb(ls, &lkb); + if (error) + goto fail; + + receive_flags(lkb, ms); + lkb->lkb_flags |= DLM_IFL_MSTCPY; + error = receive_request_args(ls, lkb, ms); + if (error) { + put_lkb(lkb); + goto fail; + } + + namelen = receive_extralen(ms); + + error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); + if (error) { + put_lkb(lkb); + goto fail; + } + + lock_rsb(r); + + attach_lkb(r, lkb); + error = do_request(r, lkb); + send_request_reply(r, lkb, error); + + unlock_rsb(r); + put_rsb(r); + + if (error == -EINPROGRESS) + error = 0; + if (error) + put_lkb(lkb); + return; + + fail: + setup_stub_lkb(ls, ms); + send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); +} + +static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error, reply = TRUE; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) + goto fail; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + receive_flags(lkb, ms); + error = receive_convert_args(ls, lkb, ms); + if (error) + goto out; + reply = !down_conversion(lkb); + + error = do_convert(r, lkb); + out: + if (reply) + send_convert_reply(r, lkb, error); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); + return; + + fail: + setup_stub_lkb(ls, ms); + send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); +} + +static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) + goto fail; + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + receive_flags(lkb, ms); + error = receive_unlock_args(ls, lkb, ms); + if (error) + goto out; + + error = do_unlock(r, lkb); + out: + send_unlock_reply(r, lkb, error); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); + return; + + fail: + setup_stub_lkb(ls, ms); + send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); +} + +static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) + goto fail; + + receive_flags(lkb, ms); + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + 
error = do_cancel(r, lkb); + send_cancel_reply(r, lkb, error); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); + return; + + fail: + setup_stub_lkb(ls, ms); + send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error); +} + +static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_grant no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + receive_flags_reply(lkb, ms); + grant_lock_pc(r, lkb, ms); + queue_cast(r, lkb, 0); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); +} + +static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_bast no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + queue_bast(r, lkb, ms->m_bastmode); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); +} + +static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) +{ + int len, error, ret_nodeid, dir_nodeid, from_nodeid, our_nodeid; + + from_nodeid = ms->m_header.h_nodeid; + our_nodeid = dlm_our_nodeid(); + + len = receive_extralen(ms); + + dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); + if (dir_nodeid != our_nodeid) { + log_error(ls, "lookup dir_nodeid %d from %d", + dir_nodeid, from_nodeid); + error = -EINVAL; + ret_nodeid = -1; + goto out; + } + + error = dlm_dir_lookup(ls, from_nodeid, ms->m_extra, len, &ret_nodeid); + + /* Optimization: we're master so treat lookup as a request */ + if (!error && ret_nodeid == our_nodeid) { + receive_request(ls, ms); + return; + } + out: + send_lookup_reply(ls, ms, ret_nodeid, error); +} + +static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms) +{ + int len, dir_nodeid, from_nodeid; + + from_nodeid = ms->m_header.h_nodeid; + + len = receive_extralen(ms); + + dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash); + if (dir_nodeid != dlm_our_nodeid()) { + log_error(ls, "remove dir entry dir_nodeid %d from %d", + dir_nodeid, from_nodeid); + return; + } + + dlm_dir_remove_entry(ls, from_nodeid, ms->m_extra, len); +} + +static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error, mstype; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_request_reply no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + mstype = lkb->lkb_wait_type; + error = remove_from_waiters(lkb); + if (error) { + log_error(ls, "receive_request_reply not on waiters"); + goto out; + } + + /* this is the value returned from do_request() on the master */ + error = ms->m_result; + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + /* Optimization: the dir node was also the master, so it took our + lookup as a request and sent request reply instead of lookup reply */ + if (mstype == DLM_MSG_LOOKUP) { + r->res_nodeid = ms->m_header.h_nodeid; + lkb->lkb_nodeid = r->res_nodeid; + } + + switch (error) { + case -EAGAIN: + /* request would block (be queued) on remote master; + the unhold undoes the original ref from create_lkb() + so it leads to the lkb being freed */ + queue_cast(r, lkb, -EAGAIN); + confirm_master(r, -EAGAIN); + unhold_lkb(lkb); + break; + + case 
-EINPROGRESS: + case 0: + /* request was queued or granted on remote master */ + receive_flags_reply(lkb, ms); + lkb->lkb_remid = ms->m_lkid; + if (error) + add_lkb(r, lkb, DLM_LKSTS_WAITING); + else { + grant_lock_pc(r, lkb, ms); + queue_cast(r, lkb, 0); + } + confirm_master(r, error); + break; + + case -ENOENT: + case -ENOTBLK: + /* find_rsb failed to find rsb or rsb wasn't master */ + r->res_nodeid = -1; + lkb->lkb_nodeid = -1; + _request_lock(r, lkb); + break; + + default: + log_error(ls, "receive_request_reply error %d", error); + } + + unlock_rsb(r); + put_rsb(r); + out: + put_lkb(lkb); +} + +static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, + struct dlm_message *ms) +{ + int error = ms->m_result; + + /* this is the value returned from do_convert() on the master */ + + switch (error) { + case -EAGAIN: + /* convert would block (be queued) on remote master */ + queue_cast(r, lkb, -EAGAIN); + break; + + case -EINPROGRESS: + /* convert was queued on remote master */ + del_lkb(r, lkb); + add_lkb(r, lkb, DLM_LKSTS_CONVERT); + break; + + case 0: + /* convert was granted on remote master */ + receive_flags_reply(lkb, ms); + grant_lock_pc(r, lkb, ms); + queue_cast(r, lkb, 0); + break; + + default: + log_error(r->res_ls, "receive_convert_reply error %d", error); + } +} + +static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + struct dlm_rsb *r = lkb->lkb_resource; + + hold_rsb(r); + lock_rsb(r); + + __receive_convert_reply(r, lkb, ms); + + unlock_rsb(r); + put_rsb(r); +} + +static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_convert_reply no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + error = remove_from_waiters(lkb); + if (error) { + log_error(ls, "receive_convert_reply not on waiters"); + goto out; + } + + _receive_convert_reply(lkb, ms); + out: + put_lkb(lkb); +} + +static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + struct dlm_rsb *r = lkb->lkb_resource; + int error = ms->m_result; + + hold_rsb(r); + lock_rsb(r); + + /* this is the value returned from do_unlock() on the master */ + + switch (error) { + case -DLM_EUNLOCK: + receive_flags_reply(lkb, ms); + remove_lock_pc(r, lkb); + queue_cast(r, lkb, -DLM_EUNLOCK); + break; + default: + log_error(r->res_ls, "receive_unlock_reply error %d", error); + } + + unlock_rsb(r); + put_rsb(r); +} + +static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_unlock_reply no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + error = remove_from_waiters(lkb); + if (error) { + log_error(ls, "receive_unlock_reply not on waiters"); + goto out; + } + + _receive_unlock_reply(lkb, ms); + out: + put_lkb(lkb); +} + +static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) +{ + struct dlm_rsb *r = lkb->lkb_resource; + int error = ms->m_result; + + hold_rsb(r); + lock_rsb(r); + + /* this is the value returned from do_cancel() on the master */ + + switch (error) { + case -DLM_ECANCEL: + receive_flags_reply(lkb, ms); + revert_lock_pc(r, lkb); + queue_cast(r, lkb, -DLM_ECANCEL); + break; + default: + log_error(r->res_ls, "receive_cancel_reply error %d", error); + } + + unlock_rsb(r); + put_rsb(r); +} + 
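/* Editor's aside: a minimal, illustrative sketch (not part of the
   patch) of how a kernel caller might consume the casts queued by the
   reply handlers above. The completion ast receives the caller's
   astparam, and lksb->sb_status carries the same values passed to
   queue_cast(): 0, -EAGAIN, -DLM_EUNLOCK, -DLM_ECANCEL, etc. The
   lockspace handle, resource name and callbacks here are hypothetical;
   the dlm_lock() argument order is taken from the device.c callers
   later in this series (the trailing NULL is the range argument that a
   later patch removes). */

static void example_ast(void *astarg)
{
	struct dlm_lksb *lksb = astarg;

	if (lksb->sb_status == -DLM_ECANCEL)
		printk(KERN_INFO "lock %x was cancelled\n", lksb->sb_lkid);
}

static int example_request(void *lockspace, struct dlm_lksb *lksb)
{
	/* ask for an exclusive lock; a 0 return only means the request
	   was accepted -- the result is delivered through example_ast() */
	return dlm_lock(lockspace, DLM_LOCK_EX, lksb, 0, "example", 7,
			0, example_ast, lksb, NULL, NULL);
}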
+static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + int error; + + error = find_lkb(ls, ms->m_remid, &lkb); + if (error) { + log_error(ls, "receive_cancel_reply no lkb"); + return; + } + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + error = remove_from_waiters(lkb); + if (error) { + log_error(ls, "receive_cancel_reply not on waiters"); + goto out; + } + + _receive_cancel_reply(lkb, ms); + out: + put_lkb(lkb); +} + +static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error, ret_nodeid; + + error = find_lkb(ls, ms->m_lkid, &lkb); + if (error) { + log_error(ls, "receive_lookup_reply no lkb"); + return; + } + + error = remove_from_waiters(lkb); + if (error) { + log_error(ls, "receive_lookup_reply not on waiters"); + goto out; + } + + /* this is the value returned by dlm_dir_lookup on dir node + FIXME: will a non-zero error ever be returned? */ + error = ms->m_result; + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + ret_nodeid = ms->m_nodeid; + if (ret_nodeid == dlm_our_nodeid()) { + r->res_nodeid = 0; + ret_nodeid = 0; + r->res_first_lkid = 0; + } else { + /* set_master() will copy res_nodeid to lkb_nodeid */ + r->res_nodeid = ret_nodeid; + } + + _request_lock(r, lkb); + + if (!ret_nodeid) + process_lookup_list(r); + + unlock_rsb(r); + put_rsb(r); + out: + put_lkb(lkb); +} + +int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) +{ + struct dlm_message *ms = (struct dlm_message *) hd; + struct dlm_ls *ls; + int error; + + if (!recovery) + dlm_message_in(ms); + + ls = dlm_find_lockspace_global(hd->h_lockspace); + if (!ls) { + log_print("drop message %d from %d for unknown lockspace %d", + ms->m_type, nodeid, hd->h_lockspace); + return -EINVAL; + } + + /* recovery may have just ended leaving a bunch of backed-up requests + in the requestqueue; wait while dlm_recoverd clears them */ + + if (!recovery) + dlm_wait_requestqueue(ls); + + /* recovery may have just started while there were a bunch of + in-flight requests -- save them in requestqueue to be processed + after recovery. we can't let dlm_recvd block on the recovery + lock. if dlm_recoverd is calling this function to clear the + requestqueue, it needs to be interrupted (-EINTR) if another + recovery operation is starting. 
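   the loop below implements that: while locking is stopped, the
   normal receive path saves the message on the requestqueue and
   recoverd's replay path gives up with -EINTR; otherwise we spin
   with schedule() until lock_recovery_try() succeeds.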
*/ + + while (1) { + if (dlm_locking_stopped(ls)) { + if (!recovery) + dlm_add_requestqueue(ls, nodeid, hd); + error = -EINTR; + goto out; + } + + if (lock_recovery_try(ls)) + break; + schedule(); + } + + switch (ms->m_type) { + + /* messages sent to a master node */ + + case DLM_MSG_REQUEST: + receive_request(ls, ms); + break; + + case DLM_MSG_CONVERT: + receive_convert(ls, ms); + break; + + case DLM_MSG_UNLOCK: + receive_unlock(ls, ms); + break; + + case DLM_MSG_CANCEL: + receive_cancel(ls, ms); + break; + + /* messages sent from a master node (replies to above) */ + + case DLM_MSG_REQUEST_REPLY: + receive_request_reply(ls, ms); + break; + + case DLM_MSG_CONVERT_REPLY: + receive_convert_reply(ls, ms); + break; + + case DLM_MSG_UNLOCK_REPLY: + receive_unlock_reply(ls, ms); + break; + + case DLM_MSG_CANCEL_REPLY: + receive_cancel_reply(ls, ms); + break; + + /* messages sent from a master node (only two types of async msg) */ + + case DLM_MSG_GRANT: + receive_grant(ls, ms); + break; + + case DLM_MSG_BAST: + receive_bast(ls, ms); + break; + + /* messages sent to a dir node */ + + case DLM_MSG_LOOKUP: + receive_lookup(ls, ms); + break; + + case DLM_MSG_REMOVE: + receive_remove(ls, ms); + break; + + /* messages sent from a dir node (remove has no reply) */ + + case DLM_MSG_LOOKUP_REPLY: + receive_lookup_reply(ls, ms); + break; + + default: + log_error(ls, "unknown message type %d", ms->m_type); + } + + unlock_recovery(ls); + out: + dlm_put_lockspace(ls); + dlm_astd_wake(); + return 0; +} + + +/* + * Recovery related + */ + +static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + if (middle_conversion(lkb)) { + hold_lkb(lkb); + ls->ls_stub_ms.m_result = -EINPROGRESS; + _remove_from_waiters(lkb); + _receive_convert_reply(lkb, &ls->ls_stub_ms); + + /* Same special case as in receive_rcom_lock_args() */ + lkb->lkb_grmode = DLM_LOCK_IV; + rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT); + unhold_lkb(lkb); + + } else if (lkb->lkb_rqmode >= lkb->lkb_grmode) { + lkb->lkb_flags |= DLM_IFL_RESEND; + } + + /* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down + conversions are async; there's no reply from the remote master */ +} + +/* A waiting lkb needs recovery if the master node has failed, or + the master node is changing (only when no directory is used) */ + +static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + if (dlm_is_removed(ls, lkb->lkb_nodeid)) + return 1; + + if (!dlm_no_directory(ls)) + return 0; + + if (dlm_dir_nodeid(lkb->lkb_resource) != lkb->lkb_nodeid) + return 1; + + return 0; +} + +/* Recovery for locks that are waiting for replies from nodes that are now + gone. We can just complete unlocks and cancels by faking a reply from the + dead node. Requests and up-conversions we flag to be resent after + recovery. Down-conversions can just be completed with a fake reply like + unlocks. Conversions between PR and CW need special attention. 
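   (a conversion between PR and CW is neither strictly up nor down, so
   the remote result cannot be inferred; recover_convert_waiter() above
   fakes an -EINPROGRESS reply, downgrades lkb_grmode to IV and flags
   the rsb RSB_RECOVER_CONVERT so the real granted mode can be worked
   out once all locks have been rebuilt.)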
*/ + +void dlm_recover_waiters_pre(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb, *safe; + + down(&ls->ls_waiters_sem); + + list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { + log_debug(ls, "pre recover waiter lkid %x type %d flags %x", + lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); + + /* all outstanding lookups, regardless of destination will be + resent after recovery is done */ + + if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) { + lkb->lkb_flags |= DLM_IFL_RESEND; + continue; + } + + if (!waiter_needs_recovery(ls, lkb)) + continue; + + switch (lkb->lkb_wait_type) { + + case DLM_MSG_REQUEST: + lkb->lkb_flags |= DLM_IFL_RESEND; + break; + + case DLM_MSG_CONVERT: + recover_convert_waiter(ls, lkb); + break; + + case DLM_MSG_UNLOCK: + hold_lkb(lkb); + ls->ls_stub_ms.m_result = -DLM_EUNLOCK; + _remove_from_waiters(lkb); + _receive_unlock_reply(lkb, &ls->ls_stub_ms); + put_lkb(lkb); + break; + + case DLM_MSG_CANCEL: + hold_lkb(lkb); + ls->ls_stub_ms.m_result = -DLM_ECANCEL; + _remove_from_waiters(lkb); + _receive_cancel_reply(lkb, &ls->ls_stub_ms); + put_lkb(lkb); + break; + + default: + log_error(ls, "invalid lkb wait_type %d", + lkb->lkb_wait_type); + } + } + up(&ls->ls_waiters_sem); +} + +static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) +{ + struct dlm_lkb *lkb; + int rv = 0; + + down(&ls->ls_waiters_sem); + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { + if (lkb->lkb_flags & DLM_IFL_RESEND) { + rv = lkb->lkb_wait_type; + _remove_from_waiters(lkb); + lkb->lkb_flags &= ~DLM_IFL_RESEND; + break; + } + } + up(&ls->ls_waiters_sem); + + if (!rv) + lkb = NULL; + *lkb_ret = lkb; + return rv; +} + +/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the + master or dir-node for r. Processing the lkb may result in it being placed + back on waiters. 
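   (the switch below re-drives _request_lock()/_convert_lock(); if the
   target is still remote the message is simply resent and the lkb goes
   back on waiters, while a lookup that now resolves to us is confirmed
   as master directly.)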
*/ + +int dlm_recover_waiters_post(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb; + struct dlm_rsb *r; + int error = 0, mstype; + + while (1) { + if (dlm_locking_stopped(ls)) { + log_debug(ls, "recover_waiters_post aborted"); + error = -EINTR; + break; + } + + mstype = remove_resend_waiter(ls, &lkb); + if (!mstype) + break; + + r = lkb->lkb_resource; + + log_debug(ls, "recover_waiters_post %x type %d flags %x %s", + lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); + + switch (mstype) { + + case DLM_MSG_LOOKUP: + hold_rsb(r); + lock_rsb(r); + _request_lock(r, lkb); + if (is_master(r)) + confirm_master(r, 0); + unlock_rsb(r); + put_rsb(r); + break; + + case DLM_MSG_REQUEST: + hold_rsb(r); + lock_rsb(r); + _request_lock(r, lkb); + unlock_rsb(r); + put_rsb(r); + break; + + case DLM_MSG_CONVERT: + hold_rsb(r); + lock_rsb(r); + _convert_lock(r, lkb); + unlock_rsb(r); + put_rsb(r); + break; + + default: + log_error(ls, "recover_waiters_post type %d", mstype); + } + } + + return error; +} + +static void purge_queue(struct dlm_rsb *r, struct list_head *queue, + int (*test)(struct dlm_ls *ls, struct dlm_lkb *lkb)) +{ + struct dlm_ls *ls = r->res_ls; + struct dlm_lkb *lkb, *safe; + + list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { + if (test(ls, lkb)) { + del_lkb(r, lkb); + /* this put should free the lkb */ + if (!put_lkb(lkb)) + log_error(ls, "purged lkb not released"); + } + } +} + +static int purge_dead_test(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + return (is_master_copy(lkb) && dlm_is_removed(ls, lkb->lkb_nodeid)); +} + +static int purge_mstcpy_test(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + return is_master_copy(lkb); +} + +static void purge_dead_locks(struct dlm_rsb *r) +{ + purge_queue(r, &r->res_grantqueue, &purge_dead_test); + purge_queue(r, &r->res_convertqueue, &purge_dead_test); + purge_queue(r, &r->res_waitqueue, &purge_dead_test); +} + +void dlm_purge_mstcpy_locks(struct dlm_rsb *r) +{ + purge_queue(r, &r->res_grantqueue, &purge_mstcpy_test); + purge_queue(r, &r->res_convertqueue, &purge_mstcpy_test); + purge_queue(r, &r->res_waitqueue, &purge_mstcpy_test); +} + +/* Get rid of locks held by nodes that are gone. 
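   Called during recovery with the root list held; only rsb's we
   master are touched, and the schedule() call keeps a long root list
   from monopolizing the cpu.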
*/ + +int dlm_purge_locks(struct dlm_ls *ls) +{ + struct dlm_rsb *r; + + log_debug(ls, "dlm_purge_locks"); + + down_write(&ls->ls_root_sem); + list_for_each_entry(r, &ls->ls_root_list, res_root_list) { + hold_rsb(r); + lock_rsb(r); + if (is_master(r)) + purge_dead_locks(r); + unlock_rsb(r); + unhold_rsb(r); + + schedule(); + } + up_write(&ls->ls_root_sem); + + return 0; +} + +int dlm_grant_after_purge(struct dlm_ls *ls) +{ + struct dlm_rsb *r; + int i; + + for (i = 0; i < ls->ls_rsbtbl_size; i++) { + read_lock(&ls->ls_rsbtbl[i].lock); + list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { + hold_rsb(r); + lock_rsb(r); + if (is_master(r)) { + grant_pending_locks(r); + confirm_master(r, 0); + } + unlock_rsb(r); + put_rsb(r); + } + read_unlock(&ls->ls_rsbtbl[i].lock); + } + + return 0; +} + +static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, + uint32_t remid) +{ + struct dlm_lkb *lkb; + + list_for_each_entry(lkb, head, lkb_statequeue) { + if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) + return lkb; + } + return NULL; +} + +static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid, + uint32_t remid) +{ + struct dlm_lkb *lkb; + + lkb = search_remid_list(&r->res_grantqueue, nodeid, remid); + if (lkb) + return lkb; + lkb = search_remid_list(&r->res_convertqueue, nodeid, remid); + if (lkb) + return lkb; + lkb = search_remid_list(&r->res_waitqueue, nodeid, remid); + if (lkb) + return lkb; + return NULL; +} + +static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_rsb *r, struct dlm_rcom *rc) +{ + struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; + int lvblen; + + lkb->lkb_nodeid = rc->rc_header.h_nodeid; + lkb->lkb_ownpid = rl->rl_ownpid; + lkb->lkb_remid = rl->rl_lkid; + lkb->lkb_exflags = rl->rl_exflags; + lkb->lkb_flags = rl->rl_flags & 0x0000FFFF; + lkb->lkb_flags |= DLM_IFL_MSTCPY; + lkb->lkb_lvbseq = rl->rl_lvbseq; + lkb->lkb_rqmode = rl->rl_rqmode; + lkb->lkb_grmode = rl->rl_grmode; + /* don't set lkb_status because add_lkb wants to itself */ + + lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST); + lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); + + if (lkb->lkb_flags & DLM_IFL_RANGE) { + lkb->lkb_range = allocate_range(ls); + if (!lkb->lkb_range) + return -ENOMEM; + memcpy(lkb->lkb_range, rl->rl_range, 4*sizeof(uint64_t)); + } + + if (lkb->lkb_exflags & DLM_LKF_VALBLK) { + lkb->lkb_lvbptr = allocate_lvb(ls); + if (!lkb->lkb_lvbptr) + return -ENOMEM; + lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - + sizeof(struct rcom_lock); + memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen); + } + + /* Conversions between PR and CW (middle modes) need special handling. + The real granted mode of these converting locks cannot be determined + until all locks have been rebuilt on the rsb (recover_conversion) */ + + if (rl->rl_wait_type == DLM_MSG_CONVERT && middle_conversion(lkb)) { + rl->rl_status = DLM_LKSTS_CONVERT; + lkb->lkb_grmode = DLM_LOCK_IV; + rsb_set_flag(r, RSB_RECOVER_CONVERT); + } + + return 0; +} + +/* This lkb may have been recovered in a previous aborted recovery so we need + to check if the rsb already has an lkb with the given remote nodeid/lkid. + If so we just send back a standard reply. If not, we create a new lkb with + the given values and send back our lkid. We send back our lkid by sending + back the rcom_lock struct we got but with the remid field filled in. 
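   (an -EEXIST from the search means a previous, aborted recovery
   already created the master copy; we skip the create, return the
   existing lkid in rl_remid, and the lock holder treats -EEXIST in
   rl_result the same as success.)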
*/ + +int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) +{ + struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; + struct dlm_rsb *r; + struct dlm_lkb *lkb; + int error; + + if (rl->rl_parent_lkid) { + error = -EOPNOTSUPP; + goto out; + } + + error = find_rsb(ls, rl->rl_name, rl->rl_namelen, R_MASTER, &r); + if (error) + goto out; + + lock_rsb(r); + + lkb = search_remid(r, rc->rc_header.h_nodeid, rl->rl_lkid); + if (lkb) { + error = -EEXIST; + goto out_remid; + } + + error = create_lkb(ls, &lkb); + if (error) + goto out_unlock; + + error = receive_rcom_lock_args(ls, lkb, r, rc); + if (error) { + put_lkb(lkb); + goto out_unlock; + } + + attach_lkb(r, lkb); + add_lkb(r, lkb, rl->rl_status); + error = 0; + + out_remid: + /* this is the new value returned to the lock holder for + saving in its process-copy lkb */ + rl->rl_remid = lkb->lkb_id; + + out_unlock: + unlock_rsb(r); + put_rsb(r); + out: + if (error) + log_print("recover_master_copy %d %x", error, rl->rl_lkid); + rl->rl_result = error; + return error; +} + +int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) +{ + struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf; + struct dlm_rsb *r; + struct dlm_lkb *lkb; + int error; + + error = find_lkb(ls, rl->rl_lkid, &lkb); + if (error) { + log_error(ls, "recover_process_copy no lkid %x", rl->rl_lkid); + return error; + } + + DLM_ASSERT(is_process_copy(lkb), dlm_print_lkb(lkb);); + + error = rl->rl_result; + + r = lkb->lkb_resource; + hold_rsb(r); + lock_rsb(r); + + switch (error) { + case -EEXIST: + log_debug(ls, "master copy exists %x", lkb->lkb_id); + /* fall through */ + case 0: + lkb->lkb_remid = rl->rl_remid; + break; + default: + log_error(ls, "dlm_recover_process_copy unknown error %d %x", + error, lkb->lkb_id); + } + + /* an ack for dlm_recover_locks() which waits for replies from + all the locks it sends to new masters */ + dlm_recovered_lock(r); + + unlock_rsb(r); + put_rsb(r); + put_lkb(lkb); + + return 0; +} + -- cgit 1.2.3-korg From 901359256b2666f52a3a7d3f31927677e91b3a2a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 20 Jan 2006 08:47:07 +0000 Subject: [DLM] Update DLM to the latest patch level Signed-off-by: David Teigland Signed-off-by: Steve Whitehouse --- fs/dlm/ast.c | 16 ++++++------ fs/dlm/config.c | 20 +++++++------- fs/dlm/debug_fs.c | 4 +-- fs/dlm/device.c | 55 +++++++++++++++++++++++---------------- fs/dlm/dir.c | 4 +-- fs/dlm/dlm_internal.h | 23 ++++------------ fs/dlm/lock.c | 72 +++++++++++++++++++++++++-------------------------- fs/dlm/lock.h | 4 +-- fs/dlm/lockspace.c | 21 +++++++-------- fs/dlm/member.c | 17 ++++++------ fs/dlm/midcomms.c | 2 +- fs/dlm/recover.c | 18 ++++++------- fs/dlm/recoverd.c | 16 ++++++------ fs/dlm/requestqueue.c | 26 +++++++++---------- 14 files changed, 147 insertions(+), 151 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 2bd1c5e1a72c73..57bdf09b520a8a 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -21,7 +21,7 @@ static struct list_head ast_queue; static spinlock_t ast_queue_lock; static struct task_struct * astd_task; static unsigned long astd_wakeflags; -static struct semaphore astd_running; +static struct mutex astd_running; void dlm_del_ast(struct dlm_lkb *lkb) @@ -56,7 +56,7 @@ static void process_asts(void) int type = 0, found, bmode; for (;;) { - found = FALSE; + found = 0; spin_lock(&ast_queue_lock); list_for_each_entry(lkb, &ast_queue, lkb_astqueue) { r = lkb->lkb_resource; @@ -68,7 +68,7 @@ static void process_asts(void) 
list_del(&lkb->lkb_astqueue); type = lkb->lkb_ast_type; lkb->lkb_ast_type = 0; - found = TRUE; + found = 1; break; } spin_unlock(&ast_queue_lock); @@ -117,10 +117,10 @@ static int dlm_astd(void *data) schedule(); set_current_state(TASK_RUNNING); - down(&astd_running); + mutex_lock(&astd_running); if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) process_asts(); - up(&astd_running); + mutex_unlock(&astd_running); } return 0; } @@ -140,7 +140,7 @@ int dlm_astd_start(void) INIT_LIST_HEAD(&ast_queue); spin_lock_init(&ast_queue_lock); - init_MUTEX(&astd_running); + mutex_init(&astd_running); p = kthread_run(dlm_astd, NULL, "dlm_astd"); if (IS_ERR(p)) @@ -157,11 +157,11 @@ void dlm_astd_stop(void) void dlm_astd_suspend(void) { - down(&astd_running); + mutex_lock(&astd_running); } void dlm_astd_resume(void) { - up(&astd_running); + mutex_unlock(&astd_running); } diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 024ace9973a81b..87df9616415e5d 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -162,7 +162,7 @@ struct spaces { struct space { struct config_group group; struct list_head members; - struct semaphore members_lock; + struct mutex members_lock; int members_count; }; @@ -374,7 +374,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) sp->group.default_groups[1] = NULL; INIT_LIST_HEAD(&sp->members); - init_MUTEX(&sp->members_lock); + mutex_init(&sp->members_lock); sp->members_count = 0; return &sp->group; @@ -453,10 +453,10 @@ static struct config_item *make_node(struct config_group *g, const char *name) nd->nodeid = -1; nd->weight = 1; /* default weight of 1 if none is set */ - down(&sp->members_lock); + mutex_lock(&sp->members_lock); list_add(&nd->list, &sp->members); sp->members_count++; - up(&sp->members_lock); + mutex_unlock(&sp->members_lock); return &nd->item; } @@ -466,10 +466,10 @@ static void drop_node(struct config_group *g, struct config_item *i) struct space *sp = to_space(g->cg_item.ci_parent); struct node *nd = to_node(i); - down(&sp->members_lock); + mutex_lock(&sp->members_lock); list_del(&nd->list); sp->members_count--; - up(&sp->members_lock); + mutex_unlock(&sp->members_lock); config_item_put(i); } @@ -677,7 +677,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out) if (!sp) return -EEXIST; - down(&sp->members_lock); + mutex_lock(&sp->members_lock); if (!sp->members_count) { rv = 0; goto out; @@ -698,7 +698,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out) *ids_out = ids; out: - up(&sp->members_lock); + mutex_unlock(&sp->members_lock); put_space(sp); return rv; } @@ -713,14 +713,14 @@ int dlm_node_weight(char *lsname, int nodeid) if (!sp) goto out; - down(&sp->members_lock); + mutex_lock(&sp->members_lock); list_for_each_entry(nd, &sp->members, list) { if (nd->nodeid != nodeid) continue; w = nd->weight; break; } - up(&sp->members_lock); + mutex_unlock(&sp->members_lock); put_space(sp); out: return w; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 98b49a1ece4713..5080bbffd586a7 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -63,12 +63,12 @@ static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, /* FIXME: this warns on Alpha */ if (lkb->lkb_status == DLM_LKSTS_CONVERT || lkb->lkb_status == DLM_LKSTS_GRANTED) - seq_printf(s, " %" PRIx64 "-%" PRIx64, + seq_printf(s, " %llx-%llx", lkb->lkb_range[GR_RANGE_START], lkb->lkb_range[GR_RANGE_END]); if (lkb->lkb_status == DLM_LKSTS_CONVERT || lkb->lkb_status == DLM_LKSTS_WAITING) - seq_printf(s, " (%" PRIx64 "-%" PRIx64 ")", + seq_printf(s, " (%llx-%llx)", 
lkb->lkb_range[RQ_RANGE_START], lkb->lkb_range[RQ_RANGE_END]); } diff --git a/fs/dlm/device.c b/fs/dlm/device.c index a8bf600ed13dd7..899d4f92a4d714 100644 --- a/fs/dlm/device.c +++ b/fs/dlm/device.c @@ -43,7 +43,7 @@ static struct file_operations _dlm_fops; static const char *name_prefix="dlm"; static struct list_head user_ls_list; -static struct semaphore user_ls_lock; +static struct mutex user_ls_lock; /* Lock infos are stored in here indexed by lock ID */ static DEFINE_IDR(lockinfo_idr); @@ -53,6 +53,7 @@ static rwlock_t lockinfo_lock; #define LI_FLAG_COMPLETE 1 #define LI_FLAG_FIRSTLOCK 2 #define LI_FLAG_PERSISTENT 3 +#define LI_FLAG_ONLIST 4 /* flags in ls_flags*/ #define LS_FLAG_DELETED 1 @@ -211,18 +212,18 @@ static struct user_ls *find_lockspace(int minor) { struct user_ls *lsinfo; - down(&user_ls_lock); + mutex_lock(&user_ls_lock); lsinfo = __find_lockspace(minor); - up(&user_ls_lock); + mutex_unlock(&user_ls_lock); return lsinfo; } static void add_lockspace_to_list(struct user_ls *lsinfo) { - down(&user_ls_lock); + mutex_lock(&user_ls_lock); list_add(&lsinfo->ls_list, &user_ls_list); - up(&user_ls_lock); + mutex_unlock(&user_ls_lock); } /* Register a lockspace with the DLM and create a misc @@ -235,12 +236,11 @@ static int register_lockspace(char *name, struct user_ls **ls, int flags) namelen = strlen(name)+strlen(name_prefix)+2; - newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL); + newls = kzalloc(sizeof(struct user_ls), GFP_KERNEL); if (!newls) return -ENOMEM; - memset(newls, 0, sizeof(struct user_ls)); - newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL); + newls->ls_miscinfo.name = kzalloc(namelen, GFP_KERNEL); if (!newls->ls_miscinfo.name) { kfree(newls); return -ENOMEM; @@ -277,7 +277,7 @@ static int register_lockspace(char *name, struct user_ls **ls, int flags) return 0; } -/* Called with the user_ls_lock semaphore held */ +/* Called with the user_ls_lock mutex held */ static int unregister_lockspace(struct user_ls *lsinfo, int force) { int status; @@ -305,11 +305,10 @@ static int unregister_lockspace(struct user_ls *lsinfo, int force) static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam, int lvb_updated) { - struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL); + struct ast_info *ast = kzalloc(sizeof(struct ast_info), GFP_KERNEL); if (!ast) return; - memset(ast, 0, sizeof(*ast)); ast->result.user_astparam = astparam; ast->result.user_astaddr = astaddr; ast->result.user_lksb = li->li_user_lksb; @@ -382,6 +381,7 @@ static void ast_routine(void *param) spin_lock(&li->li_file->fi_li_lock); list_del(&li->li_ownerqueue); + clear_bit(LI_FLAG_ONLIST, &li->li_flags); spin_unlock(&li->li_file->fi_li_lock); release_lockinfo(li); return; @@ -437,7 +437,7 @@ static int dlm_open(struct inode *inode, struct file *file) if (!lsinfo) return -ENOENT; - f = kmalloc(sizeof(struct file_info), GFP_KERNEL); + f = kzalloc(sizeof(struct file_info), GFP_KERNEL); if (!f) return -ENOMEM; @@ -570,7 +570,7 @@ static int dlm_close(struct inode *inode, struct file *file) * then free the struct. If it's an AUTOFREE lockspace * then free the whole thing. 
*/ - down(&user_ls_lock); + mutex_lock(&user_ls_lock); if (atomic_dec_and_test(&lsinfo->ls_refcnt)) { if (lsinfo->ls_lockspace) { @@ -582,7 +582,7 @@ static int dlm_close(struct inode *inode, struct file *file) kfree(lsinfo); } } - up(&user_ls_lock); + mutex_unlock(&user_ls_lock); put_file_info(f); /* Restore signals */ @@ -620,10 +620,10 @@ static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - down(&user_ls_lock); + mutex_lock(&user_ls_lock); lsinfo = __find_lockspace(kparams->minor); if (!lsinfo) { - up(&user_ls_lock); + mutex_unlock(&user_ls_lock); return -EINVAL; } @@ -631,7 +631,7 @@ static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd, force = 2; status = unregister_lockspace(lsinfo, force); - up(&user_ls_lock); + mutex_unlock(&user_ls_lock); return status; } @@ -752,7 +752,7 @@ static struct lock_info *allocate_lockinfo(struct file_info *fi, uint8_t cmd, if (!try_module_get(THIS_MODULE)) return NULL; - li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); + li = kzalloc(sizeof(struct lock_info), GFP_KERNEL); if (li) { li->li_magic = LOCKINFO_MAGIC; li->li_file = fi; @@ -800,8 +800,10 @@ static int do_user_lock(struct file_info *fi, uint8_t cmd, /* If this is a persistent lock we will have to create a lockinfo again */ - if (!li && DLM_LKF_PERSISTENT) { + if (!li && (kparams->flags & DLM_LKF_PERSISTENT)) { li = allocate_lockinfo(fi, cmd, kparams); + if (!li) + return -ENOMEM; li->li_lksb.sb_lkid = kparams->lkid; li->li_castaddr = kparams->castaddr; @@ -887,6 +889,7 @@ static int do_user_lock(struct file_info *fi, uint8_t cmd, spin_lock(&fi->fi_li_lock); list_add(&li->li_ownerqueue, &fi->fi_li_list); + set_bit(LI_FLAG_ONLIST, &li->li_flags); spin_unlock(&fi->fi_li_lock); if (add_lockinfo(li)) printk(KERN_WARNING "Add lockinfo failed\n"); @@ -914,12 +917,13 @@ static int do_user_unlock(struct file_info *fi, uint8_t cmd, li = get_lockinfo(kparams->lkid); if (!li) { li = allocate_lockinfo(fi, cmd, kparams); + if (!li) + return -ENOMEM; spin_lock(&fi->fi_li_lock); list_add(&li->li_ownerqueue, &fi->fi_li_list); + set_bit(LI_FLAG_ONLIST, &li->li_flags); spin_unlock(&fi->fi_li_lock); } - if (!li) - return -ENOMEM; if (li->li_magic != LOCKINFO_MAGIC) return -EINVAL; @@ -932,6 +936,12 @@ static int do_user_unlock(struct file_info *fi, uint8_t cmd, if (kparams->flags & DLM_LKF_CANCEL && li->li_grmode != -1) convert_cancel = 1; + /* Wait until dlm_lock() has completed */ + if (!test_bit(LI_FLAG_ONLIST, &li->li_flags)) { + down(&li->li_firstlock); + up(&li->li_firstlock); + } + /* dlm_unlock() passes a 0 for castaddr which means don't overwrite the existing li_castaddr as that's the completion routine for unlocks. 
dlm_unlock_wait() specifies a new AST routine to be @@ -947,6 +957,7 @@ static int do_user_unlock(struct file_info *fi, uint8_t cmd, if (!status && !convert_cancel) { spin_lock(&fi->fi_li_lock); list_del(&li->li_ownerqueue); + clear_bit(LI_FLAG_ONLIST, &li->li_flags); spin_unlock(&fi->fi_li_lock); } @@ -1055,7 +1066,7 @@ static int __init dlm_device_init(void) int r; INIT_LIST_HEAD(&user_ls_list); - init_MUTEX(&user_ls_lock); + mutex_init(&user_ls_lock); rwlock_init(&lockinfo_lock); ctl_device.name = "dlm-control"; diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 0f1dde54bcd279..46754553fdcc4a 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -33,7 +33,7 @@ static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) { - int found = FALSE; + int found = 0; struct dlm_direntry *de; spin_lock(&ls->ls_recover_list_lock); @@ -42,7 +42,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) list_del(&de->list); de->master_nodeid = 0; memset(de->name, 0, len); - found = TRUE; + found = 1; break; } } diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 0020cd07baf708..16f20cfd9197c9 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -42,20 +43,6 @@ #define DLM_LOCKSPACE_LEN 64 -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -#if (BITS_PER_LONG == 64) -#define PRIx64 "lx" -#else -#define PRIx64 "Lx" -#endif - /* Size of the temp buffer midcomms allocates on the stack. We try to make this large enough so most messages fit. FIXME: should sctp make this unnecessary? */ @@ -266,7 +253,7 @@ struct dlm_lkb { struct dlm_rsb { struct dlm_ls *res_ls; /* the lockspace */ struct kref res_ref; - struct semaphore res_sem; + struct mutex res_mutex; unsigned long res_flags; int res_length; /* length of rsb name */ int res_nodeid; @@ -449,7 +436,7 @@ struct dlm_ls { struct dlm_dirtable *ls_dirtbl; uint32_t ls_dirtbl_size; - struct semaphore ls_waiters_sem; + struct mutex ls_waiters_mutex; struct list_head ls_waiters; /* lkbs needing a reply */ struct list_head ls_nodes; /* current nodes in ls */ @@ -472,14 +459,14 @@ struct dlm_ls { struct timer_list ls_timer; struct task_struct *ls_recoverd_task; - struct semaphore ls_recoverd_active; + struct mutex ls_recoverd_active; spinlock_t ls_recover_lock; uint32_t ls_recover_status; /* DLM_RS_ */ uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; struct rw_semaphore ls_in_recovery; /* block local requests */ struct list_head ls_requestqueue;/* queue remote requests */ - struct semaphore ls_requestqueue_lock; + struct mutex ls_requestqueue_mutex; char *ls_recover_buf; struct list_head ls_recover_list; spinlock_t ls_recover_list_lock; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 81efb361f95df9..29d3b95dbb6369 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -215,15 +215,15 @@ static inline int is_master_copy(struct dlm_lkb *lkb) { if (lkb->lkb_flags & DLM_IFL_MSTCPY) DLM_ASSERT(lkb->lkb_nodeid, dlm_print_lkb(lkb);); - return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? TRUE : FALSE; + return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 
1 : 0; } static inline int middle_conversion(struct dlm_lkb *lkb) { if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) || (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW)) - return TRUE; - return FALSE; + return 1; + return 0; } static inline int down_conversion(struct dlm_lkb *lkb) @@ -269,7 +269,7 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) r->res_ls = ls; r->res_length = len; memcpy(r->res_name, name, len); - init_MUTEX(&r->res_sem); + mutex_init(&r->res_mutex); INIT_LIST_HEAD(&r->res_lookup); INIT_LIST_HEAD(&r->res_grantqueue); @@ -712,7 +712,7 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; - down(&ls->ls_waiters_sem); + mutex_lock(&ls->ls_waiters_mutex); if (lkb->lkb_wait_type) { log_print("add_to_waiters error %d", lkb->lkb_wait_type); goto out; @@ -721,7 +721,7 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype) kref_get(&lkb->lkb_ref); list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); out: - up(&ls->ls_waiters_sem); + mutex_unlock(&ls->ls_waiters_mutex); } static int _remove_from_waiters(struct dlm_lkb *lkb) @@ -745,9 +745,9 @@ static int remove_from_waiters(struct dlm_lkb *lkb) struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error; - down(&ls->ls_waiters_sem); + mutex_lock(&ls->ls_waiters_mutex); error = _remove_from_waiters(lkb); - up(&ls->ls_waiters_sem); + mutex_unlock(&ls->ls_waiters_mutex); return error; } @@ -775,14 +775,14 @@ static int shrink_bucket(struct dlm_ls *ls, int b) int count = 0, found; for (;;) { - found = FALSE; + found = 0; write_lock(&ls->ls_rsbtbl[b].lock); list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss, res_hashchain) { if (!time_after_eq(jiffies, r->res_toss_time + dlm_config.toss_secs * HZ)) continue; - found = TRUE; + found = 1; break; } @@ -1027,9 +1027,9 @@ static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, lkb_statequeue); if (lkb->lkb_id == first->lkb_id) - return TRUE; + return 1; - return FALSE; + return 0; } /* Return 1 if the locks' ranges overlap. If the lkb has no range then it is @@ -1038,13 +1038,13 @@ static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) { if (!lkb1->lkb_range || !lkb2->lkb_range) - return TRUE; + return 1; if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) - return FALSE; + return 0; - return TRUE; + return 1; } /* Check if the given lkb conflicts with another lkb on the queue. 
*/ @@ -1057,9 +1057,9 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) if (this == lkb) continue; if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) - return TRUE; + return 1; } - return FALSE; + return 0; } /* @@ -1103,7 +1103,7 @@ static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) continue; if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) - return TRUE; + return 1; } /* if lkb is on the convert queue and is preventing the first @@ -1114,10 +1114,10 @@ static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) if (self && self != first) { if (!modes_compat(lkb, first) && !queue_conflict(&rsb->res_grantqueue, first)) - return TRUE; + return 1; } - return FALSE; + return 0; } /* @@ -1157,7 +1157,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (lkb->lkb_exflags & DLM_LKF_EXPEDITE) - return TRUE; + return 1; /* * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be @@ -1200,7 +1200,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT)) - return TRUE; + return 1; /* * When using range locks the NOORDER flag is set to avoid the standard @@ -1208,7 +1208,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (lkb->lkb_exflags & DLM_LKF_NOORDER) - return TRUE; + return 1; /* * 6-3: Once in that queue [CONVERTING], a conversion request cannot be @@ -1217,7 +1217,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) */ if (!now && conv && first_in_list(lkb, &r->res_convertqueue)) - return TRUE; + return 1; /* * 6-4: By default, a new request is immediately granted only if all @@ -1232,7 +1232,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (now && !conv && list_empty(&r->res_convertqueue) && list_empty(&r->res_waitqueue)) - return TRUE; + return 1; /* * 6-4: Once a lock request is in the queue of ungranted new requests, @@ -1244,7 +1244,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) if (!now && !conv && list_empty(&r->res_convertqueue) && first_in_list(lkb, &r->res_waitqueue)) - return TRUE; + return 1; out: /* @@ -1257,7 +1257,7 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) lkb->lkb_sbflags |= DLM_SBF_DEMOTED; } - return FALSE; + return 0; } /* @@ -1308,7 +1308,7 @@ static int grant_pending_convert(struct dlm_rsb *r, int high) list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { demoted = is_demoted(lkb); - if (can_be_granted(r, lkb, FALSE)) { + if (can_be_granted(r, lkb, 0)) { grant_lock_pending(r, lkb); grant_restart = 1; } else { @@ -1333,7 +1333,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high) struct dlm_lkb *lkb, *s; list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { - if (can_be_granted(r, lkb, FALSE)) + if (can_be_granted(r, lkb, 0)) grant_lock_pending(r, lkb); else high = max_t(int, lkb->lkb_rqmode, high); @@ -1705,7 +1705,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error = 0; - if (can_be_granted(r, lkb, TRUE)) { + if (can_be_granted(r, lkb, 1)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); goto out; @@ -1733,7 +1733,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) /* changing an existing lock may allow others to be granted */ - if (can_be_granted(r, lkb, TRUE)) { + if (can_be_granted(r, lkb, 1)) { 
grant_lock(r, lkb); queue_cast(r, lkb, 0); grant_pending_locks(r); @@ -2556,7 +2556,7 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) { struct dlm_lkb *lkb; struct dlm_rsb *r; - int error, reply = TRUE; + int error, reply = 1; error = find_lkb(ls, ms->m_remid, &lkb); if (error) @@ -3205,7 +3205,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; - down(&ls->ls_waiters_sem); + mutex_lock(&ls->ls_waiters_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { log_debug(ls, "pre recover waiter lkid %x type %d flags %x", @@ -3253,7 +3253,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) lkb->lkb_wait_type); } } - up(&ls->ls_waiters_sem); + mutex_unlock(&ls->ls_waiters_mutex); } static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) @@ -3261,7 +3261,7 @@ static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) struct dlm_lkb *lkb; int rv = 0; - down(&ls->ls_waiters_sem); + mutex_lock(&ls->ls_waiters_mutex); list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { if (lkb->lkb_flags & DLM_IFL_RESEND) { rv = lkb->lkb_wait_type; @@ -3270,7 +3270,7 @@ static int remove_resend_waiter(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) break; } } - up(&ls->ls_waiters_sem); + mutex_unlock(&ls->ls_waiters_mutex); if (!rv) lkb = NULL; diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 9e6499f773da77..bffab9c88b1daf 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -38,12 +38,12 @@ static inline int is_master(struct dlm_rsb *r) static inline void lock_rsb(struct dlm_rsb *r) { - down(&r->res_sem); + mutex_lock(&r->res_mutex); } static inline void unlock_rsb(struct dlm_rsb *r) { - up(&r->res_sem); + mutex_unlock(&r->res_mutex); } #endif diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index fee4659b6582f2..d2ff505d51cd45 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -31,7 +31,7 @@ static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } #endif static int ls_count; -static struct semaphore ls_lock; +static struct mutex ls_lock; static struct list_head lslist; static spinlock_t lslist_lock; static struct task_struct * scand_task; @@ -177,7 +177,7 @@ int dlm_lockspace_init(void) int error; ls_count = 0; - init_MUTEX(&ls_lock); + mutex_init(&ls_lock); INIT_LIST_HEAD(&lslist); spin_lock_init(&lslist_lock); @@ -351,10 +351,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace, return -EEXIST; } - ls = kmalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); + ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); if (!ls) goto out; - memset(ls, 0, sizeof(struct dlm_ls) + namelen); memcpy(ls->ls_name, name, namelen); ls->ls_namelen = namelen; ls->ls_exflags = flags; @@ -398,7 +397,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, } INIT_LIST_HEAD(&ls->ls_waiters); - init_MUTEX(&ls->ls_waiters_sem); + mutex_init(&ls->ls_waiters_mutex); INIT_LIST_HEAD(&ls->ls_nodes); INIT_LIST_HEAD(&ls->ls_nodes_gone); @@ -416,14 +415,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_uevent_result = 0; ls->ls_recoverd_task = NULL; - init_MUTEX(&ls->ls_recoverd_active); + mutex_init(&ls->ls_recoverd_active); spin_lock_init(&ls->ls_recover_lock); ls->ls_recover_status = 0; ls->ls_recover_seq = 0; ls->ls_recover_args = NULL; init_rwsem(&ls->ls_in_recovery); INIT_LIST_HEAD(&ls->ls_requestqueue); - init_MUTEX(&ls->ls_requestqueue_lock); + mutex_init(&ls->ls_requestqueue_mutex); ls->ls_recover_buf = 
kmalloc(dlm_config.buffer_size, GFP_KERNEL); if (!ls->ls_recover_buf) @@ -493,7 +492,7 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, { int error = 0; - down(&ls_lock); + mutex_lock(&ls_lock); if (!ls_count) error = threads_start(); if (error) @@ -503,7 +502,7 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, if (!error) ls_count++; out: - up(&ls_lock); + mutex_unlock(&ls_lock); return error; } @@ -629,11 +628,11 @@ static int release_lockspace(struct dlm_ls *ls, int force) kobject_unregister(&ls->ls_kobj); kfree(ls); - down(&ls_lock); + mutex_lock(&ls_lock); ls_count--; if (!ls_count) threads_stop(); - up(&ls_lock); + mutex_unlock(&ls_lock); module_put(THIS_MODULE); return 0; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 439249b62a574a..926cd0cb6bffff 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -52,7 +52,7 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid) struct dlm_member *memb; int w; - memb = kmalloc(sizeof(struct dlm_member), GFP_KERNEL); + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); if (!memb) return -ENOMEM; @@ -79,9 +79,9 @@ static int dlm_is_member(struct dlm_ls *ls, int nodeid) list_for_each_entry(memb, &ls->ls_nodes, list) { if (memb->nodeid == nodeid) - return TRUE; + return 1; } - return FALSE; + return 0; } int dlm_is_removed(struct dlm_ls *ls, int nodeid) @@ -90,9 +90,9 @@ int dlm_is_removed(struct dlm_ls *ls, int nodeid) list_for_each_entry(memb, &ls->ls_nodes_gone, list) { if (memb->nodeid == nodeid) - return TRUE; + return 1; } - return FALSE; + return 0; } static void clear_memb_list(struct list_head *head) @@ -178,10 +178,10 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) /* move departed members from ls_nodes to ls_nodes_gone */ list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) { - found = FALSE; + found = 0; for (i = 0; i < rv->node_count; i++) { if (memb->nodeid == rv->nodeids[i]) { - found = TRUE; + found = 1; break; } } @@ -271,10 +271,9 @@ int dlm_ls_start(struct dlm_ls *ls) int *ids = NULL; int error, count; - rv = kmalloc(sizeof(struct dlm_recover), GFP_KERNEL); + rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - memset(rv, 0, sizeof(struct dlm_recover)); error = count = dlm_nodeid_list(ls->ls_name, &ids); if (error <= 0) diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index d96f9bbb407c4f..c9b1c3d535f4c0 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -119,7 +119,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, switch (msg->h_cmd) { case DLM_MSG: - dlm_receive_message(msg, nodeid, FALSE); + dlm_receive_message(msg, nodeid, 0); break; case DLM_RCOM: diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 1712c97bc22937..b036ee7dcb32ae 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -420,7 +420,7 @@ int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc) r = recover_list_find(ls, rc->rc_id); if (!r) { - log_error(ls, "dlm_recover_master_reply no id %"PRIx64"", + log_error(ls, "dlm_recover_master_reply no id %llx", rc->rc_id); goto out; } @@ -477,8 +477,8 @@ static int all_queues_empty(struct dlm_rsb *r) if (!list_empty(&r->res_grantqueue) || !list_empty(&r->res_convertqueue) || !list_empty(&r->res_waitqueue)) - return FALSE; - return TRUE; + return 0; + return 1; } static int recover_locks(struct dlm_rsb *r) @@ -586,18 +586,18 @@ static void recover_lvb(struct dlm_rsb *r) { struct dlm_lkb *lkb, *high_lkb = NULL; uint32_t high_seq = 0; - int lock_lvb_exists = 
FALSE; - int big_lock_exists = FALSE; + int lock_lvb_exists = 0; + int big_lock_exists = 0; int lvblen = r->res_ls->ls_lvblen; list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) continue; - lock_lvb_exists = TRUE; + lock_lvb_exists = 1; if (lkb->lkb_grmode > DLM_LOCK_CR) { - big_lock_exists = TRUE; + big_lock_exists = 1; goto setflag; } @@ -611,10 +611,10 @@ static void recover_lvb(struct dlm_rsb *r) if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) continue; - lock_lvb_exists = TRUE; + lock_lvb_exists = 1; if (lkb->lkb_grmode > DLM_LOCK_CR) { - big_lock_exists = TRUE; + big_lock_exists = 1; goto setflag; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 06e4f7cab6e774..70103533677d91 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -45,9 +45,9 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) unsigned long start; int error, neg = 0; - log_debug(ls, "recover %"PRIx64"", rv->seq); + log_debug(ls, "recover %llx", rv->seq); - down(&ls->ls_recoverd_active); + mutex_lock(&ls->ls_recoverd_active); /* * Suspending and resuming dlm_astd ensures that no lkb's from this ls @@ -199,16 +199,16 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) dlm_astd_wake(); - log_debug(ls, "recover %"PRIx64" done: %u ms", rv->seq, + log_debug(ls, "recover %llx done: %u ms", rv->seq, jiffies_to_msecs(jiffies - start)); - up(&ls->ls_recoverd_active); + mutex_unlock(&ls->ls_recoverd_active); return 0; fail: dlm_release_root_list(ls); - log_debug(ls, "recover %"PRIx64" error %d", rv->seq, error); - up(&ls->ls_recoverd_active); + log_debug(ls, "recover %llx error %d", rv->seq, error); + mutex_unlock(&ls->ls_recoverd_active); return error; } @@ -275,11 +275,11 @@ void dlm_recoverd_stop(struct dlm_ls *ls) void dlm_recoverd_suspend(struct dlm_ls *ls) { - down(&ls->ls_recoverd_active); + mutex_lock(&ls->ls_recoverd_active); } void dlm_recoverd_resume(struct dlm_ls *ls) { - up(&ls->ls_recoverd_active); + mutex_unlock(&ls->ls_recoverd_active); } diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 36afe99e4f93e1..7b2b089634a2df 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -47,9 +47,9 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) e->nodeid = nodeid; memcpy(e->request, hd, length); - down(&ls->ls_requestqueue_lock); + mutex_lock(&ls->ls_requestqueue_mutex); list_add_tail(&e->list, &ls->ls_requestqueue); - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); } int dlm_process_requestqueue(struct dlm_ls *ls) @@ -58,19 +58,19 @@ int dlm_process_requestqueue(struct dlm_ls *ls) struct dlm_header *hd; int error = 0; - down(&ls->ls_requestqueue_lock); + mutex_lock(&ls->ls_requestqueue_mutex); for (;;) { if (list_empty(&ls->ls_requestqueue)) { - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); error = 0; break; } e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); hd = (struct dlm_header *) e->request; - error = dlm_receive_message(hd, e->nodeid, TRUE); + error = dlm_receive_message(hd, e->nodeid, 1); if (error == -EINTR) { /* entry is left on requestqueue */ @@ -78,13 +78,13 @@ int dlm_process_requestqueue(struct dlm_ls *ls) break; } - down(&ls->ls_requestqueue_lock); + mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); kfree(e); if (dlm_locking_stopped(ls)) { log_debug(ls, "process_requestqueue abort running"); - 
up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); error = -EINTR; break; } @@ -105,15 +105,15 @@ int dlm_process_requestqueue(struct dlm_ls *ls) void dlm_wait_requestqueue(struct dlm_ls *ls) { for (;;) { - down(&ls->ls_requestqueue_lock); + mutex_lock(&ls->ls_requestqueue_mutex); if (list_empty(&ls->ls_requestqueue)) break; if (dlm_locking_stopped(ls)) break; - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); schedule(); } - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); } static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) @@ -170,7 +170,7 @@ void dlm_purge_requestqueue(struct dlm_ls *ls) struct dlm_message *ms; struct rq_entry *e, *safe; - down(&ls->ls_requestqueue_lock); + mutex_lock(&ls->ls_requestqueue_mutex); list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { ms = (struct dlm_message *) e->request; @@ -179,6 +179,6 @@ void dlm_purge_requestqueue(struct dlm_ls *ls) kfree(e); } } - up(&ls->ls_requestqueue_lock); + mutex_unlock(&ls->ls_requestqueue_mutex); } -- cgit 1.2.3-korg From 3bcd3687f895f178fa8480a7bcc47a363817354a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 23 Feb 2006 09:56:38 +0000 Subject: [DLM] Remove range locks from the DLM This patch removes support for range locking from the DLM Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/debug_fs.c | 14 ------ fs/dlm/device.c | 6 +-- fs/dlm/dlm_internal.h | 12 ------ fs/dlm/lock.c | 103 ++++----------------------------------------- fs/dlm/memory.c | 16 ------- fs/dlm/memory.h | 2 - fs/dlm/rcom.c | 3 -- fs/dlm/util.c | 12 ------ include/linux/dlm.h | 12 +----- include/linux/dlm_device.h | 3 +- 10 files changed, 13 insertions(+), 170 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 5080bbffd586a7..49deca845dbaf6 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -59,20 +59,6 @@ static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, || lkb->lkb_status == DLM_LKSTS_WAITING) seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode)); - if (lkb->lkb_range) { - /* FIXME: this warns on Alpha */ - if (lkb->lkb_status == DLM_LKSTS_CONVERT - || lkb->lkb_status == DLM_LKSTS_GRANTED) - seq_printf(s, " %llx-%llx", - lkb->lkb_range[GR_RANGE_START], - lkb->lkb_range[GR_RANGE_END]); - if (lkb->lkb_status == DLM_LKSTS_CONVERT - || lkb->lkb_status == DLM_LKSTS_WAITING) - seq_printf(s, " (%llx-%llx)", - lkb->lkb_range[RQ_RANGE_START], - lkb->lkb_range[RQ_RANGE_END]); - } - if (lkb->lkb_nodeid) { if (lkb->lkb_nodeid != res->res_nodeid) seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid, diff --git a/fs/dlm/device.c b/fs/dlm/device.c index 899d4f92a4d714..99d8b6b07fbafc 100644 --- a/fs/dlm/device.c +++ b/fs/dlm/device.c @@ -532,8 +532,7 @@ static int dlm_close(struct inode *inode, struct file *file) status = dlm_lock(f->fi_ls->ls_lockspace, old_li->li_grmode, &li.li_lksb, DLM_LKF_CONVERT|DLM_LKF_ORPHAN, - NULL, 0, 0, ast_routine, NULL, - NULL, NULL); + NULL, 0, 0, ast_routine, NULL, NULL); if (status != 0) printk("dlm: Error orphaning lock %x: %d\n", old_li->li_lksb.sb_lkid, status); @@ -878,8 +877,7 @@ static int do_user_lock(struct file_info *fi, uint8_t cmd, ast_routine, li, (li->li_pend_bastaddr || li->li_bastaddr) ? - bast_routine : NULL, - kparams->range.ra_end ? 
&kparams->range : NULL); + bast_routine : NULL); if (status) goto out_err; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 16f20cfd9197c9..c3299020c8f318 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -146,7 +146,6 @@ struct dlm_args { void *bastaddr; int mode; struct dlm_lksb *lksb; - struct dlm_range *range; }; @@ -195,13 +194,6 @@ struct dlm_args { #define AST_COMP 1 #define AST_BAST 2 -/* lkb_range[] */ - -#define GR_RANGE_START 0 -#define GR_RANGE_END 1 -#define RQ_RANGE_START 2 -#define RQ_RANGE_END 3 - /* lkb_status */ #define DLM_LKSTS_WAITING 1 @@ -212,7 +204,6 @@ struct dlm_args { #define DLM_IFL_MSTCPY 0x00010000 #define DLM_IFL_RESEND 0x00020000 -#define DLM_IFL_RANGE 0x00000001 struct dlm_lkb { struct dlm_rsb *lkb_resource; /* the rsb */ @@ -241,7 +232,6 @@ struct dlm_lkb { struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_astqueue; /* need ast to be sent */ - uint64_t *lkb_range; /* array of gr/rq ranges */ char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ void *lkb_astaddr; /* caller's ast function */ @@ -360,7 +350,6 @@ struct dlm_message { int m_bastmode; int m_asts; int m_result; /* 0 or -EXXX */ - uint64_t m_range[2]; char m_extra[0]; /* name or lvb */ }; @@ -413,7 +402,6 @@ struct rcom_lock { int8_t rl_asts; uint16_t rl_wait_type; uint16_t rl_namelen; - uint64_t rl_range[4]; char rl_name[DLM_RESNAME_MAXLEN]; char rl_lvb[0]; }; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 29d3b95dbb6369..80487703d58210 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -606,8 +606,6 @@ static int put_lkb(struct dlm_lkb *lkb) /* for local/process lkbs, lvbptr points to caller's lksb */ if (lkb->lkb_lvbptr && is_master_copy(lkb)) free_lvb(lkb->lkb_lvbptr); - if (lkb->lkb_range) - free_range(lkb->lkb_range); free_lkb(lkb); return 1; } else { @@ -988,11 +986,6 @@ static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) } lkb->lkb_rqmode = DLM_LOCK_IV; - - if (lkb->lkb_range) { - lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; - lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; - } } static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -1032,21 +1025,6 @@ static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) return 0; } -/* Return 1 if the locks' ranges overlap. If the lkb has no range then it is - assumed to cover 0-ffffffff.ffffffff */ - -static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) -{ - if (!lkb1->lkb_range || !lkb2->lkb_range) - return 1; - - if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || - lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) - return 0; - - return 1; -} - /* Check if the given lkb conflicts with another lkb on the queue. 
*/ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) @@ -1056,7 +1034,7 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) list_for_each_entry(this, head, lkb_statequeue) { if (this == lkb) continue; - if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) + if (!modes_compat(this, lkb)) return 1; } return 0; @@ -1099,9 +1077,6 @@ static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) continue; } - if (!ranges_overlap(lkb, this)) - continue; - if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) return 1; } @@ -1203,8 +1178,8 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) return 1; /* - * When using range locks the NOORDER flag is set to avoid the standard - * vms rules on grant order. + * The NOORDER flag is set to avoid the standard vms rules on grant + * order. */ if (lkb->lkb_exflags & DLM_LKF_NOORDER) @@ -1358,8 +1333,7 @@ static void grant_pending_locks(struct dlm_rsb *r) /* * If there are locks left on the wait/convert queue then send blocking * ASTs to granted locks based on the largest requested mode (high) - * found above. This can generate spurious blocking ASTs for range - * locks. FIXME: highbast < high comparison not valid for PR/CW. + * found above. FIXME: highbast < high comparison not valid for PR/CW. */ list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { @@ -1379,7 +1353,7 @@ static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, list_for_each_entry(gr, head, lkb_statequeue) { if (gr->lkb_bastaddr && gr->lkb_highbast < lkb->lkb_rqmode && - ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { + !modes_compat(gr, lkb)) { queue_bast(r, gr, lkb->lkb_rqmode); gr->lkb_highbast = lkb->lkb_rqmode; } @@ -1530,8 +1504,7 @@ static void confirm_master(struct dlm_rsb *r, int error) static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, int namelen, uint32_t parent_lkid, void *ast, - void *astarg, void *bast, struct dlm_range *range, - struct dlm_args *args) + void *astarg, void *bast, struct dlm_args *args) { int rv = -EINVAL; @@ -1590,7 +1563,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, args->bastaddr = bast; args->mode = mode; args->lksb = lksb; - args->range = range; rv = 0; out: return rv; @@ -1637,26 +1609,6 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_lksb = args->lksb; lkb->lkb_lvbptr = args->lksb->sb_lvbptr; lkb->lkb_ownpid = (int) current->pid; - - rv = 0; - if (!args->range) - goto out; - - if (!lkb->lkb_range) { - rv = -ENOMEM; - lkb->lkb_range = allocate_range(ls); - if (!lkb->lkb_range) - goto out; - /* This is needed for conversions that contain ranges - where the original lock didn't but it's harmless for - new locks too. */ - lkb->lkb_range[GR_RANGE_START] = 0LL; - lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; - } - - lkb->lkb_range[RQ_RANGE_START] = args->range->ra_start; - lkb->lkb_range[RQ_RANGE_END] = args->range->ra_end; - lkb->lkb_flags |= DLM_IFL_RANGE; rv = 0; out: return rv; @@ -1805,7 +1757,7 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) return error; } -/* change some property of an existing lkb, e.g. mode, range */ +/* change some property of an existing lkb, e.g. 
mode */ static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) { @@ -1962,8 +1914,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, uint32_t parent_lkid, void (*ast) (void *astarg), void *astarg, - void (*bast) (void *astarg, int mode), - struct dlm_range *range) + void (*bast) (void *astarg, int mode)) { struct dlm_ls *ls; struct dlm_lkb *lkb; @@ -1985,7 +1936,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, goto out; error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, - astarg, bast, range, &args); + astarg, bast, &args); if (error) goto out_put; @@ -2154,11 +2105,6 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb, if (lkb->lkb_astaddr) ms->m_asts |= AST_COMP; - if (lkb->lkb_range) { - ms->m_range[0] = lkb->lkb_range[RQ_RANGE_START]; - ms->m_range[1] = lkb->lkb_range[RQ_RANGE_END]; - } - if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP) memcpy(ms->m_extra, r->res_name, r->res_length); @@ -2402,20 +2348,6 @@ static int receive_extralen(struct dlm_message *ms) return (ms->m_header.h_length - sizeof(struct dlm_message)); } -static int receive_range(struct dlm_ls *ls, struct dlm_lkb *lkb, - struct dlm_message *ms) -{ - if (lkb->lkb_flags & DLM_IFL_RANGE) { - if (!lkb->lkb_range) - lkb->lkb_range = allocate_range(ls); - if (!lkb->lkb_range) - return -ENOMEM; - lkb->lkb_range[RQ_RANGE_START] = ms->m_range[0]; - lkb->lkb_range[RQ_RANGE_END] = ms->m_range[1]; - } - return 0; -} - static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_message *ms) { @@ -2445,9 +2377,6 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb, DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb);); - if (receive_range(ls, lkb, ms)) - return -ENOMEM; - if (receive_lvb(ls, lkb, ms)) return -ENOMEM; @@ -2470,13 +2399,6 @@ static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb, if (lkb->lkb_status != DLM_LKSTS_GRANTED) return -EBUSY; - if (receive_range(ls, lkb, ms)) - return -ENOMEM; - if (lkb->lkb_range) { - lkb->lkb_range[GR_RANGE_START] = 0LL; - lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; - } - if (receive_lvb(ls, lkb, ms)) return -ENOMEM; @@ -3476,13 +3398,6 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb, lkb->lkb_bastaddr = (void *) (long) (rl->rl_asts & AST_BAST); lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); - if (lkb->lkb_flags & DLM_IFL_RANGE) { - lkb->lkb_range = allocate_range(ls); - if (!lkb->lkb_range) - return -ENOMEM; - memcpy(lkb->lkb_range, rl->rl_range, 4*sizeof(uint64_t)); - } - if (lkb->lkb_exflags & DLM_LKF_VALBLK) { lkb->lkb_lvbptr = allocate_lvb(ls); if (!lkb->lkb_lvbptr) diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 0b9851d0bdb2d9..f7cf4589fae8fc 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -50,22 +50,6 @@ void free_lvb(char *p) kfree(p); } -uint64_t *allocate_range(struct dlm_ls *ls) -{ - int ralen = 4*sizeof(uint64_t); - uint64_t *p; - - p = kmalloc(ralen, GFP_KERNEL); - if (p) - memset(p, 0, ralen); - return p; -} - -void free_range(uint64_t *p) -{ - kfree(p); -} - /* FIXME: have some minimal space built-in to rsb for the name and kmalloc a separate name if needed, like dentries are done */ diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 7b235132b0b437..6ead158ccc5c66 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h @@ -24,8 +24,6 @@ struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); void free_direntry(struct dlm_direntry *de); char *allocate_lvb(struct dlm_ls *ls); void free_lvb(char *l); 
-uint64_t *allocate_range(struct dlm_ls *ls); -void free_range(uint64_t *l); #endif /* __MEMORY_DOT_H__ */ diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 4c5c08a8860e92..55fbe313340e59 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -284,9 +284,6 @@ static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb, if (lkb->lkb_astaddr) rl->rl_asts |= AST_COMP; - if (lkb->lkb_range) - memcpy(rl->rl_range, lkb->lkb_range, 4*sizeof(uint64_t)); - rl->rl_namelen = r->res_length; memcpy(rl->rl_name, r->res_name, r->res_length); diff --git a/fs/dlm/util.c b/fs/dlm/util.c index 826d122edf559b..767197db994404 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -54,8 +54,6 @@ void dlm_message_out(struct dlm_message *ms) ms->m_bastmode = cpu_to_le32(ms->m_bastmode); ms->m_asts = cpu_to_le32(ms->m_asts); ms->m_result = cpu_to_le32(ms->m_result); - ms->m_range[0] = cpu_to_le64(ms->m_range[0]); - ms->m_range[1] = cpu_to_le64(ms->m_range[1]); } void dlm_message_in(struct dlm_message *ms) @@ -82,8 +80,6 @@ void dlm_message_in(struct dlm_message *ms) ms->m_bastmode = le32_to_cpu(ms->m_bastmode); ms->m_asts = le32_to_cpu(ms->m_asts); ms->m_result = le32_to_cpu(ms->m_result); - ms->m_range[0] = le64_to_cpu(ms->m_range[0]); - ms->m_range[1] = le64_to_cpu(ms->m_range[1]); } static void rcom_lock_out(struct rcom_lock *rl) @@ -99,10 +95,6 @@ static void rcom_lock_out(struct rcom_lock *rl) rl->rl_result = cpu_to_le32(rl->rl_result); rl->rl_wait_type = cpu_to_le16(rl->rl_wait_type); rl->rl_namelen = cpu_to_le16(rl->rl_namelen); - rl->rl_range[0] = cpu_to_le64(rl->rl_range[0]); - rl->rl_range[1] = cpu_to_le64(rl->rl_range[1]); - rl->rl_range[2] = cpu_to_le64(rl->rl_range[2]); - rl->rl_range[3] = cpu_to_le64(rl->rl_range[3]); } static void rcom_lock_in(struct rcom_lock *rl) @@ -118,10 +110,6 @@ static void rcom_lock_in(struct rcom_lock *rl) rl->rl_result = le32_to_cpu(rl->rl_result); rl->rl_wait_type = le16_to_cpu(rl->rl_wait_type); rl->rl_namelen = le16_to_cpu(rl->rl_namelen); - rl->rl_range[0] = le64_to_cpu(rl->rl_range[0]); - rl->rl_range[1] = le64_to_cpu(rl->rl_range[1]); - rl->rl_range[2] = le64_to_cpu(rl->rl_range[2]); - rl->rl_range[3] = le64_to_cpu(rl->rl_range[3]); } static void rcom_config_out(struct rcom_config *rf) diff --git a/include/linux/dlm.h b/include/linux/dlm.h index dd324ba44d8004..1b1dcb9a40bbab 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -159,15 +159,6 @@ typedef void dlm_lockspace_t; -/* - * Lock range structure - */ - -struct dlm_range { - uint64_t ra_start; - uint64_t ra_end; -}; - /* * Lock status block * @@ -277,8 +268,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, uint32_t parent_lkid, void (*lockast) (void *astarg), void *astarg, - void (*bast) (void *astarg, int mode), - struct dlm_range *range); + void (*bast) (void *astarg, int mode)); /* * dlm_unlock diff --git a/include/linux/dlm_device.h b/include/linux/dlm_device.h index 5e17d295544b3c..f8ba1981aa96cf 100644 --- a/include/linux/dlm_device.h +++ b/include/linux/dlm_device.h @@ -18,7 +18,7 @@ #define DLM_USER_LVB_LEN 32 /* Version of the device interface */ -#define DLM_DEVICE_VERSION_MAJOR 3 +#define DLM_DEVICE_VERSION_MAJOR 4 #define DLM_DEVICE_VERSION_MINOR 0 #define DLM_DEVICE_VERSION_PATCH 0 @@ -28,7 +28,6 @@ struct dlm_lock_params { __u16 flags; __u32 lkid; __u32 parent; - struct dlm_range range; __u8 namelen; void __user *castparam; void __user *castaddr; -- cgit 1.2.3-korg From b3f58d8f2b1200f1b9abbcfb9dec6c25bc787469 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 28 Feb 2006 11:16:37 -0500 
Subject: [DLM] Pass in lockspace to lkb put function In some cases a lockspace isn't attached to the lkb, so that it needs to be passed directly to the lkb put function. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 80487703d58210..85a0e73ba808cf 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -591,9 +591,11 @@ static void kill_lkb(struct kref *kref) DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb);); } -static int put_lkb(struct dlm_lkb *lkb) +/* __put_lkb() is used when an lkb may not have an rsb attached to + it so we need to provide the lockspace explicitly */ + +static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) { - struct dlm_ls *ls = lkb->lkb_resource->res_ls; uint16_t bucket = lkb->lkb_id & 0xFFFF; write_lock(&ls->ls_lkbtbl[bucket].lock); @@ -616,7 +618,13 @@ static int put_lkb(struct dlm_lkb *lkb) int dlm_put_lkb(struct dlm_lkb *lkb) { - return put_lkb(lkb); + struct dlm_ls *ls; + + DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb);); + DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb);); + + ls = lkb->lkb_resource->res_ls; + return __put_lkb(ls, lkb); } /* This is only called to add a reference when the code already holds @@ -1949,7 +1957,7 @@ int dlm_lock(dlm_lockspace_t *lockspace, error = 0; out_put: if (convert || error) - put_lkb(lkb); + __put_lkb(ls, lkb); if (error == -EAGAIN) error = 0; out: @@ -1991,7 +1999,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace, if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL) error = 0; out_put: - put_lkb(lkb); + dlm_put_lkb(lkb); out: unlock_recovery(ls); dlm_put_lockspace(ls); @@ -2442,7 +2450,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) lkb->lkb_flags |= DLM_IFL_MSTCPY; error = receive_request_args(ls, lkb, ms); if (error) { - put_lkb(lkb); + __put_lkb(ls, lkb); goto fail; } @@ -2450,7 +2458,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) error = find_rsb(ls, ms->m_extra, namelen, R_MASTER, &r); if (error) { - put_lkb(lkb); + __put_lkb(ls, lkb); goto fail; } @@ -2466,7 +2474,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) if (error == -EINPROGRESS) error = 0; if (error) - put_lkb(lkb); + dlm_put_lkb(lkb); return; fail: @@ -2502,7 +2510,7 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); return; fail: @@ -2536,7 +2544,7 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); return; fail: @@ -2566,7 +2574,7 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); return; fail: @@ -2598,7 +2606,7 @@ static void receive_grant(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); } static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) @@ -2623,7 +2631,7 @@ static void receive_bast(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); } static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms) @@ -2746,7 +2754,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); out: - put_lkb(lkb); + 
dlm_put_lkb(lkb); } static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, @@ -2813,7 +2821,7 @@ static void receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms) _receive_convert_reply(lkb, ms); out: - put_lkb(lkb); + dlm_put_lkb(lkb); } static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms) @@ -2860,7 +2868,7 @@ static void receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms) _receive_unlock_reply(lkb, ms); out: - put_lkb(lkb); + dlm_put_lkb(lkb); } static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) @@ -2907,7 +2915,7 @@ static void receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms) _receive_cancel_reply(lkb, ms); out: - put_lkb(lkb); + dlm_put_lkb(lkb); } static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) @@ -2954,7 +2962,7 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) unlock_rsb(r); put_rsb(r); out: - put_lkb(lkb); + dlm_put_lkb(lkb); } int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) @@ -3159,7 +3167,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) ls->ls_stub_ms.m_result = -DLM_EUNLOCK; _remove_from_waiters(lkb); _receive_unlock_reply(lkb, &ls->ls_stub_ms); - put_lkb(lkb); + dlm_put_lkb(lkb); break; case DLM_MSG_CANCEL: @@ -3167,7 +3175,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) ls->ls_stub_ms.m_result = -DLM_ECANCEL; _remove_from_waiters(lkb); _receive_cancel_reply(lkb, &ls->ls_stub_ms); - put_lkb(lkb); + dlm_put_lkb(lkb); break; default: @@ -3272,7 +3280,7 @@ static void purge_queue(struct dlm_rsb *r, struct list_head *queue, if (test(ls, lkb)) { del_lkb(r, lkb); /* this put should free the lkb */ - if (!put_lkb(lkb)) + if (!dlm_put_lkb(lkb)) log_error(ls, "purged lkb not released"); } } @@ -3456,7 +3464,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) error = receive_rcom_lock_args(ls, lkb, r, rc); if (error) { - put_lkb(lkb); + __put_lkb(ls, lkb); goto out_unlock; } @@ -3518,7 +3526,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) unlock_rsb(r); put_rsb(r); - put_lkb(lkb); + dlm_put_lkb(lkb); return 0; } -- cgit 1.2.3-korg From 97a35d1e5fab9ff8de27814082b78b2fc9ad94f0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 2 May 2006 13:34:03 -0400 Subject: [DLM] fix grant_after_purge softlockup In dlm_grant_after_purge() we were holding a hash table read_lock while calling put_rsb() which potentially removes the rsb from the hash table, taking the same lock in write. Fix this by flagging rsb's ahead of time that have been purged. Then iteratively read_lock the hash table, find a flagged rsb, unlock, process rsb. 
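In outline: flag the rsb at purge time, then scan under the read lock only long enough to claim one flagged rsb, drop the lock, and process it with no hash-table lock held. A minimal userspace sketch of the same pattern follows, using pthread rwlocks and C11 atomics; the names and types here are illustrative stand-ins, not the kernel code in the diff below.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    /* Entries are flagged ahead of time; the scan takes the read lock
       only long enough to find and unflag one entry, then drops it, so
       the processing step may safely take the write lock itself. */

    struct entry {
        struct entry *next;
        atomic_int flagged;        /* stands in for RSB_LOCKS_PURGED */
    };

    struct bucket {
        pthread_rwlock_t lock;
        struct entry *head;
    };

    static struct entry *find_flagged(struct bucket *b)
    {
        struct entry *e, *ret = NULL;

        pthread_rwlock_rdlock(&b->lock);
        for (e = b->head; e; e = e->next) {
            if (atomic_exchange(&e->flagged, 0)) {  /* test-and-clear */
                ret = e;  /* the kernel also pins the rsb here
                             with hold_rsb() */
                break;
            }
        }
        pthread_rwlock_unlock(&b->lock);
        return ret;
    }

    void drain_flagged(struct bucket *b, void (*process)(struct entry *))
    {
        struct entry *e;

        /* No bucket lock is held while process() runs, so process()
           may take b->lock for writing (e.g. to remove the entry)
           without deadlocking against this loop. */
        while ((e = find_flagged(b)) != NULL)
            process(e);
    }

Returning one entry per scan is what breaks the inversion: the step that may retake the lock in write mode (put_rsb() in the kernel) never runs under the read lock.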
Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/dlm_internal.h | 1 + fs/dlm/lock.c | 42 ++++++++++++++++++++++++++++-------------- fs/dlm/lock.h | 2 +- 3 files changed, 30 insertions(+), 15 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index c3299020c8f318..149106f2b80fab 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -280,6 +280,7 @@ enum rsb_flags { RSB_NEW_MASTER, RSB_NEW_MASTER2, RSB_RECOVER_CONVERT, + RSB_LOCKS_PURGED, }; static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 85a0e73ba808cf..5f69639041078f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3278,6 +3278,7 @@ static void purge_queue(struct dlm_rsb *r, struct list_head *queue, list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { if (test(ls, lkb)) { + rsb_set_flag(r, RSB_LOCKS_PURGED); del_lkb(r, lkb); /* this put should free the lkb */ if (!dlm_put_lkb(lkb)) @@ -3334,27 +3335,40 @@ int dlm_purge_locks(struct dlm_ls *ls) return 0; } -int dlm_grant_after_purge(struct dlm_ls *ls) +static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) +{ + struct dlm_rsb *r, *r_ret = NULL; + + read_lock(&ls->ls_rsbtbl[bucket].lock); + list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) { + if (!rsb_flag(r, RSB_LOCKS_PURGED)) + continue; + hold_rsb(r); + rsb_clear_flag(r, RSB_LOCKS_PURGED); + r_ret = r; + break; + } + read_unlock(&ls->ls_rsbtbl[bucket].lock); + return r_ret; +} + +void dlm_grant_after_purge(struct dlm_ls *ls) { struct dlm_rsb *r; int i; for (i = 0; i < ls->ls_rsbtbl_size; i++) { - read_lock(&ls->ls_rsbtbl[i].lock); - list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) { - hold_rsb(r); - lock_rsb(r); - if (is_master(r)) { - grant_pending_locks(r); - confirm_master(r, 0); - } - unlock_rsb(r); - put_rsb(r); + r = find_purged_rsb(ls, i); + if (!r) + continue; + lock_rsb(r); + if (is_master(r)) { + grant_pending_locks(r); + confirm_master(r, 0); } - read_unlock(&ls->ls_rsbtbl[i].lock); + unlock_rsb(r); + put_rsb(r); } - - return 0; } static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid, diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index bffab9c88b1daf..56cdc073b1f61b 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -25,7 +25,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls); int dlm_purge_locks(struct dlm_ls *ls); void dlm_purge_mstcpy_locks(struct dlm_rsb *r); -int dlm_grant_after_purge(struct dlm_ls *ls); +void dlm_grant_after_purge(struct dlm_ls *ls); int dlm_recover_waiters_post(struct dlm_ls *ls); void dlm_recover_waiters_pre(struct dlm_ls *ls); int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); -- cgit 1.2.3-korg From 597d0cae0f99f62501e229bed50e8149604015bb Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 12 Jul 2006 16:44:04 -0500 Subject: [DLM] dlm: user locks This changes the way the dlm handles user locks. The core dlm is now aware of user locks so they can be dealt with more efficiently. There is no more dlm_device module which previously managed its own duplicate copy of every user lock. 
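The core of the change is a small dispatch: an lkb flagged DLM_IFL_USER has its ASTs queued to the owning process (to be read back through the lockspace misc device) instead of to the in-kernel dlm_astd thread. A compilable toy of that branch follows, with stand-in types rather than the real dlm structures; see the fs/dlm/ast.c hunk below for the actual code.

    #include <stdio.h>

    #define DLM_IFL_USER 0x00000001      /* matches the flag the patch adds */

    struct lkb { unsigned int lkb_flags; };  /* toy stand-in for dlm_lkb */

    static void queue_to_astd(struct lkb *lkb)
    {
        puts("in-kernel lock: queue ast for the dlm_astd thread");
    }

    static void queue_to_proc(struct lkb *lkb)
    {
        puts("user lock: queue ast on the owning process's list");
    }

    /* mirrors the dispatch added to dlm_add_ast() in the diff below */
    void add_ast(struct lkb *lkb)
    {
        if (lkb->lkb_flags & DLM_IFL_USER) {
            queue_to_proc(lkb);   /* delivered via the misc device read path */
            return;
        }
        queue_to_astd(lkb);
    }

Because the core dlm makes this distinction itself, the duplicate per-lock state that dlm_device used to keep (struct lock_info) can hang directly off the lkb as dlm_user_args and be freed with it.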
Signed-off-by: Patrick Caulfield Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/Kconfig | 8 - fs/dlm/Makefile | 4 +- fs/dlm/ast.c | 7 +- fs/dlm/device.c | 1239 ------------------------------------------------- fs/dlm/dlm_internal.h | 44 ++ fs/dlm/lock.c | 304 +++++++++++- fs/dlm/lock.h | 11 + fs/dlm/lockspace.c | 32 +- fs/dlm/lockspace.h | 1 + fs/dlm/main.c | 8 + fs/dlm/memory.c | 9 + fs/dlm/user.c | 769 ++++++++++++++++++++++++++++++ fs/dlm/user.h | 16 + 13 files changed, 1192 insertions(+), 1260 deletions(-) delete mode 100644 fs/dlm/device.c create mode 100644 fs/dlm/user.c create mode 100644 fs/dlm/user.h (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 09e78bf6e7a463..490f85b3fa590e 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -10,14 +10,6 @@ config DLM A general purpose distributed lock manager for kernel or userspace applications. -config DLM_DEVICE - tristate "DLM device for userspace access" - depends on DLM - help - This module creates a misc device through which the dlm lockspace - and locking functions become available to userspace applications - (usually through the libdlm library). - config DLM_DEBUG bool "DLM debugging" depends on DLM diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile index 1e6232e7d8e5ca..1832e0297f7d82 100644 --- a/fs/dlm/Makefile +++ b/fs/dlm/Makefile @@ -1,6 +1,4 @@ obj-$(CONFIG_DLM) += dlm.o -obj-$(CONFIG_DLM_DEVICE) += dlm_device.o - dlm-y := ast.o \ config.o \ dir.o \ @@ -15,7 +13,7 @@ dlm-y := ast.o \ recover.o \ recoverd.o \ requestqueue.o \ + user.o \ util.o dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o -dlm_device-y := device.o diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 57bdf09b520a8a..a211330cbc422e 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -13,7 +13,7 @@ #include "dlm_internal.h" #include "lock.h" -#include "ast.h" +#include "user.h" #define WAKE_ASTS 0 @@ -34,6 +34,11 @@ void dlm_del_ast(struct dlm_lkb *lkb) void dlm_add_ast(struct dlm_lkb *lkb, int type) { + if (lkb->lkb_flags & DLM_IFL_USER) { + dlm_user_add_ast(lkb, type); + return; + } + spin_lock(&ast_queue_lock); if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { kref_get(&lkb->lkb_ref); diff --git a/fs/dlm/device.c b/fs/dlm/device.c deleted file mode 100644 index 825bbc0a09c083..00000000000000 --- a/fs/dlm/device.c +++ /dev/null @@ -1,1239 +0,0 @@ -/****************************************************************************** -******************************************************************************* -** -** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. -** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. -** -******************************************************************************* -******************************************************************************/ - -/* - * device.c - * - * This is the userland interface to the DLM. - * - * The locking is done via a misc char device (find the - * registered minor number in /proc/misc). - * - * User code should not use this interface directly but - * call the library routines in libdlm.a instead. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "lvb_table.h" - -static struct file_operations _dlm_fops; -static const char *name_prefix="dlm"; -static struct list_head user_ls_list; -static struct mutex user_ls_lock; - -/* Flags in li_flags */ -#define LI_FLAG_COMPLETE 1 -#define LI_FLAG_FIRSTLOCK 2 -#define LI_FLAG_PERSISTENT 3 -#define LI_FLAG_ONLIST 4 - -/* flags in ls_flags*/ -#define LS_FLAG_DELETED 1 -#define LS_FLAG_AUTOFREE 2 - -/* flags in ls_flags*/ -#define FI_FLAG_OPEN 1 -#define FI_FLAG_COMPAT 2 - -#define LOCKINFO_MAGIC 0x53595324 - -struct lock_info { - uint32_t li_magic; - uint8_t li_cmd; - int8_t li_grmode; - int8_t li_rqmode; - struct dlm_lksb li_lksb; - wait_queue_head_t li_waitq; - unsigned long li_flags; - void __user *li_castparam; - void __user *li_castaddr; - void __user *li_bastparam; - void __user *li_bastaddr; - void __user *li_pend_bastparam; - void __user *li_pend_bastaddr; - struct list_head li_ownerqueue; - struct file_info *li_file; - struct dlm_lksb __user *li_user_lksb; - struct completion li_firstcomp; -}; - -/* A queued AST no less */ -struct ast_info { - struct dlm_lock_result result; - struct list_head list; - uint32_t lvb_updated; - uint32_t progress; /* How much has been read */ -}; - -/* One of these per userland lockspace */ -struct user_ls { - void *ls_lockspace; - atomic_t ls_refcnt; - long ls_flags; - - /* Lock infos are stored in here indexed by lock ID */ - struct idr lockinfo_idr; - rwlock_t lockinfo_lock; - - /* Passed into misc_register() */ - struct miscdevice ls_miscinfo; - struct list_head ls_list; -}; - -/* misc_device info for the control device */ -static struct miscdevice ctl_device; - -/* - * Stuff we hang off the file struct. - * The first two are to cope with unlocking all the - * locks help by a process when it dies. 
- */ -struct file_info { - struct list_head fi_li_list; /* List of active lock_infos */ - spinlock_t fi_li_lock; - struct list_head fi_ast_list; /* Queue of ASTs to be delivered */ - spinlock_t fi_ast_lock; - wait_queue_head_t fi_wait; - struct user_ls *fi_ls; - atomic_t fi_refcnt; /* Number of users */ - unsigned long fi_flags; -}; - -#ifdef CONFIG_COMPAT - -struct dlm_lock_params32 { - __u8 mode; - __u8 namelen; - __u16 flags; - __u32 lkid; - __u32 parent; - - __u32 castparam; - __u32 castaddr; - __u32 bastparam; - __u32 bastaddr; - __u32 lksb; - - char lvb[DLM_USER_LVB_LEN]; - char name[0]; -}; - -struct dlm_write_request32 { - __u32 version[3]; - __u8 cmd; - __u8 is64bit; - __u8 unused[2]; - - union { - struct dlm_lock_params32 lock; - struct dlm_lspace_params lspace; - } i; -}; - -struct dlm_lksb32 { - __u32 sb_status; - __u32 sb_lkid; - __u8 sb_flags; - __u32 sb_lvbptr; -}; - -struct dlm_lock_result32 { - __u32 length; - __u32 user_astaddr; - __u32 user_astparam; - __u32 user_lksb; - struct dlm_lksb32 lksb; - __u8 bast_mode; - __u8 unused[3]; - /* Offsets may be zero if no data is present */ - __u32 lvb_offset; -}; - - -static void compat_input(struct dlm_write_request *kparams, struct dlm_write_request32 *k32params) -{ - - kparams->version[0] = k32params->version[0]; - kparams->version[1] = k32params->version[1]; - kparams->version[2] = k32params->version[2]; - - kparams->cmd = k32params->cmd; - kparams->is64bit = k32params->is64bit; - if (kparams->cmd == DLM_USER_CREATE_LOCKSPACE || - kparams->cmd == DLM_USER_REMOVE_LOCKSPACE) { - - kparams->i.lspace.flags = k32params->i.lspace.flags; - kparams->i.lspace.minor = k32params->i.lspace.minor; - strcpy(kparams->i.lspace.name, k32params->i.lspace.name); - } - else { - kparams->i.lock.mode = k32params->i.lock.mode; - kparams->i.lock.namelen = k32params->i.lock.namelen; - kparams->i.lock.flags = k32params->i.lock.flags; - kparams->i.lock.lkid = k32params->i.lock.lkid; - kparams->i.lock.parent = k32params->i.lock.parent; - kparams->i.lock.castparam = (void *)(long)k32params->i.lock.castparam; - kparams->i.lock.castaddr = (void *)(long)k32params->i.lock.castaddr; - kparams->i.lock.bastparam = (void *)(long)k32params->i.lock.bastparam; - kparams->i.lock.bastaddr = (void *)(long)k32params->i.lock.bastaddr; - kparams->i.lock.lksb = (void *)(long)k32params->i.lock.lksb; - memcpy(kparams->i.lock.lvb, k32params->i.lock.lvb, DLM_USER_LVB_LEN); - memcpy(kparams->i.lock.name, k32params->i.lock.name, kparams->i.lock.namelen); - } -} - -void compat_output(struct dlm_lock_result *res, struct dlm_lock_result32 *res32) -{ - res32->length = res->length - (sizeof(struct dlm_lock_result) - sizeof(struct dlm_lock_result32)); - res32->user_astaddr = (__u32)(long)res->user_astaddr; - res32->user_astparam = (__u32)(long)res->user_astparam; - res32->user_lksb = (__u32)(long)res->user_lksb; - res32->bast_mode = res->bast_mode; - - res32->lvb_offset = res->lvb_offset; - res32->length = res->length; - - res32->lksb.sb_status = res->lksb.sb_status; - res32->lksb.sb_flags = res->lksb.sb_flags; - res32->lksb.sb_lkid = res->lksb.sb_lkid; - res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr; -} -#endif - - -/* get and put ops for file_info. 
- Actually I don't really like "get" and "put", but everyone - else seems to use them and I can't think of anything - nicer at the moment */ -static void get_file_info(struct file_info *f) -{ - atomic_inc(&f->fi_refcnt); -} - -static void put_file_info(struct file_info *f) -{ - if (atomic_dec_and_test(&f->fi_refcnt)) - kfree(f); -} - -static void release_lockinfo(struct user_ls *ls, struct lock_info *li) -{ - put_file_info(li->li_file); - - write_lock(&ls->lockinfo_lock); - idr_remove(&ls->lockinfo_idr, li->li_lksb.sb_lkid); - write_unlock(&ls->lockinfo_lock); - - if (li->li_lksb.sb_lvbptr) - kfree(li->li_lksb.sb_lvbptr); - kfree(li); - - module_put(THIS_MODULE); -} - -static struct lock_info *get_lockinfo(struct user_ls *ls, uint32_t lockid) -{ - struct lock_info *li; - - read_lock(&ls->lockinfo_lock); - li = idr_find(&ls->lockinfo_idr, lockid); - read_unlock(&ls->lockinfo_lock); - - return li; -} - -static int add_lockinfo(struct user_ls *ls, struct lock_info *li) -{ - int n; - int r; - int ret = -EINVAL; - - write_lock(&ls->lockinfo_lock); - - if (idr_find(&ls->lockinfo_idr, li->li_lksb.sb_lkid)) - goto out_up; - - ret = -ENOMEM; - r = idr_pre_get(&ls->lockinfo_idr, GFP_KERNEL); - if (!r) - goto out_up; - - r = idr_get_new_above(&ls->lockinfo_idr, li, li->li_lksb.sb_lkid, &n); - if (r) - goto out_up; - - if (n != li->li_lksb.sb_lkid) { - idr_remove(&ls->lockinfo_idr, n); - goto out_up; - } - - ret = 0; - - out_up: - write_unlock(&ls->lockinfo_lock); - - return ret; -} - - -static struct user_ls *__find_lockspace(int minor) -{ - struct user_ls *lsinfo; - - list_for_each_entry(lsinfo, &user_ls_list, ls_list) { - if (lsinfo->ls_miscinfo.minor == minor) - return lsinfo; - } - return NULL; -} - -/* Find a lockspace struct given the device minor number */ -static struct user_ls *find_lockspace(int minor) -{ - struct user_ls *lsinfo; - - mutex_lock(&user_ls_lock); - lsinfo = __find_lockspace(minor); - mutex_unlock(&user_ls_lock); - - return lsinfo; -} - -static void add_lockspace_to_list(struct user_ls *lsinfo) -{ - mutex_lock(&user_ls_lock); - list_add(&lsinfo->ls_list, &user_ls_list); - mutex_unlock(&user_ls_lock); -} - -/* Register a lockspace with the DLM and create a misc - device for userland to access it */ -static int register_lockspace(char *name, struct user_ls **ls, int flags) -{ - struct user_ls *newls; - int status; - int namelen; - - namelen = strlen(name)+strlen(name_prefix)+2; - - newls = kzalloc(sizeof(struct user_ls), GFP_KERNEL); - if (!newls) - return -ENOMEM; - - newls->ls_miscinfo.name = kzalloc(namelen, GFP_KERNEL); - if (!newls->ls_miscinfo.name) { - kfree(newls); - return -ENOMEM; - } - - status = dlm_new_lockspace(name, strlen(name), &newls->ls_lockspace, 0, - DLM_USER_LVB_LEN); - if (status != 0) { - kfree(newls->ls_miscinfo.name); - kfree(newls); - return status; - } - - idr_init(&newls->lockinfo_idr); - rwlock_init(&newls->lockinfo_lock); - - snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", - name_prefix, name); - - newls->ls_miscinfo.fops = &_dlm_fops; - newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR; - - status = misc_register(&newls->ls_miscinfo); - if (status) { - printk(KERN_ERR "dlm: misc register failed for %s\n", name); - dlm_release_lockspace(newls->ls_lockspace, 0); - kfree(newls->ls_miscinfo.name); - kfree(newls); - return status; - } - - if (flags & DLM_USER_LSFLG_AUTOFREE) - set_bit(LS_FLAG_AUTOFREE, &newls->ls_flags); - - add_lockspace_to_list(newls); - *ls = newls; - return 0; -} - -/* Called with the user_ls_lock mutex held */ -static int 
unregister_lockspace(struct user_ls *lsinfo, int force) -{ - int status; - - status = dlm_release_lockspace(lsinfo->ls_lockspace, force); - if (status) - return status; - - status = misc_deregister(&lsinfo->ls_miscinfo); - if (status) - return status; - - list_del(&lsinfo->ls_list); - set_bit(LS_FLAG_DELETED, &lsinfo->ls_flags); - lsinfo->ls_lockspace = NULL; - if (atomic_read(&lsinfo->ls_refcnt) == 0) { - kfree(lsinfo->ls_miscinfo.name); - kfree(lsinfo); - } - - return 0; -} - -/* Add it to userland's AST queue */ -static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam, - int lvb_updated) -{ - struct ast_info *ast = kzalloc(sizeof(struct ast_info), GFP_KERNEL); - if (!ast) - return; - - ast->result.user_astparam = astparam; - ast->result.user_astaddr = astaddr; - ast->result.user_lksb = li->li_user_lksb; - memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb)); - ast->lvb_updated = lvb_updated; - - spin_lock(&li->li_file->fi_ast_lock); - list_add_tail(&ast->list, &li->li_file->fi_ast_list); - spin_unlock(&li->li_file->fi_ast_lock); - wake_up_interruptible(&li->li_file->fi_wait); -} - -static void bast_routine(void *param, int mode) -{ - struct lock_info *li = param; - - if (li && li->li_bastaddr) - add_to_astqueue(li, li->li_bastaddr, li->li_bastparam, 0); -} - -/* - * This is the kernel's AST routine. - * All lock, unlock & query operations complete here. - * The only syncronous ops are those done during device close. - */ -static void ast_routine(void *param) -{ - struct lock_info *li = param; - - /* Param may be NULL if a persistent lock is unlocked by someone else */ - if (!li) - return; - - /* If this is a succesful conversion then activate the blocking ast - * args from the conversion request */ - if (!test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && - li->li_lksb.sb_status == 0) { - - li->li_bastparam = li->li_pend_bastparam; - li->li_bastaddr = li->li_pend_bastaddr; - li->li_pend_bastaddr = NULL; - } - - /* If it's an async request then post data to the user's AST queue. */ - if (li->li_castaddr) { - int lvb_updated = 0; - - /* See if the lvb has been updated */ - if (dlm_lvb_operations[li->li_grmode+1][li->li_rqmode+1] == 1) - lvb_updated = 1; - - if (li->li_lksb.sb_status == 0) - li->li_grmode = li->li_rqmode; - - /* Only queue AST if the device is still open */ - if (test_bit(FI_FLAG_OPEN, &li->li_file->fi_flags)) - add_to_astqueue(li, li->li_castaddr, li->li_castparam, - lvb_updated); - - /* If it's a new lock operation that failed, then - * remove it from the owner queue and free the - * lock_info. - */ - if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && - li->li_lksb.sb_status != 0) { - - /* Wait till dlm_lock() has finished */ - wait_for_completion(&li->li_firstcomp); - - spin_lock(&li->li_file->fi_li_lock); - list_del(&li->li_ownerqueue); - clear_bit(LI_FLAG_ONLIST, &li->li_flags); - spin_unlock(&li->li_file->fi_li_lock); - release_lockinfo(li->li_file->fi_ls, li); - return; - } - /* Free unlocks & queries */ - if (li->li_lksb.sb_status == -DLM_EUNLOCK || - li->li_cmd == DLM_USER_QUERY) { - release_lockinfo(li->li_file->fi_ls, li); - } - } else { - /* Synchronous request, just wake up the caller */ - set_bit(LI_FLAG_COMPLETE, &li->li_flags); - wake_up_interruptible(&li->li_waitq); - } -} - -/* - * Wait for the lock op to complete and return the status. 
- */ -static int wait_for_ast(struct lock_info *li) -{ - /* Wait for the AST routine to complete */ - set_task_state(current, TASK_INTERRUPTIBLE); - while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags)) - schedule(); - - set_task_state(current, TASK_RUNNING); - - return li->li_lksb.sb_status; -} - - -/* Open on control device */ -static int dlm_ctl_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return 0; -} - -/* Close on control device */ -static int dlm_ctl_close(struct inode *inode, struct file *file) -{ - return 0; -} - -/* Open on lockspace device */ -static int dlm_open(struct inode *inode, struct file *file) -{ - struct file_info *f; - struct user_ls *lsinfo; - - lsinfo = find_lockspace(iminor(inode)); - if (!lsinfo) - return -ENOENT; - - f = kzalloc(sizeof(struct file_info), GFP_KERNEL); - if (!f) - return -ENOMEM; - - atomic_inc(&lsinfo->ls_refcnt); - INIT_LIST_HEAD(&f->fi_li_list); - INIT_LIST_HEAD(&f->fi_ast_list); - spin_lock_init(&f->fi_li_lock); - spin_lock_init(&f->fi_ast_lock); - init_waitqueue_head(&f->fi_wait); - f->fi_ls = lsinfo; - f->fi_flags = 0; - get_file_info(f); - set_bit(FI_FLAG_OPEN, &f->fi_flags); - - file->private_data = f; - - return 0; -} - -/* Check the user's version matches ours */ -static int check_version(struct dlm_write_request *req) -{ - if (req->version[0] != DLM_DEVICE_VERSION_MAJOR || - (req->version[0] == DLM_DEVICE_VERSION_MAJOR && - req->version[1] > DLM_DEVICE_VERSION_MINOR)) { - - printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " - "user (%d.%d.%d) kernel (%d.%d.%d)\n", - current->comm, - current->pid, - req->version[0], - req->version[1], - req->version[2], - DLM_DEVICE_VERSION_MAJOR, - DLM_DEVICE_VERSION_MINOR, - DLM_DEVICE_VERSION_PATCH); - return -EINVAL; - } - return 0; -} - -/* Close on lockspace device */ -static int dlm_close(struct inode *inode, struct file *file) -{ - struct file_info *f = file->private_data; - struct lock_info li; - struct lock_info *old_li, *safe; - sigset_t tmpsig; - sigset_t allsigs; - struct user_ls *lsinfo; - DECLARE_WAITQUEUE(wq, current); - - lsinfo = find_lockspace(iminor(inode)); - if (!lsinfo) - return -ENOENT; - - /* Mark this closed so that ASTs will not be delivered any more */ - clear_bit(FI_FLAG_OPEN, &f->fi_flags); - - /* Block signals while we are doing this */ - sigfillset(&allsigs); - sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); - - /* We use our own lock_info struct here, so that any - * outstanding "real" ASTs will be delivered with the - * corresponding "real" params, thus freeing the lock_info - * that belongs the lock. This catches the corner case where - * a lock is BUSY when we try to unlock it here - */ - memset(&li, 0, sizeof(li)); - clear_bit(LI_FLAG_COMPLETE, &li.li_flags); - init_waitqueue_head(&li.li_waitq); - add_wait_queue(&li.li_waitq, &wq); - - /* - * Free any outstanding locks, they are on the - * list in LIFO order so there should be no problems - * about unlocking parents before children. 
- */ - list_for_each_entry_safe(old_li, safe, &f->fi_li_list, li_ownerqueue) { - int status; - int flags = 0; - - /* Don't unlock persistent locks, just mark them orphaned */ - if (test_bit(LI_FLAG_PERSISTENT, &old_li->li_flags)) { - list_del(&old_li->li_ownerqueue); - - /* Update master copy */ - /* TODO: Check locking core updates the local and - remote ORPHAN flags */ - li.li_lksb.sb_lkid = old_li->li_lksb.sb_lkid; - status = dlm_lock(f->fi_ls->ls_lockspace, - old_li->li_grmode, &li.li_lksb, - DLM_LKF_CONVERT|DLM_LKF_ORPHAN, - NULL, 0, 0, ast_routine, NULL, NULL); - if (status != 0) - printk("dlm: Error orphaning lock %x: %d\n", - old_li->li_lksb.sb_lkid, status); - - /* But tidy our references in it */ - release_lockinfo(old_li->li_file->fi_ls, old_li); - continue; - } - - clear_bit(LI_FLAG_COMPLETE, &li.li_flags); - - flags = DLM_LKF_FORCEUNLOCK; - if (old_li->li_grmode >= DLM_LOCK_PW) - flags |= DLM_LKF_IVVALBLK; - - status = dlm_unlock(f->fi_ls->ls_lockspace, - old_li->li_lksb.sb_lkid, flags, - &li.li_lksb, &li); - - /* Must wait for it to complete as the next lock could be its - * parent */ - if (status == 0) - wait_for_ast(&li); - - /* Unlock suceeded, free the lock_info struct. */ - if (status == 0) - release_lockinfo(old_li->li_file->fi_ls, old_li); - } - - remove_wait_queue(&li.li_waitq, &wq); - - /* - * If this is the last reference to the lockspace - * then free the struct. If it's an AUTOFREE lockspace - * then free the whole thing. - */ - mutex_lock(&user_ls_lock); - if (atomic_dec_and_test(&lsinfo->ls_refcnt)) { - - if (lsinfo->ls_lockspace) { - if (test_bit(LS_FLAG_AUTOFREE, &lsinfo->ls_flags)) { - unregister_lockspace(lsinfo, 1); - } - } else { - kfree(lsinfo->ls_miscinfo.name); - kfree(lsinfo); - } - } - mutex_unlock(&user_ls_lock); - put_file_info(f); - - /* Restore signals */ - sigprocmask(SIG_SETMASK, &tmpsig, NULL); - recalc_sigpending(); - - return 0; -} - -static int do_user_create_lockspace(struct file_info *fi, uint8_t cmd, - struct dlm_lspace_params *kparams) -{ - int status; - struct user_ls *lsinfo; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - status = register_lockspace(kparams->name, &lsinfo, kparams->flags); - - /* If it succeeded then return the minor number */ - if (status == 0) - status = lsinfo->ls_miscinfo.minor; - - return status; -} - -static int do_user_remove_lockspace(struct file_info *fi, uint8_t cmd, - struct dlm_lspace_params *kparams) -{ - int status; - int force = 1; - struct user_ls *lsinfo; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - mutex_lock(&user_ls_lock); - lsinfo = __find_lockspace(kparams->minor); - if (!lsinfo) { - mutex_unlock(&user_ls_lock); - return -EINVAL; - } - - if (kparams->flags & DLM_USER_LSFLG_FORCEFREE) - force = 3; - - status = unregister_lockspace(lsinfo, force); - mutex_unlock(&user_ls_lock); - - return status; -} - -/* Read call, might block if no ASTs are waiting. - * It will only ever return one message at a time, regardless - * of how many are pending. 
- */ -static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, - loff_t *ppos) -{ - struct file_info *fi = file->private_data; - struct ast_info *ast; - void *data; - int data_size; - int struct_size; - int offset; - DECLARE_WAITQUEUE(wait, current); -#ifdef CONFIG_COMPAT - struct dlm_lock_result32 result32; - - if (count < sizeof(struct dlm_lock_result32)) -#else - if (count < sizeof(struct dlm_lock_result)) -#endif - return -EINVAL; - - spin_lock(&fi->fi_ast_lock); - if (list_empty(&fi->fi_ast_list)) { - - /* No waiting ASTs. - * Return EOF if the lockspace been deleted. - */ - if (test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) - return 0; - - if (file->f_flags & O_NONBLOCK) { - spin_unlock(&fi->fi_ast_lock); - return -EAGAIN; - } - - add_wait_queue(&fi->fi_wait, &wait); - - repeat: - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&fi->fi_ast_list) && - !signal_pending(current)) { - - spin_unlock(&fi->fi_ast_lock); - schedule(); - spin_lock(&fi->fi_ast_lock); - goto repeat; - } - - current->state = TASK_RUNNING; - remove_wait_queue(&fi->fi_wait, &wait); - - if (signal_pending(current)) { - spin_unlock(&fi->fi_ast_lock); - return -ERESTARTSYS; - } - } - - ast = list_entry(fi->fi_ast_list.next, struct ast_info, list); - list_del(&ast->list); - spin_unlock(&fi->fi_ast_lock); - - /* Work out the size of the returned data */ -#ifdef CONFIG_COMPAT - if (test_bit(FI_FLAG_COMPAT, &fi->fi_flags)) { - data_size = struct_size = sizeof(struct dlm_lock_result32); - data = &result32; - } - else -#endif - { - data_size = struct_size = sizeof(struct dlm_lock_result); - data = &ast->result; - } - if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) - data_size += DLM_USER_LVB_LEN; - - offset = struct_size; - - /* Room for the extended data ? 
*/ - if (count >= data_size) { - - if (ast->lvb_updated && ast->result.lksb.sb_lvbptr) { - if (copy_to_user(buffer+offset, - ast->result.lksb.sb_lvbptr, - DLM_USER_LVB_LEN)) - return -EFAULT; - ast->result.lvb_offset = offset; - offset += DLM_USER_LVB_LEN; - } - } - - ast->result.length = data_size; - -#ifdef CONFIG_COMPAT - compat_output(&ast->result, &result32); -#endif - - /* Copy the header now it has all the offsets in it */ - if (copy_to_user(buffer, data, struct_size)) - offset = -EFAULT; - - /* If we only returned a header and there's more to come then put it - back on the list */ - if (count < data_size) { - spin_lock(&fi->fi_ast_lock); - list_add(&ast->list, &fi->fi_ast_list); - spin_unlock(&fi->fi_ast_lock); - } else - kfree(ast); - return offset; -} - -static unsigned int dlm_poll(struct file *file, poll_table *wait) -{ - struct file_info *fi = file->private_data; - - poll_wait(file, &fi->fi_wait, wait); - - spin_lock(&fi->fi_ast_lock); - if (!list_empty(&fi->fi_ast_list)) { - spin_unlock(&fi->fi_ast_lock); - return POLLIN | POLLRDNORM; - } - - spin_unlock(&fi->fi_ast_lock); - return 0; -} - -static struct lock_info *allocate_lockinfo(struct file_info *fi, uint8_t cmd, - struct dlm_lock_params *kparams) -{ - struct lock_info *li; - - if (!try_module_get(THIS_MODULE)) - return NULL; - - li = kzalloc(sizeof(struct lock_info), GFP_KERNEL); - if (li) { - li->li_magic = LOCKINFO_MAGIC; - li->li_file = fi; - li->li_cmd = cmd; - li->li_flags = 0; - li->li_grmode = -1; - li->li_rqmode = -1; - li->li_pend_bastparam = NULL; - li->li_pend_bastaddr = NULL; - li->li_castaddr = NULL; - li->li_castparam = NULL; - li->li_lksb.sb_lvbptr = NULL; - li->li_bastaddr = kparams->bastaddr; - li->li_bastparam = kparams->bastparam; - - get_file_info(fi); - } - return li; -} - -static int do_user_lock(struct file_info *fi, uint8_t cmd, - struct dlm_lock_params *kparams) -{ - struct lock_info *li; - int status; - - /* - * Validate things that we need to have correct. - */ - if (!kparams->castaddr) - return -EINVAL; - - if (!kparams->lksb) - return -EINVAL; - - /* Persistent child locks are not available yet */ - if ((kparams->flags & DLM_LKF_PERSISTENT) && kparams->parent) - return -EINVAL; - - /* For conversions, there should already be a lockinfo struct, - unless we are adopting an orphaned persistent lock */ - if (kparams->flags & DLM_LKF_CONVERT) { - - li = get_lockinfo(fi->fi_ls, kparams->lkid); - - /* If this is a persistent lock we will have to create a - lockinfo again */ - if (!li && (kparams->flags & DLM_LKF_PERSISTENT)) { - li = allocate_lockinfo(fi, cmd, kparams); - if (!li) - return -ENOMEM; - - li->li_lksb.sb_lkid = kparams->lkid; - li->li_castaddr = kparams->castaddr; - li->li_castparam = kparams->castparam; - - /* OK, this isn't exactly a FIRSTLOCK but it is the - first time we've used this lockinfo, and if things - fail we want rid of it */ - init_completion(&li->li_firstcomp); - set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); - add_lockinfo(fi->fi_ls, li); - - /* TODO: do a query to get the current state ?? */ - } - if (!li) - return -EINVAL; - - if (li->li_magic != LOCKINFO_MAGIC) - return -EINVAL; - - /* For conversions don't overwrite the current blocking AST - info so that: - a) if a blocking AST fires before the conversion is queued - it runs the current handler - b) if the conversion is cancelled, the original blocking AST - declaration is active - The pend_ info is made active when the conversion - completes. 
- */ - li->li_pend_bastaddr = kparams->bastaddr; - li->li_pend_bastparam = kparams->bastparam; - } else { - li = allocate_lockinfo(fi, cmd, kparams); - if (!li) - return -ENOMEM; - - /* Allow us to complete our work before - the AST routine runs. In fact we only need (and use) this - when the initial lock fails */ - init_completion(&li->li_firstcomp); - set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); - } - - li->li_user_lksb = kparams->lksb; - li->li_castaddr = kparams->castaddr; - li->li_castparam = kparams->castparam; - li->li_lksb.sb_lkid = kparams->lkid; - li->li_rqmode = kparams->mode; - if (kparams->flags & DLM_LKF_PERSISTENT) - set_bit(LI_FLAG_PERSISTENT, &li->li_flags); - - /* Copy in the value block */ - if (kparams->flags & DLM_LKF_VALBLK) { - if (!li->li_lksb.sb_lvbptr) { - li->li_lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, - GFP_KERNEL); - if (!li->li_lksb.sb_lvbptr) { - status = -ENOMEM; - goto out_err; - } - } - - memcpy(li->li_lksb.sb_lvbptr, kparams->lvb, DLM_USER_LVB_LEN); - } - - /* Lock it ... */ - status = dlm_lock(fi->fi_ls->ls_lockspace, - kparams->mode, &li->li_lksb, - kparams->flags, - kparams->name, kparams->namelen, - kparams->parent, - ast_routine, - li, - (li->li_pend_bastaddr || li->li_bastaddr) ? - bast_routine : NULL); - if (status) - goto out_err; - - /* If it succeeded (this far) with a new lock then keep track of - it on the file's lockinfo list */ - if (!status && test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) { - - spin_lock(&fi->fi_li_lock); - list_add(&li->li_ownerqueue, &fi->fi_li_list); - set_bit(LI_FLAG_ONLIST, &li->li_flags); - spin_unlock(&fi->fi_li_lock); - if (add_lockinfo(fi->fi_ls, li)) - printk(KERN_WARNING "Add lockinfo failed\n"); - - complete(&li->li_firstcomp); - } - - /* Return the lockid as the user needs it /now/ */ - return li->li_lksb.sb_lkid; - - out_err: - if (test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) - release_lockinfo(fi->fi_ls, li); - return status; - -} - -static int do_user_unlock(struct file_info *fi, uint8_t cmd, - struct dlm_lock_params *kparams) -{ - struct lock_info *li; - int status; - int convert_cancel = 0; - - li = get_lockinfo(fi->fi_ls, kparams->lkid); - if (!li) { - li = allocate_lockinfo(fi, cmd, kparams); - if (!li) - return -ENOMEM; - spin_lock(&fi->fi_li_lock); - list_add(&li->li_ownerqueue, &fi->fi_li_list); - set_bit(LI_FLAG_ONLIST, &li->li_flags); - spin_unlock(&fi->fi_li_lock); - } - - if (li->li_magic != LOCKINFO_MAGIC) - return -EINVAL; - - li->li_user_lksb = kparams->lksb; - li->li_castparam = kparams->castparam; - li->li_cmd = cmd; - - /* Cancelling a conversion doesn't remove the lock...*/ - if (kparams->flags & DLM_LKF_CANCEL && li->li_grmode != -1) - convert_cancel = 1; - - /* Wait until dlm_lock() has completed */ - if (!test_bit(LI_FLAG_ONLIST, &li->li_flags)) { - wait_for_completion(&li->li_firstcomp); - } - - /* dlm_unlock() passes a 0 for castaddr which means don't overwrite - the existing li_castaddr as that's the completion routine for - unlocks. dlm_unlock_wait() specifies a new AST routine to be - executed when the unlock completes. 
*/ - if (kparams->castaddr) - li->li_castaddr = kparams->castaddr; - - /* Use existing lksb & astparams */ - status = dlm_unlock(fi->fi_ls->ls_lockspace, - kparams->lkid, - kparams->flags, &li->li_lksb, li); - - if (!status && !convert_cancel) { - spin_lock(&fi->fi_li_lock); - list_del(&li->li_ownerqueue); - clear_bit(LI_FLAG_ONLIST, &li->li_flags); - spin_unlock(&fi->fi_li_lock); - } - - return status; -} - -/* Write call, submit a locking request */ -static ssize_t dlm_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct file_info *fi = file->private_data; - struct dlm_write_request *kparams; - sigset_t tmpsig; - sigset_t allsigs; - int status; - -#ifdef CONFIG_COMPAT - if (count < sizeof(struct dlm_write_request32)) -#else - if (count < sizeof(struct dlm_write_request)) -#endif - return -EINVAL; - - if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) - return -EINVAL; - - /* Has the lockspace been deleted */ - if (fi && test_bit(LS_FLAG_DELETED, &fi->fi_ls->ls_flags)) - return -ENOENT; - - kparams = kmalloc(count, GFP_KERNEL); - if (!kparams) - return -ENOMEM; - - status = -EFAULT; - /* Get the command info */ - if (copy_from_user(kparams, buffer, count)) - goto out_free; - - status = -EBADE; - if (check_version(kparams)) - goto out_free; - -#ifdef CONFIG_COMPAT - if (!kparams->is64bit) { - struct dlm_write_request32 *k32params = (struct dlm_write_request32 *)kparams; - kparams = kmalloc(count + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL); - if (!kparams) - return -ENOMEM; - - if (fi) - set_bit(FI_FLAG_COMPAT, &fi->fi_flags); - compat_input(kparams, k32params); - kfree(k32params); - } -#endif - - /* Block signals while we are doing this */ - sigfillset(&allsigs); - sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); - - status = -EINVAL; - switch (kparams->cmd) - { - case DLM_USER_LOCK: - if (!fi) goto out_sig; - status = do_user_lock(fi, kparams->cmd, &kparams->i.lock); - break; - - case DLM_USER_UNLOCK: - if (!fi) goto out_sig; - status = do_user_unlock(fi, kparams->cmd, &kparams->i.lock); - break; - - case DLM_USER_CREATE_LOCKSPACE: - if (fi) goto out_sig; - status = do_user_create_lockspace(fi, kparams->cmd, - &kparams->i.lspace); - break; - - case DLM_USER_REMOVE_LOCKSPACE: - if (fi) goto out_sig; - status = do_user_remove_lockspace(fi, kparams->cmd, - &kparams->i.lspace); - break; - default: - printk("Unknown command passed to DLM device : %d\n", - kparams->cmd); - break; - } - - out_sig: - /* Restore signals */ - sigprocmask(SIG_SETMASK, &tmpsig, NULL); - recalc_sigpending(); - - out_free: - kfree(kparams); - if (status == 0) - return count; - else - return status; -} - -static struct file_operations _dlm_fops = { - .open = dlm_open, - .release = dlm_close, - .read = dlm_read, - .write = dlm_write, - .poll = dlm_poll, - .owner = THIS_MODULE, -}; - -static struct file_operations _dlm_ctl_fops = { - .open = dlm_ctl_open, - .release = dlm_ctl_close, - .write = dlm_write, - .owner = THIS_MODULE, -}; - -/* - * Create control device - */ -static int __init dlm_device_init(void) -{ - int r; - - INIT_LIST_HEAD(&user_ls_list); - mutex_init(&user_ls_lock); - - ctl_device.name = "dlm-control"; - ctl_device.fops = &_dlm_ctl_fops; - ctl_device.minor = MISC_DYNAMIC_MINOR; - - r = misc_register(&ctl_device); - if (r) { - printk(KERN_ERR "dlm: misc_register failed for control dev\n"); - return r; - } - - return 0; -} - -static void __exit dlm_device_exit(void) -{ - misc_deregister(&ctl_device); -} - 
-MODULE_DESCRIPTION("Distributed Lock Manager device interface"); -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - -module_init(dlm_device_init); -module_exit(dlm_device_exit); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 149106f2b80fab..db080de2a7e91d 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,7 @@ struct dlm_mhandle; #define log_error(ls, fmt, args...) \ printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) +#define DLM_LOG_DEBUG #ifdef DLM_LOG_DEBUG #define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args) #else @@ -204,6 +206,9 @@ struct dlm_args { #define DLM_IFL_MSTCPY 0x00010000 #define DLM_IFL_RESEND 0x00020000 +#define DLM_IFL_DEAD 0x00040000 +#define DLM_IFL_USER 0x00000001 +#define DLM_IFL_ORPHAN 0x00000002 struct dlm_lkb { struct dlm_rsb *lkb_resource; /* the rsb */ @@ -231,6 +236,7 @@ struct dlm_lkb { struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ struct list_head lkb_wait_reply; /* waiting for remote reply */ struct list_head lkb_astqueue; /* need ast to be sent */ + struct list_head lkb_ownqueue; /* list of locks for a process */ char *lkb_lvbptr; struct dlm_lksb *lkb_lksb; /* caller's status block */ @@ -409,6 +415,7 @@ struct rcom_lock { struct dlm_ls { struct list_head ls_list; /* list of lockspaces */ + dlm_lockspace_t *ls_local_handle; uint32_t ls_global_id; /* global unique lockspace ID */ uint32_t ls_exflags; int ls_lvblen; @@ -444,6 +451,8 @@ struct dlm_ls { wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; + struct miscdevice ls_device; + /* recovery related */ struct timer_list ls_timer; @@ -461,6 +470,7 @@ struct dlm_ls { spinlock_t ls_recover_list_lock; int ls_recover_list_count; wait_queue_head_t ls_wait_general; + struct mutex ls_clear_proc_locks; struct list_head ls_root_list; /* root resources */ struct rw_semaphore ls_root_sem; /* protect root_list */ @@ -475,6 +485,40 @@ struct dlm_ls { #define LSFL_RCOM_READY 3 #define LSFL_UEVENT_WAIT 4 +/* much of this is just saving user space pointers associated with the + lock that we pass back to the user lib with an ast */ + +struct dlm_user_args { + struct dlm_user_proc *proc; /* each process that opens the lockspace + device has private data + (dlm_user_proc) on the struct file, + the process's locks point back to it*/ + struct dlm_lksb lksb; + int old_mode; + int update_user_lvb; + struct dlm_lksb __user *user_lksb; + void __user *castparam; + void __user *castaddr; + void __user *bastparam; + void __user *bastaddr; +}; + +#define DLM_PROC_FLAGS_CLOSING 1 +#define DLM_PROC_FLAGS_COMPAT 2 + +/* locks list is kept so we can remove all a process's locks when it + exits (or orphan those that are persistent) */ + +struct dlm_user_proc { + dlm_lockspace_t *lockspace; + unsigned long flags; /* DLM_PROC_FLAGS */ + struct list_head asts; + spinlock_t asts_spin; + struct list_head locks; + spinlock_t locks_spin; + wait_queue_head_t wait; +}; + static inline int dlm_locking_stopped(struct dlm_ls *ls) { return !test_bit(LSFL_RUNNING, &ls->ls_flags); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 5f69639041078f..4e222f873b6c0d 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -55,8 +55,9 @@ R: do_xxxx() L: receive_xxxx_reply() <- R: send_xxxx_reply() */ - +#include #include "dlm_internal.h" +#include #include "memory.h" #include "lowcomms.h" #include "requestqueue.h" @@ -69,6 +70,7 @@ #include "rcom.h" #include 
"recover.h" #include "lvb_table.h" +#include "user.h" #include "config.h" static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb); @@ -84,6 +86,8 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms); static int receive_extralen(struct dlm_message *ms); +#define FAKE_USER_AST (void*)0xff00ff00 + /* * Lock compatibilty matrix - thanks Steve * UN = Unlocked state. Not really a state, used as a flag @@ -152,7 +156,7 @@ static const int __quecvt_compat_matrix[8][8] = { {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ }; -static void dlm_print_lkb(struct dlm_lkb *lkb) +void dlm_print_lkb(struct dlm_lkb *lkb) { printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", @@ -291,7 +295,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len, if (len == r->res_length && !memcmp(name, r->res_name, len)) goto found; } - return -ENOENT; + return -EBADR; found: if (r->res_nodeid && (flags & R_MASTER)) @@ -376,7 +380,7 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen, if (!error) goto out; - if (error == -ENOENT && !(flags & R_CREATE)) + if (error == -EBADR && !(flags & R_CREATE)) goto out; /* the rsb was found but wasn't a master copy */ @@ -920,7 +924,7 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) return; - b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; + b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; if (b == 1) { int len = receive_extralen(ms); memcpy(lkb->lkb_lvbptr, ms->m_extra, len); @@ -963,6 +967,8 @@ static void revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) lkb->lkb_rqmode = DLM_LOCK_IV; switch (lkb->lkb_status) { + case DLM_LKSTS_GRANTED: + break; case DLM_LKSTS_CONVERT: move_lkb(r, lkb, DLM_LKSTS_GRANTED); break; @@ -1727,6 +1733,11 @@ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) return -DLM_EUNLOCK; } +/* FIXME: if revert_lock() finds that the lkb is granted, we should + skip the queue_cast(ECANCEL). It indicates that the request/convert + completed (and queued a normal ast) just before the cancel; we don't + want to clobber the sb_result for the normal ast with ECANCEL. */ + static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) { revert_lock(r, lkb); @@ -2739,7 +2750,7 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms) confirm_master(r, error); break; - case -ENOENT: + case -EBADR: case -ENOTBLK: /* find_rsb failed to find rsb or rsb wasn't master */ r->res_nodeid = -1; @@ -3545,3 +3556,284 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc) return 0; } +int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, + int mode, uint32_t flags, void *name, unsigned int namelen, + uint32_t parent_lkid) +{ + struct dlm_lkb *lkb; + struct dlm_args args; + int error; + + lock_recovery(ls); + + error = create_lkb(ls, &lkb); + if (error) { + kfree(ua); + goto out; + } + + if (flags & DLM_LKF_VALBLK) { + ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); + if (!ua->lksb.sb_lvbptr) { + kfree(ua); + __put_lkb(ls, lkb); + error = -ENOMEM; + goto out; + } + } + + /* After ua is attached to lkb it will be freed by free_lkb(). + When DLM_IFL_USER is set, the dlm knows that this is a userspace + lock and that lkb_astparam is the dlm_user_args structure. 
*/ + + error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, + FAKE_USER_AST, ua, FAKE_USER_AST, &args); + lkb->lkb_flags |= DLM_IFL_USER; + ua->old_mode = DLM_LOCK_IV; + + if (error) { + __put_lkb(ls, lkb); + goto out; + } + + error = request_lock(ls, lkb, name, namelen, &args); + + switch (error) { + case 0: + break; + case -EINPROGRESS: + error = 0; + break; + case -EAGAIN: + error = 0; + /* fall through */ + default: + __put_lkb(ls, lkb); + goto out; + } + + /* add this new lkb to the per-process list of locks */ + spin_lock(&ua->proc->locks_spin); + kref_get(&lkb->lkb_ref); + list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); + spin_unlock(&ua->proc->locks_spin); + out: + unlock_recovery(ls); + return error; +} + +int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, uint32_t lkid, char *lvb_in) +{ + struct dlm_lkb *lkb; + struct dlm_args args; + struct dlm_user_args *ua; + int error; + + lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + /* user can change the params on its lock when it converts it, or + add an lvb that didn't exist before */ + + ua = (struct dlm_user_args *)lkb->lkb_astparam; + + if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { + ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL); + if (!ua->lksb.sb_lvbptr) { + error = -ENOMEM; + goto out_put; + } + } + if (lvb_in && ua->lksb.sb_lvbptr) + memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); + + ua->castparam = ua_tmp->castparam; + ua->castaddr = ua_tmp->castaddr; + ua->bastparam = ua_tmp->bastparam; + ua->bastaddr = ua_tmp->bastaddr; + ua->old_mode = lkb->lkb_grmode; + + error = set_lock_args(mode, &ua->lksb, flags, 0, 0, FAKE_USER_AST, ua, + FAKE_USER_AST, &args); + if (error) + goto out_put; + + error = convert_lock(ls, lkb, &args); + + if (error == -EINPROGRESS || error == -EAGAIN) + error = 0; + out_put: + dlm_put_lkb(lkb); + out: + unlock_recovery(ls); + kfree(ua_tmp); + return error; +} + +int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + uint32_t flags, uint32_t lkid, char *lvb_in) +{ + struct dlm_lkb *lkb; + struct dlm_args args; + struct dlm_user_args *ua; + int error; + + lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + ua = (struct dlm_user_args *)lkb->lkb_astparam; + + if (lvb_in && ua->lksb.sb_lvbptr) + memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); + ua->castparam = ua_tmp->castparam; + + error = set_unlock_args(flags, ua, &args); + if (error) + goto out_put; + + error = unlock_lock(ls, lkb, &args); + + if (error == -DLM_EUNLOCK) + error = 0; + if (error) + goto out_put; + + spin_lock(&ua->proc->locks_spin); + list_del(&lkb->lkb_ownqueue); + spin_unlock(&ua->proc->locks_spin); + + /* this removes the reference for the proc->locks list added by + dlm_user_request */ + unhold_lkb(lkb); + out_put: + dlm_put_lkb(lkb); + out: + unlock_recovery(ls); + return error; +} + +int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + uint32_t flags, uint32_t lkid) +{ + struct dlm_lkb *lkb; + struct dlm_args args; + struct dlm_user_args *ua; + int error; + + lock_recovery(ls); + + error = find_lkb(ls, lkid, &lkb); + if (error) + goto out; + + ua = (struct dlm_user_args *)lkb->lkb_astparam; + ua->castparam = ua_tmp->castparam; + + error = set_unlock_args(flags, ua, &args); + if (error) + goto out_put; + + error = cancel_lock(ls, lkb, &args); + + if (error == -DLM_ECANCEL) + error = 0; + if (error) + goto out_put; + + /* this lkb was 
removed from the WAITING queue */ + if (lkb->lkb_grmode == DLM_LOCK_IV) { + spin_lock(&ua->proc->locks_spin); + list_del(&lkb->lkb_ownqueue); + spin_unlock(&ua->proc->locks_spin); + unhold_lkb(lkb); + } + out_put: + dlm_put_lkb(lkb); + out: + unlock_recovery(ls); + return error; +} + +static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; + + if (ua->lksb.sb_lvbptr) + kfree(ua->lksb.sb_lvbptr); + kfree(ua); + lkb->lkb_astparam = (long)NULL; + + /* TODO: propagate to master if needed */ + return 0; +} + +/* The force flag allows the unlock to go ahead even if the lkb isn't granted. + Regardless of what rsb queue the lock is on, it's removed and freed. */ + +static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) +{ + struct dlm_user_args *ua = (struct dlm_user_args *)lkb->lkb_astparam; + struct dlm_args args; + int error; + + /* FIXME: we need to handle the case where the lkb is in limbo + while the rsb is being looked up, currently we assert in + _unlock_lock/is_remote because rsb nodeid is -1. */ + + set_unlock_args(DLM_LKF_FORCEUNLOCK, ua, &args); + + error = unlock_lock(ls, lkb, &args); + if (error == -DLM_EUNLOCK) + error = 0; + return error; +} + +/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which + 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, + which we clear here. */ + +/* proc CLOSING flag is set so no more device_reads should look at proc->asts + list, and no more device_writes should add lkb's to proc->locks list; so we + shouldn't need to take asts_spin or locks_spin here. this assumes that + device reads/writes/closes are serialized -- FIXME: we may need to serialize + them ourself. */ + +void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) +{ + struct dlm_lkb *lkb, *safe; + + lock_recovery(ls); + mutex_lock(&ls->ls_clear_proc_locks); + + list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) { + if (lkb->lkb_ast_type) { + list_del(&lkb->lkb_astqueue); + unhold_lkb(lkb); + } + + list_del(&lkb->lkb_ownqueue); + + if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { + lkb->lkb_flags |= DLM_IFL_ORPHAN; + orphan_proc_lock(ls, lkb); + } else { + lkb->lkb_flags |= DLM_IFL_DEAD; + unlock_proc_lock(ls, lkb); + } + + /* this removes the reference for the proc->locks list + added by dlm_user_request, it may result in the lkb + being freed */ + + dlm_put_lkb(lkb); + } + mutex_unlock(&ls->ls_clear_proc_locks); + unlock_recovery(ls); +} diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 56cdc073b1f61b..8d2660f0ab108f 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -14,6 +14,7 @@ #define __LOCK_DOT_H__ void dlm_print_rsb(struct dlm_rsb *r); +void dlm_print_lkb(struct dlm_lkb *lkb); int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); int dlm_modes_compat(int mode1, int mode2); int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, @@ -31,6 +32,16 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls); int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc); int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); +int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, + uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid); +int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, uint32_t lkid, char *lvb_in); +int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + uint32_t
flags, uint32_t lkid, char *lvb_in); +int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + uint32_t flags, uint32_t lkid); +void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); + static inline int is_master(struct dlm_rsb *r) { return !r->res_nodeid; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 9ed4b70348fb68..3f6cb422ac4ba1 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -270,12 +270,36 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id) return ls; } -struct dlm_ls *dlm_find_lockspace_local(void *id) +struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace) { - struct dlm_ls *ls = id; + struct dlm_ls *ls; spin_lock(&lslist_lock); - ls->ls_count++; + list_for_each_entry(ls, &lslist, ls_list) { + if (ls->ls_local_handle == lockspace) { + ls->ls_count++; + goto out; + } + } + ls = NULL; + out: + spin_unlock(&lslist_lock); + return ls; +} + +struct dlm_ls *dlm_find_lockspace_device(int minor) +{ + struct dlm_ls *ls; + + spin_lock(&lslist_lock); + list_for_each_entry(ls, &lslist, ls_list) { + if (ls->ls_device.minor == minor) { + ls->ls_count++; + goto out; + } + } + ls = NULL; + out: spin_unlock(&lslist_lock); return ls; } @@ -436,6 +460,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, init_rwsem(&ls->ls_in_recovery); INIT_LIST_HEAD(&ls->ls_requestqueue); mutex_init(&ls->ls_requestqueue_mutex); + mutex_init(&ls->ls_clear_proc_locks); ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL); if (!ls->ls_recover_buf) @@ -444,6 +469,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, INIT_LIST_HEAD(&ls->ls_recover_list); spin_lock_init(&ls->ls_recover_list_lock); ls->ls_recover_list_count = 0; + ls->ls_local_handle = ls; init_waitqueue_head(&ls->ls_wait_general); INIT_LIST_HEAD(&ls->ls_root_list); init_rwsem(&ls->ls_root_sem); diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 17bd3ba863a989..891eabbdd021e1 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h @@ -18,6 +18,7 @@ int dlm_lockspace_init(void); void dlm_lockspace_exit(void); struct dlm_ls *dlm_find_lockspace_global(uint32_t id); struct dlm_ls *dlm_find_lockspace_local(void *id); +struct dlm_ls *dlm_find_lockspace_device(int minor); void dlm_put_lockspace(struct dlm_ls *ls); #endif /* __LOCKSPACE_DOT_H__ */ diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 81bf4cb2203344..a8da8dc36b2eee 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -14,6 +14,7 @@ #include "dlm_internal.h" #include "lockspace.h" #include "lock.h" +#include "user.h" #include "memory.h" #include "lowcomms.h" #include "config.h" @@ -50,10 +51,16 @@ static int __init init_dlm(void) if (error) goto out_debug; + error = dlm_user_init(); + if (error) + goto out_lowcomms; + printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; + out_lowcomms: + dlm_lowcomms_exit(); out_debug: dlm_unregister_debugfs(); out_config: @@ -68,6 +75,7 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { + dlm_user_exit(); dlm_lowcomms_exit(); dlm_config_exit(); dlm_memory_exit(); diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index f7cf4589fae8fc..48dfc27861f426 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -84,6 +84,15 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) void free_lkb(struct dlm_lkb *lkb) { + if (lkb->lkb_flags & DLM_IFL_USER) { + struct dlm_user_args *ua; + ua = (struct dlm_user_args *)lkb->lkb_astparam; + if (ua) { + if (ua->lksb.sb_lvbptr) + kfree(ua->lksb.sb_lvbptr); + kfree(ua); + } + } 
kmem_cache_free(lkb_cache, lkb); } diff --git a/fs/dlm/user.c b/fs/dlm/user.c new file mode 100644 index 00000000000000..1f05960a916f38 --- /dev/null +++ b/fs/dlm/user.c @@ -0,0 +1,769 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dlm_internal.h" +#include "lockspace.h" +#include "lock.h" +#include "lvb_table.h" + +static const char *name_prefix="dlm"; +static struct miscdevice ctl_device; +static struct file_operations device_fops; + +#ifdef CONFIG_COMPAT + +struct dlm_lock_params32 { + __u8 mode; + __u8 namelen; + __u16 flags; + __u32 lkid; + __u32 parent; + + __u32 castparam; + __u32 castaddr; + __u32 bastparam; + __u32 bastaddr; + __u32 lksb; + + char lvb[DLM_USER_LVB_LEN]; + char name[0]; +}; + +struct dlm_write_request32 { + __u32 version[3]; + __u8 cmd; + __u8 is64bit; + __u8 unused[2]; + + union { + struct dlm_lock_params32 lock; + struct dlm_lspace_params lspace; + } i; +}; + +struct dlm_lksb32 { + __u32 sb_status; + __u32 sb_lkid; + __u8 sb_flags; + __u32 sb_lvbptr; +}; + +struct dlm_lock_result32 { + __u32 length; + __u32 user_astaddr; + __u32 user_astparam; + __u32 user_lksb; + struct dlm_lksb32 lksb; + __u8 bast_mode; + __u8 unused[3]; + /* Offsets may be zero if no data is present */ + __u32 lvb_offset; +}; + +static void compat_input(struct dlm_write_request *kb, + struct dlm_write_request32 *kb32) +{ + kb->version[0] = kb32->version[0]; + kb->version[1] = kb32->version[1]; + kb->version[2] = kb32->version[2]; + + kb->cmd = kb32->cmd; + kb->is64bit = kb32->is64bit; + if (kb->cmd == DLM_USER_CREATE_LOCKSPACE || + kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { + kb->i.lspace.flags = kb32->i.lspace.flags; + kb->i.lspace.minor = kb32->i.lspace.minor; + strcpy(kb->i.lspace.name, kb32->i.lspace.name); + } else { + kb->i.lock.mode = kb32->i.lock.mode; + kb->i.lock.namelen = kb32->i.lock.namelen; + kb->i.lock.flags = kb32->i.lock.flags; + kb->i.lock.lkid = kb32->i.lock.lkid; + kb->i.lock.parent = kb32->i.lock.parent; + kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; + kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; + kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; + kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; + kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; + memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); + memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen); + } +} + +static void compat_output(struct dlm_lock_result *res, + struct dlm_lock_result32 *res32) +{ + res32->length = res->length - (sizeof(struct dlm_lock_result) - + sizeof(struct dlm_lock_result32)); + res32->user_astaddr = (__u32)(long)res->user_astaddr; + res32->user_astparam = (__u32)(long)res->user_astparam; + res32->user_lksb = (__u32)(long)res->user_lksb; + res32->bast_mode = res->bast_mode; + + res32->lvb_offset = res->lvb_offset; + res32->length = res->length; + + res32->lksb.sb_status = res->lksb.sb_status; + res32->lksb.sb_flags = res->lksb.sb_flags; + res32->lksb.sb_lkid = res->lksb.sb_lkid; + res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr; +} +#endif + + +void dlm_user_add_ast(struct dlm_lkb *lkb, int type) +{ + struct dlm_ls *ls; + struct dlm_user_args *ua; + struct dlm_user_proc *proc; + + 
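dlm_user_add_ast() below opens with a test / lock / re-test sequence on the ORPHAN and DEAD bits: the unlocked test catches the case where dlm_clear_proc_locks() is already tearing the process down while holding ls_clear_proc_locks, and the locked re-test closes the race. A generic pthreads illustration of the same idiom, with hypothetical names:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t teardown_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool dying;   /* written only under teardown_mutex */

static void deliver_event(void)
{
	if (dying)       /* unlocked peek, as in the kernel's flag test:
	                    teardown may hold the mutex for its whole run */
		return;

	pthread_mutex_lock(&teardown_mutex);
	if (dying) {     /* re-test: teardown won the race to the mutex */
		pthread_mutex_unlock(&teardown_mutex);
		return;
	}
	/* ... safe here: queue the event, touch the shared lists ... */
	pthread_mutex_unlock(&teardown_mutex);
}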
/* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each + lkb before dealing with it. We need to check this + flag before taking ls_clear_proc_locks mutex because if + it's set, dlm_clear_proc_locks() holds the mutex. */ + + if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { + /* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */ + return; + } + + ls = lkb->lkb_resource->res_ls; + mutex_lock(&ls->ls_clear_proc_locks); + + /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast + can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed + lkb->ua so we can't try to use it. */ + + if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) { + /* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */ + goto out; + } + + DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb);); + ua = (struct dlm_user_args *)lkb->lkb_astparam; + proc = ua->proc; + + if (type == AST_BAST && ua->bastaddr == NULL) + goto out; + + spin_lock(&proc->asts_spin); + if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { + kref_get(&lkb->lkb_ref); + list_add_tail(&lkb->lkb_astqueue, &proc->asts); + lkb->lkb_ast_type |= type; + wake_up_interruptible(&proc->wait); + } + + /* We want to copy the lvb to userspace when the completion + ast is read if the status is 0, the lock has an lvb and + lvb_ops says we should. We could probably have set_lvb_lock() + set update_user_lvb instead and not need old_mode */ + + if ((lkb->lkb_ast_type & AST_COMP) && + (lkb->lkb_lksb->sb_status == 0) && + lkb->lkb_lksb->sb_lvbptr && + dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1]) + ua->update_user_lvb = 1; + else + ua->update_user_lvb = 0; + + spin_unlock(&proc->asts_spin); + out: + mutex_unlock(&ls->ls_clear_proc_locks); +} + +static int device_user_lock(struct dlm_user_proc *proc, + struct dlm_lock_params *params) +{ + struct dlm_ls *ls; + struct dlm_user_args *ua; + int error = -ENOMEM; + + ls = dlm_find_lockspace_local(proc->lockspace); + if (!ls) + return -ENOENT; + + if (!params->castaddr || !params->lksb) { + error = -EINVAL; + goto out; + } + + ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); + if (!ua) + goto out; + ua->proc = proc; + ua->user_lksb = params->lksb; + ua->castparam = params->castparam; + ua->castaddr = params->castaddr; + ua->bastparam = params->bastparam; + ua->bastaddr = params->bastaddr; + + if (params->flags & DLM_LKF_CONVERT) + error = dlm_user_convert(ls, ua, + params->mode, params->flags, + params->lkid, params->lvb); + else { + error = dlm_user_request(ls, ua, + params->mode, params->flags, + params->name, params->namelen, + params->parent); + if (!error) + error = ua->lksb.sb_lkid; + } + out: + dlm_put_lockspace(ls); + return error; +} + +static int device_user_unlock(struct dlm_user_proc *proc, + struct dlm_lock_params *params) +{ + struct dlm_ls *ls; + struct dlm_user_args *ua; + int error = -ENOMEM; + + ls = dlm_find_lockspace_local(proc->lockspace); + if (!ls) + return -ENOENT; + + ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); + if (!ua) + goto out; + ua->proc = proc; + ua->user_lksb = params->lksb; + ua->castparam = params->castparam; + ua->castaddr = params->castaddr; + + if (params->flags & DLM_LKF_CANCEL) + error = dlm_user_cancel(ls, ua, params->flags, params->lkid); + else + error = dlm_user_unlock(ls, ua, params->flags, params->lkid, + params->lvb); + out: + dlm_put_lockspace(ls); + return error; +} + +static int device_create_lockspace(struct dlm_lspace_params *params) +{ + dlm_lockspace_t *lockspace; + struct dlm_ls *ls; + int error, len; + + if 
(!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = dlm_new_lockspace(params->name, strlen(params->name), + &lockspace, 0, DLM_USER_LVB_LEN); + if (error) + return error; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -ENOENT; + + error = -ENOMEM; + len = strlen(params->name) + strlen(name_prefix) + 2; + ls->ls_device.name = kzalloc(len, GFP_KERNEL); + if (!ls->ls_device.name) + goto fail; + snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix, + params->name); + ls->ls_device.fops = &device_fops; + ls->ls_device.minor = MISC_DYNAMIC_MINOR; + + error = misc_register(&ls->ls_device); + if (error) { + kfree(ls->ls_device.name); + goto fail; + } + + error = ls->ls_device.minor; + dlm_put_lockspace(ls); + return error; + + fail: + dlm_put_lockspace(ls); + dlm_release_lockspace(lockspace, 0); + return error; +} + +static int device_remove_lockspace(struct dlm_lspace_params *params) +{ + dlm_lockspace_t *lockspace; + struct dlm_ls *ls; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ls = dlm_find_lockspace_device(params->minor); + if (!ls) + return -ENOENT; + + error = misc_deregister(&ls->ls_device); + if (error) { + dlm_put_lockspace(ls); + goto out; + } + kfree(ls->ls_device.name); + + lockspace = ls->ls_local_handle; + + /* dlm_release_lockspace waits for references to go to zero, + so all processes will need to close their device for the ls + before the release will proceed */ + + dlm_put_lockspace(ls); + error = dlm_release_lockspace(lockspace, 0); +out: + return error; +} + +/* Check the user's version matches ours */ +static int check_version(struct dlm_write_request *req) +{ + if (req->version[0] != DLM_DEVICE_VERSION_MAJOR || + (req->version[0] == DLM_DEVICE_VERSION_MAJOR && + req->version[1] > DLM_DEVICE_VERSION_MINOR)) { + + printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " + "user (%d.%d.%d) kernel (%d.%d.%d)\n", + current->comm, + current->pid, + req->version[0], + req->version[1], + req->version[2], + DLM_DEVICE_VERSION_MAJOR, + DLM_DEVICE_VERSION_MINOR, + DLM_DEVICE_VERSION_PATCH); + return -EINVAL; + } + return 0; +} + +/* + * device_write + * + * device_user_lock + * dlm_user_request -> request_lock + * dlm_user_convert -> convert_lock + * + * device_user_unlock + * dlm_user_unlock -> unlock_lock + * dlm_user_cancel -> cancel_lock + * + * device_create_lockspace + * dlm_new_lockspace + * + * device_remove_lockspace + * dlm_release_lockspace + */ + +/* a write to a lockspace device is a lock or unlock request, a write + to the control device is to create/remove a lockspace */ + +static ssize_t device_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct dlm_user_proc *proc = file->private_data; + struct dlm_write_request *kbuf; + sigset_t tmpsig, allsigs; + int error; + +#ifdef CONFIG_COMPAT + if (count < sizeof(struct dlm_write_request32)) +#else + if (count < sizeof(struct dlm_write_request)) +#endif + return -EINVAL; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + if (copy_from_user(kbuf, buf, count)) { + error = -EFAULT; + goto out_free; + } + + if (check_version(kbuf)) { + error = -EBADE; + goto out_free; + } + +#ifdef CONFIG_COMPAT + if (!kbuf->is64bit) { + struct dlm_write_request32 *k32buf; + k32buf = (struct dlm_write_request32 *)kbuf; + kbuf = kmalloc(count + (sizeof(struct dlm_write_request) - + sizeof(struct dlm_write_request32)), GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + if (proc) + set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); + 
compat_input(kbuf, k32buf); + kfree(k32buf); + } +#endif + + /* do we really need this? can a write happen after a close? */ + if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && + test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) + return -EINVAL; + + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); + + error = -EINVAL; + + switch (kbuf->cmd) + { + case DLM_USER_LOCK: + if (!proc) { + log_print("no locking on control device"); + goto out_sig; + } + error = device_user_lock(proc, &kbuf->i.lock); + break; + + case DLM_USER_UNLOCK: + if (!proc) { + log_print("no locking on control device"); + goto out_sig; + } + error = device_user_unlock(proc, &kbuf->i.lock); + break; + + case DLM_USER_CREATE_LOCKSPACE: + if (proc) { + log_print("create/remove only on control device"); + goto out_sig; + } + error = device_create_lockspace(&kbuf->i.lspace); + break; + + case DLM_USER_REMOVE_LOCKSPACE: + if (proc) { + log_print("create/remove only on control device"); + goto out_sig; + } + error = device_remove_lockspace(&kbuf->i.lspace); + break; + + default: + log_print("Unknown command passed to DLM device : %d\n", + kbuf->cmd); + } + + out_sig: + sigprocmask(SIG_SETMASK, &tmpsig, NULL); + recalc_sigpending(); + out_free: + kfree(kbuf); + return error; +} + +/* Every process that opens the lockspace device has its own "proc" structure + hanging off the open file that's used to keep track of locks owned by the + process and asts that need to be delivered to the process. */ + +static int device_open(struct inode *inode, struct file *file) +{ + struct dlm_user_proc *proc; + struct dlm_ls *ls; + + ls = dlm_find_lockspace_device(iminor(inode)); + if (!ls) + return -ENOENT; + + proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); + if (!proc) { + dlm_put_lockspace(ls); + return -ENOMEM; + } + + proc->lockspace = ls->ls_local_handle; + INIT_LIST_HEAD(&proc->asts); + INIT_LIST_HEAD(&proc->locks); + spin_lock_init(&proc->asts_spin); + spin_lock_init(&proc->locks_spin); + init_waitqueue_head(&proc->wait); + file->private_data = proc; + + return 0; +} + +static int device_close(struct inode *inode, struct file *file) +{ + struct dlm_user_proc *proc = file->private_data; + struct dlm_ls *ls; + sigset_t tmpsig, allsigs; + + ls = dlm_find_lockspace_local(proc->lockspace); + if (!ls) + return -ENOENT; + + sigfillset(&allsigs); + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); + + set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags); + + dlm_clear_proc_locks(ls, proc); + + /* at this point no more lkb's should exist for this lockspace, + so there's no chance of dlm_user_add_ast() being called and + looking for lkb->ua->proc */ + + kfree(proc); + file->private_data = NULL; + + dlm_put_lockspace(ls); + dlm_put_lockspace(ls); /* for the find in device_open() */ + + /* FIXME: AUTOFREE: if this ls is no longer used do + device_remove_lockspace() */ + + sigprocmask(SIG_SETMASK, &tmpsig, NULL); + recalc_sigpending(); + + return 0; +} + +static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, + int bmode, char __user *buf, size_t count) +{ +#ifdef CONFIG_COMPAT + struct dlm_lock_result32 result32; +#endif + struct dlm_lock_result result; + void *resultptr; + int error=0; + int len; + int struct_len; + + memset(&result, 0, sizeof(struct dlm_lock_result)); + memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); + result.user_lksb = ua->user_lksb; + + /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated + in a conversion unless the conversion is successful. 
See code + in dlm_user_convert() for updating ua from ua_tmp. OpenVMS, though, + notes that a new blocking AST address and parameter are set even if + the conversion fails, so maybe we should just do that. */ + + if (type == AST_BAST) { + result.user_astaddr = ua->bastaddr; + result.user_astparam = ua->bastparam; + result.bast_mode = bmode; + } else { + result.user_astaddr = ua->castaddr; + result.user_astparam = ua->castparam; + } + +#ifdef CONFIG_COMPAT + if (compat) + len = sizeof(struct dlm_lock_result32); + else +#endif + len = sizeof(struct dlm_lock_result); + struct_len = len; + + /* copy lvb to userspace if there is one, it's been updated, and + the user buffer has space for it */ + + if (ua->update_user_lvb && ua->lksb.sb_lvbptr && + count >= len + DLM_USER_LVB_LEN) { + if (copy_to_user(buf+len, ua->lksb.sb_lvbptr, + DLM_USER_LVB_LEN)) { + error = -EFAULT; + goto out; + } + + result.lvb_offset = len; + len += DLM_USER_LVB_LEN; + } + + result.length = len; + resultptr = &result; +#ifdef CONFIG_COMPAT + if (compat) { + compat_output(&result, &result32); + resultptr = &result32; + } +#endif + + if (copy_to_user(buf, resultptr, struct_len)) + error = -EFAULT; + else + error = len; + out: + return error; +} + +/* a read returns a single ast described in a struct dlm_lock_result */ + +static ssize_t device_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct dlm_user_proc *proc = file->private_data; + struct dlm_lkb *lkb; + struct dlm_user_args *ua; + DECLARE_WAITQUEUE(wait, current); + int error, type=0, bmode=0, removed = 0; + +#ifdef CONFIG_COMPAT + if (count < sizeof(struct dlm_lock_result32)) +#else + if (count < sizeof(struct dlm_lock_result)) +#endif + return -EINVAL; + + /* do we really need this? can a read happen after a close? 
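device_read() hands userspace one struct dlm_lock_result per call, with the LVB appended at lvb_offset when copy_result_to_user() decided to include it. A hedged sketch of the matching read-side parse, assuming the native layout mirrors the dlm_lock_result32 fields shown above:

#include <string.h>
#include <unistd.h>
#include <linux/dlm.h>
#include <linux/dlm_device.h>   /* struct dlm_lock_result, DLM_USER_LVB_LEN */

static int read_one_result(int fd, struct dlm_lksb *lksb_out,
			   char *lvb_out /* DLM_USER_LVB_LEN bytes */)
{
	char buf[sizeof(struct dlm_lock_result) + DLM_USER_LVB_LEN];
	struct dlm_lock_result *res = (struct dlm_lock_result *)buf;
	ssize_t n;

	n = read(fd, buf, sizeof(buf));    /* blocks until an ast is queued */
	if (n < (ssize_t)sizeof(*res))
		return -1;

	*lksb_out = res->lksb;             /* sb_status, sb_lkid, sb_flags */
	if (res->lvb_offset)               /* zero means no LVB was copied */
		memcpy(lvb_out, buf + res->lvb_offset, DLM_USER_LVB_LEN);

	/* res->user_astaddr selects the library callback to run;
	   res->bast_mode is set for blocking asts */
	return 0;
}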
*/ + if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) + return -EINVAL; + + spin_lock(&proc->asts_spin); + if (list_empty(&proc->asts)) { + if (file->f_flags & O_NONBLOCK) { + spin_unlock(&proc->asts_spin); + return -EAGAIN; + } + + add_wait_queue(&proc->wait, &wait); + + repeat: + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&proc->asts) && !signal_pending(current)) { + spin_unlock(&proc->asts_spin); + schedule(); + spin_lock(&proc->asts_spin); + goto repeat; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&proc->wait, &wait); + + if (signal_pending(current)) { + spin_unlock(&proc->asts_spin); + return -ERESTARTSYS; + } + } + + if (list_empty(&proc->asts)) { + spin_unlock(&proc->asts_spin); + return -EAGAIN; + } + + /* there may be both completion and blocking asts to return for + the lkb, don't remove lkb from asts list unless no asts remain */ + + lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); + + if (lkb->lkb_ast_type & AST_COMP) { + lkb->lkb_ast_type &= ~AST_COMP; + type = AST_COMP; + } else if (lkb->lkb_ast_type & AST_BAST) { + lkb->lkb_ast_type &= ~AST_BAST; + type = AST_BAST; + bmode = lkb->lkb_bastmode; + } + + if (!lkb->lkb_ast_type) { + list_del(&lkb->lkb_astqueue); + removed = 1; + } + spin_unlock(&proc->asts_spin); + + ua = (struct dlm_user_args *)lkb->lkb_astparam; + error = copy_result_to_user(ua, + test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), + type, bmode, buf, count); + + /* removes reference for the proc->asts lists added by + dlm_user_add_ast() and may result in the lkb being freed */ + if (removed) + dlm_put_lkb(lkb); + + return error; +} + +static unsigned int device_poll(struct file *file, poll_table *wait) +{ + struct dlm_user_proc *proc = file->private_data; + + poll_wait(file, &proc->wait, wait); + + spin_lock(&proc->asts_spin); + if (!list_empty(&proc->asts)) { + spin_unlock(&proc->asts_spin); + return POLLIN | POLLRDNORM; + } + spin_unlock(&proc->asts_spin); + return 0; +} + +static int ctl_device_open(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + return 0; +} + +static int ctl_device_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static struct file_operations device_fops = { + .open = device_open, + .release = device_close, + .read = device_read, + .write = device_write, + .poll = device_poll, + .owner = THIS_MODULE, +}; + +static struct file_operations ctl_device_fops = { + .open = ctl_device_open, + .release = ctl_device_close, + .write = device_write, + .owner = THIS_MODULE, +}; + +int dlm_user_init(void) +{ + int error; + + ctl_device.name = "dlm-control"; + ctl_device.fops = &ctl_device_fops; + ctl_device.minor = MISC_DYNAMIC_MINOR; + + error = misc_register(&ctl_device); + if (error) + log_print("misc_register failed for control device"); + + return error; +} + +void dlm_user_exit(void) +{ + misc_deregister(&ctl_device); +} + diff --git a/fs/dlm/user.h b/fs/dlm/user.h new file mode 100644 index 00000000000000..d38e9f3e415118 --- /dev/null +++ b/fs/dlm/user.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ */ + +#ifndef __USER_DOT_H__ +#define __USER_DOT_H__ + +void dlm_user_add_ast(struct dlm_lkb *lkb, int type); +int dlm_user_init(void); +void dlm_user_exit(void); + +#endif -- cgit 1.2.3-korg From 3b4a0a74949cf4f87f7ca84cc0457eb5687b2923 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 15 Jul 2006 02:36:31 +0200 Subject: [DLM] [RFC: -mm patch] fs/dlm/lock.c: unexport dlm_lvb_operations On Thu, Jul 13, 2006 at 10:48:00PM -0700, Andrew Morton wrote: >... > Changes since 2.6.18-rc1-mm1: >... > git-gfs2.patch >... > git trees. >... This patch removes the unused EXPORT_SYMBOL_GPL(dlm_lvb_operations). Signed-off-by: Adrian Bunk Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 4e222f873b6c0d..82e1ac251843ef 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -128,7 +128,6 @@ const int dlm_lvb_operations[8][8] = { { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ }; -EXPORT_SYMBOL_GPL(dlm_lvb_operations); #define modes_compat(gr, rq) \ __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] -- cgit 1.2.3-korg From 34e22bed19013c77f942083e25cfc7adf6c71a8f Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 18 Jul 2006 11:24:04 -0500 Subject: [DLM] fix leaking user locks User NOQUEUE lock requests to a remote node that failed with -EAGAIN were never being removed from a process's list of locks. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 7 ++++--- fs/dlm/user.c | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 82e1ac251843ef..22744321816722 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -526,6 +526,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) lkb->lkb_nodeid = -1; lkb->lkb_grmode = DLM_LOCK_IV; kref_init(&lkb->lkb_ref); + INIT_LIST_HEAD(&lkb->lkb_ownqueue); get_random_bytes(&bucket, sizeof(bucket)); bucket &= (ls->ls_lkbtbl_size - 1); @@ -3705,7 +3706,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out_put; spin_lock(&ua->proc->locks_spin); - list_del(&lkb->lkb_ownqueue); + list_del_init(&lkb->lkb_ownqueue); spin_unlock(&ua->proc->locks_spin); /* this removes the reference for the proc->locks list added by @@ -3749,7 +3750,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, /* this lkb was removed from the WAITING queue */ if (lkb->lkb_grmode == DLM_LOCK_IV) { spin_lock(&ua->proc->locks_spin); - list_del(&lkb->lkb_ownqueue); + list_del_init(&lkb->lkb_ownqueue); spin_unlock(&ua->proc->locks_spin); unhold_lkb(lkb); } @@ -3817,7 +3818,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) unhold_lkb(lkb); } - list_del(&lkb->lkb_ownqueue); + list_del_init(&lkb->lkb_ownqueue); if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) { lkb->lkb_flags |= DLM_IFL_ORPHAN; diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 1f05960a916f38..fd19caf9af973b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -133,6 +133,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) struct dlm_ls *ls; struct dlm_user_args *ua; struct dlm_user_proc *proc; + int remove_ownqueue = 0; /* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each lkb before dealing with it. 
We need to check this @@ -171,6 +172,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) wake_up_interruptible(&proc->wait); } + /* noqueue requests that fail may need to be removed from the + proc's locks list, there should be a better way of detecting + this situation than checking all these things... */ + + if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV && + ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue)) + remove_ownqueue = 1; + /* We want to copy the lvb to userspace when the completion ast is read if the status is 0, the lock has an lvb and lvb_ops says we should. We could probably have set_lvb_lock() @@ -185,6 +194,13 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) ua->update_user_lvb = 0; spin_unlock(&proc->asts_spin); + + if (remove_ownqueue) { + spin_lock(&ua->proc->locks_spin); + list_del_init(&lkb->lkb_ownqueue); + spin_unlock(&ua->proc->locks_spin); + dlm_put_lkb(lkb); + } out: mutex_unlock(&ls->ls_clear_proc_locks); } -- cgit 1.2.3-korg From 2b4e926aab7c854a536beee6ba8b9a78a9e00316 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 25 Jul 2006 13:59:48 -0500 Subject: [DLM] fix loop in grant_after_purge The loop in grant_after_purge is intended to find all rsb's in each hash bucket that have the LOCKS_PURGED flag set. The loop was quitting the current bucket after finding just one rsb instead of going until there are no more. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 22744321816722..eaad28e51ec994 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3366,12 +3366,16 @@ static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket) void dlm_grant_after_purge(struct dlm_ls *ls) { struct dlm_rsb *r; - int i; + int bucket = 0; - for (i = 0; i < ls->ls_rsbtbl_size; i++) { - r = find_purged_rsb(ls, i); - if (!r) + while (1) { + r = find_purged_rsb(ls, bucket); + if (!r) { + if (bucket == ls->ls_rsbtbl_size - 1) + break; + bucket++; continue; + } lock_rsb(r); if (is_master(r)) { grant_pending_locks(r); @@ -3379,6 +3383,7 @@ void dlm_grant_after_purge(struct dlm_ls *ls) } unlock_rsb(r); put_rsb(r); + schedule(); } } -- cgit 1.2.3-korg From 81456807a33c2122e2f1f92acfbaaa77b3d06c3c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 25 Jul 2006 14:05:09 -0500 Subject: [DLM] schedule during long loop through locks The loop through all waiting locks in recover_waiters can potentially be long, so we should schedule explicitly. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index eaad28e51ec994..7d38f914c5b97e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3193,6 +3193,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) log_error(ls, "invalid lkb wait_type %d", lkb->lkb_wait_type); } + schedule(); } mutex_unlock(&ls->ls_waiters_mutex); } -- cgit 1.2.3-korg From cc346d555f2c3eb4a63b2df6bf9c9947f0a92a01 Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Tue, 8 Aug 2006 10:34:40 -0400 Subject: [DLM] fix userland unlock This patch fixes the userland DLM unlock code so that it correctly returns the address of the userland lock status block in its completion AST. 
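The one-line diff below copies ua_tmp->user_lksb into the long-lived ua, so the completion ast for the unlock reports the lksb address the caller passed to this write() rather than the one recorded at request time. A toy, compilable model of that copy, using stand-in types rather than the kernel's dlm_user_args:

#include <assert.h>

/* per-lock args (ua) persist from the original request; per-call
   args (ua_tmp) are rebuilt by every device write */
struct user_args { void *user_lksb; void *castparam; };

static void unlock_copy(struct user_args *ua, const struct user_args *ua_tmp)
{
	ua->castparam = ua_tmp->castparam;
	ua->user_lksb = ua_tmp->user_lksb;   /* the line this patch adds */
}

int main(void)
{
	struct user_args ua     = { .user_lksb = (void *)0x1000 }; /* request time */
	struct user_args ua_tmp = { .user_lksb = (void *)0x2000 }; /* unlock call */

	unlock_copy(&ua, &ua_tmp);
	/* without the added line the ast would carry the stale 0x1000 */
	assert(ua.user_lksb == (void *)0x2000);
	return 0;
}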
It fixes bug #201348 Patrick Signed-Off-By: Patrick Caulfield Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 7d38f914c5b97e..bb2e3515121077 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3699,6 +3699,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (lvb_in && ua->lksb.sb_lvbptr) memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); ua->castparam = ua_tmp->castparam; + ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); if (error) -- cgit 1.2.3-korg From a345da3e8f28ff69e1b14df78f7ddc6e7b78b726 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 18 Aug 2006 11:54:25 -0500 Subject: [DLM] dump rsb and locks on assert Introduce new function dlm_dump_rsb() to call within assertions instead of dlm_print_rsb(). The new function dumps info about all locks on the rsb in addition to rsb details. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 40 +++++++++++++++++++++++++++++++--------- fs/dlm/lock.h | 1 + fs/dlm/recover.c | 4 ++-- 3 files changed, 34 insertions(+), 11 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index bb2e3515121077..712438513cc43d 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -171,6 +171,28 @@ void dlm_print_rsb(struct dlm_rsb *r) r->res_recover_locks_count, r->res_name); } +void dlm_dump_rsb(struct dlm_rsb *r) +{ + struct dlm_lkb *lkb; + + dlm_print_rsb(r); + + printk(KERN_ERR "rsb: root_list empty %d recover_list empty %d\n", + list_empty(&r->res_root_list), list_empty(&r->res_recover_list)); + printk(KERN_ERR "rsb lookup list\n"); + list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) + dlm_print_lkb(lkb); + printk(KERN_ERR "rsb grant queue:\n"); + list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) + dlm_print_lkb(lkb); + printk(KERN_ERR "rsb convert queue:\n"); + list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) + dlm_print_lkb(lkb); + printk(KERN_ERR "rsb wait queue:\n"); + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) + dlm_print_lkb(lkb); +} + /* Threads cannot use the lockspace while it's being recovered */ static inline void lock_recovery(struct dlm_ls *ls) @@ -478,7 +500,7 @@ static void unhold_rsb(struct dlm_rsb *r) { int rv; rv = kref_put(&r->res_ref, toss_rsb); - DLM_ASSERT(!rv, dlm_print_rsb(r);); + DLM_ASSERT(!rv, dlm_dump_rsb(r);); } static void kill_rsb(struct kref *kref) @@ -488,12 +510,12 @@ static void kill_rsb(struct kref *kref) /* All work is done after the return from kref_put() so we can release the write_lock before the remove and free. */ - DLM_ASSERT(list_empty(&r->res_lookup),); - DLM_ASSERT(list_empty(&r->res_grantqueue),); - DLM_ASSERT(list_empty(&r->res_convertqueue),); - DLM_ASSERT(list_empty(&r->res_waitqueue),); - DLM_ASSERT(list_empty(&r->res_root_list),); - DLM_ASSERT(list_empty(&r->res_recover_list),); + DLM_ASSERT(list_empty(&r->res_lookup), dlm_dump_rsb(r);); + DLM_ASSERT(list_empty(&r->res_grantqueue), dlm_dump_rsb(r);); + DLM_ASSERT(list_empty(&r->res_convertqueue), dlm_dump_rsb(r);); + DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r);); + DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r);); + DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r);); } /* Attaching/detaching lkb's from rsb's is for rsb reference counting. 
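An aside on the assertion machinery this commit leans on: DLM_ASSERT()'s second argument is an arbitrary statement executed only on failure, which is what lets dlm_dump_rsb(r); be dropped into every assert. A standalone sketch of that macro shape, with hypothetical names (the real macro in dlm_internal.h differs in detail):

#include <stdio.h>
#include <stdlib.h>

#define MY_ASSERT(x, do_dump)                                   \
do {                                                            \
	if (!(x)) {                                             \
		fprintf(stderr, "assertion failed: %s\n", #x);  \
		{ do_dump }                                     \
		abort();                                        \
	}                                                       \
} while (0)

struct rsb { const char *name; int nodeid; };

static void dump_rsb(const struct rsb *r)   /* stands in for dlm_dump_rsb */
{
	fprintf(stderr, "rsb: nodeid %d name %s\n", r->nodeid, r->name);
}

int main(void)
{
	struct rsb r = { "demo", -1 };
	MY_ASSERT(r.nodeid == -1, dump_rsb(&r););   /* passes silently */
	return 0;
}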
@@ -1336,7 +1358,7 @@ static void grant_pending_locks(struct dlm_rsb *r) struct dlm_lkb *lkb, *s; int high = DLM_LOCK_IV; - DLM_ASSERT(is_master(r), dlm_print_rsb(r);); + DLM_ASSERT(is_master(r), dlm_dump_rsb(r);); high = grant_pending_convert(r, high); high = grant_pending_wait(r, high); @@ -1431,7 +1453,7 @@ static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb) return 0; } - DLM_ASSERT(r->res_nodeid == -1, dlm_print_rsb(r);); + DLM_ASSERT(r->res_nodeid == -1, dlm_dump_rsb(r);); dir_nodeid = dlm_dir_nodeid(r); diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 8d2660f0ab108f..0843a3073ec3ea 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -14,6 +14,7 @@ #define __LOCK_DOT_H__ void dlm_print_rsb(struct dlm_rsb *r); +void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); int dlm_modes_compat(int mode1, int mode2); diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 1a86dfc8034eb2..528a2a0fa32948 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -490,7 +490,7 @@ static int recover_locks(struct dlm_rsb *r) if (all_queues_empty(r)) goto out; - DLM_ASSERT(!r->res_recover_locks_count, dlm_print_rsb(r);); + DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r);); error = recover_locks_queue(r, &r->res_grantqueue); if (error) @@ -557,7 +557,7 @@ int dlm_recover_locks(struct dlm_ls *ls) void dlm_recovered_lock(struct dlm_rsb *r) { - DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_print_rsb(r);); + DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r);); r->res_recover_locks_count--; if (!r->res_recover_locks_count) { -- cgit 1.2.3-korg From 10948eb4eddea6750a3b27f77bec423d844e6726 Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Wed, 23 Aug 2006 09:49:31 +0100 Subject: [DLM] preserve lksb address in user conversions This patch fixes bz#203444 where the LKSB was lost during userland conversion operations Signed-off-by: Patrick Caulfield Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 712438513cc43d..7ffbe8090de038 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3683,6 +3683,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua->castaddr = ua_tmp->castaddr; ua->bastparam = ua_tmp->bastparam; ua->bastaddr = ua_tmp->bastaddr; + ua->user_lksb = ua_tmp->user_lksb; ua->old_mode = lkb->lkb_grmode; error = set_lock_args(mode, &ua->lksb, flags, 0, 0, FAKE_USER_AST, ua, -- cgit 1.2.3-korg From c059f70e357af1adcfc1a9294e44cdd945adb841 Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Wed, 23 Aug 2006 10:24:03 +0100 Subject: [DLM] down conversion clearing flags Oh, and here's (hopefully) the last of these ua_tmp patches. I think I've caught all the paths now. Sorry it didn't make the last one. 
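The pattern behind this run of fixes: every device write builds a fresh dlm_user_args (ua_tmp), and any per-call field not explicitly copied into the persistent ua silently keeps its request-time value. A hypothetical helper that would have consolidated the copies; the field names follow struct dlm_user_args, but this function is not part of the series:

/* stand-in mirroring the dlm_user_args fields refreshed per call */
struct user_args {
	void *castparam, *castaddr, *bastparam, *bastaddr, *user_lksb;
};

static void ua_update_from_tmp(struct user_args *ua,
                               const struct user_args *ua_tmp)
{
	ua->castparam = ua_tmp->castparam;
	ua->castaddr  = ua_tmp->castaddr;
	ua->bastparam = ua_tmp->bastparam;
	ua->bastaddr  = ua_tmp->bastaddr;
	ua->user_lksb = ua_tmp->user_lksb;   /* the field missed above */
}

One call site per operation would have made a forgotten field a review item rather than a series of one-line follow-ups.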
Signed-off-by: Patrick Caulfield Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 7ffbe8090de038..6dcd475826c185 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3765,6 +3765,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua = (struct dlm_user_args *)lkb->lkb_astparam; ua->castparam = ua_tmp->castparam; + ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); if (error) -- cgit 1.2.3-korg From 32f105a123804c7882d447f013aeb3530b4d63c0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 23 Aug 2006 16:07:31 -0400 Subject: [DLM] down conversion clearing flags The down-conversion optimization was resulting in the lkb flags being cleared because the stub message reply had no flags value set. Copy the current flags into the stub message so they'll be copied back into the lkb as part of processing the fake reply. Also add an assertion to catch this error more directly if it exists elsewhere. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/ast.c | 1 + fs/dlm/dlm_internal.h | 2 ++ fs/dlm/lock.c | 9 ++++----- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index a211330cbc422e..f91d39cb1e0bd0 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -38,6 +38,7 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type) dlm_user_add_ast(lkb, type); return; } + DLM_ASSERT(lkb->lkb_astaddr != DLM_FAKE_USER_AST, dlm_print_lkb(lkb);); spin_lock(&ast_queue_lock); if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index da7509986699ff..1e5cd67e1b7ad2 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -91,6 +91,8 @@ struct dlm_mhandle; } \ } +#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL) + struct dlm_direntry { struct list_head list; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 6dcd475826c185..67247f0b508a5f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -86,8 +86,6 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, struct dlm_message *ms); static int receive_extralen(struct dlm_message *ms); -#define FAKE_USER_AST (void*)0xff00ff00 - /* * Lock compatibilty matrix - thanks Steve * UN = Unlocked state. Not really a state, used as a flag @@ -2195,6 +2193,7 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (!error && down_conversion(lkb)) { remove_from_waiters(lkb); r->res_ls->ls_stub_ms.m_result = 0; + r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); } @@ -3615,7 +3614,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, lock and that lkb_astparam is the dlm_user_args structure. 
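The stub-reply optimization fixed above grants a compatible down-conversion locally by feeding the synthesized ls_stub_ms through the normal reply path; any reply field the stub leaves zeroed is then copied back over live lkb state, which is how the flags were being wiped. A toy, compilable model of why the stub must be seeded from the lkb (stand-in types, not the kernel structures):

#include <assert.h>
#include <stdint.h>

struct lkb { uint32_t flags; };
struct msg { int result; uint32_t flags; };

/* the reply path copies message fields back into the lock */
static void receive_reply(struct lkb *lkb, const struct msg *ms)
{
	lkb->flags = ms->flags;
}

int main(void)
{
	struct lkb lkb  = { .flags = 0x10000 };  /* some internal flag bits */
	struct msg stub = { .result = 0 };       /* faked local "reply" */

	stub.flags = lkb.flags;   /* the fix: seed the stub before replay */
	receive_reply(&lkb, &stub);
	assert(lkb.flags == 0x10000);
	return 0;
}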
*/ error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, - FAKE_USER_AST, ua, FAKE_USER_AST, &args); + DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); lkb->lkb_flags |= DLM_IFL_USER; ua->old_mode = DLM_LOCK_IV; @@ -3686,8 +3685,8 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, ua->user_lksb = ua_tmp->user_lksb; ua->old_mode = lkb->lkb_grmode; - error = set_lock_args(mode, &ua->lksb, flags, 0, 0, FAKE_USER_AST, ua, - FAKE_USER_AST, &args); + error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST, + ua, DLM_FAKE_USER_AST, &args); if (error) goto out_put; -- cgit 1.2.3-korg From fa9f0e4925c7796afd14bf7bbf7a064078818bbc Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 8 Sep 2006 08:36:35 -0500 Subject: [DLM] confirm master for recovered waiting requests Fixing the following scenario: - A request is on the waiters list waiting for a reply from a remote node. - The request is the first one on the resource, so first_lkid is set. - The remote node fails causing recovery. - During recovery the requesting node becomes master. - The request is now processed locally instead of being a remote operation. - At this point we need to call confirm_master() on the resource since we're certain we're now the master node. This will clear first_lkid. - We weren't calling confirm_master(), so first_lkid was not being cleared causing subsequent requests on that resource to get stuck. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 67247f0b508a5f..af2f2f01bd5fd4 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3283,6 +3283,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) hold_rsb(r); lock_rsb(r); _request_lock(r, lkb); + if (is_master(r)) + confirm_master(r, 0); unlock_rsb(r); put_rsb(r); break; -- cgit 1.2.3-korg From 907b9bceb41fa46beae93f79cc4a2247df502c0f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 25 Sep 2006 09:26:04 -0400 Subject: [GFS2/DLM] Fix trailing whitespace As per Andrew Morton's request, removed trailing whitespace. Cc: Andrew Morton Signed-off-by: Steven Whitehouse --- fs/dlm/lock.c | 2 +- fs/gfs2/acl.c | 2 +- fs/gfs2/bmap.c | 4 ++-- fs/gfs2/daemon.c | 2 +- fs/gfs2/dir.c | 13 ++++++------- fs/gfs2/eaops.c | 2 +- fs/gfs2/eattr.c | 2 +- fs/gfs2/glock.c | 12 ++++++------ fs/gfs2/inode.c | 5 ++--- fs/gfs2/inode.h | 2 +- fs/gfs2/lm.c | 2 +- fs/gfs2/locking/dlm/thread.c | 4 ++-- fs/gfs2/lops.c | 2 +- fs/gfs2/main.c | 2 +- fs/gfs2/ops_address.c | 4 ++-- fs/gfs2/ops_file.c | 6 +++--- fs/gfs2/ops_fstype.c | 18 +++++++++--------- fs/gfs2/ops_inode.c | 2 +- fs/gfs2/ops_super.c | 2 +- fs/gfs2/quota.c | 6 +++--- fs/gfs2/recovery.c | 4 ++-- fs/gfs2/rgrp.c | 2 +- fs/gfs2/super.c | 5 ++--- 23 files changed, 51 insertions(+), 54 deletions(-) (limited to 'fs/dlm/lock.c') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index af2f2f01bd5fd4..3f2befa4797b9a 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1757,7 +1757,7 @@ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) skip the queue_cast(ECANCEL). It indicates that the request/convert completed (and queued a normal ast) just before the cancel; we don't want to clobber the sb_result for the normal ast with ECANCEL. 
*/ - + static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) { revert_lock(r, lkb); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3123fc07123378..5f959b8ce4065d 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -181,7 +181,7 @@ int gfs2_check_acl(struct inode *inode, int mask) error = gfs2_check_acl_locked(inode, mask); gfs2_glock_dq_uninit(&i_gh); } - + return error; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3fb9a26b6f5871..92eef825167d5b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -130,7 +130,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out; - + if (ip->i_di.di_size) { /* Get a free block, fill it with the stuffed data, and write it out to disk */ @@ -246,7 +246,7 @@ static int build_height(struct inode *inode, unsigned height) blocks[n] = gfs2_meta_new(ip->i_gl, bn); gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); } - + n = 0; bn = blocks[0]->b_blocknr; if (new_height > 1) { diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index a9908cd78cd948..cab1f68d468518 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -30,7 +30,7 @@ /* This uses schedule_timeout() instead of msleep() because it's good for the daemons to wake up more often than the timeout when unmounting so the user's unmount doesn't sit there forever. - + The kthread functions used to start these daemons block and flush signals. */ /** diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 40e94ac0b93d36..459498cac93bdb 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -81,7 +81,7 @@ #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) -typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, +typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, u64 leaf_no, void *data); typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); @@ -119,7 +119,6 @@ static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, unsigned int offset, unsigned int size) - { struct buffer_head *dibh; int error; @@ -685,7 +684,7 @@ static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, const struct qstr *name) { struct gfs2_dirent *dent; - dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, + dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_find_space, name, NULL); if (!dent || IS_ERR(dent)) return dent; @@ -764,7 +763,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, gfs2_consist_inode(ip); return ERR_PTR(-EIO); } - + index = name->hash >> (32 - ip->i_di.di_depth); error = get_first_leaf(ip, index, &bh); if (error) @@ -779,14 +778,14 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, brelse(bh); if (!ln) break; - + error = get_leaf(ip, ln, &bh); } while(!error); return error ? 
ERR_PTR(error) : NULL; } - + error = gfs2_meta_inode_buffer(ip, &bh); if (error) return ERR_PTR(error); @@ -810,7 +809,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct qstr name = { .name = "", .len = 0, .hash = 0 }; if (!bh) return NULL; - + gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); leaf = (struct gfs2_leaf *)bh->b_data; diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index 1a7877fe73934c..92c54e9b0dc3b8 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -146,7 +146,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) if (error == -ENODATA) error = 0; } - return error; + return error; } return -EPERM; diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index bd5ca602f9f094..a65a4ccfd4dd17 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(&ip->i_di, dibh->b_data); brelse(dibh); - } + } gfs2_trans_end(GFS2_SB(&ip->i_inode)); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f98694e7d6685f..78fe0fae23ff53 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -371,7 +371,7 @@ fail_aspace: if (gl->gl_aspace) gfs2_aspace_put(gl->gl_aspace); fail: - kmem_cache_free(gfs2_glock_cachep, gl); + kmem_cache_free(gfs2_glock_cachep, gl); return error; } @@ -614,7 +614,7 @@ static int rq_greedy(struct gfs2_holder *gh) gfs2_holder_uninit(gh); kfree(container_of(gh, struct greedy, gr_gh)); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_spin); return 0; } @@ -1184,11 +1184,11 @@ static void add_to_queue(struct gfs2_holder *gh) if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", + printk(KERN_INFO "lock type : %d lock state : %d\n", existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", + printk(KERN_INFO "lock type : %d lock state : %d\n", gl->gl_name.ln_type, gl->gl_state); BUG(); } @@ -1203,7 +1203,7 @@ static void add_to_queue(struct gfs2_holder *gh) if (gh->gh_flags & LM_FLAG_PRIORITY) list_add(&gh->gh_list, &gl->gl_waiters3); else - list_add_tail(&gh->gh_list, &gl->gl_waiters3); + list_add_tail(&gh->gh_list, &gl->gl_waiters3); } /** @@ -2003,7 +2003,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) for (;;) { cont = 0; for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - if (examine_bucket(clear_glock, sdp, x)) + if (examine_bucket(clear_glock, sdp, x)) cont = 1; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b9e4bcb3bf1e18..ac9535be304f07 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -386,7 +386,6 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root, struct nameidata *nd) - { struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); @@ -491,7 +490,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_end_trans; - + gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); if (!ir.ir_length) { @@ -769,7 +768,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = 
gfs2_trans_begin(sdp, sdp->sd_max_dirres + al->al_rgd->rd_ri.ri_length + - 2 * RES_DINODE + + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto fail_ipreserv; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 32015d89f24999..f5d8617605795b 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -34,7 +34,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip); int gfs2_dinode_dealloc(struct gfs2_inode *inode); int gfs2_change_nlink(struct gfs2_inode *ip, int diff); -struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, +struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root, struct nameidata *nd); struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode); diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c index 2109fc4791d4b9..effe4a337c1dbc 100644 --- a/fs/gfs2/lm.c +++ b/fs/gfs2/lm.c @@ -106,7 +106,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) fs_err(sdp, "about to withdraw from the cluster\n"); BUG_ON(sdp->sd_args.ar_debug); - + fs_err(sdp, "waiting for outstanding I/O\n"); diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 554bf882a4c2bf..9cf1f168eaf8b2 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c @@ -54,7 +54,7 @@ static void process_complete(struct gdlm_lock *lp) if (lp->lksb.sb_status == -DLM_ECANCEL) { log_info("complete dlm cancel %x,%llx flags %lx", - lp->lockname.ln_type, + lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number, lp->flags); @@ -102,7 +102,7 @@ static void process_complete(struct gdlm_lock *lp) if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) { log_info("complete internal cancel %x,%llx", - lp->lockname.ln_type, + lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number); lp->req = lp->cur; acb.lc_ret |= LM_OUT_CANCELED; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 2a98cbe3290f0f..881e337b6a70ab 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -420,7 +420,7 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) gfs2_log_lock(sdp); sdp->sd_log_num_rg++; list_add(&le->le_list, &sdp->sd_log_le_rg); - gfs2_log_unlock(sdp); + gfs2_log_unlock(sdp); } static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 7903be735fe9ad..21508a13bb78d7 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -77,7 +77,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs2_glock_cachep = kmem_cache_create("gfs2_glock", sizeof(struct gfs2_glock), - 0, 0, + 0, 0, gfs2_init_glock_once, NULL); if (!gfs2_glock_cachep) goto fail; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 8b18e974fa4dbb..811f4ada2a019e 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -296,7 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh); do_unlock = 1; ret = gfs2_glock_nq_m_atime(1, &gh); - if (ret == GLR_TRYFAILED) + if (ret == GLR_TRYFAILED) goto out_noerror; if (unlikely(ret)) goto out_unlock; @@ -681,7 +681,7 @@ static unsigned limit = 0; gl = bd->bd_gl; - fs_warn(sdp, "gl = (%u, %llu)\n", + fs_warn(sdp, "gl = (%u, %llu)\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n", diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 372dbcb3f7f304..a9ac1358ce21ca 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -236,7 +236,7 @@ static int 
gfs2_get_flags(struct file *filp, u32 __user *ptr) error = gfs2_glock_nq_m_atime(1, &gh); if (error) return error; - + iflags = iflags_cvt(gfs2_to_iflags, ip->i_di.di_flags); if (put_user(iflags, ptr)) error = -EFAULT; @@ -299,7 +299,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out; if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) goto out; - if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && + if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && !capable(CAP_LINUX_IMMUTABLE)) goto out; if (!IS_IMMUTABLE(inode)) { @@ -541,7 +541,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; gfs2_glock_hold(gl); flock_lock_file_wait(file, - &(struct file_lock){.fl_type = F_UNLCK}); + &(struct file_lock){.fl_type = F_UNLCK}); gfs2_glock_dq_uninit(fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e32a6b242e0cd0..a9aa2edd756fcc 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -146,7 +146,7 @@ static int init_names(struct gfs2_sbd *sdp, int silent) brelse(bh); return -ENOMEM; } - gfs2_sb_in(sb, bh->b_data); + gfs2_sb_in(sb, bh->b_data); brelse(bh); error = gfs2_check_sb(sdp, sb, silent); @@ -272,7 +272,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) } return 0; } - + error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, LM_ST_SHARED, 0, &sb_gh); if (error) { @@ -358,7 +358,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = -EINVAL; if (!gfs2_jindex_size(sdp)) { fs_err(sdp, "no journals!\n"); - goto fail_jindex; + goto fail_jindex; } if (sdp->sd_args.ar_spectator) { @@ -789,7 +789,7 @@ out: return error; } -static int fill_super_meta(struct super_block *sb, struct super_block *new, +static int fill_super_meta(struct super_block *sb, struct super_block *new, void *data, int silent) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -821,7 +821,7 @@ static int set_bdev_super(struct super_block *s, void *data) s->s_dev = s->s_bdev->bd_dev; return 0; } - + static int test_bdev_super(struct super_block *s, void *data) { return s->s_bdev == data; @@ -835,10 +835,10 @@ static struct super_block* get_gfs2_sb(const char *dev_name) struct super_block *sb = NULL, *s; struct list_head *l; int error; - + error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); if (error) { - printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", + printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", dev_name); goto out; } @@ -900,9 +900,9 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, deactivate_super(new); goto error; } - + new->s_flags |= MS_ACTIVE; - + /* Grab a reference to the gfs2 mount point */ atomic_inc(&sdp->sd_gfs2mnt->mnt_count); return simple_set_mnt(mnt, new); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index bb2ef6a865335d..57af0192b24169 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -522,7 +522,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, case S_IFSOCK: break; default: - return -EOPNOTSUPP; + return -EOPNOTSUPP; }; gfs2_holder_init(dip->i_gl, 0, 0, ghs); diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f9538849c4181a..8cfda64521393f 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -367,7 +367,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) return 0; } -/* +/* * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared 
lock on the iopen lock and * taking the exclusive lock. I'd rather do a shared -> exclusive diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index da46e14388f4bd..c0a3c9a2a593de 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -556,7 +556,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) qd_hold(qd); slot_hold(qd); } - + mutex_unlock(&sdp->sd_quota_mutex); } @@ -777,7 +777,7 @@ restart: gfs2_glock_dq_uninit(&i_gh); - + gfs2_quota_in(&q, buf); qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); @@ -1062,7 +1062,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) if (!ip->i_di.di_size || ip->i_di.di_size > (64 << 20) || ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) { gfs2_consist_inode(ip); - return -EIO; + return -EIO; } sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 4d4ea7e66edc07..0a8a4b87dcc643 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -444,11 +444,11 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) switch (error) { case 0: break; - + case GLR_TRYFAILED: fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid); error = 0; - + default: goto fail; }; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5f8e225c54977e..b261385c006556 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -896,7 +896,7 @@ static int get_local_rgrp(struct gfs2_inode *ip) rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); while (rgd) { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, &al->al_rgd_gh); switch (error) { case 0: diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fe207a3e206e9f..f6ce5e4eaf7e99 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -650,8 +650,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_total += total; l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; - gfs2_statfs_change_out(l_sc, l_bh->b_data + - sizeof(struct gfs2_dinode)); + gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); @@ -678,7 +677,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + - sizeof(struct gfs2_dinode)); + sizeof(struct gfs2_dinode)); if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { spin_unlock(&sdp->sd_statfs_spin); goto out_bh; -- cgit 1.2.3-korg
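To make the recovery fix above concrete, here is a minimal annotated sketch of the waiter-resend path in dlm_recover_waiters_post() after the patch. The surrounding switch on the waiter type and the DLM_MSG_REQUEST case label are assumed context (only this case is shown); the helpers are exactly the ones visible in the diff:

	/* Sketch, not the full function: a request was waiting on a
	 * remote master that failed.  Recovery may have made this node
	 * the master, in which case _request_lock() now processes the
	 * request locally instead of resending it. */
	case DLM_MSG_REQUEST:			/* assumed case label */
		hold_rsb(r);
		lock_rsb(r);
		_request_lock(r, lkb);
		/* We are now certain of mastery: confirm it so that
		 * first_lkid is cleared.  Without this, later requests
		 * on the rsb stall waiting for a confirmation that
		 * never comes. */
		if (is_master(r))
			confirm_master(r, 0);
		unlock_rsb(r);
		put_rsb(r);
		break;

The key point is the is_master()/confirm_master() pair: first_lkid was set because this request was the first lkb on the resource, and only confirm_master() clears it on the node that became master during recovery.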