libceph: a major OSD client update
This is a major sync up, up to ~Jewel. The highlights are:

- per-session request trees (vs a global per-client tree)
- per-session locking (vs a global per-client rwlock)
- homeless OSD session
- no ad-hoc global per-client lists
- support for pool quotas
- foundation for watch/notify v2 support
- foundation for map check (pool deletion detection) support

The switchover is incomplete: lingering requests can be set up and torn down but aren't ever reestablished. This functionality is restored with the introduction of the new lingering infrastructure (ceph_osd_linger_request, linger_work, etc) in a later commit.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
9dd2845ccb
commit
5aea3dcd50
5 changed files with 601 additions and 629 deletions
|
@ -193,12 +193,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
|||
if (copy_from_user(&dl, arg, sizeof(dl)))
|
||||
return -EFAULT;
|
||||
|
||||
down_read(&osdc->map_sem);
|
||||
down_read(&osdc->lock);
|
||||
r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
|
||||
&dl.object_no, &dl.object_offset,
|
||||
&olen);
|
||||
if (r < 0) {
|
||||
up_read(&osdc->map_sem);
|
||||
up_read(&osdc->lock);
|
||||
return -EIO;
|
||||
}
|
||||
dl.file_offset -= dl.object_offset;
|
||||
|
@ -217,7 +217,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
|||
|
||||
r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
|
||||
if (r < 0) {
|
||||
up_read(&osdc->map_sem);
|
||||
up_read(&osdc->lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -230,7 +230,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
|
|||
} else {
|
||||
memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
|
||||
}
|
||||
up_read(&osdc->map_sem);
|
||||
up_read(&osdc->lock);
|
||||
|
||||
/* send result back to user */
|
||||
if (copy_to_user(arg, &dl, sizeof(dl)))
|
||||
|
|
|
@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
|
|||
char buf[128];
|
||||
|
||||
dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
|
||||
down_read(&osdc->map_sem);
|
||||
down_read(&osdc->lock);
|
||||
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
||||
if (pool_name) {
|
||||
size_t len = strlen(pool_name);
|
||||
|
@ -107,7 +107,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
|
|||
ret = -ERANGE;
|
||||
}
|
||||
}
|
||||
up_read(&osdc->map_sem);
|
||||
up_read(&osdc->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -141,13 +141,13 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
|
|||
s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
|
||||
const char *pool_name;
|
||||
|
||||
down_read(&osdc->map_sem);
|
||||
down_read(&osdc->lock);
|
||||
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
|
||||
if (pool_name)
|
||||
ret = snprintf(val, size, "%s", pool_name);
|
||||
else
|
||||
ret = snprintf(val, size, "%lld", (unsigned long long)pool);
|
||||
up_read(&osdc->map_sem);
|
||||
up_read(&osdc->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,12 +33,13 @@ struct ceph_osd {
|
|||
int o_incarnation;
|
||||
struct rb_node o_node;
|
||||
struct ceph_connection o_con;
|
||||
struct list_head o_requests;
|
||||
struct rb_root o_requests;
|
||||
struct list_head o_linger_requests;
|
||||
struct list_head o_osd_lru;
|
||||
struct ceph_auth_handshake o_auth;
|
||||
unsigned long lru_ttl;
|
||||
struct list_head o_keepalive_item;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
#define CEPH_OSD_SLAB_OPS 2
|
||||
|
@ -144,8 +145,6 @@ struct ceph_osd_request_target {
|
|||
struct ceph_osd_request {
|
||||
u64 r_tid; /* unique for this client */
|
||||
struct rb_node r_node;
|
||||
struct list_head r_req_lru_item;
|
||||
struct list_head r_osd_item;
|
||||
struct list_head r_linger_item;
|
||||
struct list_head r_linger_osd_item;
|
||||
struct ceph_osd *r_osd;
|
||||
|
@ -219,19 +218,16 @@ struct ceph_osd_client {
|
|||
struct ceph_client *client;
|
||||
|
||||
struct ceph_osdmap *osdmap; /* current map */
|
||||
struct rw_semaphore map_sem;
|
||||
struct rw_semaphore lock;
|
||||
|
||||
struct mutex request_mutex;
|
||||
struct rb_root osds; /* osds */
|
||||
struct list_head osd_lru; /* idle osds */
|
||||
spinlock_t osd_lru_lock;
|
||||
u64 last_tid; /* tid of last request */
|
||||
struct rb_root requests; /* pending requests */
|
||||
struct list_head req_lru; /* in-flight lru */
|
||||
struct list_head req_unsent; /* unsent/need-resend queue */
|
||||
struct list_head req_notarget; /* map to no osd */
|
||||
struct list_head req_linger; /* lingering requests */
|
||||
int num_requests;
|
||||
struct ceph_osd homeless_osd;
|
||||
atomic64_t last_tid; /* tid of last request */
|
||||
atomic_t num_requests;
|
||||
atomic_t num_homeless;
|
||||
struct delayed_work timeout_work;
|
||||
struct delayed_work osds_timeout_work;
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
|
|
@ -182,21 +182,39 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
|
|||
seq_putc(s, '\n');
|
||||
}
|
||||
|
||||
/*
 * Dump every request found in @osd's o_requests rb-tree to seq_file @s,
 * calling dump_request() once per entry in rb_first()/rb_next() order.
 * osd->lock is taken before the walk and released after it.
 */
static void dump_requests(struct seq_file *s, struct ceph_osd *osd)
|
||||
{
|
||||
struct rb_node *n;
|
||||
|
||||
mutex_lock(&osd->lock);
|
||||
for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
|
||||
struct ceph_osd_request *req =
|
||||
rb_entry(n, struct ceph_osd_request, r_node);
|
||||
|
||||
dump_request(s, req);
|
||||
}
|
||||
|
||||
mutex_unlock(&osd->lock);
|
||||
}
|
||||
|
||||
static int osdc_show(struct seq_file *s, void *pp)
|
||||
{
|
||||
struct ceph_client *client = s->private;
|
||||
struct ceph_osd_client *osdc = &client->osdc;
|
||||
struct rb_node *p;
|
||||
struct rb_node *n;
|
||||
|
||||
mutex_lock(&osdc->request_mutex);
|
||||
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
|
||||
struct ceph_osd_request *req;
|
||||
down_read(&osdc->lock);
|
||||
seq_printf(s, "REQUESTS %d homeless %d\n",
|
||||
atomic_read(&osdc->num_requests),
|
||||
atomic_read(&osdc->num_homeless));
|
||||
for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
|
||||
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
|
||||
|
||||
req = rb_entry(p, struct ceph_osd_request, r_node);
|
||||
|
||||
dump_request(s, req);
|
||||
dump_requests(s, osd);
|
||||
}
|
||||
mutex_unlock(&osdc->request_mutex);
|
||||
dump_requests(s, &osdc->homeless_osd);
|
||||
|
||||
up_read(&osdc->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue