544fbd16a4
rwb_enabled() can't be changed when there is any inflight IO.

wbt_disable_default() may set rwb->wb_normal to zero; however, the blk_stat timer may still be pending, and the timer function will then update rwb->wb_normal again.

This patch introduces blk_stat_deactivate() and applies it in wbt_disable_default(), which fixes the following IO hang triggered when running parted while switching the IO scheduler:

[ 369.937806] INFO: task parted:3645 blocked for more than 120 seconds.
[ 369.938941] Not tainted 4.20.0-rc6-00284-g906c801e5248 #498
[ 369.939797] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 369.940768] parted D 0 3645 3239 0x00000000
[ 369.941500] Call Trace:
[ 369.941874] ? __schedule+0x6d9/0x74c
[ 369.942392] ? wbt_done+0x5e/0x5e
[ 369.942864] ? wbt_cleanup_cb+0x16/0x16
[ 369.943404] ? wbt_done+0x5e/0x5e
[ 369.943874] schedule+0x67/0x78
[ 369.944298] io_schedule+0x12/0x33
[ 369.944771] rq_qos_wait+0xb5/0x119
[ 369.945193] ? karma_partition+0x1c2/0x1c2
[ 369.945691] ? wbt_cleanup_cb+0x16/0x16
[ 369.946151] wbt_wait+0x85/0xb6
[ 369.946540] __rq_qos_throttle+0x23/0x2f
[ 369.947014] blk_mq_make_request+0xe6/0x40a
[ 369.947518] generic_make_request+0x192/0x2fe
[ 369.948042] ? submit_bio+0x103/0x11f
[ 369.948486] ? __radix_tree_lookup+0x35/0xb5
[ 369.949011] submit_bio+0x103/0x11f
[ 369.949436] ? blkg_lookup_slowpath+0x25/0x44
[ 369.949962] submit_bio_wait+0x53/0x7f
[ 369.950469] blkdev_issue_flush+0x8a/0xae
[ 369.951032] blkdev_fsync+0x2f/0x3a
[ 369.951502] do_fsync+0x2e/0x47
[ 369.951887] __x64_sys_fsync+0x10/0x13
[ 369.952374] do_syscall_64+0x89/0x149
[ 369.952819] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 369.953492] RIP: 0033:0x7f95a1e729d4
[ 369.953996] Code: Bad RIP value.
[ 369.954456] RSP: 002b:00007ffdb570dd48 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
[ 369.955506] RAX: ffffffffffffffda RBX: 000055c2139c6be0 RCX: 00007f95a1e729d4
[ 369.956389] RDX: 0000000000000001 RSI: 0000000000001261 RDI: 0000000000000004
[ 369.957325] RBP: 0000000000000002 R08: 0000000000000000 R09: 000055c2139c6ce0
[ 369.958199] R10: 0000000000000000 R11: 0000000000000246 R12: 000055c2139c0380
[ 369.959143] R13: 0000000000000004 R14: 0000000000000100 R15: 0000000000000008

Cc: stable@vger.kernel.org
Cc: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
171 lines
4.6 KiB
C
171 lines
4.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef BLK_STAT_H
|
|
#define BLK_STAT_H
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/ktime.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/timer.h>
|
|
|
|
/**
|
|
* struct blk_stat_callback - Block statistics callback.
|
|
*
|
|
* A &struct blk_stat_callback is associated with a &struct request_queue. While
|
|
* @timer is active, that queue's request completion latencies are sorted into
|
|
* buckets by @bucket_fn and added to a per-cpu buffer, @cpu_stat. When the
|
|
* timer fires, @cpu_stat is flushed to @stat and @timer_fn is invoked.
|
|
*/
|
|
struct blk_stat_callback {
|
|
/*
|
|
* @list: RCU list of callbacks for a &struct request_queue.
|
|
*/
|
|
struct list_head list;
|
|
|
|
/**
|
|
* @timer: Timer for the next callback invocation.
|
|
*/
|
|
struct timer_list timer;
|
|
|
|
/**
|
|
* @cpu_stat: Per-cpu statistics buckets.
|
|
*/
|
|
struct blk_rq_stat __percpu *cpu_stat;
|
|
|
|
/**
|
|
* @bucket_fn: Given a request, returns which statistics bucket it
|
|
* should be accounted under. Return -1 for no bucket for this
|
|
* request.
|
|
*/
|
|
int (*bucket_fn)(const struct request *);
|
|
|
|
/**
|
|
* @buckets: Number of statistics buckets.
|
|
*/
|
|
unsigned int buckets;
|
|
|
|
/**
|
|
* @stat: Array of statistics buckets.
|
|
*/
|
|
struct blk_rq_stat *stat;
|
|
|
|
/**
|
|
* @fn: Callback function.
|
|
*/
|
|
void (*timer_fn)(struct blk_stat_callback *);
|
|
|
|
/**
|
|
* @data: Private pointer for the user.
|
|
*/
|
|
void *data;
|
|
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
struct blk_queue_stats *blk_alloc_queue_stats(void);
|
|
void blk_free_queue_stats(struct blk_queue_stats *);
|
|
|
|
void blk_stat_add(struct request *rq, u64 now);
|
|
|
|
/* record time/size info in request but not add a callback */
|
|
void blk_stat_enable_accounting(struct request_queue *q);
|
|
|
|
/**
 * blk_stat_alloc_callback() - Allocate a block statistics callback.
 * @timer_fn: Function to invoke when the callback's timer fires.
 * @bucket_fn: Function mapping a request to its statistics bucket.
 * @buckets: How many statistics buckets the callback has.
 * @data: Stored in the @data field of the new &struct blk_stat_callback.
 *
 * The callback functions are described in detail at
 * &struct blk_stat_callback.
 *
 * Return: The new &struct blk_stat_callback, or NULL on ENOMEM.
 */
struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),
			int (*bucket_fn)(const struct request *),
			unsigned int buckets, void *data);
|
|
|
|
/**
 * blk_stat_add_callback() - Register a block statistics callback so that it
 * runs on a request queue.
 * @q: Request queue to attach the callback to.
 * @cb: Callback to attach.
 *
 * A given &struct blk_stat_callback may be added to only one
 * &struct request_queue.
 */
void blk_stat_add_callback(struct request_queue *q,
			   struct blk_stat_callback *cb);
|
|
|
|
/**
 * blk_stat_remove_callback() - Detach a block statistics callback from a
 * request queue.
 * @q: Request queue the callback is attached to.
 * @cb: Callback to detach.
 *
 * Once this function has returned, the callback is no longer running on any
 * CPU, and it will not be invoked again unless it is re-added.
 */
void blk_stat_remove_callback(struct request_queue *q,
			      struct blk_stat_callback *cb);
|
|
|
|
/**
 * blk_stat_free_callback() - Release a block statistics callback.
 * @cb: Callback to free; may be NULL.
 *
 * Passing NULL is allowed and does nothing. A non-NULL @cb must not be
 * attached to a request queue: if it was ever added with
 * blk_stat_add_callback(), it must have been removed again with
 * blk_stat_remove_callback() before it is freed.
 */
void blk_stat_free_callback(struct blk_stat_callback *cb);
|
|
|
|
/**
|
|
* blk_stat_is_active() - Check if a block statistics callback is currently
|
|
* gathering statistics.
|
|
* @cb: The callback.
|
|
*/
|
|
static inline bool blk_stat_is_active(struct blk_stat_callback *cb)
|
|
{
|
|
return timer_pending(&cb->timer);
|
|
}
|
|
|
|
/**
|
|
* blk_stat_activate_nsecs() - Gather block statistics during a time window in
|
|
* nanoseconds.
|
|
* @cb: The callback.
|
|
* @nsecs: Number of nanoseconds to gather statistics for.
|
|
*
|
|
* The timer callback will be called when the window expires.
|
|
*/
|
|
static inline void blk_stat_activate_nsecs(struct blk_stat_callback *cb,
|
|
u64 nsecs)
|
|
{
|
|
mod_timer(&cb->timer, jiffies + nsecs_to_jiffies(nsecs));
|
|
}
|
|
|
|
static inline void blk_stat_deactivate(struct blk_stat_callback *cb)
|
|
{
|
|
del_timer_sync(&cb->timer);
|
|
}
|
|
|
|
/**
|
|
* blk_stat_activate_msecs() - Gather block statistics during a time window in
|
|
* milliseconds.
|
|
* @cb: The callback.
|
|
* @msecs: Number of milliseconds to gather statistics for.
|
|
*
|
|
* The timer callback will be called when the window expires.
|
|
*/
|
|
static inline void blk_stat_activate_msecs(struct blk_stat_callback *cb,
|
|
unsigned int msecs)
|
|
{
|
|
mod_timer(&cb->timer, jiffies + msecs_to_jiffies(msecs));
|
|
}
|
|
|
|
/*
 * Helpers operating on individual &struct blk_rq_stat buckets; their
 * implementations are not visible in this header.
 *
 * NOTE(review): the parameter names below are for readability only and are
 * assumed to mirror the definitions (value added to @stat, @src summed into
 * @dst) - confirm against the implementation.
 */
void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value);
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src);
void blk_rq_stat_init(struct blk_rq_stat *stat);
|
|
|
|
#endif
|