Merge branch 'for-3.19/core' of git://git.kernel.dk/linux-block
Pull block driver core update from Jens Axboe:
 "This is the pull request for the core block IO changes for 3.19. Not
  a huge round this time, mostly lots of little good fixes:

   - Fix a bug in sysfs blktrace interface causing a NULL pointer
     dereference, when enabled/disabled through that API. From Arianna
     Avanzini.

   - Various updates/fixes/improvements for blk-mq:
        - A set of updates from Bart, mostly fixing bugs in the tag
          handling.
        - Cleanup/code consolidation from Christoph.
        - Extend queue_rq API to be able to handle batching issues of
          IO requests. NVMe will utilize this shortly. From me.
        - A few tag and request handling updates from me.
        - Cleanup of the preempt handling for running queues from
          Paolo.
        - Prevent running of unmapped hardware queues from Ming Lei.
        - Move the kdump memory limiting check to be in the correct
          location, from Shaohua.
        - Initialize all software queues at init time from Takashi.
          This prevents a kobject warning when CPUs are brought online
          that weren't online when a queue was registered.

   - Single writeback fix for I_DIRTY clearing from Tejun. Queued with
     the core IO changes, since it's just a single fix.

   - Version X of the __bio_add_page() segment addition retry from
     Maurizio. Hope the Xth time is the charm.

   - Documentation fixup for IO scheduler merging from Jan.

   - Introduce (and use) generic IO stat accounting helpers for
     non-rq drivers, from Gu Zheng.

   - Kill off artificial limiting of max sectors in a request from
     Christoph"

* 'for-3.19/core' of git://git.kernel.dk/linux-block: (26 commits)
  bio: modify __bio_add_page() to accept pages that don't start a new segment
  blk-mq: Fix uninitialized kobject at CPU hotplugging
  blktrace: don't let the sysfs interface remove trace from running list
  blk-mq: Use all available hardware queues
  blk-mq: Micro-optimize bt_get()
  blk-mq: Fix a race between bt_clear_tag() and bt_get()
  blk-mq: Avoid that __bt_get_word() wraps multiple times
  blk-mq: Fix a use-after-free
  blk-mq: prevent unmapped hw queue from being scheduled
  blk-mq: re-check for available tags after running the hardware queue
  blk-mq: fix hang in bt_get()
  blk-mq: move the kdump check to blk_mq_alloc_tag_set
  blk-mq: cleanup tag free handling
  blk-mq: use 'nr_cpu_ids' as highest CPU ID count for hwq <-> cpu map
  blk: introduce generic io stat accounting help function
  blk-mq: handle the single queue case in blk_mq_hctx_next_cpu
  genhd: check for int overflow in disk_expand_part_tbl()
  blk-mq: add blk_mq_free_hctx_request()
  blk-mq: export blk_mq_free_request()
  blk-mq: use get_cpu/put_cpu instead of preempt_disable/preempt_enable
  ...
commit caf292ae5b

21 changed files with 255 additions and 146 deletions
Documentation/block/biodoc.txt
@@ -942,7 +942,11 @@ elevator_allow_merge_fn called whenever the block layer determines
 				request safely. The io scheduler may still
 				want to stop a merge at this point if it
 				results in some sort of conflict internally,
-				this hook allows it to do that.
+				this hook allows it to do that. Note however
+				that two *requests* can still be merged at later
+				time. Currently the io scheduler has no way to
+				prevent that. It can only learn about the fact
+				from elevator_merge_req_fn callback.
 
 elevator_dispatch_fn*		fills the dispatch queue with ready requests.
 				I/O schedulers are free to postpone requests by
block/bio.c (84 changed lines)
@@ -748,6 +748,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 			}
 		}
 
+		bio->bi_iter.bi_size += len;
 		goto done;
 	}
 
@@ -763,20 +764,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return 0;
 
-	/*
-	 * we might lose a segment or two here, but rather that than
-	 * make this too complex.
-	 */
-
-	while (bio->bi_phys_segments >= queue_max_segments(q)) {
-
-		if (retried_segments)
-			return 0;
-
-		retried_segments = 1;
-		blk_recount_segments(q, bio);
-	}
-
 	/*
 	 * setup the new entry, we might clear it again later if we
 	 * cannot add the page
@@ -785,6 +772,23 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	bvec->bv_page = page;
 	bvec->bv_len = len;
 	bvec->bv_offset = offset;
+	bio->bi_vcnt++;
+	bio->bi_phys_segments++;
+	bio->bi_iter.bi_size += len;
+
+	/*
+	 * Perform a recount if the number of segments is greater
+	 * than queue_max_segments(q).
+	 */
+
+	while (bio->bi_phys_segments > queue_max_segments(q)) {
+
+		if (retried_segments)
+			goto failed;
+
+		retried_segments = 1;
+		blk_recount_segments(q, bio);
+	}
 
 	/*
 	 * if queue has other restrictions (eg varying max sector size
@@ -795,7 +799,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		struct bvec_merge_data bvm = {
 			.bi_bdev = bio->bi_bdev,
 			.bi_sector = bio->bi_iter.bi_sector,
-			.bi_size = bio->bi_iter.bi_size,
+			.bi_size = bio->bi_iter.bi_size - len,
 			.bi_rw = bio->bi_rw,
 		};
 
@@ -803,23 +807,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		 * merge_bvec_fn() returns number of bytes it can accept
 		 * at this offset
 		 */
-		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
-			bvec->bv_page = NULL;
-			bvec->bv_len = 0;
-			bvec->bv_offset = 0;
-			return 0;
-		}
+		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+			goto failed;
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
-	bio->bi_vcnt++;
-	bio->bi_phys_segments++;
 done:
-	bio->bi_iter.bi_size += len;
 	return len;
+
+failed:
+	bvec->bv_page = NULL;
+	bvec->bv_len = 0;
+	bvec->bv_offset = 0;
+	bio->bi_vcnt--;
+	bio->bi_iter.bi_size -= len;
+	blk_recount_segments(q, bio);
+	return 0;
 }
 
 /**
@@ -1739,6 +1745,34 @@ void bio_check_pages_dirty(struct bio *bio)
 	}
 }
 
+void generic_start_io_acct(int rw, unsigned long sectors,
+			   struct hd_struct *part)
+{
+	int cpu = part_stat_lock();
+
+	part_round_stats(cpu, part);
+	part_stat_inc(cpu, part, ios[rw]);
+	part_stat_add(cpu, part, sectors[rw], sectors);
+	part_inc_in_flight(part, rw);
+
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_start_io_acct);
+
+void generic_end_io_acct(int rw, struct hd_struct *part,
+			 unsigned long start_time)
+{
+	unsigned long duration = jiffies - start_time;
+	int cpu = part_stat_lock();
+
+	part_stat_add(cpu, part, ticks[rw], duration);
+	part_round_stats(cpu, part);
+	part_dec_in_flight(part, rw);
+
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_end_io_acct);
+
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 void bio_flush_dcache_pages(struct bio *bi)
 {
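Note (illustrative, not part of the diff): the two helpers added above are the generic IO stat accounting entry points for bio-based (non-request) drivers mentioned in the pull message. A minimal sketch of how such a driver might call them follows; "my_dev", "my_make_request" and "my_do_transfer" are made-up names for the sketch, not anything defined by this series.

/*
 * Illustrative only. A bio-based driver accounts the start of an I/O,
 * does its transfer, then accounts the completion against the same
 * hd_struct (here the whole-disk partition, disk->part0).
 */
static void my_make_request(struct request_queue *q, struct bio *bio)
{
	struct my_dev *dev = q->queuedata;		/* hypothetical driver data */
	int rw = bio_data_dir(bio);
	unsigned long start = jiffies;

	generic_start_io_acct(rw, bio_sectors(bio), &dev->disk->part0);

	my_do_transfer(dev, bio);			/* hypothetical I/O path */

	generic_end_io_acct(rw, &dev->disk->part0, start);
	bio_endio(bio, 0);
}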
block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
 	blk_sync_queue(q);
 
+	if (q->mq_ops)
+		blk_mq_free_queue(q);
+
 	spin_lock_irq(lock);
 	if (q->queue_lock != &q->__queue_lock)
 		q->queue_lock = &q->__queue_lock;
block/blk-mq-cpumap.c
@@ -17,7 +17,7 @@
 static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
 			      const int cpu)
 {
-	return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
+	return cpu * nr_queues / nr_cpus;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -90,7 +90,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 	unsigned int *map;
 
 	/* If cpus are offline, map them to first hctx */
-	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
+	map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
 			   set->numa_node);
 	if (!map)
 		return NULL;
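Note (illustrative, not part of the diff): the first hunk above is the "Use all available hardware queues" change. A quick way to see why the old rounding could leave hardware queues idle is to evaluate both formulas for a small configuration; the 6-CPU/4-queue numbers below are just an assumed example.

/*
 * Illustrative only: compares the old and new CPU -> hardware queue
 * formulas for a made-up box with 6 CPUs and 4 hardware queues. The old
 * rounding maps CPUs to queues 0,0,1,1,2,2 and never uses queue 3; the
 * new formula yields 0,0,1,2,2,3 and spreads work over all queues.
 */
#include <stdio.h>

static unsigned int old_map(unsigned int nr_cpus, unsigned int nr_queues,
			    unsigned int cpu)
{
	return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
}

static unsigned int new_map(unsigned int nr_cpus, unsigned int nr_queues,
			    unsigned int cpu)
{
	return cpu * nr_queues / nr_cpus;
}

int main(void)
{
	unsigned int nr_cpus = 6, nr_queues = 4, cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		printf("cpu %u: old -> hwq %u, new -> hwq %u\n", cpu,
		       old_map(nr_cpus, nr_queues, cpu),
		       new_map(nr_cpus, nr_queues, cpu));
	return 0;
}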
block/blk-mq-sysfs.c
@@ -390,16 +390,15 @@ static void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
-	int i, j;
+	int i;
 
 	kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
 
-		hctx_for_each_ctx(hctx, ctx, j)
-			kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
-	}
+	queue_for_each_ctx(q, ctx, i)
+		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
 }
 
 /* see blk_register_queue() */
block/blk-mq-tag.c
@@ -137,6 +137,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 {
 	int tag, org_last_tag, end;
+	bool wrap = last_tag != 0;
 
 	org_last_tag = last_tag;
 	end = bm->depth;
@@ -148,15 +149,16 @@ restart:
 			 * We started with an offset, start from 0 to
 			 * exhaust the map.
 			 */
-			if (org_last_tag && last_tag) {
-				end = last_tag;
+			if (wrap) {
+				wrap = false;
+				end = org_last_tag;
 				last_tag = 0;
 				goto restart;
 			}
 			return -1;
 		}
 		last_tag = tag + 1;
-	} while (test_and_set_bit_lock(tag, &bm->word));
+	} while (test_and_set_bit(tag, &bm->word));
 
 	return tag;
 }
@@ -246,14 +248,29 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	if (!(data->gfp & __GFP_WAIT))
 		return -1;
 
-	bs = bt_wait_ptr(bt, hctx);
 	do {
+		bs = bt_wait_ptr(bt, hctx);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
 		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
+		/*
+		 * We're out of tags on this hardware queue, kick any
+		 * pending IO submits before going to sleep waiting for
+		 * some to complete.
+		 */
+		blk_mq_run_hw_queue(hctx, false);
+
+		/*
+		 * Retry tag allocation after running the hardware queue,
+		 * as running the queue may also have found completions.
+		 */
+		tag = __bt_get(hctx, bt, last_tag);
+		if (tag != -1)
+			break;
+
 		blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
@@ -268,8 +285,6 @@ static int bt_get(struct blk_mq_alloc_data *data,
 			hctx = data->hctx;
 			bt = &hctx->tags->bitmap_tags;
 		}
-		finish_wait(&bs->wait, &wait);
-		bs = bt_wait_ptr(bt, hctx);
 	} while (1);
 
 	finish_wait(&bs->wait, &wait);
@@ -340,11 +355,10 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	struct bt_wait_state *bs;
 	int wait_cnt;
 
-	/*
-	 * The unlock memory barrier need to order access to req in free
-	 * path and clearing tag bit
-	 */
-	clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+	clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+
+	/* Ensure that the wait list checks occur after clear_bit(). */
+	smp_mb();
 
 	bs = bt_wake_ptr(bt);
 	if (!bs)
@@ -360,21 +374,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	}
 }
 
-static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
-{
-	BUG_ON(tag >= tags->nr_tags);
-
-	bt_clear_tag(&tags->bitmap_tags, tag);
-}
-
-static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
-				      unsigned int tag)
-{
-	BUG_ON(tag >= tags->nr_reserved_tags);
-
-	bt_clear_tag(&tags->breserved_tags, tag);
-}
-
 void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		    unsigned int *last_tag)
 {
@@ -383,10 +382,13 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
-		__blk_mq_put_tag(tags, real_tag);
+		BUG_ON(real_tag >= tags->nr_tags);
+		bt_clear_tag(&tags->bitmap_tags, real_tag);
 		*last_tag = real_tag;
-	} else
-		__blk_mq_put_reserved_tag(tags, tag);
+	} else {
+		BUG_ON(tag >= tags->nr_reserved_tags);
+		bt_clear_tag(&tags->breserved_tags, tag);
+	}
 }
 
 static void bt_for_each(struct blk_mq_hw_ctx *hctx,
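Note (illustrative, not part of the diff): the __bt_get_word() change above makes the tag search wrap around at most once instead of potentially looping. A standalone userspace sketch of that search pattern is shown below; it deliberately leaves out the atomic test_and_set step and any locking, and "find_free_bit" is a made-up name.

/*
 * Illustrative only: scan for a free (zero) bit starting at last_tag,
 * and if nothing is free up to 'depth', rescan exactly once from bit 0
 * up to the original starting point.
 */
#include <stdbool.h>

static int find_free_bit(const unsigned long *word, unsigned int depth,
			 unsigned int last_tag)
{
	unsigned int end = depth, tag = last_tag;
	bool wrap = last_tag != 0;

restart:
	for (; tag < end; tag++)
		if (!(*word & (1UL << tag)))
			return tag;

	if (wrap) {		/* wrap once: rescan [0, original start) */
		wrap = false;
		end = last_tag;
		tag = 0;
		goto restart;
	}
	return -1;
}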
block/blk-mq.c (126 changed lines)
@@ -279,17 +279,25 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	blk_mq_queue_exit(q);
 }
 
-void blk_mq_free_request(struct request *rq)
+void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
+	ctx->rq_completed[rq_is_sync(rq)]++;
+	__blk_mq_free_request(hctx, ctx, rq);
+
+}
+EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
+
+void blk_mq_free_request(struct request *rq)
+{
 	struct blk_mq_hw_ctx *hctx;
 	struct request_queue *q = rq->q;
 
-	ctx->rq_completed[rq_is_sync(rq)]++;
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	__blk_mq_free_request(hctx, ctx, rq);
+	hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
+	blk_mq_free_hctx_request(hctx, rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
@@ -591,7 +599,7 @@ static void blk_mq_rq_timer(unsigned long priv)
 		 * If not software queues are currently mapped to this
 		 * hardware queue, there's nothing to check
 		 */
-		if (!hctx->nr_ctx || !hctx->tags)
+		if (!blk_mq_hw_queue_mapped(hctx))
 			continue;
 
 		blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
@@ -690,6 +698,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	struct request_queue *q = hctx->queue;
 	struct request *rq;
 	LIST_HEAD(rq_list);
+	LIST_HEAD(driver_list);
+	struct list_head *dptr;
 	int queued;
 
 	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
@@ -715,17 +725,28 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		spin_unlock(&hctx->lock);
 	}
 
+	/*
+	 * Start off with dptr being NULL, so we start the first request
+	 * immediately, even if we have more pending.
+	 */
+	dptr = NULL;
+
 	/*
 	 * Now process all the entries, sending them to the driver.
 	 */
 	queued = 0;
 	while (!list_empty(&rq_list)) {
+		struct blk_mq_queue_data bd;
 		int ret;
 
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 
-		ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
+		bd.rq = rq;
+		bd.list = dptr;
+		bd.last = list_empty(&rq_list);
+
+		ret = q->mq_ops->queue_rq(hctx, &bd);
 		switch (ret) {
 		case BLK_MQ_RQ_QUEUE_OK:
 			queued++;
@@ -744,6 +765,13 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
 		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
 			break;
+
+		/*
+		 * We've done the first request. If we have more than 1
+		 * left in the list, set dptr to defer issue.
+		 */
+		if (!dptr && rq_list.next != rq_list.prev)
+			dptr = &driver_list;
 	}
 
 	if (!queued)
@@ -770,10 +798,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  */
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
-	int cpu = hctx->next_cpu;
+	if (hctx->queue->nr_hw_queues == 1)
+		return WORK_CPU_UNBOUND;
 
 	if (--hctx->next_cpu_batch <= 0) {
-		int next_cpu;
+		int cpu = hctx->next_cpu, next_cpu;
 
 		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
 		if (next_cpu >= nr_cpu_ids)
@@ -781,26 +810,32 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 
 		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+
+		return cpu;
 	}
 
-	return cpu;
+	return hctx->next_cpu;
 }
 
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
-	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
+	    !blk_mq_hw_queue_mapped(hctx)))
 		return;
 
-	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
-		__blk_mq_run_hw_queue(hctx);
-	else if (hctx->queue->nr_hw_queues == 1)
-		kblockd_schedule_delayed_work(&hctx->run_work, 0);
-	else {
-		unsigned int cpu;
+	if (!async) {
+		int cpu = get_cpu();
+		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+			__blk_mq_run_hw_queue(hctx);
+			put_cpu();
+			return;
+		}
 
-		cpu = blk_mq_hctx_next_cpu(hctx);
-		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
+		put_cpu();
 	}
+
+	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+			&hctx->run_work, 0);
 }
 
 void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -814,9 +849,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
-		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_run_queues);
@@ -843,9 +876,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
 
-	preempt_disable();
 	blk_mq_run_hw_queue(hctx, false);
-	preempt_enable();
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
@@ -870,9 +901,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 			continue;
 
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
-		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -898,16 +927,11 @@ static void blk_mq_delay_work_fn(struct work_struct *work)
 
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
-	unsigned long tmo = msecs_to_jiffies(msecs);
-
-	if (hctx->queue->nr_hw_queues == 1)
-		kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
-	else {
-		unsigned int cpu;
-
-		cpu = blk_mq_hctx_next_cpu(hctx);
-		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
-	}
+	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+		return;
+
+	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+			&hctx->delay_work, msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
@@ -1162,7 +1186,17 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		goto run_queue;
 	}
 
-	if (is_sync) {
+	/*
+	 * If the driver supports defer issued based on 'last', then
+	 * queue it up like normal since we can potentially save some
+	 * CPU this way.
+	 */
+	if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+		struct blk_mq_queue_data bd = {
+			.rq = rq,
+			.list = NULL,
+			.last = 1
+		};
 		int ret;
 
 		blk_mq_bio_to_request(rq, bio);
@@ -1172,7 +1206,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * error (busy), just add it to our list as we previously
 		 * would have done
 		 */
-		ret = q->mq_ops->queue_rq(data.hctx, rq, true);
+		ret = q->mq_ops->queue_rq(data.hctx, &bd);
 		if (ret == BLK_MQ_RQ_QUEUE_OK)
 			goto done;
 		else {
@@ -1784,16 +1818,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	/*
-	 * If a crashdump is active, then we are potentially in a very
-	 * memory constrained environment. Limit us to 1 queue and
-	 * 64 tags to prevent using too much memory.
-	 */
-	if (is_kdump_kernel()) {
-		set->nr_hw_queues = 1;
-		set->queue_depth = min(64U, set->queue_depth);
-	}
-
 	hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
 			set->numa_node);
 
@@ -2067,6 +2091,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 		set->queue_depth = BLK_MQ_MAX_DEPTH;
 	}
 
+	/*
+	 * If a crashdump is active, then we are potentially in a very
+	 * memory constrained environment. Limit us to 1 queue and
+	 * 64 tags to prevent using too much memory.
+	 */
+	if (is_kdump_kernel()) {
+		set->nr_hw_queues = 1;
+		set->queue_depth = min(64U, set->queue_depth);
+	}
+
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),
 				 GFP_KERNEL, set->numa_node);
block/blk-mq.h
@@ -115,4 +115,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
 	data->hctx = hctx;
 }
 
+static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
+{
+	return hctx->nr_ctx && hctx->tags;
+}
+
 #endif
block/blk-settings.c
@@ -257,9 +257,7 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_
 			__func__, max_hw_sectors);
 	}
 
-	limits->max_hw_sectors = max_hw_sectors;
-	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-				    BLK_DEF_MAX_SECTORS);
+	limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
 }
 EXPORT_SYMBOL(blk_limits_max_hw_sectors);
 
block/blk-sysfs.c
@@ -492,17 +492,15 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
  * Currently, its primary task it to free all the &struct request
  * structures that were allocated to the queue and the queue itself.
  *
- * Caveat:
- *     Hopefully the low level driver will have finished any
- *     outstanding requests first...
+ * Note:
+ *     The low level driver must have finished any outstanding requests first
+ *     via blk_cleanup_queue().
 **/
 static void blk_release_queue(struct kobject *kobj)
 {
 	struct request_queue *q =
 		container_of(kobj, struct request_queue, kobj);
 
-	blk_sync_queue(q);
-
 	blkcg_exit_queue(q);
 
 	if (q->elevator) {
@@ -517,9 +515,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (q->mq_ops)
-		blk_mq_free_queue(q);
-	else
+	if (!q->mq_ops)
 		blk_free_flush_queue(q->fq);
 
 	blk_trace_shutdown(q);
block/genhd.c
@@ -1070,9 +1070,16 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
 	struct disk_part_tbl *old_ptbl = disk->part_tbl;
 	struct disk_part_tbl *new_ptbl;
 	int len = old_ptbl ? old_ptbl->len : 0;
-	int target = partno + 1;
+	int i, target;
 	size_t size;
-	int i;
+
+	/*
+	 * check for int overflow, since we can get here from blkpg_ioctl()
+	 * with a user passed 'partno'.
+	 */
+	target = partno + 1;
+	if (target < 0)
+		return -EINVAL;
 
 	/* disk_max_parts() is zero during initialization, ignore if so */
 	if (disk_max_parts(disk) && target > disk_max_parts(disk))
drivers/block/aoe/aoeblk.c
@@ -395,7 +395,7 @@ aoeblk_gdalloc(void *vp)
 	WARN_ON(d->flags & DEVFL_TKILL);
 	WARN_ON(d->gd);
 	WARN_ON(d->flags & DEVFL_UP);
-	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, 1024);
 	q->backing_dev_info.name = "aoe";
 	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
 	d->bufpool = mp;
drivers/block/mtip32xx/mtip32xx.c
@@ -3775,9 +3775,10 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *rq = bd->rq;
 	int ret;
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
drivers/block/null_blk.c
@@ -313,15 +313,15 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
-	cmd->rq = rq;
+	cmd->rq = bd->rq;
 	cmd->nq = hctx->driver_data;
 
-	blk_mq_start_request(rq);
+	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
 	return BLK_MQ_RQ_QUEUE_OK;
drivers/block/virtio_blk.c
@@ -159,10 +159,11 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct request *req = bd->rq;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
@@ -223,7 +224,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
+	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
 		notify = true;
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 
drivers/scsi/scsi_lib.c
@@ -1947,9 +1947,10 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *req = bd->rq;
 	struct request_queue *q = req->q;
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;
fs/fs-writeback.c
@@ -479,12 +479,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * write_inode()
 	 */
 	spin_lock(&inode->i_lock);
-	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
-	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		inode->i_state &= ~I_DIRTY_PAGES;
+
 	dirty = inode->i_state & I_DIRTY;
-	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+	inode->i_state &= ~I_DIRTY;
+
+	/*
+	 * Paired with smp_mb() in __mark_inode_dirty(). This allows
+	 * __mark_inode_dirty() to test i_state without grabbing i_lock -
+	 * either they see the I_DIRTY bits cleared or we see the dirtied
+	 * inode.
+	 *
+	 * I_DIRTY_PAGES is always cleared together above even if @mapping
+	 * still has dirty pages. The flag is reinstated after smp_mb() if
+	 * necessary. This guarantees that either __mark_inode_dirty()
+	 * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+	 */
+	smp_mb();
+
+	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		inode->i_state |= I_DIRTY_PAGES;
+
 	spin_unlock(&inode->i_lock);
+
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -1148,12 +1164,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	}
 
 	/*
-	 * make sure that changes are seen by all cpus before we test i_state
-	 * -- mikulas
+	 * Paired with smp_mb() in __writeback_single_inode() for the
+	 * following lockless i_state test. See there for details.
 	 */
 	smp_mb();
 
-	/* avoid the locking if we can */
 	if ((inode->i_state & flags) == flags)
 		return;
 
include/linux/bio.h
@@ -443,6 +443,11 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
+void generic_start_io_acct(int rw, unsigned long sectors,
+		struct hd_struct *part);
+void generic_end_io_acct(int rw, struct hd_struct *part,
+		unsigned long start_time);
+
 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 # error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
 #endif
include/linux/blk-mq.h
@@ -79,7 +79,13 @@ struct blk_mq_tag_set {
 	struct list_head	tag_list;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
+struct blk_mq_queue_data {
+	struct request *rq;
+	struct list_head *list;
+	bool last;
+};
+
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
@@ -140,6 +146,7 @@ enum {
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
 	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_SYSFS_UP	= 1 << 3,
+	BLK_MQ_F_DEFER_ISSUE	= 1 << 4,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
@@ -162,6 +169,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
+void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
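Note (illustrative, not part of the diff): the driver conversions earlier in this series (mtip32xx, null_blk, virtio_blk, scsi) all follow the same pattern against the new queue_rq signature defined above. A minimal sketch of such a handler is shown below; "my_queue_rq", "my_queue", "my_submit" and "my_hw_kick" are made-up names, not anything provided by the block layer.

/*
 * Illustrative only. The request now arrives wrapped in struct
 * blk_mq_queue_data; bd->last tells the driver whether more requests
 * will follow immediately, so doorbell writes can be batched until
 * the last one.
 */
static int my_queue_rq(struct blk_mq_hw_ctx *hctx,
		       const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_queue *q = hctx->driver_data;	/* set up in init_hctx */

	blk_mq_start_request(rq);

	if (my_submit(q, rq))			/* hardware queue full, retry later */
		return BLK_MQ_RQ_QUEUE_BUSY;

	if (bd->last)				/* only kick the hardware once */
		my_hw_kick(q);

	return BLK_MQ_RQ_QUEUE_OK;
}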
include/linux/blkdev.h
@@ -1184,7 +1184,6 @@ extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
 enum blk_default_limits {
 	BLK_MAX_SEGMENTS	= 128,
 	BLK_SAFE_MAX_SECTORS	= 255,
-	BLK_DEF_MAX_SECTORS	= 1024,
 	BLK_MAX_SEGMENT_SIZE	= 65536,
 	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
 };
kernel/trace/blktrace.c
@@ -1477,9 +1477,6 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (atomic_dec_and_test(&blk_probes_ref))
 		blk_unregister_tracepoints();
 
-	spin_lock_irq(&running_trace_lock);
-	list_del(&bt->running_list);
-	spin_unlock_irq(&running_trace_lock);
 	blk_trace_free(bt);
 	return 0;
 }