Re: [PATCH v2 1/1] blk-mq: fill header with kernel-doc

From: Gabriel Krisman Bertazi
Date: Tue Oct 08 2019 - 12:35:43 EST

Next message: Marco Elver: "Re: [PATCH] rcu: Avoid to modify mask_ofl_ipi in sync_rcu_exp_select_node_cpus()"
Previous message: Ladislav Michl: "Re: [alsa-devel] Applied "ASoc: tas2770: Fix build error without GPIOLIB" to the asoc tree"
Next in thread: Bart Van Assche: "Re: [PATCH v2 1/1] blk-mq: fill header with kernel-doc"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

AndrÃ Almeida <andrealmeid@xxxxxxxxxxxxx> writes:

> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index e0fce93ac127..8b745f229789 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -10,74 +10,153 @@ struct blk_mq_tags;
> struct blk_flush_queue;
>
> /**
> - * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
> + * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
> + * block device
> */
> struct blk_mq_hw_ctx {
> struct {
> + /** @lock: Lock for accessing dispatch queue */
> spinlock_t lock;
> + /**
> + * @dispatch: Queue of dispatched requests, waiting for
> + * workers to send them to the hardware.
> + */

It's been a few years since I looked at the block layer, but isn't
this used to hold requests that were taken from the blk_mq_ctx, but
couldn't be dispatched because the queue was full?

> struct list_head dispatch;
> - unsigned long state; /* BLK_MQ_S_* flags */
> + /**
> + * @state: BLK_MQ_S_* flags. Define the state of the hw
> + * queue (active, scheduled to restart, stopped).
> + */
> + unsigned long state;
> } ____cacheline_aligned_in_smp;
>
> + /**
> + * @run_work: Worker for dispatch scheduled requests to hardware.
> + * BLK_MQ_CPU_WORK_BATCH workers will be assigned to a CPU, then the
> + * following ones will be assigned to the next_cpu.
> + */
> struct delayed_work run_work;
> + /** @cpumask: Map of available CPUs. */

Available cpus where this hctx can run.

> cpumask_var_t cpumask;
> + /**
> + * @next_cpu: Index of CPU/workqueue to be used in the next batch
> + * of workers.
> + */
> int next_cpu;
> + /**
> + * @next_cpu_batch: Counter of how many works letf in the batch before
> + * changing to the next CPU. A batch has the size
> + * of BLK_MQ_CPU_WORK_BATCH.
> + */
> int next_cpu_batch;
>
> - unsigned long flags; /* BLK_MQ_F_* flags */
> + /** @flags: BLK_MQ_F_* flags. Define the behaviour of the queue. */
> + unsigned long flags;
>
> + /** @sched_data: Data to support schedulers. */
> void *sched_data;
> + /** @queue: Queue of request to be dispatched. */
> struct request_queue *queue;

Maybe "the request queue this hctx belongs to"

> + /** @fq: Queue of requests to be flushed from memory to storage. */
> struct blk_flush_queue *fq;

Hmm, i think this needs clarification.
>
> + /**
> + * @driver_data: Pointer to data owned by the block driver that created
> + * this hctx
> + */
> void *driver_data;
>
> + /**
> + * @ctx_map: Bitmap for each software queue. If bit is on, there is a
> + * pending request.
> + */
> struct sbitmap ctx_map;
>
> + /**
> + * @dispatch_from: Queue to be used when there is no scheduler
> + * was selected.
> + */

"when there is no scheduler selected" or "when no scheduler was selected"

> struct blk_mq_ctx *dispatch_from;
> + /**
> + * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
> + * decide if the hw_queue is busy using Exponential Weighted Moving
> + * Average algorithm.
> + */
> unsigned int dispatch_busy;
>
> + /** @type: HCTX_TYPE_* flags. Type of hardware queue. */
> unsigned short type;
> + /** @nr_ctx: Number of software queues. */
> unsigned short nr_ctx;
> + /** @ctxs: Array of software queues. */
> struct blk_mq_ctx **ctxs;
>
> + /** @dispatch_wait_lock: Lock for dispatch_wait queue. */
> spinlock_t dispatch_wait_lock;
> + /**
> + * @dispatch_wait: Waitqueue for dispatched requests. Request here will
> + * be processed when percpu_ref_is_zero(&q->q_usage_counter) evaluates
> + * true for q as a request_queue.
> + */

That's not really useful, since it is stating what the code does. It
would be interesting to say why this is needed, i.e. to re-run the queue
and dispatch requests if the queue was full in the previous attempt.

> wait_queue_entry_t dispatch_wait;
> + /** @wait_index: Index of next wait queue to be used. */
> atomic_t wait_index;
>
> + /** @tags: Request tags. */
> struct blk_mq_tags *tags;
> + /** @sched_tags: Request tags for schedulers. */
> struct blk_mq_tags *sched_tags;
>
> + /** @queued: Number of queued requests. */
> unsigned long queued;
> + /** @run: Number of dispatched requests. */
> unsigned long run;
> #define BLK_MQ_MAX_DISPATCH_ORDER 7
> + /** @dispatched: Number of dispatch requests by queue. */
> unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
>
> + /** @numa_node: NUMA node the storage adapter has been connected to. */
> unsigned int numa_node;
> + /** @queue_num: Index of this hardware queue. */
> unsigned int queue_num;
>
> + /** @nr_active: Number of active tags. */
> atomic_t nr_active;
>
> + /** @cpuhp_dead: List to store request if some CPU die. */
> struct hlist_node cpuhp_dead;
> + /** @kobj: Kernel object for sysfs. */
> struct kobject kobj;
>
> + /** @poll_considered: Count times blk_poll() was called. */
> unsigned long poll_considered;
> + /** @poll_invoked: Count how many requests blk_poll() polled. */
> unsigned long poll_invoked;
> + /** @poll_success: Count how many polled requests were completed. */
> unsigned long poll_success;
>
> #ifdef CONFIG_BLK_DEBUG_FS
> + /**
> + * @debugfs_dir: debugfs directory for this hardware queue. Named
> + * as cpu<cpu_number>.
> + */
> struct dentry *debugfs_dir;
> + /** @sched_debugfs_dir: debugfs directory for the scheduler. */
> struct dentry *sched_debugfs_dir;
> #endif
>
> + /** @hctx_list: List of all hardware queues. */
> struct list_head hctx_list;
>
> - /* Must be the last member - see also blk_mq_hw_ctx_size(). */
> + /**
> + * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
> + * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
> + * blk_mq_hw_ctx_size().
> + */
> struct srcu_struct srcu[0];
> };
>
> /**
> - * struct blk_mq_queue_map - ctx -> hctx mapping
> + * struct blk_mq_queue_map - Map software queues to hardware queues
> * @mq_map: CPU ID to hardware queue index map. This is an array
> * with nr_cpu_ids elements. Each element has a value in the range
> * [@queue_offset, @queue_offset + @nr_queues).
> @@ -92,10 +171,17 @@ struct blk_mq_queue_map {
> unsigned int queue_offset;
> };
>
> +/**
> + * enum hctx_type - Type of hardware queue
> + * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for.
> + * @HCTX_TYPE_READ: Just for READ I/O.
> + * @HCTX_TYPE_POLL: Polled I/O of any kind.
> + * @HCTX_MAX_TYPES: Number of types of hctx.
> + */
> enum hctx_type {
> - HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
> - HCTX_TYPE_READ, /* just for READ I/O */
> - HCTX_TYPE_POLL, /* polled I/O of any kind */
> + HCTX_TYPE_DEFAULT,
> + HCTX_TYPE_READ,
> + HCTX_TYPE_POLL,
>
> HCTX_MAX_TYPES,
> };
> @@ -147,6 +233,12 @@ struct blk_mq_tag_set {
> struct list_head tag_list;
> };
>
> +/**
> + * struct blk_mq_queue_data - Data about a request inserted in a queue
> + *
> + * @rq: Data about the block IO request.
> + * @last: If it is the last request in the queue.
> + */
> struct blk_mq_queue_data {
> struct request *rq;
> bool last;
> @@ -174,81 +266,101 @@ typedef bool (busy_fn)(struct request_queue *);
> typedef void (complete_fn)(struct request *);
> typedef void (cleanup_rq_fn)(struct request *);
>
> -
> +/**
> + * struct blk_mq_ops - list of callback functions that implements block driver
> + * behaviour.
> + */
> struct blk_mq_ops {
> - /*
> - * Queue request
> + /**
> + * @queue_rq: Queue a new request from block IO.
> */
> queue_rq_fn *queue_rq;
>
> - /*
> - * If a driver uses bd->last to judge when to submit requests to
> - * hardware, it must define this function. In case of errors that
> - * make us stop issuing further requests, this hook serves the
> + /**
> + * @commit_rqs: If a driver uses bd->last to judge when to submit
> + * requests to hardware, it must define this function. In case of errors
> + * that make us stop issuing further requests, this hook serves the
> * purpose of kicking the hardware (which the last request otherwise
> * would have done).
> */
> commit_rqs_fn *commit_rqs;
>
> - /*
> - * Reserve budget before queue request, once .queue_rq is
> + /**
> + * @get_budget: Reserve budget before queue request, once .queue_rq is
> * run, it is driver's responsibility to release the
> * reserved budget. Also we have to handle failure case
> * of .get_budget for avoiding I/O deadlock.
> */
> get_budget_fn *get_budget;
> + /**
> + * @put_budget: Release the reserved budget.
> + */
> put_budget_fn *put_budget;
>
> - /*
> - * Called on request timeout
> + /**
> + * @timeout: Called on request timeout.
> */
> timeout_fn *timeout;
>
> - /*
> - * Called to poll for completion of a specific tag.
> + /**
> + * @poll: Called to poll for completion of a specific tag.
> */
> poll_fn *poll;
>
> + /**
> + * @complete: Mark the request as complete.
> + */
> complete_fn *complete;
>
> - /*
> - * Called when the block layer side of a hardware queue has been
> - * set up, allowing the driver to allocate/init matching structures.
> - * Ditto for exit/teardown.
> + /**
> + * @init_hctx: Called when the block layer side of a hardware queue has
> + * been set up, allowing the driver to allocate/init matching
> + * structures.
> */
> init_hctx_fn *init_hctx;
> + /**
> + * @exit_hctx: Ditto for exit/teardown.
> + */
> exit_hctx_fn *exit_hctx;
>
> - /*
> - * Called for every command allocated by the block layer to allow
> - * the driver to set up driver specific data.
> + /**
> + * @init_request: Called for every command allocated by the block layer
> + * to allow the driver to set up driver specific data.
> *
> * Tag greater than or equal to queue_depth is for setting up
> * flush request.
> - *
> - * Ditto for exit/teardown.
> */
> init_request_fn *init_request;
> + /**
> + * @exit_request: Ditto for exit/teardown.
> + */
> exit_request_fn *exit_request;
> - /* Called from inside blk_get_request() */
> +
> + /**
> + * @initialize_rq_fn: Called from inside blk_get_request().
> + */
> void (*initialize_rq_fn)(struct request *rq);
>
> - /*
> - * Called before freeing one request which isn't completed yet,
> - * and usually for freeing the driver private data
> + /**
> + * @cleanup_rq: Called before freeing one request which isn't completed
> + * yet, and usually for freeing the driver private data.
> */
> cleanup_rq_fn *cleanup_rq;
>
> - /*
> - * If set, returns whether or not this queue currently is busy
> + /**
> + * @busy: If set, returns whether or not this queue currently is busy.
> */
> busy_fn *busy;
>
> + /**
> + * @map_queues: This allows drivers specify their own queue mapping by
> + * overriding the setup-time function that builds the mq_map.
> + */
> map_queues_fn *map_queues;
>
> #ifdef CONFIG_BLK_DEBUG_FS
> - /*
> - * Used by the debugfs implementation to show driver-specific
> + /**
> + * @show_rq: Used by the debugfs implementation to show driver-specific
> * information about a request.
> */
> void (*show_rq)(struct seq_file *m, struct request *rq);
> @@ -391,14 +503,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
>
> unsigned int blk_mq_rq_cpu(struct request *rq);
>
> -/*
> +/**
> + * blk_mq_rq_from_pdu - cast a PDU to a request
> + * @pdu: the PDU (Protocol Data Unit) to be casted
> + *
> + * Return: request
> + *
> * Driver command data is immediately after the request. So subtract request
> - * size to get back to the original request, add request size to get the PDU.
> + * size to get back to the original request.
> */
> static inline struct request *blk_mq_rq_from_pdu(void *pdu)
> {
> return pdu - sizeof(struct request);
> }
> +
> +/**
> + * blk_mq_rq_to_pdu - cast a request to a PDU
> + * @rq: the request to be casted
> + *
> + * Return: pointer to the PDU
> + *
> + * Driver command data is immediately after the request. So add request to get
> + * the PDU.
> + */
> static inline void *blk_mq_rq_to_pdu(struct request *rq)
> {
> return rq + 1;

--
Gabriel Krisman Bertazi

Next message: Marco Elver: "Re: [PATCH] rcu: Avoid to modify mask_ofl_ipi in sync_rcu_exp_select_node_cpus()"
Previous message: Ladislav Michl: "Re: [alsa-devel] Applied "ASoc: tas2770: Fix build error without GPIOLIB" to the asoc tree"
Next in thread: Bart Van Assche: "Re: [PATCH v2 1/1] blk-mq: fill header with kernel-doc"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]