Re: [PATCH 1/4] block: add zone open, close and finish support

From: Matias BjÃrling
Date: Tue Jun 25 2019 - 06:35:08 EST


On 6/25/19 12:27 AM, Chaitanya Kulkarni wrote:
On 6/24/19 12:43 PM, Bart Van Assche wrote:
On 6/21/19 6:07 AM, Matias BjÃrling wrote:
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 95202f80676c..067ef9242275 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -284,13 +284,20 @@ enum req_opf {
REQ_OP_DISCARD = 3,
/* securely erase sectors */
REQ_OP_SECURE_ERASE = 5,
- /* reset a zone write pointer */
- REQ_OP_ZONE_RESET = 6,
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
+ /* reset a zone write pointer */
+ REQ_OP_ZONE_RESET = 16,
+ /* Open zone(s) */
+ REQ_OP_ZONE_OPEN = 17,
+ /* Close zone(s) */
+ REQ_OP_ZONE_CLOSE = 18,
+ /* Finish zone(s) */
+ REQ_OP_ZONE_FINISH = 19,
+
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
REQ_OP_SCSI_OUT = 33,
@@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
bio->bi_opf = op | op_flags;
}

Are the new operation types ever passed to op_is_write()? The definition
of that function is as follows:

May be we should change numbering since blktrace also relies on the
having on_is_write() without looking at the rq_ops().

197 * Data direction bit lookup
198 */
199 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
200 BLK_TC_ACT(BLK_TC_WRITE) }; <----
201
202 #define BLK_TC_RAHEAD BLK_TC_AHEAD
203 #define BLK_TC_PREFLUSH BLK_TC_FLUSH
204
205 /* The ilog2() calls fall out because they're constant */
206 #define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
207 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ##
__name))
208
209 /*
210 * The worker for the various blk_add_trace*() types. Fills out a
211 * blk_io_trace structure and places it in a per-cpu subbuffer.
212 */
213 static void __blk_add_trace(struct blk_trace *bt, sector_t sector,
int bytes,
214 int op, int op_flags, u32 what, int error,
int pdu_len,
215 void *pdu_data, union kernfs_node_id *cgid)
216 {
217 struct task_struct *tsk = current;
218 struct ring_buffer_event *event = NULL;
219 struct ring_buffer *buffer = NULL;
220 struct blk_io_trace *t;
221 unsigned long flags = 0;
222 unsigned long *sequence;
223 pid_t pid;
224 int cpu, pc = 0;
225 bool blk_tracer = blk_tracer_enabled;
226 ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
227
228 if (unlikely(bt->trace_state != Blktrace_running &&
!blk_tracer))
229 return;
230
231 what |= ddir_act[op_is_write(op) ? WRITE : READ]; <---


static inline bool op_is_write(unsigned int op)
{
return (op & 1);
}



The zone mgmt commands are neither write nor reads commands. I guess, one could characterize them as write commands, but they don't write any data, they update a state of a zone on a drive. One should keep it as is? and make sure the zone mgmt commands don't get categorized as either read/write.