Re: [PATCH 2/2] tty: n_gsm: fix deadlock and link starvation in outgoing data path

From: Jiri Slaby
Date: Mon May 09 2022 - 04:57:36 EST


On 06. 05. 22, 16:47, D. Starke wrote:
From: Daniel Starke <daniel.starke@xxxxxxxxxxx>

The current implementation queues up new control and user packets as needed
and processes this queue down to the ldisc in the same code path.
That means that the upper and the lower layer are hard coupled in the code.
Due to this deadlocks can happen as seen below while transmitting data,
especially during ldisc congestion. Furthermore, the data channels starve
the control channel on high transmission load on the ldisc.

Introduce an additional control channel data queue to prevent timeouts and
link hangups during ldisc congestion. This is being processed before the
user channel data queue in gsm_data_kick(), i.e. with the highest priority.
Put the queue to ldisc data path into a tasklet and trigger it whenever new

Tasklet? There is an ongoing work to phase them all out. So please don't add a new one -- you'd have to use something different.

...
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -5,6 +5,14 @@
*
* * THIS IS A DEVELOPMENT SNAPSHOT IT IS NOT A FINAL RELEASE *
*
+ * Outgoing path:
+ * tty -> DLCI fifo -> scheduler -> GSM MUX data queue ---o-> ldisc
+ * control message -> GSM MUX control queue --´
+ *
+ * Incoming path:
+ * ldisc -> gsm_queue() -o--> tty
+ * `-> gsm_control_response()
+ *
* TO DO:
* Mostly done: ioctls for setting modes/timing
* Partly done: hooks so you can pull off frames to non tty devs
@@ -210,6 +218,9 @@ struct gsm_mux {
/* Events on the GSM channel */
wait_queue_head_t event;
+ /* ldisc write task */
+ struct tasklet_struct tx_tsk;
+
/* Bits for GSM mode decoding */
/* Framing Layer */
@@ -240,9 +251,11 @@ struct gsm_mux {
unsigned int tx_bytes; /* TX data outstanding */
#define TX_THRESH_HI 8192
#define TX_THRESH_LO 2048
- struct list_head tx_list; /* Pending data packets */
+ struct list_head tx0_list; /* Pending control packets */
+ struct list_head tx1_list; /* Pending data packets */
/* Control messages */
+ struct timer_list kick_timer; /* Kick TX queuing on timeout */
struct timer_list t2_timer; /* Retransmit timer for commands */
int cretries; /* Command retry counter */
struct gsm_control *pending_cmd;/* Our current pending command */
@@ -369,6 +382,8 @@ static const u8 gsm_fcs8[256] = {
static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len);
static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk);
+static void gsmld_write_trigger(struct gsm_mux *gsm);
+static void gsmld_write_task(unsigned long arg);
/**
* gsm_fcs_add - update FCS
@@ -419,6 +434,29 @@ static int gsm_read_ea(unsigned int *val, u8 c)
return c & EA;
}
+/**
+ * gsm_read_ea_val - read a value until EA
+ * @val: variable holding value
+ * @data: buffer of data
+ * @clen: length of buffer
+ *
+ * Processes an EA value. Updates the passed variable and
+ * returns the processed data length.
+ */
+static int gsm_read_ea_val(unsigned int *val, const u8 *data, int clen)

So clen can be negative provided it is (signed) int?

+{
+ int len;
+
+ for (len = 0; clen > 0; len++, clen--) {
+ if (gsm_read_ea(val, *data++)) {
+ /* done */
+ len += 1;

len++

+ break;
+ }
+ }

Anyway, is that a harder-to-read variant of:
unsigned int len = 0;

for (; clen > 0; clen--) {
len++;
if (gsm_read_ea(val, *data++))
break;
}
?

+ return len;
+}
+
/**
* gsm_encode_modem - encode modem data bits
* @dlci: DLCI to encode from
@@ -544,94 +582,6 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
return olen;
}
-/**
- * gsm_send - send a control frame
- * @gsm: our GSM mux
- * @addr: address for control frame
- * @cr: command/response bit seen as initiator
- * @control: control byte including PF bit
- *
- * Format up and transmit a control frame. These do not go via the
- * queueing logic as they should be transmitted ahead of data when
- * they are needed.
- *
- * FIXME: Lock versus data TX path
- */
-
-static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
-{
- int len;
- u8 cbuf[10];
- u8 ibuf[3];
- int ocr;
-
- /* toggle C/R coding if not initiator */
- ocr = cr ^ (gsm->initiator ? 0 : 1);
-
- switch (gsm->encoding) {
- case 0:
- cbuf[0] = GSM0_SOF;
- cbuf[1] = (addr << 2) | (ocr << 1) | EA;
- cbuf[2] = control;
- cbuf[3] = EA; /* Length of data = 0 */
- cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3);
- cbuf[5] = GSM0_SOF;
- len = 6;
- break;
- case 1:
- case 2:
- /* Control frame + packing (but not frame stuffing) in mode 1 */
- ibuf[0] = (addr << 2) | (ocr << 1) | EA;
- ibuf[1] = control;
- ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2);
- /* Stuffing may double the size worst case */
- len = gsm_stuff_frame(ibuf, cbuf + 1, 3);
- /* Now add the SOF markers */
- cbuf[0] = GSM1_SOF;
- cbuf[len + 1] = GSM1_SOF;
- /* FIXME: we can omit the lead one in many cases */
- len += 2;
- break;
- default:
- WARN_ON(1);
- return;
- }
- gsmld_output(gsm, cbuf, len);
- if (!gsm->initiator) {
- cr = cr & gsm->initiator;
- control = control & ~PF;
- }
- gsm_print_packet("-->", addr, cr, control, NULL, 0);
-}
-
-/**
- * gsm_response - send a control response
- * @gsm: our GSM mux
- * @addr: address for control frame
- * @control: control byte including PF bit
- *
- * Format up and transmit a link level response frame.
- */
-
-static inline void gsm_response(struct gsm_mux *gsm, int addr, int control)
-{
- gsm_send(gsm, addr, 0, control);
-}
-
-/**
- * gsm_command - send a control command
- * @gsm: our GSM mux
- * @addr: address for control frame
- * @control: control byte including PF bit
- *
- * Format up and transmit a link level command frame.
- */
-
-static inline void gsm_command(struct gsm_mux *gsm, int addr, int control)
-{
- gsm_send(gsm, addr, 1, control);
-}
-
/* Data transmission */
#define HDR_LEN 6 /* ADDR CTRL [LEN.2] DATA FCS */
@@ -664,61 +614,158 @@ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len,
}
/**
- * gsm_data_kick - poke the queue
+ * gsm_send_packet - sends a single packet
* @gsm: GSM Mux
- * @dlci: DLCI sending the data
+ * @msg: packet to send
*
- * The tty device has called us to indicate that room has appeared in
- * the transmit queue. Ram more data into the pipe if we have any
- * If we have been flow-stopped by a CMD_FCOFF, then we can only
- * send messages on DLCI0 until CMD_FCON
+ * The given packet is encoded and send out. No memory is freed.
+ * The caller must hold the gsm tx lock.
+ */
+static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg)
+{
+ int len, ret;
+
+
+ if (gsm->encoding == 0) {
+ gsm->txframe[0] = GSM0_SOF;
+ memcpy(gsm->txframe + 1, msg->data, msg->len);
+ gsm->txframe[msg->len + 1] = GSM0_SOF;
+ len = msg->len + 2;
+ } else {
+ gsm->txframe[0] = GSM1_SOF;
+ len = gsm_stuff_frame(msg->data, gsm->txframe + 1, msg->len);
+ gsm->txframe[len + 1] = GSM1_SOF;
+ len += 2;
+ }
+
+ if (debug & 4)
+ print_hex_dump_bytes("gsm_send_packet: ", DUMP_PREFIX_OFFSET,
+ gsm->txframe, len);
+ gsm_print_packet("-->", msg->addr, gsm->initiator, msg->ctrl, msg->data,
+ msg->len);
+
+ ret = gsmld_output(gsm, gsm->txframe, len);
+ if (ret <= 0)
+ return ret;
+ gsm->tx_bytes -= msg->len;
+
+ return 0;
+}
+
+/**
+ * gsm_is_ctrl_flow_msg - checks if control flow message
+ * @msg: message to check
*
- * FIXME: lock against link layer control transmissions
+ * Returns non zero if the given message is a flow control command of the
+ * control channel. Zero is returned in any other case.
*/
+static int gsm_is_ctrl_flow_msg(struct gsm_msg *msg)
+{
+ int ret;
+ unsigned int cmd;
+
+ if (msg->addr > 0)
+ return 0;
+
+ ret = 0;
+ switch (msg->ctrl & ~PF) {
+ case UI:
+ case UIH:
+ cmd = 0;
+ if (gsm_read_ea_val(&cmd, msg->data + 2, msg->len - 2) < 1)
+ break;
+ switch (cmd & ~PF) {
+ case CMD_FCOFF:
+ case CMD_FCON:
+ ret = 1;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
-static void gsm_data_kick(struct gsm_mux *gsm, struct gsm_dlci *dlci)
+/**
+ * gsm_data_kick - poke the queue
+ * @gsm: GSM Mux
+ *
+ * The tty device has called us to indicate that room has appeared in
+ * the transmit queue. Ram more data into the pipe if we have any.
+ * If we have been flow-stopped by a CMD_FCOFF, then we can only
+ * send messages on DLCI0 until CMD_FCON. The caller must hold
+ * the gsm tx lock.
+ */
+static int gsm_data_kick(struct gsm_mux *gsm)
{
struct gsm_msg *msg, *nmsg;
- int len;
+ struct gsm_dlci *dlci;
+ int ret;
- list_for_each_entry_safe(msg, nmsg, &gsm->tx_list, list) {
- if (gsm->constipated && msg->addr)
+ clear_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags);
+
+ /* Serialize control messages and control channel messages first */
+ list_for_each_entry_safe(msg, nmsg, &gsm->tx0_list, list) {
+ if (gsm->constipated && !gsm_is_ctrl_flow_msg(msg))
+ return -EAGAIN;
+ ret = gsm_send_packet(gsm, msg);
+ switch (ret) {
+ case -ENOSPC:
+ return -ENOSPC;
+ case -ENODEV:
+ /* ldisc not open */
+ gsm->tx_bytes -= msg->len;
+ list_del(&msg->list);
+ kfree(msg);
continue;
- if (gsm->encoding != 0) {
- gsm->txframe[0] = GSM1_SOF;
- len = gsm_stuff_frame(msg->data,
- gsm->txframe + 1, msg->len);
- gsm->txframe[len + 1] = GSM1_SOF;
- len += 2;
- } else {
- gsm->txframe[0] = GSM0_SOF;
- memcpy(gsm->txframe + 1 , msg->data, msg->len);
- gsm->txframe[msg->len + 1] = GSM0_SOF;
- len = msg->len + 2;
- }
-
- if (debug & 4)
- print_hex_dump_bytes("gsm_data_kick: ",
- DUMP_PREFIX_OFFSET,
- gsm->txframe, len);
- if (gsmld_output(gsm, gsm->txframe, len) <= 0)
+ default:
+ if (ret >= 0) {
+ list_del(&msg->list);
+ kfree(msg);
+ }
break;
- /* FIXME: Can eliminate one SOF in many more cases */
- gsm->tx_bytes -= msg->len;
-
- list_del(&msg->list);
- kfree(msg);
+ }
+ }
- if (dlci) {
- tty_port_tty_wakeup(&dlci->port);
- } else {
- int i = 0;
+ if (gsm->constipated)
+ return -EAGAIN;
- for (i = 0; i < NUM_DLCI; i++)
- if (gsm->dlci[i])
- tty_port_tty_wakeup(&gsm->dlci[i]->port);
+ /* Serialize other channels */
+ if (list_empty(&gsm->tx1_list))
+ return 0;
+ list_for_each_entry_safe(msg, nmsg, &gsm->tx1_list, list) {
+ dlci = gsm->dlci[msg->addr];
+ /* Send only messages for DLCIs with valid state */
+ if (dlci->state != DLCI_OPEN) {
+ gsm->tx_bytes -= msg->len;
+ list_del(&msg->list);
+ kfree(msg);
+ continue;
+ }
+ ret = gsm_send_packet(gsm, msg);
+ switch (ret) {
+ case -ENOSPC:
+ return -ENOSPC;
+ case -ENODEV:
+ /* ldisc not open */
+ gsm->tx_bytes -= msg->len;
+ list_del(&msg->list);
+ kfree(msg);
+ continue;
+ default:
+ if (ret >= 0) {
+ list_del(&msg->list);
+ kfree(msg);
+ }
+ break;
}
}
+
+ return 1;
}


That feels like a LOT of code shuffling. It's unreviewable. Please split into several patches.

thanks,
--
js
suse labs