Re: [PATCH v2 1/1] Bluetooth: Prioritize SCO traffic
From: Marcel Holtmann
Date: Tue Mar 24 2020 - 02:27:39 EST
Hi Luiz,
>>>> When scheduling TX packets, send all SCO/eSCO packets first, check for
>>>> pending SCO/eSCO packets after every ACL/LE packet and send them if any
>>>> are pending. This is done to make sure that we can meet SCO deadlines
>>>> on slow interfaces like UART.
>>>>
>>>> If we were to queue up multiple ACL packets without checking for a SCO
>>>> packet, we might miss the SCO timing. For example:
>>>>
>>>> The time it takes to send a maximum size ACL packet (1024 bytes):
>>>> t = 10/8 * 1024 bytes * 8 bits/byte * 1 packet / baudrate
>>>> where 10/8 is uart overhead due to start/stop bits per byte
>>>>
>>>> Replace t = 3.75ms (SCO deadline), which gives us a baudrate of 2730666.
>>>>
>>>> At a baudrate of 3000000, if we didn't check for SCO packets within 1024
>>>> bytes, we would miss the 3.75ms timing window.
>>>>
>>>> Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@xxxxxxxxxxxx>
>>>> ---
>>>>
>>>> Changes in v2:
>>>> * Refactor to check for SCO/eSCO after each ACL/LE packet sent
>>>> * Enabled SCO priority all the time and removed the sched_limit variable
>>>>
>>>> net/bluetooth/hci_core.c | 111 +++++++++++++++++++++------------------
>>>> 1 file changed, 61 insertions(+), 50 deletions(-)
>>>>
>>>> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
>>>> index dbd2ad3a26ed..a29177e1a9d0 100644
>>>> --- a/net/bluetooth/hci_core.c
>>>> +++ b/net/bluetooth/hci_core.c
>>>> @@ -4239,6 +4239,60 @@ static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
>>>> }
>>>> }
>>>>
>>>> +/* Schedule SCO */
>>>> +static void hci_sched_sco(struct hci_dev *hdev)
>>>> +{
>>>> + struct hci_conn *conn;
>>>> + struct sk_buff *skb;
>>>> + int quote;
>>>> +
>>>> + BT_DBG("%s", hdev->name);
>>>> +
>>>> + if (!hci_conn_num(hdev, SCO_LINK))
>>>> + return;
>>>> +
>>>> + while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) {
>>>> + while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
>>>> + BT_DBG("skb %p len %d", skb, skb->len);
>>>> + hci_send_frame(hdev, skb);
>>>> +
>>>> + conn->sent++;
>>>> + if (conn->sent == ~0)
>>>> + conn->sent = 0;
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> +static void hci_sched_esco(struct hci_dev *hdev)
>>>> +{
>>>> + struct hci_conn *conn;
>>>> + struct sk_buff *skb;
>>>> + int quote;
>>>> +
>>>> + BT_DBG("%s", hdev->name);
>>>> +
>>>> + if (!hci_conn_num(hdev, ESCO_LINK))
>>>> + return;
>>>> +
>>>> + while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK,
>>>> + &quote))) {
>>>> + while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
>>>> + BT_DBG("skb %p len %d", skb, skb->len);
>>>> + hci_send_frame(hdev, skb);
>>>> +
>>>> + conn->sent++;
>>>> + if (conn->sent == ~0)
>>>> + conn->sent = 0;
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> +static void hci_sched_sync(struct hci_dev *hdev)
>>>> +{
>>>> + hci_sched_sco(hdev);
>>>> + hci_sched_esco(hdev);
>>>> +}
>>>> +
>>>
>>> scrap this function. It has almost zero benefit.
>>
>> Done.
>>
>>>
>>>> static void hci_sched_acl_pkt(struct hci_dev *hdev)
>>>> {
>>>> unsigned int cnt = hdev->acl_cnt;
>>>> @@ -4270,6 +4324,9 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
>>>> hdev->acl_cnt--;
>>>> chan->sent++;
>>>> chan->conn->sent++;
>>>> +
>>>> + /* Send pending SCO packets right away */
>>>> + hci_sched_sync(hdev);
>>>
>>> hci_sched_esco();
>>> hci_sched_sco();
>>>
>>>> }
>>>> }
>>>>
>>>> @@ -4354,54 +4411,6 @@ static void hci_sched_acl(struct hci_dev *hdev)
>>>> }
>>>> }
>>>>
>>>> -/* Schedule SCO */
>>>> -static void hci_sched_sco(struct hci_dev *hdev)
>>>> -{
>>>> - struct hci_conn *conn;
>>>> - struct sk_buff *skb;
>>>> - int quote;
>>>> -
>>>> - BT_DBG("%s", hdev->name);
>>>> -
>>>> - if (!hci_conn_num(hdev, SCO_LINK))
>>>> - return;
>>>> -
>>>> - while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) {
>>>> - while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
>>>> - BT_DBG("skb %p len %d", skb, skb->len);
>>>> - hci_send_frame(hdev, skb);
>>>> -
>>>> - conn->sent++;
>>>> - if (conn->sent == ~0)
>>>> - conn->sent = 0;
>>>> - }
>>>> - }
>>>> -}
>>>> -
>>>> -static void hci_sched_esco(struct hci_dev *hdev)
>>>> -{
>>>> - struct hci_conn *conn;
>>>> - struct sk_buff *skb;
>>>> - int quote;
>>>> -
>>>> - BT_DBG("%s", hdev->name);
>>>> -
>>>> - if (!hci_conn_num(hdev, ESCO_LINK))
>>>> - return;
>>>> -
>>>> - while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK,
>>>> - &quote))) {
>>>> - while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
>>>> - BT_DBG("skb %p len %d", skb, skb->len);
>>>> - hci_send_frame(hdev, skb);
>>>> -
>>>> - conn->sent++;
>>>> - if (conn->sent == ~0)
>>>> - conn->sent = 0;
>>>> - }
>>>> - }
>>>> -}
>>>> -
>>>> static void hci_sched_le(struct hci_dev *hdev)
>>>> {
>>>> struct hci_chan *chan;
>>>> @@ -4436,6 +4445,9 @@ static void hci_sched_le(struct hci_dev *hdev)
>>>> cnt--;
>>>> chan->sent++;
>>>> chan->conn->sent++;
>>>> +
>>>> + /* Send pending SCO packets right away */
>>>> + hci_sched_sync(hdev);
>>>
>>> Same as above. Just call the two functions.
>>
>> Done
>>
>>>
>>>> }
>>>> }
>>>>
>>>> @@ -4458,9 +4470,8 @@ static void hci_tx_work(struct work_struct *work)
>>>>
>>>> if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
>>>> /* Schedule queues and send stuff to HCI driver */
>>>> + hci_sched_sync(hdev);
>>>> hci_sched_acl(hdev);
>>>> - hci_sched_sco(hdev);
>>>> - hci_sched_esco(hdev);
>>>> hci_sched_le(hdev);
>>>
>>> I would actually just move _le up after _acl and then keep _sco and _esco at the bottom. The calls here are just for the case there are no ACL nor LE packets.
>>
>> Then we would send at least 1 ACL/LE packet before SCO even if there
>> were SCO pending when we entered this function. I think it is still
>> better to keep SCO/eSCO at the top.
>
> I wonder if it wouldn't be better to have such prioritization done by the
> driver though, since this might just be spending extra cpu cycles in
> case there is enough bandwidth at the transport chances are the
> reordering here just doesn't make any difference in the end, you
> probably don't even need any changes to the core in order for the
> driver to detect what type of frame it is based on the skb, I recall
> we do already have such information in the driver so it is just a matter
> to reorder the frames as needed there.
We could hide the extra _acl and _le calls inside _sco and _esco behind a QUIRK that the UART driver just sets. However, I am not sure that will actually be much different. Even for USB transports it would be good to get the ISOC URBs on the way as quickly as possible.
What I was wondering is why we actually do scheduling per connection type. In the original code base it was ACL and SCO. We only had two connection types and two packet types. So that kind of made sense. However, I wonder if we were misguided by doing this per connection type and not focusing on keeping this per packet type.
To that extent we introduced priority handling for the ACL and LE links. So no matter what, the ACL and LE links will reorder their packets as needed. And the driver just executes this. So the core already reorders it.
I really wonder why we do not just make the core insert the SCO packets accordingly into the ACL/LE stream so that the driver really only has to transport them. What is good for a UART transport will not be bad for a USB transport.
Regards
Marcel