Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()

From: Chao Yu
Date: Sun Feb 17 2019 - 20:39:26 EST


Hi xiang,

On 2019/2/15 16:58, Gao Xiang wrote:
> Hi Chao,
>
> On 2019/2/15 15:02, Chao Yu wrote:
>> On 2019/2/1 20:16, Gao Xiang wrote:
>>> As Al pointed out, "
>>> ... and while we are at it, what happens to
>>> unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> unsigned int matched = min(startprfx, endprfx);
>>>
>>> struct qstr dname = QSTR_INIT(data + nameoff,
>>> unlikely(mid >= ndirents - 1) ?
>>> maxsize - nameoff :
>>> le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>>
>>> /* string comparison without already matched prefix */
>>> int ret = dirnamecmp(name, &dname, &matched);
>>> if le16_to_cpu(de[...].nameoff) is not monotonically increasing? I.e.
>>> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>>>
>>> Corrupted fs image shouldn't oops the kernel.. "
>>>
>>> Revisit the related lookup flow to address the issue.
>>>
>>> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
>>> Cc: <stable@xxxxxxxxxxxxxxx> # 4.19+
>>> Suggested-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
>>> Signed-off-by: Gao Xiang <gaoxiang25@xxxxxxxxxx>
>>> ---
>>> [ It should be better get reviewed first and for linux-next... ]
>>>
>>> change log v2:
>>> - fix the missing "kunmap_atomic" pointed out by Dan;
>>> - minor cleanup;
>>>
>>> drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>>> 1 file changed, 99 insertions(+), 88 deletions(-)
>>>
>>> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
>>> index 5596c52e246d..321c881d720f 100644
>>> --- a/drivers/staging/erofs/namei.c
>>> +++ b/drivers/staging/erofs/namei.c
>>> @@ -15,74 +15,77 @@
>>>
>>> #include <trace/events/erofs.h>
>>>
>>> -/* based on the value of qn->len is accurate */
>>> -static inline int dirnamecmp(struct qstr *qn,
>>> - struct qstr *qd, unsigned int *matched)
>>> +struct erofs_qstr {
>>> + const unsigned char *name;
>>> + const unsigned char *end;
>>> +};
>>> +
>>> +/* based on the end of qn is accurate and it must have the trailing '\0' */
>>> +static inline int dirnamecmp(const struct erofs_qstr *qn,
>>> + const struct erofs_qstr *qd,
>>> + unsigned int *matched)
>>> {
>>> - unsigned int i = *matched, len = min(qn->len, qd->len);
>>> -loop:
>>> - if (unlikely(i >= len)) {
>>> - *matched = i;
>>> - if (qn->len < qd->len) {
>>> - /*
>>> - * actually (qn->len == qd->len)
>>> - * when qd->name[i] == '\0'
>>> - */
>>> - return qd->name[i] == '\0' ? 0 : -1;
>>> + unsigned int i = *matched;
>>> +
>>> + /*
>>> + * on-disk error, let's only BUG_ON in the debugging mode.
>>> + * otherwise, it will return 1 to just skip the invalid name
>>> + * and go on (in consideration of the lookup performance).
>>> + */
>>> + DBG_BUGON(qd->name > qd->end);
>>
>> qd->name == qd->end is not allowed as well?
>
> Make sense, it is only used for debugging mode, let me send another patch later...
>
>>
>> So will it be better to return directly here?
>>
>> if (unlikely(qd->name >= qd->end)) {
>> DBG_BUGON(1);
>> return 1;
>> }
>
> Corrupted image is rare happened in normal systems, I tend to only use
> DBG_BUGON here, and qn->name[i] is of course not '\0', so it will return 1;

If the image is corrupted, qn->name[i] may be anything, as you commented
above DBG_BUGON(), we really don't need to go through any later codes, it
can avoid potentially encoutnering wrong condition.

* otherwise, it will return 1 to just skip the invalid name

>
>>
>>> +
>>> + /* qd could not have trailing '\0' */
>>> + /* However it is absolutely safe if < qd->end */
>>> + while (qd->name + i < qd->end && qd->name[i] != '\0') {
>>> + if (qn->name[i] != qd->name[i]) {
>>> + *matched = i;
>>> + return qn->name[i] > qd->name[i] ? 1 : -1;
>>> }
>>> - return (qn->len > qd->len);
>>> + ++i;
>>> }
>>> -
>>> - if (qn->name[i] != qd->name[i]) {
>>> - *matched = i;
>>> - return qn->name[i] > qd->name[i] ? 1 : -1;
>>> - }
>>> -
>>> - ++i;
>>> - goto loop;
>>> + *matched = i;
>>> + /* See comments in __d_alloc on the terminating NUL character */
>>> + return qn->name[i] == '\0' ? 0 : 1;
>>> }
>>>
>>> -static struct erofs_dirent *find_target_dirent(
>>> - struct qstr *name,
>>> - u8 *data, int maxsize)
>>> +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1))
>>> +
>>> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
>>> + u8 *data,
>>> + unsigned int dirblksize,
>>> + const int ndirents)
>>> {
>>> - unsigned int ndirents, head, back;
>>> + int head, back;
>>> unsigned int startprfx, endprfx;
>>> struct erofs_dirent *const de = (struct erofs_dirent *)data;
>>>
>>> - /* make sure that maxsize is valid */
>>> - BUG_ON(maxsize < sizeof(struct erofs_dirent));
>>> -
>>> - ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
>>> -
>>> - /* corrupted dir (may be unnecessary...) */
>>> - BUG_ON(!ndirents);
>>> -
>>> - head = 0;
>>> + /* since the 1st dirent has been evaluated previously */
>>> + head = 1;
>>> back = ndirents - 1;
>>> startprfx = endprfx = 0;
>>>
>>> while (head <= back) {
>>> - unsigned int mid = head + (back - head) / 2;
>>> - unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>>> + const int mid = head + (back - head) / 2;
>>> + const int nameoff = nameoff_from_disk(de[mid].nameoff,
>>> + dirblksize);
>>> unsigned int matched = min(startprfx, endprfx);
>>> -
>>> - struct qstr dname = QSTR_INIT(data + nameoff,
>>> - unlikely(mid >= ndirents - 1) ?
>>> - maxsize - nameoff :
>>> - le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>> + struct erofs_qstr dname = {
>>> + .name = data + nameoff,
>>> + .end = unlikely(mid >= ndirents - 1) ?
>>> + data + dirblksize :
>>> + data + nameoff_from_disk(de[mid + 1].nameoff,
>>> + dirblksize)
>>> + };
>>>
>>> /* string comparison without already matched prefix */
>>> int ret = dirnamecmp(name, &dname, &matched);
>>>
>>> - if (unlikely(!ret))
>>> + if (unlikely(!ret)) {
>>> return de + mid;
>>> - else if (ret > 0) {
>>> + } else if (ret > 0) {
>>> head = mid + 1;
>>> startprfx = matched;
>>> - } else if (unlikely(mid < 1)) /* fix "mid" overflow */
>>> - break;
>>> - else {
>>> + } else {
>>> back = mid - 1;
>>> endprfx = matched;
>>> }
>>> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>>> return ERR_PTR(-ENOENT);
>>> }
>>>
>>> -static struct page *find_target_block_classic(
>>> - struct inode *dir,
>>> - struct qstr *name, int *_diff)
>>> +static struct page *find_target_block_classic(struct inode *dir,
>>> + struct erofs_qstr *name,
>>> + int *_ndirents)
>>> {
>>> unsigned int startprfx, endprfx;
>>> - unsigned int head, back;
>>> + int head, back;
>>> struct address_space *const mapping = dir->i_mapping;
>>> struct page *candidate = ERR_PTR(-ENOENT);
>>>
>>> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>>> back = inode_datablocks(dir) - 1;
>>>
>>> while (head <= back) {
>>> - unsigned int mid = head + (back - head) / 2;
>>> + const int mid = head + (back - head) / 2;
>>> struct page *page = read_mapping_page(mapping, mid, NULL);
>>>
>>> - if (IS_ERR(page)) {
>>> -exact_out:
>>> - if (!IS_ERR(candidate)) /* valid candidate */
>>> - put_page(candidate);
>>> - return page;
>>> - } else {
>>> - int diff;
>>> - unsigned int ndirents, matched;
>>> - struct qstr dname;
>>> + if (!IS_ERR(page)) {
>>> struct erofs_dirent *de = kmap_atomic(page);
>>> - unsigned int nameoff = le16_to_cpu(de->nameoff);
>>> -
>>> - ndirents = nameoff / sizeof(*de);
>>> + const int nameoff = nameoff_from_disk(de->nameoff,
>>> + EROFS_BLKSIZ);
>>> + const int ndirents = nameoff / sizeof(*de);
>>> + int diff;
>>> + unsigned int matched;
>>> + struct erofs_qstr dname;
>>>
>>> - /* corrupted dir (should have one entry at least) */
>>> - BUG_ON(!ndirents || nameoff > PAGE_SIZE);
>>> + if (unlikely(!ndirents)) {
>>> + DBG_BUGON(1);
>>> + kunmap_atomic(de);
>>> + put_page(page);
>>> + page = ERR_PTR(-EIO);
>>> + goto out;
>>> + }
>>>
>>> matched = min(startprfx, endprfx);
>>>
>>> dname.name = (u8 *)de + nameoff;
>>> - dname.len = ndirents == 1 ?
>>> - /* since the rest of the last page is 0 */
>>> - EROFS_BLKSIZ - nameoff
>>> - : le16_to_cpu(de[1].nameoff) - nameoff;
>>> + if (ndirents == 1)
>>> + dname.end = (u8 *)de + EROFS_BLKSIZ;
>>> + else
>>> + dname.end = (u8 *)de +
>>> + nameoff_from_disk(de[1].nameoff,
>>> + EROFS_BLKSIZ);
>>>
>>> /* string comparison without already matched prefix */
>>> diff = dirnamecmp(name, &dname, &matched);
>>> kunmap_atomic(de);
>>>
>>> if (unlikely(!diff)) {
>>> - *_diff = 0;
>>> - goto exact_out;
>>> + *_ndirents = 0;
>>> + goto out;
>>> } else if (diff > 0) {
>>> head = mid + 1;
>>> startprfx = matched;
>>> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>>> if (likely(!IS_ERR(candidate)))
>>> put_page(candidate);
>>> candidate = page;
>>> + *_ndirents = ndirents;
>>> } else {
>>> put_page(page);
>>>
>>> - if (unlikely(mid < 1)) /* fix "mid" overflow */
>>> - break;
>>> -
>>> back = mid - 1;
>>> endprfx = matched;
>>> }
>>> + continue;
>>> }
>>> +out: /* free if the candidate is valid */
>>> + if (!IS_ERR(candidate))
>>> + put_page(candidate);
>>> + return page;
>>> }
>>> - *_diff = 1;
>>> return candidate;
>>> }
>>>
>>> int erofs_namei(struct inode *dir,
>>> - struct qstr *name,
>>> - erofs_nid_t *nid, unsigned int *d_type)
>>> + struct qstr *name,
>>> + erofs_nid_t *nid, unsigned int *d_type)
>>> {
>>> - int diff;
>>> + int ndirents;
>>> struct page *page;
>>> - u8 *data;
>>> + void *data;
>>> struct erofs_dirent *de;
>>> + struct erofs_qstr qn;
>>>
>>> if (unlikely(!dir->i_size))
>>> return -ENOENT;
>>>
>>> - diff = 1;
>>> - page = find_target_block_classic(dir, name, &diff);
>>> + qn.name = name->name;
>>> + qn.end = name->name + name->len;
>>> +
>>> + ndirents = 0;
>>> + page = find_target_block_classic(dir, &qn, &ndirents);
>>>
>>> - if (unlikely(IS_ERR(page)))
>>> + if (IS_ERR(page))
>>> return PTR_ERR(page);
>>>
>>> data = kmap_atomic(page);
>>> /* the target page has been mapped */
>>> - de = likely(diff) ?
>>> - /* since the rest of the last page is 0 */
>>> - find_target_dirent(name, data, EROFS_BLKSIZ) :
>>> - (struct erofs_dirent *)data;
>>> + if (ndirents)
>>> + de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
>>
>> If we want to support different blksize during dirent lookup, how about
>> adding parameter for find_target_block_classic() as find_target_dirent()?
>
> Yes, you are right. However, find_target_dirent is now totally designed in page unit
> because of get_meta_page usage, but find_target_dirent doesn't.
>
> I think if we support sub-page feature later (by using iomap or someelse), it
> could be added later.

No problem, it's not a big deal. :)

Thanks,

>
> Thanks,
> Gao Xiang
>
>
>>
>> Thanks,
>>
>>> + else
>>> + de = (struct erofs_dirent *)data;
>>>
>>> - if (likely(!IS_ERR(de))) {
>>> + if (!IS_ERR(de)) {
>>> *nid = le64_to_cpu(de->nid);
>>> *d_type = de->file_type;
>>> }
>>>
>>
>
> .
>