Re: [PATCH v2] staging: erofs: keep corrupted fs from crashing kernel in erofs_namei()

From: Gao Xiang
Date: Fri Feb 15 2019 - 03:58:38 EST


Hi Chao,

On 2019/2/15 15:02, Chao Yu wrote:
> On 2019/2/1 20:16, Gao Xiang wrote:
>> As Al pointed out, "
>> ... and while we are at it, what happens to
>> unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>> unsigned int matched = min(startprfx, endprfx);
>>
>> struct qstr dname = QSTR_INIT(data + nameoff,
>> unlikely(mid >= ndirents - 1) ?
>> maxsize - nameoff :
>> le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>>
>> /* string comparison without already matched prefix */
>> int ret = dirnamecmp(name, &dname, &matched);
>> if le16_to_cpu(de[...].nameoff) is not monotonically increasing? I.e.
>> what's to prevent e.g. (unsigned)-1 ending up in dname.len?
>>
>> Corrupted fs image shouldn't oops the kernel.. "
>>
>> Revisit the related lookup flow to address the issue.
>>
>> Fixes: d72d1ce60174 ("staging: erofs: add namei functions")
>> Cc: <stable@xxxxxxxxxxxxxxx> # 4.19+
>> Suggested-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
>> Signed-off-by: Gao Xiang <gaoxiang25@xxxxxxxxxx>
>> ---
>> [ It should be better get reviewed first and for linux-next... ]
>>
>> change log v2:
>> - fix the missing "kunmap_atomic" pointed out by Dan;
>> - minor cleanup;
>>
>> drivers/staging/erofs/namei.c | 187 ++++++++++++++++++++++--------------------
>> 1 file changed, 99 insertions(+), 88 deletions(-)
>>
>> diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
>> index 5596c52e246d..321c881d720f 100644
>> --- a/drivers/staging/erofs/namei.c
>> +++ b/drivers/staging/erofs/namei.c
>> @@ -15,74 +15,77 @@
>>
>> #include <trace/events/erofs.h>
>>
>> -/* based on the value of qn->len is accurate */
>> -static inline int dirnamecmp(struct qstr *qn,
>> - struct qstr *qd, unsigned int *matched)
>> +struct erofs_qstr {
>> + const unsigned char *name;
>> + const unsigned char *end;
>> +};
>> +
>> +/* based on the end of qn is accurate and it must have the trailing '\0' */
>> +static inline int dirnamecmp(const struct erofs_qstr *qn,
>> + const struct erofs_qstr *qd,
>> + unsigned int *matched)
>> {
>> - unsigned int i = *matched, len = min(qn->len, qd->len);
>> -loop:
>> - if (unlikely(i >= len)) {
>> - *matched = i;
>> - if (qn->len < qd->len) {
>> - /*
>> - * actually (qn->len == qd->len)
>> - * when qd->name[i] == '\0'
>> - */
>> - return qd->name[i] == '\0' ? 0 : -1;
>> + unsigned int i = *matched;
>> +
>> + /*
>> + * on-disk error, let's only BUG_ON in the debugging mode.
>> + * otherwise, it will return 1 to just skip the invalid name
>> + * and go on (in consideration of the lookup performance).
>> + */
>> + DBG_BUGON(qd->name > qd->end);
>
> qd->name == qd->end is not allowed as well?

Make sense, it is only used for debugging mode, let me send another patch later...

>
> So will it be better to return directly here?
>
> if (unlikely(qd->name >= qd->end)) {
> DBG_BUGON(1);
> return 1;
> }

Corrupted image is rare happened in normal systems, I tend to only use
DBG_BUGON here, and qn->name[i] is of course not '\0', so it will return 1;

>
>> +
>> + /* qd could not have trailing '\0' */
>> + /* However it is absolutely safe if < qd->end */
>> + while (qd->name + i < qd->end && qd->name[i] != '\0') {
>> + if (qn->name[i] != qd->name[i]) {
>> + *matched = i;
>> + return qn->name[i] > qd->name[i] ? 1 : -1;
>> }
>> - return (qn->len > qd->len);
>> + ++i;
>> }
>> -
>> - if (qn->name[i] != qd->name[i]) {
>> - *matched = i;
>> - return qn->name[i] > qd->name[i] ? 1 : -1;
>> - }
>> -
>> - ++i;
>> - goto loop;
>> + *matched = i;
>> + /* See comments in __d_alloc on the terminating NUL character */
>> + return qn->name[i] == '\0' ? 0 : 1;
>> }
>>
>> -static struct erofs_dirent *find_target_dirent(
>> - struct qstr *name,
>> - u8 *data, int maxsize)
>> +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1))
>> +
>> +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
>> + u8 *data,
>> + unsigned int dirblksize,
>> + const int ndirents)
>> {
>> - unsigned int ndirents, head, back;
>> + int head, back;
>> unsigned int startprfx, endprfx;
>> struct erofs_dirent *const de = (struct erofs_dirent *)data;
>>
>> - /* make sure that maxsize is valid */
>> - BUG_ON(maxsize < sizeof(struct erofs_dirent));
>> -
>> - ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
>> -
>> - /* corrupted dir (may be unnecessary...) */
>> - BUG_ON(!ndirents);
>> -
>> - head = 0;
>> + /* since the 1st dirent has been evaluated previously */
>> + head = 1;
>> back = ndirents - 1;
>> startprfx = endprfx = 0;
>>
>> while (head <= back) {
>> - unsigned int mid = head + (back - head) / 2;
>> - unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
>> + const int mid = head + (back - head) / 2;
>> + const int nameoff = nameoff_from_disk(de[mid].nameoff,
>> + dirblksize);
>> unsigned int matched = min(startprfx, endprfx);
>> -
>> - struct qstr dname = QSTR_INIT(data + nameoff,
>> - unlikely(mid >= ndirents - 1) ?
>> - maxsize - nameoff :
>> - le16_to_cpu(de[mid + 1].nameoff) - nameoff);
>> + struct erofs_qstr dname = {
>> + .name = data + nameoff,
>> + .end = unlikely(mid >= ndirents - 1) ?
>> + data + dirblksize :
>> + data + nameoff_from_disk(de[mid + 1].nameoff,
>> + dirblksize)
>> + };
>>
>> /* string comparison without already matched prefix */
>> int ret = dirnamecmp(name, &dname, &matched);
>>
>> - if (unlikely(!ret))
>> + if (unlikely(!ret)) {
>> return de + mid;
>> - else if (ret > 0) {
>> + } else if (ret > 0) {
>> head = mid + 1;
>> startprfx = matched;
>> - } else if (unlikely(mid < 1)) /* fix "mid" overflow */
>> - break;
>> - else {
>> + } else {
>> back = mid - 1;
>> endprfx = matched;
>> }
>> @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
>> return ERR_PTR(-ENOENT);
>> }
>>
>> -static struct page *find_target_block_classic(
>> - struct inode *dir,
>> - struct qstr *name, int *_diff)
>> +static struct page *find_target_block_classic(struct inode *dir,
>> + struct erofs_qstr *name,
>> + int *_ndirents)
>> {
>> unsigned int startprfx, endprfx;
>> - unsigned int head, back;
>> + int head, back;
>> struct address_space *const mapping = dir->i_mapping;
>> struct page *candidate = ERR_PTR(-ENOENT);
>>
>> @@ -105,41 +108,43 @@ static struct page *find_target_block_classic(
>> back = inode_datablocks(dir) - 1;
>>
>> while (head <= back) {
>> - unsigned int mid = head + (back - head) / 2;
>> + const int mid = head + (back - head) / 2;
>> struct page *page = read_mapping_page(mapping, mid, NULL);
>>
>> - if (IS_ERR(page)) {
>> -exact_out:
>> - if (!IS_ERR(candidate)) /* valid candidate */
>> - put_page(candidate);
>> - return page;
>> - } else {
>> - int diff;
>> - unsigned int ndirents, matched;
>> - struct qstr dname;
>> + if (!IS_ERR(page)) {
>> struct erofs_dirent *de = kmap_atomic(page);
>> - unsigned int nameoff = le16_to_cpu(de->nameoff);
>> -
>> - ndirents = nameoff / sizeof(*de);
>> + const int nameoff = nameoff_from_disk(de->nameoff,
>> + EROFS_BLKSIZ);
>> + const int ndirents = nameoff / sizeof(*de);
>> + int diff;
>> + unsigned int matched;
>> + struct erofs_qstr dname;
>>
>> - /* corrupted dir (should have one entry at least) */
>> - BUG_ON(!ndirents || nameoff > PAGE_SIZE);
>> + if (unlikely(!ndirents)) {
>> + DBG_BUGON(1);
>> + kunmap_atomic(de);
>> + put_page(page);
>> + page = ERR_PTR(-EIO);
>> + goto out;
>> + }
>>
>> matched = min(startprfx, endprfx);
>>
>> dname.name = (u8 *)de + nameoff;
>> - dname.len = ndirents == 1 ?
>> - /* since the rest of the last page is 0 */
>> - EROFS_BLKSIZ - nameoff
>> - : le16_to_cpu(de[1].nameoff) - nameoff;
>> + if (ndirents == 1)
>> + dname.end = (u8 *)de + EROFS_BLKSIZ;
>> + else
>> + dname.end = (u8 *)de +
>> + nameoff_from_disk(de[1].nameoff,
>> + EROFS_BLKSIZ);
>>
>> /* string comparison without already matched prefix */
>> diff = dirnamecmp(name, &dname, &matched);
>> kunmap_atomic(de);
>>
>> if (unlikely(!diff)) {
>> - *_diff = 0;
>> - goto exact_out;
>> + *_ndirents = 0;
>> + goto out;
>> } else if (diff > 0) {
>> head = mid + 1;
>> startprfx = matched;
>> @@ -147,47 +152,53 @@ static struct page *find_target_block_classic(
>> if (likely(!IS_ERR(candidate)))
>> put_page(candidate);
>> candidate = page;
>> + *_ndirents = ndirents;
>> } else {
>> put_page(page);
>>
>> - if (unlikely(mid < 1)) /* fix "mid" overflow */
>> - break;
>> -
>> back = mid - 1;
>> endprfx = matched;
>> }
>> + continue;
>> }
>> +out: /* free if the candidate is valid */
>> + if (!IS_ERR(candidate))
>> + put_page(candidate);
>> + return page;
>> }
>> - *_diff = 1;
>> return candidate;
>> }
>>
>> int erofs_namei(struct inode *dir,
>> - struct qstr *name,
>> - erofs_nid_t *nid, unsigned int *d_type)
>> + struct qstr *name,
>> + erofs_nid_t *nid, unsigned int *d_type)
>> {
>> - int diff;
>> + int ndirents;
>> struct page *page;
>> - u8 *data;
>> + void *data;
>> struct erofs_dirent *de;
>> + struct erofs_qstr qn;
>>
>> if (unlikely(!dir->i_size))
>> return -ENOENT;
>>
>> - diff = 1;
>> - page = find_target_block_classic(dir, name, &diff);
>> + qn.name = name->name;
>> + qn.end = name->name + name->len;
>> +
>> + ndirents = 0;
>> + page = find_target_block_classic(dir, &qn, &ndirents);
>>
>> - if (unlikely(IS_ERR(page)))
>> + if (IS_ERR(page))
>> return PTR_ERR(page);
>>
>> data = kmap_atomic(page);
>> /* the target page has been mapped */
>> - de = likely(diff) ?
>> - /* since the rest of the last page is 0 */
>> - find_target_dirent(name, data, EROFS_BLKSIZ) :
>> - (struct erofs_dirent *)data;
>> + if (ndirents)
>> + de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
>
> If we want to support different blksize during dirent lookup, how about
> adding parameter for find_target_block_classic() as find_target_dirent()?

Yes, you are right. However, find_target_dirent is now totally designed in page unit
because of get_meta_page usage, but find_target_dirent doesn't.

I think if we support sub-page feature later (by using iomap or someelse), it
could be added later.

Thanks,
Gao Xiang


>
> Thanks,
>
>> + else
>> + de = (struct erofs_dirent *)data;
>>
>> - if (likely(!IS_ERR(de))) {
>> + if (!IS_ERR(de)) {
>> *nid = le64_to_cpu(de->nid);
>> *d_type = de->file_type;
>> }
>>
>