Re: [PATCH v3 07/11] mm/memory_hotplug: Create memory block devices after arch_add_memory()

From: David Hildenbrand
Date: Wed Jun 05 2019 - 07:03:06 EST


On 05.06.19 10:58, David Hildenbrand wrote:
>>> /*
>>> * For now, we have a linear search to go find the appropriate
>>> * memory_block corresponding to a particular phys_index. If
>>> @@ -658,6 +670,11 @@ static int init_memory_block(struct memory_block **memory, int block_id,
>>> unsigned long start_pfn;
>>> int ret = 0;
>>>
>>> + mem = find_memory_block_by_id(block_id, NULL);
>>> + if (mem) {
>>> + put_device(&mem->dev);
>>> + return -EEXIST;
>>> + }
>>
>> find_memory_block_by_id() is not that close to the main idea in this patch.
>> Would it be better to split this part?
>
> I played with that but didn't like the temporary results (e.g. having to
> export find_memory_block_by_id()). I'll stick to this for now.
>
>>
>>> mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>>> if (!mem)
>>> return -ENOMEM;
>>> @@ -699,44 +716,53 @@ static int add_memory_block(int base_section_nr)
>>> return 0;
>>> }
>>>
>>> +static void unregister_memory(struct memory_block *memory)
>>> +{
>>> + if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
>>> + return;
>>> +
>>> + /* drop the ref. we got via find_memory_block() */
>>> + put_device(&memory->dev);
>>> + device_unregister(&memory->dev);
>>> +}
>>> +
>>> /*
>>> - * need an interface for the VM to add new memory regions,
>>> - * but without onlining it.
>>> + * Create memory block devices for the given memory area. Start and size
>>> + * have to be aligned to memory block granularity. Memory block devices
>>> + * will be initialized as offline.
>>> */
>>> -int hotplug_memory_register(int nid, struct mem_section *section)
>>> +int create_memory_block_devices(unsigned long start, unsigned long size)
>>> {
>>> - int block_id = base_memory_block_id(__section_nr(section));
>>> - int ret = 0;
>>> + const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
>>> + int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
>>> struct memory_block *mem;
>>> + unsigned long block_id;
>>> + int ret = 0;
>>>
>>> - mutex_lock(&mem_sysfs_mutex);
>>> + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
>>> + !IS_ALIGNED(size, memory_block_size_bytes())))
>>> + return -EINVAL;
>>>
>>> - mem = find_memory_block(section);
>>> - if (mem) {
>>> - mem->section_count++;
>>> - put_device(&mem->dev);
>>> - } else {
>>> + mutex_lock(&mem_sysfs_mutex);
>>> + for (block_id = start_block_id; block_id != end_block_id; block_id++) {
>>> ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
>>> if (ret)
>>> - goto out;
>>> - mem->section_count++;
>>> + break;
>>> + mem->section_count = sections_per_block;
>>> + }
>>> + if (ret) {
>>> + end_block_id = block_id;
>>> + for (block_id = start_block_id; block_id != end_block_id;
>>> + block_id++) {
>>> + mem = find_memory_block_by_id(block_id, NULL);
>>> + mem->section_count = 0;
>>> + unregister_memory(mem);
>>> + }
>>> }
>>
>> Would it be better to do this in reverse order?
>>
>> And unregister_memory() would free mem, so it is still necessary to set
>> section_count to 0?
>
> 1. I kept the existing behavior (setting it to 0) for now. I am planning
> to eventually remove the section count completely (it could be
> beneficial to detect removing of partially populated memory blocks).

Correction: We already use it to block offlining of partially populated
memory blocks \o/

>
> 2. Reverse order: We would have to start with "block_id - 1", I don't
> like that better.
>
> Thanks for having a look!
>


--

Thanks,

David / dhildenb