@@ -558,6 +558,7 @@ enum idle_type
#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */
#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */
#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */
+#define SD_TRACKS_CPUSET 4096 /* Spam matches non-exclusive cpuset */
I'd just use the next bit, and we should convert these to hex as
well I guess... I don't know what I was thinking ;)
struct sched_group {
struct sched_group *next; /* Must be a circular list */
@@ -630,6 +631,9 @@ struct sched_domain {
extern void partition_sched_domains(cpumask_t *partition1,
cpumask_t *partition2);
+extern void add_sched_domain(const cpumask_t *cpu_map);
+extern void destroy_sched_domain(const cpumask_t *cpu_map);
Not a big deal, but I'd probably be happier with a single hook from
the cpusets code, which does the right thing depending on whether it
is exclusive or not.
Hard to know which way around the layering should go, but I think that
it is simply a hint to the balancer code to do something nice, and as
such we should leave it completely to the scheduler.
+#ifdef CONFIG_CPUSETS
CPUSETS only? Or do we want to try to help sched_setaffinity users as
well?
+struct sched_domain_bundle {
+ struct sched_domain sd_cpuset;
+ struct sched_group **sg_cpuset_nodes;
+ int use_count;
+};
Can you get away without using the bundle? Or does it make things easier?
You could add a new use_count to struct sched_domain if you'd like.... does
it make the setup/teardown too difficult?
+#define SCHED_DOMAIN_CPUSET_MAX 8 /* power of 2 */
+static DEFINE_PER_CPU(long, sd_cpusets_used) = { 1UL <<SCHED_DOMAIN_CPUSET_MAX};
+static DEFINE_PER_CPU(struct sched_domain_bundle[SCHED_DOMAIN_CPUSET_MAX],
+ sd_cpusets_bundle);
Do we need a limit of 8? If you're worried about memory, I guess there are
lots of ways to DOS the system... if you're worried about scheduler balancing
overhead, we could do a seperate traversal of these guys at a reduced
interval (I guess that still leaves some places needing help, though)
+static int find_existing_sched_domain(int cpu, const cpumask_t *cpu_map)
Probably some way to distinguish these guys as operating on your "bundle" stack
would make it a bit clearer?
+ /* tweak sched_domain params based upon domain size */
+ *sd = SD_NODE_INIT;
+ sd->flags |= SD_TRACKS_CPUSET;
+ sd->max_interval = 8*(min(new_sd_span, 32));
+ sd->span = *cpu_map;
+ cpu_set(cpu, new_sd_cpu_map);
Can we just have a new SD_xxx_INIT for these?
Index: linux/kernel/cpuset.c
===================================================================
--- linux.orig/kernel/cpuset.c 2006-07-05 15:51:38.873939805 -0700
+++ linux/kernel/cpuset.c 2006-07-05 16:01:14.892039725 -0700
@@ -828,8 +828,12 @@ static int update_cpumask(struct cpuset mutex_lock(&callback_mutex);
cs->cpus_allowed = trialcs.cpus_allowed;
mutex_unlock(&callback_mutex);
- if (is_cpu_exclusive(cs) && !cpus_unchanged)
- update_cpu_domains(cs);
+ if (!cpus_unchanged) {
+ if (is_cpu_exclusive(cs))
+ update_cpu_domains(cs);
+ else
+ add_sched_domain(&cs->cpus_allowed);
+ }
return 0;
}
@@ -1934,6 +1938,8 @@ static int cpuset_rmdir(struct inode *un
set_bit(CS_REMOVED, &cs->flags);
if (is_cpu_exclusive(cs))
update_cpu_domains(cs);
+ else
+ destroy_sched_domain(&cs->cpus_allowed);
list_del(&cs->sibling); /* delete my sibling from parent->children */
spin_lock(&cs->dentry->d_lock);
d = dget(cs->dentry);
So you're just doing the inside. What about the complement? That
way you'd get a nice symmetric allocation on all cpus for each
cpuset... but that probably would require making the limit greater
than 8.
And it would be probably required for good sched_setaffinity
balancing too.