From: James Cleverdon Forthcoming IBM boxes will be using Nocona and/or Opteron chips in clustered mode to get beyond 8 CPUs. In fact, there are plans to try for 128 CPUs when the Tulsa chip comes out. Thus, there are a fair number of signed vs. unsigned changes in the patch. Thanks to the HPET timer and some HW changes, I've been able to remove the MPS/ACPI string comparisons from the detection code. Instead, it scans bios_cpu_apicid and uses simple heuristics to select the correct IRQ delivery mode. No need for a config option. Hurrah! Likewise, I've been able to avoid the preprocessor tricks that the i386 sub-arch needed to build with one or more sub-arches. DESC x86-64-clustered-apic-support fix EDESC From: James Cleverdon Rafael has managed to dereference the genapic pointer before clustered_apic_check() was called to initialize it. I deliberately left genapic NULL to catch this. OK, between the PCI bridge blacklisted in check_ioapic() turning off APIC support, the bad checksum found in acpi_table_init(), and the v1.1 MPS table, the stray code path is in there somewhere.... A quick and dirty patch to get folks working again would be to initialize genapic to &apic_flat in genapic.c: DESC x86-64-clustered-apic-support-fix fix EDESC arch/x86_64/kernel/genapic.c:23: warning: type defaults to `int' in declaration of `EXPORT_SYMBOL' arch/x86_64/kernel/genapic.c:23: warning: parameter names (without types) in function declaration arch/x86_64/kernel/genapic.c:23: warning: data definition has no type or storage class arch/x86_64/kernel/genapic_cluster.c:61: warning: `cluster_cpu_present_to_apicid' defined but not used DESC x86-64-clustered-apic-support fix EDESC From: James Cleverdon Rafael's crash on Saturday (2004-10-02) was caused by the lack of a call to clustered_apic_check() in the MPS parser. Since the function was a no-op until my patch, this is not surprising. 
You can either revert my quick kludge that initialized the genapic pointer to apic_flat, or leave it in because it is right for 99.999+% of systems. Signed-off-by: James Cleverdon Signed-off-by: Andrew Morton --- 25-akpm/arch/i386/kernel/acpi/boot.c | 2 25-akpm/arch/x86_64/kernel/Makefile | 3 25-akpm/arch/x86_64/kernel/apic.c | 22 ---- 25-akpm/arch/x86_64/kernel/genapic.c | 82 +++++++++++++++++ 25-akpm/arch/x86_64/kernel/genapic_cluster.c | 127 +++++++++++++++++++++++++++ 25-akpm/arch/x86_64/kernel/genapic_flat.c | 119 +++++++++++++++++++++++++ 25-akpm/arch/x86_64/kernel/io_apic.c | 61 ++---------- 25-akpm/arch/x86_64/kernel/mpparse.c | 1 25-akpm/arch/x86_64/kernel/smp.c | 100 --------------------- 25-akpm/arch/x86_64/kernel/smpboot.c | 9 - 25-akpm/include/asm-x86_64/apic.h | 2 25-akpm/include/asm-x86_64/apicdef.h | 33 ++++--- 25-akpm/include/asm-x86_64/genapic.h | 34 +++++++ 25-akpm/include/asm-x86_64/ipi.h | 113 ++++++++++++++++++++++++ 25-akpm/include/asm-x86_64/irq.h | 2 25-akpm/include/asm-x86_64/mach_apic.h | 28 +++++ 25-akpm/include/asm-x86_64/mpspec.h | 4 25-akpm/include/asm-x86_64/msi.h | 1 25-akpm/include/asm-x86_64/smp.h | 44 ++++----- 19 files changed, 573 insertions(+), 214 deletions(-) diff -puN arch/i386/kernel/acpi/boot.c~x86-64-clustered-apic-support arch/i386/kernel/acpi/boot.c --- 25/arch/i386/kernel/acpi/boot.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.517716464 -0700 +++ 25-akpm/arch/i386/kernel/acpi/boot.c 2004-10-21 14:54:14.538713272 -0700 @@ -40,7 +40,7 @@ #ifdef CONFIG_X86_64 static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { } -static inline void clustered_apic_check(void) { } +extern void __init clustered_apic_check(void); static inline int ioapic_setup_disabled(void) { return 0; } #include diff -puN arch/x86_64/kernel/apic.c~x86-64-clustered-apic-support arch/x86_64/kernel/apic.c --- 25/arch/x86_64/kernel/apic.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.519716160 -0700 +++ 
25-akpm/arch/x86_64/kernel/apic.c 2004-10-21 14:54:14.539713120 -0700 @@ -32,6 +32,7 @@ #include #include #include +#include int apic_verbosity; @@ -300,8 +301,7 @@ void __init setup_local_APIC (void) * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!clustered_apic_mode && - !physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map)) + if (!apic_id_registered()) BUG(); /* @@ -309,23 +309,7 @@ void __init setup_local_APIC (void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - - if (!clustered_apic_mode) { - /* - * In clustered apic mode, the firmware does this for us - * Put the APIC into flat delivery mode. - * Must be "all ones" explicitly for 82489DX. - */ - apic_write_around(APIC_DFR, 0xffffffff); - - /* - * Set up the logical destination ID. - */ - value = apic_read(APIC_LDR); - value &= ~APIC_LDR_MASK; - value |= (1<<(smp_processor_id()+24)); - apic_write_around(APIC_LDR, value); - } + init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this diff -puN /dev/null arch/x86_64/kernel/genapic.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/genapic.c 2004-10-21 14:54:14.540712968 -0700 @@ -0,0 +1,82 @@ +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch probe layer. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +EXPORT_SYMBOL(x86_cpu_to_apicid); +u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; + +extern struct genapic apic_cluster; +extern struct genapic apic_flat; + +struct genapic *genapic = &apic_flat; + + +/* + * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. + */ +void __init clustered_apic_check(void) +{ + long i; + u8 clusters, max_cluster; + u8 id; + u8 cluster_cnt[NUM_APIC_CLUSTERS]; + + memset(cluster_cnt, 0, sizeof(cluster_cnt)); + + for (i = 0; i < NR_CPUS; i++) { + id = bios_cpu_apicid[i]; + if (id != BAD_APICID) + cluster_cnt[APIC_CLUSTERID(id)]++; + } + + clusters = 0; + max_cluster = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (cluster_cnt[i] > 0) { + ++clusters; + if (cluster_cnt[i] > max_cluster) + max_cluster = cluster_cnt[i]; + } + } + + /* + * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, + * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical + * else physical mode. + * (We don't use lowest priority delivery + HW APIC IRQ steering, so + * can ignore the clustered logical case and go straight to physical.) + */ + if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) + genapic = &apic_flat; + else + genapic = &apic_cluster; + + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); +} + +/* Same for both flat and clustered. */ + +void send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); +} diff -puN /dev/null arch/x86_64/kernel/genapic_cluster.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/genapic_cluster.c 2004-10-21 14:54:14.541712816 -0700 @@ -0,0 +1,127 @@ +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Clustered APIC subarch code. Up to 255 CPUs, physical delivery. + * (A more realistic maximum is around 230 CPUs.) + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void cluster_init_apic_ldr(void) +{ + unsigned long val, id; + long i, count; + u8 lid; + u8 my_id = hard_smp_processor_id(); + u8 my_cluster = APIC_CLUSTER(my_id); + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = NR_CPUS; --i >= 0; ) { + lid = x86_cpu_to_log_apicid[i]; + if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) + ++count; + } + /* + * We only have a 4 wide bitmap in cluster mode. There's no way + * to get above 60 CPUs and still give each one it's own bit. + * But, we're using physical IRQ delivery, so we don't care. + * Use bit 3 for the 4th through Nth CPU in each cluster. + */ + if (count >= XAPIC_DEST_CPUS_SHIFT) + count = 3; + id = my_cluster | (1UL << count); + x86_cpu_to_log_apicid[smp_processor_id()] = id; + apic_write_around(APIC_DFR, APIC_DFR_CLUSTER); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write_around(APIC_LDR, val); +} + +static int cluster_cpu_present_to_apicid(int mps_cpu) +{ + if ((unsigned)mps_cpu < NR_CPUS) + return (int)bios_cpu_apicid[mps_cpu]; + else + return BAD_APICID; +} + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ + +static cpumask_t cluster_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static void cluster_send_IPI_mask(cpumask_t mask, int vector) +{ + send_IPI_mask_sequence(mask, vector); +} + +static void cluster_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + cluster_send_IPI_mask(mask, vector); +} + +static void cluster_send_IPI_all(int vector) +{ + cluster_send_IPI_mask(cpu_online_map, vector); +} + +static int cluster_apic_id_registered(void) +{ + return 1; +} + +static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return x86_cpu_to_apicid[cpu]; + else + return BAD_APICID; +} + + +struct genapic apic_cluster = { + .name = "clustered", + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED, + .target_cpus = cluster_target_cpus, + .apic_id_registered = cluster_apic_id_registered, + .init_apic_ldr = cluster_init_apic_ldr, + .send_IPI_all = cluster_send_IPI_all, + .send_IPI_allbutself = cluster_send_IPI_allbutself, + .send_IPI_mask = cluster_send_IPI_mask, + .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid, +}; diff -puN /dev/null arch/x86_64/kernel/genapic_flat.c --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/arch/x86_64/kernel/genapic_flat.c 2004-10-21 14:54:14.541712816 -0700 @@ -0,0 +1,119 @@ +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Flat APIC subarch code. Maximum 8 CPUs, logical delivery. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static cpumask_t flat_target_cpus(void) +{ + return cpu_online_map; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void flat_init_apic_ldr(void) +{ + unsigned long val; + unsigned long num, id; + + num = smp_processor_id(); + id = 1UL << num; + x86_cpu_to_log_apicid[num] = id; + apic_write_around(APIC_DFR, APIC_DFR_FLAT); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write_around(APIC_LDR, val); +} + +static void flat_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then + * we get an APIC send error if we try to broadcast. + * thus we have to avoid sending IPIs in this case. + */ + if (num_online_cpus() > 1) + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); +} + +static void flat_send_IPI_all(int vector) +{ + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); +} + +static void flat_send_IPI_mask(cpumask_t cpumask, int vector) +{ + unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long cfg; + unsigned long flags; + + local_save_flags(flags); + local_irq_disable(); + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL); + + /* + * Send the IPI. The write to APIC_ICR fires this off. 
+ */ + apic_write_around(APIC_ICR, cfg); + local_irq_restore(flags); +} + +static int flat_apic_id_registered(void) +{ + return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); +} + +static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) +{ + return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; +} + + +struct genapic apic_flat = { + .name = "flat", + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, + .target_cpus = flat_target_cpus, + .apic_id_registered = flat_apic_id_registered, + .init_apic_ldr = flat_init_apic_ldr, + .send_IPI_all = flat_send_IPI_all, + .send_IPI_allbutself = flat_send_IPI_allbutself, + .send_IPI_mask = flat_send_IPI_mask, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, +}; diff -puN arch/x86_64/kernel/io_apic.c~x86-64-clustered-apic-support arch/x86_64/kernel/io_apic.c --- 25/arch/x86_64/kernel/io_apic.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.521715856 -0700 +++ 25-akpm/arch/x86_64/kernel/io_apic.c 2004-10-21 14:54:14.544712360 -0700 @@ -36,6 +36,7 @@ #include #include #include +#include #define __apicdebuginit __init @@ -726,8 +727,8 @@ void __init setup_IO_APIC_irqs(void) */ memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* enable IRQ */ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); @@ -795,10 +796,10 @@ void __init setup_ExtINT_IRQ0_pin(unsign * We use logical delivery to get the timer IRQ * to the first CPU. 
*/ - entry.dest_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.mask = 0; /* unmask IRQ now */ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = dest_LowestPrio; + entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -1146,7 +1147,6 @@ void disable_IO_APIC(void) static void __init setup_ioapic_ids_from_mpc (void) { union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map = phys_cpu_present_map; int apic; int i; unsigned char old_id; @@ -1172,28 +1172,7 @@ static void __init setup_ioapic_ids_from mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (physid_isset(mp_ioapics[apic].mpc_apicid, phys_id_present_map)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); - for (i = 0; i < 0xf; i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= 0xf) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; - } else { - printk(KERN_INFO - "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); - physid_set(mp_ioapics[apic].mpc_apicid, phys_id_present_map); - } + printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); /* @@ -1348,9 +1327,9 @@ static void set_ioapic_affinity_irq(unsi dest = cpu_mask_to_apicid(mask); /* - * Only the first 8 bits are valid. + * Only the high 8 bits are valid. 
*/ - dest = dest << 24; + dest = SET_APIC_LOGICAL_ID(dest); spin_lock_irqsave(&ioapic_lock, flags); __DO_ACTION(1, = dest, ) @@ -1832,7 +1811,7 @@ device_initcall(ioapic_init_sysfs); #ifdef CONFIG_ACPI_BOOT -#define IO_APIC_MAX_ID 15 +#define IO_APIC_MAX_ID 0xFE int __init io_apic_get_unique_id (int ioapic, int apic_id) { @@ -1949,8 +1928,8 @@ int io_apic_set_pci_routing (int ioapic, memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_LowestPrio; - entry.dest_mode = INT_DELIVERY_MODE; + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); entry.trigger = edge_level; entry.polarity = active_high_low; @@ -1984,24 +1963,6 @@ int io_apic_set_pci_routing (int ioapic, #endif /*CONFIG_ACPI_BOOT*/ -#ifndef CONFIG_SMP -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - - /* - * Send the IPI. The write to APIC_ICR fires this off. 
- */ - apic_write_around(APIC_ICR, cfg); -} -#endif - /* * This function currently is only a helper for the i386 smp boot process where diff -puN arch/x86_64/kernel/Makefile~x86-64-clustered-apic-support arch/x86_64/kernel/Makefile --- 25/arch/x86_64/kernel/Makefile~x86-64-clustered-apic-support 2004-10-21 14:54:14.522715704 -0700 +++ 25-akpm/arch/x86_64/kernel/Makefile 2004-10-21 14:54:14.544712360 -0700 @@ -17,7 +17,8 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ + genapic.o genapic_cluster.o genapic_flat.o obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ diff -puN arch/x86_64/kernel/smpboot.c~x86-64-clustered-apic-support arch/x86_64/kernel/smpboot.c --- 25/arch/x86_64/kernel/smpboot.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.524715400 -0700 +++ 25-akpm/arch/x86_64/kernel/smpboot.c 2004-10-21 14:54:14.545712208 -0700 @@ -56,15 +56,12 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; -char phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ +/* Package ID of each logical CPU */ +u8 phys_proc_id[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map; -/* which logical CPU number maps to which CPU (physical APIC ID) */ -volatile char x86_cpu_to_apicid[NR_CPUS]; -EXPORT_SYMBOL(x86_cpu_to_apicid); - static cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; static cpumask_t smp_commenced_mask; @@ -658,6 +655,8 @@ static void __init do_boot_cpu (int apic cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ cpucount--; + x86_cpu_to_apicid[cpu] = BAD_APICID; + x86_cpu_to_log_apicid[cpu] = BAD_APICID; } } diff -puN arch/x86_64/kernel/smp.c~x86-64-clustered-apic-support arch/x86_64/kernel/smp.c --- 25/arch/x86_64/kernel/smp.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.525715248 -0700 +++ 25-akpm/arch/x86_64/kernel/smp.c 2004-10-21 14:54:14.546712056 -0700 @@ -24,105 +24,7 @@ #include #include #include - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline unsigned int __prepare_ICR (unsigned int shortcut, int vector) -{ - unsigned int icr = APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; - if (vector == KDB_VECTOR) - icr = (icr & (~APIC_VECTOR_MASK)) | APIC_DM_NMI; - return icr; -} - -static inline int __prepare_ICR2 (unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -static inline void __send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. 
The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} - -static inline void send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then - * we get an APIC send error if we try to broadcast. - * thus we have to avoid sending IPIs in this case. - */ - if (num_online_cpus() > 1) - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -static inline void send_IPI_all(int vector) -{ - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -void send_IPI_self(int vector) -{ - __send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -static inline void send_IPI_mask(cpumask_t cpumask, int vector) -{ - unsigned long mask = cpus_addr(cpumask)[0]; - unsigned long cfg; - unsigned long flags; - - local_save_flags(flags); - local_irq_disable(); - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write_around(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); -} +#include /* * Smarter SMP flushing macros. 
diff -puN include/asm-x86_64/apicdef.h~x86-64-clustered-apic-support include/asm-x86_64/apicdef.h --- 25/include/asm-x86_64/apicdef.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.526715096 -0700 +++ 25-akpm/include/asm-x86_64/apicdef.h 2004-10-21 14:54:14.547711904 -0700 @@ -11,26 +11,26 @@ #define APIC_DEFAULT_PHYS_BASE 0xfee00000 #define APIC_ID 0x20 -#define APIC_ID_MASK (0x0F<<24) -#define GET_APIC_ID(x) (((x)>>24)&0x0F) +#define APIC_ID_MASK (0xFFu<<24) +#define GET_APIC_ID(x) (((x)>>24)&0xFFu) #define APIC_LVR 0x30 #define APIC_LVR_MASK 0xFF00FF -#define GET_APIC_VERSION(x) ((x)&0xFF) -#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) -#define APIC_INTEGRATED(x) ((x)&0xF0) +#define GET_APIC_VERSION(x) ((x)&0xFFu) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFFu) +#define APIC_INTEGRATED(x) ((x)&0xF0u) #define APIC_TASKPRI 0x80 -#define APIC_TPRI_MASK 0xFF +#define APIC_TPRI_MASK 0xFFu #define APIC_ARBPRI 0x90 -#define APIC_ARBPRI_MASK 0xFF +#define APIC_ARBPRI_MASK 0xFFu #define APIC_PROCPRI 0xA0 #define APIC_EOI 0xB0 #define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ #define APIC_RRR 0xC0 #define APIC_LDR 0xD0 -#define APIC_LDR_MASK (0xFF<<24) -#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) +#define APIC_LDR_MASK (0xFFu<<24) +#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFFu) #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) -#define APIC_ALL_CPUS 0xFF +#define APIC_ALL_CPUS 0xFFu #define APIC_DFR 0xE0 #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1<<9) @@ -58,6 +58,7 @@ #define APIC_INT_ASSERT 0x04000 #define APIC_ICR_BUSY 0x01000 #define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 #define APIC_DM_FIXED 0x00000 #define APIC_DM_LOWEST 0x00100 #define APIC_DM_SMI 0x00200 @@ -111,6 +112,18 @@ #define MAX_IO_APICS 32 /* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. 
+ */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* * the local APIC register structure, memory mapped. Not terribly well * tested, but we might eventually use this one in the future - the * problem why we cannot use it right now is the P5 APIC, it has an diff -puN include/asm-x86_64/apic.h~x86-64-clustered-apic-support include/asm-x86_64/apic.h --- 25/include/asm-x86_64/apic.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.528714792 -0700 +++ 25-akpm/include/asm-x86_64/apic.h 2004-10-21 14:54:14.547711904 -0700 @@ -97,6 +97,7 @@ extern void nmi_watchdog_tick (struct pt extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); +extern void clustered_apic_check(void); extern int check_nmi_watchdog(void); extern void nmi_watchdog_default(void); @@ -111,7 +112,6 @@ extern unsigned int nmi_watchdog; #endif /* CONFIG_X86_LOCAL_APIC */ -#define clustered_apic_mode 0 #define esr_disable 0 extern unsigned boot_cpu_id; diff -puN /dev/null include/asm-x86_64/genapic.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/genapic.h 2004-10-21 14:54:14.547711904 -0700 @@ -0,0 +1,34 @@ +#ifndef _ASM_GENAPIC_H +#define _ASM_GENAPIC_H 1 + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. 
+ */ + +struct genapic { + char *name; + u32 int_delivery_mode; + u32 int_dest_mode; + u32 int_delivery_dest; /* for quick IPIs */ + int (*apic_id_registered)(void); + cpumask_t (*target_cpus)(void); + void (*init_apic_ldr)(void); + /* ipi */ + void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + /* */ + unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); +}; + + +extern struct genapic *genapic; + +#endif diff -puN /dev/null include/asm-x86_64/ipi.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/ipi.h 2004-10-21 14:54:14.548711752 -0700 @@ -0,0 +1,113 @@ +#ifndef __ASM_IPI_H +#define __ASM_IPI_H + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC InterProcessor Interrupt code. + * + * Moved to include file by James Cleverdon from + * arch/x86-64/kernel/smp.c + * + * Copyrights from kernel/smp.c: + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, v.2 + */ + +#include +#include +#include +#include + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline unsigned int __prepare_ICR (unsigned int shortcut, int vector, unsigned int dest) +{ + unsigned int icr = APIC_DM_FIXED | shortcut | vector | dest; + if (vector == KDB_VECTOR) + icr = (icr & (~APIC_VECTOR_MASK)) | APIC_DM_NMI; + return icr; +} + +static inline int __prepare_ICR2 (unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. 
Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector, dest); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); +} + + +static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +{ + unsigned long cfg, flags; + unsigned long query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ + local_irq_save(flags); + + for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { + if (cpu_isset(query_cpu, mask)) { + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(x86_cpu_to_apicid[query_cpu]); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector, APIC_DEST_PHYSICAL); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + } + } + local_irq_restore(flags); +} + +#endif /* __ASM_IPI_H */ diff -puN include/asm-x86_64/irq.h~x86-64-clustered-apic-support include/asm-x86_64/irq.h --- 25/include/asm-x86_64/irq.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.529714640 -0700 +++ 25-akpm/include/asm-x86_64/irq.h 2004-10-21 14:54:14.548711752 -0700 @@ -36,7 +36,7 @@ #define NR_IRQ_VECTORS NR_IRQS #else #define NR_IRQS 224 -#define NR_IRQ_VECTORS NR_IRQS +#define NR_IRQ_VECTORS 1024 #endif static __inline__ int irq_canonicalize(int irq) diff -puN /dev/null include/asm-x86_64/mach_apic.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25-akpm/include/asm-x86_64/mach_apic.h 2004-10-21 14:54:14.549711600 -0700 @@ -0,0 +1,28 @@ +#ifndef __ASM_MACH_APIC_H +#define __ASM_MACH_APIC_H + +/* + * Copyright 2004 James Cleverdon, IBM. 
+ * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch defines. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ + +#include + +#define INT_DELIVERY_MODE (genapic->int_delivery_mode) +#define INT_DEST_MODE (genapic->int_dest_mode) +#define INT_DELIVERY_DEST (genapic->int_delivery_dest) +#define TARGET_CPUS (genapic->target_cpus()) +#define apic_id_registered (genapic->apic_id_registered) +#define init_apic_ldr (genapic->init_apic_ldr) +#define send_IPI_mask (genapic->send_IPI_mask) +#define send_IPI_allbutself (genapic->send_IPI_allbutself) +#define send_IPI_all (genapic->send_IPI_all) +#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) + +#endif /* __ASM_MACH_APIC_H */ diff -puN include/asm-x86_64/mpspec.h~x86-64-clustered-apic-support include/asm-x86_64/mpspec.h --- 25/include/asm-x86_64/mpspec.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.531714336 -0700 +++ 25-akpm/include/asm-x86_64/mpspec.h 2004-10-21 14:54:14.549711600 -0700 @@ -14,9 +14,9 @@ #define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') /* - * a maximum of 16 APICs with the current APIC ID architecture. + * A maximum of 255 APICs with the current APIC ID architecture. 
*/ -#define MAX_APICS 16 +#define MAX_APICS 128 struct intel_mp_floating { diff -puN include/asm-x86_64/msi.h~x86-64-clustered-apic-support include/asm-x86_64/msi.h --- 25/include/asm-x86_64/msi.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.532714184 -0700 +++ 25-akpm/include/asm-x86_64/msi.h 2004-10-21 14:54:14.549711600 -0700 @@ -7,6 +7,7 @@ #define ASM_MSI_H #include +#include #include #define LAST_DEVICE_VECTOR 232 diff -puN include/asm-x86_64/smp.h~x86-64-clustered-apic-support include/asm-x86_64/smp.h --- 25/include/asm-x86_64/smp.h~x86-64-clustered-apic-support 2004-10-21 14:54:14.533714032 -0700 +++ 25-akpm/include/asm-x86_64/smp.h 2004-10-21 14:54:14.550711448 -0700 @@ -48,7 +48,7 @@ extern void (*mtrr_hook) (void); extern void zap_low_mappings(void); void smp_stop_cpu(void); extern cpumask_t cpu_sibling_map[NR_CPUS]; -extern char phys_proc_id[NR_CPUS]; +extern u8 phys_proc_id[NR_CPUS]; #define SMP_TRAMPOLINE_BASE 0x6000 @@ -74,14 +74,29 @@ extern __inline int hard_smp_processor_i return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); } +#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu(hard_smp_processor_id())) + +#endif /* !ASSEMBLY */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +#endif + +#ifndef ASSEMBLY /* * Some lowlevel functions might want to know about * the real APIC ID <-> CPU # mapping. - * AK: why is this volatile? */ -extern volatile char x86_cpu_to_apicid[NR_CPUS]; +extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ +extern u8 x86_cpu_to_log_apicid[NR_CPUS]; +extern u8 bios_cpu_apicid[]; + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + return cpus_addr(cpumask)[0]; +} -static inline char x86_apicid_to_cpu(char apicid) +static inline int x86_apicid_to_cpu(u8 apicid) { int i; @@ -92,10 +107,6 @@ static inline char x86_apicid_to_cpu(cha return -1; } -#define safe_smp_processor_id() (disable_apic ? 
0 : x86_apicid_to_cpu(hard_smp_processor_id())) - -extern u8 bios_cpu_apicid[]; - static inline int cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < NR_CPUS) @@ -106,23 +117,6 @@ static inline int cpu_present_to_apicid( #endif /* !ASSEMBLY */ -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -#endif -#define INT_DELIVERY_MODE 1 /* logical delivery */ - -#ifndef ASSEMBLY -#ifdef CONFIG_SMP -#define TARGET_CPUS cpu_online_map -#else -#define TARGET_CPUS cpumask_of_cpu(0) -#endif -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return cpus_addr(cpumask)[0]; -} -#endif - #ifndef CONFIG_SMP #define stack_smp_processor_id() 0 #define safe_smp_processor_id() 0 diff -puN arch/x86_64/kernel/mpparse.c~x86-64-clustered-apic-support arch/x86_64/kernel/mpparse.c --- 25/arch/x86_64/kernel/mpparse.c~x86-64-clustered-apic-support 2004-10-21 14:54:14.534713880 -0700 +++ 25-akpm/arch/x86_64/kernel/mpparse.c 2004-10-21 14:54:14.551711296 -0700 @@ -330,6 +330,7 @@ static int __init smp_read_mpc(struct mp } } } + clustered_apic_check(); if (!num_processors) printk(KERN_ERR "SMP mptable: no processors registered!\n"); return num_processors; _