arch/i386/kernel/smp.c
3492 /*
3493 * Intel MP v1.1/v1.4 specification support routines
3494 * for multi-pentium hosts.
3495 *
3496 * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
3497 * (c) 1998 Ingo Molnar
3498 *
3499 * Supported by Caldera http://www.caldera.com.
3500 * Much of the core SMP work is based on previous
3501 * work by Thomas Radke, to whom a great many thanks
3502 * are extended.
3503 *
3504 * Thanks to Intel for making available several
3505 * different Pentium, Pentium Pro and
3506 * Pentium-II/Xeon MP machines.
3507 *
3508 * This code is released under the GNU public
3509 * license version 2 or later.
3510 *
3511 * Fixes
3512 * Felix Koop : NR_CPUS used properly
3513 * Jose Renau : Handle single CPU case.
3514 * Alan Cox : By repeated request 8) -
3515 * Total BogoMIP report.
3516 * Greg Wright : Fix for kernel stacks panic.
3517 * Erich Boleyn : MP v1.4 and additional changes.
3518 * Matthias Sattler : Changes for 2.1 kernel map.
3519 * Michel Lespinasse: Changes for 2.1 kernel map.
3520 * Michael Chastain : Change trampoline.S to gnu as.
3521 * Alan Cox : Dumb bug: 'B' step PPro's are fine
3522 * Ingo Molnar : Added APIC timers, based on code
3523 * from Jose Renau
3524 * Alan Cox : Added EBDA scanning
3525 * Ingo Molnar : various cleanups and rewrites */
3526
3527 #include <linux/config.h>
3528 #include <linux/mm.h>
3529 #include <linux/kernel_stat.h>
3530 #include <linux/delay.h>
3531 #include <linux/mc146818rtc.h>
3532 #include <linux/smp_lock.h>
3533 #include <linux/init.h>
3534 #include <asm/mtrr.h>
3535
3536 #include "irq.h"
3537
3538 extern unsigned long start_kernel;
3539 extern void update_one_process( struct task_struct *p,
3540 unsigned long ticks, unsigned long user,
3541 unsigned long system, int cpu);
3542 /* Some notes on processor bugs:
3543 *
3544 * Pentium and Pentium Pro (and all CPUs) have
3545 * bugs. The Linux issues for SMP are handled as
3546 * follows.
3547 *
3548 * Pentium Pro:
3549 * Occasional delivery of 'spurious interrupt' as trap
3550 * #16. This is very rare. The kernel logs the event and
3551 * recovers.
3552 *
3553 * Pentium:
3554 * There is a marginal case where REP MOVS on 100MHz SMP
3555 * machines with B stepping processors can fail. XXX
3556 * should provide an L1cache=Writethrough or L1cache=off
3557 * option.
3558 *
3559 * B stepping CPUs may hang. There are hardware
3560 * workarounds for this. We warn about it in case your
3561 * board doesn't have the workarounds. Basically that's
3562 * so I can tell anyone with a B stepping CPU and SMP
3563 * problems "tough".
3564 *
3565 * Specific items [From Pentium Processor
3566 * Specification Update]
3567 *
3568 * 1AP. Linux doesn't use remote read
3569 * 2AP. Linux doesn't trust APIC errors
3570 * 3AP. We work around this
3571 * 4AP. Linux never generates 3 interrupts of the
3572 * same priority to cause a lost local interrupt.
3573 * 5AP. Remote read is never used
3574 * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX
3575 * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX
3576 * 11AP. Linux reads the APIC between writes to
3577 * avoid this, as per the documentation. Make
3578 * sure you preserve this as it affects the C
3579 * stepping chips too.
3580 *
3581 * If this sounds worrying, believe me these bugs are
3582 * ___RARE___ and there's almost nothing of note from
3583 * the C stepping upwards. */
3584
3585
3586 /* Kernel spinlock */
3587 spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
3588
3589 /* function prototypes: */
3590
3591 static void cache_APIC_registers (void);
3592 static void stop_this_cpu (void);
3593
3594 /* Set if we find a B stepping CPU */
3595 static int smp_b_stepping = 0;
3596
3597 /* Setup configured maximum number of CPUs to activate */
3598 static int max_cpus = -1;
3599 /* Have we found an SMP box */
3600 int smp_found_config=0;
3601
3602 /* Bitmask of physically existing CPUs */
3603 unsigned long cpu_present_map = 0;
3604 /* Bitmask of currently online CPUs */
3605 unsigned long cpu_online_map = 0;
3606 /* Total count of live CPUs */
3607 int smp_num_cpus = 1;
3608 /* Set when the idlers are all forked */
3609 int smp_threads_ready=0;
3610 /* which CPU maps to which logical number */
3611 volatile int cpu_number_map[NR_CPUS];
3612 /* which logical number maps to which CPU */
3613 volatile int __cpu_logical_map[NR_CPUS];
3614 /* We always use 0; the rest is ready for parallel
3615 * delivery */
3616 static volatile
3617 unsigned long cpu_callin_map[NR_CPUS] = {0,};
3618 /* We always use 0; the rest is ready for parallel
3619 * delivery */
3620 static volatile
3621 unsigned long cpu_callout_map[NR_CPUS] = {0,};
3622 /* Used for the invalidate map that's also checked in the
3623 * spinlock */
3624 volatile unsigned long smp_invalidate_needed;
3625 /* Stack vector for booting CPUs */
3626 volatile unsigned long kstack_ptr;
3627 /* Per CPU bogomips and other parameters */
3628 struct cpuinfo_x86 cpu_data[NR_CPUS];
3629 /* Internal processor count */
3630 static unsigned int num_processors = 1;
3631 /* Address of the I/O apic (not yet used) */
3632 unsigned long mp_ioapic_addr = 0xFEC00000;
3633 /* Processor that is doing the boot up */
3634 unsigned char boot_cpu_id = 0;
3635 /* Tripped once we need to start cross invalidating */
3636 static int smp_activated = 0;
3637 /* APIC version number */
3638 int apic_version[NR_CPUS];
3639 /* Just debugging the assembler.. */
3640 unsigned long apic_retval;
3641
3642 /* Number of times the processor holds the lock */
3643 volatile unsigned long kernel_counter=0;
3644 /* Number of times the processor holds the syscall lock*/
3645 volatile unsigned long syscall_count=0;
3646
3647 /* Number of IPIs delivered */
3648 volatile unsigned long ipi_count;
3649
3650 const char lk_lockmsg[] =
3651 "lock from interrupt context at %p\n";
3652
3653 int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
3654 extern int mp_irq_entries;
3655 extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
3656 extern int mpc_default_type;
3657 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
3658 int mp_current_pci_id = 0;
3659 unsigned long mp_lapic_addr = 0;
3660 /* 1 if "noapic" boot option passed */
3661 int skip_ioapic_setup = 0;
3662
3663 /* #define SMP_DEBUG */
3664
3665 #ifdef SMP_DEBUG
3666 #define SMP_PRINTK(x) printk x
3667 #else
3668 #define SMP_PRINTK(x)
3669 #endif
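/* The double parentheses at SMP_PRINTK call sites are what
 * make this work: C89 has no variadic macros, so the whole
 * printk argument list is wrapped in one extra set of
 * parentheses and passed as a single macro argument. A
 * usage sketch (hypothetical values):
 *
 *	SMP_PRINTK(("CPU#%d map %lx\n", cpu, map));
 *
 * expands to printk ("CPU#%d map %lx\n", cpu, map) when
 * SMP_DEBUG is defined, and to nothing at all otherwise. */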
3670
3671 /* IA s/w dev Vol 3, Section 7.4 */
3672 #define APIC_DEFAULT_PHYS_BASE 0xfee00000
3673
3674 /* Reads and clears the Pentium Timestamp-Counter */
3675 #define READ_TSC(x) __asm__ __volatile__ ( "rdtsc" \
3676 :"=a" (((unsigned long*)&(x))[0]), \
3677 "=d" (((unsigned long*)&(x))[1]))
3678
3679 #define CLEAR_TSC \
3680 __asm__ __volatile__ ("\t.byte 0x0f, 0x30;\n":: \
3681 "a"(0x00001000), "d"(0x00001000), "c"(0x10):"memory")
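/* READ_TSC stores the two 32-bit rdtsc halves (EAX low,
 * EDX high) through a cast, so its argument must be a
 * 64-bit lvalue. A minimal sketch of timing a region in
 * CPU cycles with it (assumes the counter does not wrap
 * during the measurement, which takes years at these
 * clock speeds):
 *
 *	unsigned long long t_before, t_after;
 *	READ_TSC(t_before);
 *	udelay(1000);			// region being measured
 *	READ_TSC(t_after);
 *	// elapsed cycles: (unsigned long)(t_after - t_before)
 */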
3682
3683 /* Setup routine for controlling SMP activation
3684 *
3685 * Command-line option of "nosmp" or "maxcpus=0"
3686 * will disable SMP activation entirely (the MPS
3687 * table probe still happens, though).
3688 *
3689 * Command-line option of "maxcpus=<NUM>", where
3690 * <NUM> is an integer greater than 0, limits the
3691 * maximum number of CPUs activated in SMP mode to
3692 * <NUM>. */
3693
3694 void __init smp_setup(char *str, int *ints)
3695 {
3696 if (ints && ints[0] > 0)
3697 max_cpus = ints[1];
3698 else
3699 max_cpus = 0;
3700 }
3701
3702 void ack_APIC_irq(void)
3703 {
3704 /* Clear the IPI */
3705
3706 /* Dummy read */
3707 apic_read(APIC_SPIV);
3708
3709 /* Docs say use 0 for future compatibility */
3710 apic_write(APIC_EOI, 0);
3711 }
3712
3713 /* Intel MP BIOS table parsing routines: */
3714
3715 #ifndef CONFIG_X86_VISWS_APIC
3716 /* Checksum an MP configuration block. */
3717
3718 static int mpf_checksum(unsigned char *mp, int len)
3719 {
3720 int sum=0;
3721 while(len--)
3722 sum+=*mp++;
3723 return sum&0xFF;
3724 }
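/* The MP spec picks each structure's checksum byte so that
 * all of its bytes sum to zero mod 256; a block is intact
 * exactly when mpf_checksum() returns 0. A minimal usage
 * sketch (illustrative helper, not called elsewhere): */

static inline int mpf_block_ok(unsigned char *block, int len)
{
	return mpf_checksum(block, len) == 0;
}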
3725
3726 /* Processor encoding in an MP configuration block */
3727
3728 static char *mpc_family(int family,int model)
3729 {
3730 static char n[32];
3731 static char *model_defs[]=
3732 {
3733 "80486DX","80486DX",
3734 "80486SX","80486DX/2 or 80487",
3735 "80486SL","Intel5X2(tm)",
3736 "Unknown","Unknown",
3737 "80486DX/4"
3738 };
3739 if (family==0x6)
3740 return("Pentium(tm) Pro");
3741 if (family==0x5)
3742 return("Pentium(tm)");
3743 if (family==0x0F && model==0x0F)
3744 return("Special controller");
3745 if (family==0x04 && model<9)
3746 return model_defs[model];
3747 sprintf(n,"Unknown CPU [%d:%d]",family, model);
3748 return n;
3749 }
3750
3751 /* Read the MPC */
3752
3753 static int __init
3754 smp_read_mpc(struct mp_config_table *mpc)
3755 {
3756 char str[16];
3757 int count=sizeof(*mpc);
3758 int ioapics = 0;
3759 unsigned char *mpt=((unsigned char *)mpc)+count;
3760
3761 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
3762 {
3763 panic("SMP mptable: bad signature [%c%c%c%c]!\n",
3764 mpc->mpc_signature[0],
3765 mpc->mpc_signature[1],
3766 mpc->mpc_signature[2],
3767 mpc->mpc_signature[3]);
3768 return 1;
3769 }
3770 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
3771 {
3772 panic("SMP mptable: checksum error!\n");
3773 return 1;
3774 }
3775 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
3776 {
3777 printk("Bad Config Table version (%d)!!\n",
3778 mpc->mpc_spec);
3779 return 1;
3780 }
3781 memcpy(str,mpc->mpc_oem,8);
3782 str[8]=0;
3783 memcpy(ioapic_OEM_ID,str,9);
3784 printk("OEM ID: %s ",str);
3785
3786 memcpy(str,mpc->mpc_productid,12);
3787 str[12]=0;
3788 memcpy(ioapic_Product_ID,str,13);
3789 printk("Product ID: %s ",str);
3790
3791 printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
3792
3793 /* save the local APIC address, it might be
3794 * non-default */
3795 mp_lapic_addr = mpc->mpc_lapic;
3796
3797 /* Now process the configuration blocks. */
3798
3799 while(count<mpc->mpc_length)
3800 {
3801 switch(*mpt)
3802 {
3803 case MP_PROCESSOR:
3804 {
3805 struct mpc_config_processor *m=
3806 (struct mpc_config_processor *)mpt;
3807 if (m->mpc_cpuflag&CPU_ENABLED)
3808 {
3809 printk("Processor #%d %s APIC version %d\n",
3810 m->mpc_apicid,
3811 mpc_family((m->mpc_cpufeature&
3812 CPU_FAMILY_MASK)>>8,
3813 (m->mpc_cpufeature&
3814 CPU_MODEL_MASK)>>4),
3815 m->mpc_apicver);
3816 #ifdef SMP_DEBUG
3817 if (m->mpc_featureflag&(1<<0))
3818 printk(" Floating point unit present.\n");
3819 if (m->mpc_featureflag&(1<<7))
3820 printk(" Machine Exception supported.\n");
3821 if (m->mpc_featureflag&(1<<8))
3822 printk(" 64 bit compare & exchange "
3823 "supported.\n");
3824 if (m->mpc_featureflag&(1<<9))
3825 printk(" Internal APIC present.\n");
3826 #endif
3827 if (m->mpc_cpuflag&CPU_BOOTPROCESSOR)
3828 {
3829 SMP_PRINTK((" Bootup CPU\n"));
3830 boot_cpu_id=m->mpc_apicid;
3831 }
3832 else /* Boot CPU already counted */
3833 num_processors++;
3834
3835 if (m->mpc_apicid>=NR_CPUS)
3836 printk("Processor #%d unused. (Max %d "
3837 "processors).\n",m->mpc_apicid, NR_CPUS);
3838 else
3839 {
3840 int ver = m->mpc_apicver;
3841
3842 cpu_present_map|=(1<<m->mpc_apicid);
3843 /* Validate version */
3844 if (ver == 0x0) {
3845 printk("BIOS bug, APIC version is 0 for "
3846 "CPU#%d! fixing up to 0x10. (tell "
3847 "your hw vendor)\n", m->mpc_apicid);
3848 ver = 0x10;
3849 }
3850 apic_version[m->mpc_apicid] = ver;
3851 }
3852 }
3853 mpt+=sizeof(*m);
3854 count+=sizeof(*m);
3855 break;
3856 }
3857 case MP_BUS:
3858 {
3859 struct mpc_config_bus *m=
3860 (struct mpc_config_bus *)mpt;
3861 memcpy(str,m->mpc_bustype,6);
3862 str[6]=0;
3863 SMP_PRINTK(("Bus #%d is %s\n",
3864 m->mpc_busid,
3865 str));
3866 if ((strncmp(m->mpc_bustype,"ISA",3) == 0) ||
3867 (strncmp(m->mpc_bustype,"EISA",4) == 0))
3868 mp_bus_id_to_type[m->mpc_busid] =
3869 MP_BUS_ISA;
3870 else
3871 if (strncmp(m->mpc_bustype,"PCI",3) == 0) {
3872 mp_bus_id_to_type[m->mpc_busid] =
3873 MP_BUS_PCI;
3874 mp_bus_id_to_pci_bus[m->mpc_busid] =
3875 mp_current_pci_id;
3876 mp_current_pci_id++;
3877 }
3878 mpt+=sizeof(*m);
3879 count+=sizeof(*m);
3880 break;
3881 }
3882 case MP_IOAPIC:
3883 {
3884 struct mpc_config_ioapic *m=
3885 (struct mpc_config_ioapic *)mpt;
3886 if (m->mpc_flags&MPC_APIC_USABLE)
3887 {
3888 ioapics++;
3889 printk("I/O APIC #%d Version %d at 0x%lX.\n",
3890 m->mpc_apicid,m->mpc_apicver,
3891 m->mpc_apicaddr);
3892 /* we use the first one only currently */
3893 if (ioapics == 1)
3894 mp_ioapic_addr = m->mpc_apicaddr;
3895 }
3896 mpt+=sizeof(*m);
3897 count+=sizeof(*m);
3898 break;
3899 }
3900 case MP_INTSRC:
3901 {
3902 struct mpc_config_intsrc *m=
3903 (struct mpc_config_intsrc *)mpt;
3904
3905 mp_irqs [mp_irq_entries] = *m;
3906 if (++mp_irq_entries == MAX_IRQ_SOURCES) {
3907 printk("Max irq sources exceeded!!\n");
3908 printk("Skipping remaining sources.\n");
3909 --mp_irq_entries;
3910 }
3911
3912 mpt+=sizeof(*m);
3913 count+=sizeof(*m);
3914 break;
3915 }
3916 case MP_LINTSRC:
3917 {
3918 struct mpc_config_intlocal *m=
3919 (struct mpc_config_intlocal *)mpt;
3920 mpt+=sizeof(*m);
3921 count+=sizeof(*m);
3922 break;
3923 }
3924 }
3925 }
3926 if (ioapics > 1)
3927 {
3928 printk("Warning: "
3929 "Multiple IO-APICs not yet supported.\n");
3930 printk("Warning: switching to non APIC mode.\n");
3931 skip_ioapic_setup=1;
3932 }
3933 return num_processors;
3934 }
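/* The parse loop above is a tagged variable-length record
 * walk: the first byte of each entry selects its struct,
 * and mpt/count advance by that struct's size until
 * mpc_length bytes have been consumed. Note that the switch
 * has no default case, so an entry with an unknown type
 * byte would stop mpt from advancing and spin this loop
 * forever; the MP spec v1.1/v1.4 defines exactly the five
 * types handled here, which is what makes this safe. */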
3935
3936 /* Scan the memory blocks for an SMP configuration block.
3937 */
3938
3939 static int __init smp_scan_config(unsigned long base,
3940 unsigned long length)
3941 {
3942 unsigned long *bp=phys_to_virt(base);
3943 struct intel_mp_floating *mpf;
3944
3945 SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
3946 bp,length));
3947 if (sizeof(*mpf)!=16)
3948 printk("Error: MPF size\n");
3949
3950 while (length>0)
3951 {
3952 if (*bp==SMP_MAGIC_IDENT)
3953 {
3954 mpf=(struct intel_mp_floating *)bp;
3955 if (mpf->mpf_length==1 &&
3956 !mpf_checksum((unsigned char *)bp,16) &&
3957 (mpf->mpf_specification == 1
3958 || mpf->mpf_specification == 4) )
3959 {
3960 printk("Intel MultiProcessor Specification "
3961 "v1.%d\n", mpf->mpf_specification);
3962 if (mpf->mpf_feature2&(1<<7))
3963 printk(" IMCR and PIC "
3964 "compatibility mode.\n");
3965 else
3966 printk(" Virtual Wire "
3967 "compatibility mode.\n");
3968 smp_found_config=1;
3969 /* Now see if we need to read further. */
3970 if (mpf->mpf_feature1!=0)
3971 {
3972 unsigned long cfg;
3973
3974 /* local APIC has default address */
3975 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3976 /* We need to know what the local APIC id of
3977 * the boot CPU is! */
3978
3979 /* HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
3980 * It's not just a crazy hack. ;-) */
3981
3982 /* Standard page mapping functions don't work
3983 * yet. We know that page 0 is not used.
3984 * Steal it for now! */
3985
3986 cfg=pg0[0];
3987 pg0[0] = (mp_lapic_addr |
3988 _PAGE_RW | _PAGE_PRESENT);
3989 local_flush_tlb();
3990
3991 boot_cpu_id =
3992 GET_APIC_ID(*((volatile unsigned long *)
3993 APIC_ID));
3994
3995 /* Give it back */
3996 pg0[0]= cfg;
3997 local_flush_tlb();
3998
3999 /*
4000 * END OF HACK END OF HACK END OF HACK END OF HACK
4001 */
4002 /* 2 CPUs, numbered 0 & 1. */
4003 cpu_present_map=3;
4004 num_processors=2;
4005 printk("I/O APIC at 0xFEC00000.\n");
4006
4007 /* Save the default type number, we need it
4008 * later to set the IO-APIC up properly: */
4009 mpc_default_type = mpf->mpf_feature1;
4010
4011 printk("Bus #0 is ");
4012 }
4013 switch(mpf->mpf_feature1)
4014 {
4015 case 1:
4016 case 5:
4017 printk("ISA\n");
4018 break;
4019 case 2:
4020 printk("EISA with no IRQ8 chaining\n");
4021 break;
4022 case 6:
4023 case 3:
4024 printk("EISA\n");
4025 break;
4026 case 4:
4027 case 7:
4028 printk("MCA\n");
4029 break;
4030 case 0:
4031 break;
4032 default:
4033 printk("???\nUnknown standard configuration "
4034 "%d\n", mpf->mpf_feature1);
4035 return 1;
4036 }
4037 if (mpf->mpf_feature1>4)
4038 {
4039 printk("Bus #1 is PCI\n");
4040
4041 /* Set local APIC version to the integrated
4042 * form. It's initialized to zero otherwise,
4043 * representing a discrete 82489DX. */
4044 apic_version[0] = 0x10;
4045 apic_version[1] = 0x10;
4046 }
4047 /* Read the physical hardware table. Anything
4048 * here will override the defaults. */
4049 if (mpf->mpf_physptr)
4050 smp_read_mpc((void *)mpf->mpf_physptr);
4051
4052 __cpu_logical_map[0] = boot_cpu_id;
4053 global_irq_holder = boot_cpu_id;
4054 current->processor = boot_cpu_id;
4055
4056 printk("Processors: %d\n", num_processors);
4057 /* Only use the first configuration found. */
4058 return 1;
4059 }
4060 }
4061 bp+=4;
4062 length-=16;
4063 }
4064
4065 return 0;
4066 }
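/* A note on the scan stride above: bp is an unsigned long
 * pointer, so 'bp+=4' advances 16 bytes per iteration while
 * 'length-=16' counts them down. The MP spec requires the
 * floating pointer structure to begin on a 16-byte
 * boundary, so only aligned candidates need checking; the
 * "_MP_" signature is matched as a single 32-bit compare
 * against SMP_MAGIC_IDENT. */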
4067
4068 void __init init_intel_smp (void)
4069 {
4070 /* FIXME: Linux assumes you have 640K of base ram..
4071 * this continues the error...
4072 *
4073 * 1) Scan the bottom 1K for a signature
4074 * 2) Scan the top 1K of base RAM
4075 * 3) Scan the 64K of bios */
4076 if (!smp_scan_config(0x0,0x400) &&
4077 !smp_scan_config(639*0x400,0x400) &&
4078 !smp_scan_config(0xF0000,0x10000)) {
4079 /* If it is an SMP machine we should know now, unless
4080 * the configuration is in an EISA/MCA bus machine
4081 * with an extended bios data area.
4082 *
4083 * there is a real-mode segmented pointer pointing to
4084 * the 4K EBDA area at 0x40E, calculate and scan it
4085 * here.
4086 *
4087 * NOTE! There are Linux loaders that will corrupt
4088 * the EBDA area, and as such this kind of SMP config
4089 * may be less trustworthy, simply because the SMP
4090 * table may have been stomped on during early
4091 * boot. These loaders are buggy and should be fixed.
4092 */
4093 unsigned int address;
4094
4095 address = *(unsigned short *)phys_to_virt(0x40E);
4096 address<<=4;
4097 smp_scan_config(address, 0x1000);
4098 if (smp_found_config)
4099 printk(KERN_WARNING "WARNING: MP table in the EBDA"
4100 " can be UNSAFE, contact linux-smp@vger.rutgers."
4101 "edu if you experience SMP problems!\n");
4102 }
4103 }
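/* The EBDA lookup above is plain real-mode address
 * arithmetic: the BIOS data area word at 0x40E holds a
 * segment value, and segment<<4 is the physical base. A
 * worked example with a typical (hypothetical) value:
 *
 *	address = 0x9FC0;	// word read from 0x40E
 *	address <<= 4;		// 0x9FC00, just under 640K
 *	smp_scan_config(0x9FC00, 0x1000); // scan the 4K EBDA
 */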
4104
4105 #else
4106
4107 /* The Visual Workstation is Intel MP compliant in the
4108 * hardware sense, but it doesn't have a BIOS
4109 * configuration table. No problem for Linux. */
4110 void __init init_visws_smp(void)
4111 {
4112 smp_found_config = 1;
4113
4114 cpu_present_map |= 2; /* or in id 1 */
4115 apic_version[1] |= 0x10; /* integrated APIC */
4116 apic_version[0] |= 0x10;
4117
4118 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
4119 }
4120
4121 #endif
4122
4123 /* - Intel MP Configuration Table
4124 * - or SGI Visual Workstation configuration */
4125 void __init init_smp_config (void)
4126 {
4127 #ifndef CONFIG_VISWS
4128 init_intel_smp();
4129 #else
4130 init_visws_smp();
4131 #endif
4132 }
4133
4134 /* Trampoline 80x86 program as an array. */
4135
4136 extern unsigned char trampoline_data [];
4137 extern unsigned char trampoline_end [];
4138 static unsigned char *trampoline_base;
4139
4140 /* Currently trivial. Write the real->protected mode
4141 * bootstrap into the page concerned. The caller has made
4142 * sure it's suitably aligned. */
4143
4144 static unsigned long __init setup_trampoline(void)
4145 {
4146 memcpy(trampoline_base, trampoline_data,
4147 trampoline_end - trampoline_data);
4148 return virt_to_phys(trampoline_base);
4149 }
4150
4151 /* We are called very early to get the low memory for the
4152 * SMP bootup trampoline page. */
4153 unsigned long __init
4154 smp_alloc_memory(unsigned long mem_base)
4155 {
4156 if (virt_to_phys((void *)mem_base) >= 0x9F000)
4157 panic("smp_alloc_memory: Insufficient low memory for"
4158 " kernel trampoline 0x%lx.", mem_base);
4159 trampoline_base = (void *)mem_base;
4160 return mem_base + PAGE_SIZE;
4161 }
4162
4163 /* The bootstrap kernel entry code has set these up. Save
4164 * them for a given CPU */
4165 void __init smp_store_cpu_info(int id)
4166 {
4167 struct cpuinfo_x86 *c=&cpu_data[id];
4168
4169 *c = boot_cpu_data;
4170 c->pte_quick = 0;
4171 c->pgd_quick = 0;
4172 c->pgtable_cache_sz = 0;
4173 identify_cpu(c);
4174 /* Mask B, Pentium, but not Pentium MMX */
4175 if (c->x86_vendor == X86_VENDOR_INTEL &&
4176 c->x86 == 5 &&
4177 c->x86_mask >= 1 && c->x86_mask <= 4 &&
4178 c->x86_model <= 3)
4179 /* Remember we have B step Pentia with bugs */
4180 smp_b_stepping=1;
4181 }
4182
4183 /* Architecture specific routine called by the kernel
4184 * just before init is fired off. This allows the BP to
4185 * have everything in order [we hope]. At the end of
4186 * this all the APs will hit the system scheduling and
4187 * off we go. Each AP will load the system gdt's and jump
4188 * through the kernel init into idle(). At this point the
4189 * scheduler will one day take over and give them jobs to
4190 * do. smp_callin is a standard routine we use to track
4191 * CPUs as they power up. */
4192
4193 static atomic_t smp_commenced = ATOMIC_INIT(0);
4194
4195 void __init smp_commence(void)
4196 {
4197 /* Lets the callins below out of their loop. */
4198 SMP_PRINTK(("Setting commenced=1, go go go\n"));
4199
4200 wmb();
4201 atomic_set(&smp_commenced,1);
4202 }
4203
4204 void __init enable_local_APIC(void)
4205 {
4206 unsigned long value;
4207
4208 value = apic_read(APIC_SPIV);
4209 value |= (1<<8); /* Enable APIC (bit==1) */
4210 value &= ~(1<<9); /* Enable focus processor (bit==0) */
4211 value |= 0xff; /* Set spurious IRQ vector to 0xff */
4212 apic_write(APIC_SPIV,value);
4213
4214 /* Set Task Priority to 'accept all' */
4215 value = apic_read(APIC_TASKPRI);
4216 value &= ~APIC_TPRI_MASK;
4217 apic_write(APIC_TASKPRI,value);
4218
4219 /* Clear the logical destination ID, just to be safe.
4220 * also, put the APIC into flat delivery mode. */
4221 value = apic_read(APIC_LDR);
4222 value &= ~APIC_LDR_MASK;
4223 apic_write(APIC_LDR,value);
4224
4225 value = apic_read(APIC_DFR);
4226 value |= SET_APIC_DFR(0xf);
4227 apic_write(APIC_DFR, value);
4228
4229 udelay(100); /* B safe */
4230 }
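/* For reference, the SPIV bits touched above (IA manuals,
 * vol 3): bit 8 is the APIC software enable, bit 9 set
 * would *disable* focus processor checking (so it is
 * cleared here to enable it), and bits 0-7 hold the
 * spurious vector. On Pentium and P6-class parts the low
 * four vector bits are hardwired to ones, which is why
 * 0xff is the safe choice. */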
4231
4232 unsigned long __init
4233 init_smp_mappings(unsigned long memory_start)
4234 {
4235 unsigned long apic_phys;
4236
4237 memory_start = PAGE_ALIGN(memory_start);
4238 if (smp_found_config) {
4239 apic_phys = mp_lapic_addr;
4240 } else {
4241 /* set up a fake all zeroes page to simulate the
4242 * local APIC and another one for the IO-APIC. We
4243 * could use the real zero-page, but it's safer this
4244 * way if some buggy code writes to this page ... */
4245 apic_phys = __pa(memory_start);
4246 memset((void *)memory_start, 0, PAGE_SIZE);
4247 memory_start += PAGE_SIZE;
4248 }
4249 set_fixmap(FIX_APIC_BASE,apic_phys);
4250 printk("mapped APIC to %08lx (%08lx)\n",
4251 APIC_BASE, apic_phys);
4252
4253 #ifdef CONFIG_X86_IO_APIC
4254 {
4255 unsigned long ioapic_phys;
4256
4257 if (smp_found_config) {
4258 ioapic_phys = mp_ioapic_addr;
4259 } else {
4260 ioapic_phys = __pa(memory_start);
4261 memset((void *)memory_start, 0, PAGE_SIZE);
4262 memory_start += PAGE_SIZE;
4263 }
4264 set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
4265 printk("mapped IOAPIC to %08lx (%08lx)\n",
4266 fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
4267 }
4268 #endif
4269
4270 return memory_start;
4271 }
4272
4273 extern void calibrate_delay(void);
4274
4275 void __init smp_callin(void)
4276 {
4277 int cpuid;
4278 unsigned long timeout;
4279
4280 /* (This works even if the APIC is not enabled.) */
4281 cpuid = GET_APIC_ID(apic_read(APIC_ID));
4282
4283 SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));
4284
4285 /* STARTUP IPIs are fragile beasts as they might
4286 * sometimes trigger some glue motherboard
4287 * logic. Complete APIC bus silence for 1 second; this
4288 * overestimates the time the boot CPU is spending to
4289 * send the up to 2 STARTUP IPIs by a factor of
4290 * two. This should be enough. */
4291
4292 /* Waiting 2s total for startup (udelay is not yet
4293 * working) */
4294 timeout = jiffies + 2*HZ;
4295 while (time_before(jiffies,timeout))
4296 {
4297 /* Has the boot CPU finished its STARTUP sequence? */
4298 if (test_bit(cpuid,
4299 (unsigned long *)&cpu_callout_map[0]))
4300 break;
4301 }
4302
4303 while (!time_before(jiffies,timeout)) {
4304 printk("BUG: CPU%d started up but did not get a "
4305 "callout!\n", cpuid);
4306 stop_this_cpu();
4307 }
4308
4309 /* the boot CPU has finished the init stage and is
4310 * spinning on callin_map until we finish. We are free
4311 * to set up this CPU, first the APIC. (this is
4312 * probably redundant on most boards) */
4313 SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
4314 enable_local_APIC();
4315
4316 /* Set up our APIC timer. */
4317 setup_APIC_clock();
4318
4319 __sti();
4320
4321 #ifdef CONFIG_MTRR
4322 /* Must be done before calibration delay is computed */
4323 mtrr_init_secondary_cpu ();
4324 #endif
4325 /* Get our bogomips. */
4326 calibrate_delay();
4327 SMP_PRINTK(("Stack at about %p\n",&cpuid));
4328
4329 /* Save our processor parameters */
4330 smp_store_cpu_info(cpuid);
4331
4332 /* Allow the master to continue. */
4333 set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
4334 }
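/* The boot handshake above uses the two bitmaps as one-shot
 * flags per CPU: the BSP sets a CPU's bit in cpu_callout_map
 * to release exactly one AP, and that AP answers by setting
 * its bit in cpu_callin_map. A minimal sketch of the BSP
 * side (names as in this file, timeout omitted):
 *
 *	set_bit(i, (unsigned long *)&cpu_callout_map[0]);
 *	while (!(cpu_callin_map[0] & (1<<i)))
 *		udelay(100);	// AP is now known to be alive
 *
 * do_boot_cpu() below implements this with a 5 second
 * timeout rather than an unbounded wait. */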
4335
4336 int cpucount = 0;
4337
4338 extern int cpu_idle(void * unused);
4339
4340 /* Activate a secondary processor. */
4341 int __init start_secondary(void *unused)
4342 {
4343 /* Don't put anything before smp_callin(); SMP booting
4344 * is so fragile that we want to limit the things done
4345 * here to the most necessary ones. */
4346 smp_callin();
4347 while (!atomic_read(&smp_commenced))
4348 /* nothing */ ;
4349 return cpu_idle(NULL);
4350 }
4351
4352 /* Everything has been set up for the secondary CPUs -
4353 * they just need to reload everything from the task
4354 * structure */
4355 void __init initialize_secondary(void)
4356 {
4357 struct thread_struct * p = &current->tss;
4358
4359 /* Load up the LDT and the task register. */
4360 asm volatile("lldt %%ax": :"a" (p->ldt));
4361 asm volatile("ltr %%ax": :"a" (p->tr));
4362 stts();
4363
4364 /* We don't actually need to load the full TSS,
4365 * basically just the stack pointer and the eip. */
4366
4367 asm volatile(
4368 "movl %0,%%esp\n\t"
4369 "jmp *%1"
4370 :
4371 :"r" (p->esp),"r" (p->eip));
4372 }
4373
4374 extern struct {
4375 void * esp;
4376 unsigned short ss;
4377 } stack_start;
4378
4379 static void __init do_boot_cpu(int i)
4380 {
4381 unsigned long cfg;
4382 pgd_t maincfg;
4383 struct task_struct *idle;
4384 unsigned long send_status, accept_status;
4385 int timeout, num_starts, j;
4386 unsigned long start_eip;
4387
4388 /* We need an idle process for each processor. */
4389
4390 kernel_thread(start_secondary, NULL, CLONE_PID);
4391 cpucount++;
4392
4393 idle = task[cpucount];
4394 if (!idle)
4395 panic("No idle process for CPU %d", i);
4396
4397 idle->processor = i;
4398 __cpu_logical_map[cpucount] = i;
4399 cpu_number_map[i] = cpucount;
4400
4401 /* start_eip had better be page-aligned! */
4402 start_eip = setup_trampoline();
4403
4404 /* So we see what's up */
4405 printk("Booting processor %d eip %lx\n", i, start_eip);
4406 stack_start.esp = (void *) (1024 + PAGE_SIZE +
4407 (char *)idle);
4408
4409 /* This grunge runs the startup process for the
4410 * targeted processor. */
4411
4412 SMP_PRINTK(("Setting warm reset code and vector.\n"));
4413
4414 CMOS_WRITE(0xa, 0xf);
4415 local_flush_tlb();
4416 SMP_PRINTK(("1.\n"));
4417 *((volatile unsigned short *) phys_to_virt(0x469)) =
4418 start_eip >> 4;
4419 SMP_PRINTK(("2.\n"));
4420 *((volatile unsigned short *) phys_to_virt(0x467)) =
4421 start_eip & 0xf;
4422 SMP_PRINTK(("3.\n"));
4423
4424 maincfg=swapper_pg_dir[0];
4425 ((unsigned long *)swapper_pg_dir)[0]=0x102007;
4426
4427 /* Be paranoid about clearing APIC errors. */
4428
4429 if ( apic_version[i] & 0xF0 )
4430 {
4431 apic_write(APIC_ESR, 0);
4432 accept_status = (apic_read(APIC_ESR) & 0xEF);
4433 }
4434
4435 /* Status is now clean */
4436
4437 send_status = 0;
4438 accept_status = 0;
4439
4440 /* Starting actual IPI sequence... */
4441
4442 SMP_PRINTK(("Asserting INIT.\n"));
4443
4444 /* Turn INIT on */
4445
4446 cfg=apic_read(APIC_ICR2);
4447 cfg&=0x00FFFFFF;
4448 /* Target chip */
4449 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
4450 cfg=apic_read(APIC_ICR);
4451 /* Clear bits */
4452 cfg&=~0xCDFFF;
4453 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT |
4454 APIC_DEST_DM_INIT);
4455 /* Send IPI */
4456 apic_write(APIC_ICR, cfg);
4457
4458 udelay(200);
4459 SMP_PRINTK(("Deasserting INIT.\n"));
4460
4461 cfg=apic_read(APIC_ICR2);
4462 cfg&=0x00FFFFFF;
4463 /* Target chip */
4464 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
4465 cfg=apic_read(APIC_ICR);
4466 /* Clear bits */
4467 cfg&=~0xCDFFF;
4468 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT);
4469 /* Send IPI */
4470 apic_write(APIC_ICR, cfg);
4471
4472 /* Should we send STARTUP IPIs?
4473 *
4474 * Determine this based on the APIC version. If we
4475 * don't have an integrated APIC, don't send the
4476 * STARTUP IPIs. */
4477
4478 if ( apic_version[i] & 0xF0 )
4479 num_starts = 2;
4480 else
4481 num_starts = 0;
4482
4483 /* Run STARTUP IPI loop. */
4484
4485 for (j = 1; !(send_status || accept_status)
4486 && (j <= num_starts) ; j++)
4487 {
4488 SMP_PRINTK(("Sending STARTUP #%d.\n",j));
4489 apic_write(APIC_ESR, 0);
4490 SMP_PRINTK(("After apic_write.\n"));
4491
4492 /* STARTUP IPI */
4493
4494 cfg=apic_read(APIC_ICR2);
4495 cfg&=0x00FFFFFF;
4496 /* Target chip */
4497 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
4498 cfg=apic_read(APIC_ICR);
4499 /* Clear bits */
4500 cfg&=~0xCDFFF;
4501 /* Boot on the stack */
4502 cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12));
4503 SMP_PRINTK(("Before start apic_write.\n"));
4504 /* Kick the second */
4505 apic_write(APIC_ICR, cfg);
4506
4507 SMP_PRINTK(("Startup point 1.\n"));
4508
4509 timeout = 0;
4510 SMP_PRINTK(("Waiting for send to finish...\n"));
4511 do {
4512 SMP_PRINTK(("+"));
4513 udelay(100);
4514 send_status = apic_read(APIC_ICR) & 0x1000;
4515 } while (send_status && (timeout++ < 1000));
4516
4517 /* Give the other CPU some time to accept the IPI. */
4518 udelay(200);
4519 accept_status = (apic_read(APIC_ESR) & 0xEF);
4520 }
4521 SMP_PRINTK(("After Startup.\n"));
4522
4523 if (send_status) /* APIC never delivered?? */
4524 printk("APIC never delivered???\n");
4525 if (accept_status) /* Send accept error */
4526 printk("APIC delivery error (%lx).\n",accept_status);
4527
4528 if ( !(send_status || accept_status) )
4529 {
4530 /* allow APs to start initializing. */
4531 SMP_PRINTK(("Before Callout %d.\n", i));
4532 set_bit(i, (unsigned long *)&cpu_callout_map[0]);
4533 SMP_PRINTK(("After Callout %d.\n", i));
4534
4535 for(timeout=0;timeout<50000;timeout++)
4536 {
4537 if (cpu_callin_map[0]&(1<<i))
4538 break; /* It has booted */
4539 udelay(100); /* Wait 5s total for a response */
4540 }
4541 if (cpu_callin_map[0]&(1<<i))
4542 {
4543 /* # CPUs logically, starting from 1 (BSP is 0) */
4544 #if 0
4545 cpu_number_map[i] = cpucount;
4546 __cpu_logical_map[cpucount] = i;
4547 #endif
4548 printk("OK.\n");
4549 printk("CPU%d: ", i);
4550 print_cpu_info(&cpu_data[i]);
4551 }
4552 else
4553 {
4554 if (*((volatile unsigned char *)phys_to_virt(8192))
4555 == 0xA5)
4556 printk("Stuck ??\n");
4557 else
4558 printk("Not responding.\n");
4559 }
4560 SMP_PRINTK(("CPU has booted.\n"));
4561 }
4562 else
4563 {
4564 __cpu_logical_map[cpucount] = -1;
4565 cpu_number_map[i] = -1;
4566 cpucount--;
4567 }
4568
4569 swapper_pg_dir[0]=maincfg;
4570 local_flush_tlb();
4571
4572 /* mark "stuck" area as not stuck */
4573 *((volatile unsigned long *)phys_to_virt(8192)) = 0;
4574 }
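/* Summarizing the wakeup protocol used above (per the MP
 * spec and Intel manuals): assert a level-triggered INIT,
 * wait, deassert it, and then, for integrated APICs only,
 * send up to two STARTUP IPIs whose 8-bit vector is the
 * physical page number of the trampoline. In outline
 * (masking of reserved ICR bits omitted):
 *
 *	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(i));
 *	apic_write(APIC_ICR, APIC_DEST_LEVELTRIG |
 *			APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
 *	udelay(200);
 *	...deassert, then per STARTUP attempt...
 *	apic_write(APIC_ICR, APIC_DEST_DM_STARTUP |
 *			(start_eip >> 12));
 *
 * which is why setup_trampoline() must return a page-aligned
 * physical address in low memory. */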
4575
4576 cycles_t cacheflush_time;
4577 extern unsigned long cpu_hz;
4578
4579 static void smp_tune_scheduling (void)
4580 {
4581 unsigned long cachesize;
4582 /* Rough estimation for SMP scheduling, this is the
4583 * number of cycles it takes for a fully memory-limited
4584 * process to flush the SMP-local cache.
4585 *
4586 * (For a P5 this pretty much means we will choose
4587 * another idle CPU almost always at wakeup time (this
4588 * is due to the small L1 cache), on PIIs it's around
4589 * 50-100 usecs, depending on the cache size) */
4590
4591 if (!cpu_hz) {
4592 /* this basically disables processor-affinity
4593 * scheduling on SMP without a TSC. */
4594 cacheflush_time = 0;
4595 return;
4596 } else {
4597 cachesize = boot_cpu_data.x86_cache_size;
4598 if (cachesize == -1)
4599 cachesize = 8; /* Pentiums */
4600
4601 cacheflush_time = cpu_hz/1024*cachesize/5000;
4602 }
4603
4604 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
4605 (long)cacheflush_time/(cpu_hz/1000000),
4606 ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
4607 }
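/* A worked example of the estimate above, for a
 * hypothetical 400 MHz PII with a 512K L2 cache:
 *
 *	cacheflush_time = 400000000/1024 * 512 / 5000
 *	                = 390625 * 512 / 5000
 *	                ~= 40000 cycles,
 *
 * which at 400 cycles per microsecond is about 100 usecs,
 * the upper end of the PII range quoted above. */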
4608
4609 unsigned int prof_multiplier[NR_CPUS];
4610 unsigned int prof_counter[NR_CPUS];
4611
4612 /* Cycle through the processors, sending APIC IPIs to
4613 * boot each. */
4614 void __init smp_boot_cpus(void)
4615 {
4616 int i;
4617
4618 #ifdef CONFIG_MTRR
4619 /* Must be done before other processors booted */
4620 mtrr_init_boot_cpu ();
4621 #endif
4622 /* Initialize the logical to physical CPU number
4623 * mapping and the per-CPU profiling counter/multiplier
4624 */
4625
4626 for (i = 0; i < NR_CPUS; i++) {
4627 cpu_number_map[i] = -1;
4628 prof_counter[i] = 1;
4629 prof_multiplier[i] = 1;
4630 }
4631
4632 /* Setup boot CPU information */
4633
4634 /* Final full version of the data */
4635 smp_store_cpu_info(boot_cpu_id);
4636 smp_tune_scheduling();
4637 printk("CPU%d: ", boot_cpu_id);
4638 print_cpu_info(&cpu_data[boot_cpu_id]);
4639
4640 /* not necessary because the MP table should list the
4641 * boot CPU too, but we do it for the sake of
4642 * robustness anyway. (and for the case when a non-SMP
4643 * board boots an SMP kernel) */
4644 cpu_present_map |= (1 << hard_smp_processor_id());
4645
4646 cpu_number_map[boot_cpu_id] = 0;
4647
4648 /* If we couldn't find an SMP configuration at boot
4649 * time, get out of here now! */
4650 if (!smp_found_config)
4651 {
4652 printk(KERN_NOTICE "SMP motherboard not detected. "
4653 "Using dummy APIC emulation.\n");
4654 #ifndef CONFIG_VISWS
4655 io_apic_irqs = 0;
4656 #endif
4657 cpu_online_map = cpu_present_map;
4658 goto smp_done;
4659 }
4660
4661 /* If SMP should be disabled, really disable it! */
4662
4663 if (!max_cpus)
4664 {
4665 smp_found_config = 0;
4666 printk(KERN_INFO "SMP mode deactivated, forcing use "
4667 "of dummy APIC emulation.\n");
4668 }
4669
4670 #ifdef SMP_DEBUG
4671 {
4672 int reg;
4673
4674 /* This is to verify that we're looking at a real
4675 * local APIC. Check these against your board if the
4676 * CPUs aren't getting started for no apparent
4677 * reason. */
4678 reg = apic_read(APIC_VERSION);
4679 SMP_PRINTK(("Getting VERSION: %x\n", reg));
4680
4681 apic_write(APIC_VERSION, 0);
4682 reg = apic_read(APIC_VERSION);
4683 SMP_PRINTK(("Getting VERSION: %x\n", reg));
4684
4685 /* The two version reads above should print the same
4686 * NON-ZERO!!! numbers. If the second one is zero,
4687 * there is a problem with the APIC write/read
4688 * definitions.
4689 *
4690 * The next two are just to see if we have sane
4691 * values. They're only really relevant if we're in
4692 * Virtual Wire compatibility mode, but most boxes
4693 * are these days. */
4694 reg = apic_read(APIC_LVT0);
4695 SMP_PRINTK(("Getting LVT0: %x\n", reg));
4696
4697 reg = apic_read(APIC_LVT1);
4698 SMP_PRINTK(("Getting LVT1: %x\n", reg));
4699 }
4700 #endif
4701
4702 enable_local_APIC();
4703
4704 /* Set up our local APIC timer: */
4705 setup_APIC_clock ();
4706
4707 /* Now scan the CPU present map and fire up the other
4708 * CPUs. */
4709
4710 /* Add all detected CPUs. (later on we can down
4711 * individual CPUs which will change cpu_online_map but
4712 * not necessarily cpu_present_map. We are pretty much
4713 * ready for hot-swap CPUs.) */
4714 cpu_online_map = cpu_present_map;
4715 mb();
4716
4717 SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));
4718
4719 for(i=0;i<NR_CPUS;i++)
4720 {
4721 /* Don't even attempt to start the boot CPU! */
4722 if (i == boot_cpu_id)
4723 continue;
4724
4725 if ((cpu_online_map & (1 << i))
4726 && (max_cpus < 0 || max_cpus > cpucount+1))
4727 {
4728 do_boot_cpu(i);
4729 }
4730
4731 /* Make sure we unmap all failed CPUs */
4732
4733 if (cpu_number_map[i] == -1 &&
4734 (cpu_online_map & (1 << i))) {
4735 printk("CPU #%d not responding. "
4736 "Removing from cpu_online_map.\n", i);
4737 cpu_online_map &= ~(1 << i);
4738 }
4739 }
4740
4741 /* Cleanup possible dangling ends... */
4742 #ifndef CONFIG_VISWS
4743 {
4744 unsigned long cfg;
4745
4746 /* Install writable page 0 entry. */
4747 cfg = pg0[0];
4748 /* writeable, present, addr 0 */
4749 pg0[0] = _PAGE_RW | _PAGE_PRESENT;
4750 local_flush_tlb();
4751
4752 /* Paranoid: Set warm reset code and vector here back
4753 * to default values. */
4754 CMOS_WRITE(0, 0xf);
4755
4756 *((volatile long *) phys_to_virt(0x467)) = 0;
4757
4758 /* Restore old page 0 entry. */
4759 pg0[0] = cfg;
4760 local_flush_tlb();
4761 }
4762 #endif
4763
4764 /* Allow the user to impress friends. */
4765 SMP_PRINTK(("Before bogomips.\n"));
4766 if (cpucount==0)
4767 {
4768 printk(KERN_ERR
4769 "Error: only one processor found.\n");
4770 cpu_online_map = (1<<hard_smp_processor_id());
4771 }
4772 else
4773 {
4774 unsigned long bogosum=0;
4775 for(i=0;i<32;i++)
4776 {
4777 if (cpu_online_map&(1<<i))
4778 bogosum+=cpu_data[i].loops_per_sec;
4779 }
4780 printk(KERN_INFO "Total of %d processors activated "
4781 "(%lu.%02lu BogoMIPS).\n",
4782 cpucount+1,
4783 (bogosum+2500)/500000,
4784 ((bogosum+2500)/5000)%100);
4785 SMP_PRINTK(("Before bogocount - "
4786 "setting activated=1.\n"));
4787 smp_activated=1;
4788 smp_num_cpus=cpucount+1;
4789 }
4790 if (smp_b_stepping)
4791 printk(KERN_WARNING "WARNING: SMP operation may be "
4792 "unreliable with B stepping processors.\n");
4793 SMP_PRINTK(("Boot done.\n"));
4794
4795 cache_APIC_registers();
4796 #ifndef CONFIG_VISWS
4797 /* Here we can be sure that there is an IO-APIC in the
4798 * system. Let's go and set it up: */
4799 if (!skip_ioapic_setup)
4800 setup_IO_APIC();
4801 #endif
4802
4803 smp_done: ;
4804 }
4805
4806
4807 /* the following functions deal with sending IPIs between
4808 * CPUs.
4809 *
4810 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.*/
4811
4812
4813 /* Silly serialization to work around CPU bug in P5s. We
4814 * can safely turn it off on a 686. */
4815 #ifdef CONFIG_X86_GOOD_APIC
4816 # define FORCE_APIC_SERIALIZATION 0
4817 #else
4818 # define FORCE_APIC_SERIALIZATION 1
4819 #endif
4820
4821 static unsigned int cached_APIC_ICR;
4822 static unsigned int cached_APIC_ICR2;
4823
4824 /* Caches reserved bits; APIC reads are (mildly)
4825 * expensive and force otherwise unnecessary CPU
4826 * synchronization. (We could cache other APIC registers
4827 * too, but these are the main ones used in real life.) */
4828 #define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
4829 #define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
4830
4831 void cache_APIC_registers (void)
4832 {
4833 cached_APIC_ICR = slow_ICR;
4834 cached_APIC_ICR2 = slow_ICR2;
4835 mb();
4836 }
4837
4838 static inline unsigned int __get_ICR (void)
4839 {
4840 #if FORCE_APIC_SERIALIZATION
4841 /* Wait for the APIC to become ready - this should
4842 * never occur. It's a debugging check really. */
4843 int count = 0;
4844 unsigned int cfg;
4845
4846 while (count < 1000)
4847 {
4848 cfg = slow_ICR;
4849 if (!(cfg&(1<<12))) {
4850 if (count)
4851 atomic_add(count, (atomic_t*)&ipi_count);
4852 return cfg;
4853 }
4854 count++;
4855 udelay(10);
4856 }
4857 printk("CPU #%d: previous IPI still not cleared "
4858 "after 10ms\n", smp_processor_id());
4859 return cfg;
4860 #else
4861 return cached_APIC_ICR;
4862 #endif
4863 }
4864
4865 static inline unsigned int __get_ICR2 (void)
4866 {
4867 #if FORCE_APIC_SERIALIZATION
4868 return slow_ICR2;
4869 #else
4870 return cached_APIC_ICR2;
4871 #endif
4872 }
4873
4874 static inline int __prepare_ICR (unsigned int shortcut,
4875 int vector)
4876 {
4877 unsigned int cfg;
4878
4879 cfg = __get_ICR();
4880 cfg |= APIC_DEST_DM_FIXED|shortcut|vector;
4881
4882 return cfg;
4883 }
4884
4885 static inline int __prepare_ICR2 (unsigned int dest)
4886 {
4887 unsigned int cfg;
4888
4889 cfg = __get_ICR2();
4890 cfg |= SET_APIC_DEST_FIELD(dest);
4891
4892 return cfg;
4893 }
4894
4895 static inline void
4896 __send_IPI_shortcut(unsigned int shortcut, int vector)
4897 {
4898 unsigned int cfg;
4899 /* Subtle. In the case of the 'never do double writes'
4900 * workaround we have to lock out interrupts to be
4901 * safe. Otherwise it's just one single atomic write to
4902 * the APIC, no need for cli/sti. */
4903 #if FORCE_APIC_SERIALIZATION
4904 unsigned long flags;
4905
4906 __save_flags(flags);
4907 __cli();
4908 #endif
4909
4910 /* No need to touch the target chip field */
4911
4912 cfg = __prepare_ICR(shortcut, vector);
4913
4914 /* Send the IPI. The write to APIC_ICR
4915 * fires this off. */
4916 apic_write(APIC_ICR, cfg);
4917 #if FORCE_APIC_SERIALIZATION
4918 __restore_flags(flags);
4919 #endif
4920 }
4921
4922 static inline void send_IPI_allbutself(int vector)
4923 {
4924 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
4925 }
4926
4927 static inline void send_IPI_all(int vector)
4928 {
4929 __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
4930 }
4931
4932 void send_IPI_self(int vector)
4933 {
4934 __send_IPI_shortcut(APIC_DEST_SELF, vector);
4935 }
4936
4937 static inline void send_IPI_single(int dest, int vector)
4938 {
4939 unsigned long cfg;
4940 #if FORCE_APIC_SERIALIZATION
4941 unsigned long flags;
4942
4943 __save_flags(flags);
4944 __cli();
4945 #endif
4946
4947 /* prepare target chip field */
4948
4949 cfg = __prepare_ICR2(dest);
4950 apic_write(APIC_ICR2, cfg);
4951
4952 /* program the ICR*/
4953 cfg = __prepare_ICR(0, vector);
4954
4955 /* Send the IPI. The write to APIC_ICR fires this off.
4956 */
4957 apic_write(APIC_ICR, cfg);
4958 #if FORCE_APIC_SERIALIZATION
4959 __restore_flags(flags);
4960 #endif
4961 }
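/* Typical use of the senders above: a directed IPI programs
 * the target APIC id into ICR2 and then fires a fixed-vector
 * IPI via ICR, e.g. the reschedule path below is simply
 *
 *	send_IPI_single(cpu, RESCHEDULE_VECTOR);
 *
 * while the broadcast users (TLB flush, stop, MTRR) go
 * through the ALLBUT shortcut and never touch ICR2. */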
4962
4963 /* This is fraught with deadlocks. Probably the
4964 * situation is not as bad as in the early days of SMP,
4965 * so we might ease some of the paranoia here. */
4966
4967 void smp_flush_tlb(void)
4968 {
4969 int cpu = smp_processor_id();
4970 int stuck;
4971 unsigned long flags;
4972
4973 /* it's important that we do not generate any APIC
4974 * traffic until the AP CPUs have booted up! */
4975 if (cpu_online_map) {
4976 /* The assignment is safe because it's volatile so
4977 * the compiler cannot reorder it, because the i586
4978 * has strict memory ordering and because only the
4979 * kernel lock holder may issue a tlb flush. If you
4980 * break any one of those three change this to an
4981 * atomic bus locked or. */
4982
4983 smp_invalidate_needed = cpu_online_map;
4984
4985 /* Processors spinning on some lock with IRQs
4986 * disabled will see this IRQ late. The
4987 * smp_invalidate_needed map will ensure they don't
4988 * do a spurious flush tlb or miss one. */
4989
4990 __save_flags(flags);
4991 __cli();
4992
4993 send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
4994
4995 /* Spin waiting for completion */
4996 stuck = 50000000;
4997 while (smp_invalidate_needed) {
4998 /* Take care of "crossing" invalidates */
4999 if (test_bit(cpu, &smp_invalidate_needed))
5000 clear_bit(cpu, &smp_invalidate_needed);
5001 --stuck;
5002 if (!stuck) {
5003 printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
5004 break;
5005 }
5006 }
5007 __restore_flags(flags);
5008 }
5009
5010 /* Flush the local TLB */
5011 local_flush_tlb();
5012 }
5013
5014
5015 /* this function sends a 'reschedule' IPI to another CPU.
5016 * it goes straight through and wastes no time
5017 * serializing anything. Worst case is that we lose a
5018 * reschedule ... */
5019 void smp_send_reschedule(int cpu)
5020 {
5021 send_IPI_single(cpu, RESCHEDULE_VECTOR);
5022 }
5023
5024 /* this function sends a 'stop' IPI to all other CPUs in
5025 * the system. it goes straight through. */
5026 void smp_send_stop(void)
5027 {
5028 send_IPI_allbutself(STOP_CPU_VECTOR);
5029 }
5030
5031 /* this function sends a 'reload MTRR state' IPI to all
5032 * other CPUs in the system. it goes straight through;
5033 * completion processing is done at the mtrr.c level. */
5034 void smp_send_mtrr(void)
5035 {
5036 send_IPI_allbutself(MTRR_CHANGE_VECTOR);
5037 }
5038
5039 /* Local timer interrupt handler. It does both profiling
5040 * and process statistics/rescheduling.
5041 *
5042 * We do profiling in every local tick,
5043 * statistics/rescheduling happen only every 'profiling
5044 * multiplier' ticks. The default multiplier is 1 and it
5045 * can be changed by writing the new multiplier value
5046 * into /proc/profile. */
5047 void smp_local_timer_interrupt(struct pt_regs * regs)
5048 {
5049 int cpu = smp_processor_id();
5050
5051 /* The profiling function is SMP safe. (nothing can
5052 * mess around with "current", and the profiling
5053 * counters are updated with atomic operations). This
5054 * is especially useful with a profiling
5055 * multiplier != 1 */
5056 if (!user_mode(regs))
5057 x86_do_profile(regs->eip);
5058
5059 if (!--prof_counter[cpu]) {
5060 int user=0,system=0;
5061 struct task_struct * p = current;
5062
5063 /* After doing the above, we need to make like a
5064 * normal interrupt - otherwise timer interrupts
5065 * ignore the global interrupt lock, which is the
5066 * WrongThing (tm) to do. */
5067
5068 if (user_mode(regs))
5069 user=1;
5070 else
5071 system=1;
5072
5073 irq_enter(cpu, 0);
5074 if (p->pid) {
5075 update_one_process(p, 1, user, system, cpu);
5076
5077 p->counter -= 1;
5078 if (p->counter < 0) {
5079 p->counter = 0;
5080 p->need_resched = 1;
5081 }
5082 if (p->priority < DEF_PRIORITY) {
5083 kstat.cpu_nice += user;
5084 kstat.per_cpu_nice[cpu] += user;
5085 } else {
5086 kstat.cpu_user += user;
5087 kstat.per_cpu_user[cpu] += user;
5088 }
5089
5090 kstat.cpu_system += system;
5091 kstat.per_cpu_system[cpu] += system;
5092
5093 }
5094 prof_counter[cpu]=prof_multiplier[cpu];
5095 irq_exit(cpu, 0);
5096 }
5097
5098 /* We take the 'long' return path, and there every
5099 * subsystem grabs the appropriate locks (kernel lock/
5100 * irq lock).
5101 *
5102 * we might want to decouple profiling from the 'long
5103 * path', and do the profiling totally in assembly.
5104 *
5105 * Currently this isn't too much of an issue
5106 * (performance wise), we can take more than 100K local
5107 * irqs per second on a 100 MHz P5. */
5108 }
5109
5110 /* Local APIC timer interrupt. This is the most natural
5111 * way for doing local interrupts, but local timer
5112 * interrupts can be emulated by broadcast interrupts
5113 * too. [in case the hw doesn't support APIC timers]
5114 *
5115 * [ if a single-CPU system runs an SMP kernel then we
5116 * call the local interrupt as well. Thus we cannot
5117 * inline the local irq ... ] */
5118 void smp_apic_timer_interrupt(struct pt_regs * regs)
5119 {
5120 /* NOTE! We'd better ACK the irq immediately, because
5121 * timer handling can be slow, and we want to be able
5122 * to accept NMI tlb invalidates during this time. */
5123 ack_APIC_irq();
5124 smp_local_timer_interrupt(regs);
5125 }
5126
5127 /* Reschedule call back. Nothing to do, all the work is
5128 * done automatically when we return from the interrupt.
5129 */
5130 asmlinkage void smp_reschedule_interrupt(void)
5131 {
5132 ack_APIC_irq();
5133 }
5134
5135 /* Invalidate call-back */
5136 asmlinkage void smp_invalidate_interrupt(void)
5137 {
5138 if (test_and_clear_bit(smp_processor_id(),
5139 &smp_invalidate_needed))
5140 local_flush_tlb();
5141
5142 ack_APIC_irq();
5143 }
5144
5145 static void stop_this_cpu (void)
5146 {
5147 /* Remove this CPU: */
5148 clear_bit(smp_processor_id(), &cpu_online_map);
5149
5150 if (cpu_data[smp_processor_id()].hlt_works_ok)
5151 for(;;) __asm__("hlt");
5152 for (;;);
5153 }
5154
5155 /* CPU halt call-back */
5156 asmlinkage void smp_stop_cpu_interrupt(void)
5157 {
5158 stop_this_cpu();
5159 }
5160
5161 void (*mtrr_hook) (void) = NULL;
5162
5163 asmlinkage void smp_mtrr_interrupt(void)
5164 {
5165 ack_APIC_irq();
5166 if (mtrr_hook) (*mtrr_hook)();
5167 }
5168
5169 /* This interrupt should _never_ happen with our APIC/SMP
5170 * architecture */
5171 asmlinkage void smp_spurious_interrupt(void)
5172 {
5173 ack_APIC_irq();
5174 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
5175 printk("spurious APIC interrupt on CPU#%d, "
5176 "should never happen.\n", smp_processor_id());
5177 }
5178
5179 /* This part sets up the APIC 32 bit clock in LVTT1, with
5180 * HZ interrupts per second. We assume that the caller
5181 * has already set up the local APIC.
5182 *
5183 /* The APIC timer is not exactly in sync with the
5184 * external timer chip; it closely follows bus clocks. */
5185
5186 /* The timer chip is already set up at HZ interrupts per
5187 * second here, but we do not accept timer interrupts
5188 * yet. We only allow the BP to calibrate. */
5189 static unsigned int __init get_8254_timer_count(void)
5190 {
5191 unsigned int count;
5192
5193 outb_p(0x00, 0x43);
5194 count = inb_p(0x40);
5195 count |= inb_p(0x40) << 8;
5196
5197 return count;
5198 }
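/* The two port accesses above are the 8254 latch protocol:
 * writing 0x00 to the mode port (0x43) latches counter 0,
 * and two successive reads of port 0x40 then return the
 * frozen LSB and MSB. A usage sketch (counter 0 counts down
 * and wraps HZ times per second):
 *
 *	c1 = get_8254_timer_count();
 *	udelay(100);
 *	c2 = get_8254_timer_count();
 *	// normally c2 < c1; c2 > c1 means it wrapped around
 */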
5199
5200 /* This function sets up the local APIC timer, with a
5201 * timeout of 'clocks' APIC bus clock. During calibration
5202 * we actually call this function twice, once with a
5203 * bogus timeout value, second time for real. The other
5204 * (noncalibrating) CPUs call this function only once,
5205 * with the real value.
5206 *
5207 * We are strictly in irqs off mode here, as we do not
5208 * want to get an APIC interrupt go off accidentally.
5209 *
5210 * We do reads before writes even if unnecessary, to get
5211 * around the APIC double write bug. */
5212 #define APIC_DIVISOR 16
5213
5214 void setup_APIC_timer(unsigned int clocks)
5215 {
5216 unsigned long lvtt1_value;
5217 unsigned int tmp_value;
5218
5219 /* Unfortunately the local APIC timer cannot be set up
5220 * into NMI mode. With the IO APIC we can re-route the
5221 * external timer interrupt and broadcast it as an NMI
5222 * to all CPUs, so no pain. */
5223 tmp_value = apic_read(APIC_LVTT);
5224 lvtt1_value = APIC_LVT_TIMER_PERIODIC |
5225 LOCAL_TIMER_VECTOR;
5226 apic_write(APIC_LVTT , lvtt1_value);
5227
5228 /* Divide PICLK by 16 */
5229 tmp_value = apic_read(APIC_TDCR);
5230 apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 )
5231 | APIC_TDR_DIV_16);
5232
5233 tmp_value = apic_read(APIC_TMICT);
5234 apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
5235 }
5236
5237 void __init wait_8254_wraparound(void)
5238 {
5239 unsigned int curr_count, prev_count=~0;
5240 int delta;
5241
5242 curr_count = get_8254_timer_count();
5243
5244 do {
5245 prev_count = curr_count;
5246 curr_count = get_8254_timer_count();
5247 delta = curr_count-prev_count;
5248
5249 /* This limit for delta seems arbitrary, but it
5250 * isn't, it's slightly above the level of error a
5251 * buggy Mercury/Neptune chipset timer can cause. */
5252 } while (delta<300);
5253 }
5254
5255 /* In this function we calibrate APIC bus clocks to the
5256 * external timer. Unfortunately we cannot use jiffies
5257 * and the timer irq to calibrate, since some later
5258 * bootup code depends on getting the first irq? Ugh.
5259 *
5260 * We want to do the calibration only once since we want
5261 * to have local timer irqs synchronized. CPUs connected
5262 * the same APIC bus have the very same bus frequency.
5263 * And we want to have irqs off anyways, no accidental
5264 * APIC irq that way. */
5265
5266 int __init calibrate_APIC_clock(void)
5267 {
5268 unsigned long long t1,t2;
5269 long tt1,tt2;
5270 long calibration_result;
5271 int i;
5272
5273 printk("calibrating APIC timer ... ");
5274
5275 /* Put whatever arbitrary (but long enough) timeout
5276 * value into the APIC clock, we just want to get the
5277 * counter running for calibration. */
5278 setup_APIC_timer(1000000000);
5279
5280 /* The timer chip counts down to zero. Let's wait for a
5281 * wraparound to start exact measurement: (the current
5282 * tick might have been already half done) */
5283
5284 wait_8254_wraparound ();
5285
5286 /* We wrapped around just now. Let's start: */
5287 READ_TSC(t1);
5288 tt1=apic_read(APIC_TMCCT);
5289
5290 #define LOOPS (HZ/10)
5291 /* Let's wait LOOPS wraparounds: */
5292 for (i=0; i<LOOPS; i++)
5293 wait_8254_wraparound ();
5294
5295 tt2=apic_read(APIC_TMCCT);
5296 READ_TSC(t2);
5297
5298 /* The APIC bus clock counter is only 32 bits, so it
5299 * might have overflowed, but note that we use signed
5300 * longs, thus no extra care is needed.
5301 *
5302 * underflowed to be exact, as the timer counts down ;)
5303 */
5304
5305 calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
5306
5307 SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip "
5308 "tick.", (unsigned long)(t2-t1)/LOOPS));
5309
5310 SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer "
5311 "chip tick.", calibration_result));
5312
5313 printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
5314 ((long)(t2-t1)/LOOPS)/(1000000/HZ),
5315 ((long)(t2-t1)/LOOPS)%(1000000/HZ));
5316
5317 printk("..... system bus clock speed is %ld.%04ld "
5318 "MHz.\n",
5319 calibration_result/(1000000/HZ),
5320 calibration_result%(1000000/HZ) );
5321 #undef LOOPS
5322
5323 return calibration_result;
5324 }
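/* Spelling out the arithmetic above: LOOPS wraparounds of
 * the HZ-rate 8254 take exactly LOOPS/HZ = 0.1 seconds, and
 * the APIC count register decrements once per APIC_DIVISOR
 * bus clocks, so (tt1-tt2)*APIC_DIVISOR/LOOPS is bus clocks
 * per timer tick. With hypothetical numbers, HZ=100 on a
 * 66 MHz bus:
 *
 *	calibration_result = 66000000/100 = 660000
 *
 * and setup_APIC_timer(660000) later programs TMICT with
 * 660000/16 in periodic mode, i.e. one local timer irq per
 * 8254 tick. */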
5325
5326 static unsigned int calibration_result;
5327
5328 void __init setup_APIC_clock(void)
5329 {
5330 unsigned long flags;
5331
5332 static volatile int calibration_lock;
5333
5334 __save_flags(flags);
5335 __cli();
5336
5337 SMP_PRINTK(("setup_APIC_clock() called.\n"));
5338
5339 /* [ setup_APIC_clock() is called from all CPUs, but we
5340 * want to do this part of the setup only once ... and
5341 * it fits here best ] */
5342 if (!test_and_set_bit(0,&calibration_lock)) {
5343
5344 calibration_result=calibrate_APIC_clock();
5345 /* Signal completion to the other CPU[s]: */
5346 calibration_lock = 3;
5347
5348 } else {
5349 /* Other CPU is calibrating, wait for finish: */
5350 SMP_PRINTK(("waiting for other CPU "
5351 "calibrating APIC ... "));
5352 while (calibration_lock == 1);
5353 SMP_PRINTK(("done, continuing.\n"));
5354 }
5355
5356 /* Now set up the timer for real. */
5357 setup_APIC_timer (calibration_result);
5358
5359 /* We ACK the APIC, just in case there is something
5360 * pending. */
5361 ack_APIC_irq ();
5362
5363 __restore_flags(flags);
5364 }
5365
5366 /* the frequency of the profiling timer can be changed by
5367 * writing a multiplier value into /proc/profile.
5368 *
5369 * usually you want to run this on all CPUs ;) */
5370 int setup_profiling_timer(unsigned int multiplier)
5371 {
5372 int cpu = smp_processor_id();
5373 unsigned long flags;
5374
5375 /* Sanity check. [at least 500 APIC cycles should be
5376 * between APIC interrupts as a rule of thumb, to avoid
5377 * irqs flooding us] */
5378 if ( (!multiplier) ||
5379 (calibration_result/multiplier < 500))
5380 return -EINVAL;
5381
5382 save_flags(flags);
5383 cli();
5384 setup_APIC_timer(calibration_result/multiplier);
5385 prof_multiplier[cpu]=multiplier;
5386 restore_flags(flags);
5387
5388 return 0;
5389 }
5390
5391 #undef APIC_DIVISOR
5392