netlib.narod.ru < Назад | Оглавление | Далее >

arch/i386/kernel/smp.c

 3492 /*
 3493  *      Intel MP v1.1/v1.4 specification support routines
 3494  *      for multi-pentium hosts.
 3495  *
 3496  *      (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 3497  *      (c) 1998 Ingo Molnar
 3498  *
 3499  *      Supported by Caldera http://www.caldera.com.
 3500  *      Much of the core SMP work is based on previous
 3501  *      work by Thomas Radke, to whom a great many thanks
 3502  *      are extended.
 3503  *
 3504  *      Thanks to Intel for making available several
 3505  *      different Pentium, Pentium Pro and
 3506  *      Pentium-II/Xeon MP machines.
 3507  *
 3508  *      This code is released under the GNU public
 3509  *      license version 2 or later.
 3510  *
 3511  *      Fixes
 3512  * Felix Koop       : NR_CPUS used properly
 3513  * Jose Renau       : Handle single CPU case.
 3514  * Alan Cox         : By repeated request 8) -
 3515  *                    Total BogoMIP report.
 3516  * Greg Wright      : Fix for kernel stacks panic.
 3517  * Erich Boleyn     : MP v1.4 and additional changes.
 3518  * Matthias Sattler : Changes for 2.1 kernel map.
 3519  * Michel Lespinasse: Changes for 2.1 kernel map.
 3520  * Michael Chastain : Change trampoline.S to gnu as.
 3521  * Alan Cox         : Dumb bug: 'B' step PPro's are fine
 3522  * Ingo Molnar      : Added APIC timers, based on code
 3523  *                    from Jose Renau
 3524  * Alan Cox         : Added EBDA scanning
 3525  * Ingo Molnar      : various cleanups and rewrites */
 3526 
 3527 #include <linux/config.h>
 3528 #include <linux/mm.h>
 3529 #include <linux/kernel_stat.h>
 3530 #include <linux/delay.h>
 3531 #include <linux/mc146818rtc.h>
 3532 #include <linux/smp_lock.h>
 3533 #include <linux/init.h>
 3534 #include <asm/mtrr.h>
 3535 
 3536 #include "irq.h"
 3537 
 3538 extern unsigned long start_kernel;
 3539 extern void update_one_process( struct task_struct *p,
 3540         unsigned long ticks, unsigned long user,
 3541         unsigned long system, int cpu);
 3542 /*      Some notes on processor bugs:
 3543  *
 3544  *      Pentium and Pentium Pro (and all CPUs) have
 3545  *      bugs. The Linux issues for SMP are handled as
 3546  *      follows.
 3547  *
 3548  * Pentium Pro:
 3549  * Occasional delivery of 'spurious interrupt' as trap
 3550  * #16. This is very rare. The kernel logs the event and
 3551  * recovers
 3552  *
 3553  * Pentium:
 3554  * There is a marginal case where REP MOVS on 100MHz SMP
 3555  * machines with B stepping processors can fail. XXX
 3556  * should provide an L1cache=Writethrough or L1cache=off
 3557  * option.
 3558  *
 3559  * B stepping CPUs may hang. There are hardware work-
 3560  * arounds for this. We warn about it in case your board
 3561  * doesn't have the workarounds. Basically that's so I can
 3562  * tell anyone with a B stepping CPU and SMP problems
 3563  * "tough".
 3564  *
 3565  *      Specific items [From Pentium Processor
 3566  *      Specification Update]
 3567  *
 3568  *      1AP.  Linux doesn't use remote read
 3569  *      2AP.  Linux doesn't trust APIC errors
 3570  *      3AP.  We work around this
 3571  *      4AP.  Linux never generated 3 interrupts of the
 3572  *            same pri to cause a lost local interrupt.
 3573  *      5AP.  Remote read is never used
 3574  *      9AP.  XXX NEED TO CHECK WE HANDLE THIS XXX
 3575  *      10AP. XXX NEED TO CHECK WE HANDLE THIS XXX
 3576  *      11AP. Linux reads the APIC between writes to
 3577  *            avoid this, as per the documentation. Make
 3578  *            sure you preserve this as it affects the C
 3579  *            stepping chips too.
 3580  *
 3581  *      If this sounds worrying believe me these bugs are
 3582  *      ___RARE___ and there's about nothing of note with
 3583  *      C stepping upwards.  */
 3584 
 3585 
/* Kernel spinlock */
spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;

/* function prototypes: */

static void cache_APIC_registers (void);
static void stop_this_cpu (void);

/* Set if we find a B stepping CPU */
static int smp_b_stepping = 0;

/* Setup configured maximum number of CPUs to activate
 * (-1 == no limit; overridden by the "maxcpus="/"nosmp"
 * boot options via smp_setup()) */
static int max_cpus = -1;
/* Have we found an SMP box */
int smp_found_config=0;

/* Bitmask of physically existing CPUs */
unsigned long cpu_present_map = 0;
/* Bitmask of currently online CPUs */
unsigned long cpu_online_map = 0;
/* Total count of live CPUs */
int smp_num_cpus = 1;
/* Set when the idlers are all forked */
int smp_threads_ready=0;
/* which CPU maps to which logical number */
volatile int cpu_number_map[NR_CPUS];
/* which logical number maps to which CPU */
volatile int __cpu_logical_map[NR_CPUS];
/* We always use 0 the rest is ready for parallel
 * delivery */
static volatile
unsigned long cpu_callin_map[NR_CPUS] = {0,};
/* We always use 0 the rest is ready for parallel
 * delivery */
static volatile
unsigned long cpu_callout_map[NR_CPUS] = {0,};
/* Used for the invalidate map that's also checked in the
 * spinlock */
volatile unsigned long smp_invalidate_needed;
/* Stack vector for booting CPUs */
volatile unsigned long kstack_ptr;
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS];
/* Internal processor count */
static unsigned int num_processors = 1;
/* Address of the I/O apic (not yet used) */
unsigned long mp_ioapic_addr = 0xFEC00000;
/* Processor that is doing the boot up */
unsigned char boot_cpu_id = 0;
/* Tripped once we need to start cross invalidating */
static int smp_activated = 0;
/* APIC version number */
int apic_version[NR_CPUS];
/* Just debugging the assembler.. */
unsigned long apic_retval;

/* Number of times the processor holds the lock */
volatile unsigned long kernel_counter=0;
/* Number of times the processor holds the syscall lock*/
volatile unsigned long syscall_count=0;

/* Number of IPIs delivered */
volatile unsigned long ipi_count;

/* printk format used when the kernel lock is taken from
 * interrupt context */
const char lk_lockmsg[] =
  "lock from interrupt context at %p\n";

/* NOTE(review): "{ -1, }" sets only element 0 to -1; the
 * remaining entries are zero-initialized -- verify callers
 * treat bus type / pci bus 0 as intended. */
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
extern int mpc_default_type;
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
int mp_current_pci_id = 0;
unsigned long mp_lapic_addr = 0;
/* 1 if "noapic" boot option passed */
int skip_ioapic_setup = 0;
 3662 
/* #define SMP_DEBUG */

/* SMP_PRINTK((fmt, args...)): verbose boot tracing, compiled
 * away unless SMP_DEBUG is defined.  Note the double
 * parentheses required at every call site. */
#ifdef SMP_DEBUG
#define SMP_PRINTK(x)   printk x
#else
#define SMP_PRINTK(x)
#endif

/* IA s/w dev Vol 3, Section 7.4 */
#define APIC_DEFAULT_PHYS_BASE 0xfee00000

/* Reads and clears the Pentium Timestamp-Counter */
#define READ_TSC(x)     __asm__ __volatile__ (  "rdtsc" \
        :"=a" (((unsigned long*)&(x))[0]),              \
         "=d" (((unsigned long*)&(x))[1]))

/* Opcode bytes 0x0f,0x30 are WRMSR (encoded raw for old
 * assemblers): writes EDX:EAX to the MSR selected by ECX
 * (0x10 == the TSC). */
#define CLEAR_TSC                                       \
  __asm__ __volatile__ ("\t.byte 0x0f, 0x30;\n"::       \
    "a"(0x00001000), "d"(0x00001000), "c"(0x10):"memory")
 3682 
 3683 /*      Setup routine for controlling SMP activation
 3684  *
 3685  *      Command-line option of "nosmp" or "maxcpus=0"
 3686  *      will disable SMP activation entirely (the MPS
 3687  *      table probe still happens, though).
 3688  *
 3689  *      Command-line option of "maxcpus=<NUM>", where
 3690  *      <NUM> is an integer greater than 0, limits the
 3691  *      maximum number of CPUs activated in SMP mode to
 3692  *      <NUM>.  */
 3693 
 3694 void __init smp_setup(char *str, int *ints)
 3695 {
 3696   if (ints && ints[0] > 0)
 3697     max_cpus = ints[1];
 3698   else
 3699     max_cpus = 0;
 3700 }
 3701 
/* Acknowledge the interrupt currently being serviced by
 * writing the local APIC End-Of-Interrupt register. */
void ack_APIC_irq(void)
{
  /* Clear the IPI */

  /* Dummy read: a read between APIC writes, as required by
   * the erratum-11AP note in the header comment above */
  apic_read(APIC_SPIV);

  /* Docs say use 0 for future compatibility */
  apic_write(APIC_EOI, 0);
}
 3712 
 3713 /* Intel MP BIOS table parsing routines: */
 3714 
 3715 #ifndef CONFIG_X86_VISWS_APIC
 3716 /* Checksum an MP configuration block. */
 3717 
 3718 static int mpf_checksum(unsigned char *mp, int len)
 3719 {
 3720   int sum=0;
 3721   while(len--)
 3722     sum+=*mp++;
 3723   return sum&0xFF;
 3724 }
 3725 
 3726 /* Processor encoding in an MP configuration block */
 3727 
 3728 static char *mpc_family(int family,int model)
 3729 {
 3730   static char n[32];
 3731   static char *model_defs[]=
 3732   {
 3733     "80486DX","80486DX",
 3734     "80486SX","80486DX/2 or 80487",
 3735     "80486SL","Intel5X2(tm)",
 3736     "Unknown","Unknown",
 3737     "80486DX/4"
 3738   };
 3739   if (family==0x6)
 3740     return("Pentium(tm) Pro");
 3741   if (family==0x5)
 3742     return("Pentium(tm)");
 3743   if (family==0x0F && model==0x0F)
 3744     return("Special controller");
 3745   if (family==0x04 && model<9)
 3746     return model_defs[model];
 3747   sprintf(n,"Unknown CPU [%d:%d]",family, model);
 3748   return n;
 3749 }
 3750 
 3751 /* Read the MPC */
 3752 
 3753 static int __init
 3754 smp_read_mpc(struct mp_config_table *mpc)
 3755 {
 3756   char str[16];
 3757   int count=sizeof(*mpc);
 3758   int ioapics = 0;
 3759   unsigned char *mpt=((unsigned char *)mpc)+count;
 3760 
 3761   if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
 3762   {
 3763     panic("SMP mptable: bad signature [%c%c%c%c]!\n",
 3764       mpc->mpc_signature[0],
 3765       mpc->mpc_signature[1],
 3766       mpc->mpc_signature[2],
 3767       mpc->mpc_signature[3]);
 3768     return 1;
 3769   }
 3770   if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
 3771   {
 3772     panic("SMP mptable: checksum error!\n");
 3773     return 1;
 3774   }
 3775   if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
 3776   {
 3777     printk("Bad Config Table version (%d)!!\n",
 3778            mpc->mpc_spec);
 3779     return 1;
 3780   }
 3781   memcpy(str,mpc->mpc_oem,8);
 3782   str[8]=0;
 3783   memcpy(ioapic_OEM_ID,str,9);
 3784   printk("OEM ID: %s ",str);
 3785 
 3786   memcpy(str,mpc->mpc_productid,12);
 3787   str[12]=0;
 3788   memcpy(ioapic_Product_ID,str,13);
 3789   printk("Product ID: %s ",str);
 3790 
 3791   printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
 3792 
 3793   /* save the local APIC address, it might be
 3794    * non-default */
 3795   mp_lapic_addr = mpc->mpc_lapic;
 3796 
 3797   /* Now process the configuration blocks. */
 3798 
 3799   while(count<mpc->mpc_length)
 3800   {
 3801     switch(*mpt)
 3802     {
 3803       case MP_PROCESSOR:
 3804       {
 3805         struct mpc_config_processor *m=
 3806           (struct mpc_config_processor *)mpt;
 3807         if (m->mpc_cpuflag&CPU_ENABLED)
 3808         {
 3809           printk("Processor #%d %s APIC version %d\n",
 3810             m->mpc_apicid,
 3811             mpc_family((m->mpc_cpufeature&
 3812               CPU_FAMILY_MASK)>>8,
 3813               (m->mpc_cpufeature&
 3814                 CPU_MODEL_MASK)>>4),
 3815             m->mpc_apicver);
 3816 #ifdef SMP_DEBUG
 3817           if (m->mpc_featureflag&(1<<0))
 3818             printk("    Floating point unit present.\n");
 3819           if (m->mpc_featureflag&(1<<7))
 3820             printk("    Machine Exception supported.\n");
 3821           if (m->mpc_featureflag&(1<<8))
 3822             printk("    64 bit compare & exchange "
 3823                    "supported.\n");
 3824           if (m->mpc_featureflag&(1<<9))
 3825             printk("    Internal APIC present.\n");
 3826 #endif
 3827           if (m->mpc_cpuflag&CPU_BOOTPROCESSOR)
 3828           {
 3829             SMP_PRINTK(("    Bootup CPU\n"));
 3830             boot_cpu_id=m->mpc_apicid;
 3831           }
 3832           else    /* Boot CPU already counted */
 3833             num_processors++;
 3834 
 3835           if (m->mpc_apicid>NR_CPUS)
 3836             printk("Processor #%d unused. (Max %d "
 3837                 "processors).\n",m->mpc_apicid, NR_CPUS);
 3838           else
 3839           {
 3840             int ver = m->mpc_apicver;
 3841 
 3842             cpu_present_map|=(1<<m->mpc_apicid);
 3843             /* Validate version */
 3844             if (ver == 0x0) {
 3845               printk("BIOS bug, APIC version is 0 for "
 3846                      "CPU#%d! fixing up to 0x10. (tell "
 3847                      "your hw vendor)\n", m->mpc_apicid);
 3848               ver = 0x10;
 3849             }
 3850             apic_version[m->mpc_apicid] = ver;
 3851           }
 3852         }
 3853         mpt+=sizeof(*m);
 3854         count+=sizeof(*m);
 3855         break;
 3856       }
 3857       case MP_BUS:
 3858       {
 3859         struct mpc_config_bus *m=
 3860           (struct mpc_config_bus *)mpt;
 3861         memcpy(str,m->mpc_bustype,6);
 3862         str[6]=0;
 3863         SMP_PRINTK(("Bus #%d is %s\n",
 3864           m->mpc_busid,
 3865           str));
 3866         if ((strncmp(m->mpc_bustype,"ISA",3) == 0) ||
 3867           (strncmp(m->mpc_bustype,"EISA",4) == 0))
 3868           mp_bus_id_to_type[m->mpc_busid] =
 3869             MP_BUS_ISA;
 3870         else
 3871         if (strncmp(m->mpc_bustype,"PCI",3) == 0) {
 3872           mp_bus_id_to_type[m->mpc_busid] =
 3873             MP_BUS_PCI;
 3874           mp_bus_id_to_pci_bus[m->mpc_busid] =
 3875             mp_current_pci_id;
 3876           mp_current_pci_id++;
 3877         }
 3878         mpt+=sizeof(*m);
 3879         count+=sizeof(*m);
 3880         break;
 3881       }
 3882       case MP_IOAPIC:
 3883       {
 3884         struct mpc_config_ioapic *m=
 3885           (struct mpc_config_ioapic *)mpt;
 3886         if (m->mpc_flags&MPC_APIC_USABLE)
 3887         {
 3888           ioapics++;
 3889           printk("I/O APIC #%d Version %d at 0x%lX.\n",
 3890             m->mpc_apicid,m->mpc_apicver,
 3891             m->mpc_apicaddr);
 3892           /* we use the first one only currently */
 3893           if (ioapics == 1)
 3894             mp_ioapic_addr = m->mpc_apicaddr;
 3895         }
 3896         mpt+=sizeof(*m);
 3897         count+=sizeof(*m);
 3898         break;
 3899       }
 3900       case MP_INTSRC:
 3901       {
 3902         struct mpc_config_intsrc *m=
 3903           (struct mpc_config_intsrc *)mpt;
 3904 
 3905         mp_irqs [mp_irq_entries] = *m;
 3906         if (++mp_irq_entries == MAX_IRQ_SOURCES) {
 3907           printk("Max irq sources exceeded!!\n");
 3908           printk("Skipping remaining sources.\n");
 3909           --mp_irq_entries;
 3910         }
 3911 
 3912         mpt+=sizeof(*m);
 3913         count+=sizeof(*m);
 3914         break;
 3915       }
 3916       case MP_LINTSRC:
 3917       {
 3918         struct mpc_config_intlocal *m=
 3919           (struct mpc_config_intlocal *)mpt;
 3920         mpt+=sizeof(*m);
 3921         count+=sizeof(*m);
 3922         break;
 3923       }
 3924     }
 3925   }
 3926   if (ioapics > 1)
 3927   {
 3928     printk("Warning: "
 3929            "Multiple IO-APICs not yet supported.\n");
 3930     printk("Warning: switching to non APIC mode.\n");
 3931     skip_ioapic_setup=1;
 3932   }
 3933   return num_processors;
 3934 }
 3935 
/* Scan the memory blocks for an SMP configuration block.
 *
 * Searches [base, base+length) for the Intel MP Floating
 * Pointer structure ("_MP_" signature on 16-byte boundaries).
 * On a valid hit, records the boot defaults and/or reads the
 * full MP config table, then sets smp_found_config.
 *
 * Returns 1 if a floating pointer was found (only the first
 * configuration is used), 0 otherwise. */

static int __init smp_scan_config(unsigned long base,
                                  unsigned long length)
{
  unsigned long *bp=phys_to_virt(base);
  struct intel_mp_floating *mpf;

  SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
    bp,length));
  /* Sanity check: the floating pointer is defined to be
   * exactly 16 bytes. */
  if (sizeof(*mpf)!=16)
    printk("Error: MPF size\n");

  while (length>0)
  {
    if (*bp==SMP_MAGIC_IDENT)
    {
      mpf=(struct intel_mp_floating *)bp;
      /* mpf_length is in 16-byte units; must be 1, checksum
       * over the 16 bytes must be 0, and the spec revision
       * must be 1.1 or 1.4. */
      if (mpf->mpf_length==1 &&
        !mpf_checksum((unsigned char *)bp,16) &&
        (mpf->mpf_specification == 1
         || mpf->mpf_specification == 4) )
      {
        printk("Intel MultiProcessor Specification "
               "v1.%d\n", mpf->mpf_specification);
        if (mpf->mpf_feature2&(1<<7))
          printk("    IMCR and PIC "
                 "compatibility mode.\n");
        else
          printk("    Virtual Wire "
                 "compatibility mode.\n");
        smp_found_config=1;
        /* Now see if we need to read further.
         * A non-zero feature1 means "default configuration
         * <n>": no config table, assume 2 CPUs. */
        if (mpf->mpf_feature1!=0)
        {
          unsigned long cfg;

          /* local APIC has default address */
          mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
          /* We need to know what the local APIC id of
           * the boot CPU is!  */

/*      HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
 *      It's not just a crazy hack.  ;-) */

          /* Standard page mapping functions don't work
           * yet.  We know that page 0 is not used.
           * Steal it for now!  */

          /* Temporarily map the local APIC through pte 0,
           * read our APIC id, then restore the pte. */
          cfg=pg0[0];
          pg0[0] = (mp_lapic_addr |
                    _PAGE_RW | _PAGE_PRESENT);
          local_flush_tlb();

          boot_cpu_id =
            GET_APIC_ID(*((volatile unsigned long *)
                          APIC_ID));

          /* Give it back */
          pg0[0]= cfg;
          local_flush_tlb();

/*
 * END OF HACK   END OF HACK   END OF HACK   END OF HACK
 */
          /* 2 CPUs, numbered 0 & 1. */
          cpu_present_map=3;
          num_processors=2;
          printk("I/O APIC at 0xFEC00000.\n");

          /* Save the default type number, we need it
           * later to set the IO-APIC up properly: */
          mpc_default_type = mpf->mpf_feature1;

          printk("Bus #0 is ");
        }
        /* Decode the default-configuration bus type. */
        switch(mpf->mpf_feature1)
        {
          case 1:
          case 5:
            printk("ISA\n");
            break;
          case 2:
            printk("EISA with no IRQ8 chaining\n");
            break;
          case 6:
          case 3:
            printk("EISA\n");
            break;
          case 4:
          case 7:
            printk("MCA\n");
            break;
          case 0:
            break;
          default:
            printk("???\nUnknown standard configuration "
                   "%d\n", mpf->mpf_feature1);
            return 1;
        }
        /* Default configs 5..7 have a second, PCI, bus. */
        if (mpf->mpf_feature1>4)
        {
          printk("Bus #1 is PCI\n");

          /* Set local APIC version to the integrated
           * form.  It's initialized to zero otherwise,
           * representing a discrete 82489DX.  */
          apic_version[0] = 0x10;
          apic_version[1] = 0x10;
        }
        /* Read the physical hardware table.  Anything
         * here will override the defaults.  */
        if (mpf->mpf_physptr)
          smp_read_mpc((void *)mpf->mpf_physptr);

        __cpu_logical_map[0] = boot_cpu_id;
        global_irq_holder = boot_cpu_id;
        current->processor = boot_cpu_id;

        printk("Processors: %d\n", num_processors);
        /* Only use the first configuration found. */
        return 1;
      }
    }
    /* Advance 16 bytes (4 longs): the floating pointer is
     * 16-byte aligned. */
    bp+=4;
    length-=16;
  }

  return 0;
}
 4067 
/* Probe the standard locations for the Intel MP floating
 * pointer structure, falling back to the EBDA. */
void __init init_intel_smp (void)
{
  /* FIXME: Linux assumes you have 640K of base ram..
   * this continues the error...
   *
   * 1) Scan the bottom 1K for a signature
   * 2) Scan the top 1K of base RAM
   * 3) Scan the 64K of bios */
  if (!smp_scan_config(0x0,0x400) &&
      !smp_scan_config(639*0x400,0x400) &&
      !smp_scan_config(0xF0000,0x10000)) {
    /* If it is an SMP machine we should know now, unless
     * the configuration is in an EISA/MCA bus machine
     * with an extended bios data area.
     *
     * there is a real-mode segmented pointer pointing to
     * the 4K EBDA area at 0x40E, calculate and scan it
     * here.
     *
     * NOTE! There are Linux loaders that will corrupt
     * the EBDA area, and as such this kind of SMP config
     * may be less trustworthy, simply because the SMP
     * table may have been stomped on during early
     * boot. These loaders are buggy and should be fixed.
     */
    unsigned int address;

    /* 0x40E holds the EBDA segment; <<4 converts the
     * real-mode segment to a physical address. */
    address = *(unsigned short *)phys_to_virt(0x40E);
    address<<=4;
    smp_scan_config(address, 0x1000);
    if (smp_found_config)
      printk(KERN_WARNING "WARNING: MP table in the EBDA"
        " can be UNSAFE, contact linux-smp@vger.rutgers."
        "edu if you experience SMP problems!\n");
  }
}
 4104 
 4105 #else
 4106 
/* The Visual Workstation is Intel MP compliant in the
 * hardware sense, but it doesn't have a
 * BIOS(-configuration table).  No problem for Linux:
 * hard-wire the configuration instead of probing. */
void __init init_visws_smp(void)
{
  smp_found_config = 1;

  cpu_present_map |= 2; /* or in id 1 */
  apic_version[1] |= 0x10; /* integrated APIC */
  apic_version[0] |= 0x10;

  mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
}
 4120 
 4121 #endif
 4122 
/* - Intel MP Configuration Table
 * - or SGI Visual Workstation configuration
 * Dispatch to the appropriate probe at compile time. */
void __init init_smp_config (void)
{
#ifndef CONFIG_VISWS
  init_intel_smp();
#else
  init_visws_smp();
#endif
}
 4133 
/* Trampoline 80x86 program as an array. */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;  /* low page from smp_alloc_memory() */

/* Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller has made
 * sure it's suitably aligned.
 *
 * Returns the physical address of the trampoline, used as
 * the APs' startup EIP. */

static unsigned long __init setup_trampoline(void)
{
  memcpy(trampoline_base, trampoline_data,
         trampoline_end - trampoline_data);
  return virt_to_phys(trampoline_base);
}
 4150 
/* We are called very early to get the low memory for the
 * SMP bootup trampoline page.  Claims one page at mem_base
 * and returns the new allocation watermark; panics if
 * mem_base is already at/above 0x9F000 (too high for the
 * trampoline -- presumably it must stay in low conventional
 * memory for the APs' real-mode start; confirm). */
unsigned long __init
smp_alloc_memory(unsigned long mem_base)
{
  if (virt_to_phys((void *)mem_base) >= 0x9F000)
    panic("smp_alloc_memory: Insufficient low memory for"
          " kernel trampoline 0x%lx.", mem_base);
  trampoline_base = (void *)mem_base;
  return mem_base + PAGE_SIZE;
}
 4162 
/* The bootstrap kernel entry code has set these up. Save
 * them for a given CPU: copy boot_cpu_data into
 * cpu_data[id], reset the per-CPU page-table caches, and
 * re-identify the CPU. */
void __init smp_store_cpu_info(int id)
{
  struct cpuinfo_x86 *c=&cpu_data[id];

  *c = boot_cpu_data;
  /* These caches are per-CPU; don't inherit the BP's. */
  c->pte_quick = 0;
  c->pgd_quick = 0;
  c->pgtable_cache_sz = 0;
  identify_cpu(c);
  /* Mask B, Pentium, but not Pentium MMX:
   * Intel family 5, stepping 1-4, model <= 3 */
  if (c->x86_vendor == X86_VENDOR_INTEL &&
      c->x86 == 5 &&
      c->x86_mask >= 1 && c->x86_mask <= 4 &&
      c->x86_model <= 3)
    /* Remember we have B step Pentia with bugs */
    smp_b_stepping=1;
}
 4182 
/* Architecture specific routine called by the kernel
 * just before init is fired off. This allows the BP to
 * have everything in order [we hope].  At the end of
 * this all the APs will hit the system scheduling and
 * off we go. Each AP will load the system gdt's and jump
 * through the kernel init into idle(). At this point the
 * scheduler will one day take over and give them jobs to
 * do. smp_callin is a standard routine we use to track
 * CPUs as they power up.  */

/* 0 until smp_commence(); APs spin on it in start_secondary() */
static atomic_t smp_commenced = ATOMIC_INIT(0);

void __init smp_commence(void)
{
  /* Lets the callins below out of their loop. */
  SMP_PRINTK(("Setting commenced=1, go go go\n"));

  /* Make sure everything done so far is visible before the
   * APs see the flag flip. */
  wmb();
  atomic_set(&smp_commenced,1);
}
 4203 
/* Program this CPU's local APIC into a sane, enabled state:
 * software-enable it, accept all interrupt priorities, and
 * select flat logical delivery mode. */
void __init enable_local_APIC(void)
{
  unsigned long value;

  value = apic_read(APIC_SPIV);
  value |= (1<<8);  /* Enable APIC (bit==1) */
  value &= ~(1<<9); /* Enable focus processor (bit==0) */
  value |= 0xff;    /* Set spurious IRQ vector to 0xff */
  apic_write(APIC_SPIV,value);

  /* Set Task Priority to 'accept all' */
  value = apic_read(APIC_TASKPRI);
  value &= ~APIC_TPRI_MASK;
  apic_write(APIC_TASKPRI,value);

  /* Clear the logical destination ID, just to be safe.
   * also, put the APIC into flat delivery mode.  */
  value = apic_read(APIC_LDR);
  value &= ~APIC_LDR_MASK;
  apic_write(APIC_LDR,value);

  value = apic_read(APIC_DFR);
  value |= SET_APIC_DFR(0xf);  /* 0xf == flat model */
  apic_write(APIC_DFR, value);

  udelay(100);                    /* B safe */
}
 4231 
/* Establish fixmap virtual mappings for the local APIC and
 * (if configured) the IO-APIC.  When no MP configuration was
 * found, maps freshly zeroed pages instead so stray accesses
 * are harmless.  Returns the (possibly advanced) allocation
 * watermark. */
unsigned long __init
init_smp_mappings(unsigned long memory_start)
{
  unsigned long apic_phys;

  memory_start = PAGE_ALIGN(memory_start);
  if (smp_found_config) {
    apic_phys = mp_lapic_addr;
  } else {
    /* set up a fake all zeroes page to simulate the
     * local APIC and another one for the IO-APIC. We
     * could use the real zero-page, but it's safer this
     * way if some buggy code writes to this page ...  */
    apic_phys = __pa(memory_start);
    memset((void *)memory_start, 0, PAGE_SIZE);
    memory_start += PAGE_SIZE;
  }
  set_fixmap(FIX_APIC_BASE,apic_phys);
  printk("mapped APIC to %08lx (%08lx)\n",
         APIC_BASE, apic_phys);

#ifdef CONFIG_X86_IO_APIC
  {
    unsigned long ioapic_phys;

    if (smp_found_config) {
      ioapic_phys = mp_ioapic_addr;
    } else {
      /* Same trick for the IO-APIC: a zeroed dummy page. */
      ioapic_phys = __pa(memory_start);
      memset((void *)memory_start, 0, PAGE_SIZE);
      memory_start += PAGE_SIZE;
    }
    set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
    printk("mapped IOAPIC to %08lx (%08lx)\n",
        fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
  }
#endif

  return memory_start;
}
 4272 
 4273 extern void calibrate_delay(void);
 4274 
/* AP-side rendezvous: wait for the boot CPU's callout, then
 * bring up this CPU's APIC, timer and delay calibration, and
 * finally report in via cpu_callin_map. */
void __init smp_callin(void)
{
  int cpuid;
  unsigned long timeout;

  /* (This works even if the APIC is not enabled.)  */
  cpuid = GET_APIC_ID(apic_read(APIC_ID));

  SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));

  /* STARTUP IPIs are fragile beasts as they might
   * sometimes trigger some glue motherboard
   * logic. Complete APIC bus silence for 1 second, this
   * overestimates the time the boot CPU is spending to
   * send the up to 2 STARTUP IPIs by a factor of
   * two. This should be enough.  */

  /* Waiting 2s total for startup (udelay is not yet
   * working) */
  timeout = jiffies + 2*HZ;
  while (time_before(jiffies,timeout))
  {
    /* Has the boot CPU finished its STARTUP sequence? */
    if (test_bit(cpuid,
                 (unsigned long *)&cpu_callout_map[0]))
      break;
  }

  /* Entered only if the wait above expired without a
   * callout; stop_this_cpu() should park this CPU (the
   * while guards against it returning). */
  while (!time_before(jiffies,timeout)) {
    printk("BUG: CPU%d started up but did not get a "
           "callout!\n", cpuid);
    stop_this_cpu();
  }

  /* the boot CPU has finished the init stage and is
   * spinning on callin_map until we finish. We are free
   * to set up this CPU, first the APIC. (this is
   * probably redundant on most boards) */
  SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
  enable_local_APIC();

  /* Set up our APIC timer. */
  setup_APIC_clock();

  __sti();

#ifdef CONFIG_MTRR
  /* Must be done before calibration delay is computed */
  mtrr_init_secondary_cpu ();
#endif
  /* Get our bogomips. */
  calibrate_delay();
  SMP_PRINTK(("Stack at about %p\n",&cpuid));

  /* Save our processor parameters */
  smp_store_cpu_info(cpuid);

  /* Allow the master to continue. */
  set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
}
 4335 
/* Number of secondary CPUs brought up so far (BP excluded) */
int cpucount = 0;

extern int cpu_idle(void * unused);

/* Activate a secondary processor: first thread body run by
 * each AP after the trampoline. */
int __init start_secondary(void *unused)
{
  /* Don't put anything before smp_callin(), SMP booting
   * is too fragile that we want to limit the things done
   * here to the most necessary things.  */
  smp_callin();
  /* Spin until the boot CPU releases us via smp_commence(). */
  while (!atomic_read(&smp_commenced))
    /* nothing */ ;
  return cpu_idle(NULL);
}
 4351 
/* Everything has been set up for the secondary CPUs -
 * they just need to reload everything from the task
 * structure */
void __init initialize_secondary(void)
{
  struct thread_struct * p = &current->tss;

  /* Load up the LDT and the task register.  */
  asm volatile("lldt %%ax": :"a" (p->ldt));
  asm volatile("ltr %%ax": :"a" (p->tr));
  stts();

  /* We don't actually need to load the full TSS,
   * basically just the stack pointer and the eip.  */

  /* Switch to the saved kernel stack and jump to the saved
   * entry point; this never returns to the caller. */
  asm volatile(
    "movl %0,%%esp\n\t"
    "jmp *%1"
    :
    :"r" (p->esp),"r" (p->eip));
}
 4373 
 4374 extern struct {
 4375   void * esp;
 4376   unsigned short ss;
 4377 } stack_start;
 4378 
/*
 * Boot the secondary CPU with physical APIC id 'i':
 * fork an idle task for it, point the BIOS warm-reset
 * vector at the trampoline, kick the CPU with the
 * MP-spec INIT / STARTUP IPI sequence and then wait for
 * it to call in.  Runs on the boot CPU only, with the
 * target CPU still halted.  On failure the bookkeeping
 * (cpucount, maps) is rolled back.
 */
static void __init do_boot_cpu(int i)
{
  unsigned long cfg;
  pgd_t maincfg;
  struct task_struct *idle;
  unsigned long send_status, accept_status;
  int timeout, num_starts, j;
  unsigned long start_eip;

  /* We need an idle process for each processor. */
  kernel_thread(start_secondary, NULL, CLONE_PID);
  cpucount++;

  /* The fork above made the new task visible in task[]. */
  idle = task[cpucount];
  if (!idle)
    panic("No idle process for CPU %d", i);

  idle->processor = i;
  __cpu_logical_map[cpucount] = i;
  cpu_number_map[i] = cpucount;

  /* start_eip had better be page-aligned! */
  start_eip = setup_trampoline();

  /* So we see what's up */
  printk("Booting processor %d eip %lx\n", i, start_eip);
  /* Initial stack for the AP: inside the idle task's
   * own pages. */
  stack_start.esp = (void *) (1024 + PAGE_SIZE +
                              (char *)idle);

  /* This grunge runs the startup process for the
   * targeted processor. */

  SMP_PRINTK(("Setting warm reset code and vector.\n"));

  /* CMOS shutdown code 0xA = jump via the 40:67
   * warm-reset vector on the next (INIT) reset. */
  CMOS_WRITE(0xa, 0xf);
  local_flush_tlb();
  SMP_PRINTK(("1.\n"));
  /* Warm-reset vector (segment at 40:69, offset at
   * 40:67) -> the trampoline page. */
  *((volatile unsigned short *) phys_to_virt(0x469)) =
    start_eip >> 4;
  SMP_PRINTK(("2.\n"));
  *((volatile unsigned short *) phys_to_virt(0x467)) =
    start_eip & 0xf;
  SMP_PRINTK(("3.\n"));

  /* Temporarily identity-map low memory so the AP can
   * execute at its real-mode start address; restored at
   * the bottom of this function. */
  maincfg=swapper_pg_dir[0];
  ((unsigned long *)swapper_pg_dir)[0]=0x102007;

  /* Be paranoid about clearing APIC errors (integrated
   * APICs only - version 0x1x). */

  if ( apic_version[i] & 0xF0 )
  {
    apic_write(APIC_ESR, 0);
    accept_status = (apic_read(APIC_ESR) & 0xEF);
  }

  /* Status is now clean */

  send_status =   0;
  accept_status = 0;

  /* Starting actual IPI sequence... */

  SMP_PRINTK(("Asserting INIT.\n"));

  /* Turn INIT on (level-triggered, asserted) */

  cfg=apic_read(APIC_ICR2);
  cfg&=0x00FFFFFF;
  /* Target chip          */
  apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
  cfg=apic_read(APIC_ICR);
  /* Clear bits           */
  cfg&=~0xCDFFF;
  cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT |
          APIC_DEST_DM_INIT);
  /* Send IPI */
  apic_write(APIC_ICR, cfg);

  udelay(200);
  SMP_PRINTK(("Deasserting INIT.\n"));

  cfg=apic_read(APIC_ICR2);
  cfg&=0x00FFFFFF;
  /* Target chip          */
  apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
  cfg=apic_read(APIC_ICR);
  /* Clear bits           */
  cfg&=~0xCDFFF;
  cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT);
  /* Send IPI */
  apic_write(APIC_ICR, cfg);

  /* Should we send STARTUP IPIs?
   *
   * Determine this based on the APIC version.  If we
   * don't have an integrated APIC, don't send the
   * STARTUP IPIs.  */

  if ( apic_version[i] & 0xF0 )
    num_starts = 2;
  else
    num_starts = 0;

  /* Run STARTUP IPI loop. */

  for (j = 1; !(send_status || accept_status)
        && (j <= num_starts) ; j++)
  {
    SMP_PRINTK(("Sending STARTUP #%d.\n",j));
    apic_write(APIC_ESR, 0);
    SMP_PRINTK(("After apic_write.\n"));

    /* STARTUP IPI */

    cfg=apic_read(APIC_ICR2);
    cfg&=0x00FFFFFF;
    /* Target chip          */
    apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));
    cfg=apic_read(APIC_ICR);
    /* Clear bits           */
    cfg&=~0xCDFFF;
    /* Vector field = page number of the trampoline */
    cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12));
    SMP_PRINTK(("Before start apic_write.\n"));
    /* Kick the second      */
    apic_write(APIC_ICR, cfg);

    SMP_PRINTK(("Startup point 1.\n"));

    timeout = 0;
    SMP_PRINTK(("Waiting for send to finish...\n"));
    /* Poll the ICR delivery-status bit (bit 12). */
    do {
      SMP_PRINTK(("+"));
      udelay(100);
      send_status = apic_read(APIC_ICR) & 0x1000;
    } while (send_status && (timeout++ < 1000));

    /* Give the other CPU some time to accept the IPI. */
    udelay(200);
    accept_status = (apic_read(APIC_ESR) & 0xEF);
  }
  SMP_PRINTK(("After Startup.\n"));

  if (send_status)           /* APIC never delivered?? */
    printk("APIC never delivered???\n");
  if (accept_status)         /* Send accept error */
    printk("APIC delivery error (%lx).\n",accept_status);

  if ( !(send_status || accept_status) )
  {
    /* allow APs to start initializing. */
    SMP_PRINTK(("Before Callout %d.\n", i));
    set_bit(i, (unsigned long *)&cpu_callout_map[0]);
    SMP_PRINTK(("After Callout %d.\n", i));

    /* Wait up to 5s (50000 * 100us) for the AP to raise
     * its bit in cpu_callin_map. */
    for(timeout=0;timeout<50000;timeout++)
    {
      if (cpu_callin_map[0]&(1<<i))
        break;         /* It has booted */
      udelay(100);     /* Wait 5s total for a response */
    }
    if (cpu_callin_map[0]&(1<<i))
    {
      /* # CPUs logically, starting from 1 (BSP is 0) */
#if 0
      cpu_number_map[i] = cpucount;
      __cpu_logical_map[cpucount] = i;
#endif
      printk("OK.\n");
      printk("CPU%d: ", i);
      print_cpu_info(&cpu_data[i]);
    }
    else
    {
      /* The trampoline writes a 0xA5 marker at phys 8192
       * once it starts running; use it to distinguish
       * "started but wedged" from "never started". */
      if (*((volatile unsigned char *)phys_to_virt(8192))
          == 0xA5)
        printk("Stuck ??\n");
      else
        printk("Not responding.\n");
    }
  SMP_PRINTK(("CPU has booted.\n"));
  }
  else
  {
    /* IPI delivery failed: undo the bookkeeping done at
     * the top of this function. */
    __cpu_logical_map[cpucount] = -1;
    cpu_number_map[i] = -1;
    cpucount--;
  }

  /* Undo the temporary low-memory identity mapping. */
  swapper_pg_dir[0]=maincfg;
  local_flush_tlb();

  /* mark "stuck" area as not stuck */
  *((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
 4575 
/* Rough cost (in CPU cycles) of refilling the CPU-local
 * cache; computed by smp_tune_scheduling() below and
 * used by the scheduler's CPU-affinity heuristic. */
cycles_t cacheflush_time;
extern unsigned long cpu_hz;
 4578 
 4579 static void smp_tune_scheduling (void)
 4580 {
 4581   unsigned long cachesize;
 4582   /* Rough estimation for SMP scheduling, this is the
 4583    * number of cycles it takes for a fully memory-limited
 4584    * process to flush the SMP-local cache.
 4585    *
 4586    * (For a P5 this pretty much means we will choose
 4587    * another idle CPU almost always at wakeup time (this
 4588    * is due to the small L1 cache), on PIIs it's around
 4589    * 50-100 usecs, depending on the cache size) */
 4590 
 4591   if (!cpu_hz) {
 4592     /* this basically disables processor-affinity
 4593      * scheduling on SMP without a TSC.  */
 4594     cacheflush_time = 0;
 4595     return;
 4596   } else {
 4597     cachesize = boot_cpu_data.x86_cache_size;
 4598     if (cachesize == -1)
 4599       cachesize = 8; /* Pentiums */
 4600 
 4601     cacheflush_time = cpu_hz/1024*cachesize/5000;
 4602   }
 4603 
 4604   printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
 4605     (long)cacheflush_time/(cpu_hz/1000000),
 4606     ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100);
 4607 }
 4608 
/* Per-CPU profiling state: the local timer tick does
 * statistics/rescheduling work every prof_multiplier[cpu]
 * ticks (settable via /proc/profile); prof_counter
 * counts down to the next such tick. */
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
 4611 
 4612 /* Cycle through the processors, sending APIC IPIs to
 4613  * boot each.  */
 4614 void __init smp_boot_cpus(void)
 4615 {
 4616   int i;
 4617 
 4618 #ifdef CONFIG_MTRR
 4619   /*  Must be done before other processors booted  */
 4620   mtrr_init_boot_cpu ();
 4621 #endif
 4622   /* Initialize the logical to physical CPU number
 4623    * mapping and the per-CPU profiling counter/multiplier
 4624    */
 4625 
 4626   for (i = 0; i < NR_CPUS; i++) {
 4627     cpu_number_map[i] = -1;
 4628     prof_counter[i] = 1;
 4629     prof_multiplier[i] = 1;
 4630   }
 4631 
 4632   /* Setup boot CPU information */
 4633 
 4634   /* Final full version of the data */
 4635   smp_store_cpu_info(boot_cpu_id);
 4636   smp_tune_scheduling();
 4637   printk("CPU%d: ", boot_cpu_id);
 4638   print_cpu_info(&cpu_data[boot_cpu_id]);
 4639 
 4640   /* not necessary because the MP table should list the
 4641    * boot CPU too, but we do it for the sake of
 4642    * robustness anyway.  (and for the case when a non-SMP
 4643    * board boots an SMP kernel) */
 4644   cpu_present_map |= (1 << hard_smp_processor_id());
 4645 
 4646   cpu_number_map[boot_cpu_id] = 0;
 4647 
 4648   /* If we couldnt find an SMP configuration at boot
 4649    * time, get out of here now!  */
 4650   if (!smp_found_config)
 4651   {
 4652     printk(KERN_NOTICE "SMP motherboard not detected. "
 4653            "Using dummy APIC emulation.\n");
 4654 #ifndef CONFIG_VISWS
 4655     io_apic_irqs = 0;
 4656 #endif
 4657     cpu_online_map = cpu_present_map;
 4658     goto smp_done;
 4659   }
 4660 
 4661   /* If SMP should be disabled, really disable it! */
 4662 
 4663   if (!max_cpus)
 4664   {
 4665     smp_found_config = 0;
 4666     printk(KERN_INFO "SMP mode deactivated, forcing use "
 4667            "of dummy APIC emulation.\n");
 4668   }
 4669 
 4670 #ifdef SMP_DEBUG
 4671   {
 4672     int reg;
 4673 
 4674     /* This is to verify that we're looking at a real
 4675      * local APIC.  Check these against your board if the
 4676      * CPUs aren't getting started for no apparent
 4677      * reason.  */
 4678     reg = apic_read(APIC_VERSION);
 4679     SMP_PRINTK(("Getting VERSION: %x\n", reg));
 4680 
 4681     apic_write(APIC_VERSION, 0);
 4682     reg = apic_read(APIC_VERSION);
 4683     SMP_PRINTK(("Getting VERSION: %x\n", reg));
 4684 
 4685     /* The two version reads above should print the same
 4686      * NON-ZERO!!! numbers.  If the second one is zero,
 4687      * there is a problem with the APIC write/read
 4688      * definitions.
 4689      *
 4690      * The next two are just to see if we have sane
 4691      * values.  They're only really relevant if we're in
 4692      * Virtual Wire compatibility mode, but most boxes
 4693      * are anymore.  */
 4694     reg = apic_read(APIC_LVT0);
 4695     SMP_PRINTK(("Getting LVT0: %x\n", reg));
 4696 
 4697     reg = apic_read(APIC_LVT1);
 4698     SMP_PRINTK(("Getting LVT1: %x\n", reg));
 4699   }
 4700 #endif
 4701 
 4702   enable_local_APIC();
 4703 
 4704   /* Set up our local APIC timer: */
 4705   setup_APIC_clock ();
 4706 
 4707   /* Now scan the CPU present map and fire up the other
 4708    * CPUs.  */
 4709 
 4710   /* Add all detected CPUs. (later on we can down
 4711    * individual CPUs which will change cpu_online_map but
 4712    * not necessarily cpu_present_map. We are pretty much
 4713    * ready for hot-swap CPUs.)  */
 4714   cpu_online_map = cpu_present_map;
 4715   mb();
 4716 
 4717   SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));
 4718 
 4719   for(i=0;i<NR_CPUS;i++)
 4720   {
 4721     /* Don't even attempt to start the boot CPU! */
 4722     if (i == boot_cpu_id)
 4723       continue;
 4724 
 4725     if ((cpu_online_map & (1 << i))
 4726         && (max_cpus < 0 || max_cpus > cpucount+1))
 4727     {
 4728       do_boot_cpu(i);
 4729     }
 4730 
 4731     /* Make sure we unmap all failed CPUs */
 4732 
 4733     if (cpu_number_map[i] == -1 &&
 4734         (cpu_online_map & (1 << i))) {
 4735       printk("CPU #%d not responding. "
 4736              "Removing from cpu_online_map.\n", i);
 4737       cpu_online_map &= ~(1 << i);
 4738     }
 4739   }
 4740 
 4741   /* Cleanup possible dangling ends... */
 4742 #ifndef CONFIG_VISWS
 4743   {
 4744     unsigned long cfg;
 4745 
 4746     /* Install writable page 0 entry. */
 4747     cfg = pg0[0];
 4748     /* writeable, present, addr 0 */
 4749     pg0[0] = _PAGE_RW | _PAGE_PRESENT;
 4750     local_flush_tlb();
 4751 
 4752     /* Paranoid: Set warm reset code and vector here back
 4753      * to default values.  */
 4754     CMOS_WRITE(0, 0xf);
 4755 
 4756     *((volatile long *) phys_to_virt(0x467)) = 0;
 4757 
 4758     /* Restore old page 0 entry. */
 4759     pg0[0] = cfg;
 4760     local_flush_tlb();
 4761   }
 4762 #endif
 4763 
 4764   /* Allow the user to impress friends. */
 4765   SMP_PRINTK(("Before bogomips.\n"));
 4766   if (cpucount==0)
 4767   {
 4768     printk(KERN_ERR
 4769            "Error: only one processor found.\n");
 4770     cpu_online_map = (1<<hard_smp_processor_id());
 4771   }
 4772   else
 4773   {
 4774     unsigned long bogosum=0;
 4775     for(i=0;i<32;i++)
 4776     {
 4777       if (cpu_online_map&(1<<i))
 4778         bogosum+=cpu_data[i].loops_per_sec;
 4779     }
 4780     printk(KERN_INFO "Total of %d processors activated "
 4781            "(%lu.%02lu BogoMIPS).\n",
 4782       cpucount+1,
 4783       (bogosum+2500)/500000,
 4784       ((bogosum+2500)/5000)%100);
 4785     SMP_PRINTK(("Before bogocount - "
 4786                 "setting activated=1.\n"));
 4787     smp_activated=1;
 4788     smp_num_cpus=cpucount+1;
 4789   }
 4790   if (smp_b_stepping)
 4791     printk(KERN_WARNING "WARNING: SMP operation may be "
 4792            "unreliable with B stepping processors.\n");
 4793   SMP_PRINTK(("Boot done.\n"));
 4794 
 4795   cache_APIC_registers();
 4796 #ifndef CONFIG_VISWS
 4797   /* Here we can be sure that there is an IO-APIC in the
 4798    * system. Let's go and set it up: */
 4799   if (!skip_ioapic_setup)
 4800     setup_IO_APIC();
 4801 #endif
 4802 
 4803 smp_done:
 4804 }
 4805 
 4806 
 4807 /* the following functions deal with sending IPIs between
 4808  * CPUs.
 4809  *
 4810  * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.*/
 4811 
 4812 
/* Silly serialization to work around CPU bug in P5s.  We
 * can safely turn it off on a 686.  */
#ifdef CONFIG_X86_GOOD_APIC
# define FORCE_APIC_SERIALIZATION 0
#else
# define FORCE_APIC_SERIALIZATION 1
#endif

/* Boot-time snapshots of the reserved ICR/ICR2 bits,
 * filled in by cache_APIC_registers(). */
static unsigned int cached_APIC_ICR;
static unsigned int cached_APIC_ICR2;

/* Caches reserved bits, APIC reads are (mildly)
 * expensive and force otherwise unnecessary CPU
 * synchronization.  (We could cache other APIC registers
 * too, but these are the main ones used in RL.)  */
#define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
#define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
 4830 
 4831 void cache_APIC_registers (void)
 4832 {
 4833   cached_APIC_ICR = slow_ICR;
 4834   cached_APIC_ICR2 = slow_ICR2;
 4835   mb();
 4836 }
 4837 
/* Return the ICR with its reserved bits preserved.  In
 * serialized (pre-686 workaround) mode we read the real
 * register and also spin until any previous IPI has been
 * delivered (delivery-status bit 12 clear); otherwise we
 * use the boot-time cached copy. */
static inline unsigned int __get_ICR (void)
{
#if FORCE_APIC_SERIALIZATION
  /* Wait for the APIC to become ready - this should
   * never occur. It's a debugging check really.  */
  int count = 0;
  unsigned int cfg;

  while (count < 1000)
  {
    cfg = slow_ICR;
    if (!(cfg&(1<<12))) {
      /* Delivery finished; account any retries we
       * burned in the global ipi_count statistic. */
      if (count)
        atomic_add(count, (atomic_t*)&ipi_count);
      return cfg;
    }
    count++;
    udelay(10);
  }
  /* Timed out: return the last (still-busy) value and
   * complain - something is wrong with IPI delivery. */
  printk("CPU #%d: previous IPI still not cleared "
         "after 10mS\n", smp_processor_id());
  return cfg;
#else
  return cached_APIC_ICR;
#endif
}
 4864 
/* Return ICR2 (the destination-field register) with its
 * reserved bits preserved: a real read in serialized
 * mode, the boot-time cached copy otherwise. */
static inline unsigned int __get_ICR2 (void)
{
#if FORCE_APIC_SERIALIZATION
  return slow_ICR2;
#else
  return cached_APIC_ICR2;
#endif
}
 4873 
 4874 static inline int __prepare_ICR (unsigned int shortcut,
 4875                                  int vector)
 4876 {
 4877   unsigned int cfg;
 4878 
 4879   cfg = __get_ICR();
 4880   cfg |= APIC_DEST_DM_FIXED|shortcut|vector;
 4881 
 4882   return cfg;
 4883 }
 4884 
 4885 static inline int __prepare_ICR2 (unsigned int dest)
 4886 {
 4887   unsigned int cfg;
 4888 
 4889   cfg = __get_ICR2();
 4890   cfg |= SET_APIC_DEST_FIELD(dest);
 4891 
 4892   return cfg;
 4893 }
 4894 
/* Send 'vector' as a fixed-delivery IPI using one of the
 * APIC destination shortcuts (self / all / all-but-self),
 * so the ICR2 target field need not be programmed. */
static inline void
__send_IPI_shortcut(unsigned int shortcut, int vector)
{
  unsigned int cfg;
/* Subtle. In the case of the 'never do double writes'
 * workaround we have to lock out interrupts to be
 * safe. Otherwise it's just one single atomic write to
 * the APIC, no need for cli/sti.  */
#if FORCE_APIC_SERIALIZATION
  unsigned long flags;

  __save_flags(flags);
  __cli();
#endif

  /* No need to touch the target chip field */

  cfg = __prepare_ICR(shortcut, vector);

  /* Send the IPI. The write to APIC_ICR
   * fires this off. */
  apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
  __restore_flags(flags);
#endif
}
 4921 
/* Send 'vector' to every CPU except the current one. */
static inline void send_IPI_allbutself(int vector)
{
  __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
 4926 
/* Send 'vector' to every CPU including the current one. */
static inline void send_IPI_all(int vector)
{
  __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
 4931 
/* Send 'vector' to the current CPU only (self-IPI). */
void send_IPI_self(int vector)
{
  __send_IPI_shortcut(APIC_DEST_SELF, vector);
}
 4936 
/* Send 'vector' as a fixed-delivery IPI to the single
 * CPU whose APIC id is 'dest'.  ICR2 (the destination
 * field) must be programmed before the ICR write, since
 * the ICR write is what actually fires the IPI. */
static inline void send_IPI_single(int dest, int vector)
{
  unsigned long cfg;
#if FORCE_APIC_SERIALIZATION
  unsigned long flags;

  /* The double-write workaround needs the ICR2/ICR
   * update pair to happen without local interrupts. */
  __save_flags(flags);
  __cli();
#endif

  /* prepare target chip field */

  cfg = __prepare_ICR2(dest);
  apic_write(APIC_ICR2, cfg);

  /* program the ICR */
  cfg = __prepare_ICR(0, vector);

  /* Send the IPI. The write to APIC_ICR fires this off.
   */
  apic_write(APIC_ICR, cfg);
#if FORCE_APIC_SERIALIZATION
  __restore_flags(flags);
#endif
}
 4962 
/* Flush the TLB on every online CPU (IPI to the others,
 * local flush here).  This is fraught with deadlocks.
 * Probably the situation is not that bad as in the early
 * days of SMP, so we might ease some of the paranoia
 * here.  */

void smp_flush_tlb(void)
{
  int cpu = smp_processor_id();
  int stuck;
  unsigned long flags;

  /* it's important that we do not generate any APIC
   * traffic until the AP CPUs have booted up!  */
  if (cpu_online_map) {
    /* The assignment is safe because it's volatile so
     * the compiler cannot reorder it, because the i586
     * has strict memory ordering and because only the
     * kernel lock holder may issue a tlb flush. If you
     * break any one of those three change this to an
     * atomic bus locked or.  */

    smp_invalidate_needed = cpu_online_map;

    /* Processors spinning on some lock with IRQs
     * disabled will see this IRQ late. The
     * smp_invalidate_needed map will ensure they don't
     * do a spurious flush tlb or miss one.  */

    __save_flags(flags);
    __cli();

    send_IPI_allbutself(INVALIDATE_TLB_VECTOR);

    /* Spin waiting for completion: each CPU clears its
     * own bit from its IPI handler. */
    stuck = 50000000;
    while (smp_invalidate_needed) {
      /* Take care of "crossing" invalidates: another CPU
       * may have asked us too, so acknowledge our own
       * bit here (the local flush happens below). */
      if (test_bit(cpu, &smp_invalidate_needed))
        clear_bit(cpu, &smp_invalidate_needed);
      --stuck;
      if (!stuck) {
        /* Don't deadlock forever on a lost IPI. */
        printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
        break;
      }
    }
    __restore_flags(flags);
  }

  /* Flush the local TLB */
  local_flush_tlb();
}
 5013 
 5014 
 5015 /* this function sends a 'reschedule' IPI to another CPU.
 5016  * it goes straight through and wastes no time
 5017  * serializing anything. Worst case is that we lose a
 5018  * reschedule ...  */
/* Targeted reschedule IPI to 'cpu' - fire and forget. */
void smp_send_reschedule(int cpu)
{
  send_IPI_single(cpu, RESCHEDULE_VECTOR);
}
 5023 
/* this function sends a 'stop' IPI to all other CPUs in
 * the system.  it goes straight through (no completion
 * wait).  */
void smp_send_stop(void)
{
  send_IPI_allbutself(STOP_CPU_VECTOR);
}
 5030 
/* this function sends a 'reload MTRR state' IPI to all
 * other CPUs in the system. it goes straight through,
 * completion processing is done on the mtrr.c level.  */
void smp_send_mtrr(void)
{
  send_IPI_allbutself(MTRR_CHANGE_VECTOR);
}
 5038 
/* Local timer interrupt handler. It does both profiling
 * and process statistics/rescheduling.
 *
 * We do profiling in every local tick,
 * statistics/rescheduling happen only every 'profiling
 * multiplier' ticks. The default multiplier is 1 and it
 * can be changed by writing the new multiplier value
 * into /proc/profile.  */
void smp_local_timer_interrupt(struct pt_regs * regs)
{
  int cpu = smp_processor_id();

  /* The profiling function is SMP safe. (nothing can
   * mess around with "current", and the profiling
   * counters are updated with atomic operations). This
   * is especially useful with a profiling
   * multiplier != 1 */
  if (!user_mode(regs))
    x86_do_profile(regs->eip);

  /* Time for the statistics/rescheduling tick? */
  if (!--prof_counter[cpu]) {
    int user=0,system=0;
    struct task_struct * p = current;

    /* After doing the above, we need to make like a
     * normal interrupt - otherwise timer interrupts
     * ignore the global interrupt lock, which is the
     * WrongThing (tm) to do.  */

    if (user_mode(regs))
      user=1;
    else
      system=1;

    irq_enter(cpu, 0);
    if (p->pid) {
      /* Charge one tick to the current (non-idle) task
       * and update the global/per-CPU statistics. */
      update_one_process(p, 1, user, system, cpu);

      p->counter -= 1;
      if (p->counter < 0) {
        /* Timeslice used up: request a reschedule on
         * the way out of the interrupt. */
        p->counter = 0;
        p->need_resched = 1;
      }
      if (p->priority < DEF_PRIORITY) {
        kstat.cpu_nice += user;
        kstat.per_cpu_nice[cpu] += user;
      } else {
        kstat.cpu_user += user;
        kstat.per_cpu_user[cpu] += user;
      }

      kstat.cpu_system += system;
      kstat.per_cpu_system[cpu] += system;

    }
    /* Re-arm the countdown to the next statistics tick. */
    prof_counter[cpu]=prof_multiplier[cpu];
    irq_exit(cpu, 0);
  }

  /* We take the 'long' return path, and there every
   * subsystem grabs the apropriate locks (kernel lock/
   * irq lock).
   *
   * we might want to decouple profiling from the 'long
   * path', and do the profiling totally in assembly.
   *
   * Currently this isn't too much of an issue
   * (performance wise), we can take more than 100K local
   * irqs per second on a 100 MHz P5.  */
}
 5109 
/* Local APIC timer interrupt. This is the most natural
 * way for doing local interrupts, but local timer
 * interrupts can be emulated by broadcast interrupts
 * too. [in case the hw doesnt support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we
 * call the local interrupt as well. Thus we cannot
 * inline the local irq ... ] */
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
  /* NOTE! We'd better ACK the irq immediately, because
   * timer handling can be slow, and we want to be able
   * to accept NMI tlb invalidates during this time.  */
  ack_APIC_irq();
  /* regs are forwarded so the tick code can profile and
   * charge user/system time correctly. */
  smp_local_timer_interrupt(regs);
}
 5126 
/* Reschedule call back. Nothing to do, all the work is
 * done automatically when we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
  ack_APIC_irq();
}
 5134 
/* Invalidate (TLB flush) call-back: flush the local TLB
 * only if our bit in smp_invalidate_needed was still
 * set; clearing it signals completion to the CPU that
 * initiated smp_flush_tlb(). */
asmlinkage void smp_invalidate_interrupt(void)
{
  if (test_and_clear_bit(smp_processor_id(),
                         &smp_invalidate_needed))
    local_flush_tlb();

  ack_APIC_irq();
}
 5144 
 5145 static void stop_this_cpu (void)
 5146 {
 5147   /* Remove this CPU: */
 5148   clear_bit(smp_processor_id(), &cpu_online_map);
 5149 
 5150   if (cpu_data[smp_processor_id()].hlt_works_ok)
 5151     for(;;) __asm__("hlt");
 5152   for (;;);
 5153 }
 5154 
/* CPU halt call-back: never returns.  (Note the IRQ is
 * never acked here - the CPU stops for good anyway.) */
asmlinkage void smp_stop_cpu_interrupt(void)
{
  stop_this_cpu();
}
 5160 
 5161 void (*mtrr_hook) (void) = NULL;
 5162 
/* 'reload MTRR state' IPI handler: ack the interrupt,
 * then run the hook if the MTRR driver installed one. */
asmlinkage void smp_mtrr_interrupt(void)
{
  ack_APIC_irq();
  if (mtrr_hook) (*mtrr_hook)();
}
 5168 
/* This interrupt should _never_ happen with our APIC/SMP
 * architecture - log it if it does. */
asmlinkage void smp_spurious_interrupt(void)
{
  ack_APIC_irq();
  /* see sw-dev-man vol 3, chapter 7.4.13.5 */
  printk("spurious APIC interrupt on CPU#%d, "
         "should never happen.\n", smp_processor_id());
}
 5178 
 5179 /* This part sets up the APIC 32 bit clock in LVTT1, with
 5180  * HZ interrupts per second. We assume that the caller
 5181  * has already set up the local APIC.
 5182  *
 5183  * The APIC timer is not exactly sync with the external
 5184  * timer chip, it closely follows bus clocks.  */
 5185 
 5186 /* The timer chip is already set up at HZ interrupts per
 5187  * second here, but we do not accept timer interrupts
 5188  * yet. We only allow the BP to calibrate.  */
 5189 static unsigned int __init get_8254_timer_count(void)
 5190 {
 5191   unsigned int count;
 5192 
 5193   outb_p(0x00, 0x43);
 5194   count = inb_p(0x40);
 5195   count |= inb_p(0x40) << 8;
 5196 
 5197   return count;
 5198 }
 5199 
 5200 /* This function sets up the local APIC timer, with a
 5201  * timeout of 'clocks' APIC bus clock. During calibration
 5202  * we actually call this function twice, once with a
 5203  * bogus timeout value, second time for real. The other
 5204  * (noncalibrating) CPUs call this function only once,
 5205  * with the real value.
 5206  *
 5207  * We are strictly in irqs off mode here, as we do not
 5208  * want to get an APIC interrupt go off accidentally.
 5209  *
 5210  * We do reads before writes even if unnecessary, to get
 5211  * around the APIC double write bug.  */
 5212 #define APIC_DIVISOR 16
 5213 
/* Program the local APIC timer: periodic mode on
 * LOCAL_TIMER_VECTOR, bus clock divided by 16, initial
 * count = clocks/APIC_DIVISOR.  Caller must have irqs
 * off (see the comment above). */
void setup_APIC_timer(unsigned int clocks)
{
  unsigned long lvtt1_value;
  unsigned int tmp_value;

  /* Unfortunately the local APIC timer cannot be set up
   * into NMI mode. With the IO APIC we can re-route the
   * external timer interrupt and broadcast it as an NMI
   * to all CPUs, so no pain.  */

  /* The apic_read()s below look dead but are deliberate:
   * read-before-write works around the APIC double-write
   * bug (see the comment above this function). */
  tmp_value = apic_read(APIC_LVTT);
  lvtt1_value = APIC_LVT_TIMER_PERIODIC |
                LOCAL_TIMER_VECTOR;
  apic_write(APIC_LVTT , lvtt1_value);

  /* Divide PICLK by 16 */
  tmp_value = apic_read(APIC_TDCR);
  apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 )
         | APIC_TDR_DIV_16);

  /* Load the initial count; the timer counts down from
   * here and reloads automatically (periodic mode). */
  tmp_value = apic_read(APIC_TMICT);
  apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}
 5236 
 5237 void __init wait_8254_wraparound(void)
 5238 {
 5239   unsigned int curr_count, prev_count=~0;
 5240   int delta;
 5241 
 5242   curr_count = get_8254_timer_count();
 5243 
 5244   do {
 5245     prev_count = curr_count;
 5246     curr_count = get_8254_timer_count();
 5247     delta = curr_count-prev_count;
 5248 
 5249     /* This limit for delta seems arbitrary, but it
 5250      * isn't, it's slightly above the level of error a
 5251      * buggy Mercury/Neptune chipset timer can cause.  */
 5252   } while (delta<300);
 5253 }
 5254 
 5255 /* In this function we calibrate APIC bus clocks to the
 5256  * external timer. Unfortunately we cannot use jiffies
 5257  * and the timer irq to calibrate, since some later
 5258  * bootup code depends on getting the first irq? Ugh.
 5259  *
 5260  * We want to do the calibration only once since we want
 5261  * to have local timer irqs syncron. CPUs connected by
 5262  * the same APIC bus have the very same bus frequency.
 5263  * And we want to have irqs off anyways, no accidental
 5264  * APIC irq that way.  */
 5265 
/* Measure the APIC bus clock against the 8254 timer and
 * return the number of APIC bus clocks per timer-chip
 * tick (the value later fed to setup_APIC_timer()).
 * Runs once, on the calibrating CPU, with irqs off. */
int __init calibrate_APIC_clock(void)
{
  unsigned long long t1,t2;   /* TSC timestamps */
  long tt1,tt2;               /* APIC current-count reads */
  long calibration_result;
  int i;

  printk("calibrating APIC timer ... ");

  /* Put whatever arbitrary (but long enough) timeout
   * value into the APIC clock, we just want to get the
   * counter running for calibration.  */
  setup_APIC_timer(1000000000);

  /* The timer chip counts down to zero. Let's wait for a
   * wraparound to start exact measurement: (the current
   * tick might have been already half done) */

  wait_8254_wraparound ();

  /* We wrapped around just now. Let's start: */
  READ_TSC(t1);
  tt1=apic_read(APIC_TMCCT);

#define LOOPS (HZ/10)
  /* Let's wait LOOPS wraprounds: */
  for (i=0; i<LOOPS; i++)
    wait_8254_wraparound ();

  tt2=apic_read(APIC_TMCCT);
  READ_TSC(t2);

  /* The APIC bus clock counter is 32 bits only, it might
   * have overflown, but note that we use signed longs,
   * thus no extra care needed.
   *
   * underflown to be exact, as the timer counts down ;)
   */

  calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;

  SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip "
              "tick.", (unsigned long)(t2-t1)/LOOPS));

  SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer "
              "chip tick.", calibration_result));

  printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
         ((long)(t2-t1)/LOOPS)/(1000000/HZ),
         ((long)(t2-t1)/LOOPS)%(1000000/HZ));

  printk("..... system bus clock speed is %ld.%04ld "
         "MHz.\n",
         calibration_result/(1000000/HZ),
         calibration_result%(1000000/HZ)  );
#undef LOOPS

  return calibration_result;
}
 5325 
 5326 static unsigned int calibration_result;
 5327 
/* Set up this CPU's local APIC timer for real.  Called
 * on every CPU; the first caller performs the one-time
 * calibration, the others wait for it to finish. */
void __init setup_APIC_clock(void)
{
  unsigned long flags;

  /* 0 = free, 1 = calibration in progress,
   * 3 = calibration finished. */
  static volatile int calibration_lock;

  __save_flags(flags);
  __cli();

  SMP_PRINTK(("setup_APIC_clock() called.\n"));

  /* [ setup_APIC_clock() is called from all CPUs, but we
   * want to do this part of the setup only once ... and
   * it fits here best ] */
  if (!test_and_set_bit(0,&calibration_lock)) {

    calibration_result=calibrate_APIC_clock();
    /* Signal completion to the other CPU[s]: */
    calibration_lock = 3;

  } else {
    /* Other CPU is calibrating, wait for finish: */
    SMP_PRINTK(("waiting for other CPU "
                "calibrating APIC ... "));
    while (calibration_lock == 1);
    SMP_PRINTK(("done, continuing.\n"));
  }

  /* Now set up the timer for real. */
  setup_APIC_timer (calibration_result);

  /* We ACK the APIC, just in case there is something
   * pending.  */
  ack_APIC_irq ();

  __restore_flags(flags);
}
 5365 
/* the frequency of the profiling timer can be changed by
 * writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 *
 * Returns 0 on success, -EINVAL for a zero or
 * too-large multiplier.  */
int setup_profiling_timer(unsigned int multiplier)
{
  int cpu = smp_processor_id();
  unsigned long flags;

  /* Sanity check. [at least 500 APIC cycles should be
   * between APIC interrupts as a rule of thumb, to avoid
   * irqs flooding us] */
  if ( (!multiplier) ||
       (calibration_result/multiplier < 500))
    return -EINVAL;

  /* Reprogram the local APIC timer with interrupts off,
   * then remember the new multiplier for this CPU. */
  save_flags(flags);
  cli();
  setup_APIC_timer(calibration_result/multiplier);
  prof_multiplier[cpu]=multiplier;
  restore_flags(flags);

  return 0;
}
 5390 
 5391 #undef APIC_DIVISOR
 5392 
