Pull sn2-mmio-writes into release branch
author Tony Luck <tony.luck@intel.com>
Tue, 21 Mar 2006 16:21:26 +0000 (08:21 -0800)
committer Tony Luck <tony.luck@intel.com>
Tue, 21 Mar 2006 16:21:26 +0000 (08:21 -0800)
Hand-fixed conflicts:
include/asm-ia64/machvec_sn2.h

Signed-off-by: Tony Luck <tony.luck@intel.com>
arch/ia64/sn/kernel/setup.c
arch/ia64/sn/kernel/sn2/sn2_smp.c
include/asm-ia64/machvec_sn2.h
include/asm-ia64/processor.h
include/asm-ia64/system.h
include/asm-ia64/thread_info.h

index 5b84836c2171b1a53c3dddc0cc060055c1433a15,f1c1338b10b4db005cc37aeb44e45525224b85c9..8b6d5c8447089d8523ed807583c9c317cc9caf0b
@@@ -3,7 -3,7 +3,7 @@@
   * License.  See the file "COPYING" in the main directory of this archive
   * for more details.
   *
-  * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+  * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
   */
  
  #include <linux/config.h>
@@@ -67,7 -67,6 +67,7 @@@ extern unsigned long last_time_offset
  extern void (*ia64_mark_idle) (int);
  extern void snidle(int);
  extern unsigned char acpi_kbd_controller_present;
 +extern unsigned long long (*ia64_printk_clock)(void);
  
  unsigned long sn_rtc_cycles_per_second;
  EXPORT_SYMBOL(sn_rtc_cycles_per_second);
@@@ -75,7 -74,7 +75,7 @@@
  DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
  EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
  
 -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
 +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]);
  EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
  
  DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
@@@ -125,6 -124,20 +125,6 @@@ struct screen_info sn_screen_info = 
        .orig_video_points = 16
  };
  
 -/*
 - * This is here so we can use the CMOS detection in ide-probe.c to
 - * determine what drives are present.  In theory, we don't need this
 - * as the auto-detection could be done via ide-probe.c:do_probe() but
 - * in practice that would be much slower, which is painful when
 - * running in the simulator.  Note that passing zeroes in DRIVE_INFO
 - * is sufficient (the IDE driver will autodetect the drive geometry).
 - */
 -#ifdef CONFIG_IA64_GENERIC
 -extern char drive_info[4 * 16];
 -#else
 -char drive_info[4 * 16];
 -#endif
 -
  /*
   * This routine can only be used during init, since
   * smp_boot_data is an init data structure.
@@@ -196,7 -209,7 +196,7 @@@ void __init early_sn_setup(void
  }
  
  extern int platform_intr_list[];
 -static int __initdata shub_1_1_found = 0;
 +static int __initdata shub_1_1_found;
  
  /*
   * sn_check_for_wars
@@@ -317,7 -330,6 +317,7 @@@ struct pcdp_vga_device 
  #define PCDP_PCI_TRANS_IOPORT 0x02
  #define PCDP_PCI_TRANS_MMIO   0x01
  
 +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
  static void
  sn_scan_pcdp(void)
  {
                break; /* once we find the primary, we're done */
        }
  }
 +#endif
 +
 +static unsigned long sn2_rtc_initial;
 +
 +static unsigned long long ia64_sn2_printk_clock(void)
 +{
 +      unsigned long rtc_now = rtc_time();
 +
 +      return (rtc_now - sn2_rtc_initial) *
 +              (1000000000 / sn_rtc_cycles_per_second);
 +}
  
  /**
   * sn_setup - SN platform setup routine
@@@ -385,7 -386,6 +385,7 @@@ void __init sn_setup(char **cmdline_p
        u32 version = sn_sal_rev();
        extern void sn_cpu_init(void);
  
 +      sn2_rtc_initial = rtc_time();
        ia64_sn_plat_set_error_handling_features();     // obsolete
        ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
        ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
         */
        build_cnode_tables();
  
 -      /*
 -       * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
 -       * support here so we don't have to listen to failed keyboard probe
 -       * messages.
 -       */
 -      if (version <= 0x0209 && acpi_kbd_controller_present) {
 -              printk(KERN_INFO "Disabling legacy keyboard support as prom "
 -                     "is too old and doesn't provide FADT\n");
 -              acpi_kbd_controller_present = 0;
 -      }
 -
 -      printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 -
        status =
            ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
                               &drift);
  
        platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
  
 +      ia64_printk_clock = ia64_sn2_printk_clock;
 +
 +      /*
 +       * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
 +       * support here so we don't have to listen to failed keyboard probe
 +       * messages.
 +       */
 +      if (version <= 0x0209 && acpi_kbd_controller_present) {
 +              printk(KERN_INFO "Disabling legacy keyboard support as prom "
 +                     "is too old and doesn't provide FADT\n");
 +              acpi_kbd_controller_present = 0;
 +      }
 +
 +      printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 +
        /*
         * we set the default root device to /dev/hda
         * to make simulation easy
         * for sn.
         */
        pm_power_off = ia64_sn_power_down;
+       current->thread.flags |= IA64_THREAD_MIGRATION;
  }
  
  /**
@@@ -580,17 -579,13 +581,17 @@@ void __init sn_cpu_init(void
                        sn_prom_type = 2;
                else
                        sn_prom_type = 1;
 -              printk("Running on medusa with %s PROM\n", (sn_prom_type == 1) ? "real" : "fake");
 +              printk(KERN_INFO "Running on medusa with %s PROM\n",
 +                     (sn_prom_type == 1) ? "real" : "fake");
        }
  
        memset(pda, 0, sizeof(pda));
 -      if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
 -                              &sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
 -                              &sn_coherency_id, &sn_region_size))
 +      if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
 +                              &sn_hub_info->nasid_bitmask,
 +                              &sn_hub_info->nasid_shift,
 +                              &sn_system_size, &sn_sharing_domain_size,
 +                              &sn_partition_id, &sn_coherency_id,
 +                              &sn_region_size))
                BUG();
        sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
  
                        SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
                u64 *pio;
                pio = is_shub1() ? pio1 : pio2;
-               pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+               pda->pio_write_status_addr =
+                  (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
                pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
        }
  
@@@ -722,8 -718,7 +724,8 @@@ void __init build_cnode_tables(void
        for_each_online_node(node) {
                kl_config_hdr_t *klgraph_header;
                nasid = cnodeid_to_nasid(node);
 -              if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
 +              klgraph_header = ia64_sn_get_klconfig_addr(nasid);
 +              if (klgraph_header == NULL)
                        BUG();
                brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
                while (brd) {
@@@ -741,7 -736,7 +743,7 @@@ nasid_slice_to_cpuid(int nasid, int sli
  {
        long cpu;
  
 -      for (cpu=0; cpu < NR_CPUS; cpu++)
 +      for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpuid_to_nasid(cpu) == nasid &&
                                        cpuid_to_slice(cpu) == slice)
                        return cpu;
index b2e1e746b47fca6ebbda649e50077ce56127c2f2,1b33fd5e4e3a05783a88d48707bfe4e492fe3d6a..d9d306c79f2d7a1743369380e637c4118d012c22
@@@ -46,24 -46,104 +46,24 @@@ DECLARE_PER_CPU(struct ptc_stats, ptcst
  
  static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
  
 -void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0,
 -      volatile unsigned long *, unsigned long data1);
 +extern unsigned long
 +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
 +                             volatile unsigned long *, unsigned long,
 +                             volatile unsigned long *, unsigned long);
 +void
 +sn2_ptc_deadlock_recovery(short *, short, short, int,
 +                        volatile unsigned long *, unsigned long,
 +                        volatile unsigned long *, unsigned long);
  
 -#ifdef DEBUG_PTC
  /*
 - * ptctest:
 - *
 - *    xyz - 3 digit hex number:
 - *            x - Force PTC purges to use shub:
 - *                    0 - no force
 - *                    1 - force
 -  *            y - interrupt enable
 -  *                    0 - disable interrupts
 -  *                    1 - leave interrupts enabled
 - *            z - type of lock:
 - *                    0 - global lock
 - *                    1 - node local lock
 - *                    2 - no lock
 - *
 - *    Note: on shub1, only ptctest == 0 is supported. Don't try other values!
 + * Note: some of the following is captured here to make debugging easier
 + * (the macros make more sense if you see the debug patch - not posted)
   */
 -
 -static unsigned int sn2_ptctest = 0;
 -
 -static int __init ptc_test(char *str)
 -{
 -      get_option(&str, &sn2_ptctest);
 -      return 1;
 -}
 -__setup("ptctest=", ptc_test);
 -
 -static inline int ptc_lock(unsigned long *flagp)
 -{
 -      unsigned long opt = sn2_ptctest & 255;
 -
 -      switch (opt) {
 -      case 0x00:
 -              spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
 -              break;
 -      case 0x01:
 -              spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
 -              break;
 -      case 0x02:
 -              local_irq_save(*flagp);
 -              break;
 -      case 0x10:
 -              spin_lock(&sn2_global_ptc_lock);
 -              break;
 -      case 0x11:
 -              spin_lock(&sn_nodepda->ptc_lock);
 -              break;
 -      case 0x12:
 -              break;
 -      default:
 -              BUG();
 -      }
 -      return opt;
 -}
 -
 -static inline void ptc_unlock(unsigned long flags, int opt)
 -{
 -      switch (opt) {
 -      case 0x00:
 -              spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 -              break;
 -      case 0x01:
 -              spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
 -              break;
 -      case 0x02:
 -              local_irq_restore(flags);
 -              break;
 -      case 0x10:
 -              spin_unlock(&sn2_global_ptc_lock);
 -              break;
 -      case 0x11:
 -              spin_unlock(&sn_nodepda->ptc_lock);
 -              break;
 -      case 0x12:
 -              break;
 -      default:
 -              BUG();
 -      }
 -}
 -#else
 -
  #define sn2_ptctest   0
 -
 -static inline int ptc_lock(unsigned long *flagp)
 -{
 -      spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
 -      return 0;
 -}
 -
 -static inline void ptc_unlock(unsigned long flags, int opt)
 -{
 -      spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
 -}
 -#endif
 +#define local_node_uses_ptc_ga(sh1)   ((sh1) ? 1 : 0)
 +#define max_active_pio(sh1)           ((sh1) ? 32 : 7)
 +#define reset_max_active_on_deadlock()        1
 +#define PTC_LOCK(sh1)                 ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
  
  struct ptc_stats {
        unsigned long ptc_l;
        unsigned long shub_ptc_flushes;
        unsigned long nodes_flushed;
        unsigned long deadlocks;
 +      unsigned long deadlocks2;
        unsigned long lock_itc_clocks;
        unsigned long shub_itc_clocks;
        unsigned long shub_itc_clocks_max;
 +      unsigned long shub_ptc_flushes_not_my_mm;
  };
  
 +#define sn2_ptctest   0
 +
  static inline unsigned long wait_piowc(void)
  {
 -      volatile unsigned long *piows, zeroval;
 -      unsigned long ws;
 +      volatile unsigned long *piows;
 +      unsigned long zeroval, ws;
  
        piows = pda->pio_write_status_addr;
        zeroval = pda->pio_write_status_val;
        do {
                cpu_relax();
        } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
 -      return ws;
 +      return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
  }
  
+ /**
+  * sn_migrate - SN-specific task migration actions
+  * @task: Task being migrated to new CPU
+  *
+  * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+  * Context switching user threads which have memory-mapped MMIO may cause
+  * PIOs to issue from separate CPUs, thus the PIO writes must be drained
+  * from the previous CPU's Shub before execution resumes on the new CPU.
+  */
+ void sn_migrate(struct task_struct *task)
+ {
+       pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+       volatile unsigned long *adr = last_pda->pio_write_status_addr;
+       unsigned long val = last_pda->pio_write_status_val;
+       /* Drain PIO writes from old CPU's Shub */
+       while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+                       != val))
+               cpu_relax();
+ }
  void sn_tlb_migrate_finish(struct mm_struct *mm)
  {
 -      if (mm == current->mm)
 +      /* flush_tlb_mm is inefficient if more than one user of mm */
 +      if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
                flush_tlb_mm(mm);
  }
  
@@@ -126,14 -222,12 +147,14 @@@ voi
  sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
                     unsigned long end, unsigned long nbits)
  {
 -      int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
 -      int mymm = (mm == current->active_mm && current->mm);
 +      int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
 +      int mymm = (mm == current->active_mm && mm == current->mm);
 +      int use_cpu_ptcga;
        volatile unsigned long *ptc0, *ptc1;
 -      unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
 +      unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
        short nasids[MAX_NUMNODES], nix;
        nodemask_t nodes_flushed;
 +      int active, max_active, deadlock;
  
        nodes_clear(nodes_flushed);
        i = 0;
        
  
        mynasid = get_nasid();
 +      use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
 +      max_active = max_active_pio(shub1);
  
        itc = ia64_get_itc();
 -      opt = ptc_lock(&flags);
 +      spin_lock_irqsave(PTC_LOCK(shub1), flags);
        itc2 = ia64_get_itc();
 +
        __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
        __get_cpu_var(ptcstats).shub_ptc_flushes++;
        __get_cpu_var(ptcstats).nodes_flushed += nix;
 +      if (!mymm)
 +               __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
  
 +      if (use_cpu_ptcga && !mymm) {
 +              old_rr = ia64_get_rr(start);
 +              ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
 +              ia64_srlz_d();
 +      }
 +
 +      wait_piowc();
        do {
                if (shub1)
                        data1 = start | (1UL << SH1_PTC_1_START_SHFT);
                else
                        data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
 -              for (i = 0; i < nix; i++) {
 +              deadlock = 0;
 +              active = 0;
 +              for (ibegin = 0, i = 0; i < nix; i++) {
                        nasid = nasids[i];
 -                      if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
 +                      if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
                                ia64_ptcga(start, nbits << 2);
                                ia64_srlz_i();
                        } else {
                                ptc0 = CHANGE_NASID(nasid, ptc0);
                                if (ptc1)
                                        ptc1 = CHANGE_NASID(nasid, ptc1);
 -                              pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
 -                                                         data1);
 -                              flushed = 1;
 +                              pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
 +                              active++;
 +                      }
 +                      if (active >= max_active || i == (nix - 1)) {
 +                              if ((deadlock = wait_piowc())) {
 +                                      sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
 +                                      if (reset_max_active_on_deadlock())
 +                                              max_active = 1;
 +                              }
 +                              active = 0;
 +                              ibegin = i + 1;
                        }
                }
 -              if (flushed
 -                  && (wait_piowc() &
 -                              (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
 -                      sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
 -              }
 -
                start += (1UL << nbits);
 -
        } while (start < end);
  
        itc2 = ia64_get_itc() - itc2;
        if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
                __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
  
 -      ptc_unlock(flags, opt);
 +      if (old_rr) {
 +              ia64_set_rr(start, old_rr);
 +              ia64_srlz_d();
 +      }
 +
 +      spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
  
        preempt_enable();
  }
   * TLB flush transaction.  The recovery sequence is somewhat tricky & is
   * coded in assembly language.
   */
 -void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
 -      volatile unsigned long *ptc1, unsigned long data1)
 +
 +void
 +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
 +                        volatile unsigned long *ptc0, unsigned long data0,
 +                        volatile unsigned long *ptc1, unsigned long data1)
  {
 -      extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
 -              volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
        short nasid, i;
 -      unsigned long *piows, zeroval;
 +      unsigned long *piows, zeroval, n;
  
        __get_cpu_var(ptcstats).deadlocks++;
  
        piows = (unsigned long *) pda->pio_write_status_addr;
        zeroval = pda->pio_write_status_val;
  
 -      for (i=0; i < nix; i++) {
 +
 +      for (i = ib; i <= ie; i++) {
                nasid = nasids[i];
 -              if (!(sn2_ptctest & 3) && nasid == mynasid)
 +              if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
                        continue;
                ptc0 = CHANGE_NASID(nasid, ptc0);
                if (ptc1)
                        ptc1 = CHANGE_NASID(nasid, ptc1);
 -              sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
 +
 +              n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
 +              __get_cpu_var(ptcstats).deadlocks2 += n;
        }
  
  }
@@@ -403,22 -473,20 +424,22 @@@ static int sn2_ptc_seq_show(struct seq_
        cpu = *(loff_t *) data;
  
        if (!cpu) {
 -              seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
 +              seq_printf(file,
 +                         "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n");
                seq_printf(file, "# ptctest %d\n", sn2_ptctest);
        }
  
        if (cpu < NR_CPUS && cpu_online(cpu)) {
                stat = &per_cpu(ptcstats, cpu);
 -              seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
 +              seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
                                stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
                                stat->deadlocks,
                                1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
                                1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
 -                              1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec);
 +                              1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
 +                              stat->shub_ptc_flushes_not_my_mm,
 +                              stat->deadlocks2);
        }
 -
        return 0;
  }
  
@@@ -429,7 -497,7 +450,7 @@@ static struct seq_operations sn2_ptc_se
        .show = sn2_ptc_seq_show
  };
  
 -int sn2_ptc_proc_open(struct inode *inode, struct file *file)
 +static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
  {
        return seq_open(file, &sn2_ptc_seq_ops);
  }
@@@ -446,7 -514,7 +467,7 @@@ static struct proc_dir_entry *proc_sn2_
  static int __init sn2_ptc_init(void)
  {
        if (!ia64_platform_is("sn2"))
 -              return -ENOSYS;
 +              return 0;
  
        if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
                printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
index 03d00faf03b5c414a473b8a28d4e793a9e63787b,6f0021bb387412fd65801d5ad4655b5fc4980cb2..da1d43755afea2de3730c40dbda81b09d41eda8e
@@@ -1,5 -1,5 +1,5 @@@
  /*
-  * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc.  All Rights Reserved.
+  * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
   * 
   * This program is free software; you can redistribute it and/or modify it 
   * under the terms of version 2 of the GNU General Public License 
   * License along with this program; if not, write the Free Software 
   * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
   * 
 - * Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, 
 - * Mountain View, CA  94043, or:
 - * 
 - * http://www.sgi.com 
 - * 
   * For further information regarding this notice, see: 
   * 
   * http://oss.sgi.com/projects/GenInfo/NoticeExplan
@@@ -66,6 -71,7 +66,7 @@@ extern ia64_mv_dma_sync_single_for_devi
  extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device;
  extern ia64_mv_dma_mapping_error      sn_dma_mapping_error;
  extern ia64_mv_dma_supported          sn_dma_supported;
+ extern ia64_mv_migrate_t              sn_migrate;
  
  /*
   * This stuff has dual use!
  #define platform_dma_sync_sg_for_device       sn_dma_sync_sg_for_device
  #define platform_dma_mapping_error            sn_dma_mapping_error
  #define platform_dma_supported                sn_dma_supported
+ #define platform_migrate              sn_migrate
  
  #include <asm/sn/io.h>
  
index 23c8e1be1911873ec0efd3441f6216eff536e401,29d5574d4375560ceb569aadbed106ec7e7e6450..128fefd8056fc6c308c3b33b2459bd12a4e7dfad
@@@ -50,7 -50,8 +50,8 @@@
  #define IA64_THREAD_PM_VALID  (__IA64_UL(1) << 2)     /* performance registers valid? */
  #define IA64_THREAD_UAC_NOPRINT       (__IA64_UL(1) << 3)     /* don't log unaligned accesses */
  #define IA64_THREAD_UAC_SIGBUS        (__IA64_UL(1) << 4)     /* generate SIGBUS on unaligned acc. */
-                                                       /* bit 5 is currently unused */
+ #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5)     /* require migration
+                                                          sync at ctx sw */
  #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */
  #define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */
  
@@@ -559,23 -560,6 +560,23 @@@ ia64_eoi (void
  
  #define cpu_relax()   ia64_hint(ia64_hint_pause)
  
 +static inline int
 +ia64_get_irr(unsigned int vector)
 +{
 +      unsigned int reg = vector / 64;
 +      unsigned int bit = vector % 64;
 +      u64 irr;
 +
 +      switch (reg) {
 +      case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
 +      case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break;
 +      case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break;
 +      case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break;
 +      }
 +
 +      return test_bit(bit, &irr);
 +}
 +
  static inline void
  ia64_set_lrr0 (unsigned long val)
  {
index 06253871562303b223a9cbfef4ba017ed2f44f32,99b6f307e94bbb815e3d36ff458d717ec92857fb..cd4233d66f15ba1746f0ee94049ded722a887114
@@@ -244,12 -244,44 +244,19 @@@ extern void ia64_load_extra (struct tas
                __ia64_save_fpu((prev)->thread.fph);                            \
        }                                                                       \
        __switch_to(prev, next, last);                                          \
+       /* "next" in old context is "current" in new context */                 \
+       if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&        \
+                    (task_cpu(current) !=                                     \
+                                     task_thread_info(current)->last_cpu))) { \
+               platform_migrate(current);                                     \
+               task_thread_info(current)->last_cpu = task_cpu(current);       \
+       }                                                                      \
  } while (0)
  #else
  # define switch_to(prev,next,last)    __switch_to(prev, next, last)
  #endif
  
 -/*
 - * On IA-64, we don't want to hold the runqueue's lock during the low-level context-switch,
 - * because that could cause a deadlock.  Here is an example by Erich Focht:
 - *
 - * Example:
 - * CPU#0:
 - * schedule()
 - *    -> spin_lock_irq(&rq->lock)
 - *    -> context_switch()
 - *       -> wrap_mmu_context()
 - *          -> read_lock(&tasklist_lock)
 - *
 - * CPU#1:
 - * sys_wait4() or release_task() or forget_original_parent()
 - *    -> write_lock(&tasklist_lock)
 - *    -> do_notify_parent()
 - *       -> wake_up_parent()
 - *          -> try_to_wake_up()
 - *             -> spin_lock_irq(&parent_rq->lock)
 - *
 - * If the parent's rq happens to be on CPU#0, we'll wait for the rq->lock
 - * of that CPU which will not be released, because there we wait for the
 - * tasklist_lock to become available.
 - */
  #define __ARCH_WANT_UNLOCKED_CTXSW
 -
  #define ARCH_HAS_PREFETCH_SWITCH_STACK
  #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
  
index a6ee27343a4a1fef76314e54eabdab1a3b3ac659,81641a6905d1db8de0b57ab46d9df216287ed666..56394a2c7055daed97e57b1fb3966e2acfd19622
@@@ -26,9 -26,17 +26,10 @@@ struct thread_info 
        struct exec_domain *exec_domain;/* execution domain */
        __u32 flags;                    /* thread_info flags (see TIF_*) */
        __u32 cpu;                      /* current CPU */
+       __u32 last_cpu;                 /* Last CPU thread ran on */
        mm_segment_t addr_limit;        /* user-level address space limit */
        int preempt_count;              /* 0=preemptable, <0=BUG; will also serve as bh-counter */
        struct restart_block restart_block;
 -      struct {
 -              int signo;
 -              int code;
 -              void __user *addr;
 -              unsigned long start_time;
 -              pid_t pid;
 -      } sigdelayed;                   /* Saved information for TIF_SIGDELAYED */
  };
  
  #define THREAD_SIZE                   KERNEL_STACK_SIZE
@@@ -82,6 -90,7 +83,6 @@@
  #define TIF_NEED_RESCHED      2       /* rescheduling necessary */
  #define TIF_SYSCALL_TRACE     3       /* syscall trace active */
  #define TIF_SYSCALL_AUDIT     4       /* syscall auditing active */
 -#define TIF_SIGDELAYED                5       /* signal delayed from MCA/INIT/NMI/PMI context */
  #define TIF_POLLING_NRFLAG    16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
  #define TIF_MEMDIE            17
  #define TIF_MCA_INIT          18      /* this task is processing MCA or INIT */
  #define _TIF_NOTIFY_RESUME    (1 << TIF_NOTIFY_RESUME)
  #define _TIF_SIGPENDING               (1 << TIF_SIGPENDING)
  #define _TIF_NEED_RESCHED     (1 << TIF_NEED_RESCHED)
 -#define _TIF_SIGDELAYED               (1 << TIF_SIGDELAYED)
  #define _TIF_POLLING_NRFLAG   (1 << TIF_POLLING_NRFLAG)
  #define _TIF_MCA_INIT         (1 << TIF_MCA_INIT)
  #define _TIF_DB_DISABLED      (1 << TIF_DB_DISABLED)
  
  /* "work to do on user-return" bits */
 -#define TIF_ALLWORK_MASK      (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)
 +#define TIF_ALLWORK_MASK      (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
  /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
  #define TIF_WORK_MASK         (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))