[IA64] cleanup and improve fsys_gettimeofday

author Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>

Tue, 29 Jan 2008 05:39:33 +0000 (14:39 +0900)

committer Tony Luck <tony.luck@intel.com>

Mon, 10 Mar 2008 23:35:47 +0000 (16:35 -0700)
author Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Tue, 29 Jan 2008 05:39:33 +0000 (14:39 +0900)
committer Tony Luck <tony.luck@intel.com>
Mon, 10 Mar 2008 23:35:47 +0000 (16:35 -0700)
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S

index 44841971f077bfd812bdc7da38638a6ef99784d5..6a72db7ddecc010e0ddccb246e7d701c00164f50 100644 (file)
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -210,27 +210,25 @@ ENTRY(fsys_gettimeofday)
         // Note that instructions are optimized for McKinley. McKinley can
         // process two bundles simultaneously and therefore we continuously
         // try to feed the CPU two bundles and then a stop.
         // Note that instructions are optimized for McKinley. McKinley can
         // process two bundles simultaneously and therefore we continuously
         // try to feed the CPU two bundles and then a stop.
-       //
-       // Additional note that code has changed a lot. Optimization is TBD.
-       // Comments begin with "?" are maybe outdated.
-       tnat.nz p6,p0 = r31     // ? branch deferred to fit later bundle
-       mov pr = r30,0xc000     // Set predicates according to function
+
         add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
         add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+       tnat.nz p6,p0 = r31             // guard against Nat argument
+(p6)   br.cond.spnt.few .fail_einval
         movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
         ;;
         movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
         ;;
+       ld4 r2 = [r2]                   // process work pending flags
         movl r29 = itc_jitter_data      // itc_jitter
         add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20        // wall_time
         movl r29 = itc_jitter_data      // itc_jitter
         add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20        // wall_time
-       ld4 r2 = [r2]           // process work pending flags
-       ;;
-(p15)  add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20        // monotonic_time
         add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
         add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
-       add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+       mov pr = r30,0xc000     // Set predicates according to function
+       ;;
         and r2 = TIF_ALLWORK_MASK,r2
         and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval  // ? deferred branch
+       add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)  add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20        // monotonic_time
         ;;
         ;;
-       add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+       add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20     // clksrc_cycle_last
         cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
         cmp.ne p6, p0 = 0, r2   // Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
+(p6)   br.cond.spnt.many fsys_fallback_syscall
         ;;
         // Begin critical section
  .time_redo:
         ;;
         // Begin critical section
  .time_redo:
@@ -258,7 +256,6 @@ ENTRY(fsys_gettimeofday)
  (p8)   mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
  (p9)   ld8 r2 = [r30]          // MMIO_TIMER. Could also have latency issues..
  (p13)  ld8 r25 = [r19]         // get itc_lastcycle value
  (p8)   mov r2 = ar.itc         // CPU_TIMER. 36 clocks latency!!!
  (p9)   ld8 r2 = [r30]          // MMIO_TIMER. Could also have latency issues..
  (p13)  ld8 r25 = [r19]         // get itc_lastcycle value
-       ;;              // ? could be removed by moving the last add upward
         ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET     // tv_sec
         ;;
         ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET    // tv_nsec
         ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET     // tv_sec
         ;;
         ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET    // tv_nsec
@@ -285,13 +282,12 @@ ENTRY(fsys_gettimeofday)
  EX(.fail_efault, probe.w.fault r31, 3)
         xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
         ;;
  EX(.fail_efault, probe.w.fault r31, 3)
         xmpy.l f8 = f8,f7       // nsec_per_cyc*(counter-last_counter)
         ;;
-       // ? simulate tbit.nz.or p7,p0 = r28,0
         getf.sig r2 = f8
         mf
         ;;
         ld4 r10 = [r20]         // gtod_lock.sequence
         shr.u r2 = r2,r23       // shift by factor
         getf.sig r2 = f8
         mf
         ;;
         ld4 r10 = [r20]         // gtod_lock.sequence
         shr.u r2 = r2,r23       // shift by factor
-       ;;              // ? overloaded 3 bundles!
+       ;;
         add r8 = r8,r2          // Add xtime.nsecs
         cmp4.ne p7,p0 = r28,r10
  (p7)   br.cond.dpnt.few .time_redo     // sequence number changed, redo
         add r8 = r8,r2          // Add xtime.nsecs
         cmp4.ne p7,p0 = r28,r10
  (p7)   br.cond.dpnt.few .time_redo     // sequence number changed, redo
@@ -319,9 +315,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
  EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
  (p14)  xmpy.hu f8 = f8, f7             // xmpy has 5 cycles latency so use it
         ;;
  EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
  (p14)  xmpy.hu f8 = f8, f7             // xmpy has 5 cycles latency so use it
         ;;
-       mov r8 = r0
  (p14)  getf.sig r2 = f8
         ;;
  (p14)  getf.sig r2 = f8
         ;;
+       mov r8 = r0
  (p14)  shr.u r21 = r2, 4
         ;;
  EX(.fail_efault, st8 [r31] = r9)
  (p14)  shr.u r21 = r2, 4
         ;;
  EX(.fail_efault, st8 [r31] = r9)
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c

index 2cb9425e04210fed5de6e6168074f04193210971..e0dca8743dbb11b87031b7ec9e8e845638a7bb4a 100644 (file)
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
  
         while (offp < (s32 *) end) {
                 wp = (u64 *) ia64_imva((char *) offp + *offp);
  
         while (offp < (s32 *) end) {
                 wp = (u64 *) ia64_imva((char *) offp + *offp);
-               wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-               wp[1] = 0x0004000000000200UL;
-               wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-               wp[3] = 0x0084006880000200UL;
+               wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+               wp[1] = 0x0084006880000200UL;
+               wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+               wp[3] = 0x0004000000000200UL;
                 ia64_fc(wp); ia64_fc(wp + 2);
                 ++offp;
         }
                 ia64_fc(wp); ia64_fc(wp + 2);
                 ++offp;
         }
author	Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
	Tue, 29 Jan 2008 05:39:33 +0000 (14:39 +0900)
committer	Tony Luck <tony.luck@intel.com>
	Mon, 10 Mar 2008 23:35:47 +0000 (16:35 -0700)
arch/ia64/kernel/fsys.S		patch | blob | blame | history
arch/ia64/kernel/patch.c		patch | blob | blame | history