From: Paul Mackerras

This patch adds code to the ppc32 alignment exception handler to make it
handle the load/store string and load/store multiple word instructions.
This is an issue for older CPUs such as the PPC601, which traps on
load/store string instructions that cross a page boundary (newer CPUs
handle this in hardware).  I have a little test program which exercises
this code, so I am reasonably confident it's correct.

Signed-off-by: Paul Mackerras
Signed-off-by: Andrew Morton
---

 25-akpm/arch/ppc/kernel/align.c |  163 ++++++++++++++++++++++++----------------
 1 files changed, 99 insertions(+), 64 deletions(-)

diff -puN arch/ppc/kernel/align.c~ppc32-handle-misaligned-string-multiple-insns arch/ppc/kernel/align.c
--- 25/arch/ppc/kernel/align.c~ppc32-handle-misaligned-string-multiple-insns	2004-08-15 15:22:51.153086304 -0700
+++ 25-akpm/arch/ppc/kernel/align.c	2004-08-15 15:22:51.158085544 -0700
@@ -37,6 +37,7 @@ struct aligninfo {
 #define U	0x10	/* update index register */
 #define M	0x20	/* multiple load/store */
 #define S	0x40	/* single-precision fp, or byte-swap value */
+#define SX	0x40	/* byte count in XER */
 #define HARD	0x80	/* string, stwcx. */
 
 #define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */
@@ -88,10 +89,10 @@ static struct aligninfo aligninfo[128] =
 	INVALID,	/* 01 0 0101: lwax */
 	INVALID,	/* 01 0 0110 */
 	INVALID,	/* 01 0 0111 */
-	{ 0, LD+HARD },		/* 01 0 1000: lswx */
-	{ 0, LD+HARD },		/* 01 0 1001: lswi */
-	{ 0, ST+HARD },		/* 01 0 1010: stswx */
-	{ 0, ST+HARD },		/* 01 0 1011: stswi */
+	{ 4, LD+M+HARD+SX },	/* 01 0 1000: lswx */
+	{ 4, LD+M+HARD },	/* 01 0 1001: lswi */
+	{ 4, ST+M+HARD+SX },	/* 01 0 1010: stswx */
+	{ 4, ST+M+HARD },	/* 01 0 1011: stswi */
 	INVALID,	/* 01 0 1100 */
 	INVALID,	/* 01 0 1101 */
 	INVALID,	/* 01 0 1110 */
@@ -189,7 +190,9 @@ fix_alignment(struct pt_regs *regs)
 #endif
 	int i, t;
 	int reg, areg;
+	int offset, nb0;
 	unsigned char __user *addr;
+	unsigned char *rptr;
 	union {
 		long l;
 		float f;
@@ -206,7 +209,8 @@ fix_alignment(struct pt_regs *regs)
 	 * an alignment fault. -- paulus
 	 */
 
-	instr = *((unsigned int *)regs->nip);
+	if (__get_user(instr, (unsigned int __user *) regs->nip))
+		return 0;
 	opcode = OPCD(instr);
 	reg = RS(instr);
 	areg = RA(instr);
@@ -230,7 +234,7 @@ fix_alignment(struct pt_regs *regs)
 
 	nb = aligninfo[instr].len;
 	if (nb == 0) {
-		long *p;
+		long __user *p;
 		int i;
 
 		if (instr != DCBZ)
@@ -242,13 +246,19 @@ fix_alignment(struct pt_regs *regs)
 		 * case when we are running with the cache disabled
 		 * for debugging.
 		 */
-		p = (long *) (regs->dar & -L1_CACHE_BYTES);
+		p = (long __user *) (regs->dar & -L1_CACHE_BYTES);
+		if (user_mode(regs)
+		    && verify_area(VERIFY_WRITE, p, L1_CACHE_BYTES))
+			return -EFAULT;
 		for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i)
-			p[i] = 0;
+			if (__put_user(0, p+i))
+				return -EFAULT;
 		return 1;
 	}
 
 	flags = aligninfo[instr].flags;
+	if ((flags & (LD|ST)) == 0)
+		return 0;
 
 	/* For the 4xx-family & Book-E processors, the 'dar' field of the
 	 * pt_regs structure is overloaded and is really from the DEAR.
@@ -256,6 +266,66 @@ fix_alignment(struct pt_regs *regs)
 
 	addr = (unsigned char __user *)regs->dar;
 
+	if (flags & M) {
+		/* lmw, stmw, lswi/x, stswi/x */
+		nb0 = 0;
+		if (flags & HARD) {
+			if (flags & SX) {
+				nb = regs->xer & 127;
+				if (nb == 0)
+					return 1;
+			} else {
+				if (__get_user(instr,
+					       (unsigned int __user *)regs->nip))
+					return 0;
+				nb = (instr >> 11) & 0x1f;
+				if (nb == 0)
+					nb = 32;
+			}
+			if (nb + reg * 4 > 128) {
+				nb0 = nb + reg * 4 - 128;
+				nb = 128 - reg * 4;
+			}
+		} else {
+			/* lwm, stmw */
+			nb = (32 - reg) * 4;
+		}
+		rptr = (unsigned char *) &regs->gpr[reg];
+		if (flags & LD) {
+			for (i = 0; i < nb; ++i)
+				if (__get_user(rptr[i], addr+i))
+					return -EFAULT;
+			if (nb0 > 0) {
+				rptr = (unsigned char *) &regs->gpr[0];
+				addr += nb;
+				for (i = 0; i < nb0; ++i)
+					if (__get_user(rptr[i], addr+i))
+						return -EFAULT;
+			}
+			for (; (i & 3) != 0; ++i)
+				rptr[i] = 0;
+		} else {
+			for (i = 0; i < nb; ++i)
+				if (__put_user(rptr[i], addr+i))
+					return -EFAULT;
+			if (nb0 > 0) {
+				rptr = (unsigned char *) &regs->gpr[0];
+				addr += nb;
+				for (i = 0; i < nb0; ++i)
+					if (__put_user(rptr[i], addr+i))
+						return -EFAULT;
+			}
+		}
+		return 1;
+	}
+
+	offset = 0;
+	if (nb < 4) {
+		/* read/write the least significant bits */
+		data.l = 0;
+		offset = 4 - nb;
+	}
+
 	/* Verify the address of the operand */
 	if (user_mode(regs)) {
 		if (verify_area((flags & ST? VERIFY_WRITE: VERIFY_READ), addr, nb))
@@ -268,45 +338,26 @@ fix_alignment(struct pt_regs *regs)
 		giveup_fpu(current);
 		preempt_enable();
 	}
-	if (flags & M)
-		return 0;		/* too hard for now */
 
-	/* If we read the operand, copy it in */
+	/* If we read the operand, copy it in, else get register values */
 	if (flags & LD) {
-		if (nb == 2) {
-			data.v[0] = data.v[1] = 0;
-			if (__get_user(data.v[2], addr)
-			    || __get_user(data.v[3], addr+1))
+		for (i = 0; i < nb; ++i)
+			if (__get_user(data.v[offset+i], addr+i))
 				return -EFAULT;
-		} else {
-			for (i = 0; i < nb; ++i)
-				if (__get_user(data.v[i], addr+i))
-					return -EFAULT;
-		}
+	} else if (flags & F) {
+		data.d = current->thread.fpr[reg];
+	} else {
+		data.l = regs->gpr[reg];
 	}
 
 	switch (flags & ~U) {
-	case LD+SE:
+	case LD+SE:	/* sign extend */
 		if (data.v[2] >= 0x80)
 			data.v[0] = data.v[1] = -1;
-		/* fall through */
-	case LD:
-		regs->gpr[reg] = data.l;
-		break;
-	case LD+S:
-		if (nb == 2) {
-			SWAP(data.v[2], data.v[3]);
-		} else {
-			SWAP(data.v[0], data.v[3]);
-			SWAP(data.v[1], data.v[2]);
-		}
-		regs->gpr[reg] = data.l;
-		break;
-	case ST:
-		data.l = regs->gpr[reg];
 		break;
+
+	case LD+S:	/* byte-swap */
 	case ST+S:
-		data.l = regs->gpr[reg];
 		if (nb == 2) {
 			SWAP(data.v[2], data.v[3]);
 		} else {
@@ -314,50 +365,34 @@ fix_alignment(struct pt_regs *regs)
 			SWAP(data.v[1], data.v[2]);
 		}
 		break;
-	case LD+F:
-		current->thread.fpr[reg] = data.d;
-		break;
-	case ST+F:
-		data.d = current->thread.fpr[reg];
-		break;
-	/* these require some floating point conversions... */
-	/* we'd like to use the assignment, but we have to compile
-	 * the kernel with -msoft-float so it doesn't use the
-	 * fp regs for copying 8-byte objects. */
+
+	/* Single-precision FP load and store require conversions... */
 	case LD+F+S:
 		preempt_disable();
 		enable_kernel_fp();
-		cvt_fd(&data.f, &current->thread.fpr[reg], &current->thread.fpscr);
-		/* current->thread.fpr[reg] = data.f; */
+		cvt_fd(&data.f, &data.d, &current->thread.fpscr);
 		preempt_enable();
 		break;
 	case ST+F+S:
 		preempt_disable();
 		enable_kernel_fp();
-		cvt_df(&current->thread.fpr[reg], &data.f, &current->thread.fpscr);
-		/* data.f = current->thread.fpr[reg]; */
+		cvt_df(&data.d, &data.f, &current->thread.fpscr);
 		preempt_enable();
 		break;
-	default:
-		printk("align: can't handle flags=%x\n", flags);
-		return 0;
 	}
 
 	if (flags & ST) {
-		if (nb == 2) {
-			if (__put_user(data.v[2], addr)
-			    || __put_user(data.v[3], addr+1))
+		for (i = 0; i < nb; ++i)
+			if (__put_user(data.v[offset+i], addr+i))
 				return -EFAULT;
-		} else {
-			for (i = 0; i < nb; ++i)
-				if (__put_user(data.v[i], addr+i))
-					return -EFAULT;
-		}
+	} else if (flags & F) {
+		current->thread.fpr[reg] = data.d;
+	} else {
+		regs->gpr[reg] = data.l;
 	}
 
-	if (flags & U) {
+	if (flags & U)
 		regs->gpr[areg] = regs->dar;
-	}
 
 	return 1;
 }
_
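
For reference, a user-space test along the following lines exercises the paths
this patch adds.  This is an illustrative sketch only, not the test program
referred to in the changelog; the file name, helper names and the choice of
r6/r7 and r24-r31 as staging registers are arbitrary.  It assumes a ppc32
Linux target built with GCC (inline asm, mmap).  On a 601 the copies below
cross a page boundary at a misaligned address and should be emulated by the
new handler; on CPUs that handle this in hardware the same program simply
runs natively, so matching output in both cases is the check.

/* misalign-test.c: illustrative sketch only.
 * Copies data with string/multiple instructions from a misaligned address
 * that straddles a page boundary and compares against memcpy().
 * Build: gcc -O2 -o misalign-test misalign-test.c
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

/* Copy 8 bytes with lswi/stswi, staging them in r6/r7. */
static void copy8_string(void *dst, const void *src)
{
	register const void *s asm("r4") = src;
	register void *d asm("r5") = dst;

	asm volatile("lswi  6,4,8\n\t"	/* r6,r7 <- 8 bytes at (r4) */
		     "stswi 6,5,8"	/* 8 bytes at (r5) <- r6,r7 */
		     : : "r" (s), "r" (d) : "r6", "r7", "memory");
}

/* Copy 32 bytes with lmw/stmw, staging them in r24..r31. */
static void copy32_multiple(void *dst, const void *src)
{
	asm volatile("lmw  24,0(%0)\n\t"	/* r24..r31 <- 32 bytes at src */
		     "stmw 24,0(%1)"		/* 32 bytes at dst <- r24..r31 */
		     : : "b" (src), "b" (dst)
		     : "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
		       "memory");
}

int main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	unsigned char *buf, *src, dst[32], ref[32];
	int i;

	/* Two anonymous pages; the source region straddles their boundary. */
	buf = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	for (i = 0; i < 64; ++i)
		buf[pgsz - 32 + i] = i;
	src = buf + pgsz - 3;	/* misaligned, crosses the page boundary */

	memcpy(ref, src, 8);
	memset(dst, 0, sizeof(dst));
	copy8_string(dst, src);
	printf("lswi/stswi: %s\n", memcmp(dst, ref, 8) ? "FAIL" : "ok");

	memcpy(ref, src, 32);
	memset(dst, 0, sizeof(dst));
	copy32_multiple(dst, src);
	printf("lmw/stmw:   %s\n", memcmp(dst, ref, 32) ? "FAIL" : "ok");

	return 0;
}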