aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Benedict Torvalds <torvalds@klaava.Helsinki.FI> 1992-08-23 22:17:41 +0000
committer Nicolas Pitre <nico@cam.org> 2007-08-19 14:19:06 -0400
commit 2288d80abfb91ee08da7989ca1dd2338cc949fd3 (patch)
tree 041e1c0d58821e7cd1767d38afafd6e148eab7ec
parent fe07d2d9e764decdab478241a2ad5a157392af86 (diff)
download archive-2288d80abfb91ee08da7989ca1dd2338cc949fd3.tar.gz
linux-0.97 patchlevel 2 v0.97-pl2
As promised, 0.97.pl2 is out today (well, over here it's already tomorrow, so I guess I'm 35 minutes late. Naughty, naughty). Right now, the patch (and full source for those that don't like to patch up the system) is available at "nic.funet.fi: pub/OS/Linux/testing/Linus", but I'll try to put it on some other sites as well if I'm able and energetic enough. Probably tomorrow - together with a binary for those that aren't willing to compile the kernel on their own. 0.97.2 has mostly my mm/fs patches, along with some relatively minor diffs by others (including file locking by Doug Evans). User-level changes are minor: but the mm has changed a lot, and the vfs routines have been changed to keep track of the error-messages a bit better. Also, the vfs-interface to "follow_link()" changed slightly: people who are making filesystems should look at the changes (but they are relatively minor, and shouldn't result in any problems - both the extended fs and minix fs needed just a simple change in their respective symlink.c files). The mm changes /might/ lower performance slightly, as the paging TLB's are now flushed at every task-switch due to the new system, but I doubt it's noticeable. The other performance changes (dynamic buffers etc) in 0.97(.pl1) should overshadow that particular problem. I hope this release means that these kinds of low-level rewrites aren't needed for a while: the last couple of releases have changed some very fundamental things. Nothing seems to have suffered too badly, but I'd be happier if it all got tested more thoroughly. Anyway, discounting the ps/free etc suite of programs, everything I have tried has worked flawlessly despite the big kernel changes. I'm still worried about the reports about messed-up buffers, but have been unable to reproduce the problem, and nobody has so far disillusioned me about my guess that it's a problem with the SCSI code (which at least gives me an excuse for not doing anything about it :-). 
Other problems include at least one report of spontaneous re-booting, which is totally inexplicable, so I'm blaming hardware once more until I can get better data on the thing. As to patches sent by others: 0.97.2 contains very little of that kind of code. I've been so busy either working or implementing my own changes that I have simply ignored them for the most part. Remind me (or resend them relative to the new kernel) if you have a patch that is still needed. There is one new system call: 'vm86(struct vm86_struct * info)'. It's not ready for general use yet - it works, but will probably need some tweaking before being practical. But supporting a virtual 86 mode was so easy after the mm rewrite that I felt it was worth implementing: the vm86 code is less than 50 lines of C right now. Linus PS. The bright spot of the week goes to "The Oxford Beer Trolls" - all UK inhabitants should probably be locked into some (big) mental institution and TOBT should probably have a wing of their own, but thanks to them linux can now call itself "beerware" :-)
-rw-r--r--Makefile15
-rw-r--r--boot/head.s92
-rw-r--r--fs/Makefile8
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/exec.c43
-rw-r--r--fs/ext/freelists.c1
-rw-r--r--fs/ext/inode.c1
-rw-r--r--fs/ext/symlink.c19
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/locks.c471
-rw-r--r--fs/minix/bitmap.c1
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/minix/symlink.c19
-rw-r--r--fs/msdos/Makefile5
-rw-r--r--fs/namei.c137
-rw-r--r--fs/open.c186
-rw-r--r--fs/select.c15
-rw-r--r--fs/stat.c68
-rw-r--r--fs/super.c20
-rw-r--r--include/asm/system.h10
-rw-r--r--include/linux/ext_fs_i.h10
-rw-r--r--include/linux/fs.h32
-rw-r--r--include/linux/head.h2
-rw-r--r--include/linux/limits.h1
-rw-r--r--include/linux/minix_fs_i.h10
-rw-r--r--include/linux/mm.h10
-rw-r--r--include/linux/msdos_fs_i.h10
-rw-r--r--include/linux/sched.h45
-rw-r--r--include/linux/sys.h3
-rw-r--r--include/linux/unistd.h1
-rw-r--r--include/linux/vm86.h55
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/blk_drv/Makefile6
-rw-r--r--kernel/blk_drv/blk.h1
-rw-r--r--kernel/blk_drv/hd.c44
-rw-r--r--kernel/blk_drv/ll_rw_blk.c11
-rw-r--r--kernel/chr_drv/mem.c4
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/math/emulate.c3
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/sched.c8
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/sys.c51
-rw-r--r--kernel/sys_call.S23
-rw-r--r--kernel/traps.c2
-rw-r--r--mm/memory.c376
-rw-r--r--mm/swap.c111
-rw-r--r--net/Makefile4
-rw-r--r--net/unix.c2
50 files changed, 1337 insertions, 638 deletions
diff --git a/Makefile b/Makefile
index d143eac..879e95c 100644
--- a/Makefile
+++ b/Makefile
@@ -94,7 +94,7 @@ SUBDIRS =kernel mm fs net lib
KERNELHDRS =/usr/src/linux/include
.c.s:
- $(CC) $(CFLAGS) -S $<
+ $(CC) $(CFLAGS) -S -o $*.s $<
.s.o:
$(AS) -c -o $*.o $<
.c.o:
@@ -103,11 +103,11 @@ KERNELHDRS =/usr/src/linux/include
all: Version Image
linuxsubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
Version:
@./makever.sh
- @echo \#define UTS_RELEASE \"0.97-`cat .version`\" > include/linux/config_rel.h
+ @echo \#define UTS_RELEASE \"0.97.pl2-`cat .version`\" > include/linux/config_rel.h
@echo \#define UTS_VERSION \"`date +%D`\" > include/linux/config_ver.h
touch include/linux/config.h
@@ -127,6 +127,9 @@ tools/build: tools/build.c
boot/head.o: boot/head.s
+init/main.o: init/main.c
+ $(CC) $(CFLAGS) $(PROFILING) -c -o $*.o $<
+
tools/system: boot/head.o init/main.o linuxsubdirs
$(LD) $(LDFLAGS) -M boot/head.o init/main.o \
$(ARCHIVES) \
@@ -157,17 +160,17 @@ clean:
rm -f Image System.map tmp_make core boot/bootsect boot/setup \
boot/bootsect.s boot/setup.s init/main.s
rm -f init/*.o tools/system tools/build boot/*.o
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
backup: clean
- cd .. ; tar cf - linux | compress - > backup.Z
+ cd .. && tar cf - linux | compress - > backup.Z
sync
depend dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in init/*.c;do echo -n "init/";$(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
diff --git a/boot/head.s b/boot/head.s
index 30ef2d8..2444502 100644
--- a/boot/head.s
+++ b/boot/head.s
@@ -12,11 +12,11 @@
* the page directory.
*/
.text
-.globl _idt,_gdt,_pg_dir,_tmp_floppy_area,_floppy_track_buffer
+.globl _idt,_gdt,_swapper_pg_dir,_tmp_floppy_area,_floppy_track_buffer
/*
- * pg_dir is the main page directory, address 0x00000000
+ * swapper_pg_dir is the main page directory, address 0x00000000
*/
-_pg_dir:
+_swapper_pg_dir:
startup_32:
cld
movl $0x10,%eax
@@ -26,13 +26,6 @@ startup_32:
mov %ax,%gs
lss _stack_start,%esp
call setup_idt
- call setup_gdt
- movl $0x10,%eax # reload all the segment registers
- mov %ax,%ds # after changing gdt. CS was already
- mov %ax,%es # reloaded in 'setup_gdt'
- mov %ax,%fs
- mov %ax,%gs
- lss _stack_start,%esp
xorl %eax,%eax
1: incl %eax # check that A20 really IS enabled
movl %eax,0x000000 # loop forever if it isn't
@@ -94,9 +87,9 @@ check_x87:
* setup_idt
*
* sets up a idt with 256 entries pointing to
- * ignore_int, interrupt gates. It then loads
- * idt. Everything that wants to install itself
- * in the idt-table may do so themselves. Interrupts
+ * ignore_int, interrupt gates. It doesn't actually load
+ * idt - that can be done only after paging has been enabled
+ * and the kernel moved to 0xC0000000. Interrupts
* are enabled elsewhere, when we can be relatively
* sure everything is ok. This routine will be over-
* written by the page tables.
@@ -115,21 +108,6 @@ rp_sidt:
addl $8,%edi
dec %ecx
jne rp_sidt
- lidt idt_descr
- ret
-
-/*
- * setup_gdt
- *
- * This routines sets up a new gdt and loads it.
- * Only two entries are currently built, the same
- * ones that were built in init.s. The routine
- * is VERY complicated at two whole lines, so this
- * rather long comment is certainly needed :-).
- * This routine will beoverwritten by the page tables.
- */
-setup_gdt:
- lgdt gdt_descr
ret
/*
@@ -185,6 +163,15 @@ _floppy_track_buffer:
after_page_tables:
call setup_paging
+ lgdt gdt_descr
+ lidt idt_descr
+ ljmp $0x08,$1f
+1: movl $0x10,%eax # reload all the segment registers
+ mov %ax,%ds # after changing gdt.
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss _stack_start,%esp
pushl $0 # These are the parameters to main :-)
pushl $0
pushl $0
@@ -248,14 +235,17 @@ ignore_int:
*/
.align 2
setup_paging:
- movl $1024*5,%ecx /* 5 pages - pg_dir+4 page tables */
+ movl $1024*5,%ecx /* 5 pages - swapper_pg_dir+4 page tables */
xorl %eax,%eax
- xorl %edi,%edi /* pg_dir is at 0x000 */
+ xorl %edi,%edi /* swapper_pg_dir is at 0x000 */
cld;rep;stosl
- movl $pg0+7,_pg_dir /* set present bit/user r/w */
- movl $pg1+7,_pg_dir+4 /* --------- " " --------- */
- movl $pg2+7,_pg_dir+8 /* --------- " " --------- */
- movl $pg3+7,_pg_dir+12 /* --------- " " --------- */
+/* Identity-map the kernel in low 4MB memory for ease of transition */
+ movl $pg0+7,_swapper_pg_dir /* set present bit/user r/w */
+/* But the real place is at 0xC0000000 */
+ movl $pg0+7,_swapper_pg_dir+3072 /* set present bit/user r/w */
+ movl $pg1+7,_swapper_pg_dir+3076 /* --------- " " --------- */
+ movl $pg2+7,_swapper_pg_dir+3080 /* --------- " " --------- */
+ movl $pg3+7,_swapper_pg_dir+3084 /* --------- " " --------- */
movl $pg3+4092,%edi
movl $0xfff007,%eax /* 16Mb - 4096 + 7 (r/w user,p) */
std
@@ -263,29 +253,39 @@ setup_paging:
subl $0x1000,%eax
jge 1b
cld
- xorl %eax,%eax /* pg_dir is at 0x0000 */
+ xorl %eax,%eax /* swapper_pg_dir is at 0x0000 */
movl %eax,%cr3 /* cr3 - page directory start */
movl %cr0,%eax
orl $0x80000000,%eax
movl %eax,%cr0 /* set paging (PG) bit */
ret /* this also flushes prefetch-queue */
-.align 2
+/*
+ * The interrupt descriptor table has room for 256 idt's
+ */
+.align 4
.word 0
idt_descr:
.word 256*8-1 # idt contains 256 entries
- .long _idt
-.align 2
+ .long 0xc0000000+_idt
+
+.align 4
+_idt:
+ .fill 256,8,0 # idt is uninitialized
+
+/*
+ * The real GDT is also 256 entries long - no real reason
+ */
+.align 4
.word 0
gdt_descr:
- .word 256*8-1 # so does gdt (not that that's any
- .long _gdt # magic number, but it works for me :^)
-
- .align 3
-_idt: .fill 256,8,0 # idt is uninitialized
+ .word 256*8-1
+ .long 0xc0000000+_gdt
-_gdt: .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00c09a0000000fff /* 16Mb */
- .quad 0x00c0920000000fff /* 16Mb */
+.align 4
+_gdt:
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0xc0c09a0000000fff /* 16Mb at 0xC0000000 */
+ .quad 0xc0c0920000000fff /* 16Mb */
.quad 0x0000000000000000 /* TEMPORARY - don't use */
.fill 252,8,0 /* space for LDT's and TSS's etc */
diff --git a/fs/Makefile b/fs/Makefile
index bfe604b..8459132 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -18,7 +18,7 @@ SUBDIRS =minix ext msdos
OBJS= open.o read_write.o inode.o file_table.o buffer.o super.o \
block_dev.o stat.o exec.o pipe.o namei.o fcntl.o ioctl.o \
- select.o fifo.o
+ select.o fifo.o locks.o
all: fs.o fssubdirs
@@ -26,18 +26,18 @@ fs.o: $(OBJS)
$(LD) -r -o fs.o $(OBJS)
fssubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
for i in *.c; do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
depend dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
diff --git a/fs/buffer.c b/fs/buffer.c
index b8604be..2c5b953 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -494,7 +494,6 @@ void grow_buffers(int size)
tmp = bh;
bh->b_data = (char * ) (page+i);
bh->b_size = size;
- i += size;
}
tmp = bh;
while (1) {
diff --git a/fs/exec.c b/fs/exec.c
index 2be1aa3..c0f1c21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -80,7 +80,7 @@ int core_dump(long signr, struct pt_regs * regs)
if(current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE/1024) return 0;
__asm__("mov %%fs,%0":"=r" (fs));
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
- if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode))
+ if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode,NULL))
goto end_coredump;
if (!S_ISREG(inode->i_mode))
goto end_coredump;
@@ -169,17 +169,15 @@ int sys_uselib(const char * library)
struct inode * inode;
struct buffer_head * bh;
struct exec ex;
+ int error;
- if (get_limit(0x17) != TASK_SIZE)
+ if (!library || get_limit(0x17) != TASK_SIZE)
return -EINVAL;
if ((libnum >= MAX_SHARED_LIBS) || (libnum < 0))
return -EINVAL;
- if (library)
- inode = namei(library);
- else
- inode = NULL;
- if (!inode)
- return -ENOENT;
+ error = namei(library,&inode);
+ if (error)
+ return error;
if (!inode->i_sb || !S_ISREG(inode->i_mode) || !permission(inode,MAY_READ)) {
iput(inode);
return -EACCES;
@@ -203,6 +201,7 @@ int sys_uselib(const char * library)
current->libraries[libnum].library = inode;
current->libraries[libnum].start = ex.a_entry;
current->libraries[libnum].length = (ex.a_data+ex.a_text+0xfff) & 0xfffff000;
+ current->libraries[libnum].bss = (ex.a_bss+0xfff) & 0xfffff000;
#if 0
printk("Loaded library %d at %08x, length %08x\n",
libnum,
@@ -334,19 +333,19 @@ static unsigned long change_ldt(unsigned long text_size,unsigned long * page)
code_limit = TASK_SIZE;
data_limit = TASK_SIZE;
- code_base = get_base(current->ldt[1]);
- data_base = code_base;
+ code_base = data_base = 0;
+ current->start_code = code_base;
set_base(current->ldt[1],code_base);
set_limit(current->ldt[1],code_limit);
set_base(current->ldt[2],data_base);
set_limit(current->ldt[2],data_limit);
/* make sure fs points to the NEW data segment */
__asm__("pushl $0x17\n\tpop %%fs"::);
- data_base += data_limit - LIBRARY_SIZE;
+ data_base += data_limit;
for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
data_base -= PAGE_SIZE;
if (page[i])
- put_dirty_page(page[i],data_base);
+ put_dirty_page(current,page[i],data_base);
}
return data_limit;
}
@@ -405,8 +404,9 @@ int do_execve(unsigned long * eip,long tmp,char * filename,
panic("execve called from supervisor mode");
for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
page[i]=0;
- if (!(inode=namei(filename))) /* get executables inode */
- return -ENOENT;
+ retval = namei(filename,&inode); /* get executable inode */
+ if (retval)
+ return retval;
argc = count(argv);
envc = count(envp);
@@ -520,12 +520,10 @@ restart_interp:
*/
old_fs = get_fs();
set_fs(get_ds());
- if (!(inode=namei(interp))) { /* get executables inode */
- set_fs(old_fs);
- retval = -ENOENT;
- goto exec_error1;
- }
+ retval = namei(interp,&inode);
set_fs(old_fs);
+ if (retval)
+ goto exec_error1;
goto restart_interp;
}
brelse(bh);
@@ -582,19 +580,18 @@ restart_interp:
if ((current->close_on_exec>>i)&1)
sys_close(i);
current->close_on_exec = 0;
- free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
- free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+ clear_page_tables(current);
if (last_task_used_math == current)
last_task_used_math = NULL;
current->used_math = 0;
p += change_ldt(ex.a_text,page);
- p -= LIBRARY_SIZE + MAX_ARG_PAGES*PAGE_SIZE;
+ p -= MAX_ARG_PAGES*PAGE_SIZE;
p = (unsigned long) create_tables((char *)p,argc,envc);
current->brk = ex.a_bss +
(current->end_data = ex.a_data +
(current->end_code = ex.a_text));
current->start_stack = p;
- current->rss = (LIBRARY_OFFSET - p + PAGE_SIZE-1) / PAGE_SIZE;
+ current->rss = (TASK_SIZE - p + PAGE_SIZE-1) / PAGE_SIZE;
current->suid = current->euid = e_uid;
current->sgid = current->egid = e_gid;
if (N_MAGIC(ex) == OMAGIC)
diff --git a/fs/ext/freelists.c b/fs/ext/freelists.c
index 454796c..a2f6ed3 100644
--- a/fs/ext/freelists.c
+++ b/fs/ext/freelists.c
@@ -290,6 +290,7 @@ printk("ext_free_inode: inode empty, skipping to %d\n", efi->next);
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = NULL;
+ inode->i_blocks = inode->i_blksize = 0;
#ifdef EXTFS_DEBUG
printk("ext_new_inode : allocating inode %d\n", inode->i_ino);
#endif
diff --git a/fs/ext/inode.c b/fs/ext/inode.c
index 67ed523..e8874e5 100644
--- a/fs/ext/inode.c
+++ b/fs/ext/inode.c
@@ -295,6 +295,7 @@ void ext_read_inode(struct inode * inode)
inode->i_nlink = raw_inode->i_nlinks;
inode->i_size = raw_inode->i_size;
inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+ inode->i_blocks = inode->i_blksize = 0;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
inode->i_rdev = raw_inode->i_zone[0];
else for (block = 0; block < 12; block++)
diff --git a/fs/ext/symlink.c b/fs/ext/symlink.c
index 1daf1cb..96b3b6e 100644
--- a/fs/ext/symlink.c
+++ b/fs/ext/symlink.c
@@ -21,7 +21,7 @@
#include <linux/stat.h>
static int ext_readlink(struct inode *, char *, int);
-static struct inode * ext_follow_link(struct inode *, struct inode *);
+static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **);
/*
* symlinks can't do much...
@@ -43,8 +43,10 @@ struct inode_operations ext_symlink_inode_operations = {
NULL /* truncate */
};
-static struct inode * ext_follow_link(struct inode * dir, struct inode * inode)
+static int ext_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
{
+ int error;
unsigned short fs;
struct buffer_head * bh;
@@ -54,27 +56,30 @@ static struct inode * ext_follow_link(struct inode * dir, struct inode * inode)
}
if (!inode) {
iput(dir);
- return NULL;
+ *res_inode = NULL;
+ return -ENOENT;
}
if (!S_ISLNK(inode->i_mode)) {
iput(dir);
- return inode;
+ *res_inode = inode;
+ return 0;
}
__asm__("mov %%fs,%0":"=r" (fs));
if ((current->link_count > 5) || !inode->i_data[0] ||
!(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL;
+ return -ELOOP;
}
iput(inode);
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
current->link_count++;
- inode = _namei(bh->b_data,dir,1);
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
current->link_count--;
__asm__("mov %0,%%fs"::"r" (fs));
brelse(bh);
- return inode;
+ return error;
}
static int ext_readlink(struct inode * inode, char * buffer, int buflen)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ed2decf..fa0d23b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -14,6 +14,8 @@
#include <linux/string.h>
extern int sys_close(int fd);
+extern int fcntl_getlk(unsigned int, struct flock *);
+extern int fcntl_setlk(unsigned int, unsigned int, struct flock *);
static int dupfd(unsigned int fd, unsigned int arg)
{
@@ -72,8 +74,12 @@ int sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
filp->f_flags &= ~(O_APPEND | O_NONBLOCK);
filp->f_flags |= arg & (O_APPEND | O_NONBLOCK);
return 0;
- case F_GETLK: case F_SETLK: case F_SETLKW:
- return -ENOSYS;
+ case F_GETLK:
+ return fcntl_getlk(fd, (struct flock *) arg);
+ case F_SETLK:
+ return fcntl_setlk(fd, cmd, (struct flock *) arg);
+ case F_SETLKW:
+ return fcntl_setlk(fd, cmd, (struct flock *) arg);
default:
/* sockets need a few special fcntls. */
if (S_ISSOCK (filp->f_inode->i_mode))
diff --git a/fs/locks.c b/fs/locks.c
new file mode 100644
index 0000000..d99821b
--- /dev/null
+++ b/fs/locks.c
@@ -0,0 +1,471 @@
+/*
+ * linux/fs/locks.c
+ *
+ * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
+ * Doug Evans, 92Aug07, dje@sspiff.uucp.
+ *
+ * FIXME: two things aren't handled yet:
+ * - deadlock detection/avoidance (of dubious merit, but since it's in
+ * the definition, I guess it should be provided eventually)
+ * - mandatory locks (requires lots of changes elsewhere)
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+#define OFFSET_MAX 0x7fffffff /* FIXME: move elsewhere? */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l);
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
+static int overlap(struct file_lock *fl1, struct file_lock *fl2);
+static int lock_it(struct file *filp, struct file_lock *caller);
+static int unlock_it(struct file *filp, struct file_lock *caller);
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template);
+static void free_lock(struct file *filp, struct file_lock *fl);
+
+static struct file_lock file_lock_table[NR_FILE_LOCKS];
+static struct file_lock *file_lock_free_list;
+
+/*
+ * Called at boot time to initialize the lock table ...
+ */
+
+void fcntl_init_locks(void)
+{
+ struct file_lock *fl;
+
+ for (fl = &file_lock_table[0]; fl < file_lock_table + NR_FILE_LOCKS - 1; fl++) {
+ fl->fl_next = fl + 1;
+ fl->fl_owner = NULL;
+ }
+ file_lock_table[NR_FILE_LOCKS - 1].fl_next = NULL;
+ file_lock_table[NR_FILE_LOCKS - 1].fl_owner = NULL;
+ file_lock_free_list = &file_lock_table[0];
+}
+
+int fcntl_getlk(unsigned int fd, struct flock *l)
+{
+ struct flock flock;
+ struct file *filp;
+ struct file_lock *fl,file_lock;
+
+ if (fd >= NR_OPEN || !(filp = current->filp[fd]))
+ return -EBADF;
+ verify_area(l, sizeof(*l));
+ memcpy_fromfs(&flock, l, sizeof(flock));
+ if (flock.l_type == F_UNLCK)
+ return -EINVAL;
+ if (!copy_flock(filp, &file_lock, &flock))
+ return -EINVAL;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ if (conflict(&file_lock, fl)) {
+ flock.l_pid = fl->fl_owner->pid;
+ flock.l_start = fl->fl_start;
+ flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
+ fl->fl_end - fl->fl_start + 1;
+ flock.l_whence = fl->fl_whence;
+ flock.l_type = fl->fl_type;
+ memcpy_tofs(l, &flock, sizeof(flock));
+ return 0;
+ }
+ }
+
+ flock.l_type = F_UNLCK; /* no conflict found */
+ memcpy_tofs(l, &flock, sizeof(flock));
+ return 0;
+}
+
+/*
+ * This function implements both F_SETLK and F_SETLKW.
+ */
+
+int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
+{
+ struct file *filp;
+ struct file_lock *fl,file_lock;
+ struct flock flock;
+
+ /*
+ * Get arguments and validate them ...
+ */
+
+ if (fd >= NR_OPEN || !(filp = current->filp[fd]))
+ return -EBADF;
+ verify_area(l, sizeof(*l));
+ memcpy_fromfs(&flock, l, sizeof(flock));
+ if (!copy_flock(filp, &file_lock, &flock))
+ return -EINVAL;
+ switch (file_lock.fl_type) {
+ case F_RDLCK :
+ if (!(filp->f_mode & 1))
+ return -EBADF;
+ break;
+ case F_WRLCK :
+ if (!(filp->f_mode & 2))
+ return -EBADF;
+ break;
+ case F_UNLCK :
+ break;
+ }
+
+ /*
+ * F_UNLCK needs to be handled differently ...
+ */
+
+ if (file_lock.fl_type == F_UNLCK)
+ return unlock_it(filp, &file_lock);
+
+ /*
+ * Scan for a conflicting lock ...
+ */
+
+repeat:
+ for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ if (!conflict(&file_lock, fl))
+ continue;
+ /*
+ * File is locked by another process. If this is F_SETLKW
+ * wait for the lock to be released.
+ * FIXME: We need to check for deadlocks here.
+ */
+ if (cmd == F_SETLKW) {
+ interruptible_sleep_on(&fl->fl_wait);
+ goto repeat;
+ }
+ return -EAGAIN;
+ }
+
+ /*
+ * Lock doesn't conflict with any other lock ...
+ */
+
+ return lock_it(filp, &file_lock);
+}
+
+/*
+ * This function is called when the file is closed.
+ */
+
+void fcntl_remove_locks(struct task_struct *task, struct file *filp)
+{
+ struct file_lock *fl,*next;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; ) {
+ /*
+ * If this one is freed, {fl_next} gets clobbered when the
+ * entry is moved to the free list, so grab it now ...
+ */
+ next = fl->fl_next;
+ if (fl->fl_owner == task)
+ free_lock(filp, fl);
+ fl = next;
+ }
+}
+
+/*
+ * Verify a "struct flock" and copy it to a "struct file_lock" ...
+ * Result is a boolean indicating success.
+ */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l)
+{
+ off_t start;
+
+ if (!filp->f_inode) /* just in case */
+ return 0;
+ if (!S_ISREG(filp->f_inode->i_mode))
+ return 0;
+ if (l->l_type != F_UNLCK && l->l_type != F_RDLCK && l->l_type != F_WRLCK)
+ return 0;
+ switch (l->l_whence) {
+ case 0 /*SEEK_SET*/ : start = 0; break;
+ case 1 /*SEEK_CUR*/ : start = filp->f_pos; break;
+ case 2 /*SEEK_END*/ : start = filp->f_inode->i_size; break;
+ default : return 0;
+ }
+ if ((start += l->l_start) < 0 || l->l_len < 0)
+ return 0;
+ fl->fl_type = l->l_type;
+ fl->fl_start = start; /* we record the absolute position */
+ fl->fl_whence = 0; /* FIXME: do we record {l_start} as passed? */
+ if (l->l_len == 0 || (fl->fl_end = start + l->l_len - 1) < 0)
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = current;
+ fl->fl_wait = NULL; /* just for cleanliness */
+ return 1;
+}
+
+/*
+ * Determine if lock {sys_fl} blocks lock {caller_fl} ...
+ */
+
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+ if (caller_fl->fl_owner == sys_fl->fl_owner)
+ return 0;
+ if (!overlap(caller_fl, sys_fl))
+ return 0;
+ switch (caller_fl->fl_type) {
+ case F_RDLCK :
+ return sys_fl->fl_type != F_RDLCK;
+ case F_WRLCK :
+ return 1; /* overlapping region not owned by caller */
+ }
+ return 0; /* shouldn't get here, but just in case */
+}
+
+static int overlap(struct file_lock *fl1, struct file_lock *fl2)
+{
+ if (fl1->fl_start <= fl2->fl_start) {
+ return fl1->fl_end >= fl2->fl_start;
+ } else {
+ return fl2->fl_end >= fl1->fl_start;
+ }
+}
+
+/*
+ * Add a lock to a file ...
+ * Result is 0 for success or -ENOLCK.
+ *
+ * We try to be real clever here and always minimize the number of table
+ * entries we use. For example we merge adjacent locks whenever possible. This
+ * consumes a bit of cpu and code space, is it really worth it? Beats me.
+ *
+ * I've tried to keep the following as small and simple as possible. If you can
+ * make it smaller or simpler, please do. /dje 92Aug11
+ *
+ * WARNING: We assume the lock doesn't conflict with any other lock.
+ */
+
+static int lock_it(struct file *filp, struct file_lock *caller)
+{
+ struct file_lock *fl,*new;
+
+ /*
+ * It's easier if we allocate a slot for the lock first, and then
+ * release it later if we have to (IE: if it can be merged with
+ * another). This way the for() loop always knows that {caller} is an
+ * existing entry. This will cause the routine to fail unnecessarily
+ * in rare cases, but perfection can be pushed too far. :-)
+ */
+
+ if ((caller = alloc_lock(filp, caller)) == NULL)
+ return -ENOLCK;
+
+ /*
+ * First scan to see if we are changing/augmenting an existing lock ...
+ */
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ if (caller->fl_owner != fl->fl_owner)
+ continue;
+ if (caller == fl)
+ continue;
+ if (!overlap(caller, fl)) {
+ /*
+ * Detect adjacent regions (if same lock type) ...
+ */
+ if (caller->fl_type != fl->fl_type)
+ continue;
+ if (caller->fl_end + 1 == fl->fl_start) {
+ fl->fl_start = caller->fl_start;
+ free_lock(filp, caller);
+ caller = fl;
+ /* must continue, may overlap others now */
+ } else if (caller->fl_start - 1 == fl->fl_end) {
+ fl->fl_end = caller->fl_end;
+ free_lock(filp, caller);
+ caller = fl;
+ /* must continue, may overlap others now */
+ }
+ continue;
+ }
+ /*
+ * We've found an overlapping region. Is it a change of lock
+ * type, or are we changing the size of the locked space?
+ */
+ if (caller->fl_type != fl->fl_type) {
+ if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+ /*
+ * The new lock splits the old one in two ...
+ * {fl} is the bottom piece, {caller} is the
+ * new lock, and {new} is the top piece.
+ */
+ if ((new = alloc_lock(filp, fl)) == NULL) {
+ free_lock(filp, caller);
+ return -ENOLCK;
+ }
+ fl->fl_end = caller->fl_start - 1;
+ new->fl_start = caller->fl_end + 1;
+ return 0;
+ }
+ if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+ /*
+ * The new lock completely replaces old one ...
+ */
+ free_lock(filp, fl);
+ return 0;
+ }
+ if (caller->fl_end < fl->fl_end) {
+ fl->fl_start = caller->fl_end + 1;
+ /* must continue, may be more overlaps */
+ } else if (caller->fl_start > fl->fl_start) {
+ fl->fl_end = caller->fl_start - 1;
+ /* must continue, may be more overlaps */
+ } else {
+ printk("lock_it: program bug: unanticipated overlap\n");
+ free_lock(filp, caller);
+ return -ENOLCK;
+ }
+ } else { /* The new lock augments an existing lock ... */
+ int grew = 0;
+
+ if (caller->fl_start < fl->fl_start) {
+ fl->fl_start = caller->fl_start;
+ grew = 1;
+ }
+ if (caller->fl_end > fl->fl_end) {
+ fl->fl_end = caller->fl_end;
+ grew = 1;
+ }
+ free_lock(filp, caller);
+ caller = fl;
+ if (!grew)
+ return 0;
+ /* must continue, may be more overlaps */
+ }
+ }
+
+ /*
+ * New lock doesn't overlap any regions ...
+ * alloc_lock() has already been called, so we're done!
+ */
+
+ return 0;
+}
+
+/*
+ * Handle F_UNLCK ...
+ * Result is 0 for success, or -EINVAL or -ENOLCK.
+ * ENOLCK can happen when a lock is split into two.
+ */
+
+static int unlock_it(struct file *filp, struct file_lock *caller)
+{
+ int one_unlocked = 0;
+ struct file_lock *fl,*next;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; ) {
+ if (caller->fl_owner != fl->fl_owner || !overlap(caller, fl)) {
+ fl = fl->fl_next;
+ continue;
+ }
+ one_unlocked = 1;
+ if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+ /*
+ * Lock is split in two ...
+ * {fl} is the bottom piece, {next} is the top piece.
+ */
+ if ((next = alloc_lock(filp, fl)) == NULL)
+ return -ENOLCK;
+ fl->fl_end = caller->fl_start - 1;
+ next->fl_start = caller->fl_end + 1;
+ return 0;
+ }
+ /*
+ * At this point we know there is an overlap and we know the
+ * lock isn't split into two ...
+ *
+ * Unless the lock table is broken, entries will not overlap.
+ * IE: User X won't have an entry locking bytes 1-3 and another
+ * entry locking bytes 3-5. Therefore, if the area being
+ * unlocked is a subset of the total area, we don't need to
+ * traverse any more of the list. The code is a tad more
+ * complicated by this optimization. Perhaps it's not worth it.
+ *
+ * WARNING: We assume free_lock() does not alter
+ * {fl_start, fl_end}.
+ *
+ * {fl_next} gets clobbered when the entry is moved to
+ * the free list, so grab it now ...
+ */
+ next = fl->fl_next;
+ if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+ free_lock(filp, fl);
+ } else if (caller->fl_start > fl->fl_start) {
+ fl->fl_end = caller->fl_start - 1;
+ } else {
+ /* caller->fl_end < fl->fl_end */
+ fl->fl_start = caller->fl_end + 1;
+ }
+ if (caller->fl_start >= fl->fl_start && caller->fl_end <= fl->fl_end)
+ return 0; /* no more to be found */
+ fl = next;
+ /* must continue, there may be more to unlock */
+ }
+
+ return one_unlocked ? 0 : -EINVAL;
+}
+
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template)
+{
+ struct file_lock *new;
+
+ if (file_lock_free_list == NULL)
+ return NULL; /* no available entry */
+ if (file_lock_free_list->fl_owner != NULL)
+ panic("alloc_lock: broken free list\n");
+
+ new = file_lock_free_list; /* remove from free list */
+ file_lock_free_list = file_lock_free_list->fl_next;
+
+ *new = *template;
+
+ new->fl_next = filp->f_inode->i_flock; /* insert into file's list */
+ filp->f_inode->i_flock = new;
+
+ new->fl_owner = current; /* FIXME: needed? */
+ new->fl_wait = NULL;
+ return new;
+}
+
+/*
+ * Add a lock to the free list ...
+ *
+ * WARNING: We must not alter {fl_start, fl_end}. See unlock_it().
+ */
+
+static void free_lock(struct file *filp, struct file_lock *fl)
+{
+ struct file_lock **fl_p;
+
+ if (fl->fl_owner == NULL) /* sanity check */
+ panic("free_lock: broken lock list\n");
+
+ /*
+ * We only use a singly linked list to save some memory space
+ * (the only place we'd use a doubly linked list is here).
+ */
+
+ for (fl_p = &filp->f_inode->i_flock; *fl_p != NULL; fl_p = &(*fl_p)->fl_next) {
+ if (*fl_p == fl)
+ break;
+ }
+ if (*fl_p == NULL) {
+ printk("free_lock: lock is not in file's lock list\n");
+ } else {
+ *fl_p = (*fl_p)->fl_next;
+ }
+
+ fl->fl_next = file_lock_free_list; /* add to free list */
+ file_lock_free_list = fl;
+ fl->fl_owner = NULL; /* for sanity checks */
+
+ wake_up(&fl->fl_wait);
+}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index ddf74f6..51082b2 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -230,6 +230,7 @@ struct inode * minix_new_inode(int dev)
inode->i_ino = j + i*8192;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = NULL;
+ inode->i_blocks = inode->i_blksize = 0;
return inode;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index b90a4e8..d8ae3bc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -230,6 +230,7 @@ void minix_read_inode(struct inode * inode)
inode->i_nlink = raw_inode->i_nlinks;
inode->i_size = raw_inode->i_size;
inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+ inode->i_blocks = inode->i_blksize = 0;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
inode->i_rdev = raw_inode->i_zone[0];
else for (block = 0; block < 9; block++)
diff --git a/fs/minix/symlink.c b/fs/minix/symlink.c
index 65263a0..b5683ba 100644
--- a/fs/minix/symlink.c
+++ b/fs/minix/symlink.c
@@ -15,7 +15,7 @@
#include <linux/stat.h>
static int minix_readlink(struct inode *, char *, int);
-static struct inode * minix_follow_link(struct inode *, struct inode *);
+static int minix_follow_link(struct inode *, struct inode *, int, int, struct inode **);
/*
* symlinks can't do much...
@@ -37,8 +37,10 @@ struct inode_operations minix_symlink_inode_operations = {
NULL /* truncate */
};
-static struct inode * minix_follow_link(struct inode * dir, struct inode * inode)
+static int minix_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
{
+ int error;
unsigned short fs;
struct buffer_head * bh;
@@ -48,27 +50,30 @@ static struct inode * minix_follow_link(struct inode * dir, struct inode * inode
}
if (!inode) {
iput(dir);
- return NULL;
+ *res_inode = NULL;
+ return -ENOENT;
}
if (!S_ISLNK(inode->i_mode)) {
iput(dir);
- return inode;
+ *res_inode = inode;
+ return 0;
}
__asm__("mov %%fs,%0":"=r" (fs));
if ((current->link_count > 5) || !inode->i_data[0] ||
!(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL;
+ return -ELOOP;
}
iput(inode);
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
current->link_count++;
- inode = _namei(bh->b_data,dir,1);
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
current->link_count--;
__asm__("mov %0,%%fs"::"r" (fs));
brelse(bh);
- return inode;
+ return error;
}
static int minix_readlink(struct inode * inode, char * buffer, int buflen)
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile
index 485a62b..0d172a2 100644
--- a/fs/msdos/Makefile
+++ b/fs/msdos/Makefile
@@ -8,10 +8,9 @@
# Note 2! The CFLAGS definitions are now in the main makefile...
.c.s:
- $(CC) $(CFLAGS) \
- -S -o $*.s $<
+ $(CC) $(CFLAGS) -S $<
.c.o:
- $(CC) $(CFLAGS) -c -o $*.o $<
+ $(CC) $(CFLAGS) -c $<
.s.o:
$(AS) -o $*.o $<
diff --git a/fs/namei.c b/fs/namei.c
index 42eb868..17b3b44 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,9 +19,6 @@
#include <linux/fcntl.h>
#include <linux/stat.h>
-struct inode * _namei(const char * filename, struct inode * base,
- int follow_links);
-
#define ACC_MODE(x) ("\004\002\006\377"[(x)&O_ACCMODE])
/*
@@ -91,18 +88,21 @@ int lookup(struct inode * dir,const char * name, int len,
return dir->i_op->lookup(dir,name,len,result);
}
-struct inode * follow_link(struct inode * dir, struct inode * inode)
+int follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode) /* result is stored in *res_inode; returns 0 or a negative error code such as -ENOENT */
{
if (!dir || !inode) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL; /* nothing to hand back on failure */
+ return -ENOENT;
}
if (!inode->i_op || !inode->i_op->follow_link) {
iput(dir);
- return inode;
+ *res_inode = inode; /* no follow_link operation: the inode itself is the result */
+ return 0;
}
- return inode->i_op->follow_link(dir,inode);
+ return inode->i_op->follow_link(dir,inode,flag,mode,res_inode); /* filesystem-specific resolution; flag, mode and res_inode are passed through untouched */
}
/*
@@ -111,14 +111,15 @@ struct inode * follow_link(struct inode * dir, struct inode * inode)
* dir_namei() returns the inode of the directory of the
* specified name, and the name within that directory.
*/
-static struct inode * dir_namei(const char * pathname,
- int * namelen, const char ** name, struct inode * base)
+static int dir_namei(const char * pathname, int * namelen, const char ** name,
+ struct inode * base, struct inode ** res_inode)
{
char c;
const char * thisname;
int len,error;
struct inode * inode;
+ *res_inode = NULL;
if (!base) {
base = current->pwd;
base->i_count++;
@@ -139,41 +140,48 @@ static struct inode * dir_namei(const char * pathname,
error = lookup(base,thisname,len,&inode);
if (error) {
iput(base);
- return NULL;
+ return error;
}
- if (!(base = follow_link(base,inode)))
- return NULL;
+ error = follow_link(base,inode,0,0,&base);
+ if (error)
+ return error;
}
*name = thisname;
*namelen = len;
- return base;
+ *res_inode = base;
+ return 0;
}
-struct inode * _namei(const char * pathname, struct inode * base,
- int follow_links)
+static int _namei(const char * pathname, struct inode * base,
+ int follow_links, struct inode ** res_inode) /* inode is returned through *res_inode; return value is 0 or the error code of the failing step */
{
const char * basename;
int namelen,error;
struct inode * inode;
- if (!(base = dir_namei(pathname,&namelen,&basename,base)))
- return NULL;
+ *res_inode = NULL; /* stays NULL on every error return below */
+ error = dir_namei(pathname,&namelen,&basename,base,&base); /* base now becomes the directory holding the last path component */
+ if (error)
+ return error;
base->i_count++; /* lookup uses up base */
error = lookup(base,basename,namelen,&inode);
if (error) {
iput(base);
- return NULL;
+ return error; /* hand lookup()'s error code to the caller instead of a bare NULL */
}
- if (follow_links)
- inode = follow_link(base,inode);
- else
+ if (follow_links) {
+ error = follow_link(base,inode,0,0,&inode); /* flag and mode are passed as 0 here; NOTE(review): presumably "plain lookup, not an open" - confirm */
+ if (error)
+ return error;
+ } else
iput(base);
- return inode;
+ *res_inode = inode; /* with follow_links == 0 this is the inode found by lookup(), unresolved */
+ return 0;
}
-struct inode * lnamei(const char * pathname)
+int lnamei(const char * pathname, struct inode ** res_inode) /* like namei(), but passes follow_links = 0 to _namei() */
{
- return _namei(pathname, NULL, 0);
+ return _namei(pathname,NULL,0,res_inode); /* follow_links = 0: the final component is returned without calling follow_link() */
}
/*
@@ -183,9 +191,9 @@ struct inode * lnamei(const char * pathname)
* Open, link etc use their own routines, but this is enough for things
* like 'chmod' etc.
*/
-struct inode * namei(const char * pathname)
+int namei(const char * pathname, struct inode ** res_inode) /* inode comes back in *res_inode; return value is _namei()'s (0 or an error code) */
{
- return _namei(pathname,NULL,1);
+ return _namei(pathname,NULL,1,res_inode); /* follow_links = 1: the final component goes through follow_link(); NULL base leaves the start directory to dir_namei() */
}
/*
@@ -194,7 +202,7 @@ struct inode * namei(const char * pathname)
* namei for open - this is in fact almost the whole open-routine.
*/
int open_namei(const char * pathname, int flag, int mode,
- struct inode ** res_inode)
+ struct inode ** res_inode, struct inode * base)
{
const char * basename;
int namelen,error,i;
@@ -205,8 +213,9 @@ int open_namei(const char * pathname, int flag, int mode,
flag |= O_WRONLY;
mode &= 07777 & ~current->umask;
mode |= I_REGULAR;
- if (!(dir = dir_namei(pathname,&namelen,&basename,NULL)))
- return -ENOENT;
+ error = dir_namei(pathname,&namelen,&basename,base,&dir);
+ if (error)
+ return error;
if (!namelen) { /* special case: '/usr/' etc */
if (!(flag & (O_ACCMODE|O_CREAT|O_TRUNC))) {
*res_inode=dir;
@@ -241,8 +250,8 @@ int open_namei(const char * pathname, int flag, int mode,
iput(inode);
return -EEXIST;
}
- if (!(inode = follow_link(dir,inode)))
- return -ELOOP;
+ if (error = follow_link(dir,inode,flag,mode,&inode))
+ return error;
if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
if (IS_NODEV(inode)) {
iput(inode);
@@ -289,11 +298,12 @@ int open_namei(const char * pathname, int flag, int mode,
int do_mknod(const char * filename, int mode, int dev)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
-
- if (!(dir = dir_namei(filename,&namelen,&basename, NULL)))
- return -ENOENT;
+
+ error = dir_namei(filename,&namelen,&basename, NULL, &dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
@@ -323,11 +333,12 @@ int sys_mknod(const char * filename, int mode, int dev)
int sys_mkdir(const char * pathname, int mode)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(pathname,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(pathname,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
@@ -350,11 +361,12 @@ int sys_mkdir(const char * pathname, int mode)
int sys_rmdir(const char * name)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(name,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
@@ -377,11 +389,12 @@ int sys_rmdir(const char * name)
int sys_unlink(const char * name)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(name,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -EPERM;
@@ -405,11 +418,11 @@ int sys_symlink(const char * oldname, const char * newname)
{
struct inode * dir;
const char * basename;
- int namelen;
+ int namelen, error;
- dir = dir_namei(newname,&namelen,&basename, NULL);
- if (!dir)
- return -ENOENT;
+ error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
@@ -433,15 +446,15 @@ int sys_link(const char * oldname, const char * newname)
{
struct inode * oldinode, * dir;
const char * basename;
- int namelen;
+ int namelen, error;
- oldinode = namei(oldname);
- if (!oldinode)
- return -ENOENT;
- dir = dir_namei(newname,&namelen,&basename, NULL);
- if (!dir) {
+ error = namei(oldname, &oldinode);
+ if (error)
+ return error;
+ error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+ if (error) {
iput(oldinode);
- return -EACCES;
+ return error;
}
if (!namelen) {
iput(oldinode);
@@ -475,11 +488,11 @@ int sys_rename(const char * oldname, const char * newname)
{
struct inode * old_dir, * new_dir;
const char * old_base, * new_base;
- int old_len, new_len;
+ int old_len, new_len, error;
- old_dir = dir_namei(oldname,&old_len,&old_base, NULL);
- if (!old_dir)
- return -ENOENT;
+ error = dir_namei(oldname,&old_len,&old_base,NULL,&old_dir);
+ if (error)
+ return error;
if (!permission(old_dir,MAY_WRITE)) {
iput(old_dir);
return -EACCES;
@@ -490,10 +503,10 @@ int sys_rename(const char * oldname, const char * newname)
iput(old_dir);
return -EPERM;
}
- new_dir = dir_namei(newname,&new_len,&new_base, NULL);
- if (!new_dir) {
+ error = dir_namei(newname,&new_len,&new_base,NULL,&new_dir);
+ if (error) {
iput(old_dir);
- return -ENOENT;
+ return error;
}
if (!permission(new_dir,MAY_WRITE)) {
iput(old_dir);
diff --git a/fs/open.c b/fs/open.c
index 183b10c..3ee6c98 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -17,6 +17,8 @@
#include <linux/tty.h>
#include <asm/segment.h>
+extern void fcntl_remove_locks(struct task_struct *, struct file *);
+
struct file_operations * chrdev_fops[MAX_CHRDEV] = {
NULL,
};
@@ -33,10 +35,12 @@ int sys_ustat(int dev, struct ustat * ubuf)
int sys_statfs(const char * path, struct statfs * buf)
{
struct inode * inode;
+ int error;
verify_area(buf, sizeof(struct statfs));
- if (!(inode = namei(path)))
- return -ENOENT;
+ error = namei(path,&inode);
+ if (error)
+ return error;
if (!inode->i_sb->s_op->statfs) {
iput(inode);
return -ENOSYS;
@@ -65,9 +69,11 @@ int sys_fstatfs(unsigned int fd, struct statfs * buf)
int sys_truncate(const char * path, unsigned int length)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(path)))
- return -ENOENT;
+ error = namei(path,&inode);
+ if (error)
+ return error;
if (S_ISDIR(inode->i_mode) || !permission(inode,MAY_WRITE)) {
iput(inode);
return -EACCES;
@@ -112,9 +118,11 @@ int sys_utime(char * filename, struct utimbuf * times)
{
struct inode * inode;
long actime,modtime;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (IS_RDONLY(inode)) {
iput(inode);
return -EROFS;
@@ -151,8 +159,9 @@ int sys_access(const char * filename,int mode)
int res, i_mode;
mode &= 0007;
- if (!(inode=namei(filename)))
- return -EACCES;
+ res = namei(filename,&inode);
+ if (res)
+ return res;
i_mode = res = inode->i_mode & 0777;
iput(inode);
if (current->uid == inode->i_uid)
@@ -176,9 +185,11 @@ int sys_access(const char * filename,int mode)
int sys_chdir(const char * filename)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode)) {
iput(inode);
return -ENOTDIR;
@@ -195,9 +206,11 @@ int sys_chdir(const char * filename)
int sys_chroot(const char * filename)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode)) {
iput(inode);
return -ENOTDIR;
@@ -232,9 +245,11 @@ int sys_fchmod(unsigned int fd, mode_t mode)
int sys_chmod(const char * filename, mode_t mode)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if ((current->euid != inode->i_uid) && !suser()) {
iput(inode);
return -EPERM;
@@ -274,9 +289,11 @@ int sys_fchown(unsigned int fd, uid_t user, gid_t group)
int sys_chown(const char * filename, uid_t user, gid_t group)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
if (IS_RDONLY(inode)) {
iput(inode);
return -EROFS;
@@ -310,7 +327,7 @@ int sys_open(const char * filename,int flag,int mode)
if (!f)
return -ENFILE;
current->filp[fd] = f;
- if ((i = open_namei(filename,flag,mode,&inode))<0) {
+ if ((i = open_namei(filename,flag,mode,&inode,NULL))<0) {
current->filp[fd]=NULL;
f->f_count--;
return i;
@@ -338,25 +355,23 @@ int sys_creat(const char * pathname, int mode)
return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
}
-static int
-close_fp (struct file *filp)
+static int close_fp(struct file *filp)
{
- struct inode *inode;
+ struct inode *inode;
if (filp->f_count == 0) {
printk("Close: file count is 0\n");
return 0;
}
-
+ inode = filp->f_inode;
+ if (S_ISREG(inode->i_mode))
+ fcntl_remove_locks(current, filp);
if (filp->f_count > 1) {
filp->f_count--;
return 0;
}
-
- inode = filp->f_inode;
if (filp->f_op && filp->f_op->release)
filp->f_op->release(inode,filp);
-
filp->f_count--;
filp->f_inode = NULL;
iput(inode);
@@ -376,94 +391,75 @@ int sys_close(unsigned int fd)
return (close_fp (filp));
}
-/* This routine looks through all the process's and closes any
- references to the current processes tty. To avoid problems with
- process sleeping on an inode which has already been iput, anyprocess
- which is sleeping on the tty is sent a sigkill (It's probably a rogue
- process.) Also no process should ever have /dev/console as it's
- controlling tty, or have it open for reading. So we don't have to
- worry about messing with all the daemons abilities to write messages
- to the console. (Besides they should be using syslog.) */
-
-int
-sys_vhangup(void)
+/*
+ * This routine looks through all the processes and closes any references
+ * to the current process's tty. To avoid problems with processes sleeping
+ * on an inode which has already been iput, any process which is sleeping
+ * on the tty is sent a SIGKILL (It's probably a rogue process.) Also no
+ * process should ever have /dev/console as its controlling tty, or have it
+ * open for reading. So we don't have to worry about messing with all the
+ * daemons' abilities to write messages to the console. (Besides they
+ * should be using syslog.) Returns -EPERM unless superuser, else 0.
+ */
+int sys_vhangup(void)
{
- int i;
- int j;
- struct file *filep;
- struct tty_struct *tty;
- extern void kill_wait (struct wait_queue **q, int signal);
- extern int kill_pg (int pgrp, int sig, int priv);
-
- if (!suser()) return (-EPERM);
-
- /* send the SIGHUP signal. */
- kill_pg (current->pgrp, SIGHUP, 0);
+ int i,j;
+ struct file *filep;
+ struct tty_struct *tty;
+ extern void kill_wait (struct wait_queue **q, int signal);
+ extern int kill_pg (int pgrp, int sig, int priv);
- /* See if there is a controlling tty. */
- if (current->tty < 0) return (0);
-
- for (i = 0; i < NR_TASKS; i++)
- {
- if (task[i] == NULL) continue;
- for (j = 0; j < NR_OPEN; j++)
- {
- filep = task[i]->filp[j];
-
- if (filep == NULL) continue;
-
- /* now we need to check to see if this file points to the
- device we are trying to close. */
-
- if (!S_ISCHR (filep->f_inode->i_mode)) continue;
-
- /* This will catch both /dev/tty and the explicit terminal
- device. However, we must make sure that f_rdev is
- defined and correct. */
-
- if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
- MAJOR(filep->f_inode->i_rdev) == 4 ) &&
- (MAJOR(filep->f_rdev) == 4 &&
- MINOR(filep->f_rdev) == MINOR (current->tty)))
- {
- task[i]->filp[j] = NULL;
+ if (!suser())
+ return -EPERM;
+ /* send the SIGHUP signal. */
+ kill_pg(current->pgrp, SIGHUP, 0);
+ /* See if there is a controlling tty. */
+ if (current->tty < 0)
+ return 0;
+ for (i = 0; i < NR_TASKS; i++) {
+ if (task[i] == NULL)
+ continue;
+ for (j = 0; j < NR_OPEN; j++) {
+ filep = task[i]->filp[j];
+ if (!filep || !filep->f_inode) /* empty slot, or a file that has no inode attached (yet) */
+ continue;
+ if (!S_ISCHR(filep->f_inode->i_mode))
+ continue;
+ if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
+ MAJOR(filep->f_inode->i_rdev) == 4 ) &&
+ (MAJOR(filep->f_rdev) == 4 &&
+ MINOR(filep->f_rdev) == MINOR (current->tty))) {
/* so now we have found something to close. We
need to kill every process waiting on the
inode. */
-
- kill_wait (&filep->f_inode->i_wait, SIGKILL);
+ task[i]->filp[j] = NULL;
+ kill_wait (&filep->f_inode->i_wait, SIGKILL);
/* now make sure they are awake before we close the
file. */
- wake_up (&filep->f_inode->i_wait);
+ wake_up (&filep->f_inode->i_wait);
/* finally close the file. */
- current->close_on_exec &= ~(1<<j);
- close_fp (filep);
- }
-
- }
-
+ task[i]->close_on_exec &= ~(1<<j); /* descriptor j belongs to task[i], not to the caller */
+ close_fp (filep);
+ }
+ }
/* can't let them keep a reference to it around.
But we can't touch current->tty until after the
loop is complete. */
- if (task[i]->tty == current->tty && task[i] != current)
- {
- task[i]->tty = -1;
- }
- }
-
+ if (task[i]->tty == current->tty && task[i] != current) {
+ task[i]->tty = -1;
+ }
+ }
/* need to do tty->session = 0 */
- tty = TTY_TABLE(MINOR(current->tty));
- tty->session = 0;
- tty->pgrp = -1;
- current->tty = -1;
-
-
- return (0);
+ tty = TTY_TABLE(MINOR(current->tty));
+ tty->session = 0;
+ tty->pgrp = -1;
+ current->tty = -1; /* only now: the loop above compares against current->tty */
+ return 0;
}
diff --git a/fs/select.c b/fs/select.c
index 5cd8e0f..4dc1682 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -51,6 +51,8 @@ static int check_in(select_table * wait, struct inode * inode, struct file * fil
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_IN,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
@@ -58,6 +60,8 @@ static int check_out(select_table * wait, struct inode * inode, struct file * fi
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_OUT,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
@@ -65,6 +69,8 @@ static int check_ex(select_table * wait, struct inode * inode, struct file * fil
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_EX,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
@@ -85,15 +91,6 @@ int do_select(fd_set in, fd_set out, fd_set ex,
return -EBADF;
if (!current->filp[i]->f_inode)
return -EBADF;
- if (current->filp[i]->f_inode->i_pipe)
- continue;
- if (S_ISCHR(current->filp[i]->f_inode->i_mode))
- continue;
- if (S_ISFIFO(current->filp[i]->f_inode->i_mode))
- continue;
- if (S_ISSOCK(current->filp[i]->f_inode->i_mode))
- continue;
- return -EBADF;
}
repeat:
wait_table.nr = 0;
diff --git a/fs/stat.c b/fs/stat.c
index 225f9d3..459f418 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -56,35 +56,41 @@ static void cp_new_stat(struct inode * inode, struct new_stat * statbuf)
tmp.st_mtime = inode->i_mtime;
tmp.st_ctime = inode->i_ctime;
/*
- * Right now we fake the st_blocks numbers: we'll eventually have to
- * add st_blocks to the inode, and let the vfs routines keep track of
- * it all. This algorithm doesn't guarantee correct block numbers, but
- * at least it tries to come up with a plausible answer...
- *
- * In fact, the minix fs doesn't use these numbers (it uses 7 and 512
- * instead of 10 and 256), but who cares... It's not that exact anyway.
+ * st_blocks and st_blksize are approximated with a simple algorithm if
+ * they aren't supported directly by the filesystem. The minix and msdos
+ * filesystems don't keep track of blocks, so they would either have to
+ * be counted explicitly (by delving into the file itself), or by using
+ * this simple algorithm to get a reasonable (although not 100% accurate)
+ * value.
*/
- blocks = (tmp.st_size + 1023) / 1024;
- if (blocks > 10) {
- indirect = (blocks - 11)/256+1;
- if (blocks > 10+256) {
- indirect += (blocks - 267)/(256*256)+1;
- if (blocks > 10+256+256*256)
- indirect++;
+ if (!inode->i_blksize) {
+ blocks = (tmp.st_size + 511) / 512;
+ if (blocks > 10) {
+ indirect = (blocks - 11)/256+1;
+ if (blocks > 10+256) {
+ indirect += (blocks - 267)/(256*256)+1;
+ if (blocks > 10+256+256*256)
+ indirect++;
+ }
+ blocks += indirect;
}
- blocks += indirect;
+ tmp.st_blksize = 512;
+ tmp.st_blocks = blocks;
+ } else {
+ tmp.st_blksize = inode->i_blksize;
+ tmp.st_blocks = inode->i_blocks;
}
- tmp.st_blksize = 1024;
- tmp.st_blocks = blocks;
memcpy_tofs(statbuf,&tmp,sizeof(tmp));
}
int sys_stat(char * filename, struct old_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
cp_old_stat(inode,statbuf);
iput(inode);
return 0;
@@ -93,9 +99,11 @@ int sys_stat(char * filename, struct old_stat * statbuf)
int sys_newstat(char * filename, struct new_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
cp_new_stat(inode,statbuf);
iput(inode);
return 0;
@@ -104,9 +112,11 @@ int sys_newstat(char * filename, struct new_stat * statbuf)
int sys_lstat(char * filename, struct old_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
cp_old_stat(inode,statbuf);
iput(inode);
return 0;
@@ -115,9 +125,11 @@ int sys_lstat(char * filename, struct old_stat * statbuf)
int sys_newlstat(char * filename, struct new_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
cp_new_stat(inode,statbuf);
iput(inode);
return 0;
@@ -148,12 +160,14 @@ int sys_newfstat(unsigned int fd, struct new_stat * statbuf)
int sys_readlink(const char * path, char * buf, int bufsiz)
{
struct inode * inode;
+ int error;
if (bufsiz <= 0)
return -EINVAL;
verify_area(buf,bufsiz);
- if (!(inode = lnamei(path)))
- return -ENOENT;
+ error = lnamei(path,&inode);
+ if (error)
+ return error;
if (!inode->i_op || !inode->i_op->readlink) {
iput(inode);
return -EINVAL;
diff --git a/fs/super.c b/fs/super.c
index 4d0e030..c3c47dc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -21,6 +21,7 @@
int sync_dev(int dev);
void wait_for_keypress(void);
+void fcntl_init_locks(void);
/* set_bit uses setb, as gas doesn't recognize setc */
#define set_bit(bitnr,addr) ({ \
@@ -178,8 +179,9 @@ int sys_umount(char * dev_name)
if (!suser())
return -EPERM;
- if (!(inode = namei(dev_name)))
- return -ENOENT;
+ retval = namei(dev_name,&inode);
+ if (retval)
+ return retval;
dev = inode->i_rdev;
if (!S_ISBLK(inode->i_mode)) {
iput(inode);
@@ -208,9 +210,11 @@ static int do_mount(int dev, const char * dir, char * type, int flags, void * da
{
struct inode * inode, * dir_i;
struct super_block * sb;
+ int error;
- if (!(dir_i = namei(dir)))
- return -ENOENT;
+ error = namei(dir,&dir_i);
+ if (error)
+ return error;
if (dir_i->i_count != 1 || dir_i->i_mount) {
iput(dir_i);
return -EBUSY;
@@ -256,7 +260,7 @@ int sys_mount(char * dev_name, char * dir_name, char * type,
{
struct inode * inode;
int dev;
- int retval = 0;
+ int retval;
char tmp[100],*t;
int i;
unsigned long flags = 0;
@@ -264,8 +268,9 @@ int sys_mount(char * dev_name, char * dir_name, char * type,
if (!suser())
return -EPERM;
- if (!(inode = namei(dev_name)))
- return -ENOENT;
+ retval = namei(dev_name,&inode);
+ if (retval)
+ return retval;
dev = inode->i_rdev;
if (!S_ISBLK(inode->i_mode))
retval = -EPERM;
@@ -314,6 +319,7 @@ void mount_root(void)
panic("bad i-node size");
for(i=0;i<NR_FILE;i++)
file_table[i].f_count=0;
+ fcntl_init_locks();
if (MAJOR(ROOT_DEV) == 2) {
printk("Insert root floppy and press ENTER");
wait_for_keypress();
diff --git a/include/asm/system.h b/include/asm/system.h
index 877ae34..59b0ba2 100644
--- a/include/asm/system.h
+++ b/include/asm/system.h
@@ -49,8 +49,8 @@ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
*((gate_addr)+1) = (((base) & 0x0000ffff)<<16) | \
((limit) & 0x0ffff); }
-#define _set_tssldt_desc(n,addr,type) \
-__asm__ __volatile__ ("movw $232,%1\n\t" \
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw $" #limit ",%1\n\t" \
"movw %%ax,%2\n\t" \
"rorl $16,%%eax\n\t" \
"movb %%al,%3\n\t" \
@@ -58,9 +58,9 @@ __asm__ __volatile__ ("movw $232,%1\n\t" \
"movb $0x00,%5\n\t" \
"movb %%ah,%6\n\t" \
"rorl $16,%%eax" \
- ::"a" (addr), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
+ ::"a" (addr+0xc0000000), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
"m" (*(n+5)), "m" (*(n+6)), "m" (*(n+7)) \
)
-#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x89")
-#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x82")
+#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),231,"0x89")
+#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),23,"0x82")
diff --git a/include/linux/ext_fs_i.h b/include/linux/ext_fs_i.h
new file mode 100644
index 0000000..c64bc62
--- /dev/null
+++ b/include/linux/ext_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _EXT_FS_I
+#define _EXT_FS_I
+
+/*
+ * extended file system inode data in memory (no fields yet: placeholder member of the union in struct inode)
+ */
+struct ext_inode_info {
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c64bd4..4ce9b6f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -111,6 +111,10 @@ struct buffer_head {
struct buffer_head * b_reqnext; /* request queue */
};
+#include <linux/minix_fs_i.h>
+#include <linux/ext_fs_i.h>
+#include <linux/msdos_fs_i.h>
+
struct inode {
dev_t i_dev;
unsigned long i_ino;
@@ -123,11 +127,14 @@ struct inode {
time_t i_atime;
time_t i_mtime;
time_t i_ctime;
+ unsigned long i_blksize;
+ unsigned long i_blocks;
unsigned long i_data[16];
struct inode_operations * i_op;
struct super_block * i_sb;
struct wait_queue * i_wait;
struct wait_queue * i_wait2; /* for pipes */
+ struct file_lock *i_flock;
unsigned short i_count;
unsigned short i_flags;
unsigned char i_lock;
@@ -136,6 +143,11 @@ struct inode {
unsigned char i_mount;
unsigned char i_seek;
unsigned char i_update;
+ union {
+ struct minix_inode_info minix_i;
+ struct ext_inode_info ext_i;
+ struct msdos_inode_info msdos_i;
+ } u;
};
struct file {
@@ -149,6 +161,16 @@ struct file {
off_t f_pos;
};
+struct file_lock {
+ struct file_lock *fl_next; /* singly linked list: next lock on the same inode, or next entry on the free list */
+ struct task_struct *fl_owner; /* NULL if on free list, for sanity checks */
+ struct wait_queue *fl_wait; /* wait queue that free_lock() wakes when the lock is released */
+ char fl_type; /* NOTE(review): presumably the fcntl lock type (F_RDLCK/F_WRLCK) - confirm */
+ char fl_whence; /* NOTE(review): presumably taken from the caller's struct flock - confirm */
+ off_t fl_start; /* first offset covered by the lock */
+ off_t fl_end; /* last offset covered, inclusive (unlock_it() trims with "start - 1" / "end + 1") */
+};
+
#include <linux/minix_fs_sb.h>
#include <linux/ext_fs_sb.h>
#include <linux/msdos_fs_sb.h>
@@ -196,7 +218,7 @@ struct inode_operations {
int (*mknod) (struct inode *,const char *,int,int,int);
int (*rename) (struct inode *,const char *,int,struct inode *,const char *,int);
int (*readlink) (struct inode *,char *,int);
- struct inode * (*follow_link) (struct inode *, struct inode *);
+ int (*follow_link) (struct inode *, struct inode *, int flag, int mode, struct inode ** res_inode);
int (*bmap) (struct inode *,int);
void (*truncate) (struct inode *);
};
@@ -239,13 +261,11 @@ extern void floppy_off(unsigned int dev);
extern void sync_inodes(void);
extern void wait_on(struct inode * inode);
extern int bmap(struct inode * inode,int block);
-extern struct inode * namei(const char * pathname);
-extern struct inode * lnamei(const char * pathname);
+extern int namei(const char * pathname, struct inode ** res_inode);
+extern int lnamei(const char * pathname, struct inode ** res_inode);
extern int permission(struct inode * inode,int mask);
-extern struct inode * _namei(const char * filename, struct inode * base,
- int follow_links);
extern int open_namei(const char * pathname, int flag, int mode,
- struct inode ** res_inode);
+ struct inode ** res_inode, struct inode * base);
extern int do_mknod(const char * filename, int mode, int dev);
extern void iput(struct inode * inode);
extern struct inode * iget(int dev,int nr);
diff --git a/include/linux/head.h b/include/linux/head.h
index b871742..8911a68 100644
--- a/include/linux/head.h
+++ b/include/linux/head.h
@@ -5,7 +5,7 @@ typedef struct desc_struct {
unsigned long a,b;
} desc_table[256];
-extern unsigned long pg_dir[1024];
+extern unsigned long swapper_pg_dir[1024];
extern desc_table idt,gdt;
#define GDT_NUL 0
diff --git a/include/linux/limits.h b/include/linux/limits.h
index 1de0388..f3912fa 100644
--- a/include/linux/limits.h
+++ b/include/linux/limits.h
@@ -8,6 +8,7 @@
#define NR_FILE 128
#define NR_SUPER 8
#define NR_HASH 997
+#define NR_FILE_LOCKS 32
#define BLOCK_SIZE 1024
#define BLOCK_SIZE_BITS 10
#define MAX_CHRDEV 16
diff --git a/include/linux/minix_fs_i.h b/include/linux/minix_fs_i.h
new file mode 100644
index 0000000..dabe5af
--- /dev/null
+++ b/include/linux/minix_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _MINIX_FS_I
+#define _MINIX_FS_I
+
+/*
+ * minix fs inode data in memory (no fields yet: placeholder member of the union in struct inode)
+ */
+struct minix_inode_info {
+};
+
+#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7b6af2..9a0ff3e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -57,10 +57,12 @@ extern void rw_swap_page(int rw, unsigned int nr, char * buf);
/* memory.c */
extern unsigned long get_free_page(int priority);
-extern unsigned long put_dirty_page(unsigned long page,unsigned long address);
+extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
+ unsigned long address);
extern void free_page(unsigned long addr);
-extern int free_page_tables(unsigned long from,unsigned long size);
-extern int copy_page_tables(unsigned long from,unsigned long to,long size);
+extern void free_page_tables(struct task_struct * tsk);
+extern void clear_page_tables(struct task_struct * tsk);
+extern int copy_page_tables(struct task_struct * new);
extern int unmap_page_range(unsigned long from, unsigned long size);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
int permiss);
@@ -82,7 +84,7 @@ extern void swap_free(unsigned int page_nr);
extern void swap_in(unsigned long *table_ptr);
#define invalidate() \
-__asm__("movl %%eax,%%cr3"::"a" (0))
+__asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3":::"ax") /* write %cr3 back with the value it already holds (a %cr3 reload flushes the TLB); the old form always loaded 0 */
extern unsigned long low_memory;
extern unsigned long high_memory;
diff --git a/include/linux/msdos_fs_i.h b/include/linux/msdos_fs_i.h
new file mode 100644
index 0000000..bd900c0
--- /dev/null
+++ b/include/linux/msdos_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _MSDOS_FS_I
+#define _MSDOS_FS_I
+
+/*
+ * msdos file system inode data in memory (no fields yet: placeholder member of the union in struct inode)
+ */
+struct msdos_inode_info {
+};
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21a3fbb..fab929f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3,33 +3,22 @@
#define HZ 100
+/*
+ * This is the maximum nr of tasks - change it if you need to
+ */
#define NR_TASKS 64
-#define TASK_SIZE 0x04000000
-#define LIBRARY_SIZE 0x00400000
+
+/*
+ * User space process size: 3GB. This is hardcoded into a few places,
+ * so don't change it unless you know what you are doing.
+ */
+#define TASK_SIZE 0xc0000000
/*
* Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
*/
#define IO_BITMAP_SIZE 32
-#if (TASK_SIZE & 0x3fffff)
-#error "TASK_SIZE must be multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE & 0x3fffff)
-#error "LIBRARY_SIZE must be a multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE >= (TASK_SIZE/2))
-#error "LIBRARY_SIZE too damn big!"
-#endif
-
-#if (((TASK_SIZE>>16)*NR_TASKS) != 0x10000)
-#error "TASK_SIZE*NR_TASKS must be 4GB"
-#endif
-
-#define LIBRARY_OFFSET (TASK_SIZE - LIBRARY_SIZE)
-
#define CT_TO_SECS(x) ((x) / HZ)
#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
@@ -43,6 +32,7 @@
#include <linux/time.h>
#include <linux/param.h>
#include <linux/resource.h>
+#include <linux/vm86.h>
#if (NR_OPEN > 32)
#error "Currently the close-on-exec-flags and select masks are in one long, max 32 files/proc"
@@ -115,6 +105,7 @@ struct task_struct {
long signal;
struct sigaction sigaction[32];
long blocked; /* bitmap of masked signals */
+ unsigned long saved_kernel_stack;
/* various fields */
int exit_code;
int dumpable:1;
@@ -146,6 +137,7 @@ struct task_struct {
unsigned short used_math;
unsigned short rss; /* number of resident pages */
char comm[8];
+ struct vm86_struct * vm86_info;
/* file system info */
int link_count;
int tty; /* -1 if no tty, so it must be signed */
@@ -157,6 +149,7 @@ struct task_struct {
struct inode * library;
unsigned long start;
unsigned long length;
+ unsigned long bss;
} libraries[MAX_SHARED_LIBS];
int numlibraries;
struct file * filp[NR_OPEN];
@@ -173,9 +166,6 @@ struct task_struct {
#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */
/* Not implemented yet, only for 486*/
#define PF_PTRACED 0x00000010 /* set if ptrace (0) has been called. */
-#define PF_VM86 0x00000020 /* set if process can execute a vm86 */
- /* task. */
- /* not impelmented. */
/*
* INIT_TASK is used to set up the first task table, touch at
@@ -183,7 +173,7 @@ struct task_struct {
*/
#define INIT_TASK \
/* state etc */ { 0,15,15, \
-/* signals */ 0,{{},},0, \
+/* signals */ 0,{{},},0,0, \
/* ec,brk... */ 0,0,0,0,0,0,0,0, \
/* pid etc.. */ 0,0,0,0, \
/* suppl grps*/ {NOGROUP,}, \
@@ -199,15 +189,16 @@ struct task_struct {
/* math */ 0, \
/* rss */ 2, \
/* comm */ "swapper", \
+/* vm86_info */ NULL, \
/* fs info */ 0,-1,0022,NULL,NULL,NULL, \
/* libraries */ { { NULL, 0, 0}, }, 0, \
/* filp */ {NULL,}, 0, \
{ \
{0,0}, \
-/* ldt */ {0x9f,0xc0fa00}, \
- {0x9f,0xc0f200} \
+/* ldt */ {0x9f,0xc0c0fa00}, \
+ {0x9f,0xc0c0f200} \
}, \
-/*tss*/ {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&pg_dir,\
+/*tss*/ {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&swapper_pg_dir,\
0,0,0,0,0,0,0,0, \
0,0,0x17,0x17,0x17,0x17,0x17,0x17, \
_LDT(0),0x80000000,{0xffffffff}, \
diff --git a/include/linux/sys.h b/include/linux/sys.h
index a479faa..885fe2b 100644
--- a/include/linux/sys.h
+++ b/include/linux/sys.h
@@ -115,6 +115,7 @@ extern int sys_newuname();
extern int sys_iopl();
extern int sys_vhangup();
extern int sys_idle();
+extern int sys_vm86();
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
@@ -137,7 +138,7 @@ sys_truncate, sys_ftruncate, sys_fchmod, sys_fchown, sys_getpriority,
sys_setpriority, sys_profil, sys_statfs, sys_fstatfs, sys_ioperm,
sys_socketcall, sys_syslog, sys_setitimer, sys_getitimer, sys_newstat,
sys_newlstat, sys_newfstat, sys_newuname, sys_iopl, sys_vhangup,
-sys_idle };
+sys_idle, sys_vm86 };
/* So we don't have to do any more manual updating.... */
int NR_syscalls = sizeof(sys_call_table)/sizeof(fn_ptr);
diff --git a/include/linux/unistd.h b/include/linux/unistd.h
index 569c552..a15853f 100644
--- a/include/linux/unistd.h
+++ b/include/linux/unistd.h
@@ -119,6 +119,7 @@
#define __NR_iopl 110
#define __NR_vhangup 111
#define __NR_idle 112
+#define __NR_vm86 113
extern int errno;
diff --git a/include/linux/vm86.h b/include/linux/vm86.h
new file mode 100644
index 0000000..96b8959
--- /dev/null
+++ b/include/linux/vm86.h
@@ -0,0 +1,55 @@
+#ifndef _LINUX_VM86_H
+#define _LINUX_VM86_H
+
+#define VM_MASK 0x00020000
+
+/*
+ * This is the stack-layout when we have done a "SAVE_ALL" from vm86
+ * mode - the main change is that the old segment descriptors aren't
+ * useful any more and are forced to be zero by the kernel (and the
+ * hardware when a trap occurs), and the real segment descriptors are
+ * at the end of the structure. Look at ptrace.h to see the "normal"
+ * setup.
+ */
+
+struct vm86_regs { /* field order is load-bearing: it mirrors the saved-register stack frame, so do not reorder */
+/*
+ * normal regs, with special meaning for the segment descriptors..
+ */
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax; /* sys_vm86() also passes this value in %eax when it jumps to ret_from_sys_call */
+ long __null_ds; /* the four __null_* slots are forced to 0 by sys_vm86() */
+ long __null_es;
+ long __null_fs;
+ long __null_gs;
+ long orig_eax;
+ long eip;
+ long cs;
+ long eflags; /* offset 0x38 with 4-byte longs, matching EFLAGS in sys_call.S; sys_vm86() sets VM_MASK here */
+ long esp; /* offset 0x3C, matching OLDESP in sys_call.S */
+ long ss; /* offset 0x40, matching OLDSS in sys_call.S */
+/*
+ * these are specific to v86 mode:
+ */
+ long es; /* the real v86 segment values live in these trailing slots (see the layout comment above the struct) */
+ long ds;
+ long fs;
+ long gs;
+};
+
+/*
+ * flags isn't even used yet: it's just there as an example of
+ * what kind of information we might want to give sys_vm86() (or
+ * want it to return to us).
+ */
+struct vm86_struct { /* argument block of sys_vm86(); the task keeps the caller's pointer in task_struct.vm86_info */
+ struct vm86_regs regs; /* register image: loaded by sys_vm86(), written back by save_v86_state() */
+ unsigned long flags; /* copied in together with regs by sys_vm86() but never examined there */
+};
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 695e77c..3bb280e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -29,25 +29,25 @@ kernel.o: $(OBJS)
sync
kernelsubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
sys_call.s: sys_call.S
sys_call.o: sys_call.s
sched.o: sched.c
- $(CC) $(CFLAGS) -fno-omit-frame-pointer -c $<
+ $(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
clean:
rm -f core *.o *.a tmp_make sys_call.s
for i in *.c;do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
diff --git a/kernel/blk_drv/Makefile b/kernel/blk_drv/Makefile
index 232a0c8..55d48ca 100644
--- a/kernel/blk_drv/Makefile
+++ b/kernel/blk_drv/Makefile
@@ -28,18 +28,18 @@ blk_drv.a: $(OBJS)
sync
scsisubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
for i in *.c;do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep); done
dummy:
diff --git a/kernel/blk_drv/blk.h b/kernel/blk_drv/blk.h
index 70dab1b..2288c57 100644
--- a/kernel/blk_drv/blk.h
+++ b/kernel/blk_drv/blk.h
@@ -69,6 +69,7 @@ extern struct wait_queue * wait_for_request;
extern int * blk_size[NR_BLK_DEV];
+extern unsigned long hd_init(unsigned long mem_start, unsigned long mem_end);
extern int is_read_only(int dev);
extern void set_device_ro(int dev,int flag);
diff --git a/kernel/blk_drv/hd.c b/kernel/blk_drv/hd.c
index f8d7d21..77c76fc 100644
--- a/kernel/blk_drv/hd.c
+++ b/kernel/blk_drv/hd.c
@@ -53,7 +53,8 @@ static inline unsigned char CMOS_READ(unsigned char addr)
static void recal_intr(void);
static void bad_rw_intr(void);
-static int recalibrate = 0;
+static char recalibrate[ MAX_HD ] = { 0, };
+
static int reset = 0;
#if (HD_DELAY > 0)
@@ -221,6 +222,8 @@ void unexpected_hd_interrupt(void)
static void bad_rw_intr(void)
{
+ int i;
+
if (!CURRENT)
return;
if (++CURRENT->errors >= MAX_ERRORS)
@@ -228,7 +231,8 @@ static void bad_rw_intr(void)
else if (CURRENT->errors > MAX_ERRORS/2)
reset = 1;
else
- recalibrate = 1;
+ for (i=0; i < NR_HD; i++)
+ recalibrate[i] = 1;
}
static inline int wait_DRQ(void)
@@ -378,7 +382,7 @@ static void hd_times_out(void)
static void do_hd_request(void)
{
unsigned int block,dev;
- unsigned int sec,head,cyl;
+ unsigned int sec,head,cyl,track;
unsigned int nsect;
repeat:
@@ -399,24 +403,26 @@ repeat:
}
block += hd[dev].start_sect;
dev >>= 6;
- sec = block % hd_info[dev].sect;
- block /= hd_info[dev].sect;
- head = block % hd_info[dev].head;
- cyl = block / hd_info[dev].head;
- sec++;
+ sec = block % hd_info[dev].sect + 1;
+ track = block / hd_info[dev].sect;
+ head = track % hd_info[dev].head;
+ cyl = track / hd_info[dev].head;
#ifdef DEBUG
printk("hd%d : cyl = %d, head = %d, sector = %d, buffer = %08x\n",
dev, cyl, head, sec, CURRENT->buffer);
#endif
cli();
if (reset) {
- recalibrate = 1;
+ int i;
+
+ for (i=0; i < NR_HD; i++)
+ recalibrate[i] = 1;
reset_hd();
sti();
return;
}
- if (recalibrate) {
- recalibrate = 0;
+ if (recalibrate[dev]) {
+ recalibrate[dev] = 0;
hd_out(dev,hd_info[dev].sect,0,0,0,WIN_RESTORE,&recal_intr);
if (reset)
goto repeat;
@@ -434,13 +440,16 @@ repeat:
}
port_write(HD_DATA,CURRENT->buffer,256);
sti();
- } else if (CURRENT->cmd == READ) {
+ return;
+ }
+ if (CURRENT->cmd == READ) {
hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
if (reset)
goto repeat;
sti();
- } else
- panic("unknown hd-command");
+ return;
+ }
+ panic("unknown hd-command");
}
static int hd_ioctl(struct inode * inode, struct file * file,
@@ -481,7 +490,6 @@ static void hd_release(struct inode * inode, struct file * file)
sync_dev(inode->i_rdev);
}
-
static void hd_geninit();
static struct gendisk hd_gendisk = {
@@ -500,11 +508,11 @@ static struct gendisk hd_gendisk = {
static void hd_geninit(void)
{
- int drive;
+ int drive, i;
#ifndef HD_TYPE
extern struct drive_info drive_info;
void *BIOS = (void *) &drive_info;
- int cmos_disks, i;
+ int cmos_disks;
for (drive=0 ; drive<2 ; drive++) {
hd_info[drive].cyl = *(unsigned short *) BIOS;
@@ -593,7 +601,7 @@ static struct sigaction hd_sigaction = {
NULL
};
-unsigned long hd_init(unsigned long mem_start)
+unsigned long hd_init(unsigned long mem_start, unsigned long mem_end)
{
blk_dev[MAJOR_NR].request_fn = DEVICE_REQUEST;
blkdev_fops[MAJOR_NR] = &hd_fops;
diff --git a/kernel/blk_drv/ll_rw_blk.c b/kernel/blk_drv/ll_rw_blk.c
index 1a5f640..0589009 100644
--- a/kernel/blk_drv/ll_rw_blk.c
+++ b/kernel/blk_drv/ll_rw_blk.c
@@ -102,9 +102,6 @@ void set_device_ro(int dev,int flag)
* add-request adds a request to the linked list.
* It disables interrupts so that it can muck with the
* request-lists in peace.
- *
- * Note that swapping requests always go before other requests,
- * and are done in the order they appear.
*/
static void add_request(struct blk_dev_struct * dev, struct request * req)
{
@@ -121,11 +118,6 @@ static void add_request(struct blk_dev_struct * dev, struct request * req)
return;
}
for ( ; tmp->next ; tmp = tmp->next) {
- if (!req->bh)
- if (tmp->next->bh)
- break;
- else
- continue;
if ((IN_ORDER(tmp,req) ||
!IN_ORDER(tmp,tmp->next)) &&
IN_ORDER(req,tmp->next))
@@ -208,9 +200,10 @@ repeat:
sti();
goto repeat;
-found: sti();
+found:
/* fill up the request-info, and add it to the queue */
req->dev = bh->b_dev;
+ sti();
req->cmd = rw;
req->errors = 0;
req->sector = sector;
diff --git a/kernel/chr_drv/mem.c b/kernel/chr_drv/mem.c
index 1496239..6caba5b 100644
--- a/kernel/chr_drv/mem.c
+++ b/kernel/chr_drv/mem.c
@@ -38,7 +38,7 @@ static int read_mem(struct inode * inode, struct file * file,char * buf, int cou
while (count > 0) {
if (current->signal & ~current->blocked)
break;
- pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+ pde = current->tss.cr3 + (addr >> 20 & 0xffc);
pte = *(unsigned long *) pde;
if (!(pte & PAGE_PRESENT))
break;
@@ -75,7 +75,7 @@ static int write_mem(struct inode * inode, struct file * file,char * buf, int co
while (count > 0) {
if (current->signal & ~current->blocked)
break;
- pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+ pde = current->tss.cr3 + (addr >> 20 & 0xffc);
pte = *(unsigned long *) pde;
if (!(pte & PAGE_PRESENT))
break;
diff --git a/kernel/exit.c b/kernel/exit.c
index 8cc5451..11ec282 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -321,8 +321,7 @@ volatile void do_exit(long code)
int i;
fake_volatile:
- free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
- free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+ free_page_tables(current);
for (i=0 ; i<NR_OPEN ; i++)
if (current->filp[i])
sys_close(i);
diff --git a/kernel/fork.c b/kernel/fork.c
index e80abe8..8024f29 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -56,15 +56,12 @@ int copy_mem(int nr,struct task_struct * p)
}
if (data_limit < code_limit)
panic("Bad data_limit");
- new_data_base = new_code_base = nr * TASK_SIZE;
+ new_data_base = old_data_base;
+ new_code_base = old_code_base;
p->start_code = new_code_base;
set_base(p->ldt[1],new_code_base);
set_base(p->ldt[2],new_data_base);
- if (copy_page_tables(old_data_base,new_data_base,data_limit)) {
- free_page_tables(new_data_base,data_limit);
- return -ENOMEM;
- }
- return 0;
+ return copy_page_tables(p);
}
static int find_empty_process(void)
diff --git a/kernel/math/emulate.c b/kernel/math/emulate.c
index 1df0691..9c86a6b 100644
--- a/kernel/math/emulate.c
+++ b/kernel/math/emulate.c
@@ -62,6 +62,9 @@ static void do_emu(struct info * info)
else
I387.swd &= 0x7fff;
ORIG_EIP = EIP;
+/* We cannot handle emulation in v86-mode */
+ if (EFLAGS & 0x00020000)
+ math_abort(info,SIGILL);
/* 0x0007 means user code space */
if (CS != 0x000F) {
printk("math_emulate: %04x:%08x\n\r",CS,EIP);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 07f3a8d..73575ab 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -88,7 +88,6 @@ static unsigned long get_long(struct task_struct * tsk,
{
unsigned long page;
- addr += tsk->start_code;
repeat:
page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
page = *(unsigned long *) page;
@@ -117,7 +116,6 @@ static void put_long(struct task_struct * tsk, unsigned long addr,
{
unsigned long page;
- addr += tsk->start_code;
repeat:
page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
page = *(unsigned long *) page;
diff --git a/kernel/sched.c b/kernel/sched.c
index 1e8a710..9a4baea 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -393,7 +393,7 @@ struct timer_struct timer_table[32];
* irq uses this to decide if it should update the user or system
* times.
*/
-static void do_timer(int regs)
+static void do_timer(struct pt_regs * regs)
{
unsigned long mask;
struct timer_struct *tp = timer_table+0;
@@ -401,7 +401,7 @@ static void do_timer(int regs)
static int avg_cnt = 0;
jiffies++;
- if (3 & ((struct pt_regs *) regs)->cs) {
+ if ((VM_MASK & regs->eflags) || (3 & regs->cs)) {
current->utime++;
/* Update ITIMER_VIRT for current task if not in a system call */
if (current->it_virt_value && !(--current->it_virt_value)) {
@@ -412,7 +412,7 @@ static void do_timer(int regs)
current->stime++;
#ifdef PROFILE_SHIFT
if (prof_buffer && current != task[0]) {
- unsigned long eip = ((struct pt_regs *) regs)->eip;
+ unsigned long eip = regs->eip;
eip >>= PROFILE_SHIFT;
if (eip < prof_len)
prof_buffer[eip]++;
@@ -543,5 +543,5 @@ void sched_init(void)
outb_p(0x36,0x43); /* binary, mode 3, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff , 0x40); /* LSB */
outb(LATCH >> 8 , 0x40); /* MSB */
- request_irq(TIMER_IRQ,do_timer);
+ request_irq(TIMER_IRQ,(void (*)(int)) do_timer);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index ca850fa..85b248b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -135,11 +135,6 @@ int do_signal(long signr,struct pt_regs * regs)
int longs;
unsigned long * tmp_esp;
-#ifdef notdef
- printk("pid: %d, signr: %x, eax=%d, oeax = %d, int=%d\n",
- current->pid, signr, regs->eax, regs->orig_eax,
- sa->sa_flags & SA_INTERRUPT);
-#endif
sa_handler = (unsigned long) sa->sa_handler;
if ((regs->orig_eax != -1) &&
((regs->eax == -ERESTARTSYS) || (regs->eax == -ERESTARTNOINTR))) {
diff --git a/kernel/sys.c b/kernel/sys.c
index 94a8de5..7368805 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,12 +13,14 @@
#include <linux/utsname.h>
#include <linux/param.h>
#include <linux/resource.h>
+#include <linux/signal.h>
#include <linux/string.h>
+#include <linux/ptrace.h>
#include <asm/segment.h>
/*
- * this indicates wether you can reboot with ctrl-alt-del: the deault is yes
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
static int C_A_D = 1;
@@ -128,6 +130,53 @@ int sys_prof()
return -ENOSYS;
}
+unsigned long save_v86_state(int signr,struct vm86_regs * regs) /* leave vm86 mode for signal delivery: called from ret_from_sys_call when VM_MASK is set in the saved eflags; signr is not used in this body */
+{
+ unsigned long stack; /* value handed back to the caller, which loads it into %ebx and %esp */
+
+ if (!current->vm86_info) { /* sys_vm86() records the vm86_struct pointer here; without it there is nowhere to save the frame */
+ printk("no vm86_info: BAD\n");
+ do_exit(SIGSEGV);
+ }
+ memcpy_tofs(&(current->vm86_info->regs),regs,sizeof(*regs)); /* write the v86 register frame back into the vm86_struct that was passed to sys_vm86() */
+ stack = current->tss.esp0; /* sys_vm86() stored the address of its own register frame in tss.esp0 */
+ current->tss.esp0 = current->saved_kernel_stack; /* restore the esp0 value that sys_vm86() saved */
+ current->saved_kernel_stack = 0; /* zero means "not in vm86": sys_vm86() returns -EPERM while this is non-zero */
+ return stack;
+}
+
+int sys_vm86(struct vm86_struct * v86) /* vm86 system call: load the register image from *v86 and continue at ret_from_sys_call with VM_MASK set in eflags */
+{
+ struct vm86_struct info; /* kernel-stack copy of the caller's structure */
+ struct pt_regs * pt_regs = (struct pt_regs *) &v86; /* NOTE(review): treats the address of the first argument as the start of the saved register frame - depends on the syscall entry stack layout, confirm against sys_call.S */
+
+ if (current->saved_kernel_stack) /* non-zero from here until save_v86_state() clears it, so a second entry is refused */
+ return -EPERM;
+ memcpy_fromfs(&info,v86,sizeof(info));
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+ info.regs.__null_ds = 0;
+ info.regs.__null_es = 0;
+ info.regs.__null_fs = 0;
+ info.regs.__null_gs = 0;
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+ info.regs.eflags &= 0x00000dd5; /* keep only bits 0,2,4,6,7,8,10,11 of the caller's value (CF,PF,AF,ZF,SF,TF,DF,OF in the i386 EFLAGS layout) */
+ info.regs.eflags |= 0xfffff22a & pt_regs->eflags; /* complementary mask (~0xdd5): every other bit comes from the eflags saved in the syscall frame */
+ info.regs.eflags |= VM_MASK;
+ current->saved_kernel_stack = current->tss.esp0; /* remembered so save_v86_state() can put it back */
+ current->tss.esp0 = (unsigned long) pt_regs; /* save_v86_state() later returns this address as the stack to resume on */
+ current->vm86_info = v86; /* destination for the register write-back in save_v86_state() */
+ __asm__ __volatile__("movl %0,%%esp\n\t" /* point %esp at info.regs so it is used as the register frame */
+ "pushl $ret_from_sys_call\n\t" /* push + ret acts as a jump to ret_from_sys_call */
+ "ret"::"g" ((long) &(info.regs)),"a" (info.regs.eax)); /* %eax is preloaded with the eax value from the register image */
+ return 0; /* NOTE(review): apparently never reached, since the asm above transfers control away - confirm */
+}
+
extern void hard_reset_now(void);
/*
diff --git a/kernel/sys_call.S b/kernel/sys_call.S
index 44c0723..916455e 100644
--- a/kernel/sys_call.S
+++ b/kernel/sys_call.S
@@ -58,6 +58,10 @@ EFLAGS = 0x38
OLDESP = 0x3C
OLDSS = 0x40
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+VM_MASK = 0x00020000
+
/*
* these are offsets into the task-struct.
*/
@@ -67,6 +71,7 @@ priority = 8
signal = 12
sigaction = 16 # MUST be 16 (=len of sigaction)
blocked = (33*16)
+saved_kernel_stack = ((33*16)+4)
/*
* offsets within sigaction
@@ -121,11 +126,17 @@ _system_call:
movl %eax,EAX(%esp) # save the return value
.align 4,0x90
ret_from_sys_call:
+ movl EFLAGS(%esp),%eax
+ testl $VM_MASK,%eax
+ jne 1f
cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
jne 2f
cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
jne 2f
-1: cmpl $0,_need_resched
+1: orl $IF_MASK,%eax # these just try to make sure
+ andl $~NT_MASK,%eax # the program doesn't do anything
+ movl %eax,EFLAGS(%esp) # stupid
+ cmpl $0,_need_resched
jne reschedule
movl _current,%eax
cmpl _task,%eax # task[0] cannot have signals
@@ -141,10 +152,18 @@ ret_from_sys_call:
bsfl %ecx,%ecx
je 2f
btrl %ecx,%ebx
+ incl %ecx
movl %ebx,signal(%eax)
movl %esp,%ebx
+ testl $VM_MASK,EFLAGS(%esp)
+ je 3f
pushl %ebx
- incl %ecx
+ pushl %ecx
+ call _save_v86_state
+ popl %ecx
+ movl %eax,%ebx
+ movl %eax,%esp
+3: pushl %ebx
pushl %ecx
call _do_signal
popl %ecx
diff --git a/kernel/traps.c b/kernel/traps.c
index 8d7d039..9a5f086 100644
--- a/kernel/traps.c
+++ b/kernel/traps.c
@@ -63,7 +63,7 @@ static void die_if_kernel(char * str,long esp_ptr,long nr)
long * esp = (long *) esp_ptr;
int i;
- if ((0xffff & esp[1]) == 0xf)
+ if ((esp[2] & VM_MASK) || ((0xffff & esp[1]) == 0xf))
return;
printk("%s: %04x\n\r",str,nr&0xffff);
printk("EIP: %04x:%p\nEFLAGS: %p\n", 0xffff & esp[1],esp[0],esp[2]);
diff --git a/mm/memory.c b/mm/memory.c
index 1595b4f..5fd804b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -34,11 +34,9 @@
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
+#include <linux/errno.h>
#include <linux/string.h>
-#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
-current->start_code + current->end_code)
-
unsigned long low_memory = 0;
unsigned long high_memory = 0;
unsigned long free_page_list = 0;
@@ -85,45 +83,92 @@ void free_page(unsigned long addr)
printk("trying to free free page (%08x): memory probably corrupted\n",addr);
}
+static void free_one_table(unsigned long * page_dir) /* tear down one page-directory slot: clear the entry, release everything its page table maps, then release the table page itself */
+{
+ int j;
+ unsigned long pg_table = *page_dir; /* raw directory entry: table address plus low flag bits */
+ unsigned long * page_table;
+
+ if (!pg_table) /* empty slot, nothing to do */
+ return;
+ if (!(pg_table & 1)) { /* non-zero but bit 0 (present) clear: reported as bad and simply dropped */
+ printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
+ *page_dir = 0;
+ return;
+ }
+ *page_dir = 0; /* unhook the table before its contents are released */
+ if (pg_table < low_memory) /* tables below low_memory are never freed; copy_page_tables() copies such directory entries verbatim rather than duplicating the table */
+ return;
+ page_table = (unsigned long *) (pg_table & 0xfffff000); /* strip the flag bits to get the table address */
+ for (j = 0 ; j < 1024 ; j++,page_table++) { /* walk all 1024 entries of the table */
+ unsigned long pg = *page_table;
+
+ if (!pg)
+ continue;
+ *page_table = 0;
+ if (1 & pg) /* bit 0 set: entry refers to a page in memory */
+ free_page(0xfffff000 & pg);
+ else /* non-zero with bit 0 clear: the remaining bits are handed to swap_free() as a swap page number */
+ swap_free(pg >> 1);
+ }
+ free_page(0xfffff000 & pg_table); /* finally release the page that held the table */
+}
+
/*
- * This function frees a continuos block of page tables, as needed
- * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
+ * This function clears all user-level page tables of a process - this
+ * is needed by execve(), so that old pages aren't in the way. Note that
+ * unlike 'free_page_tables()', this function still leaves a valid
+ * page-table-tree in memory: it just removes the user pages. The two
+ * functions are similar, but there is a fundamental difference.
*/
-int free_page_tables(unsigned long from,unsigned long size)
+void clear_page_tables(struct task_struct * tsk)
{
- unsigned long page;
- unsigned long page_dir;
- unsigned long *pg_table;
- unsigned long * dir, nr;
+ int i;
+ unsigned long * page_dir;
- if (from & 0x3fffff)
- panic("free_page_tables called with wrong alignment");
- if (!from)
+ if (!tsk)
+ return;
+ if (tsk == task[0])
+ panic("task[0] (swapper) doesn't support exec() yet\n");
+ page_dir = (unsigned long *) tsk->tss.cr3;
+ if (!page_dir) {
+ printk("Trying to clear kernel page-directory: not good\n");
+ return;
+ }
+ for (i = 0 ; i < 768 ; i++,page_dir++)
+ free_one_table(page_dir);
+ invalidate();
+ return;
+}
+
+/*
+ * This function frees up all page tables of a process when it exits.
+ */
+void free_page_tables(struct task_struct * tsk)
+{
+ int i;
+ unsigned long pg_dir;
+ unsigned long * page_dir;
+
+ if (!tsk)
+ return;
+ if (tsk == task[0]) {
+ printk("task[0] (swapper) killed: unable to recover\n");
panic("Trying to free up swapper memory space");
- size = (size + 0x3fffff) >> 22;
- dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
- for ( ; size-->0 ; dir++) {
- if (!(page_dir = *dir))
- continue;
- *dir = 0;
- if (!(page_dir & 1)) {
- printk("free_page_tables: bad page directory.");
- continue;
- }
- pg_table = (unsigned long *) (0xfffff000 & page_dir);
- for (nr=0 ; nr<1024 ; nr++,pg_table++) {
- if (!(page = *pg_table))
- continue;
- *pg_table = 0;
- if (1 & page)
- free_page(0xfffff000 & page);
- else
- swap_free(page >> 1);
- }
- free_page(0xfffff000 & page_dir);
}
+ pg_dir = tsk->tss.cr3;
+ if (!pg_dir) {
+ printk("Trying to free kernel page-directory: not good\n");
+ return;
+ }
+ tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
+ if (tsk == current)
+ __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
+ page_dir = (unsigned long *) pg_dir;
+ for (i = 0 ; i < 1024 ; i++,page_dir++)
+ free_one_table(page_dir);
+ free_page(pg_dir);
invalidate();
- return 0;
}
/*
@@ -143,66 +188,80 @@ int free_page_tables(unsigned long from,unsigned long size)
* 1 Mb-range, so the pages can be shared with the kernel. Thus the
* special case for nr=xxxx.
*/
-int copy_page_tables(unsigned long from,unsigned long to,long size)
+int copy_page_tables(struct task_struct * tsk)
{
- unsigned long * from_page_table;
- unsigned long * to_page_table;
- unsigned long this_page;
- unsigned long * from_dir, * to_dir;
- unsigned long new_page;
- unsigned long nr;
-
- if ((from&0x3fffff) || (to&0x3fffff))
- panic("copy_page_tables called with wrong alignment");
- from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
- to_dir = (unsigned long *) ((to>>20) & 0xffc);
- size = ((unsigned) (size+0x3fffff)) >> 22;
- for( ; size-->0 ; from_dir++,to_dir++) {
- if (*to_dir)
- printk("copy_page_tables: already exist, "
- "probable memory corruption\n");
- if (!*from_dir)
+ int i;
+ unsigned long temp_page = 0;
+ unsigned long old_pg_dir, *old_page_dir;
+ unsigned long new_pg_dir, *new_page_dir;
+
+ old_pg_dir = current->tss.cr3;
+ new_pg_dir = get_free_page(GFP_KERNEL);
+ if (!new_pg_dir)
+ return -ENOMEM;
+ tsk->tss.cr3 = new_pg_dir;
+ old_page_dir = (unsigned long *) old_pg_dir;
+ new_page_dir = (unsigned long *) new_pg_dir;
+ for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
+ int j;
+ unsigned long old_pg_table, *old_page_table;
+ unsigned long new_pg_table, *new_page_table;
+
+ old_pg_table = *old_page_dir;
+ if (!old_pg_table)
continue;
- if (!(1 & *from_dir)) {
+ if (!(1 & old_pg_table)) {
printk("copy_page_tables: page table swapped out, "
"probable memory corruption");
- *from_dir = 0;
+ *old_page_dir = 0;
+ continue;
+ }
+ if (old_pg_table < low_memory) {
+ *new_page_dir = old_pg_table;
continue;
}
- from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
- if (!(to_page_table = (unsigned long *) get_free_page(GFP_KERNEL)))
- return -1; /* Out of memory, see freeing */
- *to_dir = ((unsigned long) to_page_table) | PAGE_ACCESSED | 7;
- nr = (from==0)?0xA0:1024;
- for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
+ new_pg_table = get_free_page(GFP_KERNEL);
+ if (!new_pg_table) {
+ free_page_tables(tsk);
+ free_page(temp_page);
+ return -ENOMEM;
+ }
+ *new_page_dir = new_pg_table | PAGE_ACCESSED | 7;
+ old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
+ new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
+ for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
+ unsigned long pg;
repeat:
- this_page = *from_page_table;
- if (!this_page)
+ pg = *old_page_table;
+ if (!pg)
continue;
- if (!(1 & this_page)) {
- if (!(new_page = get_free_page(GFP_KERNEL)))
- return -1;
- ++current->rss;
- read_swap_page(this_page>>1, (char *) new_page);
- if (*from_page_table != this_page) {
- free_page(new_page);
- goto repeat;
- }
- *to_page_table = this_page;
- *from_page_table = new_page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
+ if (pg & 1) {
+ pg &= ~2;
+ *new_page_table = pg;
+ if (pg < low_memory)
+ continue;
+ *old_page_table = pg;
+ mem_map[(pg-low_memory)>>12]++;
continue;
}
- this_page &= ~2;
- *to_page_table = this_page;
- if (this_page > low_memory) {
- *from_page_table = this_page;
- this_page -= low_memory;
- this_page >>= 12;
- if (!mem_map[this_page]++)
- --nr_free_pages;
+ if (!temp_page) {
+ temp_page = get_free_page(GFP_KERNEL);
+ if (!temp_page) {
+ free_page_tables(tsk);
+ return -ENOMEM;
+ }
+ goto repeat;
}
+ ++current->rss;
+ read_swap_page(pg>>1, (char *) temp_page);
+ if (*old_page_table != pg)
+ goto repeat;
+ *new_page_table = pg;
+ *old_page_table = temp_page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
+ temp_page = 0;
}
}
+ free_page(temp_page);
invalidate();
return 0;
}
@@ -222,7 +281,7 @@ int unmap_page_range(unsigned long from, unsigned long size)
if (!from)
panic("unmap_page_range trying to free swapper memory space");
size = (size + 0xfff) >> 12;
- dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+ dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
poff = (from >> 12) & 0x3ff;
if ((pcnt = 1024 - poff) > size)
pcnt = size;
@@ -284,7 +343,7 @@ int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
if ((from & 0xfff) || (to & 0xfff))
panic("remap_page_range called with wrong alignment");
- dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+ dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
size = (size + 0xfff) >> 12;
poff = (from >> 12) & 0x3ff;
if ((pcnt = 1024 - poff) > size)
@@ -363,7 +422,7 @@ int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
* out of memory (either when trying to access page-table or
* page.)
*/
-static unsigned long put_page(unsigned long page,unsigned long address)
+static unsigned long put_page(struct task_struct * tsk,unsigned long page,unsigned long address)
{
unsigned long tmp, *page_table;
@@ -377,13 +436,13 @@ static unsigned long put_page(unsigned long page,unsigned long address)
printk("put_page: mem_map disagrees with %p at %p\n",page,address);
return 0;
}
- page_table = (unsigned long *) ((address>>20) & 0xffc);
+ page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
tmp = get_free_page(GFP_KERNEL);
if (!tmp) {
- oom(current);
+ oom(tsk);
tmp = BAD_PAGETABLE;
}
*page_table = tmp | PAGE_ACCESSED | 7;
@@ -406,7 +465,7 @@ static unsigned long put_page(unsigned long page,unsigned long address)
* and we want the dirty-status to be correct (for VM). Thus the same
* routine, but this time we mark it dirty too.
*/
-unsigned long put_dirty_page(unsigned long page, unsigned long address)
+unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
unsigned long tmp, *page_table;
@@ -416,7 +475,7 @@ unsigned long put_dirty_page(unsigned long page, unsigned long address)
printk("put_dirty_page: trying to put page %p at %p\n",page,address);
if (mem_map[(page-low_memory)>>12] != 1)
printk("mem_map disagrees with %p at %p\n",page,address);
- page_table = (unsigned long *) ((address>>20) & 0xffc);
+ page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
@@ -491,7 +550,7 @@ void do_wp_page(unsigned long error_code, unsigned long address,
{
unsigned long pde, pte, page;
- pde = (address>>20) & 0xffc;
+ pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
pte = *(unsigned long *) pde;
if ((pte & 3) != 3) {
printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
@@ -499,12 +558,6 @@ void do_wp_page(unsigned long error_code, unsigned long address,
send_sig(SIGSEGV, tsk, 1);
return;
}
- if (address < TASK_SIZE) {
- printk("do_wp_page: kernel WP error at address %08x (%08x)\n",address,pte);
- *(unsigned long *) pde = BAD_PAGETABLE | 7;
- send_sig(SIGSEGV, tsk, 1);
- return;
- }
pte &= 0xfffff000;
pte += (address>>10) & 0xffc;
page = *(unsigned long *) pte;
@@ -514,7 +567,7 @@ void do_wp_page(unsigned long error_code, unsigned long address,
send_sig(SIGSEGV, tsk, 1);
return;
}
- ++current->min_flt;
+ tsk->min_flt++;
un_wp_page((unsigned long *) pte, tsk);
}
@@ -522,7 +575,7 @@ void write_verify(unsigned long address)
{
unsigned long page;
- page = *(unsigned long *) ((address>>20) & 0xffc);
+ page = *(unsigned long *) (current->tss.cr3 + ((address>>20) & 0xffc));
if (!(page & PAGE_PRESENT))
return;
page &= 0xfffff000;
@@ -532,16 +585,16 @@ void write_verify(unsigned long address)
return;
}
-static void get_empty_page(unsigned long address)
+static void get_empty_page(struct task_struct * tsk, unsigned long address)
{
unsigned long tmp;
tmp = get_free_page(GFP_KERNEL);
if (!tmp) {
- oom(current);
+ oom(tsk);
tmp = BAD_PAGE;
}
- if (!put_page(tmp,address))
+ if (!put_page(tsk,tmp,address))
free_page(tmp);
}
@@ -553,7 +606,8 @@ static void get_empty_page(unsigned long address)
* NOTE! This assumes we have checked that p != current, and that they
* share the same executable or library.
*/
-static int try_to_share(unsigned long address, struct task_struct * p)
+static int try_to_share(unsigned long address, struct task_struct * tsk,
+ struct task_struct * p)
{
unsigned long from;
unsigned long to;
@@ -561,9 +615,8 @@ static int try_to_share(unsigned long address, struct task_struct * p)
unsigned long to_page;
unsigned long phys_addr;
- from_page = to_page = ((address>>20) & 0xffc);
- from_page += ((p->start_code>>20) & 0xffc);
- to_page += ((current->start_code>>20) & 0xffc);
+ from_page = p->tss.cr3 + ((address>>20) & 0xffc);
+ to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
/* is there a page-directory at from? */
from = *(unsigned long *) from_page;
if (!(from & 1))
@@ -607,7 +660,7 @@ static int try_to_share(unsigned long address, struct task_struct * p)
* We first check if it is at all feasible by checking executable->i_count.
* It should be >1 if there are other tasks sharing this inode.
*/
-static int share_page(struct inode * inode, unsigned long address)
+static int share_page(struct task_struct * tsk, struct inode * inode, unsigned long address)
{
struct task_struct ** p;
int i;
@@ -617,19 +670,16 @@ static int share_page(struct inode * inode, unsigned long address)
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
- if (current == *p)
+ if (tsk == *p)
continue;
- if (address < LIBRARY_OFFSET) {
- if (inode != (*p)->executable)
- continue;
- } else {
+ if (inode != (*p)->executable) {
for (i=0; i < (*p)->numlibraries; i++)
if (inode == (*p)->libraries[i].library)
break;
if (i >= (*p)->numlibraries)
continue;
}
- if (try_to_share(address,*p))
+ if (try_to_share(address,tsk,*p))
return 1;
}
return 0;
@@ -671,15 +721,7 @@ void do_no_page(unsigned long error_code, unsigned long address,
unsigned int block,i;
struct inode * inode;
- if (address < TASK_SIZE) {
- printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
- do_exit(SIGSEGV);
- }
- if (address - tsk->start_code >= TASK_SIZE) {
- printk("Bad things happen: nonexistent page error in do_no_page\n\r");
- do_exit(SIGSEGV);
- }
- page = get_empty_pgtable((unsigned long *) ((address >> 20) & 0xffc));
+ page = get_empty_pgtable((unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc)));
if (!page)
return;
page &= 0xfffff000;
@@ -696,53 +738,56 @@ void do_no_page(unsigned long error_code, unsigned long address,
return;
}
address &= 0xfffff000;
- tmp = address - tsk->start_code;
inode = NULL;
block = 0;
- if (tmp < tsk->end_data) {
+ if (address < tsk->end_data) {
inode = tsk->executable;
- block = 1 + tmp / BLOCK_SIZE;
+ block = 1 + address / BLOCK_SIZE;
} else {
i = tsk->numlibraries;
while (i-- > 0) {
- if (tmp < tsk->libraries[i].start)
+ if (address < tsk->libraries[i].start)
continue;
- block = tmp - tsk->libraries[i].start;
- if (block >= tsk->libraries[i].length)
+ block = address - tsk->libraries[i].start;
+ if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
continue;
inode = tsk->libraries[i].library;
- block = 1 + block / BLOCK_SIZE;
+ if (block < tsk->libraries[i].length)
+ block = 1 + block / BLOCK_SIZE;
+ else
+ block = 0;
break;
}
}
if (!inode) {
++tsk->min_flt;
- get_empty_page(address);
+ get_empty_page(tsk,address);
if (tsk != current)
return;
- if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)
+ if (address < tsk->brk)
return;
- if (tmp+8192 >= (user_esp & 0xfffff000))
+ if (address+8192 >= (user_esp & 0xfffff000))
return;
send_sig(SIGSEGV,tsk,1);
return;
}
- if (tsk == current)
- if (share_page(inode,tmp)) {
- ++tsk->min_flt;
- return;
- }
+ if (share_page(tsk,inode,address)) {
+ ++tsk->min_flt;
+ return;
+ }
++tsk->maj_flt;
page = get_free_page(GFP_KERNEL);
if (!page) {
oom(current);
- put_page(BAD_PAGE,address);
+ put_page(tsk,BAD_PAGE,address);
return;
}
- for (i=0 ; i<4 ; block++,i++)
- nr[i] = bmap(inode,block);
- bread_page(page,inode->i_dev,nr);
- i = tmp + 4096 - tsk->end_data;
+ if (block) {
+ for (i=0 ; i<4 ; block++,i++)
+ nr[i] = bmap(inode,block);
+ bread_page(page,inode->i_dev,nr);
+ }
+ i = address + 4096 - tsk->end_data;
if (i>4095)
i = 0;
tmp = page + 4096;
@@ -750,7 +795,7 @@ void do_no_page(unsigned long error_code, unsigned long address,
tmp--;
*(char *)tmp = 0;
}
- if (put_page(page,address))
+ if (put_page(tsk,page,address))
return;
free_page(page);
oom(current);
@@ -758,9 +803,8 @@ void do_no_page(unsigned long error_code, unsigned long address,
void show_mem(void)
{
- int i,j,k,free=0,total=0;
+ int i,free=0,total=0;
int shared = 0;
- unsigned long * pg_tbl;
printk("Mem-info:\n\r");
printk("Free pages: %6d\n",nr_free_pages);
@@ -776,41 +820,14 @@ void show_mem(void)
}
printk("%d free pages of %d\n\r",free,total);
printk("%d pages shared\n\r",shared);
- printk("%d free pages via nr_free_pages\n\r", nr_free_pages);
- k = 0;
- for(i=4 ; i<1024 ;) {
- if (1&pg_dir[i]) {
- if (pg_dir[i]>high_memory) {
- printk("page directory[%d]: %08X\n\r",
- i,pg_dir[i]);
- i++;
- continue;
- }
- if (pg_dir[i]>low_memory)
- free++,k++;
- pg_tbl=(unsigned long *) (0xfffff000 & pg_dir[i]);
- for(j=0 ; j<1024 ; j++)
- if ((pg_tbl[j]&1) && pg_tbl[j]>low_memory)
- if (pg_tbl[j]>high_memory)
- printk("page_dir[%d][%d]: %08X\n\r",
- i,j, pg_tbl[j]);
- else
- k++,free++;
- }
- i++;
- if (!(i&15) && k) {
- k++,free++; /* one page/process for task_struct */
- printk("Process %d: %d pages\n\r",(i>>4)-1,k);
- k = 0;
- }
- }
- printk("Memory found: %d (%d)\n\r",free-shared,total);
}
-/* This routine handles page faults. It determines the address,
- and the problem then passes it off to one of the appropriate
- routines. */
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
void do_page_fault(unsigned long *esp, unsigned long error_code)
{
unsigned long address;
@@ -822,13 +839,10 @@ void do_page_fault(unsigned long *esp, unsigned long error_code)
user_esp = 0;
/* get the address */
__asm__("movl %%cr2,%0":"=r" (address));
- if (!(error_code & 1)) {
+ if (!(error_code & 1))
do_no_page(error_code, address, current, user_esp);
- return;
- } else {
+ else
do_wp_page(error_code, address, current, user_esp);
- return;
- }
}
unsigned long mem_init(unsigned long start_mem, unsigned long end_mem)
diff --git a/mm/swap.c b/mm/swap.c
index cc95a72..ce3d798 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -179,16 +179,9 @@ int try_to_swap_out(unsigned long * table_ptr)
return 1;
}
-/*
- * We never page the pages in task[0] - kernel memory.
- * We page all other pages.
- */
-#define FIRST_VM_PAGE (TASK_SIZE>>12)
-#define LAST_VM_PAGE (1024*1024)
-#define VM_PAGES (LAST_VM_PAGE - FIRST_VM_PAGE)
-
-static unsigned int dir_entry = 1024;
-static unsigned int page_entry = 0;
+static int swap_task = 1;
+static int swap_table = 0;
+static int swap_page = 0;
/*
* sys_idle() does nothing much: it just searches for likely candidates for
@@ -201,23 +194,32 @@ int sys_idle(void)
unsigned long page;
need_resched = 1;
- if (dir_entry >= 1024)
- dir_entry = FIRST_VM_PAGE>>10;
- p = task[dir_entry >> 4];
- page = pg_dir[dir_entry];
- if (!(page & 1) || !p || !p->swappable) {
- dir_entry++;
+ if (swap_task >= NR_TASKS)
+ swap_task = 1;
+ p = task[swap_task];
+ if (!p || !p->swappable) {
+ swap_task++;
+ return 0;
+ }
+ if (swap_table >= 1024) {
+ swap_task++;
+ swap_table = 0;
+ return 0;
+ }
+ page = ((unsigned long *) p->tss.cr3)[swap_table];
+ if (!(page & 1) || (page < low_memory)) {
+ swap_table++;
return 0;
}
page &= 0xfffff000;
- if (page_entry >= 1024) {
- page_entry = 0;
- dir_entry++;
+ if (swap_page >= 1024) {
+ swap_page = 0;
+ swap_table++;
return 0;
}
- page = *(page_entry + (unsigned long *) page);
+ page = *(swap_page + (unsigned long *) page);
if ((page < low_memory) || !(page & PAGE_PRESENT) || (page & PAGE_ACCESSED))
- page_entry++;
+ swap_page++;
return 0;
}
@@ -231,48 +233,54 @@ int sys_idle(void)
*/
int swap_out(unsigned int priority)
{
- int counter = VM_PAGES / 2;
+ int counter = NR_TASKS;
int pg_table;
struct task_struct * p;
+ counter <<= priority;
+check_task:
+ if (counter-- < 0)
+ return 0;
+ if (swap_task >= NR_TASKS) {
+ swap_task = 1;
+ goto check_task;
+ }
+ p = task[swap_task];
+ if (!p || !p->swappable) {
+ swap_task++;
+ goto check_task;
+ }
check_dir:
- if (counter < 0)
- goto no_swap;
- if (dir_entry >= 1024)
- dir_entry = FIRST_VM_PAGE>>10;
- if (!(p = task[dir_entry >> 4]) || !p->swappable) {
- counter -= 1024;
- dir_entry++;
+ if (swap_table >= 1024) {
+ swap_table = 0;
+ swap_task++;
+ goto check_task;
+ }
+ pg_table = ((unsigned long *) p->tss.cr3)[swap_table];
+ if (pg_table < low_memory) {
+ swap_table++;
goto check_dir;
}
- if (!(1 & (pg_table = pg_dir[dir_entry]))) {
- if (pg_table) {
- printk("bad page-table at pg_dir[%d]: %08x\n\r",
- dir_entry,pg_table);
- pg_dir[dir_entry] = 0;
- }
- counter -= 1024;
- dir_entry++;
+ if (!(1 & pg_table)) {
+ printk("bad page-table at pg_dir[%d]: %08x\n\r",
+ swap_table,pg_table);
+ ((unsigned long *) p->tss.cr3)[swap_table] = 0;
+ swap_table++;
goto check_dir;
}
pg_table &= 0xfffff000;
check_table:
- if (counter < 0)
- goto no_swap;
- if (page_entry >= 1024) {
- page_entry = 0;
- dir_entry++;
+ if (swap_page >= 1024) {
+ swap_page = 0;
+ swap_table++;
goto check_dir;
}
- if (try_to_swap_out(page_entry + (unsigned long *) pg_table)) {
+ if (try_to_swap_out(swap_page + (unsigned long *) pg_table)) {
p->rss--;
return 1;
}
- page_entry++;
- counter--;
+ swap_page++;
goto check_table;
-no_swap:
- return 0;
}
static int try_to_free_page(void)
@@ -335,10 +343,8 @@ repeat:
}
if (priority <= GFP_BUFFER)
return 0;
- if (try_to_free_page()) {
- schedule();
+ if (try_to_free_page())
goto repeat;
- }
return 0;
}
@@ -355,8 +361,9 @@ int sys_swapon(const char * specialfile)
if (!suser())
return -EPERM;
- if (!(swap_inode = namei(specialfile)))
- return -ENOENT;
+ i = namei(specialfile,&swap_inode);
+ if (i)
+ return i;
if (swap_file || swap_device || swap_bitmap || swap_lockmap) {
iput(swap_inode);
return -EBUSY;
diff --git a/net/Makefile b/net/Makefile
index 72a28e1..b61a843 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -27,7 +27,7 @@ net.o: $(OBJS) subdirs
subdirs: dummy
- for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
@@ -37,7 +37,7 @@ dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE) dep || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE) dep) || exit; done
dummy:
diff --git a/net/unix.c b/net/unix.c
index b0a2f10..26bc918 100644
--- a/net/unix.c
+++ b/net/unix.c
@@ -351,7 +351,7 @@ unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr,
set_fs(get_ds());
i = do_mknod(fname, S_IFSOCK | 0777, 0);
if (i == 0)
- i = open_namei(fname, 0, S_IFSOCK, &upd->inode);
+ i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL);
set_fs(old_fs);
if (i < 0) {
printk("unix_proto_bind: can't open socket %s\n", fname);