280 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
			
		
		
	
	
			280 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
layout: post
 | 
						|
title: "Process Syscalls"
 | 
						|
subtitle: "Almost caught up..."
 | 
						|
tags: [osdev]
 | 
						|
 | 
						|
I've described my [syscall interface](/blog/2013/06/System-Calls/) previously. I've also described
 | 
						|
the [file-related syscalls](/blog/2013/12/VFS-syscalls/). In order to build [newlib](http://wiki.osdev.org/Porting_Newlib), some more
 | 
						|
syscalls are required.
 | 
						|
 | 
						|
Those are:
 | 
						|
 | 
						|
    :::c
 | 
						|
    void *sbrk(int incr);
 | 
						|
    int getpid();
 | 
						|
    int fork();
 | 
						|
    void _exit(int rc);
 | 
						|
    int wait(int *status);
 | 
						|
    int kill(int pid, int sig);
 | 
						|
    int execve(char *name, char **argv, char **env);
 | 
						|
 | 
						|
Let's just go through them one at a time:
 | 
						|
 | 
						|
###sbrk
 | 
						|
`sbrk` is a bit special, since it actually has two versions - one for
 | 
						|
kernel use and one for user space processes.
 | 
						|
 | 
						|
The user space one makes use of the [process memory
 | 
						|
manager](/blog/2013/06/Even-More-Memory/) to return
 | 
						|
a chunk of new memory for the `malloc` functions.
 | 
						|
 | 
						|
    :::c
 | 
						|
    void *usr_sbrk(int incr)
 | 
						|
    {
 | 
						|
        process_t *p = current->proc;
 | 
						|
        mem_area_t *area = find_including(p, p->mm.data_end);
 | 
						|
        if(area)
 | 
						|
        {
 | 
						|
            if(area->end > (p->mm.data_end + incr))
 | 
						|
            {
 | 
						|
                // The current memory area is large enough
 | 
						|
            } else {
 | 
						|
                // Increase memory area
 | 
						|
                new_area(p, area->end, p->mm.data_end + incr, \
 | 
						|
                    MM_FLAG_READ | MM_FLAG_WRITE | MM_FLAG_CANSHARE, \
 | 
						|
                    MM_TYPE_DATA);
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            // Create a new memory area
 | 
						|
            new-area(p, p->mm.data_end, p->mm.data_end + incr, \
 | 
						|
                MM_FLAG_READ | MM_FLAG_WRITE | MM_FLAG_CANSHARE, \
 | 
						|
                MM_TYPE_DATA);
 | 
						|
        }
 | 
						|
        p->mm.data_end = p->mm.data_end + incr;
 | 
						|
        return (void *)(p->mm.data_end - incr);
 | 
						|
    }
 | 
						|
 | 
						|
The kernel space version is just a simple linear allocator
 | 
						|
 | 
						|
    :::c
 | 
						|
    uintptr_t kmem_top = KERNEL_HEAP_START;
 | 
						|
    uintptr_t kmem_ptr = KERNEL_HEAP_START;
 | 
						|
    void *sbrk(int incr)
 | 
						|
    {
 | 
						|
        if(kmem_ptr + incr > KERNEL_HEAP_END)
 | 
						|
        {
 | 
						|
            // PANIC!
 | 
						|
            ...
 | 
						|
        }
 | 
						|
        while(kmem_top < kmem_ptr + incr)
 | 
						|
        {
 | 
						|
            vmm_page_set(kmem_top, vmm_page_val(pmm_alloc_page(), \
 | 
						|
                PAGE_PRESENT | PAGE_WRITE));
 | 
						|
            kmem_top += PAGE_SIZE;
 | 
						|
        }
 | 
						|
        kmem_ptr = kmem_ptr + incr;
 | 
						|
        return (void *)kmem_ptr - incr;
 | 
						|
    }
 | 
						|
 | 
						|
Hopefully it's obvious why the kernel one is called `sbrk` while the
 | 
						|
user one has a different name.
 | 
						|
 | 
						|
###getpid
 | 
						|
`getpid` is rather obvious:
 | 
						|
 | 
						|
    :::c
 | 
						|
    int getpid()
 | 
						|
    {
 | 
						|
        return current->proc->pid;
 | 
						|
    }
 | 
						|
 | 
						|
###fork
 | 
						|
`fork` clones the current process and starts a new thread of execution.
 | 
						|
 | 
						|
    :::c
 | 
						|
    int fork()
 | 
						|
    {
 | 
						|
        process_t *child = fork_process();
 | 
						|
        thread_t *ch_thread = list_entry(child->threads.next, thread_t, process_threads);
 | 
						|
        ch_thread->r.eax = 0;
 | 
						|
        scheduler_insert(ch_thread);
 | 
						|
        return child->pid;
 | 
						|
    }
 | 
						|
 | 
						|
###_exit
 | 
						|
`_exit` stops a program and wakes up any processes that are sleeping on
 | 
						|
it.
 | 
						|
 | 
						|
    :::c
 | 
						|
    void _exit(int rc)
 | 
						|
    {
 | 
						|
        process_t *p = current->proc;
 | 
						|
 | 
						|
        // Close all open files
 | 
						|
        int i;
 | 
						|
        for(i = 0; i < NUM_FILEDES; i++)
 | 
						|
        {
 | 
						|
            if(p->fd[i])
 | 
						|
                close(i);
 | 
						|
        }
 | 
						|
        exit_process(current->proc, rc);
 | 
						|
        current->state = THREAD_STATE_FINISHED;
 | 
						|
        schedule();
 | 
						|
    }
 | 
						|
 | 
						|
`_exit` doesn't return, and in fact `schedule()` will never return as
 | 
						|
far as this thread is concerned.
 | 
						|
Note that the process still exists. It is not completely destroyed until
 | 
						|
its parent process has executed a `wait` syscall.
 | 
						|
 | 
						|
###wait
 | 
						|
Actually, I didn't quite implement `wait` yet, but instead use
 | 
						|
a `waitpid` for now, which is a bit more specific:
 | 
						|
 | 
						|
    :::c
 | 
						|
    int waitpid(int pid)
 | 
						|
    {
 | 
						|
        process_t *proc = get_process(pid);
 | 
						|
        while(proc->state != PROC_STATE_FINISHED)
 | 
						|
        {
 | 
						|
            scheduler_sleep(current, &proc->waiting);
 | 
						|
            schedule();
 | 
						|
        }
 | 
						|
        int ret = proc->exit_code;
 | 
						|
        free_process(proc);
 | 
						|
        return ret;
 | 
						|
    }
 | 
						|
 | 
						|
This _should_ contain a check that process `pid` is a child of the
 | 
						|
current process too...
 | 
						|
 | 
						|
###kill
 | 
						|
I'll let `kill` wait for now. My next post will probably be on signals,
 | 
						|
so it'll fit better there anyway.
 | 
						|
 | 
						|
###execve
 | 
						|
Now, here's the big stuff.
 | 
						|
 | 
						|
`execve` launches new programs from the filesystem, so what it has to do
 | 
						|
is:
 | 
						|
 | 
						|
- Find the correct executable
 | 
						|
- Save the arguments
 | 
						|
- Save the environment variables
 | 
						|
- Free the user memory space
 | 
						|
- Load the executable
 | 
						|
- Prepare a new user stack
 | 
						|
- Restore the arguments and environment variables
 | 
						|
 | 
						|
First of all, the executable is found. If it doesn't exist, we want to
 | 
						|
fail as early as possible - before we destroy everything.
 | 
						|
 | 
						|
    :::c
 | 
						|
    int execve(char *name, char **argv, char **env)
 | 
						|
    {
 | 
						|
        INODE executable = vfs_namei(name);
 | 
						|
        if(!executable)
 | 
						|
        {
 | 
						|
            errno = ENOENT;
 | 
						|
            return -1;
 | 
						|
        }
 | 
						|
        ...
 | 
						|
 | 
						|
The arguments and environment are null-terminated lists of strings
 | 
						|
stored in user space, so they have to be copied into kernel space before
 | 
						|
the user space is destroyed:
 | 
						|
 | 
						|
    :::c
 | 
						|
        ...
 | 
						|
        usigned int envc = 0;
 | 
						|
        char **temp_env = 0;
 | 
						|
        if(env)
 | 
						|
        {
 | 
						|
            while(env[envc++]); // Count number of environmental variables
 | 
						|
 | 
						|
            temp_env = calloc(envc, sizeof(char *));
 | 
						|
            unsigned int i = 0;
 | 
						|
            while(env[i])
 | 
						|
            {
 | 
						|
                temp_env[i] = strdup(env[i]);
 | 
						|
                i++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        // Do the same thing for argv
 | 
						|
        ...
 | 
						|
 | 
						|
Next, delete all memory from the previous executable and [load the new
 | 
						|
one](/blog/2013/08/Loading-Elf/):
 | 
						|
 | 
						|
    :::c
 | 
						|
        procmm_removeall(current->proc);
 | 
						|
        load_elf(executable);
 | 
						|
        current->r.eax = current->r.ebx = current->r.ecx = \
 | 
						|
            current->r.edx = 0;
 | 
						|
 | 
						|
We need to put the arguments and environment back into the new
 | 
						|
executable's user space, so a new stack area is created:
 | 
						|
 | 
						|
    :::c
 | 
						|
        new_area(current->proc, USER_STACK_TOP, USER_STACK_TOP, \
 | 
						|
            MM_FLAG_WRITE | MM_FLAG_GROWSDOWN | MM_FLAG_ADDONUSE, \
 | 
						|
            MM_TYPE_STACK);
 | 
						|
        current->kernel_thread = (registers_t *)current;
 | 
						|
        uint32_t *pos = (uint32_t *)USER_STACK_TOP;
 | 
						|
 | 
						|
Then, copy the environment and arguments onto the stack:
 | 
						|
 | 
						|
    :::c
 | 
						|
        if(env)
        {
            // Reserve room for the pointer array (envc entries, which
            // includes the null terminator) plus one spare word
            pos = pos - envc*sizeof(char *)/sizeof(uint32_t) - 1;
            env = (char **)pos;
            int i = 0;
            while(temp_env[i])
            {
                size_t len = strlen(temp_env[i]);

                // Move down by enough whole words to hold the string
                // and its terminating '\0'
                pos = pos - len/sizeof(uint32_t) - 2;
                memcpy(pos, temp_env[i], len+1);
                env[i] = (char *)pos;
                i++;
            }
            env[envc-1] = 0;
        }
        // Do the same for argv
 | 
						|
        ...
 | 
						|
 | 
						|
And finally, push the argument count, argument list and environment list
 | 
						|
onto the stack:
 | 
						|
 | 
						|
    :::c
 | 
						|
        pos = pos - 3;
 | 
						|
        pos[0] = (uint32_t)argc - 1;
 | 
						|
        pos[1] = (uint32_t)argv;
 | 
						|
        pos[2] = (uint32_t)env;
 | 
						|
 | 
						|
        current->r.useresp = current->r.ebp = (uint32_t)pos;
 | 
						|
        current->r.ecx = (uint32_t)pos;
 | 
						|
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
 | 
						|
This pushes argc, argv and env as arguments to the executable. We can
 | 
						|
use this to set up the `environ` variable of newlib. The crt0 in newlib
 | 
						|
pushes `ecx` to the stack and then calls `_init` which looks like this:
 | 
						|
 | 
						|
    :::c
 | 
						|
    extern char **environ;
 | 
						|
    void _init(uint32_t *args)
 | 
						|
    {
 | 
						|
        int argc;
 | 
						|
        char **argv;
 | 
						|
        if(args)
 | 
						|
        {
 | 
						|
            argc = args[0];
 | 
						|
            argv = (char **)args[1];
 | 
						|
            environ = (char **)args[2];
 | 
						|
        } else {...}
 | 
						|
 | 
						|
        exit(main(argc, argv));
 | 
						|
    }
 |