Homework 9

lkct-1600012920

11.8

// Add sigchld_hdl()

void sigchld_hdl(int sig)
{
    int old_errno = errno;
    while (Wait(NULL) > 0)
        ;
    errno = old_errno;
}

// In main(), add sig handler binding before establishing listening

int main(int argc, char **argv) 
{
    ... /* var defs */
    ... /* check command line args */

    Signal(SIGCHLD, sigchld_hdl);

    ... /* open listen fd and wait to accept */
}

// In serve_dynamic(), remove explicit waiting

void serve_dynamic(int fd, char *filename, char *cgiargs) 
{
    ... /* return first part of HTTP response */
  
    ... /* fork child and execve */
    
    /* remove Wait(NULL) on the last line */
}

第九次作业反馈

第一题：正确
第二题：正确

11.9

// In serve_static(), change ways to send body

void serve_static(int fd, char *filename, int filesize) 
{
    int srcfd;
    char *srcp, filetype[MAXLINE], buf[MAXBUF];

    ... /* Send response headers to client */

    /* Send response body to client */
    srcfd = Open(filename, O_RDONLY, 0);
    srcp = (char *)Malloc(filesize);
    Rio_readn(srcfd, srcp, filesize);
    Close(srcfd);
    Rio_writen(fd, srcp, filesize);
    Free(srcp);
}

Homework 8

lkct-1600012920

/**
 * This program reads info about memory mapping of this precess from /proc/$pid/
 * to determine accessible VM pages. Vars are declared to statically allocate to
 * hold back dynamic allocation of new pages (maybe can't avoid completely).
 */
#include <stdio.h>
#include <unistd.h>

pid_t pid;                // pid of current process
char filename[32];        // filename of maps file
FILE *maps;               // file pointer of maps file
char buf[256];            // buffer of lines in maps
unsigned long start, end; // start and end of VMA

int main()
{
    pid = getpid();
    sprintf(filename, "/proc/%d/maps", pid);
    maps = fopen(filename, "r");          // open maps file
    while (fgets(buf, 255, maps) != NULL) // get a line
    {
        sscanf(buf, "%lx-%lx", &start, &end); // "start-end" at start of line
        start >>= 12;
        end >>= 12; // lower 12 bits are VPO
        while (start != end)
        {
            printf("0x%lx\n", start++); // print each VPN
        }
    }
    return 0;
}

/**
 * This program trys to access every possible page number, and catches SIGSEGV
 * to exclude those can't be accessed. Beacuse each section starts at relatively
 * fixed address, a start array is used to save the boundaries of possible
 * addresses to reduce the number of many must-fail trials. However, shared libs
 * can start at a wide range of addresses, so it takes quite some time to find
 * them. Also, stack grows downwards, so that section is printed reversed to
 * avoid large space consumption by saving and reversing those.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <setjmp.h>

typedef void handler_t(int);

volatile unsigned long pageNo; /* current page No. */
volatile int section;          /* section in arrays below */
volatile int visited;          /* currently visited one page successfully */
volatile unsigned long start[5] = {
    /* start pageNo of sections */
    0x400,          /* text */
    0x600,          /* data and heap */
    0x7f0000000,    /* shared libs */
    0x7ffffffff,    /* stack and vvar+vdso */
    0xffffffffff600 /* vdso */
};
volatile int dir[4] = {1, 1, 1, -1}; /* direction to enumerate */
volatile int vis[4] = {1, 2, 1, 2};  /* needed visit times, num of cont parts */

sigjmp_buf env; /* jmp buf for longjmp*/

/*
 * unix_error - unix-style error routine
 */
void unix_error(char *msg)
{
    fprintf(stdout, "%s: %s\n", msg, strerror(errno));
    exit(1);
}

/*
 * Signal - wrapper for the sigaction function
 */
handler_t *Signal(int signum, handler_t *handler)
{
    struct sigaction action, old_action;

    action.sa_handler = handler;
    sigemptyset(&action.sa_mask); /* block sigs of type being handled */
    action.sa_flags = SA_RESTART; /* restart syscalls if possible */

    if (sigaction(signum, &action, &old_action) < 0)
        unix_error("Signal error");
    return (old_action.sa_handler);
}

/*
 * sig_handler - catches SIGSEGV to continue enumeration
 */
void sig_handler(int sig)
{
    pageNo += dir[section]; /* move on to next page */

    if (visited) /* a continual part finished, move to next part */
    {
        visited = 0;
        --vis[section];
        if (vis[section] == 0) /* section finished, next section */
        {
            ++section;
            pageNo = start[section];
        }
    }

    siglongjmp(env, 1); /* try again */
}

int main()
{
    Signal(SIGSEGV, sig_handler);

    section = 0; /* init */
    visited = 0;
    pageNo = start[section];
    char c;
    while (1)
    {
        sigsetjmp(env, 1);
        c = *(char *)(pageNo << 12); /* try to access, can't use optimization */
        printf("0x%lx\n", pageNo);   /* access trail succeeded */

        if (section >= 4) /* all finished */
            break;

        pageNo += dir[section]; /* try next */
        visited = 1;
    }

    return 0;
}

The two solutions are coss-compared and gives the same set of page numbers.
But they differ in method and efficiency.

Homework 7

lkct-1600012920

8.9

Process pair	Concurrent?
AB	N
AC	Y
AD	Y
BC	Y
BD	Y
CD	Y

8.18

ACE

8.24

// complete code is presented because it's hard to describe changes simply

#include "csapp.h"
#define N 2

int main()
{
    int status, i, sig;
    pid_t pid;
    char buf[1 << 6];

    /* Parent creates N children */
    for (i = 0; i < N; i++)
        if ((pid = Fork()) == 0) /* Child */
            *(char *)&main = 0;  /* main in .text is read only */

    /* Parent reaps N children in no particular order */
    while ((pid = waitpid(-1, &status, 0)) > 0)
    {
        if (WIFEXITED(status))
            printf("child %d terminated normally with exit status=%d\n",
                   pid, WEXITSTATUS(status));
        else if (WIFSIGNALED(status))
        {
            sprintf(buf, "child %d terminated by signal %d",
                    pid, (sig = WTERMSIG(status)));
            psignal(sig, buf);
        }
        else
            printf("child %d terminated abnormally\n", pid);
    }

    /* The only normal termination is if there are no more children */
    if (errno != ECHILD)
        unix_error("waitpid error");

    exit(0);
}

10.6

fd2 = 4

10.9

if (Fork() == 0) { /* Child */
    int fd = Open("foo.txt", O_RDONLY, 0); /* should == 3 */
    Dup2(fd, STDIN_FILENO);
    Close(fd); /* fd3 closed, causes problem */
    Execve("fstatcheck", argv, envp);
}

第七次作业反馈

第一题：正确
第二题：正确
第三题：正确
第四题：正确
第五题：正确

hw6 correction

7.6

Symbol	swap.o .symtab entry?	Symbol type	Module where defined	Section
count	Y	--- ?	swap.o	.bss

Homework 6

lkct-1600012920

7.6

Symbol	swap.o .symtab entry?	Symbol type	Module where defined	Section
buf	Y	extern	m.o	.data
bufp0	Y	global	swap.o	.data
bufp1	Y	local	swap.o	.bss
swap	Y	global	swap.o	.text
temp	N	---	---	---
incr	Y	local	swap.o	.text
count	Y	local	swap.o	.bss

7.7

// only changed definition of variable x in bar5.c

static double x;

7.12

A.

0xa

B.

0x22

7.13

A.

libc.a: 1579
libm.a: 471

B.

没有不同

C.

linux-vdso.so.1
libc.so.6
ld-linux-x86-64.so.2

第六次作业反馈

第一题：有一处错误
第二题：正确
第三题：正确
第四题：正确

Homework 5

lkct-1600012920

4.47

A.

/* Bubble sort: Pointer version */
void bubble_p(long *data, long count)
{
    long i;
    long *p;
    for (count--; count > 0; count--)
    {
        p = data;
        for (i = 0; i < count; i++, p++)
        {
            if (*(p + 1) < *p)
            {
                long t = *(p + 1);
                *(p + 1) = *p;
                *p = t;
            }
        }
    }
}

B.

# Execution begins at address 0 
    .pos 0 
init:
    irmovl Stack, %esp  # Set up stack pointer  
    irmovl Stack, %ebp  # Set up base pointer   
    call main           # Execute main program
    halt                # Terminate program 

.align 4
# data block
src:
    .long 0x00a
    .long 0x0b0
    .long 0xc00
    .long 0x111
    .long 0x222
    .long 0x333

main:
    pushl %ebp
    rrmovl %esp, %ebp
    irmovl 6, %eax
    pushl %eax          # push arg 2: count = 6
    irmovl src, %eax
    pushl %eax          # push arg 1: data = src
    call bubble_p       # call bubble_p(data, count)
    rrmovl %ebp, %esp
    popl %ebp
    ret

# void bubble_p(long *data, long count)
bubble_p:
    pushl %ebp
    pushl %esi              # save esi (callee saved)
    pushl %edi              # save edi (callee saved)
    pushl %ebx              # save ebx (callee saved)
    rrmovl %esp, %ebp

    mrmovl 20(%ebp), %edi   # edi = data
    mrmovl 24(%ebp), %esi   # esi = count
    jmp Test                # goto Test:
Loop0:
    rrmovl %edi, %eax       # eax = p = data
    xorl %edx, %edx         # edx = i = 0
Loop1:
    mrmovl 4(%eax), %ecx    # ecx = *(p + 1)
    mrmovl (%eax), %ebx     # ebx = *p
    subl %ebx, %ecx         # ecx - ebx
    jge Nops                # !(*(p + 1) < *p), goto Nops:
    mrmovl 4(%eax), %ecx    # ecx = *(p + 1)
    rmmovl %ebx, 4(%eax)    # *(p + 1) = ebx
    rmmovl %ecx, (%eax)     # *p = ecx
Nops:
    irmovl $1, %ecx
    addl %ecx, %edx         # i++
    irmovl $4, %ecx
    addl %ecx, %eax         # p++
    rrmovl %edx, %ecx
    subl %esi, %ecx         # i - count
    jl Loop1                # i < count, foro Loop1:
Test:
    irmovl $-1, %ecx
    addl %ecx, %esi         # count--
    jg Loop0                # goto Loop0:

    rrmovl %ebp, %esp
    popl %ebx               # restore regs
    popl %edi
    popl %esi
    popl %ebp
    ret

# The stack starts here and grows to lower addresses
    .pos 0x100		
Stack:

4.56

Here only changes of pipe-btfnt.hcl are presented

# change PC selection according to prediction and M_Cnd

## What address should instruction be fetched at
int f_pc = [
    # Mispredicted branch.  Fetch at incremented PC
    M_icode == IJXX && (M_ifun != UNCOND && M_valE < M_valA && !M_Cnd) : M_valA;
    # Mispredicted branch.  Fetch at given address
    M_icode == IJXX && (M_ifun != UNCOND && M_valE >= M_valA && M_Cnd) : M_valE;
    # Completion of RET instruction.
    W_icode == IRET : W_valM;
    # Default: Use predicted value of PC
    1 : F_predPC;
];

# predict PC using backward-taken-forward-not-taken

# Predict next value of PC
int f_predPC = [
    # BBTFNT: This is where you'll change the branch prediction rule
    f_icode in { ICALL } 
      || f_icode == IJXX && (f_ifun == UNCOND || f_ifun != UNCOND && f_valC < f_valP) : f_valC;
    1 : f_valP;
];

# get E_valC into e_valE then M_valE for f_pc

## Select input A to ALU
int aluA = [
    E_icode in { IRRMOVL, IOPL } : E_valA;
    E_icode in { IIRMOVL, IRMMOVL, IMRMOVL, IJXX } : E_valC;
    E_icode in { ICALL, IPUSHL } : -4;
    E_icode in { IRET, IPOPL } : 4;
    # Other instructions don't need ALU
];

## Select input B to ALU
int aluB = [
    E_icode in { IRMMOVL, IMRMOVL, IOPL, ICALL, 
             IPUSHL, IRET, IPOPL } : E_valB;
    E_icode in { IRRMOVL, IIRMOVL, IJXX } : 0;
    # Other instructions don't need ALU
];

# condition of misprediction changed according to prediction

bool D_bubble =
    # Mispredicted branch
    E_icode == IJXX && (E_ifun != UNCOND && (E_valC < E_valA && !e_Cnd || E_valC >= E_valA && e_Cnd)) ||
    # BBTFNT: This condition will change
    # Stalling at fetch while ret passes through pipeline
    # but not condition for a load/use hazard
    !(E_icode in { IMRMOVL, IPOPL } && E_dstM in { d_srcA, d_srcB }) &&
      IRET in { D_icode, E_icode, M_icode };

bool E_bubble =
    # Mispredicted branch
    E_icode == IJXX && (E_ifun != UNCOND && (E_valC < E_valA && !e_Cnd || E_valC >= E_valA && e_Cnd)) ||
    # BBTFNT: This condition will change
    # Conditions for a load/use hazard
    E_icode in { IMRMOVL, IPOPL } &&
     E_dstM in { d_srcA, d_srcB};

5.13

A.

| %rax | %rbp | %rbx | %rcx | %xmm0 | %xmm1 |
    |      |      |      |       |        
    |      |      |      |       |            ----
    |      |      |      |>------+----------->
    |      |>-----+------+-------+----------->load            vmovsd 0(%rbp, %rcx, 8), %xmm1
    |      |      |      |       |       |<--<
    |      |      |      |       |       |    ----            ------
    |      |      |      |>------+-------+--->
    |>-----+------+------+-------+-------+--->load
    |      |      |      |       |       |        >-->|
    |      |      |      |       |       |    ----    |       vmulsd (%rax, %rcx, 8), %xmm1, %xmm1
    |      |      |      |       |       ->-->    <--<-
    |      |      |      |       |            mul
    |      |      |      |       |       |<--<
    |      |      |      |       |       |    ----            ------
    |      |      |      |       |       |>-->
    |      |      |      |       ->------+--->add             vaddsd %xmm1, %xmm0, %xmm0
    |      |      |      |       |<------+---<
    |      |      |      |       |       |    ----            ------
    |      |      |      ->------+-------+--->
    |      |      |              |       |    add             addq   $1, %rcx
    |      |      |      |<------+-------+---<
    |      |      |      |       |       |    ----            ------
    |      |      |      |>------+-------+--->
    |      |      |>-----+-------+-------+--->cmp             cmpq   %rbx, %rcx
    |      |      |      |       |       |        >-->|
    |      |      |      |       |       |    ----    |       ------
    |      |      |      |       |       |        <--<-
    |      |      |      |       |       |    jne             jne    .L15
    |      |      |      |       |       |
    |      |      |      |       |       |    ----
    |      |      |      |       |       |
| %rax | %rbp | %rbx | %rcx | %xmm0 | %xmm1 |

| %xmm0 | %rbp |  %rax | %rcx | %rbx |
    ||      |       |      |      |
    ||      |  |<---+-----<|      |
    ||      |>load  |      |      |
    ||      | |     |      |      |
    ||      | |  |<-+-----<|      |
    ||      | |load<|      |      |
    ||      | |   | |      |      |
    ||      | >mul< |     add     |
    ||      |   |   |      |>--->cmp
   add<-----+---<   |      |      |
    ||      |       |      |     jne
| %xmm0 | %rbp |  %rax | %rcx |

关键路径以||标出

B.

3.00

C.

1.00

D.

浮点乘法发射时间为1个周期，而且乘法涉及的操作数不受运算影响，故可以提前、连续地发射乘法运算；相反，浮点加法在关键路径上，故其3个周期的延迟成为影响CPE的主要因素。

5.14

long limit = length - 5;

for (i = 0; i < limit; i += 6)
{
    sum = sum + udata[i]     * vdata[i]     + udata[i + 1] * vdata[i + 1]
              + udata[i + 2] * vdata[i + 2] + udata[i + 3] * vdata[i + 3]
              + udata[i + 4] * vdata[i + 4] + udata[i + 5] * vdata[i + 5];
}

for (; i < length; i++)
{
    sum = sum + udata[i] * vdata[i];
}

// 以上只给出修改了的部分

A.

Haswell只有两个加载单元，每周期只能加载两个值，而内积的每个元素的计算都要加载u、v中各一个值，所以吞吐量界限为1.00CPE，不可能更小。

B.

浮点时kx1循环展开无论展开到多少都必须对每个元素顺序执行一次浮点加法，每次都要用到上一次的结果，而浮点加延迟为3个周期，故CPE不能改进到3.00以下。

第五次作业反馈

第一题：正确
第二题：正确
第三题：正确
第四题：正确

Homework 4

lkct-1600012920

3.68

A = 9
B = 5

3.69

A.

CNT = 7

B.

typedef struct {
    long idx;
    long x[4];
} a_struct;

3.70

A.

e1.p 0
e1.y 8
e2.x 0
e2.next 8

B.

16

C.

void proc(union ele *up)
{
    up->e2.x = *(up->e2.next->e1.p) - up->e2.next->e1.y;
}

第四次作业反馈

第一题：正确
第二题：正确
第三题：正确

Homework 3

lkct-1600012920

3.60

long loop(long x, int n)
{
    long result = 0;
    long mask;
    for (mask = 1; mask != 0; mask = mask << n)
    {
        result |= (mask & x);
    }
    return result;
}

// x in %rdi, n in %esi, result in %rax, mask in %rdx, %r8 is tmp(no variable)

3.62

typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;

long switch3(long *p1, long *p2, mode_t action)
{
    long result = 0;
    switch (action)
    {
    case MODE_A:
        result = *p2;
        *p2 = *p1;
        break;
    case MODE_B:
        *p1 = result = *p2 + *p1;
        break;
    case MODE_C:
        *p1 = 59;
        result = *p2;
        break;
    case MODE_D:
        *p1 = *p2;
        result = 27;
        break;
    case MODE_E:
        result = 27;
        break;
    default:
        result = 12;
        break;
    }
    return result;
}

3.64

A.

&A[i][j][k] = A + 8 * ((i * S + j) * T + k);

B.

R = 7; S = 5; T = 13;

第三次作业反馈

第一题：正确
第二题：正确
第三题：正确

hw2 correction

2.88

Format A		Format B
bit	val	bit	val
0 00000 101	5/131072	0 0000 0001	1/1024
1 11011 000	-4096	1 1110 1111	-248

Homework 2

lkct-1600012920

2.88

Format A		Format B
bit	val	bit	val
1 01110 001	-9/16	1 0110 0010	-9/16
0 10110 101	208	0 1110 1010	208
1 00111 110	-7/1024	1 0000 0111	-7/1024
0 00000 101	5/131072	0 0000 0000	0
1 11011 000	-4096	1 1111 0000	-inf
0 11000 100	768	0 1111 0000	inf

2.92

float_bits float_negate(float_bits f)
{
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    if ((exp == 0xFF) && (frac != 0)) // if nan, return f
        return f;
    return (1 << 31) ^ f; // not nan, flip sign bit
}

// compared with float negate, returns the same except for -nan

2.96

int float_f2i(float_bits f)
{
    unsigned s = f >> 31;
    unsigned exp = (f >> 23) & 0xFF;
    unsigned M = (1 << 23) + (f & 0x7FFFFF);
    if (exp > 157)
    {
        /*
         * return 0x80000000 when
         * nan : exp == 255
         * inf : exp == 255
         * abs > 0x7FFFFFFF : E > 30, exp > 157
         */
        return 0x80000000;
    }
    if (exp < 127)
    {
        // exp < 127, E < 0, abs < 1
        return 0;
    }
    // M << 7 to highest possible bit, then rsh to correct position
    return (1 - s * 2) * ((M << 7) >> (157 - exp));
}

// compared with float to int cast, returns all the same

第二次作业反馈

第一题：有1个错误
第二题：正确
第三题：正确

Homework 1

lkct-1600012920

2.61

A.

!(~x) // if and only if x == 0xFFFFFFFF, ~x == 0, thus !(...) == 1

B.

!x // turn 0 to 1, other to 0

C.

!(~(x & 0xFF)) // same as A, but use x & 0xFF to rid higher bytes

D.

!(x & (0xFF << (8 * (sizeof(int) - 1)))) // same as B, but use x & 0xFF000000 to rid lower bytes. number of 0s is determined by length of int

2.62

int int_shifts_are_arithmetic()
{
    return ((-1) >> 1) < 0;
}

// -1 is 0xFFFF (number of Fs does not matter). if arithmetic, -1 >> 1 is still 0xFFFF, which is still -1 (< 0). if not, -1 >> 1 is 0x7FFF, which if INT_MAX (> 0)

2.65

int odd_ones(unsigned x) // 11 operators in total
{
    int a = x ^ (x >> 16);
    int b = a ^ (a >> 8);
    int c = b ^ (b >> 4);
    int d = c ^ (c >> 2);
    int e = d ^ (d >> 1);
    return e & 1;
}

// 0 ^ 0 == 1 ^ 1 is 0, 1 ^ 0 == 0 ^ 1 is 1, thus ^ doesn't change parity of number os 1s. therefore, parity of number of 1s in a's lower 16 bits equals that in x's 32 bit, and so on, parity of number of 1s in e's lower 1 bit equals that in d's lower 2 bit, also equals that in x's 32 bit. so e & 1 is the answer

第一次作业反馈

第一题：正确
第二题：正确
第三题：正确

Files

1600012920.md

Latest commit

History

1600012920.md

File metadata and controls

Homework 9

11.8

第九次作业反馈

11.9

Homework 8

Homework 7

8.9

8.18

8.24

10.6

10.9

第七次作业反馈

hw6 correction

7.6

Homework 6

7.6

7.7

7.12

A.

B.

7.13

A.

B.

C.

第六次作业反馈

Homework 5

4.47

A.

B.

4.56

5.13

A.

B.

C.

D.

5.14

A.

B.

第五次作业反馈

Homework 4

3.68

3.69

A.

B.

3.70

A.

B.

C.

第四次作业反馈

Homework 3

3.60

3.62

3.64

A.

B.

第三次作业反馈

hw2 correction

2.88

Homework 2

2.88

2.92

2.96

第二次作业反馈

Homework 1

2.61

A.

B.

C.

D.

2.62

2.65

第一次作业反馈