lkct-1600012920
// Add sigchld_hdl()
void sigchld_hdl(int sig)
{
int old_errno = errno;
while (Wait(NULL) > 0)
;
errno = old_errno;
}
// In main(), add sig handler binding before establishing listening
int main(int argc, char **argv)
{
... /* var defs */
... /* check command line args */
Signal(SIGCHLD, sigchld_hdl);
... /* open listen fd and wait to accept */
}
// In serve_dynamic(), remove explicit waiting
void serve_dynamic(int fd, char *filename, char *cgiargs)
{
... /* return first part of HTTP response */
... /* fork child and execve */
/* remove Wait(NULL) on the last line */
}
第一题:正确
第二题:正确
// In serve_static(), change ways to send body
void serve_static(int fd, char *filename, int filesize)
{
int srcfd;
char *srcp, filetype[MAXLINE], buf[MAXBUF];
... /* Send response headers to client */
/* Send response body to client */
srcfd = Open(filename, O_RDONLY, 0);
srcp = (char *)Malloc(filesize);
Rio_readn(srcfd, srcp, filesize);
Close(srcfd);
Rio_writen(fd, srcp, filesize);
Free(srcp);
}
lkct-1600012920
/**
* This program reads info about memory mapping of this precess from /proc/$pid/
* to determine accessible VM pages. Vars are declared to statically allocate to
* hold back dynamic allocation of new pages (maybe can't avoid completely).
*/
#include <stdio.h>
#include <unistd.h>
pid_t pid; // pid of current process
char filename[32]; // filename of maps file
FILE *maps; // file pointer of maps file
char buf[256]; // buffer of lines in maps
unsigned long start, end; // start and end of VMA
int main()
{
pid = getpid();
sprintf(filename, "/proc/%d/maps", pid);
maps = fopen(filename, "r"); // open maps file
while (fgets(buf, 255, maps) != NULL) // get a line
{
sscanf(buf, "%lx-%lx", &start, &end); // "start-end" at start of line
start >>= 12;
end >>= 12; // lower 12 bits are VPO
while (start != end)
{
printf("0x%lx\n", start++); // print each VPN
}
}
return 0;
}
/**
* This program trys to access every possible page number, and catches SIGSEGV
* to exclude those can't be accessed. Beacuse each section starts at relatively
* fixed address, a start array is used to save the boundaries of possible
* addresses to reduce the number of many must-fail trials. However, shared libs
* can start at a wide range of addresses, so it takes quite some time to find
* them. Also, stack grows downwards, so that section is printed reversed to
* avoid large space consumption by saving and reversing those.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <setjmp.h>
typedef void handler_t(int);
volatile unsigned long pageNo; /* current page No. */
volatile int section; /* section in arrays below */
volatile int visited; /* currently visited one page successfully */
volatile unsigned long start[5] = {
/* start pageNo of sections */
0x400, /* text */
0x600, /* data and heap */
0x7f0000000, /* shared libs */
0x7ffffffff, /* stack and vvar+vdso */
0xffffffffff600 /* vdso */
};
volatile int dir[4] = {1, 1, 1, -1}; /* direction to enumerate */
volatile int vis[4] = {1, 2, 1, 2}; /* needed visit times, num of cont parts */
sigjmp_buf env; /* jmp buf for longjmp*/
/*
* unix_error - unix-style error routine
*/
void unix_error(char *msg)
{
fprintf(stdout, "%s: %s\n", msg, strerror(errno));
exit(1);
}
/*
* Signal - wrapper for the sigaction function
*/
handler_t *Signal(int signum, handler_t *handler)
{
struct sigaction action, old_action;
action.sa_handler = handler;
sigemptyset(&action.sa_mask); /* block sigs of type being handled */
action.sa_flags = SA_RESTART; /* restart syscalls if possible */
if (sigaction(signum, &action, &old_action) < 0)
unix_error("Signal error");
return (old_action.sa_handler);
}
/*
* sig_handler - catches SIGSEGV to continue enumeration
*/
void sig_handler(int sig)
{
pageNo += dir[section]; /* move on to next page */
if (visited) /* a continual part finished, move to next part */
{
visited = 0;
--vis[section];
if (vis[section] == 0) /* section finished, next section */
{
++section;
pageNo = start[section];
}
}
siglongjmp(env, 1); /* try again */
}
int main()
{
Signal(SIGSEGV, sig_handler);
section = 0; /* init */
visited = 0;
pageNo = start[section];
char c;
while (1)
{
sigsetjmp(env, 1);
c = *(char *)(pageNo << 12); /* try to access, can't use optimization */
printf("0x%lx\n", pageNo); /* access trail succeeded */
if (section >= 4) /* all finished */
break;
pageNo += dir[section]; /* try next */
visited = 1;
}
return 0;
}
The two solutions are coss-compared and gives the same set of page numbers.
But they differ in method and efficiency.
lkct-1600012920
Process pair | Concurrent? |
---|---|
AB | N |
AC | Y |
AD | Y |
BC | Y |
BD | Y |
CD | Y |
ACE
// complete code is presented because it's hard to describe changes simply
#include "csapp.h"
#define N 2
int main()
{
int status, i, sig;
pid_t pid;
char buf[1 << 6];
/* Parent creates N children */
for (i = 0; i < N; i++)
if ((pid = Fork()) == 0) /* Child */
*(char *)&main = 0; /* main in .text is read only */
/* Parent reaps N children in no particular order */
while ((pid = waitpid(-1, &status, 0)) > 0)
{
if (WIFEXITED(status))
printf("child %d terminated normally with exit status=%d\n",
pid, WEXITSTATUS(status));
else if (WIFSIGNALED(status))
{
sprintf(buf, "child %d terminated by signal %d",
pid, (sig = WTERMSIG(status)));
psignal(sig, buf);
}
else
printf("child %d terminated abnormally\n", pid);
}
/* The only normal termination is if there are no more children */
if (errno != ECHILD)
unix_error("waitpid error");
exit(0);
}
fd2 = 4
if (Fork() == 0) { /* Child */
int fd = Open("foo.txt", O_RDONLY, 0); /* should == 3 */
Dup2(fd, STDIN_FILENO);
Close(fd); /* fd3 closed, causes problem */
Execve("fstatcheck", argv, envp);
}
第一题:正确
第二题:正确
第三题:正确
第四题:正确
第五题:正确
Symbol | swap.o .symtab entry? | Symbol type | Module where defined | Section |
---|---|---|---|---|
count | Y | --- ? | swap.o | .bss |
lkct-1600012920
Symbol | swap.o .symtab entry? | Symbol type | Module where defined | Section |
---|---|---|---|---|
buf | Y | extern | m.o | .data |
bufp0 | Y | global | swap.o | .data |
bufp1 | Y | local | swap.o | .bss |
swap | Y | global | swap.o | .text |
temp | N | --- | --- | --- |
incr | Y | local | swap.o | .text |
count | Y | local | swap.o | .bss |
// only changed definition of variable x in bar5.c
static double x;
0xa
0x22
libc.a: 1579
libm.a: 471
没有不同
linux-vdso.so.1
libc.so.6
ld-linux-x86-64.so.2
第一题:有一处错误
第二题:正确
第三题:正确
第四题:正确
lkct-1600012920
/* Bubble sort: Pointer version */
void bubble_p(long *data, long count)
{
long i;
long *p;
for (count--; count > 0; count--)
{
p = data;
for (i = 0; i < count; i++, p++)
{
if (*(p + 1) < *p)
{
long t = *(p + 1);
*(p + 1) = *p;
*p = t;
}
}
}
}
# Execution begins at address 0
.pos 0
init:
irmovl Stack, %esp # Set up stack pointer
irmovl Stack, %ebp # Set up base pointer
call main # Execute main program
halt # Terminate program
.align 4
# data block
src:
.long 0x00a
.long 0x0b0
.long 0xc00
.long 0x111
.long 0x222
.long 0x333
main:
pushl %ebp
rrmovl %esp, %ebp
irmovl 6, %eax
pushl %eax # push arg 2: count = 6
irmovl src, %eax
pushl %eax # push arg 1: data = src
call bubble_p # call bubble_p(data, count)
rrmovl %ebp, %esp
popl %ebp
ret
# void bubble_p(long *data, long count)
bubble_p:
pushl %ebp
pushl %esi # save esi (callee saved)
pushl %edi # save edi (callee saved)
pushl %ebx # save ebx (callee saved)
rrmovl %esp, %ebp
mrmovl 20(%ebp), %edi # edi = data
mrmovl 24(%ebp), %esi # esi = count
jmp Test # goto Test:
Loop0:
rrmovl %edi, %eax # eax = p = data
xorl %edx, %edx # edx = i = 0
Loop1:
mrmovl 4(%eax), %ecx # ecx = *(p + 1)
mrmovl (%eax), %ebx # ebx = *p
subl %ebx, %ecx # ecx - ebx
jge Nops # !(*(p + 1) < *p), goto Nops:
mrmovl 4(%eax), %ecx # ecx = *(p + 1)
rmmovl %ebx, 4(%eax) # *(p + 1) = ebx
rmmovl %ecx, (%eax) # *p = ecx
Nops:
irmovl $1, %ecx
addl %ecx, %edx # i++
irmovl $4, %ecx
addl %ecx, %eax # p++
rrmovl %edx, %ecx
subl %esi, %ecx # i - count
jl Loop1 # i < count, foro Loop1:
Test:
irmovl $-1, %ecx
addl %ecx, %esi # count--
jg Loop0 # goto Loop0:
rrmovl %ebp, %esp
popl %ebx # restore regs
popl %edi
popl %esi
popl %ebp
ret
# The stack starts here and grows to lower addresses
.pos 0x100
Stack:
Here only changes of pipe-btfnt.hcl are presented
# change PC selection according to prediction and M_Cnd
## What address should instruction be fetched at
int f_pc = [
# Mispredicted branch. Fetch at incremented PC
M_icode == IJXX && (M_ifun != UNCOND && M_valE < M_valA && !M_Cnd) : M_valA;
# Mispredicted branch. Fetch at given address
M_icode == IJXX && (M_ifun != UNCOND && M_valE >= M_valA && M_Cnd) : M_valE;
# Completion of RET instruction.
W_icode == IRET : W_valM;
# Default: Use predicted value of PC
1 : F_predPC;
];
# predict PC using backward-taken-forward-not-taken
# Predict next value of PC
int f_predPC = [
# BBTFNT: This is where you'll change the branch prediction rule
f_icode in { ICALL }
|| f_icode == IJXX && (f_ifun == UNCOND || f_ifun != UNCOND && f_valC < f_valP) : f_valC;
1 : f_valP;
];
# get E_valC into e_valE then M_valE for f_pc
## Select input A to ALU
int aluA = [
E_icode in { IRRMOVL, IOPL } : E_valA;
E_icode in { IIRMOVL, IRMMOVL, IMRMOVL, IJXX } : E_valC;
E_icode in { ICALL, IPUSHL } : -4;
E_icode in { IRET, IPOPL } : 4;
# Other instructions don't need ALU
];
## Select input B to ALU
int aluB = [
E_icode in { IRMMOVL, IMRMOVL, IOPL, ICALL,
IPUSHL, IRET, IPOPL } : E_valB;
E_icode in { IRRMOVL, IIRMOVL, IJXX } : 0;
# Other instructions don't need ALU
];
# condition of misprediction changed according to prediction
bool D_bubble =
# Mispredicted branch
E_icode == IJXX && (E_ifun != UNCOND && (E_valC < E_valA && !e_Cnd || E_valC >= E_valA && e_Cnd)) ||
# BBTFNT: This condition will change
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
!(E_icode in { IMRMOVL, IPOPL } && E_dstM in { d_srcA, d_srcB }) &&
IRET in { D_icode, E_icode, M_icode };
bool E_bubble =
# Mispredicted branch
E_icode == IJXX && (E_ifun != UNCOND && (E_valC < E_valA && !e_Cnd || E_valC >= E_valA && e_Cnd)) ||
# BBTFNT: This condition will change
# Conditions for a load/use hazard
E_icode in { IMRMOVL, IPOPL } &&
E_dstM in { d_srcA, d_srcB};
| %rax | %rbp | %rbx | %rcx | %xmm0 | %xmm1 |
| | | | |
| | | | | ----
| | | |>------+----------->
| |>-----+------+-------+----------->load vmovsd 0(%rbp, %rcx, 8), %xmm1
| | | | | |<--<
| | | | | | ---- ------
| | | |>------+-------+--->
|>-----+------+------+-------+-------+--->load
| | | | | | >-->|
| | | | | | ---- | vmulsd (%rax, %rcx, 8), %xmm1, %xmm1
| | | | | ->--> <--<-
| | | | | mul
| | | | | |<--<
| | | | | | ---- ------
| | | | | |>-->
| | | | ->------+--->add vaddsd %xmm1, %xmm0, %xmm0
| | | | |<------+---<
| | | | | | ---- ------
| | | ->------+-------+--->
| | | | | add addq $1, %rcx
| | | |<------+-------+---<
| | | | | | ---- ------
| | | |>------+-------+--->
| | |>-----+-------+-------+--->cmp cmpq %rbx, %rcx
| | | | | | >-->|
| | | | | | ---- | ------
| | | | | | <--<-
| | | | | | jne jne .L15
| | | | | |
| | | | | | ----
| | | | | |
| %rax | %rbp | %rbx | %rcx | %xmm0 | %xmm1 |
| %xmm0 | %rbp | %rax | %rcx | %rbx |
|| | | | |
|| | |<---+-----<| |
|| |>load | | |
|| | | | | |
|| | | |<-+-----<| |
|| | |load<| | |
|| | | | | | |
|| | >mul< | add |
|| | | | |>--->cmp
add<-----+---< | | |
|| | | | jne
| %xmm0 | %rbp | %rax | %rcx |
关键路径以||标出
3.00
1.00
浮点乘法发射时间为1个周期,而且乘法涉及的操作数不受运算影响,故可以提前、连续地发射乘法运算;相反,浮点加法在关键路径上,故其3个周期的延迟成为影响CPE的主要因素。
long limit = length - 5;
for (i = 0; i < limit; i += 6)
{
sum = sum + udata[i] * vdata[i] + udata[i + 1] * vdata[i + 1]
+ udata[i + 2] * vdata[i + 2] + udata[i + 3] * vdata[i + 3]
+ udata[i + 4] * vdata[i + 4] + udata[i + 5] * vdata[i + 5];
}
for (; i < length; i++)
{
sum = sum + udata[i] * vdata[i];
}
// 以上只给出修改了的部分
Haswell只有两个加载单元,每周期只能加载两个值,而内积的每个元素的计算都要加载u、v中各一个值,所以吞吐量界限为1.00CPE,不可能更小。
浮点时kx1循环展开无论展开到多少都必须对每个元素顺序执行一次浮点加法,每次都要用到上一次的结果,而浮点加延迟为3个周期,故CPE不能改进到3.00以下。
第一题:正确
第二题:正确
第三题:正确
第四题:正确
lkct-1600012920
A = 9
B = 5
CNT = 7
typedef struct {
long idx;
long x[4];
} a_struct;
e1.p 0
e1.y 8
e2.x 0
e2.next 8
16
void proc(union ele *up)
{
up->e2.x = *(up->e2.next->e1.p) - up->e2.next->e1.y;
}
第一题:正确
第二题:正确
第三题:正确
lkct-1600012920
long loop(long x, int n)
{
long result = 0;
long mask;
for (mask = 1; mask != 0; mask = mask << n)
{
result |= (mask & x);
}
return result;
}
// x in %rdi, n in %esi, result in %rax, mask in %rdx, %r8 is tmp(no variable)
typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;
long switch3(long *p1, long *p2, mode_t action)
{
long result = 0;
switch (action)
{
case MODE_A:
result = *p2;
*p2 = *p1;
break;
case MODE_B:
*p1 = result = *p2 + *p1;
break;
case MODE_C:
*p1 = 59;
result = *p2;
break;
case MODE_D:
*p1 = *p2;
result = 27;
break;
case MODE_E:
result = 27;
break;
default:
result = 12;
break;
}
return result;
}
&A[i][j][k] = A + 8 * ((i * S + j) * T + k);
R = 7; S = 5; T = 13;
第一题:正确
第二题:正确
第三题:正确
Format A | Format B | ||
---|---|---|---|
bit | val | bit | val |
0 00000 101 | 5/131072 | 0 0000 0001 | 1/1024 |
1 11011 000 | -4096 | 1 1110 1111 | -248 |
lkct-1600012920
Format A | Format B | ||
---|---|---|---|
bit | val | bit | val |
1 01110 001 | -9/16 | 1 0110 0010 | -9/16 |
0 10110 101 | 208 | 0 1110 1010 | 208 |
1 00111 110 | -7/1024 | 1 0000 0111 | -7/1024 |
0 00000 101 | 5/131072 | 0 0000 0000 | 0 |
1 11011 000 | -4096 | 1 1111 0000 | -inf |
0 11000 100 | 768 | 0 1111 0000 | inf |
float_bits float_negate(float_bits f)
{
unsigned exp = (f >> 23) & 0xFF;
unsigned frac = f & 0x7FFFFF;
if ((exp == 0xFF) && (frac != 0)) // if nan, return f
return f;
return (1 << 31) ^ f; // not nan, flip sign bit
}
// compared with float negate, returns the same except for -nan
int float_f2i(float_bits f)
{
unsigned s = f >> 31;
unsigned exp = (f >> 23) & 0xFF;
unsigned M = (1 << 23) + (f & 0x7FFFFF);
if (exp > 157)
{
/*
* return 0x80000000 when
* nan : exp == 255
* inf : exp == 255
* abs > 0x7FFFFFFF : E > 30, exp > 157
*/
return 0x80000000;
}
if (exp < 127)
{
// exp < 127, E < 0, abs < 1
return 0;
}
// M << 7 to highest possible bit, then rsh to correct position
return (1 - s * 2) * ((M << 7) >> (157 - exp));
}
// compared with float to int cast, returns all the same
第一题:有1个错误
第二题:正确
第三题:正确
lkct-1600012920
!(~x)
// if and only if x == 0xFFFFFFFF, ~x == 0, thus !(...) == 1
!x
// turn 0 to 1, other to 0
!(~(x & 0xFF))
// same as A, but use x & 0xFF to rid higher bytes
!(x & (0xFF << (8 * (sizeof(int) - 1))))
// same as B, but use x & 0xFF000000 to rid lower bytes. number of 0s is determined by length of int
int int_shifts_are_arithmetic()
{
return ((-1) >> 1) < 0;
}
// -1 is 0xFFFF (number of Fs does not matter). if arithmetic, -1 >> 1 is still 0xFFFF, which is still -1 (< 0). if not, -1 >> 1 is 0x7FFF, which if INT_MAX (> 0)
int odd_ones(unsigned x) // 11 operators in total
{
int a = x ^ (x >> 16);
int b = a ^ (a >> 8);
int c = b ^ (b >> 4);
int d = c ^ (c >> 2);
int e = d ^ (d >> 1);
return e & 1;
}
// 0 ^ 0 == 1 ^ 1 is 0, 1 ^ 0 == 0 ^ 1 is 1, thus ^ doesn't change parity of number os 1s. therefore, parity of number of 1s in a's lower 16 bits equals that in x's 32 bit, and so on, parity of number of 1s in e's lower 1 bit equals that in d's lower 2 bit, also equals that in x's 32 bit. so e & 1 is the answer
第一题:正确
第二题:正确
第三题:正确