From cbff2a5951bcbb560802dbe9f4c2f55a0ca2b16d Mon Sep 17 00:00:00 2001 From: Andrew Taylor Date: Fri, 26 Jul 2019 09:28:13 +1000 Subject: [PATCH] more explanations --- compiler_explanations.py | 17 ++-- docs/assign_function_to_int.md | 13 ++++ docs/assign_to_array.md | 27 +++++++ docs/eof_byte.md | 60 +++++++++++++++ docs/function-definition-not-allowed-here.md | 33 ++++++++ docs/function-variable-clash.md | 32 ++++++++ docs/missing_newline.md | 39 ++++++++++ docs/stack_use_after_return.md | 43 +++++++++++ docs/zero_byte.md | 81 ++++++++++++++++++++ drive_gdb.py | 79 +++++++++++-------- watch_valgrind.py | 6 +- 11 files changed, 388 insertions(+), 42 deletions(-) create mode 100644 docs/assign_function_to_int.md create mode 100644 docs/assign_to_array.md create mode 100644 docs/eof_byte.md create mode 100644 docs/function-definition-not-allowed-here.md create mode 100644 docs/function-variable-clash.md create mode 100644 docs/missing_newline.md create mode 100644 docs/stack_use_after_return.md create mode 100644 docs/zero_byte.md diff --git a/compiler_explanations.py b/compiler_explanations.py index a5ab998..ec78508 100644 --- a/compiler_explanations.py +++ b/compiler_explanations.py @@ -2,8 +2,7 @@ import copy, re, sys import colors - -DEFAULT_EXPLANATION_URL = "https://comp1511unsw.github.io/dcc/" +from drive_gdb import explanation_url def get_explanation(message, colorize_output): for e in explanations: @@ -55,7 +54,7 @@ def __init__(self, label=None, precondition=None, regex=None, explanation=None, def get(self, message, colorize_output): explanation = self.get_short_explanation(message, colorize_output) if explanation and (self.long_explanation or self.long_explanation_url): - url = self.long_explanation_url or DEFAULT_EXPLANATION_URL + self.label + '.html' + url = self.long_explanation_url or explanation_url(self.label) explanation += "\n See more information here: " + url return explanation @@ -479,8 +478,8 @@ def get_short_explanation(self, message, colorize_output): if (argc == 1) return 42; else if (argc == 1) - return 43; - else + return 43; + else return 44; } """, @@ -502,7 +501,7 @@ def get_short_explanation(self, message, colorize_output): if (argc == 1) return 42; else - return 42; + return 42; } """, ), @@ -522,7 +521,7 @@ def get_short_explanation(self, message, colorize_output): if (argc > 1 || argc < 3) return 42; else - return 43; + return 43; } """, ), @@ -542,7 +541,7 @@ def get_short_explanation(self, message, colorize_output): if (argc > 1 && argc < 1) return 42; else - return 43; + return 43; } """, ), @@ -562,7 +561,7 @@ def get_short_explanation(self, message, colorize_output): if (argc > 1 ||argc > 1) return 42; else - return 43; + return 43; } """, ), diff --git a/docs/assign_function_to_int.md b/docs/assign_function_to_int.md new file mode 100644 index 0000000..5d807de --- /dev/null +++ b/docs/assign_function_to_int.md @@ -0,0 +1,13 @@ +You will get this error if for example, you write code like this:: + +```c +int a; +a = square; +``` + +when you wanted to do this: + +```c +int a; +a = square(5); +``` diff --git a/docs/assign_to_array.md b/docs/assign_to_array.md new file mode 100644 index 0000000..6d900e1 --- /dev/null +++ b/docs/assign_to_array.md @@ -0,0 +1,27 @@ +You are not permitted to assign arrays in C. + +You can NOT do this: + +```c +#define ARRAY_SIZE 10 + +int array1[ARRAY_SIZE] = {0,1,2,3,4,5,6,7,8,9}; +int array2[ARRAY_SIZE]; + +array2 = array1; +``` + +You can instead use a loop to copy each array element individually. + +```c + +#define ARRAY_SIZE 10 + +int array1[ARRAY_SIZE] = {0,1,2,3,4,5,6,7,8,9}; +int array2[ARRAY_SIZE]; + + +for (int i = 0; i < 10; i++) { + array2[ARRAY_SIZE] = array1[ARRAY_SIZE]; +} +``` diff --git a/docs/eof_byte.md b/docs/eof_byte.md new file mode 100644 index 0000000..c23cdc4 --- /dev/null +++ b/docs/eof_byte.md @@ -0,0 +1,60 @@ +Accidentally printing the special **EOF** value returned by the functions _getchar_, _getc_ and _fgetc_. + +For example, this program prints the **EOF** value before the loop exits: + +```c +#include + +int main(void) { + int c = 0; + while (c != EOF) { + int c = getchar(); + putchar(c); + } + return 0; +} +``` + +The special **EOF** value typically is defined to be `-1` (in ``) +and when printed is invisible. So the program appears to work. + +```console +$ dcc cat.c +$ echo cat | ./a.out +cat +$ +``` + +But the program will fail automated testing because it is printing an extra byte. + +This is a program that output the same bytes as the above example. + +```c +#include + +int main(void) { + + putchar('c'); + putchar('a'); + putchar('t'); + putchar('\n'); + putchar(EOF); + + return 0; +} +``` + +This is a program which doesn't print the EOF value: + +```c +#include + +int main(void) { + int c = getchar(); + while (c != EOF) { + putchar(c); + c = getchar(); + } + return 0; +} +``` diff --git a/docs/function-definition-not-allowed-here.md b/docs/function-definition-not-allowed-here.md new file mode 100644 index 0000000..c3adcd5 --- /dev/null +++ b/docs/function-definition-not-allowed-here.md @@ -0,0 +1,33 @@ + +If you forget a curly bracket, e.g. +```c +int sum(int x, int y) { + if (x > y) { + return x + y; + } else { + return x - y; + // <-- missing closing curly bracket +} + +int f(int x) { + return sum(x, x); +} +``` + +The compiler will give an error when the next function definition starts. + +You can fix by adding the missing curly bracket (brace): + +```c +int sum(int x, int y) { + if (x > y) { + return x + y; + } else { + return x - y; + } +} + +int f(int x) { + return sum(x, x); +} +``` diff --git a/docs/function-variable-clash.md b/docs/function-variable-clash.md new file mode 100644 index 0000000..fb5ef9c --- /dev/null +++ b/docs/function-variable-clash.md @@ -0,0 +1,32 @@ +It is legal to have local variable with the same name as a function, but you can't then call the function +because the local variable declaration hides the function. + +For example: + +```c +int sum(int x, int y) { + return x + y; +} + +int f(int sum) { + int square = sum * sum; + + // error sum because sum is a variable + return sum(square, square); +} +``` + +You can fix the name clash by changing the name of the variable: + +```c +int sum(int x, int y) { + return x + y; +} + +int f(int a) { + int square = a * a; + + // error sum because sum is a variable + return sum(square, square); +} +``` diff --git a/docs/missing_newline.md b/docs/missing_newline.md new file mode 100644 index 0000000..a01a3ca --- /dev/null +++ b/docs/missing_newline.md @@ -0,0 +1,39 @@ +Forgetting to print a newline is a common mistake. + +For example, this program doesn't print a newline. + +```c +int main(void) { + int answer = 7 * 6; + printf("%d", answer); +} + +``` + +So when its compiled and run you'll see something like this: + +```console +$ dcc answer.c +$ ./a.out +42$ +``` + +If you add a **\n** to the _printf_ like this: + +```c +int main(void) { + int answer = 7 * 6; + printf("%d\n", answer); +} +``` + +It will fix the problem: + + +```console +$ dcc answer.c +$ ./a.out +42 +$ +``` + diff --git a/docs/stack_use_after_return.md b/docs/stack_use_after_return.md new file mode 100644 index 0000000..668b668 --- /dev/null +++ b/docs/stack_use_after_return.md @@ -0,0 +1,43 @@ +A C function can not return a pointer to a local variable. + +A local variable variable does not exist after the function returns. + +For example, you can NOT do this: + +```c +struct node { + struct node *next; + int data; +}; + +struct node *create_node(int i) { + struct node n; + + n.data = i; + n.next = NULL; + + return &n; +} +``` + +A function can return a pointer provided by malloc: + +For example, you can do this: + + +```c +struct node { + struct node *next; + int data; +}; + +struct node *create_node(int i) { + struct node *p; + + p = malloc(sizeof (struct node)); + p->data = i; + p->next = NULL; + + return p; +} +``` diff --git a/docs/zero_byte.md b/docs/zero_byte.md new file mode 100644 index 0000000..ffb6a00 --- /dev/null +++ b/docs/zero_byte.md @@ -0,0 +1,81 @@ +Accidentally printing a zero byte is easy to do in a C program but hard to debug. + +For example, this program accidentally prints a zero byte when it prints `string[3]`: + +```c +#include + +int main(void) { + char *string = "dog"; + + for (int i = 0; i < 4; i++) { + putchar(string[i]); + } + + putchar('\n'); + + return 0; +} +``` + +And because printing of a zero byte is invisible, the program looks like it works: + +```console +$ dcc dog.c +$ ./a.out +dog +$ +``` + + + +This is an equivalent program: + +```c +#include + +int main(void) { + + putchar('d'); + putchar('o'); + putchar('g'); + putchar('\0'); // or putchar(0); + putchar('\n'); + + return 0; +} +``` + +This is a program which doesn't print the zero byte: + +```c +#include + +int main(void) { + char *string = "dog"; + + for (int i = 0; string[i] != '\0'; i++) { + putchar(string[i]); + } + + putchar('\n'); + + return 0; +} +``` + +It is equivalent to this program: + +```c +#include + +int main(void) { + + putchar('d'); + putchar('o'); + putchar('g'); + putchar('\n'); + + return 0; +} +``` diff --git a/drive_gdb.py b/drive_gdb.py index eaace3f..0d9e470 100644 --- a/drive_gdb.py +++ b/drive_gdb.py @@ -1,8 +1,6 @@ import collections, os, platform, re, sys, signal, traceback -from explain_output_difference import explain_output_difference +from explain_output_difference import explain_output_difference, explanation_url import colors - -DEFAULT_EXPLANATION_URL = "https://comp1511unsw.github.io/dcc/" # # Code below is executed from gdb. @@ -85,11 +83,11 @@ def explain_error(output_stream, color): print("runtime error", color("uninitialized variable used", 'red'), file=output_stream) if loc: - print(explain_location(loc, color), file=output_stream) + print(explain_location(loc, color), end='', file=output_stream) print(relevant_variables(loc.surrounding_source(color, clean=True), color), end='', file=output_stream) if (len(stack) > 1): - print(color('Function Call Traceback', 'cyan'), file=output_stream) + print(color('\nFunction Call Traceback', 'cyan'), file=output_stream) for (frame, caller) in zip(stack, stack[1:]): print(frame.function_call(color), 'called at line', color(caller.line_number, 'red'), 'of', color(caller.filename, 'red'), file=output_stream) print(stack[-1].function_call(color), file=output_stream) @@ -126,7 +124,7 @@ def explain_ubsan_error(loc, output_stream, color): source = '' if loc: - source = loc.source_line(clean=True) + source = clean_c_source(loc.source_line()) debug_print(3, 'source', source) explanation = None @@ -244,7 +242,7 @@ def explain_asan_error(loc, output_stream, color): print(prefix, """You have used a pointer to a local variable that no longer exists. When a function returns its local variables are destroyed. """, file=output_stream) - print('For more information see:', DEFAULT_EXPLANATION_URL + '/stack_use_after_return.html', file=output_stream) + print('For more information see:', explanation_url('stack_use_after_return'), file=output_stream) elif "use after" in report: print(prefix, "access to memory that has already been freed.\n", file=output_stream) elif "double free" in report: @@ -290,19 +288,45 @@ def short_description(self, color): return self.function_call(color) + ' in ' + self.location(color) def long_description(self, color): - return 'in ' + self.short_description(color) + ':\n\n' + self.surrounding_source(color, markMiddle=True) + where = 'in ' + self.short_description(color) + source = self.surrounding_source(color, markMiddle=True) + if source: + where += ':\n\n' + source + return where + + def source_line(self): + return fileline(self.filename, self.line_number) - def source_line(self, clean=False): - return fileline(self.filename, self.line_number, clean) def surrounding_source(self, color, radius=2, clean=False, markMiddle=False): - source = '' - for offset in range(-radius, radius+1): - line = fileline(self.filename, self.line_number+offset, clean=clean) - if markMiddle and offset == 0: + lines = [] + marked_line = None + for offset in range(-3*radius, 2*radius): + line = fileline(self.filename, self.line_number+offset) + + if re.match(r'^\S', line) and offset < 0: + lines = [] + + if markMiddle and offset == 0 and line : + marked_line = line line = color(re.sub(r'^ {0,3}', '-->', line), 'red') - source += line - return source + + lines.append(clean_c_source(line) if clean else line) + + if re.match(r'^\S', line) and offset > 0: + break + + while lines and re.match(r'^\s*$', lines[0]): + lines.pop(0) + + while lines and re.match(r'^\s*$', lines[-1]): + lines.pop() + + if len(lines) == 1 and not marked_line: + return '' + + return ''.join(lines).rstrip('\n') + '\n' + def is_user_location(self): if not re.match(r'^[a-zA-Z]', self.function): return False @@ -311,25 +335,18 @@ def is_user_location(self): return True -def fileline(filename, line_number, clean=False): +def fileline(filename, line_number): line_number = int(line_number) - if filename in source: - if line_number < 0 or line_number > len(source[filename]): - return '' - if clean: - return clean_c_source(source[filename][line_number - 1]) - return source[filename][line_number - 1] try: + if filename in source: + return source[filename][line_number - 1] with open(filename, encoding='utf-8', errors='replace') as f: source[filename] = f.readlines() for line in source[filename]: m = re.match(r'^\s*#\s*define\s*(\w+)\s*(.*\S)', line) if m: hash_define[filename][m.group(1)] = (line.rstrip(), m.group(2)) - line = source[filename][line_number - 1].rstrip() + "\n" - if clean: - line = clean_c_source(line) - return line + return source[filename][line_number - 1].rstrip() + "\n" except IOError: debug_print(2, "fileline error can not open: %s" % (filename)) except IndexError: @@ -378,7 +395,8 @@ def parse_gdb_stack_frame(line): filename.startswith("/usr/") or filename.startswith("../sysdeps/") or filename.endswith("libioP.h") or - filename.endswith("iofclose.c") + filename.endswith("iofclose.c") or + filename.startswith("<") ): m = None if m: @@ -432,7 +450,7 @@ def relevant_variables(c_source, color, arrays=[]): except RuntimeError as e: debug_print(2, 'print_variables_expressions: RuntimeError', e) if explanation: - prefix = color('Values when execution stopped:', 'cyan') + prefix = color('\nValues when execution stopped:', 'cyan') explanation = prefix + '\n\n' + explanation return explanation @@ -579,12 +597,13 @@ def explain_location(loc, color): if not isinstance(loc, Location): return "Execution stopped at '%s'" % (loc) else: - return 'Execution stopped here ' + loc.long_description(color) + return 'Execution stopped ' + loc.long_description(color) def debug_print(level, *args, **kwargs): if debug_level >= level: kwargs['file'] = sys.stderr print(*args, **kwargs) + if __name__ == '__main__': drive_gdb() diff --git a/watch_valgrind.py b/watch_valgrind.py index 9edd2e2..810d611 100644 --- a/watch_valgrind.py +++ b/watch_valgrind.py @@ -1,6 +1,6 @@ -import os, re, sys, signal, subprocess +import os, re, sys, signal from start_gdb import start_gdb -from drive_gdb import DEFAULT_EXPLANATION_URL +from drive_gdb import explanation_url import colors # valgrind is being used - we have been invoked via the binary to watch for valgrind errors @@ -41,7 +41,7 @@ def watch_valgrind(): You have used a pointer to a local variable that no longer exists. When a function returns its local variables are destroyed. -For more information see: {DEFAULT_EXPLANATION_URL}/stack_use_after_return.html' +For more information see: {explanation_url("stack_use_after_return")}' """ os.environ['DCC_VALGRIND_ERROR'] = error print('\n' + error, file=sys.stderr)