Skip to content

Commit f1fd3b4

Browse files
committed
rewrite code
Finding the match now uses a one-pass approach rather than two passes: the match is determined while the tree is being built. Also, replaced getword with the improved version from 6-1.
1 parent 32d4e0d commit f1fd3b4

File tree

1 file changed

+118
-170
lines changed

1 file changed

+118
-170
lines changed

chapter06/6-2.c

Lines changed: 118 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -4,47 +4,46 @@
44
* 6 characters, but different somewhere thereafter. Don't count words within
55
* string and comments. Make 6 a parameter that can be set from the command
66
* line.
7-
* Note: getword comment detection could be improved.
7+
*
88
* By Faisal Saadatmand
99
*/
1010

11-
#include <stdio.h>
1211
#include <ctype.h>
12+
#include <stdio.h>
13+
#include <stdlib.h> /* for malloc && atoi */
1314
#include <string.h>
14-
#include <stdlib.h> /* for malloc && atoi */
1515

1616
#define MAXWORD 100
1717
#define BUFSIZE 100
18-
#define NKEYS (sizeof keytab / sizeof keytab[0])
19-
#define NSYMBOLS (sizeof symbol / sizeof symbol[0])
20-
21-
/* functions */
22-
struct tnode *addtree(struct tnode *, char *);
23-
void treeprint(struct tnode *);
24-
int getword(char *, int);
25-
struct tnode *talloc(void); /* alocate memory to new tree node */
26-
char *strDup(char *); /* copy string into safe place */
27-
struct key *binsearch(char *, struct key *, int);
28-
void findVariables(struct tnode *, int);
29-
struct tnode *freetree(struct tnode *);
30-
31-
/* globals */
32-
int buf[BUFSIZE]; /* buffer from ungetch */
33-
int bufp = 0; /* next free position in buf */
34-
35-
struct tnode { /* the tree node: */
36-
char *word; /* points to the text */
37-
int count; /* number of occurrences */
38-
int match; /* matching word */
39-
struct tnode *left; /* left child */
40-
struct tnode *right; /* right child */
18+
#define NKEYS (int) (sizeof keytab / sizeof keytab[0])
19+
20+
/* types */
21+
struct tnode { /* the tree node: */
22+
char *word; /* points to the text */
23+
int match; /* nonzero if word matches another in its first n chars */
24+
struct tnode *left; /* left child */
25+
struct tnode *right; /* right child */
4126
};
4227

4328
struct key {
4429
char *word;
4530
int count;
4631
};
4732

33+
/* functions */
34+
int getword(char *, int);
35+
struct key *binsearch(char *, struct key *, int);
36+
struct tnode *addtree(struct tnode *, char *, size_t n);
37+
struct tnode *talloc(void); /* allocate memory for a new tree node */
38+
char *strDup(char *); /* copy string into safe place */
39+
void checkmatch(char *, struct tnode *, size_t, int *);
40+
void printtree(struct tnode *);
41+
void freetree(struct tnode *);
42+
43+
/* globals */
44+
int buf[BUFSIZE]; /* buffer from ungetch */
45+
int bufp = 0; /* next free position in buf */
46+
4847
struct key keytab[] ={
4948
{ "auto", 0 },
5049
{ "break", 0 },
@@ -80,141 +79,71 @@ struct key keytab[] ={
8079
{ "while", 0 },
8180
};
8281

83-
struct key symbol[] = { /* array is sorted for binary search */
84-
{ "\"", 0 },
85-
{ "#", 0 },
86-
{ "*", 0 },
87-
{ "/", 0 },
88-
{ "\\", 0 },
89-
{ "_", 0 },
90-
};
91-
9282
/* addtree: add a node with w, at or below p */
93-
struct tnode *addtree(struct tnode *p, char *w)
83+
struct tnode *addtree(struct tnode *p, char *w, size_t n)
9484
{
9585
int cond;
86+
static int found;
9687

97-
if (p == NULL) { /* a new word has arrived */
98-
p = talloc(); /* make a new node */
99-
p->word = strDup(w); /* copy data to it */
100-
p->count = 1;
101-
p->match = 0; /* initialize match */
88+
if (!p) { /* a new word has arrived */
89+
p = talloc(); /* make a new node */
90+
p->word = strDup(w); /* copy data to it */
91+
p->match = *(&found); /* p->match = value pointed to by &found */
10292
p->left = p->right = NULL;
103-
} else if ((cond = strcmp(w, p->word)) == 0)
104-
p->count++; /* repeated word */
105-
else if (cond < 0) /* less thant into left subtree */
106-
p->left = addtree(p->left, w);
107-
else
108-
p->right = addtree(p->right, w);
109-
110-
return p;
111-
}
112-
113-
/* treeprint: in-order print of tree p */
114-
void treeprint(struct tnode *p)
115-
{
116-
if (p != NULL) {
117-
treeprint(p->left);
118-
if (p->match > 0)
119-
printf("%4d %4d %s\n", p->count, p->match, p->word);
120-
treeprint(p->right);
93+
} else if ((cond = strcmp(w, p->word)) < 0) { /* less than ? */
94+
checkmatch(w, p, n, &found);
95+
p->left = addtree(p->left, w, n); /* go left */
96+
} else if (cond > 0) { /* greater than */
97+
checkmatch(w, p, n, &found);
98+
p->right = addtree(p->right, w, n); /* go right */
12199
}
100+
found = 0; /* reset */
101+
return p;
122102
}
123103

124-
int getch(void) /* get a (possibly pushed back) character */
104+
/* checkmatch: set current node's flag variable and the found variable to 1, if
105+
* w matches a word in the tree */
106+
void checkmatch(char *w, struct tnode *p, size_t n, int *found)
125107
{
126-
return (bufp > 0) ? buf[--bufp] : getchar();
108+
if (!strncmp(w, p->word, n)) /* is w a match? */
109+
p->match = *found = 1; /* mark the current and the next nodes */
127110
}
128111

129-
void ungetch(int c) /* push character back on input */
112+
/* printree: in-order print of tree p */
113+
void printree(struct tnode *p)
130114
{
131-
if (bufp >= BUFSIZE)
132-
printf("ungetch: too many characters\n");
133-
else
134-
buf[bufp++] = c;
135-
}
136-
137-
/* getword: get next word or character from input */
138-
int getword(char *word, int lim)
139-
{
140-
int c, getch(void);
141-
void ungetch(int);
142-
char *w = word;
143-
struct key *p;
144-
145-
while (isspace(c = getch()))
146-
;
147-
148-
if (c != EOF) {
149-
*w++ = c;
150-
*w = '\0';
151-
} else
152-
return c;
153-
154-
if (!isalpha(c) && (p = binsearch(word, symbol, NSYMBOLS)) == NULL)
155-
return c;
156-
157-
switch (c) {
158-
case '\\': /* handle escape sequences */
159-
c = getch();
160-
break;
161-
case '\"': /* skip words inside string constant */
162-
while ((c = getch()) != '\"')
163-
if (c == EOF)
164-
return c;
165-
break;
166-
case '#': /* skip preprocessor control lines */
167-
while ((c = getch()) != '\n')
168-
;
169-
ungetch(c);
170-
break;
171-
case '/': /* skip words inside C comments */
172-
if ((c = getch()) == '*') {
173-
while ((c = getch()))
174-
if (c == '*' && (c = getch()) == '/')
175-
break;
176-
else if (c == EOF)
177-
return c;
178-
} else /* don't skip pointer variables */
179-
ungetch(c);
180-
break;
181-
default:
182-
for ( ; --lim > 0; w++)
183-
if (!isalnum(*w = getch()) && *w != '_') {
184-
ungetch(*w);
185-
break;
186-
}
187-
break;
188-
}
189-
190-
*w = '\0';
191-
return word[0];
115+
if (!p) /* exit condition */
116+
return;
117+
printree(p->left);
118+
if (p->match)
119+
printf(" %s\n", p->word);
120+
printree(p->right);
192121
}
193122

194123
/* talloc: make a tnode */
195124
struct tnode *talloc(void)
196125
{
197-
return (struct tnode *) malloc(sizeof(struct tnode));
126+
return malloc(sizeof(struct tnode));
198127
}
199128

200129
/* freetree: free allocated heap memory of node tree */
201-
struct tnode *freetree(struct tnode *node)
130+
void freetree(struct tnode *node)
202131
{
203-
if (node != NULL) {
204-
freetree(node->left);
205-
freetree(node->right);
206-
free(node->word);
207-
free(node);
208-
}
209-
return node;
132+
if (!node)
133+
return;
134+
freetree(node->left);
135+
freetree(node->right);
136+
free(node->word);
137+
free(node);
210138
}
139+
211140
/*strDup: make a duplicate of s */
212141
char *strDup(char *s)
213142
{
214143
char *p;
215144

216-
p = (char *) malloc(strlen(s) + 1); /* +1 for '\0' */
217-
if (p != NULL)
145+
p = malloc(strlen(s) + 1); /* +1 for '\0' */
146+
if (p)
218147
strcpy(p, s);
219148
return p;
220149
}
@@ -239,49 +168,68 @@ struct key *binsearch(char *word, struct key *tab, int n)
239168
return NULL;
240169
}
241170

242-
/* findVariables: finds matching variables in a binary search tree, using LDR
243-
* (inorder) traversal */
244-
void findVariables(struct tnode *p, int n)
171+
/* getword: get next word or character from input */
172+
int getword(char *word, int lim)
245173
{
246-
if (p != NULL) {
247-
findVariables(p->left, n);
248-
if (p->left != NULL)
249-
if (strncmp(p->word, p->left->word, n) == 0)
250-
p->match = p->left->match = 1;
251-
if (p->right != NULL)
252-
if (strncmp(p->word, p->right->word, n) == 0)
253-
p->match = p->right->match = 1;
254-
findVariables(p->right, n);
255-
}
174+
int c, getch(void);
175+
void ungetch(int);
176+
char *w = word;
177+
178+
while (isspace(c = getch()))
179+
;
180+
if (c != EOF)
181+
*w++ = c;
182+
if (isalpha(c) || c == '_' || c == '#') {
183+
for ( ; --lim > 0; ++w)
184+
if (!isalnum(*w = getch()) && *w != '_') {
185+
ungetch(*w);
186+
break;
187+
}
188+
} else if (c == '\'') /* skip character constants */
189+
while ((c = getch()) != '\'')
190+
;
191+
else if (c == '\"') { /* skip string constants */
192+
while ((c = getch()) != '\"')
193+
if (c == '\\')
194+
getch();
195+
} else if (c == '/' && (c = getch()) == '*') /* skip comments */
196+
while ((c = getch()) != EOF)
197+
if (c == '*' && (c = getch()) == '/')
198+
break;
199+
*w ='\0';
200+
return c;
256201
}
257202

258-
/* word frequency count */
259-
int main(int argc, char *argv[])
203+
/* get a (possibly pushed back) character */
204+
int getch(void)
260205
{
261-
struct tnode *root; /* root node */
262-
struct key *p; /* currently searched word */
263-
char word[MAXWORD]; /* currently read word */
264-
int nChar; /* number of characters to match */
206+
return (bufp > 0) ? buf[--bufp] : getchar();
207+
}
265208

266-
if (argc != 2)
267-
nChar = 6;
209+
/* push character back on input */
210+
void ungetch(int c)
211+
{
212+
if (bufp >= BUFSIZE)
213+
printf("ungetch: too many characters\n");
268214
else
269-
nChar = atoi(argv[1]);
215+
buf[bufp++] = c;
216+
}
270217

271-
root = NULL; /* initialize root node */
272-
while (getword(word, MAXWORD) != EOF)
273-
if ((isalpha(word[0]) || word[0] == '_' || word[0] == '*')
274-
&& (int) strlen(word) > nChar) {
275-
if ((p = binsearch(word, keytab, NKEYS)) == NULL) /* skip C */
276-
root = addtree(root, word); /* reserved words */
277-
else
278-
++p->count; /* not necessary */
279-
}
280-
findVariables(root, nChar);
281-
treeprint(root);
282-
root = freetree(root); /* clean up */
218+
int main(int argc, char *argv[])
219+
{
220+
struct tnode *root; /* root node */
221+
char word[MAXWORD]; /* currently read word */
222+
size_t nChar; /* number of characters to match */
283223

284-
for (size_t i = 0; i < sizeof(keytab) / sizeof(keytab[0]); ++i)
285-
printf("%s %i\n", keytab[i].word, keytab[i].count);
224+
nChar = (--argc == 1) ? atoi(*++argv) : 6; /* Note: no input error check */
225+
root = NULL;
226+
while (getword(word, MAXWORD) != EOF)
227+
if ((isalpha(word[0]) || word[0] == '_') && strlen(word) >= nChar &&
228+
!binsearch(word, keytab, NKEYS)) /* skip reserved words */
229+
root = addtree(root, word, nChar);
230+
printree(root);
231+
/* clean up */
232+
freetree(root);
233+
root = NULL;
286234
return 0;
287235
}

0 commit comments

Comments
 (0)