44 * 6 characters, but different somewhere thereafter. Don't count words within
55 * string and comments. Make 6 a parameter that can be set from the command
66 * line.
7- * Note: getword comment detection could be improved.
7+ *
88 * By Faisal Saadatmand
99 */
1010
11- #include <stdio.h>
1211#include <ctype.h>
12+ #include <stdio.h>
13+ #include <stdlib.h> /* for malloc && atoi */
1314#include <string.h>
14- #include <stdlib.h> /* for malloc && atoi */
1515
#define MAXWORD 100 /* size of the word buffer handed to getword */
#define BUFSIZE 100 /* capacity of the ungetch push-back buffer */
/* number of entries in keytab; fully parenthesized so it is safe inside any
 * larger expression */
#define NKEYS ((int) (sizeof keytab / sizeof keytab[0]))

/* types */
struct tnode {              /* the tree node: */
	char *word;             /* points to the text */
	int match;              /* nonzero once another word in the tree shares
	                           this word's leading characters */
	struct tnode *left;     /* left child */
	struct tnode *right;    /* right child */
};
4227
/* key: one entry of the reserved-word table searched by binsearch */
struct key {
	char *word;     /* the reserved word */
	int count;      /* counter; every keytab initializer sets it to 0 */
};
4732
33+ /* functions */
34+ int getword (char * , int );
35+ struct key * binsearch (char * , struct key * , int );
36+ struct tnode * addtree (struct tnode * , char * , size_t n );
37+ struct tnode * talloc (void ); /* alocate memory to new tree node */
38+ char * strDup (char * ); /* copy string into safe place */
39+ void checkmatch (char * , struct tnode * , size_t , int * );
40+ void printtree (struct tnode * );
41+ void freetree (struct tnode * );
42+
43+ /* globals */
44+ int buf [BUFSIZE ]; /* buffer from ungetch */
45+ int bufp = 0 ; /* next free position in buf */
46+
4847struct key keytab [] = {
4948 { "auto" , 0 },
5049 { "break" , 0 },
@@ -80,141 +79,71 @@ struct key keytab[] ={
8079 { "while" , 0 },
8180};
8281
83- struct key symbol [] = { /* array is sorted for binary search */
84- { "\"" , 0 },
85- { "#" , 0 },
86- { "*" , 0 },
87- { "/" , 0 },
88- { "\\" , 0 },
89- { "_" , 0 },
90- };
91-
9282/* addtree: add a node with w, at or below p */
93- struct tnode * addtree (struct tnode * p , char * w )
83+ struct tnode * addtree (struct tnode * p , char * w , size_t n )
9484{
9585 int cond ;
86+ static int found ;
9687
97- if (p == NULL ) { /* a new word has arrived */
98- p = talloc (); /* make a new node */
99- p -> word = strDup (w ); /* copy data to it */
100- p -> count = 1 ;
101- p -> match = 0 ; /* initialize match */
88+ if (!p ) { /* a new word has arrived */
89+ p = talloc (); /* make a new node */
90+ p -> word = strDup (w ); /* copy data to it */
91+ p -> match = * (& found ); /* p->match = value pointed to by &found */
10292 p -> left = p -> right = NULL ;
103- } else if ((cond = strcmp (w , p -> word )) == 0 )
104- p -> count ++ ; /* repeated word */
105- else if (cond < 0 ) /* less thant into left subtree */
106- p -> left = addtree (p -> left , w );
107- else
108- p -> right = addtree (p -> right , w );
109-
110- return p ;
111- }
112-
113- /* treeprint: in-order print of tree p */
114- void treeprint (struct tnode * p )
115- {
116- if (p != NULL ) {
117- treeprint (p -> left );
118- if (p -> match > 0 )
119- printf ("%4d %4d %s\n" , p -> count , p -> match , p -> word );
120- treeprint (p -> right );
93+ } else if ((cond = strcmp (w , p -> word )) < 0 ) { /* less than ? */
94+ checkmatch (w , p , n , & found );
95+ p -> left = addtree (p -> left , w , n ); /* go left */
96+ } else if (cond > 0 ) { /* greater than */
97+ checkmatch (w , p , n , & found );
98+ p -> right = addtree (p -> right , w , n ); /* go right */
12199 }
100+ found = 0 ; /* reset */
101+ return p ;
122102}
123103
124- int getch (void ) /* get a (possibly pushed back) character */
104+ /* checkmatch: set current node's flag variable and the found variable to 1, if
105+ * w matches a word in the tree */
106+ void checkmatch (char * w , struct tnode * p , size_t n , int * found )
125107{
126- return (bufp > 0 ) ? buf [-- bufp ] : getchar ();
108+ if (!strncmp (w , p -> word , n )) /* is w a match? */
109+ p -> match = * found = 1 ; /* mark the current and the next nodes */
127110}
128111
129- void ungetch (int c ) /* push character back on input */
112+ /* printree: in-order print of tree p */
113+ void printree (struct tnode * p )
130114{
131- if (bufp >= BUFSIZE )
132- printf ("ungetch: too many characters\n" );
133- else
134- buf [bufp ++ ] = c ;
135- }
136-
137- /* getword: get next word or character from input */
138- int getword (char * word , int lim )
139- {
140- int c , getch (void );
141- void ungetch (int );
142- char * w = word ;
143- struct key * p ;
144-
145- while (isspace (c = getch ()))
146- ;
147-
148- if (c != EOF ) {
149- * w ++ = c ;
150- * w = '\0' ;
151- } else
152- return c ;
153-
154- if (!isalpha (c ) && (p = binsearch (word , symbol , NSYMBOLS )) == NULL )
155- return c ;
156-
157- switch (c ) {
158- case '\\' : /* handle escape sequences */
159- c = getch ();
160- break ;
161- case '\"' : /* skip words inside string constant */
162- while ((c = getch ()) != '\"' )
163- if (c == EOF )
164- return c ;
165- break ;
166- case '#' : /* skip preprocessor control lines */
167- while ((c = getch ()) != '\n' )
168- ;
169- ungetch (c );
170- break ;
171- case '/' : /* skip words inside C comments */
172- if ((c = getch ()) == '*' ) {
173- while ((c = getch ()))
174- if (c == '*' && (c = getch ()) == '/' )
175- break ;
176- else if (c == EOF )
177- return c ;
178- } else /* don't skip pointer variables */
179- ungetch (c );
180- break ;
181- default :
182- for ( ; -- lim > 0 ; w ++ )
183- if (!isalnum (* w = getch ()) && * w != '_' ) {
184- ungetch (* w );
185- break ;
186- }
187- break ;
188- }
189-
190- * w = '\0' ;
191- return word [0 ];
115+ if (!p ) /* exist condition */
116+ return ;
117+ printree (p -> left );
118+ if (p -> match )
119+ printf (" %s\n" , p -> word );
120+ printree (p -> right );
192121}
193122
194123/* talloc: make a tnode */
195124struct tnode * talloc (void )
196125{
197- return ( struct tnode * ) malloc (sizeof (struct tnode ));
126+ return malloc (sizeof (struct tnode ));
198127}
199128
200129/* freetree: free allocated heap memory of node tree */
201- struct tnode * freetree (struct tnode * node )
130+ void freetree (struct tnode * node )
202131{
203- if (node != NULL ) {
204- freetree (node -> left );
205- freetree (node -> right );
206- free (node -> word );
207- free (node );
208- }
209- return node ;
132+ if (!node )
133+ return ;
134+ freetree (node -> left );
135+ freetree (node -> right );
136+ free (node -> word );
137+ free (node );
210138}
139+
/*
 * strDup: make a duplicate of s in memory obtained from malloc.
 *
 * Returns the copy, which the caller must free, or NULL when s is NULL or the
 * allocation fails.
 */
char *strDup(char *s)
{
	size_t len;
	char *p;

	if (!s)                     /* strlen(NULL) would be undefined */
		return NULL;
	len = strlen(s) + 1;        /* +1 for '\0' */
	p = malloc(len);
	if (p)
		memcpy(p, s, len);      /* length is already known; copies the '\0' too */
	return p;
}
@@ -239,49 +168,68 @@ struct key *binsearch(char *word, struct key *tab, int n)
239168 return NULL ;
240169}
241170
/*
 * getword: get next word or character from input.
 *
 * word  destination buffer
 * lim   size of word in characters; must be at least 1
 *
 * Leading white space is skipped. A word starts with a letter, '_' or '#' and
 * continues with letters, digits and '_'; at most lim - 1 characters are
 * stored, followed by '\0'. Character constants, string constants and
 * C comments are consumed without being stored, except that their first
 * character is left in word.
 *
 * Returns the first character of the word, the last character read while
 * skipping a constant or comment, the character itself for anything else, or
 * EOF at end of input (including end of input inside an unterminated constant
 * or comment).
 */
int getword(char *word, int lim)
{
	int c, next, getch(void);
	void ungetch(int);
	char *w = word;
	char *end = word + lim - 1;     /* slot reserved for the terminating '\0' */

	while (isspace(c = getch()))
		;
	if (c != EOF && w < end)
		*w++ = c;

	if (isalpha(c) || c == '_' || c == '#') {
		/* read into an int so EOF and bytes above 127 stay valid arguments
		 * for isalnum, and never write past the reserved terminator slot */
		while (w < end) {
			next = getch();
			if (!isalnum(next) && next != '_') {
				ungetch(next);
				break;
			}
			*w++ = next;
		}
	} else if (c == '\'' || c == '\"') {    /* skip character/string constants */
		int quote = c;

		while ((c = getch()) != quote && c != EOF)
			if (c == '\\' && getch() == EOF) {  /* escaped char is consumed */
				c = EOF;
				break;
			}
	} else if (c == '/') {
		if ((next = getch()) == '*') {      /* skip comments */
			int prev = 0;

			/* tracking the previous character lets a run of stars such as
			 * the one in a trailing star-star-slash close the comment */
			while ((c = getch()) != EOF) {
				if (prev == '*' && c == '/')
					break;
				prev = c;
			}
		} else
			ungetch(next);                  /* plain '/': keep what follows */
	}
	*w = '\0';
	return c;
}
257202
258- /* word frequency count */
259- int main ( int argc , char * argv [] )
203+ /* get a (possibly pushed back) character */
204+ int getch ( void )
260205{
261- struct tnode * root ; /* root node */
262- struct key * p ; /* currently searched word */
263- char word [MAXWORD ]; /* currently read word */
264- int nChar ; /* number of characters to match */
206+ return (bufp > 0 ) ? buf [-- bufp ] : getchar ();
207+ }
265208
266- if (argc != 2 )
267- nChar = 6 ;
209+ /* push character back on input */
210+ void ungetch (int c )
211+ {
212+ if (bufp >= BUFSIZE )
213+ printf ("ungetch: too many characters\n" );
268214 else
269- nChar = atoi (argv [1 ]);
215+ buf [bufp ++ ] = c ;
216+ }
270217
271- root = NULL ; /* initialize root node */
272- while (getword (word , MAXWORD ) != EOF )
273- if ((isalpha (word [0 ]) || word [0 ] == '_' || word [0 ] == '*' )
274- && (int ) strlen (word ) > nChar ) {
275- if ((p = binsearch (word , keytab , NKEYS )) == NULL ) /* skip C */
276- root = addtree (root , word ); /* reserved words */
277- else
278- ++ p -> count ; /* not necessary */
279- }
280- findVariables (root , nChar );
281- treeprint (root );
282- root = freetree (root ); /* clean up */
218+ int main (int argc , char * argv [])
219+ {
220+ struct tnode * root ; /* root node */
221+ char word [MAXWORD ]; /* currently read word */
222+ size_t nChar ; /* number of characters to match */
283223
284- for (size_t i = 0 ; i < sizeof (keytab ) / sizeof (keytab [0 ]); ++ i )
285- printf ("%s %i\n" , keytab [i ].word , keytab [i ].count );
224+ nChar = (-- argc == 1 ) ? atoi (* ++ argv ) : 6 ; /* Note: no input error check */
225+ root = NULL ;
226+ while (getword (word , MAXWORD ) != EOF )
227+ if ((isalpha (word [0 ]) || word [0 ] == '_' ) && strlen (word ) >= nChar &&
228+ !binsearch (word , keytab , NKEYS )) /* skip reserved words */
229+ root = addtree (root , word , nChar );
230+ printree (root );
231+ /* clean up */
232+ freetree (root );
233+ root = NULL ;
286234 return 0 ;
287235}
0 commit comments