// scanning text and counting word frequencies // note - can use stripWords to clean up words before sending to this program #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <assert.h> #include "/u1/h0/jkinne/public_html/cs202-f2019/code/data_structures/binary_tree/bst.h" tree_t *root_by_count = NULL; /* Begin word_counts_t type declaration and functions for using */ typedef struct word_counts { char *word; // the "key" int num; // more "data" } word_counts_t; int compare_word_counts(void *data1, int data1_size, void *data2, int data2_size) { word_counts_t *d1 = (word_counts_t *) data1, *d2 = (word_counts_t *) data2; return strcmp(d1->word, d2->word); } int compare_word_counts_int(void *data1, int data1_size, void *data2, int data2_size) { word_counts_t *d1 = (word_counts_t *) data1, *d2 = (word_counts_t *) data2; return d1->num - d2->num; } void print_word_counts(void *data, int data_size, int depth) { word_counts_t *wc = (word_counts_t *) data; printf("%*s%s : %d\n", depth, "", wc->word, wc->num); } void free_word_counts(void *data, int data_size) { if (data == NULL) return; word_counts_t *wc = (word_counts_t *) data; if (wc->word != NULL) free(wc->word); free(wc); } /* End word_counts_t type declaration and functions for using */ // insert everything from into to void insert_by_count(tree_t *from, void *to) { if (from == NULL) return; tree_t ** copy_to_pp = (tree_t **) to; *copy_to_pp = bst_insert(*copy_to_pp, from->data, from->data_size, FLAG_ALLOW_DUPLICATES, compare_word_counts_int, print_word_counts, free_word_counts); } int main(int argc, char * argv[]) { tree_t * root = NULL; char s[100]; tree_t * p; word_counts_t wc; wc.num = 1; while (scanf("%99s", s) == 1) { wc.word = s; p = bst_lookup(root, &wc, sizeof(wc)); if (p != NULL) (((word_counts_t *) p->data)->num)++; else { wc.word = strdup(s); // put word on the heap before inserting root = bst_insert(root, &wc, sizeof(wc), 0, compare_word_counts, print_word_counts, free_word_counts); } } bst_print(root, 10, FLAG_TREE | FLAG_SUMMARY); printf("\n"); bst_map(root, insert_by_count, &root_by_count); bst_print(root_by_count, 50, 0); return 0; }