logoISU  

CS256 - Principles of Structured Design

Fall 2021

Displaying ./code/sternflCode/w4.c

#include<stdio.h>
#include<string.h>

//SZ is the capacity, in chars, of the word[] storage array below
#define SZ 200000
//FNAME is the path of the text file that main() opens for reading
#define FNAME "/u1/junk/shakespeare.txt"

//amount added to a lower case letter to turn it into its upper case form
int diff = 'A'-'a';

//word storage: getNextWord() places words back to back in this array,
//each one terminated by a 0 byte
char word[SZ];
int i=0;  //beginning of free space! (index in word[] where the next word gets written)

//returns 1 when c is an ASCII letter ('a'..'z' or 'A'..'Z'), otherwise 0
int wordChar(int c) {
  int isLower = ('a' <= c && c <= 'z');
  int isUpper = ('A' <= c && c <= 'Z');
  return (isLower || isUpper) ? 1 : 0;
}

//stores a the next word at the top of the array
//updates i
//word is stored with all upper case letters
int getNextWord(FILE *f) {
  int c;
  while ( (c=fgetc(f)) != EOF) {
    if (!wordChar(c)) continue;
    while (wordChar(c)) {
      if (c>='a'  && c<='z')
         c = c + diff;
      word[i] = c;
      i++;
      c = fgetc(f);
    }
    word[i]=0;
    i++;
    return 1;
  }
  return 0;
}

//decides whether the word that starts at index i0 of word[] has been seen before
//  (word+i0 is the word most recently stored by getNextWord)
//returns 1 when no earlier stored word is equal to it, otherwise 0
//side effect on wordPos:
//  on a match, wordPos is the number of stored words that precede the match
//  with no match, wordPos is the number of words stored before index i0
int  wordPos;
int itsNew(int i0) {
  int start = 0;     //index in word[] where the word being compared begins
  wordPos = 0;
  while (start < i0) {
    if (!strcmp(word+start, word+i0)) {
      return 0;
    }
    //hop over this word and its terminating 0 to reach the next one
    start += strlen(word+start) + 1;
    wordPos++;
  }
  return 1;
}



//Reads FNAME one word at a time (via getNextWord) and gathers statistics:
//  how many times "MOST" is immediately followed by "HAPPY",
//  how many distinct words there are, how often "JULIET" occurs,
//  how many distinct two letter words there are,
//  and the longest / second longest distinct words.
//Each distinct word is kept once in word[]; a repeated word is discarded
//by resetting i back to i0.
//Returns 0 normally and 1 if FNAME can not be opened.
int main() {
  enum {
    PREV_SZ = 30,          //size of the buffer holding the previous word
    MAX_DISTINCT = 23779   //number of slots in wc[]
  };

  FILE *f = fopen(FNAME, "r");
  if (f == NULL) {         //reading from a NULL stream is undefined behavior
    perror(FNAME);
    return 1;
  }

  int count=0;             //number of distinct words stored so far
  int i0 = i;              //index in word[] where the next word will be stored
  int jcount = 0;          //occurrences of "JULIET"
  int count2 =0;           //distinct words of length 2
  int longestLength=0;
  int secondLength=0;
  int posOfSecond=0;
  int posOfLongestWord=0;
  char prev[PREV_SZ] = ""; //previous word; starts empty so the first strcmp is defined
  int mh = 0;              //occurrences of "MOST" followed by "HAPPY"
  int wc[MAX_DISTINCT];    //wc[k] counts occurrences of the k-th distinct word
                           //wc[0] counts first word stored
                           //NOTE(review): wc is filled in but not read yet
  while (getNextWord(f)) {
    const char *cur = word+i0;   //the word that was just read

    if (strcmp(prev, "MOST")==0 && strcmp(cur, "HAPPY")==0)
       mh = mh+1;
    //bounded copy: a word longer than the buffer is cut short instead of
    //overflowing prev (a cut word can never compare equal to "MOST")
    snprintf(prev, sizeof prev, "%s", cur);
    if ( strcmp(cur, "JULIET")==0)
      jcount++;

    if (itsNew(i0)) {
      if (wordPos >= MAX_DISTINCT) {   //wc[] has no slot for another distinct word
        printf("too many distinct words\n");
        i = i0;                        //discard the word that could not be counted
        break;
      }
      wc[wordPos]=1;
      int len = strlen(cur);
      //NOTE(review): 27 is presumably the known length of the longest word
      //in the text, so this tracks the longest word shorter than that
      if (len<27 && len>secondLength) {
        secondLength=len;
        posOfSecond= i0;
      }
      if (len>longestLength) {
         longestLength=len;
         posOfLongestWord = i0;
      }
      count++;    //want to count distinct words
      if (len==2) {
        count2++;
      }
      i0 = i;     //keep the word: the next one goes after it
    }
    else {
      i = i0;     //seen before: reuse the space for the next word
      wc[wordPos]++;
    }
    printf("wordPos %d\n",wordPos);
    //NOTE(review): this looks like a leftover debugging exit; it ends the
    //program after 21 distinct words without printing the summary below
    if (count>20) {
      fclose(f);
      return 0;
    }
    if (i>SZ-50) {printf("used up most of the space\n"); break;}
  }
  fclose(f);

  //which value in wc is largest?
  //Need to find the index of the biggest value in wc[]
  printf("number of 'most happy' is %d\n", mh);
  printf("Number of words is %d\n", count);
  //NOTE(review): 29779 is a hard coded divisor that does not match the
  //23779 used for wc[]; confirm which value was intended
  printf("average length %d\n",   i / 29779);
  printf("Juliet occurs %d times\n", jcount);
  printf("number of different two letter words is %d\n", count2);
  printf("longest %s\n", word+posOfLongestWord);
  printf("second longest %s\n", word+posOfSecond);
  return 0;
}