//////////////////////////////////////////////////////////////////
//
//  slowmapwordcounter.cpp
//
//  Counts and reports the number of times each word in
//  the given input file occurs, and puts the report in
//  the given output file.
//
//  Uses an elegant word-counting loop, but it's slower
//  than the ugly "insert"-based loop.
//
//  REVISION HISTORY:
//  2/10/01 (Jeff Ondich) Wrote the original version.
//  2/14/01 (JO) Modified the counting loop.
//
//////////////////////////////////////////////////////////////////

#include <cctype>
#include <fstream>
#include <iostream>
#include <map>
#include <string>

void CleanWord( std::string& theWord );

//////////////////////////////////////////////////////////////////
//
//  main expects exactly two command-line arguments:
//
//      argv[1]  the file to read words from
//      argv[2]  the file to write the report to
//
//  Whitespace-separated tokens are read from the input file,
//  passed through CleanWord, and tallied.  The report has one
//  line per distinct word, in the map's (ascending key) order,
//  formatted as  word<TAB>count.
//
//  Returns 0 on success, and 1 if the arguments are wrong, a
//  file cannot be opened, or the report cannot be written.
//
//////////////////////////////////////////////////////////////////

int main( int argc, char *argv[] )
{
    // Deal with command line and open the files.

    if( argc != 3 )
    {
        std::cerr << "Usage: " << argv[0] << " inputFile outputFile" << std::endl;
        std::cerr << argv[0] << " collects word frequency data from inputFile" << std::endl;
        std::cerr << " and places the results in outputFile." << std::endl;
        return 1;
    }

    std::ifstream in( argv[1] );
    if( !in.is_open() )
    {
        std::cerr << "Cannot open " << argv[1] << std::endl;
        return 1;
    }

    std::ofstream out( argv[2] );
    if( !out.is_open() )
    {
        std::cerr << "Cannot open " << argv[2] << std::endl;
        return 1;
    }

    // Get the words out of the input file and count them.

    std::map<std::string, int> wordMap;
    std::string word;
    while( in >> word )
    {
        CleanWord( word );

        // A token with no letters, hyphens, or apostrophes (for
        // example "1984") cleans down to nothing.  It is not a
        // word, so it must not show up in the report.
        if( word.empty() )
            continue;

        // operator[] creates a missing entry with a count of 0,
        // so a single lookup handles both new and repeated words.
        ++wordMap[word];
    }

    // Send the results to the output file.  '\n' is used rather
    // than endl so the stream is not flushed after every line.

    for( std::map<std::string, int>::const_iterator wordIterator = wordMap.begin();
         wordIterator != wordMap.end();
         ++wordIterator )
    {
        out << wordIterator->first << '\t' << wordIterator->second << '\n';
    }

    // close() flushes the buffered report; a failure here (or in
    // any earlier write) leaves the stream in a failed state.
    out.close();
    if( out.fail() )
    {
        std::cerr << "Cannot write to " << argv[2] << std::endl;
        return 1;
    }

    return 0;
}

//////////////////////////////////////////////////////////////////
//
//  CleanWord removes all non-alphabetic characters
//  from the given string (except for hyphens and
//  apostrophes), and reduces all upper case letters
//  to lower case.  Thus, once CleanWord is done, theWord
//  consists entirely of lower case letters, hyphens,
//  and apostrophes.
//
//  theWord is modified in place.  If none of its characters
//  survive, it ends up empty.
//
//  What counts as "alphabetic" is decided by isalpha, and the
//  case conversion by tolower, so both follow the current
//  C locale.
//
//////////////////////////////////////////////////////////////////

void CleanWord( std::string& theWord )
{
    std::string cleaned;
    cleaned.reserve( theWord.length() );

    for( std::string::size_type i = 0; i < theWord.length(); i++ )
    {
        // isalpha and tolower are only defined for values that fit
        // in an unsigned char (or EOF).  A plain char holding a
        // non-ASCII byte may be negative, so convert it first.
        const unsigned char c = static_cast<unsigned char>( theWord[i] );

        if( std::isalpha( c ) || c == '\'' || c == '-' )
            cleaned += static_cast<char>( std::tolower( c ) );
    }

    theWord = cleaned;
}