欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识。
您现在的位置是: 首页

[单链表]统计文本中英文单词出现次数,并输出前200个单词及其出现次数

程序员文章站 2022-05-28 19:38:04
...

统计文本中各英文单词的出现次数,按词频从高到低排序,并输出出现次数最多的前200个单词及其次数。

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#define SIZE 50	// size in bytes of the word buffers declared with it (includes the terminating NUL)

// One node of the singly linked word list.
typedef struct words
{
	char str[SIZE];	// word text: at most SIZE-1 characters plus the terminating NUL
	int count;		// number of times this word has been seen
	struct words *next;	// next node in the list, or NULL at the tail
}Words;


// Create an empty word list.
// Returns a heap-allocated sentinel head node (empty string, count 0, no
// successor), or NULL when the allocation fails. The caller owns the list
// and releases it with free_Word().
Words* InitWord(void)
{
	Words *headWord = (Words*)malloc(sizeof *headWord);
	if (headWord == NULL)
	{
		return NULL;	// never write through a failed allocation
	}
	headWord->str[0] = '\0';
	headWord->count = 0;
	headWord->next = NULL;
	return headWord;
}

// Insert a new word at the front of the list with an occurrence count of 1.
//   headWord: sentinel head node of the list (must not be NULL)
//   str:      NUL-terminated word to store
// The text is truncated to fit the node's buffer, so an over-long word can
// no longer overrun it. If memory cannot be allocated the word is dropped
// and a message is written to stderr; the list is left unchanged.
void AddWord(Words *headWord,const char *str)//head insertion
{
	Words *newWord = (Words*)malloc(sizeof *newWord);
	size_t len;

	if (newWord == NULL)
	{
		fprintf(stderr, "AddWord: out of memory, word dropped\n");
		return;
	}

	// Bounded copy that always leaves the buffer NUL-terminated.
	len = strlen(str);
	if (len >= sizeof newWord->str)
	{
		len = sizeof newWord->str - 1;
	}
	memcpy(newWord->str, str, len);
	newWord->str[len] = '\0';
	newWord->count = 1;

	// Link in front of the current first node (NULL when the list is empty).
	newWord->next = headWord->next;
	headWord->next = newWord;
}

// Look up str in the list and bump its count when it is already present.
// Returns 1 when the word was found (count incremented) and 0 when it is
// absent. Special case: on an empty list the word is inserted here via
// AddWord() and 1 is returned, so the caller must not add it again.
int CheckStr(Words *headWord,const char *str)
{
	Words *node;

	if (headWord->next == NULL)
	{
		AddWord(headWord, str);
		return 1;
	}

	for (node = headWord->next; node != NULL; node = node->next)
	{
		if (strcmp(node->str, str) == 0)
		{
			node->count++;	// known word: one more occurrence
			return 1;
		}
	}
	return 0;	// reached the tail without a match
}

// Read one word per call.
#ifndef SIZE
#define SIZE 50	// fallback only; normally defined once near the top of this file
#endif

// Reads the next word from fp, starting at byte offset *plen.
//   fp:   stream opened for reading
//   plen: in: offset to start from; out: offset just past the consumed input
//   str:  destination buffer with room for at least SIZE bytes
// A word starts at the first letter found and continues over letters and
// apostrophes (as in "I'm"); everything before the first letter is skipped.
// At most SIZE-1 characters are stored; the remainder of a longer word is
// consumed but discarded, so str can never be overrun.
// Returns the new offset (non-zero) when a word was read, or 0 when no
// further word exists or the stream cannot be positioned.
int ReadWord(FILE *fp,int *plen,char *str)
{
	unsigned char ch;	// unsigned so it can be passed to isalpha() directly
	int len = 0;		// characters stored in str so far
	int started = 0;	// becomes 1 once the first letter has been seen

	if (fseek(fp, *plen, SEEK_SET) != 0)
	{
		return 0;
	}

	while (fread(&ch, sizeof ch, 1, fp) != 0)
	{
		if (!started)
		{
			if (!isalpha(ch))
			{
				continue;	// still in the separators before the word
			}
			started = 1;
		}
		else if (!isalpha(ch) && ch != '\'')
		{
			break;	// first character after the word; it is consumed
		}

		if (len < SIZE - 1)
		{
			str[len++] = (char)ch;
		}
	}

	if (!started)
	{
		return 0;	// hit end of file without finding a letter
	}
	str[len] = '\0';
	*plen = (int)ftell(fp);	// offset from the start of the file
	return *plen;
}

// Read every word of the file `name` into the list: each word returned by
// ReadWord() either has its count incremented by CheckStr() or is inserted
// with AddWord().
//   headWord: sentinel head node of the list
//   name:     path of the text file to read
// If the file cannot be opened, the reason is reported on stderr and the
// list is left untouched. The file is never created or truncated: reopening
// with "w+" after a failed read-open would wipe an existing file that was
// merely unreadable, and would still leave a NULL stream if it failed too.
void ReadFile(Words *headWord,char *name)
{
	FILE *fp = fopen(name, "r");
	int len = 0;		// current byte offset inside the file
	char str[SIZE];		// buffer for the word being processed

	if (fp == NULL)
	{
		perror(name);
		return;
	}

	while (ReadWord(fp, &len, str) != 0)
	{
		if (CheckStr(headWord, str) == 0)	// not in the list yet
		{
			AddWord(headWord, str);
		}
	}

	fclose(fp);
}

// Exchange the payload (word text and count) of two nodes.
// Only the data moves; the nodes keep their positions and links.
void SwapWord(Words *posWord,Words *posNext)
{
	char heldStr[SIZE];
	int heldCount = posWord->count;

	// swap the counters
	posWord->count = posNext->count;
	posNext->count = heldCount;

	// swap the word text through a temporary buffer
	strcpy(heldStr, posWord->str);
	strcpy(posWord->str, posNext->str);
	strcpy(posNext->str, heldStr);
}
// Sort the list by count, largest first, using bubble sort on the node
// payloads (nodes stay linked in place; SwapWord() moves the data).
// Words with equal counts keep their relative order.
void WordsSort(Words *headWord)
{
	Words *sortedFrom = NULL;	// first node of the already-sorted tail
	Words *cur;
	int swapped;

	if (headWord->next == NULL)
	{
		return;
	}

	do
	{
		swapped = 0;
		// Bubble the smallest remaining count down to just before the sorted tail.
		for (cur = headWord->next; cur->next != sortedFrom; cur = cur->next)
		{
			if (cur->count < cur->next->count)
			{
				SwapWord(cur, cur->next);
				swapped = 1;
			}
		}
		sortedFrom = cur;	// the tail grew by one node
	} while (swapped);	// a pass without swaps means everything is in order
}

// Print the word-frequency ranking: a two-line header followed by up to the
// first 200 entries of the list, one "word  count" row each.
// Prints nothing at all when the list is empty.
void PrintWords(Words *headWord,const char *name)
{
	Words *node = headWord->next;
	int shown;

	if (node == NULL)
	{
		return;
	}

	printf("\t<<%s>>中出现的最高频率的单词:\n",name);
	printf("\t单词:\t\t\t\t\t\t出现次数:\n");
	for (shown = 0; node != NULL && shown < 200; ++shown, node = node->next)
	{
		printf("\t%-50s%d\n",node->str,node->count);
	}
	printf("\n");
}

// Release every node of the list, starting with headNode itself.
// Passing NULL is allowed and does nothing.
void free_Word(Words* headNode)
{
	while (headNode != NULL)
	{
		Words *following = headNode->next;	// remember the successor before freeing
		free(headNode);
		headNode = following;
	}
}



// Count the words of a text file and print the most frequent ones.
// Usage: program [file]
// Without an argument the first built-in sample name is used, as before.
// Returns 0 on success, EXIT_FAILURE when the list cannot be created.
int main(int argc, char *argv[])
{
	char bookName[][100] = {"小王子.txt","CountWords.cpp","test.txt","Harry Potter and The Half-Blood Prince.txt" };
										// this source file      // Harry Potter
	char *fileName = (argc > 1) ? argv[1] : bookName[0];
	Words *headWord = InitWord();		// create the list head

	if (headWord == NULL)
	{
		fprintf(stderr, "main: cannot allocate the word list\n");
		return EXIT_FAILURE;
	}

	ReadFile(headWord,fileName);		// read and count the words
	WordsSort(headWord);				// order by frequency
	PrintWords(headWord,fileName);		// print the ranking
	free_Word(headWord);				// release the list
	return 0;
}


[单链表]统计文本中英文单词出现次数,并输出前200个单词及其出现次数


文件下载:
小王子.txt
链接:https://wwa.lanzous.com/icWhOe8z34j
Harry Potter and The Half-Blood Prince.txt
链接:https://wwa.lanzous.com/inix8e8z33i