统计一段时间(最近500秒)内各单词出现的频率
单词最长为10
void init();
初始化函数
void searchWord(int tsec, char str[], int num);
在tsec时刻,搜索了单词str,本次搜索计入的频率为num
void banWord(int tsec, char str[]);
在tsec时刻,禁用(ban)单词str,此后的rank中不会再出现str
void mergeWords(int tsec, char str1[], char str2[]);
在tsec时刻,合并单词:str2不再出现在rank中,且str2的频率都计入str1,相当于父子关系
int getRank10(int, char[][MAX_WORD_LEN + 1]);
在tsec时刻,返回该时刻以及此前500s内出现频率最高的单词(最多10个),返回值为写入的单词个数。
// Maximum number of characters in a word; buffers are sized MAX_WORD_LEN + 1
// to leave room for the terminating '\0'.
#define MAX_WORD_LEN 10
// Values stored in Node::state.
// USEFUL: the only state getRank10 accepts for a ranked word.
#define USEFUL 0
// BAN: set by banWord; searchWord records nothing for such a word.
#define BAN 1
// MERGED: set by mergeWords on the second word; its counts are credited to the
// node found by following the fathe links.
#define MERGED 2
/*
 * Copy the NUL-terminated string `src` into `dest`, terminator included.
 * `dest` must be large enough; no bounds checking is performed.
 */
static void mstrcpy(char dest[], const char src[])
{
    char *out = dest;
    const char *in = src;

    /* Copy first, test afterwards, so the terminating '\0' is copied too. */
    do {
        *out++ = *in;
    } while (*in++ != '\0');
}
/*
 * Strict lexicographic "less than" on two NUL-terminated strings.
 * Returns true only when str1 sorts before str2; equal strings and the case
 * where str2 is a proper prefix of str1 both yield false.
 */
static bool check(char str1[], char str2[])
{
    const char *a = str1;
    const char *b = str2;

    /* Skip the common prefix. */
    for (; *a != '\0' && *a == *b; ++a, ++b) {
    }

    /* str2 exhausted: str1 is equal to it or longer, so not smaller. */
    if (*b == '\0') {
        return false;
    }
    return *a < *b;
}
//>>>>>>>>>>>>>>>>>DATA struct>>>>>>>>>>>>>>
// One node of the word trie. The node reached by spelling out a word also
// carries that word's bookkeeping (state, running count, merge link).
struct Node
{
int state; // one of USEFUL, BAN, MERGED
Node* fathe; // node this word was merged into (set by mergeWords); nullptr after init/banWord
int val; // running frequency total, adjusted by searchWord, outWindow, mergeWords and banWord
int isRank; // 1 while the node is held in getRank10's candidate list, 0 otherwise
char word[11]; // text of the word, copied in by searchWord / mergeWords
Node* keys[26]; // child links, indexed by (letter - 'a')
};
// One recorded search; instances live in the searchWindow array.
struct SearchInfo
{
int timeId; // tsec value passed to searchWord for this search
Node* ptr; // trie node of the searched word
int fre; // frequency (num) this search added; subtracted again by outWindow on expiry
};
// Entry of the temporary linked list that getRank10 builds while ranking.
struct RankNode{
Node* ptr; // word node this entry stands for
char word[11]; // copy of the word text taken when the entry is inserted
RankNode* front; // previous entry (written on insertion; not read by the visible code)
RankNode* next; // next entry, nullptr at the end of the list
};
//>>>>>>>>>>>>>>>>>DATA struct>>>>>>>>>>>>>>
//>>>>>>>>>>>>>>>>Global Value>>>>>>>>>>>
//-----search info: recorded searches; entries f .. r-1 are the live ones
//     (searchWord appends at r, outWindow advances f past expired entries)
SearchInfo searchWindow[50000]; int f = 0, r = 0;
//-----word list: pool of trie nodes handed out by getNode;
//     countWord is the number of pool slots taken, root is the trie root
Node wordSet[50000] = { 0 }; int countWord = 0; Node* root;
//----rank links: pool of list entries used by getRank10;
//     countLink is the number of pool slots taken in the current call
RankNode links[3501]; int countLink = 0;
//<<<<<<<<<<<<<<<<Global Value<<<<<<<<<<<<<<<<<<
//>>>>>>>>>>>>>>>Common func>>>>>>>>>>>>>>>>>
// Walk the trie along the letters of `str`, taking a fresh node from the
// wordSet pool for every missing link, and return the node for the last
// letter (the root itself for an empty string).
// Letters are indexed as (letter - 'a'); no range or pool-capacity check is made.
Node* getNode(char str[]){
    Node* cur = root;
    for (const char* c = str; *c != '\0'; ++c){
        Node** slot = &cur->keys[*c - 'a'];
        if (*slot == nullptr){
            *slot = &wordSet[countWord++];
        }
        cur = *slot;
    }
    return cur;
}
// Follow the `fathe` links upward from `p` and return the first node whose
// `fathe` is nullptr (which is `p` itself when it has no parent).
Node* getRoot(Node* p){
    Node* cur = p;
    for (;;){
        Node* parent = cur->fathe;
        if (parent == nullptr){
            return cur;
        }
        cur = parent;
    }
}
void outWindow(int tsec){
while (f < r && searchWindow[f].timeId < tsec - 500){
Node* n = searchWindow[f].ptr;
n->val = n->val - searchWindow[f].fre;
if (n->state == MERGED){
Node* orignal = getRoot(n);
orignal->val = orignal->val - searchWindow[f].fre;
}
f++;
}
}
//<<<<<<<<<<<<<<Common func<<<<<<<<<<<<<<<<<<<<<<
void init(){
f = 0; r = 0;
for (int i = 0; i < countWord; ++i){ //clear word list
wordSet[i].fathe = nullptr; wordSet[i].state = USEFUL; wordSet[i].val = 0;
//for (int j = 0; j < 26; j++) wordSet[i].keys[j] = nullptr;
}
//优化3 减少不必要的操作
//countWord = 0;
root = &wordSet[countWord++];
}
void searchWord(int tsec, char str[], int num){
//in Window
Node* n = getNode(str);
if (n->state != BAN){
searchWindow[r].timeId = tsec; searchWindow[r].fre = num;
mstrcpy(n->word, str); searchWindow[r++].ptr = n;
//动态更新节点val
n->val = n->val + num;
if (n->fathe != nullptr){
n = getRoot(n); //it's original
if (n->state != BAN) n->val = n->val + num;
}
}
//out Window
outWindow(tsec);
}
void banWord(int tsec, char str[]){
Node* n = getNode(str);
//动态更新节点val
if (n->state == MERGED && n->val != 0){
Node* o = getRoot(n);
o->val = o->val - n->val;
}
n->state = BAN;
n->fathe = nullptr;
outWindow(tsec);
}
// Merge `str2` into `str1` at time `tsec`: `str2` is linked under the node
// that `str1` currently resolves to (via getRoot), marked MERGED, and its
// running total is added to that node. Either word may be new; its trie node
// is created on demand. Expired window entries are dropped afterwards.
//
// If `str2` already resolves to the same node as `str1` (merging a word with
// itself, repeating an earlier merge, or merging a group's top node into one
// of its own members) nothing is re-linked. Doing so would either create a
// cycle in the `fathe` chain, making getRoot() loop forever, or add the same
// counts to the target a second time.
void mergeWords(int tsec, char str1[], char str2[]){
    Node* n1 = getNode(str1);
    mstrcpy(n1->word, str1);      // str1 may never have been searched; make sure its text is stored
    Node* n2 = getNode(str2);
    Node* o = getRoot(n1);
    if (getRoot(n2) != o){
        n2->fathe = o;
        n2->state = MERGED;
        // fold str2's current total into the target
        o->val = o->val + n2->val;
    }
    outWindow(tsec);
}
int getRank10(int tsec, char cur_rank[][MAX_WORD_LEN + 1]){
outWindow(tsec);
countLink = 0; int ret = 0;
RankNode* head = &links[countLink++]; head->next = nullptr; head->front = nullptr;
int ff = f;
while (f < r){
Node* n = searchWindow[f].ptr;
//优化1: 减少memcopy
if (n->state == MERGED) n = getRoot(n);
if (n->state == USEFUL && n->isRank == 0){
if (ret < 10){
//insert to link
RankNode* p = head;
//减少函数压入栈的时间 相当于内联函数
while (p->next)
{
if (n->val > p->next->ptr->val || (n->val == p->next->ptr->val && check(n->word, p->next->word)))
p = p->next;
else break;
}
RankNode* s = &links[countLink++];
s->ptr = n; mstrcpy(s->word, n->word);
s->front = p; s->next = p->next;
if (p->next != nullptr) p->next->front = s;
p->next = s;
n->isRank = 1;
ret++;
}
else{
//check the tail
RankNode* p = head->next;
if (p->ptr->val < n->val || (n->val == p->ptr->val && check(n->word, p->word))){
while (p->next)
{
if (n->val > p->next->ptr->val || (n->val == p->next->ptr->val && check(n->word, p->next->word)))
p = p->next;
else break;
}
RankNode* s = &links[countLink++];
s->ptr = n; mstrcpy(s->word, n->word);
s->front = p; s->next = p->next;
if (p->next != nullptr) p->next->front = s;
p->next = s;
n->isRank = 1;
head->next->ptr->isRank = 0;
head->next = head->next->next;
}
}
}
f++;
}
f = ff;
//copy to the result
RankNode* t = head->next;
for (int i = 0; i < ret; ++i){
mstrcpy(cur_rank[ret - 1 - i], t->word);
t->ptr->isRank = 0;
t = t->next;
}
return ret;
}
来源:CSDN
作者:Frankiehp
链接:https://blog.csdn.net/Frankiehp/article/details/103981035