Windows平台实现心跳机制

让人想犯罪 __ 提交于 2020-01-14 04:50:53

背景

最近在做毕业设计,需要用到Windows下节点间通讯(UDP/TCP套接字通讯)以及节点间心跳检测的知识。之前没有学过,于是看了几篇博客,并结合最近看的关于UNIX下套接字编程的理论(很幸运,其中有些函数和理论同样适用于Windows),整理出了下面这个小例子。

Windows下节点间UDP通讯

参考博客

上边链接中的博客在运行的时候会出现一些bug,需要对自己的VS运行环境稍作修改,具体操作见:

参考博客1

参考博客2

小项目概述

1  要实现的是一个3个节点的集群,包含一个master节点和2个worker节点

2  每一个节点都有自己的能力值(CPU和内存的综合评分),端口号,IP地址(因为在同一个机器上,我们使用进程来模拟节点,所以每一个节点的IP地址都是localhost)

3 该集群现在的任务就是worker节点以一定频次通过心跳机制检测master节点是否依然存活。这里的心跳机制采用的是master节点以一定频次向worker节点发送alive信息的方法,一旦worker在给定的超时时间没有收到Master的alive信息,worker就认为Master节点失效。

4 默认情况下,recvfrom函数是阻塞式的,要想实现超时自动返回,可以使用IO复用中的select方法:先用select等待套接字可读,在给定时间内没有数据到达时select会超时返回,因此worker的超时时间就可以直接作为select的超时时间。

程序代码

#include <iostream>
#include <thread>
#include <string>
#include <vector>
#include <algorithm>
#include <Winsock2.h>
#include <thread>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <Windows.h>

// Ask the linker to pull in the Winsock import library (ws2_32.lib).
// NOTE(review): the original comment here read "allow the use of some older
// network programming functions"; this pragma only links the library, so a
// deprecation-suppressing define may have been lost along the way - confirm.
#pragma comment(lib, "ws2_32.lib")

using namespace std;

// Command-line arguments kept as raw strings; main() assigns them from argv[1..3].
string role, ability, port_string; 
// UDP socket that receive_heartbeat() creates and listens on.
SOCKET sockSrv;

// One entry of the cluster table: a display name, the node's port (kept as
// text) and its ability score.
struct ClusterNode
{
	string name;
	string port;
	int ability;

	// Stores the three values as given.
	ClusterNode(string name, string port, int ability)
		: name(name), port(port), ability(ability)
	{
	}

	// Ordering used when the table is sorted:
	//   1. a lower ability compares less;
	//   2. on equal ability, a node whose name starts with "master" is never
	//      less than the other node, and any other node is less than a master;
	//   3. otherwise the node with the numerically smaller port compares less.
	friend bool operator < (const ClusterNode& lhs, const ClusterNode& rhs)
	{
		if (lhs.ability != rhs.ability)
			return lhs.ability < rhs.ability;

		const bool lhsIsMaster = lhs.name.compare(0, 6, "master") == 0;
		if (lhsIsMaster)
			return false;

		const bool rhsIsMaster = rhs.name.compare(0, 6, "master") == 0;
		if (rhsIsMaster)
			return true;

		return atoi(lhs.port.c_str()) < atoi(rhs.port.c_str());
	}
};

// Table of the cluster nodes known to this process; main() fills and sorts it.
vector<ClusterNode> vt;

// Waits, via select(), until `fd` has data to read or `sec` seconds elapse.
// The result of select() is handed back untouched; callers treat 0 as
// "timed out with nothing to read".
int readable_timeo(int fd, int sec)
{
	struct timeval timeout;
	timeout.tv_usec = 0;
	timeout.tv_sec = sec;

	fd_set readSet;
	FD_ZERO(&readSet);
	FD_SET(fd, &readSet);

	const int ready = select(fd + 1, &readSet, NULL, NULL, &timeout);
	return ready;
}

// Master side: sends a heartbeat datagram to one worker, forever.
//
// Every 5 seconds the 2-byte payload "1\0" is sent over UDP to port
// `portnum` on the loopback address (all nodes are processes on one machine).
//
// portnum: UDP port the target worker listens on.
//
// Returns only when Winsock cannot be initialised or the socket cannot be
// created; otherwise the send loop never exits.
void send_heartbeat(int portnum)
{
	// Winsock has to be started before any socket call is made.
	WSADATA wsaData;
	if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) {
		return;
	}

	if (LOBYTE(wsaData.wVersion) != 1 ||
		HIBYTE(wsaData.wVersion) != 1) {
		WSACleanup();
		return;
	}

	SOCKET sockClient = socket(AF_INET, SOCK_DGRAM, 0);
	if (sockClient == INVALID_SOCKET) {
		// Without a socket every sendto() below would fail silently.
		cerr << "socket() failed for worker_" << portnum
			<< ", error " << WSAGetLastError() << endl;
		WSACleanup();
		return;
	}

	// Zero the whole structure so the padding bytes are well defined.
	SOCKADDR_IN addrClient = {};
	addrClient.sin_family = AF_INET;
	// 127.0.0.1, written without the deprecated inet_addr().
	addrClient.sin_addr.S_un.S_addr = htonl(INADDR_LOOPBACK);
	addrClient.sin_port = htons(static_cast<u_short>(portnum));

	// '1' plus the terminating NUL: the worker only inspects the first byte.
	const char sendBuf[] = "1";

	while (true) {
		cout << "I am going to send alive message to worker_" <<portnum<<"!"<< endl;

		const int sent = sendto(sockClient, sendBuf, static_cast<int>(sizeof(sendBuf)), 0,
			reinterpret_cast<const SOCKADDR*>(&addrClient), static_cast<int>(sizeof(addrClient)));
		if (sent == SOCKET_ERROR) {
			// Heartbeats are best effort: report the failure and retry on the next tick.
			cerr << "sendto() to worker_" << portnum
				<< " failed, error " << WSAGetLastError() << endl;
		}

		// One heartbeat every 5 seconds.
		Sleep(5000);
	}

	// Not reached while the loop above has no exit condition; kept so the
	// cleanup is already in place if a stop condition is added.
	closesocket(sockClient);
	WSACleanup();
}

// Worker side: listens for the master's heartbeat datagrams.
//
// Binds the file-scope socket `sockSrv` to UDP port `portnum` on all local
// addresses, then loops: wait up to 10 seconds for a datagram via
// readable_timeo(); if none arrives, report the master as crashed; if one
// arrives and its first byte is '1', report the master as alive.
//
// portnum: UDP port this worker listens on.
//
// Returns when Winsock cannot be initialised, the socket cannot be created or
// bound, or waiting on the socket fails; otherwise the loop never exits.
void receive_heartbeat(int portnum)
{
	WSADATA wsaData;
	if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) {
		return;
	}

	if (LOBYTE(wsaData.wVersion) != 1 ||
		HIBYTE(wsaData.wVersion) != 1) {
		WSACleanup();
		return;
	}

	// Create the UDP socket.
	sockSrv = socket(AF_INET, SOCK_DGRAM, 0);
	if (sockSrv == INVALID_SOCKET) {
		cerr << "socket() failed, error " << WSAGetLastError() << endl;
		WSACleanup();
		return;
	}

	// Local address: any interface, the requested port.
	SOCKADDR_IN addrSrv = {};
	addrSrv.sin_family = AF_INET;
	addrSrv.sin_addr.S_un.S_addr = htonl(INADDR_ANY);
	addrSrv.sin_port = htons(static_cast<u_short>(portnum));

	// A failed bind (e.g. port already in use) would otherwise leave the loop
	// below spinning on a socket that can never receive anything.
	if (bind(sockSrv, reinterpret_cast<const SOCKADDR*>(&addrSrv),
		static_cast<int>(sizeof(addrSrv))) == SOCKET_ERROR) {
		cerr << "bind() to port " << portnum
			<< " failed, error " << WSAGetLastError() << endl;
		closesocket(sockSrv);
		WSACleanup();
		return;
	}

	char recvBuf[10] = {};
	SOCKADDR_IN addrClient = {};

	// The receive timeout is implemented with select() (see readable_timeo),
	// so recvfrom() is only called once data is known to be waiting.
	while (true) {
		const int ready = readable_timeo(static_cast<int>(sockSrv), 10);

		if (ready == 0) {
			// Nothing arrived within the timeout window.
			cout << "Timeout Error! Master has crashed!" << endl;
			continue;
		}

		if (ready < 0) {
			// select() itself failed; retrying immediately would busy-loop.
			cerr << "select() failed, error " << WSAGetLastError() << endl;
			break;
		}

		// fromlen is an in/out argument, so it is reset before every call.
		int len = static_cast<int>(sizeof(addrClient));
		const int received = recvfrom(sockSrv, recvBuf, static_cast<int>(sizeof(recvBuf)), 0,
			reinterpret_cast<SOCKADDR*>(&addrClient), &len);
		if (received == SOCKET_ERROR) {
			cerr << "recvfrom() failed, error " << WSAGetLastError() << endl;
			continue;
		}

		// Only look at the buffer when at least one byte was actually received.
		if (received > 0 && '1' == recvBuf[0]) {
			cout << "Master is still alive!" << endl;
		}
	}

	// Close the socket and release Winsock.
	closesocket(sockSrv);
	WSACleanup();
}

// Entry point. Expects three command-line arguments:
//   argv[1]  role     - "master" or "worker"
//   argv[2]  ability  - integer ability score of this node
//   argv[3]  port     - port number of this node
//
// Builds and sorts the node table `vt`, then starts the heartbeat threads for
// the chosen role: a master sends heartbeats to ports 5001 and 5002, a worker
// listens on its own port.
//
// Returns 1 when arguments are missing or the role is unknown, 0 otherwise.
int main(int argc, char* argv[])
{
	// argv[1..3] are read below, so all three must be present.
	if (argc < 4)
	{
		cerr << "Usage: " << (argc > 0 ? argv[0] : "heartbeat")
			<< " <master|worker> <ability> <port>" << endl;
		return 1;
	}

	role = argv[1];
	ability = argv[2];
	port_string = argv[3];

	if (role == "master")
	{
		// Node table: this master first, then the two fixed workers.
		vt.push_back(ClusterNode("master_" + port_string, port_string, atoi(ability.c_str())));
		vt.push_back(ClusterNode("worker_5001", "5001", 64));
		vt.push_back(ClusterNode("worker_5002", "5002", 64));

		// Ascending order as defined by ClusterNode's operator<.
		sort(vt.begin(), vt.end());

		// One sender thread per worker.
		thread t1(send_heartbeat, 5001);
		thread t2(send_heartbeat, 5002);
		t1.join();
		t2.join();
	}
	else if (role == "worker")
	{
		// Node table: this worker, the fixed master, then the other worker.
		vt.push_back(ClusterNode("worker_" + port_string, port_string, atoi(ability.c_str())));
		vt.push_back(ClusterNode("master_5000", "5000", 128));

		// The peer is whichever of 5001 / 5002 this worker is not.
		const string peer_port = (port_string == "5001") ? "5002" : "5001";
		vt.push_back(ClusterNode("worker_" + peer_port, peer_port, 64));

		sort(vt.begin(), vt.end());

		// Listen for the master's heartbeats on this worker's own port.
		thread t1(receive_heartbeat, atoi(port_string.c_str()));
		t1.join();
	}
	else
	{
		cerr << "Unknown role \"" << role << "\"; expected \"master\" or \"worker\"" << endl;
		return 1;
	}

	return 0;
}

这个程序从命令行接收三个参数:角色(master/worker),能力值(master为128,另外2个worker为64),端口号(master是5000,另外2个worker分别是5001和5002)

运行效果

打开三个终端界面,如图所示;

第一个是master节点,后2个是worker节点,master节点以一定频次向worker报告自己的存活信息。

当我们把master程序关闭(模拟Master崩溃),worker节点在超时时间内未收到Master的信息,就会得知master已经崩溃。

 

 

 

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!