背景
最近在做毕业设计,需要用到Windows下节点间通讯(UDP/TCP套接字通讯)以及节点间心跳检测的知识。这些内容之前没有学过,于是看了几篇博客,并结合最近看的关于UNIX下套接字编程的理论(很幸运,有些函数和理论同样适用于Windows),写了下面这个小项目。
Windows下节点间UDP通讯
上边链接中的博客在运行的时候会出现一些bug,需要对自己的VS运行环境稍作修改,具体操作见:
小项目概述
1 要实现的是一个3个节点的集群,包含一个master节点和2个worker节点
2 每一个节点都有自己的能力值(CPU和内存的综合评分),端口号,IP地址(因为在同一个机器上,我们使用进程来模拟节点,所以每一个节点的IP地址都是localhost)
3 该集群现在的任务就是worker节点以一定频次通过心跳机制检测master节点是否依然存活。这里的心跳机制采用的是master节点以一定频次向worker节点发送alive信息的方法,一旦worker在给定的超时时间没有收到Master的alive信息,worker就认为Master节点失效。
4 默认情况下,recvfrom函数是阻塞式的。要想实现超时自动返回,可以使用IO复用中的select方法:先用带超时的select等待套接字可读,可读时再调用recvfrom,这样就不会无限期阻塞;worker的超时时间就可以作为select的超时时间。
程序代码
#include <iostream>
#include <thread>
#include <string>
#include <vector>
#include <algorithm>
#include <Winsock2.h>
#include <thread>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <Windows.h>
// Link against the Winsock library (ws2_32.lib).
#pragma comment(lib, "ws2_32.lib")
using namespace std;
// Command-line arguments kept as text: node role, ability score and port number.
// main() assigns them from argv[1], argv[2] and argv[3].
string role, ability, port_string;
// UDP socket that receive_heartbeat() creates, binds and reads heartbeats from.
SOCKET sockSrv;
// Describes one node of the cluster: its display name, its port (kept as text)
// and its ability score.
struct ClusterNode
{
    std::string name;
    std::string port;
    int ability;

    // Builds a node from its name, port string and ability score.
    ClusterNode(std::string name, std::string port, int ability)
        : name(name), port(port), ability(ability)
    {
    }

    // Ordering used when the node table is sorted:
    //  1. the node with the lower ability compares as smaller;
    //  2. with equal ability, a node whose name starts with "master" never
    //     compares as smaller, and any other node compares as smaller than it;
    //  3. otherwise the node with the numerically smaller port compares as smaller.
    friend bool operator < (const ClusterNode& lhs, const ClusterNode& rhs)
    {
        if (lhs.ability != rhs.ability)
            return lhs.ability < rhs.ability;

        const bool lhs_is_master = (lhs.name.substr(0, 6) == "master");
        if (lhs_is_master)
            return false;

        const bool rhs_is_master = (rhs.name.substr(0, 6) == "master");
        if (rhs_is_master)
            return true;

        const int lhs_port = atoi(lhs.port.c_str());
        const int rhs_port = atoi(rhs.port.c_str());
        return lhs_port < rhs_port;
    }
};
// Table of the cluster nodes known to this process; main() fills it and sorts it.
vector<ClusterNode> vt;
//判断recvfrom是否可读,使用IO复用
int readable_timeo(int fd, int sec)
{
fd_set rset;
struct timeval tv;
FD_ZERO(&rset);
FD_SET(fd, &rset);
tv.tv_sec = sec;
tv.tv_usec = 0;
return select(fd + 1, &rset, NULL, NULL, &tv);
}
//Master向worker发送心跳包
void send_heartbeat(int portnum)
{
//采用UDP通信
WORD wVersionRequested;
WSADATA wsaData;
int err;
wVersionRequested = MAKEWORD(1, 1);
err = WSAStartup(wVersionRequested, &wsaData);
if (err != 0) {
return;
}
if (LOBYTE(wsaData.wVersion) != 1 ||
HIBYTE(wsaData.wVersion) != 1) {
WSACleanup();
return;
}
SOCKET sockClient = socket(AF_INET, SOCK_DGRAM, 0);
SOCKADDR_IN addrClient;
addrClient.sin_addr.S_un.S_addr = inet_addr("127.0.0.1");
addrClient.sin_family = AF_INET;
addrClient.sin_port = htons(portnum);
//char recvBuf[100] = "\0";
//char tempBuf[200] = "\0";
char sendBuf[10] = "\0";
int len = sizeof(SOCKADDR);
while (true) {
cout << "I am going to send alive message to worker_" <<portnum<<"!"<< endl;
sendBuf[0] = '1';
sendBuf[1] = '\0';
sendto(sockClient, sendBuf, strlen(sendBuf) + 1, 0, (SOCKADDR*)& addrClient, len);
//recvfrom(sockClient, recvBuf, 100, 0, (SOCKADDR*)& addrClient, &len);
/*if (recvBuf[0] == 'q') {
sendto(sockClient, (const char*)'q', strlen((const char*)'q') + 1, 0, (SOCKADDR*)& addrClient, len);
printf("Chat end!\n");
break;
}*/
/*sprintf_s(tempBuf, "%s say: %s", inet_ntoa(addrClient.sin_addr), recvBuf);
printf("%s \n", tempBuf);*/
//每隔5秒发送心跳消息
Sleep(5000);
}
closesocket(sockClient);
WSACleanup();
}
//Worker接受心跳包
void receive_heartbeat(int portnum)
{
WORD wVersionRequested;
WSADATA wsaData;
int err;
wVersionRequested = MAKEWORD(1, 1);
err = WSAStartup(wVersionRequested, &wsaData);
if (err != 0) {
return;
}
if (LOBYTE(wsaData.wVersion) != 1 ||
HIBYTE(wsaData.wVersion) != 1) {
WSACleanup();
return;
}
//创建套接字
sockSrv = socket(AF_INET, SOCK_DGRAM, 0);
//创建地址结构体.
SOCKADDR_IN addrSrv;
addrSrv.sin_addr.S_un.S_addr = htonl(INADDR_ANY);
addrSrv.sin_family = AF_INET;
addrSrv.sin_port = htons(portnum);
//绑定套接字和地址.
bind(sockSrv, (SOCKADDR*)& addrSrv, sizeof(SOCKADDR));
char recvBuf[10];
//char sendBuf[100];
//char tempBuf[200];
SOCKADDR_IN addrClient;
int len = sizeof(SOCKADDR);
//要实现超时检测功能,可以使用SIGALRM为recvfrom设置超时
while (true) {
//先检测可读条件
if (readable_timeo(sockSrv, 10) == 0) //没有可读条件,证明超时
{
cout << "Timeout Error! Master has crashed!" << endl;
}
else
{
//接收数据.
recvfrom(sockSrv, recvBuf, 10, 0, (SOCKADDR*)& addrClient, &len);
if ('1' == recvBuf[0]) {
cout << "Master is still alive!" << endl;
}
}
}
//关闭套接字.
closesocket(sockSrv);
//关闭套接字库.
WSACleanup();
}
// Entry point. Expects three command-line arguments:
//   argv[1] - role: "master" or "worker"
//   argv[2] - ability score of this node (parsed with atoi)
//   argv[3] - port number of this node
// A master builds its node table and then sends heartbeats to the workers on
// ports 5001 and 5002. A worker builds its node table and then listens for
// heartbeats on its own port.
// Returns 0 on normal completion, 1 when the arguments are missing or the role
// is not recognised.
int main(int argc, char* argv[])
{
    // Reading argv[1..3] without this check is undefined behaviour when the
    // program is started with too few arguments.
    if (argc < 4)
    {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "program")
                  << " <master|worker> <ability> <port>" << std::endl;
        return 1;
    }

    role = argv[1];        // role of this node
    ability = argv[2];     // ability score
    port_string = argv[3]; // port number

    if (role == "master")
    {
        // Build the node table: this master plus the two fixed workers.
        vt.push_back(ClusterNode("master_" + port_string, port_string, atoi(ability.c_str())));
        vt.push_back(ClusterNode("worker_5001", "5001", 64));
        vt.push_back(ClusterNode("worker_5002", "5002", 64));
        // Ascending order according to ClusterNode's operator<.
        std::sort(vt.begin(), vt.end());

        // One sender thread per worker. Answering on-demand liveness requests
        // from a worker that timed out is not implemented.
        std::thread t1(send_heartbeat, 5001);
        std::thread t2(send_heartbeat, 5002);
        t1.join();
        t2.join();
    }
    else if (role == "worker")
    {
        // Build the node table: this worker, the fixed master, and the other worker.
        vt.push_back(ClusterNode("worker_" + port_string, port_string, atoi(ability.c_str())));
        vt.push_back(ClusterNode("master_5000", "5000", 128));
        // The peer is 5002 when we are 5001; for any other port it is 5001.
        const std::string peer_port = (port_string == "5001") ? "5002" : "5001";
        vt.push_back(ClusterNode("worker_" + peer_port, peer_port, 64));
        std::sort(vt.begin(), vt.end());

        // Listen for the master's heartbeats on this worker's own port.
        std::thread t1(receive_heartbeat, atoi(port_string.c_str()));
        t1.join();
    }
    else
    {
        std::cerr << "Unknown role \"" << role
                  << "\": expected \"master\" or \"worker\"." << std::endl;
        return 1;
    }

    return 0;
}
这个程序从命令行接收三个参数:角色(master/worker)、能力值(master为128,其它2个是64)、端口号(master是5000,其它2个是5001/5002)
运行效果
打开三个终端界面,如图所示;
第一个是master节点,后2个是worker节点,master节点以一定频次向worker报告自己的存活信息。
当我们把master程序关闭(模拟Master崩溃),worker节点超时未收到Master信息,会得知master崩溃。
来源:CSDN
作者:带你去网吧里偷耳机
链接:https://blog.csdn.net/qq_40123329/article/details/103751072