Question
Note!!! This question is for people who are experts in the boost::asio library. Unfortunately, I cannot make the code any more compact; it contains the minimum needed to describe the problem. The code is an example, artificially created. The places where it crashes are known and described in comments; it is designed to illustrate the crashes!!! I do NOT need any help with debugging the code...
The question is about how to design the asio server, not about where it crashes!!!
This example is close to the "chat server" design from the official boost::asio documentation. But unlike the official example, where only objects of the connection class are created/destroyed dynamically, in my example both the server and its connection class entities are created/destroyed dynamically... I am sure the implementation of such a pattern should be well known among asio lovers, and the problem described below should already have been solved by somebody...
Please see the code. Here, the entities of CAsioServer and CAsioConnection are created and destroyed on the fly.
#include <map>
#include <array>
#include <set>
#include <vector>
#include <deque>
#include <thread>
#include <iostream>
#include <asio.hpp>
#include <iomanip>
class CAsioConnection
: public std::enable_shared_from_this<CAsioConnection>
{
public:
using PtrType = std::shared_ptr<CAsioConnection>;
CAsioConnection(asio::ip::tcp::socket socket, std::set<CAsioConnection::PtrType>& connections)
: socket_(std::move(socket)), connections_(connections)
{
std::cout << "-- CAsioConnection is creating, socket: " << socket_.native_handle() << "\n";
}
virtual ~CAsioConnection()
{
std::cout << "-- CAsioConnection is destroying , socket: " << socket_.native_handle() << "\n";
}
void read() { do_read(); }
private:
void do_read(void)
{
uint8_t buff[3];
asio::async_read(socket_, asio::buffer(buff,3),
[this](std::error_code ec, std::size_t /*length*/) {
if (!ec)
{
do_read();
}
else
{
std::cout << "-- CAsioConnection::do_read() error : " << ec.message() << "\n";
// Here is the crash N2
connections_.erase(shared_from_this());
// Crash may be fixed by the code below
//if (ec.value() != 1236) // (winerror.h) #define ERROR_CONNECTION_ABORTED 1236L
// connections_.erase(shared_from_this());
}
});
}
asio::ip::tcp::socket socket_;
std::set<CAsioConnection::PtrType>& connections_;
};
class CAsioServer
: public std::enable_shared_from_this<CAsioServer>
{
public:
using PtrType = std::shared_ptr<CAsioServer>;
CAsioServer(int port, asio::io_context& io, const asio::ip::tcp::endpoint& endpoint)
: port_(port), acceptor_(io, endpoint)
{
std::cout << "-- CAsioServer is creating, port: " << port_ << "\n";
}
virtual ~CAsioServer()
{
std::cout << "-- CAsioServer is destroying , port: " << port_ << "\n";
}
int port(void) { return port_; }
void accept(void) { do_accept(); }
private:
void do_accept()
{
acceptor_.async_accept([this](std::error_code ec, asio::ip::tcp::socket socket) {
if (!ec)
{
std::cout << "-- CAsioServer::do_accept() connection to socket: " << socket.native_handle() << "\n";
auto c = std::make_shared<CAsioConnection>(std::move(socket), connections_);
connections_.insert(c);
c->read();
}
else
{
// Here is the crash N1
std::cout << "-- CAsioServer::do_accept() error : " << ec.message() << "\n";
// Crash may be fixed by the code below
//if (ec.value() == 995) // (winerror.h) #define ERROR_OPERATION_ABORTED 995L
// return;
}
// Actually here is the crash N1 )), but the fix is above...
do_accept();
});
}
int port_;
asio::ip::tcp::acceptor acceptor_;
std::set<CAsioConnection::PtrType> connections_;
};
//*****************************************************************************
class CTcpBase
{
public:
CTcpBase()
{
// heart beat timer to keep it alive
do_heart_beat();
t_ = std::thread([this] {
std::cout << "-- io context is RUNNING!!!\n";
io_.run();
std::cout << "-- io context has been STOPED!!!\n";
});
}
virtual ~CTcpBase()
{
io_.stop();
if (t_.joinable())
t_.join();
}
void add_server(int port)
{
io_.post([this, port]
{
for (auto s : servers_)
if (port == s->port())
return;
auto endpoint = asio::ip::tcp::endpoint(asio::ip::tcp::v4(), port);
auto s = std::make_shared<CAsioServer>(port, io_, endpoint);
s->accept();
servers_.insert(s);
});
}
void remove_server(int port)
{
io_.post([this, port]
{
for (auto s : servers_)
if (port == s->port())
{ servers_.erase(s); return; }
});
}
private:
void do_heart_beat(void)
{
std::cout << "-- beat\n";
auto timer = std::make_shared<asio::steady_timer>(io_, asio::chrono::milliseconds(3000));
timer->async_wait([timer, this](const asio::error_code& ec) {
do_heart_beat();
});
}
asio::io_context io_;
std::thread t_;
std::set<CAsioServer::PtrType> servers_;
};
//*****************************************************************************
int main(void)
{
CTcpBase tcp_base;
std::cout << "CONNECT the server to port 502\n";
tcp_base.add_server(502);
std::this_thread::sleep_for(std::chrono::seconds(20));
std::cout << "REMOVE the server from port 502\n";
tcp_base.remove_server(502);
std::this_thread::sleep_for(std::chrono::seconds(10));
return 0;
}
It is supposed that CTcpBase::add_server() and CTcpBase::remove_server() will be called by outer clients from different threads, and the asio context handles everything in its own thread.
Let's consider two scenarios:
- Start the application and wait half a minute. The crash happens in CAsioServer::do_accept() (debug console output omitted).
- Start the application, make a connection to port 502 from any outer client, and wait less than 20 seconds. The crash happens in CAsioConnection::do_read() (debug console output omitted).
It seems the asio framework calls the postponed asio::async_read() and acceptor_.async_accept() handlers when the class instances are already destroyed.
I have patched the handlers with error checking, but the solution doesn't seem reliable. Who knows what other errors and scenarios there might be… Sometimes, when a client disconnects, I need to clean up the connections_ set in the asio::async_read() handler; how can I be sure that the server or connection objects are still alive?…
Is there any way to ask the boost::asio framework to prevent calling the postponed handlers of objects that are already destroyed? Or how can I recognize (be 100% sure) from the error code that the object has already been destroyed? Or maybe there are other solutions or design patterns in the scope of asio for handling dynamically created/destroyed servers and their connections in one running thread, without mutexes and stuff...
Answer 1:
First check that your io_service runs strictly single-threaded. This is not visible from the code. If it were not, then shared state (like connections_) would need synchronized access.
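If the context does run on more than one thread, the usual tool for that is a strand. Here is a minimal sketch of serializing all access to a connection set through a strand (my illustration, not code from the original answer; the Registry/Connection names are made up):

#include <boost/asio.hpp>
#include <memory>
#include <set>

namespace asio = boost::asio;

struct Connection { /* your session type */ };

// Handlers posted through the same strand never run concurrently, so
// connections_ needs no mutex even when io_context::run() is called
// from several threads.
class Registry {
public:
    explicit Registry(asio::io_context& io) : strand_(asio::make_strand(io)) {}

    void add(std::shared_ptr<Connection> c) {
        asio::post(strand_, [this, c] { connections_.insert(c); });
    }

    void remove(std::shared_ptr<Connection> c) {
        asio::post(strand_, [this, c] { connections_.erase(c); });
    }

private:
    asio::strand<asio::io_context::executor_type> strand_;
    std::set<std::shared_ptr<Connection>> connections_;
};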
In fact you can have a logical strand in the form of the accept loop, but to take advantage of this you should make all accesses to connections_ happen there; see e.g.
- here, where the session list holds the sessions directly and no shared pointer is necessary at all: How to pass a boost asio tcp socket to a thread for sending heartbeat to client or server
- or here, where we do have shared pointers and store weak pointers in the session list, which can be "garbage collected" from inside the accept loop: ASIO - How to stop simple coroutine based server?
UPDATE
- buff was a local variable, which leads to Undefined Behaviour because it is not valid for the entire duration of the async_read operation.
- In general, it doesn't really make sense to have the shared_from_this idiom and also keep a container of shared pointers, which already dictates the lifetime.
- Your problem appears to be that sometimes CAsioServer is simply destroyed, meaning that all elements of connections_ are released, and at that time their CAsioConnection objects may be destructed. It will also destruct CAsioServer.
- Whenever an Asio object is destructed, any pending async operations will fail with asio::error::operation_aborted, which indeed means you have to respond. However, when the completion handler is invoked, the object has already become invalid.
- In my comment I just noticed a missing key ingredient: you never capture/bind the shared pointer to CAsioConnection in any of the completion handlers. This is highly un-idiomatic. Instead, you should use the shared pointers to govern lifetime. Iff you also need a list of connections, then make it a list of weak pointers so it only observes lifetime.
Points of change:
- No need to make the server enable_shared_from_this.
- connections_ should hold weak pointers, or even non-owning raw pointers. Weak pointers are obviously a lot safer here. In fact, you could elect to drop that container entirely, since nothing seems to be using it. In the sample below I elect to keep it so you can see it in action.
- Capture shared_from_this in the completion handler to ensure that the object is still valid when it fires:
asio::async_read(socket_, asio::buffer(buff, 3),
    [this, self = shared_from_this()](error_code ec, std::size_t /*length*/) {
Simplified
Note I chose std::list because it removes the need for equality/ordering (see std::owner_less<>), which was getting ugly because of the way a reference to the container is stored inside the CAsioConnection class, making things cyclically dependent (the CAsioConnection type is not yet complete before the owner_less<> class is instantiated). I just opted out of the (unneeded?) complexity.
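For reference, the opted-out std::set variant would have looked roughly like this (my sketch, not code from the answer):

#include <memory>
#include <set>

class CAsioConnection; // would need to be complete before the set's members are instantiated

// std::owner_less<> compares weak pointers by owner, which std::set needs
// because std::weak_ptr has no operator<
using ConnectionSet =
    std::set<std::weak_ptr<CAsioConnection>,
             std::owner_less<std::weak_ptr<CAsioConnection>>>;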
Live On Coliru
#include <boost/asio.hpp>
#include <cstdint>     // uint8_t
#include <functional>  // std::mem_fn
#include <iostream>
#include <list>
#include <memory>
#include <string_view>
namespace asio = boost::asio;
using error_code = boost::system::error_code; // compat
class CAsioConnection : public std::enable_shared_from_this<CAsioConnection> {
public:
using PtrType = std::shared_ptr<CAsioConnection>;
CAsioConnection(asio::ip::tcp::socket socket) : socket_(std::move(socket)) {
log(__FUNCTION__);
}
~CAsioConnection() { log(__FUNCTION__); }
void read() { do_read(); }
private:
void log(std::string_view msg) const {
error_code ec;
std::clog << msg << ", socket: " << socket_.remote_endpoint(ec) << "\n";
}
uint8_t buff[256];
void do_read() {
asio::async_read(socket_, asio::buffer(buff),
[this, self = shared_from_this()](error_code ec, std::size_t length) {
if (!ec) {
log(__FUNCTION__ + (" length: " + std::to_string(length)));
do_read();
} else {
log(__FUNCTION__ + (" error: " + ec.message()));
}
});
}
asio::ip::tcp::socket socket_;
};
class CAsioServer {
public:
CAsioServer(asio::io_context& io, const asio::ip::tcp::endpoint& endpoint)
: acceptor_(io, endpoint) { log(__FUNCTION__); }
~CAsioServer() { log(__FUNCTION__); }
int port() const { return acceptor_.local_endpoint().port(); }
void accept() { do_accept(); }
private:
void do_accept() {
acceptor_.async_accept([this](error_code ec,
asio::ip::tcp::socket socket) {
if (!ec) {
auto c = std::make_shared<CAsioConnection>(std::move(socket));
connections_.push_back(c);
c->read();
} else {
log(__FUNCTION__ + (" error: " + ec.message()));
}
connections_.remove_if(std::mem_fn(&WeakPtr::expired));
if (acceptor_.is_open())
do_accept();
});
}
void log(std::string_view msg) const {
std::clog << msg << ", port: " << port() << "\n";
}
asio::ip::tcp::acceptor acceptor_;
using WeakPtr = std::weak_ptr<CAsioConnection>;
std::list<WeakPtr> connections_;
};
int main() {
boost::asio::io_context io;
CAsioServer server(io, { {}, 7878 });
server.accept();
io.run_for(std::chrono::seconds(10));
}
Output:
./a.out& sleep 1; nc -w 1 127.0.0.1 7878 < main.cpp
CAsioServer, port: 7878
CAsioConnection, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() length: 256, socket: 127.0.0.1:50628
operator() error: End of file, socket: 127.0.0.1:50628
~CAsioConnection, socket: 127.0.0.1:50628
~CAsioServer, port: 7878
Answer 2:
Elementary, my dear Watson
The key to the problem: I am a very trusting person.
I should mention that I use non-Boost Asio ver. 1.18.0, with VS2017 and Win10. Thus, the whole explanation below relates to the Windows part of Asio. With some probability, the POSIX implementation works a little differently.
The main idea of the initial implementation was to have the ability to control the population of server/connection objects just by adding/removing them from an appropriate set<> collection.
The text below describes why this does not work without additional effort.
According to the Asio documentation:
~basic_stream_socket();
This function destroys the socket, cancelling any outstanding asynchronous operations associated with the socket as if by calling cancel.
My mistake was to think that the cancelling of asynchronous operations would be performed within the scope of the destructor, with the async handlers being called there as well.
It's funny: I had wondered why they use those self pointers inside the async handlers, if the async handlers are supposed to be rejected during the object's destruction stage. The right answer: the async handlers will not be rejected )).
In fact, the async handlers will be called afterwards, by which time the class instance has already been destroyed.
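That is exactly what the self pointer guards against: capturing shared_from_this() in the handler keeps the object alive until the last pending handler has run. A minimal sketch of the idiom on its own (the full code below combines it with a cancellation flag):

#include <asio.hpp>
#include <cstdint>
#include <iostream>
#include <memory>

class Session : public std::enable_shared_from_this<Session>
{
public:
    explicit Session(asio::ip::tcp::socket socket) : socket_(std::move(socket)) {}
    void read() { do_read(); }

private:
    void do_read()
    {
        auto self(shared_from_this());
        asio::async_read(socket_, asio::buffer(buff_),
            [this, self](std::error_code ec, std::size_t /*length*/) {
                // `self` keeps *this alive for the duration of the handler,
                // even if the owning container has dropped its shared_ptr
                if (!ec)
                    do_read();
                else
                    std::cout << "-- read done: " << ec.message() << "\n";
            });
    }

    asio::ip::tcp::socket socket_;
    uint8_t buff_[3];
};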
What is happening:
- When the server or connection object is destroyed, WinSock2's ::closesocket() is called for the socket handle in ~basic_stream_socket().
- On the next iteration inside io_context.run(), win_iocp_io_context::do_one() calls ::GetQueuedCompletionStatus() to get the async operation result and to start the async handler that was associated with the destroyed socket.
There are two scenarios that are interesting for us:
- The socket is waiting for data.
  - The socket is destroyed (e.g. inside the connection class destructor).
  - The async handler is called with an error.
  In this scenario we may check the error code and bail out of the async handler, even though the class was already destroyed. A bad but working solution, which I demonstrated in the code in my question.
- The socket receives some data, but the async handler has not been started yet.
  - The socket is destroyed (e.g. inside the connection class destructor).
  - The async handler is started WITHOUT ERRORS!!! Disaster.
  In this scenario the error code cannot save us. The crash happens. Thus, the approach of checking error codes inside the async handlers does not work.
The code below solves all the problems by introducing a hasta_la_vista() method for the server and connection classes. Not super elegant, but a reinforced-concrete solution:
#include <map>
#include <array>
#include <set>
#include <vector>
#include <deque>
#include <thread>
#include <iostream>
#include <asio.hpp>
#include <iomanip>
class CAsioConnection
: public std::enable_shared_from_this<CAsioConnection>
{
public:
using PtrType = std::shared_ptr<CAsioConnection>;
CAsioConnection(asio::ip::tcp::socket socket, std::set<CAsioConnection::PtrType>& connections)
: socket_(std::move(socket)), connections_(connections), destroying_in_progress(false)
{
std::cout << "-- CAsioConnection is creating\n";
}
virtual ~CAsioConnection()
{
std::cout << "-- CAsioConnection is destroying\n";
}
void read() { do_read(); }
void hasta_la_vista(void)
{
destroying_in_progress = true;
std::error_code ec;
socket_.cancel(ec);
}
private:
void do_read(void)
{
auto self(shared_from_this());
asio::async_read(socket_, asio::buffer(buff),
[this, self](std::error_code ec, std::size_t /*length*/) {
if (destroying_in_progress)
return;
if (!ec)
{
do_read();
}
else
{
std::cout << "-- CAsioConnection::do_read() error : (" << ec.value() << ") " << ec.message() << "\n";
hasta_la_vista();
connections_.erase(shared_from_this());
}
});
}
uint8_t buff[3];
asio::ip::tcp::socket socket_;
bool destroying_in_progress;
std::set<CAsioConnection::PtrType>& connections_;
};
//*****************************************************************************
class CAsioServer
: public std::enable_shared_from_this<CAsioServer>
{
public:
using PtrType = std::shared_ptr<CAsioServer>;
CAsioServer(int port, asio::io_context& io, const asio::ip::tcp::endpoint& endpoint)
: port_(port), destroying_in_progress(false), acceptor_(io, endpoint)
{
std::cout << "-- CAsioServer is creating, port: " << port_ << "\n";
}
virtual ~CAsioServer()
{
for (auto c : connections_)
{
c->hasta_la_vista();
}
std::cout << "-- CAsioServer is destroying , port: " << port_ << "\n";
}
int port(void) { return port_; }
void accept(void) { do_accept(); }
void hasta_la_vista(void)
{
destroying_in_progress = true;
std::error_code ec;
acceptor_.cancel(ec);
}
private:
void do_accept()
{
auto self(shared_from_this());
acceptor_.async_accept([this, self](std::error_code ec, asio::ip::tcp::socket socket) {
if (destroying_in_progress)
return;
if (!ec)
{
std::cout << "-- CAsioServer::do_accept() connection to socket: " << socket.native_handle() << "\n";
auto c = std::make_shared<CAsioConnection>(std::move(socket), connections_);
connections_.insert(c);
c->read();
}
else
{
std::cout << "-- CAsioServer::do_accept() error : (" << ec.value() << ") "<< ec.message() << "\n";
}
do_accept();
});
}
int port_;
bool destroying_in_progress;
asio::ip::tcp::acceptor acceptor_;
std::set<CAsioConnection::PtrType> connections_;
};
//*****************************************************************************
class CTcpBase
{
public:
CTcpBase()
{
// heart beat timer to keep it alive
do_heart_beat();
t_ = std::thread([this] {
std::cout << "-- io context is RUNNING!!!\n";
io_.run();
std::cout << "-- io context has been STOPED!!!\n";
});
}
virtual ~CTcpBase()
{
io_.stop();
if (t_.joinable())
t_.join();
}
void add_server(int port)
{
io_.post([this, port] {
for (auto& s : servers_)
if (port == s->port())
return;
auto endpoint = asio::ip::tcp::endpoint(asio::ip::tcp::v4(), port);
auto s = std::make_shared<CAsioServer>(port, io_, endpoint);
s->accept();
servers_.insert(s);
});
}
void remove_server(int port)
{
io_.post([this, port] {
for (auto s : servers_)
if (port == s->port())
{
s->hasta_la_vista();
servers_.erase(s);
return;
}
});
}
private:
void do_heart_beat(void)
{
std::cout << "-- beat\n";
auto timer = std::make_shared<asio::steady_timer>(io_, asio::chrono::milliseconds(3000));
timer->async_wait([timer, this](const std::error_code& ec) {
do_heart_beat();
});
}
asio::io_context io_;
std::thread t_;
std::set<CAsioServer::PtrType> servers_;
};
//*****************************************************************************
int main(void)
{
CTcpBase tcp_base;
std::cout << "CONNECT the server to port 502\n";
tcp_base.add_server(502);
std::this_thread::sleep_for(std::chrono::seconds(20));
std::cout << "REMOVE the server from port 502\n";
tcp_base.remove_server(502);
std::this_thread::sleep_for(std::chrono::seconds(10));
return 0;
}
Source: https://stackoverflow.com/questions/63711285/how-to-eliminate-crashes-when-destroying-boostasio-entities-on-fly