A thread pool with Boost.Asio

在之前的文章中，我们有介绍如何推导一个简单的任务队列。

本文介绍如何利用 Boost.Asio 构建不需要显式地加锁或同步的线程池。

Boost.Asio 有两种支持多线程的方式:

在多线程的场景下，每个线程都持有一个io_service，并且每个线程都调用各自的io_service的run()方法。
全局只分配一个io_service，并且让这个io_service在多个线程之间共享，每个线程都调用全局的io_service的run()方法。

每个线程一个 I/O Service

让我们先分析第一种方案：在多线程的场景下，每个线程都持有一个io_service (通常的做法是，让线程数和 CPU 核心数保持一致)。那么这种方案有什么特点呢？

在多核的机器上，这种方案可以充分利用多个 CPU 核心。
某个 socket 描述符并不会在多个线程之间共享，所以不需要引入同步机制。
在 event handler 中不能执行阻塞的操作，否则将会阻塞掉io_service所在的线程。

下面我们实现了一个AsioIOServicePool，封装了线程池的创建操作：

class AsioIOServicePool
{
public:
    using IOService = boost::asio::io_context;
    using ExecutorType = boost::asio::io_context::executor_type;
    using Work = boost::asio::executor_work_guard<ExecutorType>;
    using WorkPtr = std::unique_ptr<Work>;

    // the constructor just launches some amount of threads
    AsioIOServicePool(std::size_t size = std::thread::hardware_concurrency())
        : ioServices_(size),
          works_(size),
          nextIOService_(0)
    {
        for (std::size_t i = 0; i < size; ++i)
        {
            works_[i] = std::unique_ptr<Work>(new Work(ioServices_[i]));
        }

        // one io_context per thread
        for (std::size_t i = 0; i < ioServices_.size(); ++i)
        {
            threads_.emplace_back([this, i] ()
                                  {
                                      ioServices_[i].run();
                                  });
        }
    }

    AsioIOServicePool(const AsioIOServicePool &) = delete;
    AsioIOServicePool &operator=(const AsioIOServicePool &) = delete;

    // 使用 round-robin 的方式返回一个 io_service
    boost::asio::io_service &getIOService()
    {
        auto &service = ioServices_[nextIOService_++];
        if (nextIOService_ == ioServices_.size())
        {
            nextIOService_ = 0;
        }

        return service;
    }

    void stop()
    {
        // Once the work object is destroyed, the service will stop.
        for (auto &work: works_) {
            work.reset();
        }
        for (auto &t: threads_) {
            t.join();
        }
    }

private:
    std::vector<IOService>       ioServices_;
    std::vector<WorkPtr>         works_;
    std::vector<std::thread>     threads_;
    std::size_t                  nextIOService_;
};

AsioIOServicePool使用起来也很简单：

std::mutex mtx;             // protect std::cout
AsioIOServicePool pool;
 
boost::asio::steady_timer timer{pool.getIOService(), std::chrono::seconds{2}};
timer.async_wait([&mtx] (const boost::system::error_code &ec)
                  {
                      std::lock_guard<std::mutex> lock(mtx);
                      std::cout << "Hello, World! " << std::endl;
                  });
pool.stop();

一个 I/O Service 与多个线程

另一种方案则是先分配一个全局io_service，然后开启多个线程，每个线程都调用这个io_service的run()方法。这样，当某个异步事件完成时，io_service就会将相应的 event handler 交给任意一个线程去执行。

然而这种方案在实际使用中，需要注意一些问题:

在 event handler 中允许执行阻塞的操作 (例如数据库查询操作)。
线程数可以大于 CPU 核心数，譬如说，如果需要在 event handler 中执行阻塞的操作，为了提高程序的响应速度，这时就需要提高线程的数目。
由于多个线程同时运行事件循环(event loop)，所以会导致一个问题：即一个 socket 描述符可能会在多个线程之间共享，容易出现竞态条件(race condition)。譬如说，如果某个 socket 的可读事件很快发生了两次，那么就会出现两个线程同时读同一个 socket 的问题 (可以使用strand解决这个问题)。

值得一提的还有，成员变量 work_guard_ 的作用是让 io_context 即使在没有异步任务可执行时也保持运行（即 io_context::run 不返回）。详见 Stack Overflow 的讨论：Why should I use io_service::work?

#include <chrono>
#include <cstddef>
#include <functional>
#include <iostream>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

#define BOOST_ASIO_NO_DEPRECATED
#include <boost/thread/thread.hpp>
#include <boost/asio.hpp>

class ThreadPool {
public:
    // the constructor just launches some amount of threads
    explicit ThreadPool(std::size_t size) :
        io_context_(size),
        strand_(io_context_),
        work_guard_(boost::asio::make_work_guard(io_context_))
    {
        // one io_context and multi-thread
        for (std::size_t i = 0; i < size; ++i) {
            // all the threads do is execute the io_context::run()
            group_.create_thread([&](){ io_context_.run(); });
        }
    }
	
    // the destructor joins all threads
    ~ThreadPool() {
        // Once the work object is destroyed, the service will stop.
        work_guard_.reset();
        group_.join_all();
    }

    // Add new work item to the pool.
    template<class F>
    void Enqueue(F f) {
        // Submits a completion token or function object for execution. 
        boost::asio::post(io_context_, f);
    }

private:
    boost::thread_group group_;
    boost::asio::io_context io_context_;
    
    // prevent the run() method from return.
    typedef boost::asio::io_context::executor_type ExecutorType;
    boost::asio::executor_work_guard<ExecutorType> work_guard_;
};

// Serializes writes to std::cout from the worker threads.
std::mutex g_io_mutex;

/// Demo entry point: starts a ThreadPool and queues four tasks, each of which
/// prints "Hello(i)", sleeps for one second and then prints "World(i)".
/// The pool's destructor joins the workers before main returns.
int main(int /*argc*/, char* /*argv*/[]) {
    // hardware_concurrency() may return 0 when the value cannot be determined.
    // Fall back to a single worker so the queued tasks are still executed.
    const unsigned int detected = std::thread::hardware_concurrency();
    const unsigned int thread_num = (detected == 0) ? 1u : detected;
    std::cout << "thread num: " << thread_num << std::endl;

    ThreadPool pool(thread_num);
    // Queue a bunch of work items.
    for (int i = 0; i < 4; ++i) {
        pool.Enqueue([i] {
            {
                std::lock_guard<std::mutex> lock(g_io_mutex);
                std::cout << "Hello" << "(" << i << ") " << std::endl;
            }

            // Sleep outside the lock so the tasks can overlap.
            std::this_thread::sleep_for(std::chrono::seconds(1));

            {
                std::lock_guard<std::mutex> lock(g_io_mutex);
                std::cout << "World" << "(" << i << ")" << std::endl;
            }
        });
    }

    return 0;
}

输出（每次都不一样）：

Hello(0)
Hello(1)
Hello(2)
Hello(3)
<Wait about 1 second>
World(3)
World(2)
World(1)
World(0)