Windows和pthread中提供的自旋锁
程序员文章站
2022-09-28 08:36:31
Windows和POSIX中都提供了自旋锁,我们也可以通过C++11的atomic来实现自旋锁。那么两者性能上面是什么关系?先引入实现代码: 下面给出一个简单测试,两组线程,一组用来插入,另外一组用来取出。测试结果显示: (1)无论是Windows,还是POSIX提供的C语言版本的自旋锁,都和C++ ......
Windows和POSIX中都提供了自旋锁,我们也可以通过C++11的atomic来实现自旋锁。那么这几种实现在性能上有什么差别?先给出实现代码:
#ifndef SPINLOCK_H_
#define SPINLOCK_H_

#include <atomic>

// spinlock_mutex
//
// A minimal lock type exposing lock() / try_lock() / unlock(), so it can be
// used with std::lock_guard. One of three implementations is selected at
// compile time:
//   * _WIN32            -> a CRITICAL_SECTION initialized with a spin count
//   * _POSIX_C_SOURCE   -> pthread_spinlock_t
//   * otherwise         -> a std::atomic_flag test-and-set loop
//
// In every variant the object is non-copyable: duplicating the underlying
// OS lock object (or the flag) would not produce a usable second lock.

#ifdef _WIN32

#include <Windows.h>

class spinlock_mutex
{
public:
    // Spin count passed to InitializeCriticalSectionAndSpinCount (all bits set).
    // NOTE(review): presumably meant as "spin as long as possible before
    // waiting"; confirm how the OS interprets/clamps this value.
    static constexpr DWORD SPINLOCK_COUNT = static_cast<DWORD>(-1);

public:
    // Initialization can fail when resources are exhausted; that case is
    // deliberately ignored here. See "Critical Sections and Error Handling"
    // in "Windows via C/C++".
    spinlock_mutex()
    {
        InitializeCriticalSectionAndSpinCount(&m_cs, SPINLOCK_COUNT);
    }

    ~spinlock_mutex()
    {
        DeleteCriticalSection(&m_cs);
    }

    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Blocks until the critical section is entered.
    void lock()
    {
        EnterCriticalSection(&m_cs);
    }

    // Returns true if the critical section was entered without waiting.
    // The API reports success as "nonzero", so compare against FALSE
    // instead of testing for equality with TRUE.
    bool try_lock()
    {
        return TryEnterCriticalSection(&m_cs) != FALSE;
    }

    // Leaves the critical section.
    void unlock()
    {
        LeaveCriticalSection(&m_cs);
    }

private:
    CRITICAL_SECTION m_cs;
};

#elif defined(_POSIX_C_SOURCE)

#include <pthread.h>

class spinlock_mutex
{
public:
    // Return codes of the pthread_spin_* calls are intentionally not checked.
    spinlock_mutex()
    {
        pthread_spin_init(&m_cs, PTHREAD_PROCESS_PRIVATE);
    }

    ~spinlock_mutex()
    {
        pthread_spin_destroy(&m_cs);
    }

    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Spins until the lock is acquired.
    void lock()
    {
        pthread_spin_lock(&m_cs);
    }

    // Returns true if the lock was acquired (pthread_spin_trylock returned 0).
    bool try_lock()
    {
        return pthread_spin_trylock(&m_cs) == 0;
    }

    // Releases the lock.
    void unlock()
    {
        pthread_spin_unlock(&m_cs);
    }

private:
    pthread_spinlock_t m_cs;
};

#else

class spinlock_mutex
{
    // ATOMIC_FLAG_INIT is only specified for the "= ATOMIC_FLAG_INIT" form,
    // so use it as a default member initializer.
    std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
    spinlock_mutex() = default;

    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Spins until test_and_set observes the flag as previously clear.
    void lock()
    {
        while (flag.test_and_set(std::memory_order_acquire)) {
            // busy-wait
        }
    }

    // Clears the flag with release ordering, pairing with the acquire in lock().
    void unlock()
    {
        flag.clear(std::memory_order_release);
    }

    // Single attempt: returns true if the flag was clear and is now set by us.
    bool try_lock()
    {
        return !flag.test_and_set(std::memory_order_acquire);
    }
};

#endif

#endif // SPINLOCK_H_
下面给出一个简单测试,两组线程,一组用来插入,另外一组用来取出。测试结果显示:
(1)无论是Windows,还是POSIX提供的C语言版本的自旋锁,都和C++11使用atomic构建的自旋锁效率相近。
(2)在插入线程数和取出线程数相同的情况下,线程数越多,效率越低。
下面是测试代码:
#include <memory> #include <cassert> #include <iostream> #include <vector> #include <thread> #include <future> #include <random> #include <chrono> #include "spinlock.h" #include <forward_list> struct student_name { student_name(int age = 0) : age(age), next(nullptr) { } int age; student_name* next; }; spinlock_mutex g_mtx; std::forward_list<int> g_students; std::atomic<int> g_inserts; // insert num (successful) std::atomic<int> g_drops; // drop num (successful) std::atomic<int> g_printNum; // as same as g_drops std::atomic<long long> g_ageInSum; // age sum when producing student_name std::atomic<long long> g_ageOutSum; // age sum when consuming student_name std::atomic<bool> goOn(true); constexpr int INSERT_THREAD_NUM = 1; constexpr int DROP_THREAD_NUM = 1; constexpr int ONE_THREAD_PRODUCE_NUM = 5000000; // when testing, no more than this number, you know 20,000,00 * 100 * 10 ~= MAX_INT if thread num <= 10 inline void printOne(student_name* t) { g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(t->age, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); delete t; } void insert_students(int idNo) { std::default_random_engine dre(time(nullptr)); std::uniform_int_distribution<int> ageDi(1, 99); for (int i = 0; i < ONE_THREAD_PRODUCE_NUM; ++i) { int newAge = ageDi(dre); g_ageInSum.fetch_add(newAge, std::memory_order_relaxed); { std::lock_guard<spinlock_mutex> lock(g_mtx); g_students.push_front(newAge); } // use memory_order_relaxed avoiding affect folly memory order g_inserts.fetch_add(1, std::memory_order_relaxed); } } void drop_students(int idNo) { while (auto go = goOn.load(std::memory_order_consume)) { { std::forward_list<int> tmp; { std::lock_guard<spinlock_mutex> lock(g_mtx); std::swap(g_students, tmp); } auto it = tmp.begin(); while (it != tmp.end()) { g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(*it, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); ++it; } 
} } } int main() { auto start = std::chrono::system_clock::now(); std::vector<std::future<void>> insert_threads; std::vector<std::future<void>> drop_threads; for (auto i = 0; i != INSERT_THREAD_NUM; ++i) { insert_threads.push_back(std::async(std::launch::async, insert_students, i)); } for (auto i = 0; i != DROP_THREAD_NUM; ++i) { drop_threads.push_back(std::async(std::launch::async, drop_students, i)); } for (auto& thread : insert_threads) { thread.get(); } std::this_thread::sleep_for(std::chrono::milliseconds(1000)); goOn.store(false, std::memory_order_release); for (auto& thread : drop_threads) { thread.get(); } { std::forward_list<int> tmp; { std::lock_guard<spinlock_mutex> lock(g_mtx); std::swap(g_students, tmp); } auto it = tmp.begin(); while (it != tmp.end()) { g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(*it, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); ++it; } } auto end = std::chrono::system_clock::now(); std::chrono::duration<double> diff = end - start; std::cout << "Time to insert and drop is: " << diff.count() << " s\n"; std::cout << "insert count1: " << g_inserts.load() << std::endl; std::cout << "drop count1: " << g_drops.load() << std::endl; std::cout << "print num1: " << g_printNum.load() << std::endl; std::cout << "age in1: " << g_ageInSum.load() << std::endl; std::cout << "age out1: " << g_ageOutSum.load() << std::endl; std::cout << std::endl; }
关于自旋锁,还有以下内容需要说明:
(1)应用层使用spinlock的最大问题是不能像kernel一样关中断(cli/sti)。假设并发稍微多一点,线程1在lock之后、unlock之前发生了时钟中断而被切换出去,
要过一段时间才会被切回来调用unlock,那么在这段时间里,另一个调用lock的线程就只能空转while循环——这才是最浪费CPU时间的地方。
所以既然不能关中断,就只能sleep;无论采用哪种方式,都存在很大的冲突代价。
(2)具体参考:https://www.zhihu.com/question/55764216