summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarsten Haitzler (Rasterman) <raster@rasterman.com>2019-12-03 09:18:35 +0000
committerCarsten Haitzler (Rasterman) <raster@rasterman.com>2019-12-03 11:45:42 +0000
commit7787b16e20bef12967b8a6bd74d6d8e6516a8066 (patch)
tree39268ded319a693ca433d0d0efea0edb0ed67dce
parent9e5455619546473341aff4b2df8129b0c599ebbd (diff)
eina - threadqueue - revert series of commits that moved to mempools
Revert "eina: remove no longer used function _eina_thread_queue_msg_block_real_free" This reverts commit 695b44526c968787374fd421327422a6eea710a7. Revert "eina/threadqueue: use mempool_del for hash free function" This reverts commit b0cb3b935a8faf2d67bae38a54683946cb01d0b9. Revert "eina_thread_queue: use normal mempools for block allocation" This reverts commit 14ae3e3dec7866e74f2990dca417eac44da41058. Why? Threadqueue is a highly performance sensitive API. _eina_thread_queue_msg_block_new() may be called quite often. Doing a hash lookup to then find a mempool handle to then allocate from was not the same as what was there and was going to be far more costly. This would have actual performance impact, as we have to compute a hash, rummage through a hash, and hunt for an environment variable too. The original code looked at a spare block pool where blocks *MAY* be of different sizes (not always the same size, so using a mempool is actually wrong and will stop threadqueue from being able to send larger messages at all). If you send large messages, larger blocks would have been allocated and put in this pool. In almost all cases the first item in the pool would be big enough, so we don't hunt and the find pulls out the first block, resets the fields that are needed and returns that block. If it needs a bigger one, it does hunt. It is going to be rare that such big blocks are needed, so I never tried to optimize this (but it could be done with an array of sizes to make a walk to find the right sized element cheap if the need arises). Performance dropped quite a lot. On aarch64, the above mempool usage dropped the message rate from 1037251 msg/sec to 610316. On x86 it was even worse: it dropped from 2815775 msg/sec to 378653. So backing this out makes the message rate 7.4 times faster on x86, and on aarch64 it's 1.7 times faster. So moving to a mempool was actually just wrong (the size is not always the same). 
Also, this ended up with a mempool of 64k for thread queue blocks even if we only sent messages sporadically, as opposed to a single 4kb block. So backing this out saves memory by only having 1 or 2 4k blocks around most of the time, not a 64k mempool. So the above patch and its follow-on patches were done without accounting for the performance implications. There were good reasons to do what I did - because this code was highly tuned, even to the point where I used atomics instead of locks specifically to cut down some contention overhead. Beware when you change something that there may be steep performance implications. Going back to what was there being 7.4 times faster is a great example.
-rw-r--r--src/lib/eina/eina_thread_queue.c103
1 files changed, 76 insertions, 27 deletions
diff --git a/src/lib/eina/eina_thread_queue.c b/src/lib/eina/eina_thread_queue.c
index d6ba62d..531800d 100644
--- a/src/lib/eina/eina_thread_queue.c
+++ b/src/lib/eina/eina_thread_queue.c
@@ -74,7 +74,9 @@ struct _Eina_Thread_Queue_Msg_Block
74// avoid reallocation via malloc/free etc. to avoid free memory pages and 74// avoid reallocation via malloc/free etc. to avoid free memory pages and
75// pressure on the malloc subsystem 75// pressure on the malloc subsystem
76static int _eina_thread_queue_log_dom = -1; 76static int _eina_thread_queue_log_dom = -1;
77static int _eina_thread_queue_block_pool_count = 0;
77static Eina_Spinlock _eina_thread_queue_block_pool_lock; 78static Eina_Spinlock _eina_thread_queue_block_pool_lock;
79static Eina_Thread_Queue_Msg_Block *_eina_thread_queue_block_pool = NULL;
78 80
79#ifdef ERR 81#ifdef ERR
80# undef ERR 82# undef ERR
@@ -86,51 +88,57 @@ static Eina_Spinlock _eina_thread_queue_block_pool_lock;
86#endif 88#endif
87#define DBG(...) EINA_LOG_DOM_DBG(_eina_thread_queue_log_dom, __VA_ARGS__) 89#define DBG(...) EINA_LOG_DOM_DBG(_eina_thread_queue_log_dom, __VA_ARGS__)
88 90
89static Eina_Hash *mempools;
90
91// api's to get message blocks from the pool or put them back in 91// api's to get message blocks from the pool or put them back in
92static Eina_Thread_Queue_Msg_Block * 92static Eina_Thread_Queue_Msg_Block *
93_eina_thread_queue_msg_block_new(int size) 93_eina_thread_queue_msg_block_new(int size)
94{ 94{
95 Eina_Thread_Queue_Msg_Block *blk; 95 Eina_Thread_Queue_Msg_Block *blk;
96 Eina_Mempool *mp;
97 size_t mp_size = sizeof(Eina_Thread_Queue_Msg_Block) - sizeof(Eina_Thread_Queue_Msg) + size;
98 96
99 eina_spinlock_take(&(_eina_thread_queue_block_pool_lock)); 97 eina_spinlock_take(&(_eina_thread_queue_block_pool_lock));
100 mp = eina_hash_find(mempools, &size); 98 if (_eina_thread_queue_block_pool)
101 if (!mp)
102 { 99 {
103 const char *choice = getenv("EINA_MEMPOOL"); 100 blk = _eina_thread_queue_block_pool;
104 if ((!choice) || (!choice[0])) 101 if (blk->size >= size)
105 choice = "chained_mempool"; 102 {
106 mp = eina_mempool_add(choice, "Eina_Thread_Queue_Msg_Block", NULL, mp_size, 16); 103 blk->first = 0;
107 eina_hash_add(mempools, &size, mp); 104 blk->last = 0;
105 blk->ref = 0;
106 blk->full = 0;
107 _eina_thread_queue_block_pool = blk->next;
108 blk->next = NULL;
109 _eina_thread_queue_block_pool_count--;
110 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
111 return blk;
112 }
113 blk = NULL;
108 } 114 }
109 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock)); 115 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
110 116
111 blk = eina_mempool_calloc(mp, mp_size); 117 blk = malloc(sizeof(Eina_Thread_Queue_Msg_Block) -
118 sizeof(Eina_Thread_Queue_Msg) +
119 size);
112 if (!blk) 120 if (!blk)
113 { 121 {
114 ERR("Thread queue block buffer of size %i allocation failed", size); 122 ERR("Thread queue block buffer of size %i allocation failed", size);
115 return NULL; 123 return NULL;
116 } 124 }
125 blk->next = NULL;
117#ifndef ATOMIC 126#ifndef ATOMIC
118 eina_spinlock_new(&(blk->lock_ref)); 127 eina_spinlock_new(&(blk->lock_ref));
119 eina_spinlock_new(&(blk->lock_first)); 128 eina_spinlock_new(&(blk->lock_first));
120#endif 129#endif
121 eina_lock_new(&(blk->lock_non_0_ref)); 130 eina_lock_new(&(blk->lock_non_0_ref));
122 blk->size = size; 131 blk->size = size;
132 blk->first = 0;
133 blk->last = 0;
134 blk->ref = 0;
135 blk->full = 0;
123 return blk; 136 return blk;
124} 137}
125 138
126static void 139static void
127_eina_thread_queue_msg_block_free(Eina_Thread_Queue_Msg_Block *blk) 140_eina_thread_queue_msg_block_real_free(Eina_Thread_Queue_Msg_Block *blk)
128{ 141{
129 Eina_Mempool *mp;
130
131 eina_spinlock_take(&(_eina_thread_queue_block_pool_lock));
132 mp = eina_hash_find(mempools, &blk->size);
133 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
134 eina_lock_take(&(blk->lock_non_0_ref)); 142 eina_lock_take(&(blk->lock_non_0_ref));
135 eina_lock_release(&(blk->lock_non_0_ref)); 143 eina_lock_release(&(blk->lock_non_0_ref));
136 eina_lock_free(&(blk->lock_non_0_ref)); 144 eina_lock_free(&(blk->lock_non_0_ref));
@@ -142,7 +150,29 @@ _eina_thread_queue_msg_block_free(Eina_Thread_Queue_Msg_Block *blk)
142 eina_spinlock_release(&(blk->lock_first)); 150 eina_spinlock_release(&(blk->lock_first));
143 eina_spinlock_free(&(blk->lock_first)); 151 eina_spinlock_free(&(blk->lock_first));
144#endif 152#endif
145 eina_mempool_free(mp, blk); 153 free(blk);
154}
155
156static void
157_eina_thread_queue_msg_block_free(Eina_Thread_Queue_Msg_Block *blk)
158{
159 if (blk->size == MIN_SIZE)
160 {
161 eina_spinlock_take(&(_eina_thread_queue_block_pool_lock));
162 if (_eina_thread_queue_block_pool_count < 20)
163 {
164 _eina_thread_queue_block_pool_count++;
165 blk->next = _eina_thread_queue_block_pool;
166 _eina_thread_queue_block_pool = blk;
167 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
168 }
169 else
170 {
171 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
172 _eina_thread_queue_msg_block_real_free(blk);
173 }
174 }
175 else _eina_thread_queue_msg_block_real_free(blk);
146} 176}
147 177
148static Eina_Bool 178static Eina_Bool
@@ -154,6 +184,21 @@ _eina_thread_queue_msg_block_pool_init(void)
154static void 184static void
155_eina_thread_queue_msg_block_pool_shutdown(void) 185_eina_thread_queue_msg_block_pool_shutdown(void)
156{ 186{
187 eina_spinlock_take(&(_eina_thread_queue_block_pool_lock));
188 while (_eina_thread_queue_block_pool)
189 {
190 Eina_Thread_Queue_Msg_Block *blk, *blknext;
191
192 for (;;)
193 {
194 blk = _eina_thread_queue_block_pool;
195 if (!blk) break;
196 blknext = blk->next;
197 _eina_thread_queue_msg_block_real_free(blk);
198 _eina_thread_queue_block_pool = blknext;
199 }
200 }
201 eina_spinlock_release(&(_eina_thread_queue_block_pool_lock));
157 eina_spinlock_free(&_eina_thread_queue_block_pool_lock); 202 eina_spinlock_free(&_eina_thread_queue_block_pool_lock);
158} 203}
159 204
@@ -186,15 +231,19 @@ _eina_thread_queue_msg_alloc(Eina_Thread_Queue *thq, int size, Eina_Thread_Queue
186 size = ((size + 7) >> 3) << 3; 231 size = ((size + 7) >> 3) << 3;
187 if (!thq->data) 232 if (!thq->data)
188 { 233 {
189 size = MAX(size, MIN_SIZE); 234 if (size < MIN_SIZE)
190 thq->data = _eina_thread_queue_msg_block_new(size); 235 thq->data = _eina_thread_queue_msg_block_new(MIN_SIZE);
236 else
237 thq->data = _eina_thread_queue_msg_block_new(size);
191 thq->last = thq->data; 238 thq->last = thq->data;
192 } 239 }
193 blk = thq->last; 240 blk = thq->last;
194 if (blk->full) 241 if (blk->full)
195 { 242 {
196 size = MAX(size, MIN_SIZE); 243 if (size < MIN_SIZE)
197 blk->next = _eina_thread_queue_msg_block_new(size); 244 blk->next = _eina_thread_queue_msg_block_new(MIN_SIZE);
245 else
246 blk->next = _eina_thread_queue_msg_block_new(size);
198 blk = blk->next; 247 blk = blk->next;
199 thq->last = blk; 248 thq->last = blk;
200 } 249 }
@@ -206,8 +255,10 @@ _eina_thread_queue_msg_alloc(Eina_Thread_Queue *thq, int size, Eina_Thread_Queue
206 } 255 }
207 else 256 else
208 { 257 {
209 size = MAX(size, MIN_SIZE); 258 if (size < MIN_SIZE)
210 blk->next = _eina_thread_queue_msg_block_new(size); 259 blk->next = _eina_thread_queue_msg_block_new(MIN_SIZE);
260 else
261 blk->next = _eina_thread_queue_msg_block_new(size);
211 blk = blk->next; 262 blk = blk->next;
212 thq->last = blk; 263 thq->last = blk;
213 blk->last += size; 264 blk->last += size;
@@ -335,7 +386,6 @@ eina_thread_queue_init(void)
335 ERR("Cannot init thread queue block pool spinlock"); 386 ERR("Cannot init thread queue block pool spinlock");
336 return EINA_FALSE; 387 return EINA_FALSE;
337 } 388 }
338 mempools = eina_hash_int32_new((Eina_Free_Cb)eina_mempool_del);
339 return EINA_TRUE; 389 return EINA_TRUE;
340} 390}
341 391
@@ -344,7 +394,6 @@ eina_thread_queue_shutdown(void)
344{ 394{
345 _eina_thread_queue_msg_block_pool_shutdown(); 395 _eina_thread_queue_msg_block_pool_shutdown();
346 eina_log_domain_unregister(_eina_thread_queue_log_dom); 396 eina_log_domain_unregister(_eina_thread_queue_log_dom);
347 eina_hash_free(mempools);
348 return EINA_TRUE; 397 return EINA_TRUE;
349} 398}
350 399