summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGuillaume Friloux <guillaume.friloux@gmail.com>2014-02-10 16:01:40 +0100
committerGuillaume Friloux <guillaume.friloux@gmail.com>2014-02-10 16:01:40 +0100
commit42592e472a67e5ae5f399cb9ebc8abdf5e22a0cb (patch)
tree5470ae8375f679d8652fbc8158ea627ff1eb40ac
parent190d34beb4495ace579333ab664329c210eebda3 (diff)
Fix an infinite loop if elasticsearch goes crazy.
We had a case here where elasticsearch stopped answering the HTTP queries SMMAN was sending. This led SMMAN to a state where it had 1015 sockets open to elasticsearch and was constantly trying to open logfiles as they had new logs in them, but was unable to open them because we had reached the 1024 nofile limit. So this patch changes the behavior: - Add a 10s timeout to ecore_con_url objects. - Don't instantly stat() files right after processing if an error happened on the last try. This bug happened twice here with a server sending over 1k logs in the same second.
-rw-r--r--src/lib/spy/spy_file.c8
-rw-r--r--src/lib/spy/spy_private.h1
-rw-r--r--src/lib/store/store_main.c1
3 files changed, 9 insertions, 1 deletions
diff --git a/src/lib/spy/spy_file.c b/src/lib/spy/spy_file.c
index 646115a..765f162 100644
--- a/src/lib/spy/spy_file.c
+++ b/src/lib/spy/spy_file.c
@@ -136,6 +136,7 @@ _spy_file_cb(void *data,
136 if (sf->read.fd == -1) 136 if (sf->read.fd == -1)
137 { 137 {
138 ERR("Failed to open %s : %s", sf->name, strerror(errno)); 138 ERR("Failed to open %s : %s", sf->name, strerror(errno));
139 sf->read.error = EINA_TRUE;
139 return; 140 return;
140 } 141 }
141 142
@@ -151,6 +152,7 @@ _spy_file_cb(void *data,
151 close(sf->read.fd); 152 close(sf->read.fd);
152 free(sf->read.databuf); 153 free(sf->read.databuf);
153 sf->read.databuf = NULL; 154 sf->read.databuf = NULL;
155 sf->read.error = EINA_TRUE;
154 return; 156 return;
155 } 157 }
156 158
@@ -183,7 +185,10 @@ _spy_file_end_cb(void *data,
183 185
184 sf->poll.running = EINA_FALSE; 186 sf->poll.running = EINA_FALSE;
185 187
186 ecore_job_add(_spy_file_job, sf); 188 if (!sf->read.error)
189 ecore_job_add(_spy_file_job, sf);
190 else
191 sf->read.error = EINA_FALSE;
187} 192}
188 193
189/** 194/**
@@ -254,6 +259,7 @@ spy_file_poll(void *data)
254 sf->read.length = toread; 259 sf->read.length = toread;
255 260
256 sf->poll.running = EINA_TRUE; 261 sf->poll.running = EINA_TRUE;
262 sf->read.error = EINA_FALSE;
257 et = ecore_thread_run(_spy_file_cb, 263 et = ecore_thread_run(_spy_file_cb,
258 _spy_file_end_cb, 264 _spy_file_end_cb,
259 _spy_file_cancel_cb, 265 _spy_file_cancel_cb,
diff --git a/src/lib/spy/spy_private.h b/src/lib/spy/spy_private.h
index 4851bc9..3258546 100644
--- a/src/lib/spy/spy_private.h
+++ b/src/lib/spy/spy_private.h
@@ -37,6 +37,7 @@ struct _Spy_File
37 Eina_Strbuf *buf; 37 Eina_Strbuf *buf;
38 char *databuf; 38 char *databuf;
39 ssize_t nbr; 39 ssize_t nbr;
40 Eina_Bool error : 1;
40 } read; 41 } read;
41 42
42 struct 43 struct
diff --git a/src/lib/store/store_main.c b/src/lib/store/store_main.c
index f1c964f..4f35cfc 100644
--- a/src/lib/store/store_main.c
+++ b/src/lib/store/store_main.c
@@ -95,6 +95,7 @@ store_add(Store *store,
95 sa->data.sent = strdup(buf); 95 sa->data.sent = strdup(buf);
96 DBG("store[%p] sa[%p] buf[%s]", store, sa, buf); 96 DBG("store[%p] sa[%p] buf[%s]", store, sa, buf);
97 ecore_con_url_data_set(sa->ec, sa); 97 ecore_con_url_data_set(sa->ec, sa);
98 ecore_con_url_timeout_set(sa->ec, 10.0);
98 r = ecore_con_url_post(sa->ec, buf, len, "text/json"); 99 r = ecore_con_url_post(sa->ec, buf, len, "text/json");
99 if (!r) 100 if (!r)
100 { 101 {