[Opendnssec-develop] Locking mechanism problems

Jerry Lundström jerry at opendnssec.org
Thu Sep 29 07:45:25 UTC 2011


Hi,

I got this assertion error from Patrik Wallström yesterday and been looking
into it some:

ods-signerd: ../../../signer/src/daemon/cmdhandler.c:322:
cmdhandler_handle_cmd_sign: assertion zone->task failed

>From what I can see the problem is that zonelist_update() may leave zone
objects with a zone->task set to NULL before engine_update_zones() can
assign a task to them. This can happen inside cmdhandler_handle_cmd_update()
because it releases the lock on the zonelist before running
engine_update_zones():

1. cmdhandler_handle_cmd_update : lock zonelist
2. cmdhandler_handle_cmd_update : zonelist_update()
3. cmdhandler_handle_cmd_update : unlock zonelist
4. cmdhandler_handle_cmd_update : engine_update_zones()
5. engine_update_zones : lock zonelist
6. engine_update_zones : update zones
7. engine_update_zones : unlock zonelist

Some other thread might try and lock the zonelist at step 2 and then it
would get that lock before engine_update_zones() can do its part.

A quick fix to this is to make engine_update_zones() aware of the lock on
zonelist so it doesn't try and lock it and release it after, suggested patch
below.

Altho I am seeing this almost everywhere and I would like to discuss our
approach. I would really like to see read and write locks implemented to
better support thread interoperability. Using
pthread_mutex_trylock()/pthread_mutex_timedlock() where it suites to counter
hangs. Using PTHREAD_MUTEX_RECURSIVE so different segments can lock the same
locked object without letting each other know providing better OO. Of course
as complexity in the locking mechanism increases so is the chance of dead
locks.

Why this hasn't turned up more might be that each zone is handled by one
thread, only time two threads might be working on the same zone is if you do
manually commands, and that the locking right now is very loose.

Thoughts?

/Jerry

Index: signer/src/daemon/cmdhandler.c
===================================================================
--- signer/src/daemon/cmdhandler.c (revision 5654)
+++ signer/src/daemon/cmdhandler.c (working copy)
@@ -190,11 +190,11 @@
             cmdc->engine->zonelist->just_removed = 0;
             cmdc->engine->zonelist->just_added = 0;
             cmdc->engine->zonelist->just_updated = 0;
-            lock_basic_unlock(&cmdc->engine->zonelist->zl_lock);
 
             ods_log_debug("[%s] commit zone list changes", cmdh_str);
-            engine_update_zones(cmdc->engine);
+            engine_update_zones(cmdc->engine, 1);
             ods_log_debug("[%s] signer configurations updated", cmdh_str);
+            lock_basic_unlock(&cmdc->engine->zonelist->zl_lock);
         } else {
             lock_basic_unlock(&cmdc->engine->zonelist->zl_lock);
             (void)snprintf(buf, ODS_SE_MAXLINE, "Zone list has errors.\n");
Index: signer/src/daemon/engine.c
===================================================================
--- signer/src/daemon/engine.c (revision 5654)
+++ signer/src/daemon/engine.c (working copy)
@@ -811,7 +811,7 @@
  *
  */
 void
-engine_update_zones(engine_type* engine)
+engine_update_zones(engine_type* engine, int zonelist_is_locked)
 {
     ldns_rbnode_t* node = LDNS_RBTREE_NULL;
     zone_type* zone = NULL;
@@ -833,8 +833,10 @@
     now = time_now();
     reload_zonefetcher(engine);
 
-    lock_basic_lock(&engine->zonelist->zl_lock);
-    /* [LOCK] zonelist */
+    if (!zonelist_is_locked) {
+    lock_basic_lock(&engine->zonelist->zl_lock);
+        /* [LOCK] zonelist */
+    }
     node = ldns_rbtree_first(engine->zonelist->zones);
     while (node && node != LDNS_RBTREE_NULL) {
         zone = (zone_type*) node->data;
@@ -929,8 +931,10 @@
         }
         node = ldns_rbtree_next(node);
     }
-    /* [UNLOCK] zonelist */
-    lock_basic_unlock(&engine->zonelist->zl_lock);
+    if (!zonelist_is_locked) {
+ /* [UNLOCK] zonelist */
+ lock_basic_unlock(&engine->zonelist->zl_lock);
+    }
     if (wake_up) {
         engine_wakeup_workers(engine);
     }
@@ -1097,7 +1101,7 @@
         /* update zones */
         if (zl_changed == ODS_STATUS_OK) {
             ods_log_debug("[%s] commit zone list changes", engine_str);
-            engine_update_zones(engine);
+            engine_update_zones(engine, 0);
             ods_log_debug("[%s] signer configurations updated",
engine_str);
             zl_changed = ODS_STATUS_UNCHANGED;
         }
Index: signer/src/daemon/engine.h
===================================================================
--- signer/src/daemon/engine.h (revision 5654)
+++ signer/src/daemon/engine.h (working copy)
@@ -101,7 +101,7 @@
  * \param[in] engine engine
  *
  */
-void engine_update_zones(engine_type* engine);
+void engine_update_zones(engine_type* engine, int zonelist_is_locked);
 
 /**
  * Clean up engine.



-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.opendnssec.org/pipermail/opendnssec-develop/attachments/20110929/22383c20/attachment.htm>


More information about the Opendnssec-develop mailing list