Linux RCU Synchronization Mechanism


In this post, we will get to know the insight about Linux Synchronization mechanism – RCU(Read Copy Update).
linusgeek.blogspot.com

RCU (Read Copy Update ) is one of the synchronization mechanism found under Linux for both User Space and Kernel Space. In this blog, I’ll be explaining about Kernel Space RCU implementation with a sample code.

Read -Copy- Update is used as a synchronization mechanism when performance is pivotal for a system. Using RCU performance of "Read" operation can be made significantly more compared with other synchronization mechanisms like a semaphore.  As the name says Read-the list, Copy-the node, Update-update the list for writing into the linked list. Then what about read operation ?. Using RCU for reading a linked list is almost lock-free. In non-preempt Kernel, it is almost equal to nops. 

Below is the sample that explains RCU implementation using the miscellaneous driver and is compiled under GPL license.

#include <linux/module.h>
#include <linux/init.h> 
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/version.h>
#include <linux/kthread.h> 
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/slab.h>

/** Structure to create a Linked List to store Employe DataBase */
typedef struct EmployDB {
    char empName[80];
    uint64_t empId;
    struct list_head node;
    struct rcu_head rcu;
}EmployDB_t;

static LIST_HEAD(EmployDBHeadNode);
static spinlock_t writeSpinLock;
static int empNum = 0;

/** Prototype declaration*/
static void addEmploy(char *pEmpName);
static void freeCallback(struct rcu_head *rcu);
static void deleteEmployNode(char *pEmpName, int syncFree);
static void displayEmployDB();
static void updateEmployInfo(char *pEmpName, char *pEmpNewName, int syncFree);

static struct miscdevice misc_struct={
    //Minor number is given, if we give it should be unique and not taken by any module
    .minor = MISC_DYNAMIC_MINOR,
    .name = "Trial_Driver",
};

static void addEmploy(char *pEmpName)
{
    EmployDB_t *newEmploy ;

    newEmploy = kzalloc(sizeof(EmployDB_t), GFP_KERNEL);
    if(!newEmploy)
    {
        return;
    }

    strcpy(newEmploy->empName, pEmpName);
    newEmploy->empId = empNum++;

    spin_lock(&writeSpinLock);
    list_add_rcu(&newEmploy->node, &EmployDBHeadNode);
    spin_unlock(&writeSpinLock);
}

/** Callback function for async reclaim */
static void freeCallback(struct rcu_head *rcu)
{
    EmployDB_t *empNode  = container_of(rcu, struct  EmployDB, rcu);
    kfree(empNode);
}

static void deleteEmployNode(char *pEmpName, int syncFree)
{
    EmployDB_t *empNode = NULL;
 
    spin_lock(&writeSpinLock);
    list_for_each_entry(empNode, &EmployDBHeadNode, node) {

        if(!strcmp(pEmpName, empNode->empName))
        {
            list_del_rcu(&empNode->node);
            spin_unlock(&writeSpinLock);

            if(syncFree) {
               printk("Calling synchronize_rcu.....\n");
                /** wait for all RCU read-side critical sections to complete*/
                synchronize_rcu();
                kfree(empNode);
            }
            else
            {
            call_rcu(&empNode->rcu, freeCallback);
            }
           return;
        }
    }
    spin_unlock(&writeSpinLock);
}

static void displayEmployDB()
{
    EmployDB_t *empNode = NULL;

    rcu_read_lock();

    list_for_each_entry_rcu(empNode, &EmployDBHeadNode, node) {
        printk(" Employ Name: {%s}\n", empNode->empName);
        pr_info(" Employ ID: {%d}\n", empNode->empId);
    }
    rcu_read_unlock();
}

/** Update a Employ information*/
static void updateEmployInfo(char *pEmpName, char *pEmpNewName, int syncFree)
{
    EmployDB_t *empNode = NULL;
    EmployDB_t *empOldNode = NULL;
    EmployDB_t *empNewNode = NULL;
   
    rcu_read_lock();
    list_for_each_entry(empNode, &EmployDBHeadNode, node) {

     if(!strcmp(empNode->empName, pEmpName))
     {
            empOldNode = empNode;
            break;
        }
    }

    if(!empOldNode)
        return;

    empNewNode = kzalloc(sizeof(EmployDB_t), GFP_ATOMIC);
    if(!empNewNode)
    {
        return;
    }

    memcpy(empNewNode, empOldNode, sizeof(EmployDB_t));
    strcpy(empNewNode->empName, pEmpNewName);
    spin_lock(&writeSpinLock);
    list_replace_rcu(&empOldNode->node, &empNewNode->node );
    spin_unlock(&writeSpinLock);
    rcu_read_unlock();

    if(syncFree) {
        synchronize_rcu();
        kfree(empOldNode);
    }
    else
    {
        call_rcu(&empOldNode->rcu, freeCallback);     
    }
}

/*Initialization function */
static int __init trialInit(void)
{
    int result = 0;

    printk(KERN_INFO "In function intit module inserted\n");

    //0 - Success, -ve  on failure
    result = misc_register(&misc_struct);
    if(result < 0)
        printk(KERN_ALERT "misc_register Failed..?");
    else
        printk(KERN_INFO " device Registered with minor number = %i \n", misc_struct.minor);

   //initialize spin lock
    spin_lock_init(&writeSpinLock);

    printk("\nAdding Employee's to DataBase:\n");
    //Initialize for Trident array
    addEmploy("Employ1");
    addEmploy("Employ2");
    addEmploy("Employ3");
    addEmploy("Employ4");

    printk("Displaying Employ list:\n");
    //Display List
    displayEmployDB();

    printk("Deleting Employ from DB:\n");
    //Delete a Employ node
    deleteEmployNode("Employ3", 1);

    printk("Displaying Employ list:\n");
     //Display List
    displayEmployDB();

    printk("Updating Employ Name:\n");
    //Update a employ name
    updateEmployInfo("Employ1", "Employ3", 1);

    printk("Displaying Employ list:\n");
     //Display List
    displayEmployDB();
 
    return result;
}

static void __exit trialExit(void)
{
    misc_deregister(&misc_struct);
    printk("\nDevice Unregistered\n");
}

module_init(trialInit);
module_exit(trialExit);

MODULE_AUTHOR("linusgeek.blogspot.com");
MODULE_DESCRIPTION("Trial driver for RCU");
MODULE_LICENSE("GPL");

Makefile:
obj-m += rcu_example.o

all:
    make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
    make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean

In this sample module, RCU synchronization mechanism is used to protect the Employee Database.
The list has to be protected for a concurrent write operation and read operation can be done concurrently. 

"trialInit" is the entry point into the miscellaneous driver. misc_register API is used to register the kernel module. Then the spinlock is initialized and is used to protect the Linked List against concurrent write to the list.  Before Reading a List rcu lock(rcu_read_lock) is taken to avoid data corruption and unlocked after completing read operation. rcu_read_lock and rcu_read_unlock
are the lightweight operation, which improves the performance compare to the conventional synchronization technique.

During update/deletion operation there will be 2 versions of Linked List exist. 1st version (old list) is used by pre-existing reader's and 2nd version(updated version) is used by the new Reader's.
When updating to the linked list is complete then the node can be freed once the wait-for read-side critical section is complete. Wait-for read-side critical section is the code that is protected by rcu_read_lock and rcu_read_unlock.

This freeing up the removed/updated node can be performed in two ways either synchronously using sync_rcu() or asynchronously using call_rcu(), which calls registered callback function. 

This module can be inserted and run like:
         insmod rcu_example.ko
         dmesg
         rmmod rcu_example

Throughout this blog, we went through the RCU synchronization technique. In the next blog, we'll be going through  Kernel side implementation of RCU methods. Till then goodbye

Comments

Popular posts from this blog

PostgreSQL Write-Ahead-Logging(WAL) Archiving Functionality

RCU Kernel Implementation

Database Emergency Exit from unforeseen Disasters