Linux RCU Synchronization Mechanism
In this post, we will get to know the insight about Linux Synchronization
mechanism – RCU(Read Copy Update).
Read -Copy- Update is used as a synchronization mechanism when performance is pivotal for a system. Using RCU performance of "Read" operation can be made significantly more compared with other synchronization mechanisms like a semaphore. As the name says Read-the list, Copy-the node, Update-update the list for writing into the linked list. Then what about read operation ?. Using RCU for reading a linked list is almost lock-free. In non-preempt Kernel, it is almost equal to nops.
Below is the sample that explains RCU implementation using the miscellaneous driver and is compiled under GPL license.
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/version.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/slab.h>
/** Structure to create a Linked List to store Employe DataBase */
typedef struct EmployDB {
char empName[80];
uint64_t empId;
struct list_head node;
struct rcu_head rcu;
}EmployDB_t;
static LIST_HEAD(EmployDBHeadNode);
static spinlock_t writeSpinLock;
static int empNum = 0;
/** Prototype declaration*/
static void addEmploy(char *pEmpName);
static void freeCallback(struct rcu_head *rcu);
static void deleteEmployNode(char *pEmpName, int syncFree);
static void displayEmployDB();
static void updateEmployInfo(char *pEmpName, char *pEmpNewName, int syncFree);
static struct miscdevice misc_struct={
//Minor number is given, if we give it should be unique and not taken by any module
.minor = MISC_DYNAMIC_MINOR,
.name = "Trial_Driver",
};
static void addEmploy(char *pEmpName)
{
EmployDB_t *newEmploy ;
newEmploy = kzalloc(sizeof(EmployDB_t), GFP_KERNEL);
if(!newEmploy)
{
return;
}
strcpy(newEmploy->empName, pEmpName);
newEmploy->empId = empNum++;
spin_lock(&writeSpinLock);
list_add_rcu(&newEmploy->node, &EmployDBHeadNode);
spin_unlock(&writeSpinLock);
}
/** Callback function for async reclaim */
static void freeCallback(struct rcu_head *rcu)
{
EmployDB_t *empNode = container_of(rcu, struct EmployDB, rcu);
kfree(empNode);
}
static void deleteEmployNode(char *pEmpName, int syncFree)
{
EmployDB_t *empNode = NULL;
spin_lock(&writeSpinLock);
list_for_each_entry(empNode, &EmployDBHeadNode, node) {
if(!strcmp(pEmpName, empNode->empName))
{
list_del_rcu(&empNode->node);
spin_unlock(&writeSpinLock);
if(syncFree) {
printk("Calling synchronize_rcu.....\n");
/** wait for all RCU read-side critical sections to complete*/
synchronize_rcu();
kfree(empNode);
}
else
{
call_rcu(&empNode->rcu, freeCallback);
}
return;
}
}
spin_unlock(&writeSpinLock);
}
static void displayEmployDB()
{
EmployDB_t *empNode = NULL;
rcu_read_lock();
list_for_each_entry_rcu(empNode, &EmployDBHeadNode, node) {
printk(" Employ Name: {%s}\n", empNode->empName);
pr_info(" Employ ID: {%d}\n", empNode->empId);
}
rcu_read_unlock();
}
/** Update a Employ information*/
static void updateEmployInfo(char *pEmpName, char *pEmpNewName, int syncFree)
{
EmployDB_t *empNode = NULL;
EmployDB_t *empOldNode = NULL;
EmployDB_t *empNewNode = NULL;
rcu_read_lock();
list_for_each_entry(empNode, &EmployDBHeadNode, node) {
if(!strcmp(empNode->empName, pEmpName))
{
empOldNode = empNode;
break;
}
}
if(!empOldNode)
return;
empNewNode = kzalloc(sizeof(EmployDB_t), GFP_ATOMIC);
if(!empNewNode)
{
return;
}
memcpy(empNewNode, empOldNode, sizeof(EmployDB_t));
strcpy(empNewNode->empName, pEmpNewName);
spin_lock(&writeSpinLock);
list_replace_rcu(&empOldNode->node, &empNewNode->node );
spin_unlock(&writeSpinLock);
rcu_read_unlock();
if(syncFree) {
synchronize_rcu();
kfree(empOldNode);
}
else
{
call_rcu(&empOldNode->rcu, freeCallback);
}
}
/*Initialization function */
static int __init trialInit(void)
{
int result = 0;
printk(KERN_INFO "In function intit module inserted\n");
//0 - Success, -ve on failure
result = misc_register(&misc_struct);
if(result < 0)
printk(KERN_ALERT "misc_register Failed..?");
else
printk(KERN_INFO " device Registered with minor number = %i \n", misc_struct.minor);
//initialize spin lock
spin_lock_init(&writeSpinLock);
printk("\nAdding Employee's to DataBase:\n");
//Initialize for Trident array
addEmploy("Employ1");
addEmploy("Employ2");
addEmploy("Employ3");
addEmploy("Employ4");
printk("Displaying Employ list:\n");
//Display List
displayEmployDB();
printk("Deleting Employ from DB:\n");
//Delete a Employ node
deleteEmployNode("Employ3", 1);
printk("Displaying Employ list:\n");
//Display List
displayEmployDB();
printk("Updating Employ Name:\n");
//Update a employ name
updateEmployInfo("Employ1", "Employ3", 1);
printk("Displaying Employ list:\n");
//Display List
displayEmployDB();
return result;
}
static void __exit trialExit(void)
{
misc_deregister(&misc_struct);
printk("\nDevice Unregistered\n");
}
module_init(trialInit);
module_exit(trialExit);
MODULE_AUTHOR("linusgeek.blogspot.com");
MODULE_DESCRIPTION("Trial driver for RCU");
MODULE_LICENSE("GPL");
Makefile:
obj-m += rcu_example.o
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
In this sample module, RCU synchronization mechanism is used to protect the Employee Database.
The list has to be protected for a concurrent write operation and read operation can be done concurrently.
"trialInit" is the entry point into the miscellaneous driver. misc_register API is used to register the kernel module. Then the spinlock is initialized and is used to protect the Linked List against concurrent write to the list. Before Reading a List rcu lock(rcu_read_lock) is taken to avoid data corruption and unlocked after completing read operation. rcu_read_lock and rcu_read_unlock
are the lightweight operation, which improves the performance compare to the conventional synchronization technique.
During update/deletion operation there will be 2 versions of Linked List exist. 1st version (old list) is used by pre-existing reader's and 2nd version(updated version) is used by the new Reader's.
When updating to the linked list is complete then the node can be freed once the wait-for read-side critical section is complete. Wait-for read-side critical section is the code that is protected by rcu_read_lock and rcu_read_unlock.
This freeing up the removed/updated node can be performed in two ways either synchronously using sync_rcu() or asynchronously using call_rcu(), which calls registered callback function.
This module can be inserted and run like:
insmod rcu_example.ko
dmesg
rmmod rcu_example
Throughout this blog, we went through the RCU synchronization technique. In the next blog, we'll be going through Kernel side implementation of RCU methods. Till then goodbye
Comments
Post a Comment