netlink and send() lockup on NGW100

Go To Last Post
2 posts / 0 new
Author
Message
#1
  • 1
  • 2
  • 3
  • 4
  • 5
Total votes: 0

I have the following code (extracted from the avahi package) that exits fine on my Ubuntu dev box, but generates a soft lockup on my NGW100 target board.

Here's the NGW100 output ...

~ # netlink
BUG: soft lockup - CPU#0 stuck for 61s! [netlink:285]

The code appears to hang at the send() function call.

I've tried *many* different buildroot setups (the Atmel 2.1.0, 2.2.0-rc4 and -rc5, the latest buildroot cvs), as well as both uClibc 0.9.29 and the snapshot release, and they all appear to hang at the send() function.

Can someone else try this code and post their results ?

I simply compiled it as follows ...

avr32-linux-gcc netlink.c -o netlink

... and copied the binary to the target.

Have I got something wrong? Or is this a bug in the kernel or uClibc?

Here's the code itself ...

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 
#include 

#include 
#include 

/*
 * Minimal rtnetlink reproducer: open a NETLINK_ROUTE socket, bind it to the
 * link / IPv4-address / IPv6-address multicast groups, enable SO_PASSCRED and
 * send a single RTM_GETLINK dump request.
 *
 * Returns 0 once the request has been handed to send(), or -1 after printing
 * a diagnostic to stderr if any step fails.  No reply is read back.
 */
int main(void) {
    int fd;
    int rc = -1;
    const int on = 1;
    struct sockaddr_nl addr;
    /*
     * Build the request in a correctly typed and aligned object instead of
     * casting a byte array to struct nlmsghdr *, which has no alignment
     * guarantee and breaks strict aliasing.
     */
    struct {
        struct nlmsghdr hdr;
        struct rtgenmsg gen;
    } req;

    fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
    if (fd < 0) {
        fprintf(stderr, "Error : socket(PF_NETLINK): %s\n", strerror(errno));
        return -1;
    }

    memset(&addr, 0, sizeof(addr));
    addr.nl_family = AF_NETLINK;
    addr.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR;
    addr.nl_pid = getpid();

    if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        fprintf(stderr, "Error : bind(): %s\n", strerror(errno));
        goto out;
    }

    if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0) {
        fprintf(stderr, "Error : SO_PASSCRED: %s\n", strerror(errno));
        goto out;
    }

    /* Issue a wild dump NETLINK request */
    memset(&req, 0, sizeof(req));
    req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
    req.hdr.nlmsg_type = RTM_GETLINK;
    req.hdr.nlmsg_flags = NLM_F_ROOT | NLM_F_REQUEST | NLM_F_ACK;
    req.hdr.nlmsg_pid = 0;
    req.hdr.nlmsg_seq = 1;              /* first (and only) request */
    req.gen.rtgen_family = AF_UNSPEC;   /* same value the zeroed buffer carried */

    /* Send exactly nlmsg_len bytes; trailing struct padding is not part of the message. */
    if (send(fd, &req, req.hdr.nlmsg_len, 0) < 0) {
        fprintf(stderr, "Error : send(): %s\n", strerror(errno));
        goto out;
    }

    rc = 0;

out:
    /* Release the socket on every path once it has been opened. */
    close(fd);
    return rc;
}

I have debugged it as best I can using gdb ... here is the output ...

mpfj@mpfj-ubuntu:/usr/local/dev/avr32/avahi/test$ avr32-linux-gdb netlink
GNU gdb 6.7.1.atmel.1.0.3
Copyright (C) 2007 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "--host=i386-pc-linux-gnu --target=avr32-linux-uclibc"...
(gdb) set solib-absolute-prefix /usr/local/dev/avr32/buildroot/build_avr32/staging_dir/
Reading symbols from /usr/local/dev/avr32/buildroot-cvs/build_avr32/staging_dir/lib/libc.so.0...done.
Loaded symbols for /usr/local/dev/avr32/buildroot/build_avr32/staging_dir/lib/libc.so.0
Reading symbols from /usr/local/dev/avr32/buildroot-cvs/build_avr32/staging_dir/lib/ld-uClibc.so.0...done.
Loaded symbols for /usr/local/dev/avr32/buildroot/build_avr32/staging_dir/lib/ld-uClibc.so.0
Remote debugging using 10.0.0.103:1024
0x2aaab884 in _start () from /usr/local/dev/avr32/buildroot/build_avr32/staging_dir/lib/ld-uClibc.so.0
(gdb) list
28	    }
29	    
30	    memset(&addr, 0, sizeof(addr));
31	    addr.nl_family = AF_NETLINK;
32	    addr.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR;
33	    addr.nl_pid = getpid();
34	
35	    if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
36	        printf("Error : bind(): %s", strerror(errno));
37	        return -1;
(gdb) break main
Note: breakpoint 1 also set at pc 0x149e.
Breakpoint 2 at 0x149e: file netlink.c, line 18.
(gdb) cont
Continuing.

Breakpoint 1, main () at netlink.c:18
18	    int fd = -1;
(gdb) s
23	    const int on = 1;
(gdb) 
25	    if ((fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE)) < 0) {
(gdb) 
*__GI_socket (family=16, type=2, protocol=0) at libc/inet/socketcalls.c:362
362	_syscall3(int, socket, int, family, int, type, int, protocol);
(gdb) 
main () at netlink.c:30
30	    memset(&addr, 0, sizeof(addr));
(gdb) 
31	    addr.nl_family = AF_NETLINK;
(gdb) 
32	    addr.nl_groups = RTMGRP_LINK|RTMGRP_IPV4_IFADDR|RTMGRP_IPV6_IFADDR;
(gdb) 
33	    addr.nl_pid = getpid();
(gdb) 
__libc_getpid () at libc/sysdeps/linux/common/getpid.c:18
18	_syscall0(pid_t, __libc_getpid);
(gdb) 
main () at netlink.c:35
35	    if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
(gdb) 
*__GI_bind (sockfd=3, myaddr=0x7ff38e24, addrlen=12) at libc/inet/socketcalls.c:61
61	_syscall3(int, bind, int, sockfd, const struct sockaddr *, myaddr, socklen_t, addrlen);
(gdb) 
main () at netlink.c:40
40	    if (setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0) {
(gdb) 
*__GI_setsockopt (fd=3, level=1, optname=16, optval=0x7ff38a20, optlen=4) at libc/inet/socketcalls.c:324
324	_syscall5(int, setsockopt, int, fd, int, level, int, optname, const void *, optval, socklen_t, optlen);
(gdb) 
main () at netlink.c:47
47	    memset(&req, 0, sizeof(req));
(gdb) 
48	    n = (struct nlmsghdr*) req;
(gdb) 
49	    n->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
(gdb) 
50	    n->nlmsg_type = RTM_GETLINK;
(gdb) 
51	    n->nlmsg_flags = NLM_F_ROOT|NLM_F_REQUEST;
(gdb) 
52	    n->nlmsg_pid = 0;
(gdb) 
54	    n->nlmsg_seq++;
(gdb) 
55	    n->nlmsg_flags |= NLM_F_ACK;
(gdb) 
57	    if (send(fd, n, n->nlmsg_len, 0) < 0) {
(gdb) 
__libc_send (sockfd=3, buffer=0x7ff38a24, len=17, flags=0) at libc/inet/socketcalls.c:248
248	_syscall4(ssize_t, __libc_send, int, sockfd, const void *, buffer, size_t, len, int, flags);
(gdb) 
  • 1
  • 2
  • 3
  • 4
  • 5
Total votes: 0

This issue is *finally* fixed.

I needed to update the kernel from 2.6.25.10 to 2.6.26.

I'm not sure what the exact issue was, but that solves it for me.