Linux TCP/IP 协议栈学习(3)—— Linux Socket (Part I)

Chapter 5: Linux Sockets 
Sockets provide a standard protocol-independent interface between the application-level programs and the TCP/IP stack. 
 
From the viewpoint of TCP/IP, everything above the transport layer is part of the application
 
The socket API is the best known networking interface for Unix application and network programming. 
 
 
One definition of the socket interface is that it is the interface between the transport layer protocols in the TCP/IP stack and all protocols above
 
the socket interface is also the interface between the kernel and the application layer for all network programming functions.
 
the socket interface is the only way that applications make use of the TCP/IP suite of protocols.  
 
Sockets have three fundamental purposes. They are used to transfer data, manage connections for TCP, and control or tune the operation of the TCP/IP stack. 
 
once the socket is open, generic I/O calls such as read and write can be used to move data through the open socket.
 
结构体 sock inet_sock sock_common 参见专门的笔记,这里给一个总体的介绍
 
struct sock - network layer representation of sockets
struct sock_common - minimal network layer representation of sockets
 
struct inet_sock - representation of INET sockets. When a sock structure instance is allocated from the slab, following the sock structure is the 
inet_sock, which contains a protocol information part for IPv6 and IPv4.
 
The socket structure is the general structure that holds control and states information for the socket layer.
 
/**
 * struct socket - general BSD socket
 * @state: current socket state (%SS_CONNECTED and similar)
 * @type: kind of socket (%SOCK_STREAM and similar)
 * @flags: flag bits for the socket (%SOCK_ASYNC_NOSPACE and similar)
 * @fasync_list: list used for asynchronous wake-ups
 * @wait: wait queue shared by several users
 * @file: back pointer to the file, kept for gc
 * @sk: internal, protocol-agnostic representation of the socket
 * @ops: socket operations specific to the protocol
 */
struct socket {
	socket_state		state;

	kmemcheck_bitfield_begin(type);
	short			type;
	kmemcheck_bitfield_end(type);

	unsigned long		flags;

	/* fasync_list and wait are meant to share one cache line. */
	struct fasync_struct	*fasync_list;
	wait_queue_head_t	wait;

	struct file		*file;
	struct sock		*sk;

	/* Table of protocol-specific operations for this socket. */
	const struct proto_ops	*ops;
};
 
 
proto_ops structure contains the family type for this particular set of socket operations. For IPv4, it will be set to AF_INET.  
 
struct proto_ops {
       int          family;
       struct module     * owner;
       int          (*release )   (struct socket *sock );
       int          (*bind )          ( struct socket * sock,
                             struct sockaddr * myaddr,
                             int sockaddr_len);
       int          (*connect )   (struct socket *sock ,
                             struct sockaddr * vaddr,
                             int sockaddr_len, int flags);
       int          (*socketpair )(struct socket *sock1 ,
                             struct socket * sock2);
       int          (*accept )    (struct socket *sock ,
                             struct socket * newsock, int flags);
       int          (*getname )   (struct socket *sock ,
                             struct sockaddr * addr,
                             int *sockaddr_len , int peer );
       unsigned int      (*poll)          (struct file *file , struct socket *sock,
                             struct poll_table_struct * wait);
       int          (*ioctl )     (struct socket *sock , unsigned int cmd,
                             unsigned long arg );
#ifdef CONFIG_COMPAT
       int          (*compat_ioctl ) (struct socket *sock , unsigned int cmd,
                             unsigned long arg );
#endif
       int          (*listen )    (struct socket *sock , int len );
       int          (*shutdown )  (struct socket *sock , int flags );
       int          (*setsockopt )(struct socket *sock , int level ,
                             int optname, char __user * optval, unsigned int optlen);
       int          (*getsockopt )(struct socket *sock , int level ,
                             int optname, char __user * optval, int __user *optlen);
#ifdef CONFIG_COMPAT
       int          (*compat_setsockopt )(struct socket *sock , int level ,
                             int optname, char __user * optval, unsigned int optlen);
       int          (*compat_getsockopt )(struct socket *sock , int level ,
                             int optname, char __user * optval, int __user *optlen);
#endif
       int          (*sendmsg )   (struct kiocb *iocb , struct socket *sock ,
                             struct msghdr * m, size_t total_len);
       int          (*recvmsg )   (struct kiocb *iocb , struct socket *sock ,
                             struct msghdr * m, size_t total_len,
                             int flags);
       int          (*mmap )          ( struct file * file, struct socket *sock,
                             struct vm_area_struct * vma);
       ssize_t            (*sendpage )  (struct socket *sock , struct page *page,
                             int offset, size_t size, int flags);
       ssize_t      (*splice_read )(struct socket *sock  loff_t * ppos,
                              struct pipe_inode_info * pipe, size_t len , unsigned int flags );
};
 
Socket Layer Initialization :
 
AF_INET is registered during kernel initialization, and the internal hooks that connect the AF_INET family with the TCP/IP protocol suite are done during socket initialization.
 
 
static int __init sock_init(void)
{
	/* Set up the SLAB cache for sock objects. */
	sk_init();

	/* Set up the SLAB cache for skbuffs. */
	skb_init();

	/*
	 * Initialize the protocols module.
	 *
	 * This builds the pseudo-filesystem for sockets.  The socket inode
	 * cache is created first, because Linux, like other Unix operating
	 * systems, uses the inode as the basic unit of its filesystem
	 * implementation.  The socket filesystem type is then registered
	 * and mounted, and the resulting mount is kept in sock_mnt.
	 */
	init_inodecache();
	register_filesystem(&sock_fs_type);
	sock_mnt = kern_mount(&sock_fs_type);

	/* Real protocol initialization is left to later initcalls. */

#ifdef CONFIG_NETFILTER
	netfilter_init();
#endif

	return 0;
}
 
Family Values and the Protocol Switch Table :
 
the socket layer is used to interface with multiple protocol families and multiple protocols within a protocol family. 
 
After incoming packets are processed by the protocol stack, they eventually are passed up to the socket layer to be handed off to an application layer program. The socket layer must determine which socket should receive the packet, even though there may be multiple sockets open over different protocols. This is called socket de-multiplexing, and the protocol switch table is the core mechanism.
 
Figure 5.1 illustrates the registration process. It shows how the inet_protosw structure is initialized with proto and proto_ops structures for TCP/IP, the AF_INET family.
 
 
Each of the registered protocols is kept in a table called the protocol switch table. Each entry in the table is an instance of inet_protosw. The registration function, inet_register_protosw, puts the protocol described by the argument p into the protocol switch table. The unregistration function, inet_unregister_protosw, removes a protocol described by the argument p from the protocol switch table.
 
Each protocol instance in the protocol switch table is an instance of the inet_protosw structure, defined in file linux/include/net/protosw.h.
/* Descriptor used to register a socket interface for an IP protocol. */
struct inet_protosw {
	struct list_head	list;

	/* type and protocol together make up the lookup key. */
	unsigned short		type;		/* 2nd argument to socket(2) */
	unsigned short		protocol;	/* L4 protocol number being registered */

	/*
	 * prot points to the protocol block structure, which is used when a
	 * socket is created and serves to build an interface to any protocol
	 * that supports a socket interface.  ops points to the set of
	 * operation functions specific to this protocol.
	 */
	struct proto		*prot;
	const struct proto_ops	*ops;

	char			no_check;	/* checksum on rcv/xmit/none? */

	/*
	 * See INET_PROTOSW_*.  With INET_PROTOSW_PERMANENT set, the protocol
	 * is permanent and cannot be unregistered.
	 */
	unsigned char		flags;
};
 
 
The permanent protocols in IPv4 are registered by the function inet_init
具体参见 inet_init() 函数 笔记
 
static int __init inet_init(void)
{
	/* ... (earlier part of the function elided in this excerpt) ... */

	/* Initialize every list head in inetsw[], one slot per socket type up to SOCK_MAX. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	/* The protocols have been placed into inetsw_array; register each entry in turn. */
	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/* ... (remainder of the function elided in this excerpt) ... */
}
 
The protocols in the array are TCP, UDP, and raw. The values for each protocol are initialized into the inet_protosw structure at compile time as shown here. 
static struct inet_protosw inetsw_array [] =
        { 
/* The first protocol is TCP, so type is SOCK_STREAM and flags is set to permanent. */
        { 
                      type:           SOCK_STREAM ,
                protocol:       IPPROTO_TCP ,
                prot:           &tcp_prot ,
                ops:            &inet_stream_ops ,
                capability:     -1 ,
                no_check:       0,
                flags:          INET_PROTOSW_PERMANENT ,
        } ,
        { 
The second protocol is UDP, so type is SOCK_DGRAM and flags is also set to permanent
          { 
              type:           SOCK_DGRAM ,
                protocol:       IPPROTO_UDP ,
                prot:           &udp_prot ,
                ops:            &inet_dgram_ops ,
                capability:     -1 ,
                no_check:       UDP_CSUM_DEFAULT ,
                flags:          INET_PROTOSW_PERMANENT ,
               } ,
       { 
The third protocol is "raw" , so type is SOCK_RAW and flags is also set to reuse. Notice the
protocol value is IPPROTO_IP, which is zero , and indicates the "wild card," which means that a
raw socket can actually be used to set options in any protocol in the IF_INET family. This
corresponds to the fact that the protocol field is typically set to zero for a raw socket
              { 
 
        type:                  SOCK_RAW ,
               protocol:       IPPROTO_IP ,/* wild card */
               prot:           &raw_prot ,
               ops:            &inet_dgram_ops ,
               capability:     CAP_NET_RAW,
               no_check:       UDP_CSUM_DEFAULT ,
               flags:              INET_PROTOSW_REUSE ,
                } 
} ;
 
The socket layer family registration facility provides two functions and one key data structure
 
The first function, sock_register , registers the protocol family with the socket layer
 
/*
 * Register the protocol family described by fam with the socket layer.
 */
int sock_register(struct net_proto_family *fam); 

/*
 * Table of NPROTO pointers to net_proto_family descriptors.
 * NOTE(review): presumably indexed by family number and filled in by
 * sock_register() -- confirm against the implementation.
 */
static const struct net_proto_family * net_families[NPROTO] __read_mostly;
 
/*
 * Descriptor for one protocol family.
 *
 * family holds the family identifier, create is a hook that receives the
 * net, the socket, the protocol number and a kern flag, and owner points at
 * the module providing the family.
 * NOTE(review): create is presumably invoked when a socket of this family
 * is created -- confirm against the caller.
 */
struct net_proto_family {
	int family;
	int (*create)(struct net *net, struct socket *sock,
		      int protocol, int kern);
	struct module *owner;
};
 
posted @ 2012-12-31 18:35  KingsLanding  阅读(1852)  评论(0编辑  收藏  举报