linux提供了原始套接字RAW_SOCKET,可以抓取数据链路层的报文。这样可以对报文进行深入分析。今天介绍一下AF_PACKET的用法,分为两种方式。第一种方法是通过套接字,打开指定的网卡,然后使用recvfrom(或recvmsg)读取,实际过程需要将报文从内核区拷贝到用户区。第二种方法是使用packet_mmap,使用共享内存方式,在内核空间中分配一块内核缓冲区,然后用户空间程序调用mmap映射到用户空间。内核将接收到的skb拷贝到那块内核缓冲区中,这样用户空间的程序就可以直接读到捕获的数据包了。PACKET_MMAP减少了系统调用,不用recvfrom/recvmsg就可以读取到捕获的报文,相比原始套接字+recvfrom的方式,减少了一次拷贝和一次系统调用。libpcap就是采用第二种方式。suricata默认方式也是使用packet mmap抓包。
2、测试例子
为了方便测试,可以使用linux提供的sock_filter(BPF)按ip地址过滤报文。使用tcpdump的-d/-dd选项可以把过滤条件编译并输出为对应的BPF指令。以www.qq.com为例进行说明:
ping www.qq.com得到ip地址:101.226.103.106
tcpdump ip -s 2048 -d host 101.226.103.106
(000) ldh [12]
(001) jeq #0x800 jt 2 jf 7
(002) ld [26]
(003) jeq #0x65e2676a jt 6 jf 4
(004) ld [30]
(005) jeq #0x65e2676a jt 6 jf 7
(006) ret #2048
(007) ret #0
tcpdump -dd host 101.226.103.106
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 4, 0x00000800 },
{ 0x20, 0, 0, 0x0000001a },
{ 0x15, 8, 0, 0x65e2676a },
{ 0x20, 0, 0, 0x0000001e },
{ 0x15, 6, 7, 0x65e2676a },
{ 0x15, 1, 0, 0x00000806 },
{ 0x15, 0, 5, 0x00008035 },
{ 0x20, 0, 0, 0x0000001c },
{ 0x15, 2, 0, 0x65e2676a },
{ 0x20, 0, 0, 0x00000026 },
{ 0x15, 0, 1, 0x65e2676a },
{ 0x6, 0, 0, 0x0000ffff },
{ 0x6, 0, 0, 0x00000000 }
更多详细的过滤细节可以参考:
http://blog.csdn.net/eqiang8271/article/details/8489769
(1)第一种方法:
1 #include <sys/types.h>
2 #include <sys/time.h>
3 #include <sys/ioctl.h>
4 #include <sys/socket.h>
5 #include <linux/types.h>
6 #include <netinet/in.h>
7 #include <netinet/udp.h>
8 #include <netinet/ip.h>
9 #include <netpacket/packet.h>
10 #include <net/ethernet.h>
11 #include <arpa/inet.h>
12 #include <string.h>
13 #include <signal.h>
14 #include <net/if.h>
15 #include <stdio.h>
16 #include <sys/uio.h>
17 #include <fcntl.h>
18 #include <unistd.h>
19 #include <linux/filter.h>
20 #include <stdlib.h>
/* Header sizes in bytes. main() sums the first three to get the minimum
 * frame length it accepts; TCP_HDR_LEN is not referenced in the visible code. */
22 #define ETH_HDR_LEN 14
23 #define IP_HDR_LEN 20
24 #define UDP_HDR_LEN 8
25 #define TCP_HDR_LEN 20
/* Capture socket; kept at file scope so that sig_handler() can reach it. */
27 static int sock;
29 void sig_handler(int sig)
30 {
31 struct ifreq ethreq;
32 if(sig == SIGTERM)
33 printf("SIGTERM recieved, exiting.../n");
34 else if(sig == SIGINT)
35 printf("SIGINT recieved, exiting.../n");
36 else if(sig == SIGQUIT)
37 printf("SIGQUIT recieved, exiting.../n");
38 // turn off the PROMISCOUS mode
39 strncpy(ethreq.ifr_name, "eth1", IFNAMSIZ);
40 if(ioctl(sock, SIOCGIFFLAGS, ðreq) != -1) {
41 ethreq.ifr_flags &= ~IFF_PROMISC;
42 ioctl(sock, SIOCSIFFLAGS, ðreq);
43 }
44 close(sock);
45 exit(0);
46 }
48 int main(int argc, char ** argv) {
49 int n;
50 char buf[2048];
51 unsigned char *ethhead;
52 unsigned char *iphead;
53 struct ifreq ethreq;
54 struct sigaction sighandle;
56 #if 0
57 $tcpdump ip -s 2048 -d host 192.168.1.2
58 (000) ldh [12]
59 (001) jeq #0x800 jt 2 jf 7
60 (002) ld [26]
61 (003) jeq #0xc0a80102 jt 6 jf 4
62 (004) ld [30]
63 (005) jeq #0xc0a80102 jt 6 jf 7
64 (006) ret #2048
65 (007) ret #0
66 #endif
68 #if 0
69 测试访问www.qq.com
70 ping www.qq.com 得到ip地址为:101.226.103.106
71 tcpdump -dd host 101.226.103.106
72 { 0x28, 0, 0, 0x0000000c },
73 { 0x15, 0, 4, 0x00000800 },
74 { 0x20, 0, 0, 0x0000001a },
75 { 0x15, 8, 0, 0x65e2676a },
76 { 0x20, 0, 0, 0x0000001e },
77 { 0x15, 6, 7, 0x65e2676a },
78 { 0x15, 1, 0, 0x00000806 },
79 { 0x15, 0, 5, 0x00008035 },
80 { 0x20, 0, 0, 0x0000001c },
81 { 0x15, 2, 0, 0x65e2676a },
82 { 0x20, 0, 0, 0x00000026 },
83 { 0x15, 0, 1, 0x65e2676a },
84 { 0x6, 0, 0, 0x0000ffff },
85 { 0x6, 0, 0, 0x00000000 },
86 #endif
87 struct sock_filter bpf_code[] = {
88 { 0x28, 0, 0, 0x0000000c },
89 { 0x15, 0, 5, 0x00000800 },
90 { 0x20, 0, 0, 0x0000001a },
91 { 0x15, 2, 0, 0x65e2676a },
92 { 0x20, 0, 0, 0x00000026 },
93 { 0x15, 0, 1, 0x65e2676a },
94 { 0x6, 0, 0, 0x0000ffff },
95 { 0x6, 0, 0, 0x00000000 }
96 };
98 struct sock_fprog filter;
99 filter.len = sizeof(bpf_code)/sizeof(bpf_code[0]);
100 filter.filter = bpf_code;
102 sighandle.sa_flags = 0;
103 sighandle.sa_handler = sig_handler;
104 sigemptyset(&sighandle.sa_mask);
105 //sigaddset(&sighandle.sa_mask, SIGTERM);
106 //sigaddset(&sighandle.sa_mask, SIGINT);
107 //sigaddset(&sighandle.sa_mask, SIGQUIT);
108 sigaction(SIGTERM, &sighandle, NULL);
109 sigaction(SIGINT, &sighandle, NULL);
110 sigaction(SIGQUIT, &sighandle, NULL);
112 // AF_PACKET allows application to read pecket from and write packet to network device
113 // SOCK_DGRAM the packet exclude ethernet header
114 // SOCK_RAW raw data from the device including ethernet header
115 // ETH_P_IP all IP packets
116 if((sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP))) == -1) {
117 perror("socket");
118 exit(1);
119 }
121 // set NIC to promiscous mode, so we can recieve all packets of the network
122 strncpy(ethreq.ifr_name, "eth1", IFNAMSIZ);
123 if(ioctl(sock, SIOCGIFFLAGS, ðreq) == -1) {
124 perror("ioctl");
125 close(sock);
126 exit(1);
127 }
129 ethreq.ifr_flags |= IFF_PROMISC;
130 if(ioctl(sock, SIOCSIFFLAGS, ðreq) == -1) {
131 perror("ioctl");
132 close(sock);
133 exit(1);
134 }
136 #if 1
137 // attach the bpf filter
138 if(setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) == -1) {
139 perror("setsockopt");
140 close(sock);
141 exit(1);
142 }
143 #endif
145 while(1) {
146 n = recvfrom(sock, buf, sizeof(buf), 0, NULL, NULL);
147 if(n < (ETH_HDR_LEN+IP_HDR_LEN+UDP_HDR_LEN)) {
148 printf("invalid packet\n");
149 continue;
150 }
152 printf("%d bytes recieved\n", n);
154 ethhead = buf;
155 printf("Ethernet: MAC[%02X:%02X:%02X:%02X:%02X:%02X]", ethhead[0], ethhead[1], ethhead[2],
156 ethhead[3], ethhead[4], ethhead[5]);
157 printf("->[%02X:%02X:%02X:%02X:%02X:%02X]", ethhead[6], ethhead[7], ethhead[8],
158 ethhead[9], ethhead[10], ethhead[11]);
159 printf(" type[%04x]\n", (ntohs(ethhead[12]|ethhead[13]<<8)));
161 iphead = ethhead + ETH_HDR_LEN;
162 // header length as 32-bit
163 printf("IP: Version: %d HeaderLen: %d[%d]", (*iphead>>4), (*iphead & 0x0f), (*iphead & 0x0f)*4);
164 printf(" TotalLen %d", (iphead[2]<<8|iphead[3]));
165 printf(" IP [%d.%d.%d.%d]", iphead[12], iphead[13], iphead[14], iphead[15]);
166 printf("->[%d.%d.%d.%d]", iphead[16], iphead[17], iphead[18], iphead[19]);
167 printf(" %d", iphead[9]);
169 if(iphead[9] == IPPROTO_TCP)
170 printf("[TCP]");
171 else if(iphead[9] == IPPROTO_UDP)
172 printf("[UDP]");
173 else if(iphead[9] == IPPROTO_ICMP)
174 printf("[ICMP]");
175 else if(iphead[9] == IPPROTO_IGMP)
176 printf("[IGMP]");
177 else if(iphead[9] == IPPROTO_IGMP)
178 printf("[IGMP]");
179 else
180 printf("[OTHERS]");
182 printf(" PORT [%d]->[%d]\n", (iphead[20]<<8|iphead[21]), (iphead[22]<<8|iphead[23]));
183 }
184 close(sock);
185 exit(0);
186 }
(2)第二种方法:
#include <stdio.h>
#include <sys/types.h> /* See NOTES */
#include <sys/socket.h>
#include <sys/mman.h>
#include <poll.h>
#include <linux/types.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <linux/filter.h>
#include <net/ethernet.h>
#define ETH_HDR_LEN 14

/*
 * Print a summary of one captured frame: source and destination MAC,
 * EtherType, then IPv4 version, header length, total length, source and
 * destination address and protocol number.
 *
 * data: start of the Ethernet header. The function reads the 14-byte
 *       Ethernet header plus the first 20 bytes that follow it, so at least
 *       34 bytes must be readable. A NULL pointer is ignored.
 */
void CallBackPacket(char *data)
{
    const unsigned char *ethhead;
    const unsigned char *iphead;

    if (data == NULL) {
        return;
    }

    printf("Recv A Packet.\n");

    /* On the wire the destination MAC (bytes 0-5) precedes the source (6-11). */
    ethhead = (const unsigned char *)data;
    printf("Ethernet: MAC[%02X:%02X:%02X:%02X:%02X:%02X]", ethhead[6], ethhead[7], ethhead[8],
           ethhead[9], ethhead[10], ethhead[11]);
    printf("->[%02X:%02X:%02X:%02X:%02X:%02X]", ethhead[0], ethhead[1], ethhead[2],
           ethhead[3], ethhead[4], ethhead[5]);
    /* Assemble the big-endian field by hand: correct on any host byte order. */
    printf(" type[%04x]\n", (ethhead[12] << 8) | ethhead[13]);

    iphead = ethhead + ETH_HDR_LEN;
    /* The low nibble of byte 0 is the header length in 32-bit words. */
    printf("IP: Version: %d HeaderLen: %d[%d]", (iphead[0] >> 4), (iphead[0] & 0x0f), (iphead[0] & 0x0f) * 4);
    printf(" TotalLen %d", (iphead[2] << 8 | iphead[3]));
    printf(" IP [%d.%d.%d.%d]", iphead[12], iphead[13], iphead[14], iphead[15]);
    printf("->[%d.%d.%d.%d]", iphead[16], iphead[17], iphead[18], iphead[19]);
    /* End the record with a newline so consecutive packets do not run together. */
    printf(" %d\n", iphead[9]);
}
int main()
int fd = 0, ret = 0;
char *buff = NULL;
fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
//可以使用ARP进行一下测试
//fd = socket(PF_PACKET, SOCK_DGRAM, htons (ETH_P_ARP));
if(fd<0)
perror("socket");
goto failed_2;
//PACKET_VERSION和SO_BINDTODEVICE可以省略
#if 1
const int tpacket_version = TPACKET_V1;
/* set tpacket hdr version. */
ret = setsockopt(fd, SOL_PACKET, PACKET_VERSION, &tpacket_version, sizeof (int));
if(ret<0)
perror("setsockopt");
goto failed_2;
//#define NETDEV_NAME "wlan0"
#define NETDEV_NAME "eth1"
/* bind to device. */
ret = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, NETDEV_NAME, sizeof (NETDEV_NAME));
if(ret<0)
perror("setsockopt");
goto failed_2;
#endif
struct tpacket_req req;
#define PER_PACKET_SIZE 2048
const int BUFFER_SIZE = 1024*1024*16; //16MB的缓冲区
req.tp_block_size = 4096;
req.tp_block_nr = BUFFER_SIZE/req.tp_block_size;
req.tp_frame_size = PER_PACKET_SIZE;
req.tp_frame_nr = BUFFER_SIZE/req.tp_frame_size;
ret = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *)&req, sizeof(req));
if(ret<0)
perror("setsockopt");
goto failed_2;
#if 1
struct sock_filter bpf_code[] = {
{ 0x28, 0, 0, 0x0000000c },
{ 0x15, 0, 5, 0x00000800 },
{ 0x20, 0, 0, 0x0000001a },
{ 0x15, 2, 0, 0x65e2676a },
{ 0x20, 0, 0, 0x00000026 },
{ 0x15, 0, 1, 0x65e2676a },
{ 0x6, 0, 0, 0x0000ffff },
{ 0x6, 0, 0, 0x00000000 }
struct sock_fprog filter;
filter.len = sizeof(bpf_code)/sizeof(bpf_code[0]);
filter.filter = bpf_code;
// attach the bpf filter
if(setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)) == -1) {
perror("setsockopt");
close(fd);
goto failed_2;
#endif
buff = (char *)mmap(0, BUFFER_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if(buff == MAP_FAILED)
perror("mmap");
goto failed_2;
int nIndex=0, i=0;
while(1)
//这里在poll前先检查是否已经有报文被捕获了
struct tpacket_hdr* pHead = (struct tpacket_hdr*)(buff+ nIndex*PER_PACKET_SIZE);
//如果frame的状态已经为TP_STATUS_USER了,说明已经在poll前已经有一个数据包被捕获了,如果poll后不再有数据包被捕获,那么这个报文不会被处理,这就是所谓的竞争情况。
if(pHead->tp_status == TP_STATUS_USER)
goto process_packet;
//poll检测报文捕获
struct pollfd pfd;
pfd.fd = fd;
//pfd.events = POLLIN|POLLRDNORM|POLLERR;
pfd.events = POLLIN;
pfd.revents = 0;
ret = poll(&pfd, 1, -1);
if(ret<0)
perror("poll");
goto failed_1;
process_packet:
//尽力的去处理环形缓冲区中的数据frame,直到没有数据frame了
for(i=0; i < req.tp_frame_nr; i++)
struct tpacket_hdr* pHead = (struct tpacket_hdr*)(buff+ nIndex*PER_PACKET_SIZE);
//XXX: 由于frame都在一个环形缓冲区中,因此如果下一个frame中没有数据了,后面的frame也就没有frame了
if(pHead->tp_status == TP_STATUS_KERNEL)
break;
//处理数据frame
CallBackPacket((char*)pHead+pHead->tp_net);
//重新设置frame的状态为TP_STATUS_KERNEL
pHead->tp_len = 0;
pHead->tp_status = TP_STATUS_KERNEL;
//更新环形缓冲区的索引,指向下一个frame
nIndex++;
nIndex %= req.tp_frame_nr;
success:
close(fd);
munmap(buff, BUFFER_SIZE);
return 0;
failed_1:
munmap(buff, BUFFER_SIZE);
failed_2:
close(fd);
return -1;
3、测试结果:
执行main程序,使用curl http://www.qq.com