Hi, Robert,
Here are some questions:
1) If we just pull Eth Header and IP header, does it reach the
position out of the head room? I dont' observe it before.
2) You don't move_addr_to_kernel/move_addr_to_user, you just need
change_fs to KERNEL_DS and then change it back.
3) Why not use "struct sock" directly to manipulate the socket instead
of using int file descriptor?
On 7/16/07, Robert Valentan <R.Valentan@xxxxxxxxxxxxx> wrote:
Hallo!
The attached patch makes the vnet-module working for i386
and x86_64. (I had no other systems for testing)
(My patch from 6.6.2007 is included)
- x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL
defined, but has no socketcall. Replacing the _syscall's
to a working one.
- moves the "skb_pull" function from kernel to skb_util.c
because pulling in the data will rise a "BUG_ON" in the
kernel. The kernel-function does not allow to pull into
the payload!
- the skb_buff is not always possible to modify a received
buffer to send it again to the network.
(vnet_forward.c and etherip.c) The code now work's..
I think with no or less time-penalty.
Summary:
Solved:
With this patch the vnet-module is working on i386 and x86_64,
including peering-option. (except the following "open")
Open:
- Vnet's only with security-option "none" are working.
- The module-option "vnet_encaps" is ignored, always "udp" is used.
Signed-off-by: Robert Valentan <R.Valentan@xxxxxxxxxxxxx>
--
regards
Robert Valentan
diff -r aee991c7723a tools/vnet/vnet-module/esp.c
--- a/tools/vnet/vnet-module/esp.c Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/esp.c Sun Jul 15 17:03:47 2007
@@ -341,12 +341,13 @@
dprintf("> ETH header pull...\n");
memmove(skb->data, skb->mac.raw, ETH_HLEN);
skb->mac.raw = skb->data;
- __skb_pull(skb, ETH_HLEN);
+ skb_pull_vn(skb, ETH_HLEN);
}
dprintf("> IP header pull...\n");
memmove(skb->data, skb->nh.raw, ip_n);
skb->nh.raw = skb->data;
- __skb_pull(skb, ip_n);
+ skb_pull_vn(skb, ip_n);
+ skb->h.raw = skb->data;
esph = (void*)skb->data;
// Add spi and sequence number.
esph->spi = sa->ident.spi;
@@ -457,7 +458,7 @@
// Move skb->data back to ethernet header.
// Do in 2 moves to ensure offsets are +ve,
// since args to skb_pull/skb_push are unsigned.
- __skb_pull(skb, head_n);
+ skb_pull_vn(skb, head_n);
__skb_push(skb, skb->data - skb->mac.raw);
// After this esph is invalid.
esph = NULL;
@@ -763,7 +764,7 @@
dprintf(">\n");
#ifdef DEBUG
dprintf("> recv skb=\n");
- skb_print_bits(skb, 0, skb->len);
+ skb_print_bits("", skb, 0, skb->len);
#endif
ip_n = (skb->nh.iph->ihl << 2);
if(skb->data == skb->mac.raw){
@@ -773,7 +774,7 @@
err = -EINVAL;
goto exit;
}
- skb_pull(skb, eth_n + ip_n);
+ skb_pull_vn(skb, eth_n + ip_n);
}
addr = skb->nh.iph->daddr;
err = esp_skb_header(skb, &esph);
diff -r aee991c7723a tools/vnet/vnet-module/etherip.c
--- a/tools/vnet/vnet-module/etherip.c Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/etherip.c Sun Jul 15 17:03:47 2007
@@ -270,6 +270,7 @@
u32 saddr, daddr;
char vnetbuf[VNET_ID_BUF];
struct ethhdr *eth;
+ struct sk_buff *newskb;
dprintf(">\n");
saddr = skb->nh.iph->saddr;
@@ -293,7 +294,7 @@
err = -EINVAL;
goto exit;
}
- skb_pull(skb, pull_n);
+ skb_pull_vn(skb, pull_n);
}
// Assume skb->data points at etherip header.
etheriph = (void*)skb->data;
@@ -318,7 +319,18 @@
goto exit;
}
// Point at the headers in the contained ethernet frame.
- skb->mac.raw = skb_pull(skb, etherip_n);
+ skb->mac.raw = skb_pull_vn(skb, etherip_n);
+
+ newskb = alloc_skb(skb->len, GFP_ATOMIC);
+ if (!newskb) {
+ wprintf("> alloc new sk_buff failed \n");
+ goto exit;
+ }
+ newskb->mac.raw = skb_put(newskb, skb->len);
+ skb_copy_bits(skb, 0, newskb->data, skb->len);
+ kfree_skb(skb);
+ skb = newskb;
+
eth = eth_hdr(skb);
// Simulate the logic from eth_type_trans()
@@ -340,27 +352,12 @@
// Assuming a standard Ethernet frame.
// Should check for protocol? Support ETH_P_8021Q too.
- skb->nh.raw = skb_pull(skb, ETH_HLEN);
-
-#ifdef __KERNEL__
- // Fix IP options, checksum, skb dst, netfilter state.
- memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
- if (skb->ip_summed == CHECKSUM_HW){
- skb->ip_summed = CHECKSUM_NONE;
- }
- dst_release(skb->dst);
- skb->dst = NULL;
- nf_reset(skb);
-#ifdef CONFIG_BRIDGE_NETFILTER
- if(skb->nf_bridge){
- // Stop the eth header being clobbered by
nf_bridge_maybe_copy_header().
- _nf_bridge_save_header(skb);
- }
-#endif
-#endif // __KERNEL__
-
- dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT
"\n",
+ skb->nh.raw = skb_pull_vn(skb, ETH_HLEN);
+ skb->h.raw = newskb->nh.raw + sizeof(struct iphdr);
+
+ dprintf("> Unpacked srcaddr=" IPFMT " dstaddr=" IPFMT " vnet=%s srcmac=" MACFMT "
dstmac=" MACFMT "\n",
NIPQUAD(skb->nh.iph->saddr),
+ NIPQUAD(skb->nh.iph->daddr),
VnetId_ntoa(&vnet, vnetbuf),
MAC6TUPLE(eth->h_source),
MAC6TUPLE(eth->h_dest));
diff -r aee991c7723a tools/vnet/vnet-module/skb_util.h
--- a/tools/vnet/vnet-module/skb_util.h Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/skb_util.h Sun Jul 15 17:03:47 2007
@@ -66,6 +66,21 @@
}
#endif
+
+/*
+ * It's a copy from {kernel}/include/linux/skbuff.h func '__skb_pull' and
'skb_pull'
+ * to aviodthe BUG_ON when pulling into the data (getting forwarded ip-frames)
+ */
+static inline unsigned char *__skb_pull_vn(struct sk_buff *skb, unsigned int
len)
+{
+ skb->len -= len;
+ //BUG_ON(skb->len < skb->data_len);
+ return skb->data += len;
+}
+static inline unsigned char *skb_pull_vn(struct sk_buff *skb, unsigned int len)
+{
+ return unlikely(len > skb->len) ? NULL : __skb_pull_vn(skb, len);
+}
#ifdef __KERNEL__
diff -r aee991c7723a tools/vnet/vnet-module/varp.c
--- a/tools/vnet/vnet-module/varp.c Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp.c Sun Jul 15 17:03:47 2007
@@ -1365,7 +1365,7 @@
goto exit;
}
}
- varph = (void*)skb_pull(skb, sizeof(struct udphdr));
+ varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr));
if(skb->len < sizeof(struct VnetMsgHdr)){
wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct
VnetMsgHdr));
goto exit;
@@ -1378,11 +1378,11 @@
}
break;
case VUDP_ID: // Etherip-in-udp packet.
- skb_pull(skb, sizeof(struct VnetMsgHdr));
+ skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
err = etherip_protocol_recv(skb);
goto exit;
case VFWD_ID: // Forwarded.
- skb_pull(skb, sizeof(struct VnetMsgHdr));
+ skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
err = vnet_forward_recv(skb);
goto exit;
default:
diff -r aee991c7723a tools/vnet/vnet-module/varp_socket.c
--- a/tools/vnet/vnet-module/varp_socket.c Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp_socket.c Sun Jul 15 17:03:47 2007
@@ -79,8 +79,8 @@
* Architectures using socketcall() define __ARCH_WANT_SYS_SOCKETCALL.
*/
-#ifdef __ARCH_WANT_SYS_SOCKETCALL
-
+#if defined(__ARCH_WANT_SYS_SOCKETCALLxx) && defined(__NR_socketcall)
+
/* Define the socketcall() syscall.
* Multiplexes all the socket-related calls.
*
@@ -190,59 +190,260 @@
/* No socketcall - define the individual syscalls. */
-static inline _syscall3(int, socket,
- int, family,
- int, type,
- int, protocol);
-
-static inline _syscall3(int, bind,
- int, fd,
- struct sockaddr *, umyaddr,
- int, addrlen);
-
-static inline _syscall3(int, connect,
- int, fd,
- struct sockaddr *, uservaddr,
- int, addrlen);
-
-static inline _syscall6(int, sendto,
- int, fd,
- void *, buff,
- size_t, len,
- unsigned, flags,
- struct sockaddr *, addr,
- int, addr_len);
-
-static inline _syscall6(int, recvfrom,
- int, fd,
- void *, ubuf,
- size_t, size,
- unsigned, flags,
- struct sockaddr *, addr,
- int *, addr_len);
-
-static inline _syscall5(int, setsockopt,
- int, fd,
- int, level,
- int, optname,
- void *, optval,
- int, optlen);
-
-static inline _syscall5(int, getsockopt,
- int, fd,
- int, level,
- int, optname,
- void *, optval,
- int *, optlen);
-
-static inline _syscall2(int, shutdown,
- int, fd,
- int, how);
-
-static inline _syscall3(int, getsockname,
- int, fd,
- struct sockaddr *, usockaddr,
- int *, usockaddr_len);
+/* the following code is copied from linux-kernel/net/socket.c
+ * As replacement of the __NR_socketcall, which exists not in x86_64 and
+ * same other systems.
+ * An alternate will be an export of the copied-functions in net/socket.c
+ */
+#define MAX_SOCK_ADDR 128
+
+int socket(int family, int type, int protocol){
+
+ int retval;
+ struct socket *sock;
+ retval = sock_create(family, type, protocol, &sock);
+ if (retval < 0)
+ goto out;
+
+ retval = sock_map_fd(sock);
+ if (retval < 0)
+ goto out_release;
+
+out:
+ /* It may be already another descriptor 8) Not kernel problem. */
+ return retval;
+
+out_release:
+ sock_release(sock);
+ return retval;
+
+}
+
+
+int bind(int fd, struct sockaddr *umyaddr, int addrlen){
+
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int err;
+
+ if((sock = sockfd_lookup(fd,&err))!=NULL)
+ {
+ if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
+ err = security_socket_bind(sock, (struct sockaddr
*)address, addrlen);
+ if (err) {
+ sockfd_put(sock);
+ return err;
+ }
+ err = sock->ops->bind(sock, (struct sockaddr *)address,
addrlen);
+ }
+ sockfd_put(sock);
+ }
+ return err;
+}
+
+
+int connect(int fd, struct sockaddr *uservaddr, int addrlen){
+
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int err;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ err = move_addr_to_kernel(uservaddr, addrlen, address);
+ if (err < 0)
+ goto out_put;
+
+ err = security_socket_connect(sock, (struct sockaddr *)address,
addrlen);
+ if (err)
+ goto out_put;
+
+ err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+ sock->file->f_flags);
+out_put:
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+int sendto(int fd, void * buff, size_t len,
+ unsigned flags, struct sockaddr *addr,
+ int addr_len){
+
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int err;
+ struct msghdr msg;
+ struct iovec iov;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+ iov.iov_base=buff;
+ iov.iov_len=len;
+ msg.msg_name=NULL;
+ msg.msg_iov=&iov;
+ msg.msg_iovlen=1;
+ msg.msg_control=NULL;
+ msg.msg_controllen=0;
+ msg.msg_namelen=0;
+ if(addr)
+ {
+ err = move_addr_to_kernel(addr, addr_len, address);
+ if (err < 0)
+ goto out_put;
+ msg.msg_name=address;
+ msg.msg_namelen=addr_len;
+ }
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ msg.msg_flags = flags;
+ err = sock_sendmsg(sock, &msg, len);
+
+out_put:
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+
+int recvfrom(int fd, void * ubuf, size_t size,
+ unsigned flags, struct sockaddr *addr,
+ int *addr_len){
+
+ struct socket *sock;
+ struct iovec iov;
+ struct msghdr msg;
+ char address[MAX_SOCK_ADDR];
+ int err,err2;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
+ msg.msg_control=NULL;
+ msg.msg_controllen=0;
+ msg.msg_iovlen=1;
+ msg.msg_iov=&iov;
+ iov.iov_len=size;
+ iov.iov_base=ubuf;
+ msg.msg_name=address;
+ msg.msg_namelen=MAX_SOCK_ADDR;
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ err=sock_recvmsg(sock, &msg, size, flags);
+
+ if(err >= 0 && addr != NULL)
+ {
+ err2=move_addr_to_user(address, msg.msg_namelen, addr,
addr_len);
+ if(err2<0)
+ err=err2;
+ }
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+int setsockopt(int fd, int level, int optname, void *optval, int optlen){
+
+ int err;
+ struct socket *sock;
+
+ if (optlen < 0)
+ return -EINVAL;
+
+ if ((sock = sockfd_lookup(fd, &err))!=NULL)
+ {
+ err = security_socket_setsockopt(sock,level,optname);
+ if (err) {
+ sockfd_put(sock);
+ return err;
+ }
+
+ if (level == SOL_SOCKET)
+ err=sock_setsockopt(sock,level,optname,optval,optlen);
+ else
+ err=sock->ops->setsockopt(sock, level, optname, optval,
optlen);
+ sockfd_put(sock);
+ }
+ return err;
+}
+
+
+/* not possible, because sock_getsockopt is not exported ...
+int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
+
+ int err;
+ struct socket *sock;
+
+ if ((sock = sockfd_lookup(fd, &err))!=NULL)
+ {
+ err = security_socket_getsockopt(sock, level, optname);
+ if (err) {
+ sockfd_put(sock);
+ return err;
+ }
+
+ if (level == SOL_SOCKET)
+ err=sock_getsockopt(sock,level,optname,optval,optlen);
+ else
+ err=sock->ops->getsockopt(sock, level, optname, optval,
optlen);
+ sockfd_put(sock);
+ }
+ return err;
+}
+*/
+
+
+int shutdown(int fd, int how){
+
+ int err;
+ struct socket *sock;
+
+ if ((sock = sockfd_lookup(fd, &err))!=NULL)
+ {
+ err = security_socket_shutdown(sock, how);
+ if (err) {
+ sockfd_put(sock);
+ return err;
+ }
+
+ err=sock->ops->shutdown(sock, how);
+ sockfd_put(sock);
+ }
+ return err;
+}
+
+int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
+
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int len, err;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
+ err = security_socket_getsockname(sock);
+ if (err)
+ goto out_put;
+
+ err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+ if (err)
+ goto out_put;
+ err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+/**
+ * End of copy from net/socket.c
+ */
+
#endif /* __ARCH_WANT_SYS_SOCKETCALL */
diff -r aee991c7723a tools/vnet/vnet-module/vnet_forward.c
--- a/tools/vnet/vnet-module/vnet_forward.c Wed May 9 15:34:47 2007
+++ b/tools/vnet/vnet-module/vnet_forward.c Sun Jul 15 17:03:47 2007
@@ -186,7 +186,7 @@
printk("\nWrapped packet:\n");
print_iphdr(__FUNCTION__, skb);
print_udphdr(__FUNCTION__, skb);
- skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
+ skb_print_bits(__FUNCTION__, skb, 0, skb->len);
#endif
err = _skb_xmit(skb, saddr);
@@ -304,7 +304,7 @@
peer->rx_packets++;
skb->mac.raw = NULL;
skb->nh.raw = skb->data;
- skb->h.raw = (void*)(skb->nh.iph + 1);
+ skb->h.raw = skb->data + sizeof(struct iphdr);
if(!skb->nh.iph->saddr){
skb->nh.iph->saddr = addr.u.ip4.s_addr;
}
@@ -328,12 +328,17 @@
// Handle (a copy of) it ourselves, because
// if it is looped-back by xmit it will be ignored.
- //recvskb = skb_clone(skb, GFP_ATOMIC);
- recvskb = pskb_copy(skb, GFP_ATOMIC);
+ recvskb = alloc_skb(skb->len, GFP_ATOMIC);
if(recvskb){
+ recvskb->protocol = htons(ETH_P_IP);
+
+ recvskb->nh.raw = skb_put(recvskb, skb->len);
+ recvskb->h.raw = recvskb->data + sizeof(struct iphdr);
+ skb_copy_bits(skb, 0, recvskb->data, skb->len);
+
// Data points at the unwrapped iphdr, but varp_handle_message()
// expects it to point at the udphdr, so pull.
- skb_pull(recvskb, sizeof(struct iphdr));
+ skb_pull_vn(recvskb, sizeof(struct iphdr));
if(varp_handle_message(recvskb) <= 0){
kfree_skb(recvskb);
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
--
best regards,
hanzhu
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|