patch-1.3.36 linux/net/ipv4/ip_output.c
Next file: linux/net/ipv4/ip_sockglue.c
Previous file: linux/net/ipv4/ip_options.c
Back to the patch index
Back to the overall index
- Lines: 1130
- Date:
Tue Oct 17 13:42:37 1995
- Orig file:
v1.3.35/linux/net/ipv4/ip_output.c
- Orig date:
Thu Jan 1 02:00:00 1970
diff -u --recursive --new-file v1.3.35/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c
@@ -0,0 +1,1129 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * The Internet Protocol (IP) output module.
+ *
+ * Version: @(#)ip.c 1.0.16b 9/1/93
+ *
+ * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ * Donald Becker, <becker@super.org>
+ * Alan Cox, <Alan.Cox@linux.org>
+ * Richard Underwood
+ * Stefan Becker, <stefanb@yello.ping.de>
+ * Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *
+ * See ip_input.c for original log
+ */
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+#include <net/snmp.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/arp.h>
+#include <net/icmp.h>
+#include <net/raw.h>
+#include <net/checksum.h>
+#include <linux/igmp.h>
+#include <linux/ip_fw.h>
+#include <linux/firewall.h>
+#include <linux/mroute.h>
+#include <net/netlink.h>
+
+/*
+ * Loop a packet back to the sender.
+ *
+ * Clones the complete IP datagram (header + payload) out of 'skb'
+ * into a freshly allocated skb aimed at the loopback device and
+ * resubmits it through ip_queue_xmit().  Used so local listeners
+ * also see multicast/broadcast frames we transmit.  Allocation
+ * failure silently drops the looped copy (the original still goes
+ * out).  NOTE(review): 'old_dev' is accepted but never used here.
+ */
+
+static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
+{
+ struct device *dev=&loopback_dev;
+ int len=ntohs(skb->ip_hdr->tot_len);
+ /* Room for the whole datagram, the loopback MAC header and alignment slack */
+ struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
+
+ if(newskb==NULL)
+ return;
+
+ newskb->link3=NULL;
+ newskb->sk=NULL;
+ newskb->dev=dev;
+ newskb->saddr=skb->saddr;
+ newskb->daddr=skb->daddr;
+ newskb->raddr=skb->raddr;
+ newskb->free=1;
+ newskb->lock=0;
+ newskb->users=0;
+ newskb->pkt_type=skb->pkt_type;
+
+ /*
+ * Put a MAC header on the packet
+ */
+ ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
+ /*
+ * Add the rest of the data space.
+ */
+ newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
+ memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
+
+ /*
+ * Copy the data
+ */
+ memcpy(newskb->ip_hdr,skb->ip_hdr,len);
+
+ /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
+
+ /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
+ ip_queue_xmit(NULL, dev, newskb, 1);
+}
+
+
+
<paste>
+/*
+ * Take an skb, and fill in the MAC header.
+ *
+ * Reserves 16-byte-aligned header room, then asks the device to
+ * build its link-level header.  Returns the number of bytes of MAC
+ * header built (0 for devices without a hard_header method).  If
+ * the device could not resolve the destination (hard_header returns
+ * a negative length) skb->arp is cleared and skb->raddr records the
+ * next hop, so the header can be completed later once ARP resolves.
+ * NOTE(review): 'saddr' is accepted but unused in this function.
+ */
+
+int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
+{
+ int mac = 0;
+
+ skb->dev = dev;
+ skb->arp = 1;
+ if (dev->hard_header)
+ {
+ /*
+ * Build a hardware header. Source address is our mac, destination unknown
+ * (rebuild header will sort this out)
+ */
+ skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
+ mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
+ if (mac < 0)
+ {
+ mac = -mac;
+ skb->arp = 0;
+ skb->raddr = daddr; /* next routing address */
+ }
+ }
+ return mac;
+}
</paste>
+
+/*
+ * As ip_send(), but reserves MAX_HEADER bytes of head room instead
+ * of the minimum for this device.  Used for TCP (see ip_build_header)
+ * so there is slack for later header manipulation.
+ * NOTE(review): 'saddr' is accepted but unused in this function.
+ */
+static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
+{
+ int mac = 0;
+
+ skb->dev = dev;
+ skb->arp = 1;
+ if (dev->hard_header)
+ {
+ skb_reserve(skb,MAX_HEADER);
+ mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
+ if (mac < 0)
+ {
+ mac = -mac;
+ skb->arp = 0;
+ skb->raddr = daddr; /* next routing address */
+ }
+ }
+ return mac;
+}
+
+/* Running counter used to stamp the IP identification field. */
+int ip_id_count = 0;
+
+/*
+ * This routine builds the appropriate hardware/IP headers for
+ * the routine. It assumes that if *dev != NULL then the
+ * protocol knows what it's doing, otherwise it uses the
+ * routing/ARP tables to select a device struct.
+ *
+ * On success returns the total number of header bytes added (MAC
+ * header plus IP header and options, except for IPPROTO_RAW where
+ * only the MAC header length is returned because the caller supplies
+ * its own IP header).  Returns -ENETUNREACH when no route exists or
+ * when strict source routing conflicts with a gatewayed route.
+ * *dev is filled in with the chosen output device when it was NULL.
+ */
+int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
+ struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
+{
+ struct rtable *rt;
+ __u32 raddr;
+ int tmp;
+ __u32 src;
+ struct iphdr *iph;
+ __u32 final_daddr = daddr;
+
+ /* With source routing the first hop replaces the real destination;
+ * the real one is kept in final_daddr for ip_options_build(). */
+ if (opt && opt->srr)
+ daddr = opt->faddr;
+
+ /*
+ * See if we need to look up the device.
+ */
+
+#ifdef CONFIG_IP_MULTICAST
+ if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
+ *dev=dev_get(skb->sk->ip_mc_name);
+#endif
+ if (*dev == NULL)
+ {
+ if(skb->localroute)
+ rt = ip_rt_local(daddr, NULL, &src);
+ else
+ rt = ip_rt_route(daddr, NULL, &src);
+ if (rt == NULL)
+ {
+ ip_statistics.IpOutNoRoutes++;
+ return(-ENETUNREACH);
+ }
+
+ *dev = rt->rt_dev;
+ /*
+ * If the frame is from us and going off machine it MUST MUST MUST
+ * have the output device ip address and never the loopback
+ */
+ if (LOOPBACK(saddr) && !LOOPBACK(daddr))
+ saddr = src;/*rt->rt_dev->pa_addr;*/
+ raddr = rt->rt_gateway;
+
+ }
+ else
+ {
+ /*
+ * We still need the address of the first hop.
+ */
+ if(skb->localroute)
+ rt = ip_rt_local(daddr, NULL, &src);
+ else
+ rt = ip_rt_route(daddr, NULL, &src);
+ /*
+ * If the frame is from us and going off machine it MUST MUST MUST
+ * have the output device ip address and never the loopback
+ */
+ if (LOOPBACK(saddr) && !LOOPBACK(daddr))
+ saddr = src;/*rt->rt_dev->pa_addr;*/
+
+ raddr = (rt == NULL) ? 0 : rt->rt_gateway;
+ }
+
+ /*
+ * No source addr so make it our addr
+ */
+ if (saddr == 0)
+ saddr = src;
+
+ /*
+ * No gateway so aim at the real destination
+ */
+ if (raddr == 0)
+ raddr = daddr;
+
+ /*
+ * Now build the MAC header.
+ */
+
+ if(type==IPPROTO_TCP)
+ tmp = ip_send_room(skb, raddr, len, *dev, saddr);
+ else
+ tmp = ip_send(skb, raddr, len, *dev, saddr);
+
+ /*
+ * Book keeping
+ */
+
+ skb->dev = *dev;
+ skb->saddr = saddr;
+
+ /*
+ * Now build the IP header.
+ */
+
+ /*
+ * If we are using IPPROTO_RAW, then we don't need an IP header, since
+ * one is being supplied to us by the user
+ */
+
+ if(type == IPPROTO_RAW)
+ return (tmp);
+
+ /*
+ * Build the IP addresses
+ */
+
+ if (opt)
+ iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
+ else
+ iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->tos = tos;
+ iph->frag_off = 0;
+ iph->ttl = ttl;
+ iph->daddr = daddr;
+ iph->saddr = saddr;
+ iph->protocol = type;
+ skb->ip_hdr = iph;
+
+ /* NOTE(review): tot_len, id and check are left for ip_queue_xmit()
+ * to fill in; verify all callers go through it. */
+ if (!opt || !opt->optlen)
+ return sizeof(struct iphdr) + tmp;
+ /* Strict source routing may not pass through a gateway */
+ if (opt->is_strictroute && rt && rt->rt_gateway)
+ {
+ ip_statistics.IpOutNoRoutes++;
+ return -ENETUNREACH;
+ }
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
+ return iph->ihl*4 + tmp;
+}
+
+
+/*
+ * Generate a checksum for an outgoing IP datagram.
+ *
+ * Zeroes the check field first (the checksum is computed with the
+ * field as zero), then stores the one's-complement sum over the
+ * iph->ihl 32-bit words of the header.
+ */
+
+void ip_send_check(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+}
+
+/*
+ * Queues a packet to be sent, and starts the transmitter
+ * if necessary. if free = 1 then we free the block after
+ * transmit, otherwise we don't. If free==2 we not only
+ * free the block but also don't assign a new ip seq number.
+ * This routine also needs to put in the total length,
+ * and compute the checksum
+ *
+ * A NULL 'sk' forces free=1 (nobody owns the buffer).  When free==0
+ * the skb is chained onto sk->send_head/send_tail so TCP can
+ * retransmit it.  Packets rejected by the output firewall are
+ * silently dropped; oversize packets are handed to ip_fragment().
+ */
+
+void ip_queue_xmit(struct sock *sk, struct device *dev,
+ struct sk_buff *skb, int free)
+{
+ struct iphdr *iph;
+/* unsigned char *ptr;*/
+
+ /* Sanity check */
+ if (dev == NULL)
+ {
+ NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
+ return;
+ }
+
+ IS_SKB(skb);
+
+ /*
+ * Do some book-keeping in the packet for later
+ */
+
+
+ skb->dev = dev;
+ skb->when = jiffies;
+
+ /*
+ * Find the IP header and set the length. This is bad
+ * but once we get the skb data handling code in the
+ * hardware will push its header sensibly and we will
+ * set skb->ip_hdr to avoid this mess and the fixed
+ * header length problem
+ */
+
+ iph = skb->ip_hdr;
+ /* NOTE(review): htons() is what is meant here; ntohs() happens to
+ * be the identical 16-bit byte swap, so the result is the same. */
+ iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
+
+#ifdef CONFIG_FIREWALL
+ if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
+ /* just don't send this packet */
+ return;
+#endif
+
+ /*
+ * No reassigning numbers to fragments...
+ */
+
+ if(free!=2)
+ iph->id = htons(ip_id_count++);
+ else
+ free=1;
+
+ /* All buffers without an owner socket get freed */
+ if (sk == NULL)
+ free = 1;
+
+ skb->free = free;
+
+ /*
+ * Do we need to fragment. Again this is inefficient.
+ * We need to somehow lock the original buffer and use
+ * bits of it.
+ */
+
+ if(ntohs(iph->tot_len)> dev->mtu)
+ {
+ ip_fragment(sk,skb,dev,0);
+ IS_SKB(skb);
+ kfree_skb(skb,FREE_WRITE);
+ return;
+ }
+
+ /*
+ * Add an IP checksum
+ */
+
+ ip_send_check(iph);
+
+ /*
+ * Print the frame when debugging
+ */
+
+ /*
+ * More debugging. You cannot queue a packet already on a list
+ * Spot this and moan loudly.
+ */
+ if (skb->next != NULL)
+ {
+ NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
+ skb_unlink(skb);
+ }
+
+ /*
+ * If a sender wishes the packet to remain unfreed
+ * we add it to his send queue. This arguably belongs
+ * in the TCP level since nobody else uses it. BUT
+ * remember IPng might change all the rules.
+ */
+
+ if (!free)
+ {
+ unsigned long flags;
+ /* The socket now has more outstanding blocks */
+
+ sk->packets_out++;
+
+ /* Protect the list for a moment */
+ save_flags(flags);
+ cli();
+
+ if (skb->link3 != NULL)
+ {
+ NETDEBUG(printk("ip.c: link3 != NULL\n"));
+ skb->link3 = NULL;
+ }
+ if (sk->send_head == NULL)
+ {
+ sk->send_tail = skb;
+ sk->send_head = skb;
+ }
+ else
+ {
+ sk->send_tail->link3 = skb;
+ sk->send_tail = skb;
+ }
+ /* skb->link3 is NULL */
+
+ /* Interrupt restore */
+ restore_flags(flags);
+ }
+ else
+ /* Remember who owns the buffer */
+ skb->sk = sk;
+
+ /*
+ * If the indicated interface is up and running, send the packet.
+ */
+
+ ip_statistics.IpOutRequests++;
+#ifdef CONFIG_IP_ACCT
+ ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
+#endif
+
+#ifdef CONFIG_IP_MULTICAST
+
+ /*
+ * Multicasts are looped back for other local users
+ */
+
+ if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
+ {
+ if(sk==NULL || sk->ip_mc_loop)
+ {
+ if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
+ {
+ ip_loopback(dev,skb);
+ }
+ else
+ {
+ /* Only loop back if we are a member of the group */
+ struct ip_mc_list *imc=dev->ip_mc_list;
+ while(imc!=NULL)
+ {
+ if(imc->multiaddr==iph->daddr)
+ {
+ ip_loopback(dev,skb);
+ break;
+ }
+ imc=imc->next;
+ }
+ }
+ }
+ /* Multicasts with ttl 0 must not go beyond the host */
+
+ if(skb->ip_hdr->ttl==0)
+ {
+ kfree_skb(skb, FREE_READ);
+ return;
+ }
+ }
+#endif
+ /* BSD-style loopback of broadcasts for local listeners */
+ if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
+ ip_loopback(dev,skb);
+
+ if (dev->flags & IFF_UP)
+ {
+ /*
+ * If we have an owner use its priority setting,
+ * otherwise use NORMAL
+ */
+
+ if (sk != NULL)
+ {
+ dev_queue_xmit(skb, dev, sk->priority);
+ }
+ else
+ {
+ dev_queue_xmit(skb, dev, SOPRI_NORMAL);
+ }
+ }
+ else
+ {
+ if(sk)
+ sk->err = ENETDOWN;
+ ip_statistics.IpOutDiscards++;
+ if (free)
+ kfree_skb(skb, FREE_WRITE);
+ }
+}
+
+
+/*
+ * Build and send a packet, with as little as one copy
+ *
+ * Doesn't care much about ip options... option length can be
+ * different for fragment at 0 and other fragments.
+ *
+ * Note that the fragment at the highest offset is sent first,
+ * so the getfrag routine can fill in the TCP/UDP checksum header
+ * field in the last fragment it sends... actually it also helps
+ * the reassemblers, they can put most packets in at the head of
+ * the fragment queue, and they know the total size in advance. This
+ * last feature will measurably improve the Linux fragment handler.
+ *
+ * The callback has five args, an arbitrary pointer (copy of frag),
+ * the source IP address (may depend on the routing table), the
+ * destination address (char *), the offset to copy from, and the
+ * length to be copied.
+ *
+ * Returns 0 on success or a negative errno (-ENODEV, -ENETUNREACH,
+ * -EMSGSIZE, -EPERM, or a sock_alloc_send_skb error).
+ *
+ * NOTE(review): 'sk' is NULL-checked in some places but dereferenced
+ * unconditionally in others (sk->localroute, sk->ip_hdrincl);
+ * presumably callers always pass a socket -- verify.
+ */
+
+int ip_build_xmit(struct sock *sk,
+ void getfrag (const void *,
+ __u32,
+ char *,
+ unsigned int,
+ unsigned int),
+ const void *frag,
+ unsigned short int length,
+ __u32 daddr,
+ __u32 user_saddr,
+ struct options * opt,
+ int flags,
+ int type)
+{
+ struct rtable *rt;
+ unsigned int fraglen, maxfraglen, fragheaderlen;
+ int offset, mf;
+ __u32 saddr;
+ unsigned short id;
+ struct iphdr *iph;
+ int local=0;
+ struct device *dev;
+ int nfrags=0;
+ __u32 true_daddr = daddr;
+
+ /* Source routing: route to the first hop, remember the real target */
+ if (opt && opt->srr && !sk->ip_hdrincl)
+ daddr = opt->faddr;
+
+ ip_statistics.IpOutRequests++;
+
+#ifdef CONFIG_IP_MULTICAST
+ /* Sockets bound to a multicast interface bypass the routing tables */
+ if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
+ {
+ dev=dev_get(sk->ip_mc_name);
+ if(!dev)
+ return -ENODEV;
+ rt=NULL;
+ if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
+ saddr = sk->saddr;
+ else
+ saddr = dev->pa_addr;
+ }
+ else
+ {
+#endif
+ /*
+ * Perform the IP routing decisions
+ */
+
+ if(sk->localroute || flags&MSG_DONTROUTE)
+ local=1;
+
+ rt = sk->ip_route_cache;
+
+ /*
+ * See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
+ * by doing the invalidation actively in the route change and header change.
+ */
+
+ saddr=sk->ip_route_saddr;
+ if(!rt || sk->ip_route_stamp != rt_stamp ||
+ daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
+ (sk->saddr && sk->saddr != saddr))
+ {
+ if(local)
+ rt = ip_rt_local(daddr, NULL, &saddr);
+ else
+ rt = ip_rt_route(daddr, NULL, &saddr);
+ sk->ip_route_local=local;
+ sk->ip_route_daddr=daddr;
+ sk->ip_route_saddr=saddr;
+ sk->ip_route_stamp=rt_stamp;
+ sk->ip_route_cache=rt;
+ sk->ip_hcache_ver=NULL;
+ sk->ip_hcache_state= 0;
+ }
+ else if(rt)
+ {
+ /*
+ * Attempt header caches only if the cached route is being reused. Header cache
+ * is not ultra cheap to set up. This means we only set it up on the second packet,
+ * so one shot communications are not slowed. We assume (seems reasonable) that 2 is
+ * probably going to be a stream of data.
+ */
+ if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
+ {
+ if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
+ rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
+ else
+ /* Can't cache. Remember this */
+ sk->ip_hcache_state= -1;
+ }
+ }
+
+ if (rt == NULL)
+ {
+ ip_statistics.IpOutNoRoutes++;
+ return(-ENETUNREACH);
+ }
+
+ if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
+ saddr = sk->saddr;
+
+ dev=rt->rt_dev;
+#ifdef CONFIG_IP_MULTICAST
+ }
+#endif
+ if (user_saddr)
+ saddr = user_saddr;
+
+ /*
+ * Now compute the buffer space we require
+ */
+
+ /*
+ * Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
+ * choice RAW frames within 20 bytes of maximum size(rare) to the long path
+ */
+
+ length += 20;
+ if (!sk->ip_hdrincl && opt)
+ {
+ length += opt->optlen;
+ if (opt->is_strictroute && rt && rt->rt_gateway)
+ {
+ ip_statistics.IpOutNoRoutes++;
+ return -ENETUNREACH;
+ }
+ }
+ /* Fast path: a single, non-broadcast, non-multicast frame */
+ if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
+ {
+ int error;
+ struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, 0,&error);
+ if(skb==NULL)
+ {
+ ip_statistics.IpOutDiscards++;
+ return error;
+ }
+ skb->dev=dev;
+ skb->free=1;
+ skb->when=jiffies;
+ skb->sk=sk;
+ skb->arp=0;
+ skb->saddr=saddr;
+ skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
+ skb_reserve(skb,(dev->hard_header_len+15)&~15);
+ /* Use the cached MAC header when valid, else build one */
+ if(sk->ip_hcache_state>0)
+ {
+ memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
+ skb->arp=1;
+ }
+ else if(dev->hard_header)
+ {
+ if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
+ skb->arp=1;
+ }
+ else
+ skb->arp=1;
+ skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
+ dev_lock_list();
+ if(!sk->ip_hdrincl)
+ {
+ iph->version=4;
+ iph->ihl=5;
+ iph->tos=sk->ip_tos;
+ iph->tot_len = htons(length);
+ iph->id=htons(ip_id_count++);
+ iph->frag_off = 0;
+ iph->ttl=sk->ip_ttl;
+ iph->protocol=type;
+ iph->saddr=saddr;
+ iph->daddr=daddr;
+ if (opt)
+ {
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt,
+ true_daddr, dev->pa_addr, 0);
+ }
+ iph->check=0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
+ }
+ else
+ /* Raw socket: user supplies the IP header too */
+ getfrag(frag,saddr,(void *)iph,0,length-20);
+ dev_unlock_list();
+#ifdef CONFIG_FIREWALL
+ if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
+ {
+ kfree_skb(skb, FREE_WRITE);
+ return -EPERM;
+ }
+#endif
+#ifdef CONFIG_IP_ACCT
+ ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
+#endif
+ if(dev->flags&IFF_UP)
+ dev_queue_xmit(skb,dev,sk->priority);
+ else
+ {
+ ip_statistics.IpOutDiscards++;
+ kfree_skb(skb, FREE_WRITE);
+ }
+ return 0;
+ }
+ length-=20;
+ if (sk && !sk->ip_hdrincl && opt)
+ {
+ length -= opt->optlen;
+ fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
+ maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
+ }
+ else
+ {
+ fragheaderlen = dev->hard_header_len;
+ if(!sk->ip_hdrincl)
+ fragheaderlen += 20;
+
+ /*
+ * Fragheaderlen is the size of 'overhead' on each buffer. Now work
+ * out the size of the frames to send.
+ */
+
+ maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
+ }
+
+ /*
+ * Start at the end of the frame by handling the remainder.
+ */
+
+ offset = length - (length % (maxfraglen - fragheaderlen));
+
+ /*
+ * Amount of memory to allocate for final fragment.
+ */
+
+ fraglen = length - offset + fragheaderlen;
+
+ /* Length was an exact multiple: the "remainder" is a full fragment */
+ if(length-offset==0)
+ {
+ fraglen = maxfraglen;
+ offset -= maxfraglen-fragheaderlen;
+ }
+
+
+ /*
+ * The last fragment will not have MF (more fragments) set.
+ */
+
+ mf = 0;
+
+ /*
+ * Can't fragment raw packets
+ */
+
+ if (sk->ip_hdrincl && offset > 0)
+ return(-EMSGSIZE);
+
+ /*
+ * Lock the device lists.
+ */
+
+ dev_lock_list();
+
+ /*
+ * Get an identifier
+ */
+
+ id = htons(ip_id_count++);
+
+ /*
+ * Begin outputting the bytes.
+ */
+
+ do
+ {
+ struct sk_buff * skb;
+ int error;
+ char *data;
+
+ /*
+ * Get the memory we require with some space left for alignment.
+ */
+
+ skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
+ if (skb == NULL)
+ {
+ ip_statistics.IpOutDiscards++;
+ if(nfrags>1)
+ ip_statistics.IpFragCreates++;
+ dev_unlock_list();
+ return(error);
+ }
+
+ /*
+ * Fill in the control structures
+ */
+
+ skb->next = skb->prev = NULL;
+ skb->dev = dev;
+ skb->when = jiffies;
+ skb->free = 1; /* dubious, this one */
+ skb->sk = sk;
+ skb->arp = 0;
+ skb->saddr = saddr;
+ skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
+ skb_reserve(skb,(dev->hard_header_len+15)&~15);
+ data = skb_put(skb, fraglen-dev->hard_header_len);
+
+ /*
+ * Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
+ * no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
+ * this can be fixed later. For gateway routes we ought to have a rt->.. header cache
+ * pointer to speed header cache builds for identical targets.
+ */
+
+ if(sk->ip_hcache_state>0)
+ {
+ memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
+ skb->arp=1;
+ }
+ else if (dev->hard_header)
+ {
+ if(dev->hard_header(skb, dev, ETH_P_IP,
+ NULL, NULL, 0)>0)
+ skb->arp=1;
+ }
+
+ /*
+ * Find where to start putting bytes.
+ */
+
+ skb->ip_hdr = iph = (struct iphdr *)data;
+
+ /*
+ * Only write IP header onto non-raw packets
+ */
+
+ if(!sk->ip_hdrincl)
+ {
+
+ iph->version = 4;
+ iph->ihl = 5; /* ugh */
+ if (opt) {
+ iph->ihl += opt->optlen>>2;
+ ip_options_build(skb, opt,
+ true_daddr, dev->pa_addr, offset);
+ }
+ iph->tos = sk->ip_tos;
+ iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
+ iph->id = id;
+ iph->frag_off = htons(offset>>3);
+ iph->frag_off |= mf;
+#ifdef CONFIG_IP_MULTICAST
+ if (MULTICAST(daddr))
+ iph->ttl = sk->ip_mc_ttl;
+ else
+#endif
+ iph->ttl = sk->ip_ttl;
+ iph->protocol = type;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ data += iph->ihl*4;
+
+ /*
+ * Any further fragments will have MF set.
+ */
+
+ mf = htons(IP_MF);
+ }
+
+ /*
+ * User data callback
+ */
+
+ getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
+
+ /*
+ * Account for the fragment.
+ */
+
+#ifdef CONFIG_FIREWALL
+ if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
+ {
+ kfree_skb(skb, FREE_WRITE);
+ dev_unlock_list();
+ return -EPERM;
+ }
+#endif
+#ifdef CONFIG_IP_ACCT
+ if(!offset)
+ ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
+#endif
+ offset -= (maxfraglen-fragheaderlen);
+ fraglen = maxfraglen;
+
+#ifdef CONFIG_IP_MULTICAST
+
+ /*
+ * Multicasts are looped back for other local users
+ */
+
+ if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK))
+ {
+ /*
+ * Loop back any frames. The check for IGMP_ALL_HOSTS is because
+ * you are always magically a member of this group.
+ *
+ * Always loop back all host messages when running as a multicast router.
+ */
+
+ if(sk==NULL || sk->ip_mc_loop)
+ {
+ /* NOTE(review): this tests skb->daddr where ip_queue_xmit()
+ * tests iph->daddr for the same purpose -- verify they agree. */
+ if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
+ ip_loopback(rt?rt->rt_dev:dev,skb);
+ else
+ {
+ struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
+ while(imc!=NULL)
+ {
+ if(imc->multiaddr==daddr)
+ {
+ ip_loopback(rt?rt->rt_dev:dev,skb);
+ break;
+ }
+ imc=imc->next;
+ }
+ }
+ }
+
+ /*
+ * Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
+ * extra clone.
+ */
+
+ if(skb->ip_hdr->ttl==0)
+ kfree_skb(skb, FREE_READ);
+ }
+#endif
+
+ nfrags++;
+
+ /*
+ * BSD loops broadcasts
+ */
+
+ if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
+ ip_loopback(dev,skb);
+
+ /*
+ * Now queue the bytes into the device.
+ */
+
+ if (dev->flags & IFF_UP)
+ {
+ dev_queue_xmit(skb, dev, sk->priority);
+ }
+ else
+ {
+ /*
+ * Whoops...
+ */
+
+ ip_statistics.IpOutDiscards++;
+ if(nfrags>1)
+ ip_statistics.IpFragCreates+=nfrags;
+ kfree_skb(skb, FREE_WRITE);
+ dev_unlock_list();
+ /*
+ * BSD behaviour.
+ */
+ if(sk!=NULL)
+ sk->err=ENETDOWN;
+ return(0); /* lose rest of fragments */
+ }
+ }
+ while (offset >= 0);
+ if(nfrags>1)
+ ip_statistics.IpFragCreates+=nfrags;
+ dev_unlock_list();
+ return(0);
+}
+
+
+/*
+ * IP protocol layer initialiser
+ *
+ * Packet-type descriptor for IP; the type field is filled in with
+ * htons(ETH_P_IP) at runtime by ip_init() (see the MUTTER note).
+ */
+
+static struct packet_type ip_packet_type =
+{
+ 0, /* MUTTER ntohs(ETH_P_IP),*/
+ NULL, /* All devices */
+ ip_rcv, /* IP receive handler */
+ NULL,
+ NULL,
+};
+
+#ifdef CONFIG_RTNETLINK
+
+/*
+ * Netlink hooks for IP
+ *
+ * Broadcast a routing-table event (msg = RTMSG_* code) with the
+ * given destination, gateway, genmask, flags, metric and device
+ * name on the NETLINK_ROUTE channel.  Allocation failure simply
+ * drops the notification (atomic context, best effort).
+ */
+
+void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
+{
+ struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
+ struct netlink_rtinfo *nrt;
+ struct sockaddr_in *s;
+ if(skb==NULL)
+ return;
+ nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
+ nrt->rtmsg_type=msg;
+ s=(struct sockaddr_in *)&nrt->rtmsg_dst;
+ s->sin_family=AF_INET;
+ s->sin_addr.s_addr=daddr;
+ s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
+ s->sin_family=AF_INET;
+ s->sin_addr.s_addr=gw;
+ s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
+ s->sin_family=AF_INET;
+ s->sin_addr.s_addr=mask;
+ nrt->rtmsg_flags=flags;
+ nrt->rtmsg_metric=metric;
+ strcpy(nrt->rtmsg_device,name);
+ netlink_post(NETLINK_ROUTE, skb);
+}
+
+#endif
+
+/*
+ * Device notifier
+ *
+ * Reacts to network device state changes: on NETDEV_DOWN the routes
+ * through the device are flushed, on NETDEV_UP the device joins the
+ * all-hosts multicast group.  A netlink routing message is posted in
+ * both cases.
+ * NOTE(review): ip_netlink_msg() is defined in this file only under
+ * CONFIG_RTNETLINK but is called here unconditionally -- presumably a
+ * stub exists elsewhere (net/netlink.h) when that option is off; verify.
+ */
+
+static int ip_rt_event(unsigned long event, void *ptr)
+{
+ struct device *dev=ptr;
+ if(event==NETDEV_DOWN)
+ {
+ ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
+ ip_rt_flush(dev);
+ }
+/*
+ * Join the initial group if multicast.
+ */
+ if(event==NETDEV_UP)
+ {
+#ifdef CONFIG_IP_MULTICAST
+ ip_mc_allhost(dev);
+#endif
+ ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
+ }
+ return NOTIFY_DONE;
+}
+
+/* Notifier block registered by ip_init() so ip_rt_event() sees
+ * network device up/down transitions. */
+struct notifier_block ip_rt_notifier={
+ ip_rt_event, /* handler */
+ NULL, /* next in chain */
+ 0 /* priority */
+};
+
+/*
+ * IP registers the packet type and then calls the subprotocol initialisers
+ *
+ * Also registers the device notifier (so routes are flushed when an
+ * interface goes down) and, with multicast enabled, the /proc/net/igmp
+ * entry.
+ */
+
+void ip_init(void)
+{
+ /* Fill in the field left zero in the static initialiser */
+ ip_packet_type.type=htons(ETH_P_IP);
+ dev_add_pack(&ip_packet_type);
+
+ /* So we flush routes when a device is downed */
+ register_netdevice_notifier(&ip_rt_notifier);
+
+/* ip_raw_init();
+ ip_packet_init();
+ ip_tcp_init();
+ ip_udp_init();*/
+
+#ifdef CONFIG_IP_MULTICAST
+ proc_net_register(&(struct proc_dir_entry) {
+ PROC_NET_IGMP, 4, "igmp",
+ S_IFREG | S_IRUGO, 1, 0, 0,
+ 0, &proc_net_inode_operations,
+ ip_mc_procinfo
+ });
+#endif
+}
+
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov
with Sam's (original) version of this