/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * The Internet Protocol (IP) module. * * Version: @(#)ip.c 1.0.16b 9/1/93 * * Authors: Ross Biro, <bir7@leland.Stanford.Edu> * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <Alan.Cox@linux.org> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * * * Fixes: * Alan Cox : Commented a couple of minor bits of surplus code * Alan Cox : Undefining IP_FORWARD doesn't include the code * (just stops a compiler warning). * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes * are junked rather than corrupting things. * Alan Cox : Frames to bad broadcast subnets are dumped * We used to process them non broadcast and * boy could that cause havoc. * Alan Cox : ip_forward sets the free flag on the * new frame it queues. Still crap because * it copies the frame but at least it * doesn't eat memory too. * Alan Cox : Generic queue code and memory fixes. * Fred Van Kempen : IP fragment support (borrowed from NET2E) * Gerhard Koerting: Forward fragmented frames correctly. * Gerhard Koerting: Fixes to my fix of the above 8-). * Gerhard Koerting: IP interface addressing fix. * Linus Torvalds : More robustness checks * Alan Cox : Even more checks: Still not as robust as it ought to be * Alan Cox : Save IP header pointer for later * Alan Cox : ip option setting * Alan Cox : Use ip_tos/ip_ttl settings * Alan Cox : Fragmentation bogosity removed * (Thanks to Mark.Bush@prg.ox.ac.uk) * Dmitry Gorodchanin : Send of a raw packet crash fix. * Alan Cox : Silly ip bug when an overlength * fragment turns up. Now frees the * queue. * Linus Torvalds/ : Memory leakage on fragmentation * Alan Cox : handling. * Gerhard Koerting: Forwarding uses IP priority hints * Teemu Rantanen : Fragment problems. * Alan Cox : General cleanup, comments and reformat * Alan Cox : SNMP statistics * Alan Cox : BSD address rule semantics. Also see * UDP as there is a nasty checksum issue * if you do things the wrong way. * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file * Alan Cox : IP options adjust sk->priority. * Pedro Roque : Fix mtu/length error in ip_forward. * Alan Cox : Avoid ip_chk_addr when possible. * Richard Underwood : IP multicasting. * Alan Cox : Cleaned up multicast handlers. * Alan Cox : RAW sockets demultiplex in the BSD style. * Gunther Mayer : Fix the SNMP reporting typo * Alan Cox : Always in group 224.0.0.1 * Pauline Middelink : Fast ip_checksum update when forwarding * Masquerading support. * Alan Cox : Multicast loopback error for 224.0.0.1 * Alan Cox : IP_MULTICAST_LOOP option. * Alan Cox : Use notifiers. * Bjorn Ekwall : Removed ip_csum (from slhc.c too) * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) * Stefan Becker : Send out ICMP HOST REDIRECT * Arnt Gulbrandsen : ip_build_xmit * Alan Cox : Per socket routing cache * Alan Cox : Fixed routing cache, added header cache. * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it. * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. * Alan Cox : Incoming IP option handling. * Alan Cox : Set saddr on raw output frames as per BSD. * Alan Cox : Stopped broadcast source route explosions. * Alan Cox : Can disable source routing * Takeshi Sone : Masquerading didn't work. * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. * Alan Cox : Memory leaks, tramples, misc debugging. * Alan Cox : Fixed multicast (by popular demand 8)) * Alan Cox : Fixed forwarding (by even more popular demand 8)) * Alan Cox : Fixed SNMP statistics [I think] * Gerhard Koerting : IP fragmentation forwarding fix * Alan Cox : Device lock against page fault. * Alan Cox : IP_HDRINCL facility. * Werner Almesberger : Zero fragment bug * Alan Cox : RAW IP frame length bug * Alan Cox : Outgoing firewall on build_xmit * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel * Alan Cox : Multicast routing hooks * Jos Vos : Do accounting *before* call_in_firewall * Willy Konynenberg : Transparent proxying support * * * * To Fix: * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient * and could be made very efficient with the addition of some virtual memory hacks to permit * the allocation of a buffer that can then be 'grown' by twiddling page tables. * Output fragmentation wants updating along with the buffer management to use a single * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause * fragmentation anyway. * * FIXME: copy frag 0 iph to qp->iph * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <asm/segment.h> #include <asm/system.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/config.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <net/snmp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> #include <net/udp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/arp.h> #include <net/icmp.h> #include <net/raw.h> #include <net/checksum.h> #include <linux/igmp.h> #include <linux/ip_fw.h> #ifdef CONFIG_IP_MASQUERADE #include <net/ip_masq.h> #endif #include <linux/firewall.h> #include <linux/mroute.h> #include <net/netlink.h> #ifdef CONFIG_NET_ALIAS #include <linux/net_alias.h> #endif extern int last_retran; extern void sort_send(struct sock *sk); #define min(a,b) ((a)<(b)?(a):(b)) /* * SNMP management statistics */ #ifdef CONFIG_IP_FORWARD struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ #else struct ip_mib ip_statistics={2,64,}; /* Forwarding=No, Default TTL=64 */ #endif /* * Handle the issuing of an ioctl() request * for the ip device. This is scheduled to * disappear */ int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch(cmd) { default: return(-EINVAL); } } #ifdef CONFIG_IP_TRANSPARENT_PROXY /* * Check the packet against our socket administration to see * if it is related to a connection on our system. * Needed for transparent proxying. */ int ip_chksock(struct sk_buff *skb) { switch (skb->h.iph->protocol) { case IPPROTO_ICMP: return icmp_chkaddr(skb); case IPPROTO_TCP: return tcp_chkaddr(skb); case IPPROTO_UDP: return udp_chkaddr(skb); default: return 0; } } #endif /* * This function receives all incoming IP datagrams. * * On entry skb->data points to the start of the IP header and * the MAC header has been removed. */ int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) { struct iphdr *iph = skb->h.iph; struct sock *raw_sk=NULL; unsigned char hash; unsigned char flag = 0; struct inet_protocol *ipprot; int brd=IS_MYADDR; struct options * opt = NULL; int is_frag=0; __u32 daddr; #ifdef CONFIG_FIREWALL int fwres; __u16 rport; #endif #ifdef CONFIG_IP_MROUTE int mroute_pkt=0; #endif #ifdef CONFIG_NET_IPV6 /* * Intercept IPv6 frames. We dump ST-II and invalid types just below.. */ if(iph->version == 6) return ipv6_rcv(skb,dev,pt); #endif ip_statistics.IpInReceives++; /* * Account for the packet (even if the packet is * not accepted by the firewall!). */ #ifdef CONFIG_IP_ACCT ip_fw_chk(iph,dev,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_IN); #endif /* * Tag the ip header of this packet so we can find it */ skb->ip_hdr = iph; /* * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. * RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING]. * * Is the datagram acceptable? * * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] * 4. Doesn't have a bogus length * (5. We ought to check for IP multicast addresses and undefined types.. does this matter ?) */ if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0 || skb->len < ntohs(iph->tot_len)) { ip_statistics.IpInHdrErrors++; kfree_skb(skb, FREE_WRITE); return(0); } /* * Our transport medium may have padded the buffer out. Now we know it * is IP we can trim to the true length of the frame. * Note this now means skb->len holds ntohs(iph->tot_len). */ skb_trim(skb,ntohs(iph->tot_len)); /* * Try to select closest <src,dst> alias device, if any. * net_alias_dev_rcv_sel32 returns main device if it * fails to found other. */ #ifdef CONFIG_NET_ALIAS if (iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev)) skb->dev = dev = net_alias_dev_rcv_sel32(skb->dev, AF_INET, iph->saddr, iph->daddr); #endif if (iph->ihl > 5) { skb->ip_summed = 0; if (ip_options_compile(NULL, skb)) return(0); opt = (struct options*)skb->proto_priv; #ifdef CONFIG_IP_NOSR if (opt->srr) { kfree_skb(skb, FREE_READ); return -EINVAL; } #endif } #if defined(CONFIG_IP_TRANSPARENT_PROXY) && !defined(CONFIG_IP_ALWAYS_DEFRAG) #define CONFIG_IP_ALWAYS_DEFRAG 1 #endif #ifdef CONFIG_IP_ALWAYS_DEFRAG /* * Defragment all incoming traffic before even looking at it. * If you have forwarding enabled, this makes the system a * defragmenting router. Not a common thing. * You probably DON'T want to enable this unless you have to. * You NEED to use this if you want to use transparent proxying, * otherwise, we can't vouch for your sanity. */ /* * See if the frame is fragmented. */ if(iph->frag_off) { if (iph->frag_off & htons(IP_MF)) is_frag|=IPFWD_FRAGMENT; /* * Last fragment ? */ if (iph->frag_off & htons(IP_OFFSET)) is_frag|=IPFWD_LASTFRAG; /* * Reassemble IP fragments. */ if(is_frag) { /* Defragment. Obtain the complete packet if there is one */ skb=ip_defrag(iph,skb,dev); if(skb==NULL) return 0; skb->dev = dev; iph=skb->h.iph; is_frag = 0; /* * When the reassembled packet gets forwarded, the ip * header checksum should be correct. * For better performance, this should actually only * be done in that particular case, i.e. set a flag * here and calculate the checksum in ip_forward. */ ip_send_check(iph); } } #endif /* * See if the firewall wants to dispose of the packet. */ #ifdef CONFIG_FIREWALL if ((fwres=call_in_firewall(PF_INET, skb->dev, iph, &rport))<FW_ACCEPT) { if(fwres==FW_REJECT) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); kfree_skb(skb, FREE_WRITE); return 0; } #ifdef CONFIG_IP_TRANSPARENT_PROXY if (fwres==FW_REDIRECT) skb->redirport = rport; else #endif skb->redirport = 0; #endif #ifndef CONFIG_IP_ALWAYS_DEFRAG /* * Remember if the frame is fragmented. */ if(iph->frag_off) { if (iph->frag_off & htons(IP_MF)) is_frag|=IPFWD_FRAGMENT; /* * Last fragment ? */ if (iph->frag_off & htons(IP_OFFSET)) is_frag|=IPFWD_LASTFRAG; } #endif /* * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. * * This is inefficient. While finding out if it is for us we could also compute * the routing table entry. This is where the great unified cache theory comes * in as and when someone implements it * * For most hosts over 99% of packets match the first conditional * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at * function entry. */ daddr = iph->daddr; #ifdef CONFIG_IP_TRANSPARENT_PROXY /* * ip_chksock adds still more overhead for forwarded traffic... */ if ( iph->daddr == skb->dev->pa_addr || skb->redirport || (brd = ip_chk_addr(iph->daddr)) != 0 || ip_chksock(skb)) #else if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0) #endif { if (opt && opt->srr) { int srrspace, srrptr; __u32 nexthop; unsigned char * optptr = ((unsigned char *)iph) + opt->srr; if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) { kfree_skb(skb, FREE_WRITE); return 0; } for ( srrptr=optptr[2], srrspace = optptr[1]; srrptr <= srrspace; srrptr += 4 ) { int brd2; if (srrptr + 3 > srrspace) { icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2, skb->dev); kfree_skb(skb, FREE_WRITE); return 0; } memcpy(&nexthop, &optptr[srrptr-1], 4); if ((brd2 = ip_chk_addr(nexthop)) == 0) break; if (brd2 != IS_MYADDR) { /* * ANK: should we implement weak tunneling of multicasts? * Are they obsolete? DVMRP specs (RFC-1075) is old enough... * [They are obsolete] */ kfree_skb(skb, FREE_WRITE); return -EINVAL; } memcpy(&daddr, &optptr[srrptr-1], 4); } if (srrptr <= srrspace) { opt->srr_is_hit = 1; opt->is_changed = 1; #ifdef CONFIG_IP_FORWARD if (ip_forward(skb, dev, is_frag, nexthop)) kfree_skb(skb, FREE_WRITE); #else ip_statistics.IpInAddrErrors++; kfree_skb(skb, FREE_WRITE); #endif return 0; } } #ifdef CONFIG_IP_MULTICAST if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) { /* * Check it is for one of our groups */ struct ip_mc_list *ip_mc=dev->ip_mc_list; do { if(ip_mc==NULL) { kfree_skb(skb, FREE_WRITE); return 0; } if(ip_mc->multiaddr==iph->daddr) break; ip_mc=ip_mc->next; } while(1); } #endif #ifndef CONFIG_IP_ALWAYS_DEFRAG /* * Reassemble IP fragments. */ if(is_frag) { /* Defragment. Obtain the complete packet if there is one */ skb=ip_defrag(iph,skb,dev); if(skb==NULL) return 0; skb->dev = dev; iph=skb->h.iph; } #endif #ifdef CONFIG_IP_MASQUERADE /* * Do we need to de-masquerade this packet? */ { int ret = ip_fw_demasquerade(&skb,dev); if (ret < 0) { kfree_skb(skb, FREE_WRITE); return 0; } if (ret) { struct iphdr *iph=skb->h.iph; if (ip_forward(skb, dev, IPFWD_MASQUERADED, iph->daddr)) kfree_skb(skb, FREE_WRITE); return 0; } } #endif /* * Point into the IP datagram, just past the header. */ skb->ip_hdr = iph; skb->h.raw += iph->ihl*4; #ifdef CONFIG_IP_MROUTE /* * Check the state on multicast routing (multicast and not 224.0.0.z) */ if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000)) mroute_pkt=1; #endif /* * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. * * RFC 1122: SHOULD pass TOS value up to the transport layer. */ hash = iph->protocol & (SOCK_ARRAY_SIZE-1); /* * If there maybe a raw socket we must check - if not we don't care less */ if((raw_sk=raw_prot.sock_array[hash])!=NULL) { struct sock *sknext=NULL; struct sk_buff *skb1; raw_sk=get_sock_raw(raw_sk, iph->protocol, iph->saddr, iph->daddr); if(raw_sk) /* Any raw sockets */ { do { /* Find the next */ sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr); if(sknext) skb1=skb_clone(skb, GFP_ATOMIC); else break; /* One pending raw socket left */ if(skb1) raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr); raw_sk=sknext; } while(raw_sk!=NULL); /* * Here either raw_sk is the last raw socket, or NULL if none */ /* * We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy */ } } /* * skb->h.raw now points at the protocol beyond the IP header. */ hash = iph->protocol & (MAX_INET_PROTOS -1); for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) { struct sk_buff *skb2; if (ipprot->protocol != iph->protocol) continue; /* * See if we need to make a copy of it. This will * only be set if more than one protocol wants it. * and then not for the last one. If there is a pending * raw delivery wait for that */ #ifdef CONFIG_IP_MROUTE if (ipprot->copy || raw_sk || mroute_pkt) #else if (ipprot->copy || raw_sk) #endif { skb2 = skb_clone(skb, GFP_ATOMIC); if(skb2==NULL) continue; } else { skb2 = skb; } flag = 1; /* * Pass on the datagram to each protocol that wants it, * based on the datagram protocol. We should really * check the protocol handler's return values here... */ ipprot->handler(skb2, dev, opt, daddr, (ntohs(iph->tot_len) - (iph->ihl * 4)), iph->saddr, 0, ipprot); } /* * All protocols checked. * If this packet was a broadcast, we may *not* reply to it, since that * causes (proven, grin) ARP storms and a leakage of memory (i.e. all * ICMP reply messages get queued up for transmission...) */ #ifdef CONFIG_IP_MROUTE /* * Forward the last copy to the multicast router. If * there is a pending raw delivery however make a copy * and forward that. */ if(mroute_pkt) { flag=1; if(raw_sk==NULL) ipmr_forward(skb, is_frag); else { struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); if(skb2) { skb2->free=1; ipmr_forward(skb2, is_frag); } } } #endif if(raw_sk!=NULL) /* Shift to last raw user */ raw_rcv(raw_sk, skb, dev, iph->saddr, daddr); else if (!flag) /* Free and report errors */ { if (brd != IS_BROADCAST && brd!=IS_MULTICAST) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); kfree_skb(skb, FREE_WRITE); } return(0); } /* * Do any unicast IP forwarding required. */ /* * Don't forward multicast or broadcast frames. */ if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) { kfree_skb(skb,FREE_WRITE); return 0; } /* * The packet is for another target. Forward the frame */ #ifdef CONFIG_IP_FORWARD if (opt && opt->is_strictroute) { icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev); kfree_skb(skb, FREE_WRITE); return -1; } if (ip_forward(skb, dev, is_frag, iph->daddr)) kfree_skb(skb, FREE_WRITE); #else /* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", iph->saddr,iph->daddr);*/ ip_statistics.IpInAddrErrors++; kfree_skb(skb, FREE_WRITE); #endif return(0); }