/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              The IP forwarding functionality.
 *              
 * Authors:     see ip.c
 *
 * Fixes:
 *              Many            :       Split from ip.c , see ip_input.c for 
 *                                      history.
 *              Dave Gregorich  :       NULL ip_rt_put fix for multicast 
 *                                      routing.
 *              Jos Vos         :       Add call_out_firewall before sending,
 *                                      use output device for accounting.
 *              Jos Vos         :       Call forward firewall after routing
 *                                      (always use output device).
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/firewall.h>
#include <linux/ip_fw.h>
#ifdef CONFIG_IP_MASQUERADE
#include <net/ip_masq.h>
#endif
#include <net/checksum.h>
#include <linux/route.h>
#include <net/route.h>
 
#ifdef CONFIG_IP_FORWARD
#ifdef CONFIG_IP_MROUTE

/*
 *      Wrap a datagram in an outer IPIP header and build the link-layer
 *      header for device 'out'.  Doing this inline avoids a separate
 *      tunnel driver on the multicast forwarding path.
 *
 *      skb     - buffer to encapsulate; it must have headroom for the
 *                outer IP header and the link-layer header, and
 *                skb->ip_hdr must point at the header whose TOS and TTL
 *                are to be copied.
 *      len     - length handed to the device's hard_header routine
 *                (clamped to 65515).
 *      out     - output device; its pa_addr is used as the outer source.
 *      daddr   - outer destination, also recorded in skb->raddr.
 */

static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr)
{
        struct iphdr *outer;

        /*
         *      Make room for the outer header in front of the data.
         */
        outer = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

        if (len > 65515)
                len = 65515;

        /*
         *      Fill in the outer header.  TOS and TTL are inherited from
         *      the header skb->ip_hdr points at; tot_len covers everything
         *      currently in the buffer, including the header just pushed.
         */
        outer->version  = 4;
        outer->ihl      = 5;
        outer->tos      = skb->ip_hdr->tos;
        outer->tot_len  = htons(skb->len);
        outer->id       = htons(ip_id_count++);
        outer->frag_off = 0;
        outer->ttl      = skb->ip_hdr->ttl;
        outer->protocol = IPPROTO_IPIP;
        outer->saddr    = out->pa_addr;
        outer->daddr    = daddr;
        ip_send_check(outer);

        /*
         *      Bind the buffer to the output device and next hop.
         */
        skb->dev   = out;
        skb->raddr = daddr;
        skb->arp   = 1;

        /*
         *      Let the driver push its physical header.  A negative return
         *      clears skb->arp again.
         */
        if (out->hard_header != NULL)
        {
                if (out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len) < 0)
                        skb->arp = 0;
        }

        /*
         *      The buffer is now ready to be queued for transmission.
         */
}

#endif

/*
 *      Forward an IP datagram to its next destination.
 *
 *      skb          - the received datagram.  skb->h.iph is its IP header
 *                     and skb->proto_priv holds the parsed IP options.
 *      dev          - the device the datagram arrived on; ICMP errors are
 *                     generated against it.
 *      is_frag      - flag word.  IPFWD_MULTICASTING means routing has
 *                     already been done (skb->dev / skb->raddr are used),
 *                     IPFWD_MULTITUNNEL additionally asks for IPIP
 *                     encapsulation, IPFWD_MASQUERADED skips the forward
 *                     firewall and masquerading.  The word is also passed
 *                     through to ip_fragment().
 *      target_addr  - address used for the route lookup and for source
 *                     route processing.
 *
 *      Returns -1 if the datagram was not forwarded (an ICMP error may
 *      have been sent), 0 if the buffer in 'skb' itself was handed on for
 *      transmission (or was replaced by the masquerader), and 1 if a copy
 *      was transmitted instead.
 *
 *      Every drop after a route has been obtained leaves through the
 *      'drop' label so the route reference is always released.
 */

int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag,
               __u32 target_addr)
{
        struct device *dev2;    /* Output device */
        struct iphdr *iph;      /* Our header */
        struct sk_buff *skb2;   /* Output packet */
        struct rtable *rt = NULL;       /* Route we use */
        unsigned char *ptr;     /* Data pointer */
        unsigned long raddr;    /* Router IP address */
        struct   options * opt  = (struct options*)skb->proto_priv;
        struct hh_cache *hh = NULL;
        int encap = 0;          /* Encap length */
#ifdef CONFIG_FIREWALL
        int fw_res = 0;         /* Forwarding result */
#ifdef CONFIG_IP_MASQUERADE
        struct sk_buff *skb_in = skb;   /* So we can remember if the masquerader did some swaps */
#endif /* CONFIG_IP_MASQUERADE */
#endif /* CONFIG_FIREWALL */

        /*
         *      The TTL must be decreased before forwarding, and a datagram
         *      whose TTL would reach zero is answered with an ICMP time
         *      exceeded instead.
         *
         *      The test is made BEFORE the decrement: decrementing first
         *      lets a datagram that arrives with TTL 0 wrap around and be
         *      forwarded (assuming the usual unsigned 8 bit field), and it
         *      makes the ICMP error quote a header we already modified.
         *
         *      We may not generate an ICMP for an ICMP. icmp_send does the
         *      enforcement of this so we can forget it here.
         */

        iph = skb->h.iph;
        if (iph->ttl <= 1)
        {
                /* Tell the sender its packet died... */
                icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
                return -1;
        }
        iph->ttl--;

        /*
         *      Adjust the header checksum incrementally rather than
         *      recomputing it: the TTL is the high byte of its 16 bit
         *      word, so the checksum goes up by 0x0100 with an end-around
         *      carry.
         */

        iph->check = ntohs(iph->check) + 0x0100;
        if ((iph->check & 0xFF00) == 0)
                iph->check++;           /* carry overflow */
        iph->check = htons(iph->check);

#ifdef CONFIG_IP_MROUTE
        if(!(is_frag&IPFWD_MULTICASTING))
        {
#endif
                /*
                 *      Unicast: look up a route for the target address.
                 */

                rt = ip_rt_route(target_addr, 0);

                if (rt == NULL)
                {
                        /*
                         *      Tell the sender its packet cannot be delivered. Again
                         *      ICMP is screened later.
                         */
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
                        return -1;
                }

                /*
                 *      The next hop is whatever the route names as gateway.
                 */

                raddr = rt->rt_gateway;

                if (opt->is_strictroute && (rt->rt_flags & RTF_GATEWAY)) {
                        /*
                         *      Strict routing permits no gatewaying
                         */

                        ip_rt_put(rt);
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
                        return -1;
                }

                /*
                 *      Having picked a route we can now send the frame out
                 *      after asking the firewall permission to do so.
                 */

                dev2 = rt->rt_dev;
                hh = rt->rt_hh;
                /*
                 *      A frame that would leave on the interface it arrived
                 *      on, from a sender on that interface's own network,
                 *      earns the sender an ICMP HOST REDIRECT naming the
                 *      next hop we calculated.
                 */
#ifndef CONFIG_IP_NO_ICMP_REDIRECT
                if (dev == dev2 &&
                        !((iph->saddr^dev->pa_addr)&dev->pa_mask) &&
                        /* The daddr!=raddr test isn't obvious - what it's doing
                           is avoiding sending a frame the receiver will not
                           believe anyway.. */
                        iph->daddr != raddr/*ANK*/ && !opt->srr)
                                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
#endif
#ifdef CONFIG_IP_MROUTE
        }
        else
        {
                /*
                 *      Multicast route forward. Routing is already done
                 */
                dev2=skb->dev;
                raddr=skb->raddr;
                if(is_frag&IPFWD_MULTITUNNEL)   /* VIFF_TUNNEL mode */
                        encap=20;
                rt=NULL;
        }
#endif

        /*
         *      See if we are allowed to forward this.
         *      Note: demasqueraded fragments are always 'back'warded.
         */

#ifdef CONFIG_FIREWALL
        if(!(is_frag&IPFWD_MASQUERADED))
        {
#ifdef CONFIG_IP_MASQUERADE
                /*
                 *      Check that any ICMP packets are not for a
                 *      masqueraded connection.  If so rewrite them
                 *      and skip the firewall checks
                 */
                if (iph->protocol == IPPROTO_ICMP)
                {
                        if ((fw_res = ip_fw_masq_icmp(&skb, dev2)) < 0)
                                /* Problem - ie bad checksum */
                                goto drop;

                        if (fw_res)
                                /* ICMP matched - skip firewall */
                                goto skip_call_fw_firewall;
                }
#endif
                fw_res=call_fw_firewall(PF_INET, dev2, iph, NULL);
                switch (fw_res) {
                case FW_ACCEPT:
                case FW_MASQUERADE:
                        break;
                case FW_REJECT:
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
                        /* fall thru */
                default:
                        goto drop;
                }

#ifdef CONFIG_IP_MASQUERADE
                skip_call_fw_firewall:
                        ;       /* a label needs a statement to attach to */
#endif
        }
#endif

        /*
         *      Nothing can be sent through an interface that is down.
         */

        if (!(dev2->flags & IFF_UP))
                goto drop;

#ifdef CONFIG_IP_MASQUERADE
        /*
         * If this fragment needs masquerading, make it so...
         * (Don't masquerade de-masqueraded fragments)
         *
         * NOTE(review): ip_fw_masquerade() is handed &skb and may
         * apparently substitute a different buffer (see the skb_in test
         * at the end of this function).  'iph' and 'opt' keep referring
         * to the buffer we were called with - confirm that is intended.
         */
        if (!(is_frag&IPFWD_MASQUERADED) && fw_res==FW_MASQUERADE)
        {
                if (ip_fw_masquerade(&skb, dev2) < 0)
                {
                        /*
                         * Masquerading failed; silently discard this packet.
                         */
                        goto drop;
                }
        }
#endif
        IS_SKB(skb);

        /*
         *      Too big for the output device and not allowed to be
         *      fragmented: report the MTU back to the sender.
         */

        if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF))
        {
                ip_statistics.IpFragFails++;
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dev2->mtu), dev);
                goto drop;
        }

        /*
         *      encap is zero unless we are tunnelling, so one test serves
         *      both the plain and the multicast-routing configuration.
         */

        if(skb_headroom(skb)-encap<dev2->hard_header_len)
        {
                /*
                 *      The new headers do not fit in front of the data, so
                 *      the datagram has to be copied into a fresh buffer.
                 *      This should be very rare.
                 */

                skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC);

                /*
                 *      Running out of memory is rare and since IP is tolerant
                 *      of network failures quite harmless.
                 */

                if (skb2 == NULL)
                {
                        NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
                        goto drop;
                }

                IS_SKB(skb2);
                skb2->protocol=htons(ETH_P_IP);

                /*
                 *      Plain forwarding builds the physical header now.
                 *      Tunnelling only reserves headroom in the NEW buffer,
                 *      sized for the OUTPUT device; the IPIP and physical
                 *      headers are pushed once the datagram has been copied.
                 */
#ifdef CONFIG_IP_MROUTE
                if(is_frag&IPFWD_MULTITUNNEL)
                        skb_reserve(skb2,(encap+dev2->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
                else
#endif
                        ip_send(rt,skb2,raddr,skb->len,dev2,dev2->pa_addr);

                ptr = skb_put(skb2,skb->len);
                skb2->free = 1;
                skb2->h.raw = ptr;

                /*
                 *      Copy the packet data and the parsed options into
                 *      the new buffer.
                 */
                memcpy(ptr, skb->h.raw, skb->len);
                memcpy(skb2->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
                iph = skb2->ip_hdr = skb2->h.iph;

#ifdef CONFIG_IP_MROUTE
                /*
                 *      Encapsulate only now: ip_encap() reads TOS and TTL
                 *      through skb2->ip_hdr and takes the total length from
                 *      the buffer, so both must be in place first.  This is
                 *      the same order the in-place branch below uses.
                 */
                if(is_frag&IPFWD_MULTITUNNEL)
                        ip_encap(skb2,skb->len, dev2, raddr);
#endif
        }
        else
        {
                /*
                 *      Enough headroom: reuse the buffer and build a new
                 *      MAC header in front of the data.
                 */

                skb2 = skb;
                skb2->dev=dev2;
#ifdef CONFIG_IP_MROUTE
                if(is_frag&IPFWD_MULTITUNNEL)
                        ip_encap(skb,skb->len, dev2, raddr);
                else
                {
#endif
                        skb->arp=1;
                        skb->raddr=raddr;
                        if (hh)
                        {
                                /*
                                 *      Use the header cached with the route;
                                 *      a stale entry clears skb->arp.
                                 */
                                memcpy(skb_push(skb, dev2->hard_header_len), hh->hh_data, dev2->hard_header_len);
                                if (!hh->hh_uptodate)
                                {
#if RT_CACHE_DEBUG >= 2
                                        printk("ip_forward: hh miss %08x via %08x\n", target_addr, rt->rt_gateway);
#endif
                                        skb->arp = 0;
                                }
                        }
                        else if (dev2->hard_header)
                        {
                                if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
                                        skb->arp=0;
                        }
#ifdef CONFIG_IP_MROUTE
                }
#endif
        }
#ifdef CONFIG_FIREWALL
        if((fw_res = call_out_firewall(PF_INET, skb2->dev, iph, NULL)) < FW_ACCEPT)
        {
                /* FW_ACCEPT and FW_MASQUERADE are treated equal:
                   masquerading is only supported via forward rules */
                if (fw_res == FW_REJECT)
                        icmp_send(skb2, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
                if (skb != skb2)
                        kfree_skb(skb2,FREE_WRITE);
                goto drop;
        }
#endif
        ip_statistics.IpForwDatagrams++;

        /*
         *      Fill in the options that want our outgoing address.
         */

        if (opt->optlen)
        {
                unsigned char * optptr;
                if (opt->rr_needaddr)
                {
                        /* Record route */
                        optptr = (unsigned char *)iph + opt->rr;
                        memcpy(&optptr[optptr[2]-5], &dev2->pa_addr, 4);
                        opt->is_changed = 1;
                }
                if (opt->srr_is_hit)
                {
                        int srrptr, srrspace;

                        optptr = (unsigned char *)iph + opt->srr;

                        /*
                         *      Find the source route slot holding the
                         *      address we routed towards.
                         */
                        for ( srrptr=optptr[2], srrspace = optptr[1];
                              srrptr <= srrspace;
                             srrptr += 4
                            )
                        {
                                if (srrptr + 3 > srrspace)
                                        break;
                                if (memcmp(&target_addr, &optptr[srrptr-1], 4) == 0)
                                        break;
                        }
                        if (srrptr + 3 <= srrspace)
                        {
                                /*
                                 *      Put our address in that slot, make the
                                 *      target the new destination and advance
                                 *      the option pointer.
                                 */
                                opt->is_changed = 1;
                                memcpy(&optptr[srrptr-1], &dev2->pa_addr, 4);
                                iph->daddr = target_addr;
                                optptr[2] = srrptr+4;
                        }
                        else
                                printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
                }
                if (opt->ts_needaddr)
                {
                        /* Timestamp with address */
                        optptr = (unsigned char *)iph + opt->ts;
                        memcpy(&optptr[optptr[2]-9], &dev2->pa_addr, 4);
                        opt->is_changed = 1;
                }
                if (opt->is_changed)
                {
                        /* The header was edited: the checksum must follow */
                        opt->is_changed = 0;
                        ip_send_check(iph);
                }
        }
/*
 * ANK:  this is point of "no return", we cannot send an ICMP,
 *       because we changed SRR option.
 */

        /*
         *      See if it needs fragmenting. Note in ip_rcv we tagged
         *      the fragment type. This must be right so that
         *      the fragmenter does the right thing.
         */

        if(skb2->len > dev2->mtu + dev2->hard_header_len)
        {
                ip_fragment(NULL,skb2,dev2, is_frag);
                kfree_skb(skb2,FREE_WRITE);
        }
        else
        {
#ifdef CONFIG_IP_ACCT
                /*
                 *      Count mapping we shortcut
                 */

                ip_fw_chk(iph,dev2,NULL,ip_acct_chain,0,IP_FW_MODE_ACCT_OUT);
#endif

                /*
                 *      Map service types to priority. We lie about
                 *      throughput being low priority, but it's a good
                 *      choice to help improve general usage.
                 */
                if(iph->tos & IPTOS_LOWDELAY)
                        dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
                else if(iph->tos & IPTOS_THROUGHPUT)
                        dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
                else
                        dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
        }

        if(rt)
                ip_rt_put(rt);

        /*
         *      Tell the caller if their buffer is free.
         */

        if(skb==skb2)
                return 0;

#ifdef CONFIG_IP_MASQUERADE
        /*
         *      The original is free. Free our copy and
         *      tell the caller not to free.
         */
        if(skb!=skb_in)
        {
                kfree_skb(skb_in, FREE_WRITE);
                return 0;
        }
#endif
        return 1;

        /*
         *      Common exit for every datagram dropped after routing:
         *      release the route (there is none for multicast) and fail.
         */
drop:
        if(rt)
                ip_rt_put(rt);
        return -1;
}


#endif