ip_read.c implements the IP interface for read calls.

     1  /*
     2  ip_read.c
       
     3  Copyright 1995 Philip Homburg
     4  */
       
     5  #include "inet.h"
     6  #include "buf.h"
     7  #include "clock.h"
     8  #include "event.h"
     9  #include "type.h"
       
    10  #include "assert.h"
    11  #include "icmp_lib.h"
    12  #include "io.h"
    13  #include "ip.h"
    14  #include "ip_int.h"
    15  #include "ipr.h"
       
    16  THIS_FILE
       
    17  FORWARD ip_ass_t *find_ass_ent ARGS(( ip_port_t *ip_port, U16_t id,
    18          int proto, ipaddr_t src, ipaddr_t dst ));
    19  FORWARD acc_t *merge_frags ARGS(( acc_t *first, acc_t *second ));
    20  FORWARD int ip_frag_chk ARGS(( acc_t *pack ));
    21  FORWARD acc_t *reassemble ARGS(( ip_port_t *ip_port, acc_t *pack,
    22          ip_hdr_t *ip_hdr ));
    23  FORWARD int broadcast_dst ARGS(( ip_port_t *ip_port, ipaddr_t dest ));
    24  FORWARD void packet2user ARGS(( ip_fd_t *ip_fd, acc_t *pack,
    25          time_t exp_time ));
       

ip_read() is called in 2 ways. 1) ip_read is called by a lower level protocol interface in order to read a
packet. For example read_ip_packets() attempts to read a packet by calling ip_read().
2) If a user process directly opens the ip device (eg by calling Open("/dev/ip",
whatever_mode_you_want) then ip_read is called by the corresponding file descriptor
in the sr_fd_table in sr.c. ip_read() is very similar to udp_read().
ip_read() is called by a lower level protocol interface once, at the very beginning, after it
has been configured. After that the ip interface automatically puts each received packet into
the lower level protocol interface's queue in packet2user(), so that ip_read() need not be
called again directly by the lower level protocol interface.

    26  PUBLIC int ip_read (fd, count)
    27  int fd;
    28  size_t count;
    29  {
    30          ip_fd_t *ip_fd;
    31          acc_t *pack;
       
    32          ip_fd= &ip_fd_table[fd];

IFF_OPTSET flag is set after ip channel has been successfully configured by a NWIOSIPOPT ioctl call.

    33          if (!(ip_fd->if_flags & IFF_OPTSET))
    34                  return (*ip_fd->if_put_userdata)(ip_fd->if_srfd, EBADMODE,
    35                          (acc_t *)0, FALSE);
       

Saves count in case read call gets suspended.

    36          ip_fd->if_rd_count= count;
       

Marks that the ip channel is processing a read call.
If ip_read is being called by a lower level protocol interface like UDP or TCP, then the
IFF_READ_IP flag is never unset because the lower level protocol interface has a queue
which saves all of the received packets. The IP interface determines whether or not to unset
the IFF_READ_IP flag by checking whether or not the if_put_pkt field of ip_fd has been set
(see packet2user()).

    37          ip_fd->if_flags |= IFF_READ_IP;

If a packet has arrived already, dequeue it and return it to the user. Otherwise return NW_SUSPEND.

    38          if (ip_fd->if_rdbuf_head)
    39          {

Deliver the packet at the head of the queue only if the queued packets have not expired yet,
i.e. the current time is not past if_exp_time.

    40                  if (get_time() <= ip_fd->if_exp_time)
    41                  {
    42                          pack= ip_fd->if_rdbuf_head;
    43                          ip_fd->if_rdbuf_head= pack->acc_ext_link;

Send packet to user by calling packet2user().

    44                          packet2user (ip_fd, pack, ip_fd->if_exp_time);
    45                          assert(!(ip_fd->if_flags & IFF_READ_IP));
    46                          return NW_OK;
    47                  }

The queued packets have expired: free every packet left in the queue, then fall through and
return NW_SUSPEND.

    48                  while (ip_fd->if_rdbuf_head)
    49                  {
    50                          pack= ip_fd->if_rdbuf_head;
    51                          ip_fd->if_rdbuf_head= pack->acc_ext_link;
    52                          bf_afree(pack);
    53                  }
    54          }
    55          return NW_SUSPEND;
    56  }
       

reassemble(): reassemble attempts to reassemble a fragmented datagram which has been
received in several different packets.

    57  PRIVATE acc_t *reassemble (ip_port, pack, pack_hdr)
    58  ip_port_t *ip_port;
    59  acc_t *pack;

pack_hdr is the ip header of the packet pack.
The ip header structure ip_hdr_t is defined in include/net/gen/ip_hdr.h.

typedef struct ip_hdr
{
    u8_t ih_vers_ihl,
        ih_tos;
    u16_t ih_length,
        ih_id,
        ih_flags_fragoff;
    u8_t ih_ttl,
        ih_proto;
    u16_t ih_hdr_chk;
    ipaddr_t ih_src,
        ih_dst;
} ip_hdr_t;

The contents of the IP header are defined in RFC 791.


    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |Version|  IHL  |Type of Service|          Total Length         |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |         Identification        |Flags|      Fragment Offset    |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  Time to Live |    Protocol   |         Header Checksum       |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                       Source Address                          |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                    Destination Address                        |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |                    Options                    |    Padding    |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+


ih_vers_ihl: ip header version, IP header length.
ih_tos: precedence, type of service.
ih_length: length of datagram.
ih_id: datagram id number. Unique id number for a datagram. If a datagram is
fragmented into several packets, each of the packets of the datagram has the same
datagram id number.
ih_flags_fragoff: Fragmentation information.
The three leftmost bits are flags (unused, don't fragment, more fragments) and the remaining
13 bits hold the offset of the fragment within the datagram, in units of 8 bytes.
The field is decoded using the following masks defined in include/net/gen/ip_hdr.h.

#define IH_FRAGOFF_MASK 0x1fff
#define IH_MORE_FRAGS 0x2000
#define IH_DONT_FRAG 0x4000
#define IH_FLAGS_UNUSED 0x8000

ih_ttl: time to live.
ih_proto: protocol field for lower level protocol.
ih_hdr_chk: checksum of ip header.
ih_src: source ip address.
ih_dst: destination ip address.
    60  ip_hdr_t *pack_hdr;
    61  {
    62          ip_ass_t *ass_ent;
    63          size_t pack_hdr_len, pack_data_len, pack_offset, tmp_offset;
    64          u16_t pack_flags_fragoff;
    65          acc_t *prev_acc, *curr_acc, *next_acc, *head_acc, *tmp_acc;
    66          ip_hdr_t *tmp_hdr;
    67          time_t first_time;
       

First reassemble calls find_ass_ent to obtain an ip_ass_t structure. If no packet for
the datagram has previously arrived, a new ip_ass_t structure is returned by find_ass_ent;
otherwise, if a packet for the datagram has already arrived, the previously used ip_ass_t
structure is returned by find_ass_ent.

    68          ass_ent= find_ass_ent (ip_port, pack_hdr->ih_id,
    69                  pack_hdr->ih_proto, pack_hdr->ih_src, pack_hdr->ih_dst);
       
    70          pack_flags_fragoff= ntohs(pack_hdr->ih_flags_fragoff);

pack_hdr_len = size of ip header.

    71          pack_hdr_len= (pack_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;

pack_data_len = size of payload data.

    72          pack_data_len= ntohs(pack_hdr->ih_length)-pack_hdr_len;

pack_offset = offset of data in pack in the datagram.

    73          pack_offset= (pack_flags_fragoff & IH_FRAGOFF_MASK)*8;
    74          pack->acc_ext_link= NULL;
       

Received packets are put in a linked list sorted by offset in datagram where offset = tmp_offset.
head_acc = head of linked list.

    75          head_acc= ass_ent->ia_frags;
    76          ass_ent->ia_frags= NULL;
    77          if (head_acc == NULL)
    78          {
    79                  ass_ent->ia_frags= pack;
    80                  return NULL;
    81          }
       
    82          prev_acc= NULL;
    83          curr_acc= NULL;
    84          next_acc= head_acc;
       

Set curr_acc = packet before pack. Set next_acc = packet after pack.

    85          while(next_acc)
    86          {
    87                  tmp_hdr= (ip_hdr_t *)ptr2acc_data(next_acc);
    88                  tmp_offset= (ntohs(tmp_hdr->ih_flags_fragoff) &
    89                          IH_FRAGOFF_MASK)*8;
       
    90                  if (pack_offset < tmp_offset)
    91                          break;
       
    92                  prev_acc= curr_acc;
    93                  curr_acc= next_acc;
    94                  next_acc= next_acc->acc_ext_link;
    95          }

Insert pack into the list headed by head_acc, merging it with its neighbours by calling merge_frags().

    96          if (curr_acc == NULL)
    97          {
    98                  assert(prev_acc == NULL);
    99                  assert(next_acc != NULL);
       
   100                  curr_acc= merge_frags(pack, next_acc);
   101                  head_acc= curr_acc;
   102          }
   103          else
   104          {
   105                  curr_acc= merge_frags(curr_acc, pack);
   106                  if (next_acc != NULL)
   107                          curr_acc= merge_frags(curr_acc, next_acc);
   108                  if (prev_acc != NULL)
   109                          prev_acc->acc_ext_link= curr_acc;
   110                  else
   111                          head_acc= curr_acc;
   112          }

Set ia_frags to head of linked list.

   113          ass_ent->ia_frags= head_acc;
       
   114          pack= ass_ent->ia_frags;
   115          pack_hdr= (ip_hdr_t *)ptr2acc_data(pack);
   116          pack_flags_fragoff= ntohs(pack_hdr->ih_flags_fragoff);
       

Check if it is a complete packet.

   117          if (!(pack_flags_fragoff & (IH_FRAGOFF_MASK|IH_MORE_FRAGS)))
   118                  /* it's now a complete packet */
   119          {
   120                  first_time= ass_ent->ia_first_time;
       

Set both to 0 to show that ass_ent is unused.

   121                  ass_ent->ia_frags= 0;
   122                  ass_ent->ia_first_time= 0;
       

The datagram is complete: free any fragments that are still linked behind pack.

   123                  while (pack->acc_ext_link)
   124                  {
   125                          tmp_acc= pack->acc_ext_link;
   126                          pack->acc_ext_link= tmp_acc->acc_ext_link;
   127                          bf_afree(tmp_acc);
   128                  }

If packet has expired send a time exceeded message back to the source who sent the original message.
Otherwise return the packet pack.

   129                  if ((ass_ent->ia_min_ttl) * HZ + first_time <
   130                          get_time())
   131                          icmp_snd_time_exceeded(ip_port-ip_port_table, pack,
   132                                  ICMP_FRAG_REASSEM);
   133                  else
   134                          return pack;
   135          }
   136          return NULL;
   137  }
       

merge_frags(first, second): merge_frags merges two acc_t structures first and second so
that the data in the second structure is appended to the end of the data in the first structure
by calling bf_append().

   138  PRIVATE acc_t *merge_frags (first, second)
   139  acc_t *first, *second;
   140  {
   141          ip_hdr_t *first_hdr, *second_hdr;
   142          size_t first_hdr_size, second_hdr_size, first_datasize, second_datasize,
   143                  first_offset, second_offset;
   144          acc_t *cut_second, *tmp_acc;
       

If second is null then return first.

   145          if (!second)
   146          {
   147                  first->acc_ext_link= NULL;
   148                  return first;
   149          }
       
   150  assert (first->acc_length >= IP_MIN_HDR_SIZE);
   151  assert (second->acc_length >= IP_MIN_HDR_SIZE);
       
   152          first_hdr= (ip_hdr_t *)ptr2acc_data(first);

first_offset = offset of first in datagram.

   153          first_offset= (ntohs(first_hdr->ih_flags_fragoff) &
   154                  IH_FRAGOFF_MASK) * 8;

first_hdr_size = header size of first.

   155          first_hdr_size= (first_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;

first_datasize = size of payload data of first.

   156          first_datasize= ntohs(first_hdr->ih_length) - first_hdr_size;
       
   157          second_hdr= (ip_hdr_t *)ptr2acc_data(second);

second_offset = offset of second in datagram.

   158          second_offset= (ntohs(second_hdr->ih_flags_fragoff) &
   159                  IH_FRAGOFF_MASK) * 8;

second_hdr_size = header size of second.

   160          second_hdr_size= (second_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;

second_datasize = size of payload data of second.

   161          second_datasize= ntohs(second_hdr->ih_length) - second_hdr_size;
       
   162          assert (first_hdr_size + first_datasize == bf_bufsize(first));
   163          assert (second_hdr_size + second_datasize == bf_bufsize(second));
   164          assert (second_offset >= first_offset);
       

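If there is a gap between the end of the data in the first packet and the start of the data in
the second packet, the two cannot be merged yet: second is linked behind first and first is returned.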

   165          if (second_offset > first_offset+first_datasize)
   166          {
   167                  DBLOCK(1, printf("ip fragments out of order\n"));
   168                  first->acc_ext_link= second;
   169                  return first;
   170          }
       

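If the data in the second packet does not extend past the end of the data in the first packet,
there is nothing new to append. In that case first is freed and second is returned without merging.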

   171          if (second_offset + second_datasize <= first_offset +
   172                  first_datasize)
   173          {
   174                  /* May cause problems if we try to merge. */
   175                  bf_afree(first);
   176                  return second;
   177          }
       

!(second_hdr->ih_flags_fragoff & HTONS(IH_MORE_FRAGS)) means no more fragments are coming
and the datagram is complete so set first_hdr to indicate that.

   178          if (!(second_hdr->ih_flags_fragoff & HTONS(IH_MORE_FRAGS)))
   179                  first_hdr->ih_flags_fragoff &= ~HTONS(IH_MORE_FRAGS);
       

second_datasize= size of data to be appended to data from first packet.

   180          second_datasize= second_offset+second_datasize-(first_offset+
   181                  first_datasize);

cut_second = data from second packet to be appended to data from first packet.

   182          cut_second= bf_cut(second, second_hdr_size + first_offset+
   183                  first_datasize-second_offset, second_datasize);
   184          tmp_acc= second->acc_ext_link;
   185          bf_afree(second);
   186          second= tmp_acc;
       
   187          first_datasize += second_datasize;
   188          first_hdr->ih_length= htons(first_hdr_size + first_datasize);
       

Append data from second packet to data from first packet.

   189          first= bf_append (first, cut_second);
   190          first->acc_ext_link= second;
       
   191  assert (first_hdr_size + first_datasize == bf_bufsize(first));
       
   192          return first;
   193  }
       

find_ass_ent(): find_ass_ent returns an ip_ass_t structure for a fragmented datagram.
The ip_ass_t structure is held in the ip_ass_table array.

   194  PRIVATE ip_ass_t *find_ass_ent (ip_port, id, proto, src, dst)
   195  ip_port_t *ip_port;
   196  u16_t id;
   197  ipproto_t proto;
   198  ipaddr_t src;
   199  ipaddr_t dst;
   200  {
   201          ip_ass_t *new_ass_ent, *tmp_ass_ent;
   202          int i;
   203          acc_t *tmp_acc, *curr_acc;
       
   204          new_ass_ent= 0;
       

First find_ass_ent looks through the ip_ass_table array.

   205          for (i=0, tmp_ass_ent= ip_ass_table; i<IP_ASS_NR; i++,
   206                  tmp_ass_ent++)
   207          {
   208                  if (!tmp_ass_ent->ia_frags && tmp_ass_ent->ia_first_time)
   209                  {

ia_first_time is non-zero, meaning the arrival time of a first packet was recorded, but ia_frags
is empty, meaning no fragments are held for that datagram. Such an inconsistent entry is reported and skipped.

   210                          DBLOCK(1,
   211                  printf("strange ip_ass entry (can be a race condition)\n"));
   212                          continue;
   213                  }
       
   214                  if ((tmp_ass_ent->ia_srcaddr == src) &&
   215                          (tmp_ass_ent->ia_dstaddr == dst) &&
   216                          (tmp_ass_ent->ia_proto == proto) &&
   217                          (tmp_ass_ent->ia_id == id) &&
   218                          (tmp_ass_ent->ia_port == ip_port))
   219                  {

A packet for the datagram had previously arrived. Return the ip_ass_t structure associated with it.

   220                          return tmp_ass_ent;
   221                  }
   222                  if (!new_ass_ent || tmp_ass_ent->ia_first_time <
   223                          new_ass_ent->ia_first_time)
   224                  {

Remember the entry with the earliest ia_first_time in new_ass_ent so that it can be used for the
datagram in case this is a new datagram.

   225                          new_ass_ent= tmp_ass_ent;
   226                  }
   227          }
       

We have received a packet for a new datagram.

   228          if (new_ass_ent->ia_frags)
   229          {

All of the ip_ass_t structures in ip_ass_table are being used. Use the one whose datagram packets
were received the earliest.

   230                  DBLOCK(1, printf("old frags id= %u, proto= %u, src= ",
   231                          ntohs(new_ass_ent->ia_id),
   232                          ntohs(new_ass_ent->ia_proto));
   233                          writeIpAddr(new_ass_ent->ia_srcaddr); printf(" dst= ");
   234                          writeIpAddr(new_ass_ent->ia_dstaddr); printf(": ");
   235                          ip_print_frags(new_ass_ent->ia_frags); printf("\n"));
   236                  curr_acc= new_ass_ent->ia_frags->acc_ext_link;

Free all of the received packets for the old datagram except the first one, which is then
passed to icmp_snd_time_exceeded().

   237                  while (curr_acc)
   238                  {
   239                          tmp_acc= curr_acc->acc_ext_link;
   240                          bf_afree(curr_acc);
   241                          curr_acc= tmp_acc;
   242                  }
   243                  curr_acc= new_ass_ent->ia_frags;
   244                  new_ass_ent->ia_frags= 0;
   245                  icmp_snd_time_exceeded(ip_port-ip_port_table, curr_acc,
   246                          ICMP_FRAG_REASSEM);
   247          }

Set the fields to the appropriate values.

   248          new_ass_ent->ia_min_ttl= IP_MAX_TTL;
   249          new_ass_ent->ia_port= ip_port;
   250          new_ass_ent->ia_first_time= get_time();
   251          new_ass_ent->ia_srcaddr= src;
   252          new_ass_ent->ia_dstaddr= dst;
   253          new_ass_ent->ia_proto= proto;
   254          new_ass_ent->ia_id= id;
       
   255          return new_ass_ent;
   256  }
       

ip_frag_chk() checks the checksum and the header lengths and does other validations of the ip header.

   257  PRIVATE int ip_frag_chk(pack)
   258  acc_t *pack;
   259  {
   260          ip_hdr_t *ip_hdr;
   261          int hdr_len;
       
   262          if (pack->acc_length < sizeof(ip_hdr_t))
   263          {
   264                  DBLOCK(1, printf("wrong length\n"));
   265                  return FALSE;
   266          }
       
   267          ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
       
   268          hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;
   269          if (pack->acc_length < hdr_len)
   270          {
   271                  DBLOCK(1, printf("wrong length\n"));
       
   272                  return FALSE;
   273          }
       
   274          if (((ip_hdr->ih_vers_ihl >> 4) & IH_VERSION_MASK) !=
   275                  IP_VERSION)
   276          {
   277                  DBLOCK(1, printf("wrong version (ih_vers_ihl=0x%x)\n",
   278                          ip_hdr->ih_vers_ihl));
   279                  return FALSE;
   280          }
   281          if (ntohs(ip_hdr->ih_length) != bf_bufsize(pack))
   282          {
   283                  DBLOCK(1, printf("wrong size\n"));
       
   284                  return FALSE;
   285          }
   286          if ((u16_t)~oneC_sum(0, (u16_t *)ip_hdr, hdr_len))
   287          {
   288                  DBLOCK(1, printf("packet with wrong checksum (= %x)\n",
   289                          (u16_t)~oneC_sum(0, (u16_t *)ip_hdr, hdr_len)));
   290                  return FALSE;
   291          }
   292          if (hdr_len>IP_MIN_HDR_SIZE && ip_chk_hdropt((u8_t *)
   293                  (ptr2acc_data(pack) + IP_MIN_HDR_SIZE),
   294                  hdr_len-IP_MIN_HDR_SIZE))
   295          {
   296                  DBLOCK(1, printf("packet with wrong options\n"));
   297                  return FALSE;
   298          }
   299          return TRUE;
   300  }
       

packet2user sends the packet to the process or lower level protocol interface which is connected
to ip channel ip_fd.

   301  PRIVATE void packet2user (ip_fd, pack, exp_time)
   302  ip_fd_t *ip_fd;
   303  acc_t *pack;
   304  time_t exp_time;
   305  {
   306          acc_t *tmp_pack;
   307          ip_hdr_t *ip_hdr;
   308          int result, ip_hdr_len;
   309          size_t size, transf_size;
       
   310          assert (ip_fd->if_flags & IFF_INUSE);
   311          if (!(ip_fd->if_flags & IFF_READ_IP))

Nothing is reading from ip channel ip_fd so put it in the queue.

   312          {
   313                  if (pack->acc_linkC != 1)
   314                  {
   315                          tmp_pack= bf_dupacc(pack);
   316                          bf_afree(pack);
   317                          pack= tmp_pack;
   318                          tmp_pack= NULL;
   319                  }
   320                  pack->acc_ext_link= NULL;
   321                  if (ip_fd->if_rdbuf_head == NULL)
   322                  {
   323                          ip_fd->if_rdbuf_head= pack;
   324                          ip_fd->if_exp_time= exp_time;
   325                  }
   326                  else
   327                          ip_fd->if_rdbuf_tail->acc_ext_link= pack;
   328                  ip_fd->if_rdbuf_tail= pack;
   329                  return;
   330          }
       
   331          size= bf_bufsize (pack);

NWIO_RWDATONLY specifies a read operation will only return the data part.

   332          if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY)
   333          {
       
   334                  pack= bf_packIffLess (pack, IP_MIN_HDR_SIZE);
   335                  assert (pack->acc_length >= IP_MIN_HDR_SIZE);
       
   336                  ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
   337                  ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;
       
   338                  assert (size >= ip_hdr_len);
   339                  size -= ip_hdr_len;

Removes header from pack.

   340                  pack= bf_delhead(pack, ip_hdr_len);
   341          }
       

If size requested < packet size then delete excess data at end of packet.

   342          if (size>ip_fd->if_rd_count)
   343          {
   344                  tmp_pack= bf_cut (pack, 0, ip_fd->if_rd_count);
   345                  bf_afree(pack);
   346                  pack= tmp_pack;
   347                  transf_size= ip_fd->if_rd_count;
   348          }
   349          else
   350                  transf_size= size;
       

If if_put_pkt has been defined then a lower level protocol interface has this ip channel
and the lower level protocol interface has a queue where we can send the packet to.

   351          if (ip_fd->if_put_pkt)
   352          {
   353                  (*ip_fd->if_put_pkt)(ip_fd->if_srfd, pack, transf_size);
   354                  return;
   355          }
       

Copy packet from inet space to user space.

   356          result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd,
   357                  (size_t)0, pack, FALSE);

If the copy succeeded, work out the result to report to the user.

   358          if (result >= 0)

Return EPACKSIZE if size requested < packet size.

   359                  if (size > transf_size)
   360                          result= EPACKSIZE;

Return packet size if size requested >= packet size.

   361                  else
   362                          result= transf_size;
       

if_put_pkt has not been defined. Hence a lower level protocol interface does not have
this ip channel. Hence what called us must make a read call on the IP device every time
it wants a packet. IFF_READ_IP is set when a read call has been made. IFF_READ_IP is unset
when the read call has been finished.

   363          ip_fd->if_flags &= ~IFF_READ_IP;

Return integer result to user.

   364          result= (*ip_fd->if_put_userdata)(ip_fd->if_srfd, result,
   365                          (acc_t *)0, FALSE);
   366          assert (result >= 0);
   367          return;
   368  }
       

ip_port_arrive() determines which ip channel should receive the packet pack and
sends pack to the user by calling packet2user().

   369  PUBLIC void ip_port_arrive (ip_port, pack, ip_hdr)
   370  ip_port_t *ip_port;
   371  acc_t *pack;
   372  ip_hdr_t *ip_hdr;
   373  {
   374          ip_fd_t *ip_fd, *first_fd, *share_fd;
   375          ip_hdr_t *hdr;
   376          int port_nr;
   377          unsigned long ip_pack_stat;
   378          int i;
   379          int hash, proto;
   380          time_t exp_time;
       
   381          assert (pack->acc_linkC>0);
   382          assert (pack->acc_length >= IP_MIN_HDR_SIZE);
       

Check if pack is a fragment of a fragmented datagram.

   383          if (ntohs(ip_hdr->ih_flags_fragoff) & (IH_FRAGOFF_MASK|IH_MORE_FRAGS))
   384          {

If pack is a fragment of a fragmented datagram then try to reassemble the datagram by calling reassemble().

   385                  pack= reassemble (ip_port, pack, ip_hdr);

pack = NULL if the packets for the datagram have not completely arrived.

   386                  if (!pack)
   387                          return;
   388                  assert (pack->acc_length >= IP_MIN_HDR_SIZE);
   389                  ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
   390                  assert (!(ntohs(ip_hdr->ih_flags_fragoff) &
   391                          (IH_FRAGOFF_MASK|IH_MORE_FRAGS)));
   392          }
       
   393          exp_time= get_time() + (ip_hdr->ih_ttl+1) * HZ;
       

A packet addressed to this port's own IP address is classified as NWIO_EN_LOC; any other packet
is classified as NWIO_EN_BROAD. Only ip channels whose nwio_flags contain the matching flag
receive the packet.

   394          if (ip_hdr->ih_dst == ip_port->ip_ipaddr)
   395                  ip_pack_stat= NWIO_EN_LOC;
   396          else
   397                  ip_pack_stat= NWIO_EN_BROAD;
       

proto = lower level protocol.

   398          proto= ip_hdr->ih_proto;

hash = hash value for proto in ip_proto array.

   399          hash= proto & (IP_PROTO_HASH_NR-1);
       
   400          first_fd= NULL;

ip_port_arrive chooses which fd(s) to send the packet to.

   401          for (i= 0; i<2; i++)
   402          {
   403                  share_fd= NULL;
       

ip_port_arrive does this by looking at the protocol field of the fd, i.e. if_ipopt.nwio_proto. It
first looks at the linked list whose head is pointed to by the ip_proto_any field of
the port table entry. ip_proto_any is the head of a linked list of ip fds which
receive packets of any protocol. Then it looks at ip_proto, which is a hash
table of fds which receive packets of a specific protocol. packet2user is called to
send the packet to the user.

   404                  ip_fd= (i == 0) ? ip_port->ip_proto_any :
   405                          ip_port->ip_proto[hash];
   406                  for (; ip_fd; ip_fd= ip_fd->if_proto_next)
   407                  {

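ip_port_arrive chooses which fd(s) it sends the packet to by checking that the ip channel accepts
the packet's protocol, that it accepts this kind of destination (local or broadcast), and, if
NWIO_REMSPEC is set, that the packet's source address equals the channel's remote address nwio_rem.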

   408                          if (i && ip_fd->if_ipopt.nwio_proto != proto)
   409                                  continue;
   410                          if (!(ip_fd->if_ipopt.nwio_flags & ip_pack_stat))
   411                                  continue;
   412                          if ((ip_fd->if_ipopt.nwio_flags & NWIO_REMSPEC) &&
   413                                  ip_hdr->ih_src != ip_fd->if_ipopt.nwio_rem)
   414                          {
   415                                  continue;
   416                          }
   417                          if ((ip_fd->if_ipopt.nwio_flags & NWIO_ACC_MASK) ==
   418                                  NWIO_SHARED)
   419                          {
   420                                  if (!share_fd)
   421                                  {
   422                                          share_fd= ip_fd;
   423                                          continue;
   424                                  }
   425                                  if (!ip_fd->if_rdbuf_head)
   426                                          share_fd= ip_fd;
   427                                  continue;
   428                          }
   429                          if (!first_fd)
   430                          {
   431                                  first_fd= ip_fd;
   432                                  continue;
   433                          }
   434                          pack->acc_linkC++;
   435                          packet2user(ip_fd, pack, exp_time);
       
   436                  }
   437                  if (share_fd)
   438                  {
   439                          pack->acc_linkC++;
   440                          packet2user(share_fd, pack, exp_time);
   441                  }
   442          }
   443          if (first_fd)
   444          {
   445                  if (first_fd->if_put_pkt &&
   446                          (first_fd->if_flags & IFF_READ_IP) &&
   447                          !(first_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY))
   448                  {
   449                          (*first_fd->if_put_pkt)(first_fd->if_srfd, pack,
   450                                  ntohs(ip_hdr->ih_length));
   451                  }
   452                  else
   453                          packet2user(first_fd, pack, exp_time);
   454          }
   455          else
   456          {
   457                  if (ip_pack_stat == NWIO_EN_LOC)
   458                  {
   459                          DBLOCK(0x01,
   460                          printf("ip_port_arrive: dropping packet for proto %d\n",
   461                                  proto));
   462                  }
   463                  else
   464                  {
   465                          DBLOCK(0x20, printf("dropping packet for proto %d\n",
   466                                  proto));
   467                  }
   468                  bf_afree(pack);
   469          }
   470  }
       

ip_arrived is called by the ethernet code or the psip code to inform ip that a new packet has just arrived.

   471  PUBLIC void ip_arrived(ip_port, pack)
   472  ip_port_t *ip_port;
   473  acc_t *pack;
   474  {
   475          ip_port_t *next_port;
   476          ip_hdr_t *ip_hdr;
   477          iroute_t *iroute;
   478          ipaddr_t dest;
   479          nettype_t nettype;
   480          int ip_frag_len, ip_hdr_len;
   481          size_t pack_size;
   482          acc_t *tmp_pack;
   483          int broadcast;
       
   484          pack_size= bf_bufsize(pack);
       
   485          if (pack_size < IP_MIN_HDR_SIZE)
   486          {
   487                  DBLOCK(1, printf("wrong acc_length\n"));
   488                  bf_afree(pack);
   489                  return;
   490          }
   491          pack= bf_align(pack, IP_MIN_HDR_SIZE, 4);
   492          pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE);
   493  assert (pack->acc_length >= IP_MIN_HDR_SIZE);
       

ip_hdr is the header of the packet pack.

   494          ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);

ip_hdr_len = length of ip header.

   495          ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
   496          if (ip_hdr_len>IP_MIN_HDR_SIZE)
   497          {
   498                  pack= bf_align(pack, IP_MIN_HDR_SIZE, 4);
   499                  pack= bf_packIffLess(pack, ip_hdr_len);
   500                  ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
   501          }

ip_frag_len = length of packet pack as given in ip header.

   502          ip_frag_len= ntohs(ip_hdr->ih_length);
   503          if (ip_frag_len<pack_size)
   504          {
   505                  tmp_pack= pack;
   506                  pack= bf_cut(tmp_pack, 0, ip_frag_len);
   507                  bf_afree(tmp_pack);
   508          }
       

ip_arrived calls ip_frag_chk to check the checksum and the header lengths.

   509          if (!ip_frag_chk(pack))
   510          {
   511                  DBLOCK(1, printf("fragment not allright\n"));
   512                  bf_afree(pack);
   513                  return;
   514          }
       
   515          /* Decide about local delivery or routing. Local delivery can happen
   516           * when the destination is the local ip address, or one of the
   517           * broadcast addresses and the packet happens to be delivered
   518           * point-to-point.
   519           */
       

dest = destination ip address of packet pack.

   520          dest= ip_hdr->ih_dst;
       
   521          if (dest == ip_port->ip_ipaddr)

If the ip address of this network interface is the destination ip address of the
packet pack then the ip channel waiting for the packet must be on this computer
if any ip channel is waiting for it. Send it to the ip channel which is waiting
for it by calling ip_port_arrive().

   522          {
   523                  ip_port_arrive (ip_port, pack, ip_hdr);
   524                  return;
   525          }
   526          if (broadcast_dst(ip_port, dest))

If the destination ip address is a broadcast ip address and the ip address of this
network interface is part of the broadcast network then try to send the packet to any
thing which is waiting for it by calling ip_port_arrive().

   527          {
   528                  ip_port_arrive (ip_port, pack, ip_hdr);
   529                  return;
   530          }
       
   531          /* Try to decrement the ttl field with one. */
   532          if (ip_hdr->ih_ttl < 2)
   533          {
   534                  icmp_snd_time_exceeded(ip_port-ip_port_table, pack, ICMP_TTL_EXC);
   535                  return;
   536          }

Next ip_arrived decrements the time to live (ih_ttl) field of the ip packet.

   537          ip_hdr->ih_ttl--;
   538          ip_hdr_chksum(ip_hdr, ip_hdr_len);
       
   539          /* Avoid routing to bad destinations. */

nettype = the class type of the ip address eg Class A, Class B, etc.

   540          nettype= ip_nettype(dest);
   541          if (nettype != IPNT_CLASS_A && nettype != IPNT_CLASS_B && nettype !=
   542                  IPNT_CLASS_C)
   543          {
   544                  /* Bogus destination address */
   545                  if (nettype == IPNT_CLASS_D || nettype == IPNT_CLASS_E)
   546                          bf_afree(pack);
   547                  else
   548                  {
   549                          icmp_snd_unreachable(ip_port-ip_port_table, pack,
   550                                  ICMP_HOST_UNRCH);
   551                  }
   552                  return;
   553          }

Next it looks up the destination in the route table of the port to look up to where
ip_arrived should route the packet by calling iroute_frag.

   554          iroute= iroute_frag(ip_port-ip_port_table, dest);
   555          if (iroute == NULL || iroute->irt_dist == IRTD_UNREACHABLE)
   556          {

Could not determine where to route packet.

   557                  /* Also unreachable */
   558                  /* Finding out if we send a network unreachable is too much
   559                   * trouble.
   560                   */
   561                  icmp_snd_unreachable(ip_port-ip_port_table, pack,
   562                          ICMP_HOST_UNRCH);
   563                  return;
   564          }
   565          next_port= &ip_port_table[iroute->irt_port];
   566          if (next_port != ip_port)

Packet must be sent to a different network ie sent through a different network
interface than the network represented by ip_port.

   567          {
   568                  if (iroute->irt_gateway != 0)
   569                  {
   570                          /* Just send the packet to the next gateway */
   571                          next_port->ip_dev_send(next_port, iroute->irt_gateway,
   572                                  pack, /* no bradcast */ 0);
   573                          return;
   574                  }
   575                  /* The packet is for the attached network. Special addresses
   576                   * are the ip address of the interface and net.0 if
   577                   * no IP_42BSD_BCAST.
   578                   */
   579                  if (dest == next_port->ip_ipaddr)
   580                  {
   581                          ip_port_arrive (next_port, pack, ip_hdr);
   582                          return;
   583                  }
   584                  if (dest == iroute->irt_dest)
   585                  {
   586  #if IP_42BSD_BCAST
   587                          broadcast= 1;
   588  #else
   589                          /* Bogus destination address */
   590                          icmp_snd_dstunrch(pack);
   591                          return;
   592  #endif
   593                  }
   594                  else if (dest == (iroute->irt_dest | ~iroute->irt_subnetmask))
   595                          broadcast= 1;
   596                  else
   597                          broadcast= 0;
       
   598                  /* Just send the packet to it's destination */
   599                  next_port->ip_dev_send(next_port, dest, pack, broadcast);
   600                  return;
   601          }
       

Since we already checked if the destination ip address is the same as the ip address
of this network interface, we must be routing the packet to a different network via
this network interface.

   602          /* Now we know that the packet should be route over the same network
   603           * as it came from. If there is a next hop gateway, we can send
   604           * the packet to that gateway and send a redirect ICMP to the sender
   605           * if the sender is on the attached network. If there is no gateway
   606           * complain.
   607           */
   608          if (iroute->irt_gateway == 0)
   609          {
   610  #if !CRAMPED
   611                  printf("packet should not be here, src=");
   612                  writeIpAddr(ip_hdr->ih_src);
   613                  printf(" dst=");
   614                  writeIpAddr(ip_hdr->ih_dst);
   615                  printf("\n");
   616  #endif
   617                  bf_afree(pack);
   618                  return;
   619          }
   620          if (((ip_hdr->ih_src ^ ip_port->ip_ipaddr) &
   621                  ip_port->ip_subnetmask) == 0)
   622          {
   623                  /* Finding out if we can send a network redirect instead of
   624                   * a host redirect is too much trouble.
   625                   */
   626                  pack->acc_linkC++;
   627                  icmp_snd_redirect(ip_port-ip_port_table, pack,
   628                          ICMP_REDIRECT_HOST, iroute->irt_gateway);
   629          }
   630          else
   631          {
   632  #if !CRAMPED
   633                  printf("packet is wrongly routed, src=");
   634                  writeIpAddr(ip_hdr->ih_src);
   635                  printf(" dst=");
   636                  writeIpAddr(ip_hdr->ih_dst);
   637                  printf("\n");
   638  #endif
   639                  bf_afree(pack);
   640                  return;
   641          }
   642          ip_port->ip_dev_send(ip_port, iroute->irt_gateway, pack,
   643                  /* no broadcast */ 0);
   644  }
       

ip_arrived_broadcast is called by the network interface code (psip or ethernet code)
when a broadcast message has arrived.

   645  PUBLIC void ip_arrived_broadcast(ip_port, pack)
   646  ip_port_t *ip_port;
   647  acc_t *pack;
   648  {
   649          ip_hdr_t *ip_hdr;
   650          int ip_frag_len, ip_hdr_len;
   651          size_t pack_size;
   652          acc_t *tmp_pack;
       
   653          pack_size= bf_bufsize(pack);
       
   654          if (pack_size < IP_MIN_HDR_SIZE)
   655          {
   656                  DBLOCK(1, printf("wrong acc_length\n"));
   657                  bf_afree(pack);
   658                  return;
   659          }
   660          pack= bf_align(pack, IP_MIN_HDR_SIZE, 4);
   661          pack= bf_packIffLess(pack, IP_MIN_HDR_SIZE);
   662  assert (pack->acc_length >= IP_MIN_HDR_SIZE);
       

ip_hdr is the header of the packet pack.

   663          ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
       
   664          DIFBLOCK(0x20, (ip_hdr->ih_dst & HTONL(0xf0000000)) == HTONL(0xe0000000),
   665                  printf("got multicast packet\n"));
       

ip_hdr_len = length of ip header.

   666          ip_hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) << 2;
   667          if (ip_hdr_len>IP_MIN_HDR_SIZE)
   668          {
   669                  pack= bf_align(pack, IP_MIN_HDR_SIZE, 4);
   670                  pack= bf_packIffLess(pack, ip_hdr_len);
   671                  ip_hdr= (ip_hdr_t *)ptr2acc_data(pack);
   672          }

ip_frag_len = length of packet pack as given in ip header.

   673          ip_frag_len= ntohs(ip_hdr->ih_length);
   674          if (ip_frag_len<pack_size)
   675          {
   676                  tmp_pack= pack;
   677                  pack= bf_cut(tmp_pack, 0, ip_frag_len);
   678                  bf_afree(tmp_pack);
   679          }
       

ip_arrived calls ip_frag_chk to check the checksum and the header lengths.

   680          if (!ip_frag_chk(pack))
   681          {
   682                  DBLOCK(1, printf("fragment not allright\n"));
   683                  bf_afree(pack);
   684                  return;
   685          }
       

Check if it is really a broadcast destination address for ip_port.

   686          if (!broadcast_dst(ip_port, ip_hdr->ih_dst))
   687          {
   688  #if !CRAMPED
   689                  printf(
   690                  "ip[%d]: broadcast packet for ip-nonbroadcast addr, src=",
   691                          ip_port-ip_port_table);
   692                  writeIpAddr(ip_hdr->ih_src);
   693                  printf(" dst=");
   694                  writeIpAddr(ip_hdr->ih_dst);
   695                  printf("\n");
   696  #endif
   697                  bf_afree(pack);
   698                  return;
   699          }
       

Send to port.

   700          ip_port_arrive (ip_port, pack, ip_hdr);
   701  }
       

broadcast_dst returns 1 if the destination ip address dest is a broadcast address
with respect to the port ip_port; else broadcast_dst returns 0.

   702  PRIVATE int broadcast_dst(ip_port, dest)
   703  ip_port_t *ip_port;
   704  ipaddr_t dest;
   705  {
   706          /* Treat class D (multicast) address as broadcasts. */
   707          if ((dest & HTONL(0xF0000000)) == HTONL(0xE0000000))
   708          {
   709                  return 1;
   710          }
       
   711          /* Accept without complaint if netmask not yet configured. */
   712          if (!(ip_port->ip_flags & IPF_NETMASKSET))
   713          {
   714                  return 1;
   715          }
       
   716          if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_netmask) != 0)
   717          {
   718                  /* Two possibilities, 0 (iff IP_42BSD_BCAST) and -1 */
   719                  if (dest == HTONL((ipaddr_t)-1))
   720                          return 1;
   721  #if IP_42BSD_BCAST
   722                  if (dest == HTONL((ipaddr_t)0))
   723                          return 1;
   724  #endif
   725                  return 0;
   726          }
   727          if (((ip_port->ip_ipaddr ^ dest) & ip_port->ip_subnetmask) != 0)
   728          {
   729                  /* Two possibilities, netwerk.0 (iff IP_42BSD_BCAST) and
   730                   * netwerk.-1
   731                   */
   732                  if ((dest & ~ip_port->ip_netmask) == ~ip_port->ip_netmask)
   733                          return 1;
   734  #if IP_42BSD_BCAST
   735                  if ((dest & ~ip_port->ip_netmask) == 0)
   736                          return 1;
   737  #endif
   738                  return 0;
   739          }
       
   740          /* Two possibilities, netwerk.subnet.0 (iff IP_42BSD_BCAST) and
   741           * netwerk.subnet.-1
   742           */
   743          if ((dest & ~ip_port->ip_subnetmask) == ~ip_port->ip_subnetmask)
   744                  return 1;
   745  #if IP_42BSD_BCAST
   746          if ((dest & ~ip_port->ip_subnetmask) == 0)
   747                  return 1;
   748  #endif
   749          return 0;
   750  }
       
   751  void ip_process_loopb(ev, arg)
   752  event_t *ev;
   753  ev_arg_t arg;
   754  {
   755          ip_port_t *ip_port;
   756          acc_t *pack;
       
   757          ip_port= arg.ev_ptr;
   758          assert(ev == &ip_port->ip_loopb_event);
       
   759          while(pack= ip_port->ip_loopb_head)
   760          {
   761                  ip_port->ip_loopb_head= pack->acc_ext_link;
   762                  ip_arrived(ip_port, pack);
   763          }
   764  }
       
   765  /*
   766   * $PchId: ip_read.c,v 1.9 1997/01/31 08:51:39 philip Exp $
   767   */