ip_write.c implements the IP interface for write calls.

     1  /*
     2  ip_write.c
       
     3  Copyright 1995 Philip Homburg
     4  */
       
     5  #include "inet.h"
     6  #include "buf.h"
     7  #include "event.h"
     8  #include "type.h"
       
     9  #include "arp.h"
    10  #include "assert.h"
    11  #include "clock.h"
    12  #include "eth.h"
    13  #include "icmp_lib.h"
    14  #include "io.h"
    15  #include "ip.h"
    16  #include "ip_int.h"
    17  #include "ipr.h"
       
    18  THIS_FILE
       
    19  FORWARD void error_reply ARGS(( ip_fd_t *fd, int error ));
    20  FORWARD int chk_dstaddr ARGS(( ipaddr_t dst ));
       

ip_write() is called to write a packet to a network interface. It is called in
two ways. 1) ip_write is called by a lower level protocol interface in order to write a packet.
For example read_ip_packets attempts to read a packet by calling ip_write. 2) If a user process
directly opens the ip device (e.g. by calling Open("/dev/ip", whatever_mode_you_want)), then ip_write
is called by the corresponding channel in the sr_fd_table in sr.c.

    21  PUBLIC int ip_write (fd, count)
    22  int fd;
    23  size_t count;
    24  {
    25          ip_fd_t *ip_fd;
    26          acc_t *pack;
    27          int r;
       
    28          ip_fd= &ip_fd_table[fd];

ip_write() first checks whether the number of bytes to write is greater than IP_MAX_PACKSIZE.
If it is, an EPACKSIZE error is reported through error_reply() and ip_write() returns NW_OK.

    29          if (count > IP_MAX_PACKSIZE)
    30          {
    31                  error_reply (ip_fd, EPACKSIZE);
    32                  return NW_OK;
    33          }

Next ip_write() gets the packet to write by calling if_get_userdata.

    34          pack= (*ip_fd->if_get_userdata)(ip_fd->if_srfd, (size_t)0,
    35                  count, FALSE);
    36          if (!pack)
    37                  return NW_OK;

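Finally ip_write() calls ip_send to send the packet and reports the result through
error_reply(): the byte count on success, the error code returned by ip_send otherwise.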

    38          r= ip_send(fd, pack, count);
    39          assert(r != NW_WOULDBLOCK);
       
    40          if (r == NW_OK)
    41                  error_reply (ip_fd, count);
    42          else
    43                  error_reply (ip_fd, r);
    44          return NW_OK;
    45  }
       

ip_send() sends the packet data which has size data_len on the ip channel with index fd
in the ip_fd_table array.

    46  PUBLIC int ip_send(fd, data, data_len)
    47  int fd;
    48  acc_t *data;
    49  size_t data_len;
    50  {
    51          ip_port_t *ip_port;
    52          ip_fd_t *ip_fd;
    53          ip_hdr_t *ip_hdr, *tmp_hdr;
    54          ipaddr_t dstaddr, netmask, nexthop;
    55          u8_t *addrInBytes;
    56          acc_t *tmp_pack, *tmp_pack1;
    57          int hdr_len, hdr_opt_len, r;
    58          int broadcast, ttl;
    59          ev_arg_t arg;
       
    60          ip_fd= &ip_fd_table[fd];
    61          ip_port= ip_fd->if_port;
       

Check if ip channel has been configured with NWIOSIPOPT IOCTL call.

    62          if (!(ip_fd->if_flags & IFF_OPTSET))
    63          {
    64                  bf_afree(data);
    65                  return EBADMODE;
    66          }
       
    67          data_len= bf_bufsize(data);
       

Asserts that the ip address of the port has been set, by checking that the IPF_IPADDRSET flag is set.

    68          assert(ip_fd->if_port->ip_flags & IPF_IPADDRSET);
       

NWIO_RWDATONLY specifies that the header is omitted from the write request.

    69          if (ip_fd->if_ipopt.nwio_flags & NWIO_RWDATONLY)
    70          {
    71                  tmp_pack= bf_memreq(IP_MIN_HDR_SIZE);
    72                  tmp_pack->acc_next= data;
    73                  data= tmp_pack;
    74                  data_len += IP_MIN_HDR_SIZE;
    75          }
    76          if (data_len<IP_MIN_HDR_SIZE)
    77          {
    78                  bf_afree(data);
    79                  return EPACKSIZE;
    80          }
       
    81          data= bf_packIffLess(data, IP_MIN_HDR_SIZE);
    82          ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
    83          if (data->acc_linkC != 1 || data->acc_buffer->buf_linkC != 1)
    84          {
    85                  tmp_pack= bf_memreq(IP_MIN_HDR_SIZE);
    86                  tmp_hdr= (ip_hdr_t *)ptr2acc_data(tmp_pack);
    87                  *tmp_hdr= *ip_hdr;
    88                  tmp_pack->acc_next= bf_cut(data, IP_MIN_HDR_SIZE,
    89                          data_len-IP_MIN_HDR_SIZE);
    90                  bf_afree(data);
    91                  ip_hdr= tmp_hdr;
    92                  data= tmp_pack;
    93                  assert (data->acc_length >= IP_MIN_HDR_SIZE);
    94          }
       
    95          if (ip_fd->if_ipopt.nwio_flags & NWIO_HDR_O_SPEC)
    96          {

NWIO_HDR_O_SPEC specifies all IP header options in advance. IP option headers are
passed in the iho_data field of the ip_hdropt structure.

    97                  hdr_opt_len= ip_fd->if_ipopt.nwio_hdropt.iho_opt_siz;
    98                  if (hdr_opt_len)
    99                  {
   100                          tmp_pack= bf_cut(data, 0, IP_MIN_HDR_SIZE);
   101                          tmp_pack1= bf_cut (data, IP_MIN_HDR_SIZE,
   102                                  data_len-IP_MIN_HDR_SIZE);
   103                          bf_afree(data);
   104                          data= bf_packIffLess(tmp_pack, IP_MIN_HDR_SIZE);
   105                          ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
   106                          tmp_pack= bf_memreq (hdr_opt_len);

If the header option length is > 0, the options are copied from if_ipopt.nwio_hdropt.iho_data.
if_ipopt.nwio_hdropt.iho_data should have been set in the NWIOSIPOPT IOCTL call.

   107                          memcpy (ptr2acc_data(tmp_pack), ip_fd->if_ipopt.
   108                                  nwio_hdropt.iho_data, hdr_opt_len);
   109                          data->acc_next= tmp_pack;
   110                          tmp_pack->acc_next= tmp_pack1;
   111                          hdr_len= IP_MIN_HDR_SIZE+hdr_opt_len;
   112                  }
   113                  else
   114                          hdr_len= IP_MIN_HDR_SIZE;
   115                  ip_hdr->ih_vers_ihl= hdr_len/4;
   116                  ip_hdr->ih_tos= ip_fd->if_ipopt.nwio_tos;
   117                  ip_hdr->ih_flags_fragoff= 0;
   118                  if (ip_fd->if_ipopt.nwio_df)
   119                          ip_hdr->ih_flags_fragoff |= HTONS(IH_DONT_FRAG);
   120                  ip_hdr->ih_ttl= ip_fd->if_ipopt.nwio_ttl;
   121                  ttl= ORTD_UNREACHABLE+1;                /* Don't check TTL */
   122          }
   123          else
   124          {
   125                  hdr_len= (ip_hdr->ih_vers_ihl & IH_IHL_MASK)*4;
   126                  r= NW_OK;
   127                  if (hdr_len<IP_MIN_HDR_SIZE)
   128                          r= EINVAL;
   129                  else if (hdr_len>data_len)
   130                          r= EPACKSIZE;
   131                  else if (!ip_hdr->ih_ttl)
   132                          r= EINVAL;
   133                  if (r != NW_OK)
   134                  {
   135                          bf_afree(data);
   136                          return r;
   137                  }
       
   138                  data= bf_packIffLess(data, hdr_len);
   139                  ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
   140                  if (hdr_len != IP_MIN_HDR_SIZE)
   141                  {

If header options have been passed by the user check if the options are valid
by calling ip_chk_hdropt. ip_chk_hdropt is defined in inet/generic/ip_lib.c.

   142                          r= ip_chk_hdropt((u8_t *)(ptr2acc_data(data) +
   143                                  IP_MIN_HDR_SIZE),
   144                                  hdr_len-IP_MIN_HDR_SIZE);
   145                          if (r != NW_OK)
   146                          {
   147                                  bf_afree(data);
   148                                  return r;
   149                          }
   150                  }
   151                  ttl= ip_hdr->ih_ttl;
   152          }
   153         
   154          assert (!(ip_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)));

ip_send then sets the ip version, the length of the packet, the fragmentation flags,
the datagram id number, the ip source address, and the ip destination address.

   155          ip_hdr->ih_vers_ihl= (ip_hdr->ih_vers_ihl & IH_IHL_MASK) |
   156                  (IP_VERSION << 4);
   157          ip_hdr->ih_length= htons(data_len);
   158          ip_hdr->ih_flags_fragoff &= ~HTONS(IH_FRAGOFF_MASK |
   159                  IH_FLAGS_UNUSED | IH_MORE_FRAGS);
   160          if (ip_fd->if_ipopt.nwio_flags & NWIO_PROTOSPEC)

Lower level protocol has been set in NWIOSIPOPT IOCTL call which configured IP channel.

   161                  ip_hdr->ih_proto= ip_fd->if_ipopt.nwio_proto;

Set datagram id for datagram.

   162          ip_hdr->ih_id= htons(ip_port->ip_frame_id++);

Set source ip address for datagram.

   163          ip_hdr->ih_src= ip_fd->if_port->ip_ipaddr;
   164          if (ip_fd->if_ipopt.nwio_flags & NWIO_REMSPEC)

Remote IP address has been set in NWIOSIPOPT IOCTL call which configured IP channel.

   165                  ip_hdr->ih_dst= ip_fd->if_ipopt.nwio_rem;
   166          else
   167          {
   168                  r= chk_dstaddr(ip_hdr->ih_dst);
   169                  if (r<0)
   170                  {
   171                          DIFBLOCK(1, r == EBADDEST,
   172                                  printf("bad destination: ");
   173                                  writeIpAddr(ip_hdr->ih_dst);
   174                                  printf("\n"));
   175                          bf_afree(data);
   176                          return r;
   177                  }
   178          }

ip_send calculates the checksum field ih_hdr_chk of the ip header ip_hdr by calling ip_hdr_chksum().

   179          ip_hdr_chksum(ip_hdr, hdr_len);
       
   180          data= bf_packIffLess(data, IP_MIN_HDR_SIZE);
   181          assert (data->acc_length >= IP_MIN_HDR_SIZE);
   182          ip_hdr= (ip_hdr_t *)ptr2acc_data(data);
       

dstaddr = destination ip address.

   183          dstaddr= ip_hdr->ih_dst;

addrInBytes = pointer to dstaddr.

   184          addrInBytes= (u8_t *)&dstaddr;
       
   185          if ((addrInBytes[0] & 0xff) == 0x7f)    /* local loopback */

(addrInBytes[0] & 0xff) == 0x7f means destination ip address = 127.x.x.x.

   186          {
   187                  assert (data->acc_linkC == 1);
   188                  dstaddr= ip_hdr->ih_dst;        /* swap src and dst
   189                                                   * addresses */
   190                  ip_hdr->ih_dst= ip_hdr->ih_src;
   191                  ip_hdr->ih_src= dstaddr;
   192                  data->acc_ext_link= NULL;

Local loopback packets are put in the loopback queue. Each ip port has its own loopback
queue. The head of the loopback queue is pointed to by ip_loopb_head while the tail of the
loopback queue is pointed to by ip_loopb_tail. The loopback queue is processed by ev_process.
ev_process is called in the main function in inet.c.

   193                  if (ip_port->ip_loopb_head == NULL)
   194                  {
   195                          ip_port->ip_loopb_head= data;
   196                          arg.ev_ptr= ip_port;
   197                          ev_enqueue(&ip_port->ip_loopb_event,
   198                                  ip_process_loopb, arg);
   199                  }
   200                  else
   201                          ip_port->ip_loopb_tail->acc_ext_link= data;
   202                  ip_port->ip_loopb_tail= data;
       
   203                  return NW_OK;
   204          }
       
   205          if (dstaddr == (ipaddr_t)-1)

(ipaddr_t)-1 = ip address 255.255.255.255

   206          {
   207                  r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
   208                          /* broadcast */ 1);
   209                  return r;
   210          }

netmask = net mask for class type of ip address dstaddr.

   211          netmask= ip_get_netmask(dstaddr);
       
   212          if (dstaddr == ip_port->ip_ipaddr)

Destination IP address is the ip address of this ip port ie we are sending a message
to ourself.

   213          {
   214                  assert (data->acc_linkC == 1);
       
   215                  data->acc_ext_link= NULL;

Local loopback packets are put in the loopback queue. Each ip port has its own loopback
queue. The head of the loopback queue is pointed to by ip_loopb_head while the tail of the
loopback queue is pointed to by ip_loopb_tail. The loopback queue is processed by ev_process.
ev_process is called in the main function in inet.c.

   216                  if (ip_port->ip_loopb_head == NULL)
   217                  {
   218                          ip_port->ip_loopb_head= data;
   219                          arg.ev_ptr= ip_port;
   220                          ev_enqueue(&ip_port->ip_loopb_event,
   221                                  ip_process_loopb, arg);
   222                  }
   223                  else
   224                          ip_port->ip_loopb_tail->acc_ext_link= data;
   225                  ip_port->ip_loopb_tail= data;
       
   226                  return NW_OK;
   227          }
       
   228          if (((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) == 0)

((dstaddr ^ ip_port->ip_ipaddr) & ip_port->ip_subnetmask) == 0 means
destination ip address belongs in the same network as the ip address of this ip port.

   229          {
   230                  broadcast= (dstaddr == (ip_port->ip_ipaddr |
   231                          ~ip_port->ip_subnetmask));
       
   232                  r= (*ip_port->ip_dev_send)(ip_port, dstaddr, data,
   233                                                                  broadcast);
   234                  return r;
   235          }
       

Get the ip address to send the packet to by calling oroute_frag. The IP address is returned
in nexthop.

   236          r= oroute_frag (ip_port - ip_port_table, dstaddr, ttl, &nexthop);
       
   237          if (r == NW_OK)
   238          {
   239                  if (nexthop == ip_port->ip_ipaddr)

Sending it to this ip port.

   240                  {
   241                          data->acc_ext_link= NULL;

Local loopback packets are put in the loopback queue. Each ip port has its own loopback
queue. The head of the loopback queue is pointed to by ip_loopb_head while the tail of the
loopback queue is pointed to by ip_loopb_tail. The loopback queue is processed by ev_process.
ev_process is called in the main function in inet.c.

   242                          if (ip_port->ip_loopb_head == NULL)
   243                          {
   244                                  ip_port->ip_loopb_head= data;
   245                                  arg.ev_ptr= ip_port;
   246                                  ev_enqueue(&ip_port->ip_loopb_event,
   247                                          ip_process_loopb, arg);
   248                          }
   249                          else
   250                                  ip_port->ip_loopb_tail->acc_ext_link= data;
   251                          ip_port->ip_loopb_tail= data;
   252                  }
   253                  else
   254                  {

Send packet to nexthop.

   255                          r= (*ip_port->ip_dev_send)(ip_port,
   256                                  nexthop, data, /* no broadcast */ 0);
   257                  }
   258          }
   259          else
   260          {
   261                  DBLOCK(0x10, printf("got error %d\n", r));
   262                  bf_afree(data);
   263          }
   264          return r;
   265  }
       

ip_hdr_chksum() calculates the checksum field ih_hdr_chk of the ip header ip_hdr.

   266  PUBLIC void ip_hdr_chksum(ip_hdr, ip_hdr_len)
   267  ip_hdr_t *ip_hdr;
   268  int ip_hdr_len;
   269  {
                /* Compute the header checksum and store it in
                 * ip_hdr->ih_hdr_chk.  ip_hdr_len is the header length in
                 * bytes (ip_send passes IHL*4).
                 */

                /* Clear the field first so that a stale checksum does not
                 * contribute to the sum computed below.
                 */
   270          ip_hdr->ih_hdr_chk= 0;
                /* Store the bitwise complement of oneC_sum over the header.
                 * NOTE(review): oneC_sum is presumably a one's-complement sum
                 * over ip_hdr_len contiguous bytes starting at ip_hdr --
                 * confirm against its definition.
                 */
   271          ip_hdr->ih_hdr_chk= ~oneC_sum (0, (u16_t *)ip_hdr, ip_hdr_len);
   272  }
       
   273  PUBLIC acc_t *ip_split_pack (ip_port, ref_last, first_size)
   274  ip_port_t *ip_port;
   275  acc_t **ref_last;
   276  int first_size;
   277  {
   278          int pack_siz;
   279          ip_hdr_t *first_hdr, *second_hdr;
   280          int first_hdr_len, second_hdr_len;
   281          int first_data_len, second_data_len;
   282          int new_first_data_len;
   283          int first_opt_size, second_opt_size;
   284          acc_t *first_pack, *second_pack, *tmp_pack, *tmp_pack1;
   285          u8_t *first_optptr, *second_optptr;
   286          int i, optlen;
       
   287          first_pack= *ref_last;
   288          *ref_last= 0;
   289          second_pack= 0;
       
   290          first_pack= bf_packIffLess(first_pack, IP_MIN_HDR_SIZE);
   291          assert (first_pack->acc_length >= IP_MIN_HDR_SIZE);
       
   292          first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack);
   293          first_hdr_len= (first_hdr->ih_vers_ihl & IH_IHL_MASK) * 4;
       
   294          pack_siz= bf_bufsize(first_pack);
   295          assert(pack_siz > first_size);
       
   296          if (first_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG))
   297          {
   298                  icmp_snd_unreachable(ip_port-ip_port_table, first_pack,
   299                          ICMP_FRAGM_AND_DF);
   300                  return NULL;
   301          }
       
   302          first_data_len= ntohs(first_hdr->ih_length) - first_hdr_len;
   303          new_first_data_len= (first_size- first_hdr_len) & ~7;
   304                  /* data goes in 8 byte chuncks */
   305          second_data_len= first_data_len-new_first_data_len;
   306          second_pack= bf_cut(first_pack, first_hdr_len+
   307                  new_first_data_len, second_data_len);
   308          tmp_pack= first_pack;
   309          first_data_len= new_first_data_len;
   310          first_pack= bf_cut (tmp_pack, 0, first_hdr_len+first_data_len);
   311          bf_afree(tmp_pack);
   312          tmp_pack= bf_memreq(first_hdr_len);
   313          tmp_pack->acc_next= second_pack;
   314          second_pack= tmp_pack;
   315          second_hdr= (ip_hdr_t *)ptr2acc_data(second_pack);
   316          *second_hdr= *first_hdr;
   317          second_hdr->ih_flags_fragoff= htons(
   318                  ntohs(first_hdr->ih_flags_fragoff)+(first_data_len/8));
       
   319          first_opt_size= first_hdr_len-IP_MIN_HDR_SIZE;
   320          second_opt_size= 0;
   321          if (first_opt_size)
   322          {
   323                  first_pack= bf_packIffLess (first_pack,
   324                          first_hdr_len);
   325                  first_hdr= (ip_hdr_t *)ptr2acc_data(first_pack);
   326                  assert (first_pack->acc_length>=first_hdr_len);
   327                  first_optptr= (u8_t *)ptr2acc_data(first_pack)+
   328                          IP_MIN_HDR_SIZE;
   329                  second_optptr= (u8_t *)ptr2acc_data(
   330                          second_pack)+IP_MIN_HDR_SIZE;
   331                  i= 0;
   332                  while (i<first_opt_size)
   333                  {
   334                          switch (*first_optptr & IP_OPT_NUMBER)
   335                          {
   336                          case 0:
   337                          case 1:
   338                                  optlen= 1;
   339                                  break;
   340                          default:
   341                                  optlen= first_optptr[1];
   342                                  break;
   343                          }
   344                          assert (i + optlen <= first_opt_size);
   345                          i += optlen;
   346                          if (*first_optptr & IP_OPT_COPIED)
   347                          {
   348                                  second_opt_size += optlen;
   349                                  while (optlen--)
   350                                          *second_optptr++=
   351                                                  *first_optptr++;
   352                          }
   353                          else
   354                                  first_optptr += optlen;
   355                  }
   356                  while (second_opt_size & 3)
   357                  {
   358                          *second_optptr++= 0;
   359                          second_opt_size++;
   360                  }
   361          }
   362          second_hdr_len= IP_MIN_HDR_SIZE + second_opt_size;
       
   363          second_hdr->ih_vers_ihl= second_hdr->ih_vers_ihl & 0xf0
   364                  + (second_hdr_len/4);
   365          second_hdr->ih_length= htons(second_data_len+
   366                  second_hdr_len);
   367          second_pack->acc_length= second_hdr_len;
       
   368          assert(first_pack->acc_linkC == 1);
   369          assert(first_pack->acc_buffer->buf_linkC == 1);
       
   370          first_hdr->ih_flags_fragoff |= HTONS(IH_MORE_FRAGS);
   371          first_hdr->ih_length= htons(first_data_len+
   372                  first_hdr_len);
   373          assert (!(second_hdr->ih_flags_fragoff & HTONS(IH_DONT_FRAG)));
       
   374          ip_hdr_chksum(first_hdr, first_hdr_len);
   375          if (second_data_len+second_hdr_len <= first_size)
   376          {
   377                  /* second_pack will not be split any further, so we have to
   378                   * calculate the header checksum.
   379                   */
   380                  ip_hdr_chksum(second_hdr, second_hdr_len);
   381          }
       
   382          *ref_last= second_pack;
   383          return first_pack;
   384  }
       
   385  PRIVATE void error_reply (ip_fd, error)
   386  ip_fd_t *ip_fd;
   387  int error;
   388  {
                /* Report the outcome of a write request on ip_fd.  ip_write
                 * passes the byte count on success and an error code
                 * otherwise.
                 *
                 * The value is handed to the channel's if_get_userdata
                 * callback as its second argument, together with a count
                 * of 0.
                 * NOTE(review): presumably a zero count tells the callback to
                 * deliver a result rather than fetch data -- confirm against
                 * the if_get_userdata implementation.
                 */
   389          if ((*ip_fd->if_get_userdata)(ip_fd->if_srfd, (size_t)error,
   390                  (size_t)0, FALSE))
   391          {
                        /* A non-NULL return is not expected here; treat it
                         * as fatal.
                         */
   392                  ip_panic(( "can't error_reply" ));
   393          }
   394  }
       

chk_dstaddr() returns NW_OK if dst is a valid destination ip address. Otherwise,
chk_dstaddr() returns EBADDEST.

   395  PRIVATE int chk_dstaddr (dst)
   396  ipaddr_t dst;
   397  {
                /* Validate a destination address given in network byte order.
                 * Returns NW_OK if the address is acceptable and EBADDEST
                 * otherwise.
                 */
   398          ipaddr_t hostrep_dst, netmask;
       
   399          hostrep_dst= ntohl(dst);
                /* The all-ones address (255.255.255.255) is always accepted. */
   400          if (hostrep_dst == (ipaddr_t)-1)
   401                  return NW_OK;
                /* Reject every other address whose top three bits are all
                 * set, i.e. 224.0.0.0 and above.
                 */
   402          if ((hostrep_dst & 0xe0000000l) == 0xe0000000l)
   403                  return EBADDEST;
                /* Reject addresses whose host part is all zero under the mask
                 * returned by ip_get_netmask.  The test uses dst itself, so
                 * the mask is presumably in network byte order as well --
                 * TODO confirm.
                 */
   404          netmask= ip_get_netmask(dst);
   405          if (!(dst & ~netmask))
   406                  return EBADDEST;
   407          return NW_OK;
   408  }
       
   409  /*
   410   * $PchId: ip_write.c,v 1.7 1996/08/02 07:08:49 philip Exp $
   411   */