1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14
15 /* This driver lives in an s-Par partition and registers with the visorbus
16  * driver for Ethernet IO channels. It creates netdev devices, forwards
17  * transmits to the IO channel, and accepts receives from the IO Partition
18  * via the IO channel.
19  */
20
21 #include <linux/debugfs.h>
22 #include <linux/netdevice.h>
23 #include <linux/etherdevice.h>
24 #include <linux/skbuff.h>
25 #include <linux/kthread.h>
26
27 #include "visorbus.h"
28 #include "iochannel.h"
29
30 #define VISORNIC_INFINITE_RESPONSE_WAIT 0
31 #define VISORNICSOPENMAX 32
32 #define MAXDEVICES     16384
33
34 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
35  *         = 163840 bytes
36  */
37 #define MAX_BUF 163840
38
39 static spinlock_t dev_num_pool_lock;
40 static void *dev_num_pool;      /**< pool to grab device numbers from */
41
42 static int visornic_probe(struct visor_device *dev);
43 static void visornic_remove(struct visor_device *dev);
44 static int visornic_pause(struct visor_device *dev,
45                           visorbus_state_complete_func complete_func);
46 static int visornic_resume(struct visor_device *dev,
47                            visorbus_state_complete_func complete_func);
48
49 /* DEBUGFS declarations */
50 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
51                                  size_t len, loff_t *offset);
52 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
53                                  size_t len, loff_t *ppos);
54 static struct dentry *visornic_debugfs_dir;
55 static const struct file_operations debugfs_info_fops = {
56         .read = info_debugfs_read,
57 };
58
59 static const struct file_operations debugfs_enable_ints_fops = {
60         .write = enable_ints_write,
61 };
62
63 static struct workqueue_struct *visornic_serverdown_workqueue;
64 static struct workqueue_struct *visornic_timeout_reset_workqueue;
65
66 /* GUIDs for the channel types supported by this driver.  */
67 static struct visor_channeltype_descriptor visornic_channel_types[] = {
68         /* Note that the only channel type we expect to be reported by the
69          * bus driver is the SPAR_VNIC channel.
70          */
71         { SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
72         { NULL_UUID_LE, NULL }
73 };
74
75 /* This is used to tell the visor bus driver which types of visor devices
76  * we support, and what functions to call when a visor device that we support
77  * is attached or removed.
78  */
79 static struct visor_driver visornic_driver = {
80         .name = "visornic",
81         .version = "1.0.0.0",
82         .vertag = NULL,
83         .owner = THIS_MODULE,
84         .channel_types = visornic_channel_types,
85         .probe = visornic_probe,
86         .remove = visornic_remove,
87         .pause = visornic_pause,
88         .resume = visornic_resume,
89         .channel_interrupt = NULL,
90 };
91
92 struct visor_thread_info {
93         struct task_struct *task;
94         struct completion has_stopped;
95         int id;
96 };
97
98 struct chanstat {
99         unsigned long got_rcv;
100         unsigned long got_enbdisack;
101         unsigned long got_xmit_done;
102         unsigned long xmit_fail;
103         unsigned long sent_enbdis;
104         unsigned long sent_promisc;
105         unsigned long sent_post;
106         unsigned long sent_xmit;
107         unsigned long reject_count;
108         unsigned long extra_rcvbufs_sent;
109 };
110
111 struct visornic_devdata {
112         int devnum;
113         int thread_wait_ms;
114         unsigned short enabled;         /* 0 disabled 1 enabled to receive */
115         unsigned short enab_dis_acked;  /* NET_RCV_ENABLE/DISABLE acked by
116                                          * IOPART
117                                          */
118         struct visor_device *dev;
119         char name[99];
120         struct list_head list_all;   /* link within list_all_devices list */
121         struct net_device *netdev;
122         struct net_device_stats net_stats;
123         atomic_t interrupt_rcvd;
124         wait_queue_head_t rsp_queue;
125         struct sk_buff **rcvbuf;
126         u64 uniquenum; /* TODO figure out why not used */
127         unsigned short old_flags;       /* flags as they were prior to
128                                          * set_multicast_list
129                                          */
130         atomic_t usage;                 /* count of users */
131         int num_rcv_bufs;               /* indicates how many rcv buffers
132                                          * the vnic will post
133                                          */
134         int num_rcv_bufs_could_not_alloc;
135         atomic_t num_rcvbuf_in_iovm;
136         unsigned long alloc_failed_in_if_needed_cnt;
137         unsigned long alloc_failed_in_repost_rtn_cnt;
138         int max_outstanding_net_xmits;   /* absolute max number of outstanding
139                                           * xmits - should never hit this
140                                           */
141         int upper_threshold_net_xmits;   /* high water mark for calling
142                                           * netif_stop_queue()
143                                           */
144         int lower_threshold_net_xmits;   /* low water mark for calling
145                                           * netif_wake_queue()
146                                           */
147         struct sk_buff_head xmitbufhead; /* xmitbufhead is the head of the
148                                           * xmit buffer list that have been
149                                           * sent to the IOPART end
150                                           */
151         struct work_struct serverdown_completion;
152         visorbus_state_complete_func server_down_complete_func;
153         struct work_struct timeout_reset;
154         struct uiscmdrsp *cmdrsp_rcv;    /* cmdrsp_rcv is used for
155                                           * posting/unposting rcv buffers
156                                           */
157         struct uiscmdrsp *xmit_cmdrsp;   /* used to issue NET_XMIT - there is
158                                           * never more than one xmit in
159                                           * progress at a time
160                                           */
161         bool server_down;                /* IOPART is down */
162         bool server_change_state;        /* Processing SERVER_CHANGESTATE msg */
163         bool going_away;                 /* device is being torn down */
164         struct dentry *eth_debugfs_dir;
165         struct visor_thread_info threadinfo;
166         u64 interrupts_rcvd;
167         u64 interrupts_notme;
168         u64 interrupts_disabled;
169         u64 busy_cnt;
170         spinlock_t priv_lock;  /* spinlock to access devdata structures */
171
172         /* flow control counter */
173         u64 flow_control_upper_hits;
174         u64 flow_control_lower_hits;
175
176         /* debug counters */
177         unsigned long n_rcv0;                   /* # rcvs of 0 buffers */
178         unsigned long n_rcv1;                   /* # rcvs of 1 buffers */
179         unsigned long n_rcv2;                   /* # rcvs of 2 buffers */
180         unsigned long n_rcvx;                   /* # rcvs of >2 buffers */
181         unsigned long found_repost_rcvbuf_cnt;  /* # times we called
182                                                  *   repost_rcvbuf_cnt
183                                                  */
184         unsigned long repost_found_skb_cnt;     /* # times found the skb */
185         unsigned long n_repost_deficit;         /* # times we couldn't find
186                                                  *   all of the rcv buffers
187                                                  */
188         unsigned long bad_rcv_buf;              /* # times we neglected to
189                                                  * free the rcv skb because
190                                                  * we didn't know where it
191                                                  * came from
192                                                  */
193         unsigned long n_rcv_packets_not_accepted;/* # bogus rcv packets */
194
195         int queuefullmsg_logged;
196         struct chanstat chstat;
197 };
198
199 /* array of open devices maintained by open() and close() */
200 static struct net_device *num_visornic_open[VISORNICSOPENMAX];
201
202 /* List of all visornic_devdata structs,
203  * linked via the list_all member
204  */
205 static LIST_HEAD(list_all_devices);
206 static DEFINE_SPINLOCK(lock_all_devices);
207
208 /**
209  *      visor_copy_fragsinfo_from_skb - copy fragment info from an skb
210  *      @skb: skbuff that we are pulling the frags from
211  *      @firstfraglen: length of first fragment in skb
212  *      @frags_max: max len of frags array
213  *      @frags: frags array filled in on output
214  *
215  *      Copy the fragment list in the SKB to a phys_info
216  *      array that the IOPART understands.
217  *      Return value indicates number of entries filled in frags
218  *      Negative values indicate an error.
219  */
220 static int
221 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
222                               unsigned int frags_max,
223                               struct phys_info frags[])
224 {
225         unsigned int count = 0, ii, size, offset = 0, numfrags;
226
227         numfrags = skb_shinfo(skb)->nr_frags;
228
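            /* The linear (non-paged) portion of the skb may span multiple
             * pages; walk it page by page, emitting one phys_info entry
             * per page-sized chunk.
             */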
229         while (firstfraglen) {
230                 if (count == frags_max)
231                         return -EINVAL;
232
233                 frags[count].pi_pfn =
234                         page_to_pfn(virt_to_page(skb->data + offset));
235                 frags[count].pi_off =
236                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
237                 size = min_t(unsigned int, firstfraglen,
238                              PI_PAGE_SIZE - frags[count].pi_off);
239
240                 /* can take smallest of firstfraglen (what's left) OR
241                  * bytes left in the page
242                  */
243                 frags[count].pi_len = size;
244                 firstfraglen -= size;
245                 offset += size;
246                 count++;
247         }
248         if (numfrags) {
249                 if ((count + numfrags) > frags_max)
250                         return -EINVAL;
251
252                 for (ii = 0; ii < numfrags; ii++) {
253                         count = add_physinfo_entries(page_to_pfn(
254                                 skb_frag_page(&skb_shinfo(skb)->frags[ii])),
255                                 skb_shinfo(skb)->frags[ii].page_offset,
256                                 skb_shinfo(skb)->frags[ii].size,
257                                 count, frags_max,
258                                 frags);
259                         if (!count)
260                                 return -EIO;
261                 }
262         }
263         if (skb_shinfo(skb)->frag_list) {
264                 struct sk_buff *skbinlist;
265                 int c;
266
267                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
268                      skbinlist = skbinlist->next) {
269                         c = visor_copy_fragsinfo_from_skb(skbinlist,
270                                                           skbinlist->len -
271                                                           skbinlist->data_len,
272                                                           frags_max - count,
273                                                           &frags[count]);
274                         if (c < 0)
275                                 return c;
276                         count += c;
277                 }
278         }
279         return count;
280 }
281
282 /**
283  *      visor_thread_start - starts a thread for the device
284  *      @thrinfo: The thread to start
285  *      @threadfn: Function the thread starts
286  *      @thrcontext: Context to pass to the thread, i.e. devdata
287  *      @name:  string describing name of thread
288  *
289  *      Starts a thread for the device; currently the only thread is
290  *      process_incoming_rsps.
291  *      Returns 0 on success, -EINVAL on failure.
292  */
293 static int visor_thread_start(struct visor_thread_info *thrinfo,
294                               int (*threadfn)(void *),
295                               void *thrcontext, char *name)
296 {
297         /* used to stop the thread */
298         init_completion(&thrinfo->has_stopped);
299         thrinfo->task = kthread_run(threadfn, thrcontext, name);
300         if (IS_ERR(thrinfo->task)) {
301                 pr_debug("%s failed (%ld)\n",
302                          __func__, PTR_ERR(thrinfo->task));
303                 thrinfo->id = 0;
304                 return -EINVAL;
305         }
306         thrinfo->id = thrinfo->task->pid;
307         return 0;
308 }
309
310 /**
311  *      visor_thread_stop - stop a thread for the device
312  *      @thrinfo: The thread to stop
313  *
314  *      Stop the thread and wait for completion for a minute
315  *      Returns void.
316  */
317 static void visor_thread_stop(struct visor_thread_info *thrinfo)
318 {
319         if (!thrinfo->id)
320                 return; /* thread not running */
321
322         kthread_stop(thrinfo->task);
323         /* give up if the thread has NOT died in 1 minute */
324         if (wait_for_completion_timeout(&thrinfo->has_stopped, 60 * HZ))
325                 thrinfo->id = 0;
326 }
327
328 /* DebugFS code */
329 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
330                                  size_t len, loff_t *offset)
331 {
332         int i;
333         ssize_t bytes_read = 0;
334         int str_pos = 0;
335         struct visornic_devdata *devdata;
336         char *vbuf;
337
338         if (len > MAX_BUF)
339                 len = MAX_BUF;
340         vbuf = kzalloc(len, GFP_KERNEL);
341         if (!vbuf)
342                 return -ENOMEM;
343
344         /* for each vnic channel
345          * dump out channel specific data
346          */
347         for (i = 0; i < VISORNICSOPENMAX; i++) {
348                 if (!num_visornic_open[i])
349                         continue;
350
351                 devdata = netdev_priv(num_visornic_open[i]);
352                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
353                                      "Vnic i = %d\n", i);
354                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
355                                      "netdev = %s (0x%p), MAC Addr %pM\n",
356                                      num_visornic_open[i]->name,
357                                      num_visornic_open[i],
358                                      num_visornic_open[i]->dev_addr);
359                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
360                                      "VisorNic Dev Info = 0x%p\n", devdata);
361                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
362                                      " num_rcv_bufs = %d\n",
363                                      devdata->num_rcv_bufs);
364                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
365                                      " max_outstanding_net_xmits = %d\n",
366                                      devdata->max_outstanding_net_xmits);
367                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
368                                      " upper_threshold_net_xmits = %d\n",
369                                      devdata->upper_threshold_net_xmits);
370                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
371                                      " lower_threshold_net_xmits = %d\n",
372                                      devdata->lower_threshold_net_xmits);
373                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
374                                      " queuefullmsg_logged = %d\n",
375                                      devdata->queuefullmsg_logged);
376                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
377                                      " chstat.got_rcv = %lu\n",
378                                      devdata->chstat.got_rcv);
379                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
380                                      " chstat.got_enbdisack = %lu\n",
381                                      devdata->chstat.got_enbdisack);
382                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
383                                      " chstat.got_xmit_done = %lu\n",
384                                      devdata->chstat.got_xmit_done);
385                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
386                                      " chstat.xmit_fail = %lu\n",
387                                      devdata->chstat.xmit_fail);
388                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
389                                      " chstat.sent_enbdis = %lu\n",
390                                      devdata->chstat.sent_enbdis);
391                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
392                                      " chstat.sent_promisc = %lu\n",
393                                      devdata->chstat.sent_promisc);
394                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
395                                      " chstat.sent_post = %lu\n",
396                                      devdata->chstat.sent_post);
397                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
398                                      " chstat.sent_xmit = %lu\n",
399                                      devdata->chstat.sent_xmit);
400                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
401                                      " chstat.reject_count = %lu\n",
402                                      devdata->chstat.reject_count);
403                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
404                                      " chstat.extra_rcvbufs_sent = %lu\n",
405                                      devdata->chstat.extra_rcvbufs_sent);
406                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
407                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
408                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
409                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
410                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
411                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
412                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
413                                      " n_rcvx = %lu\n", devdata->n_rcvx);
414                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
415                                      " num_rcvbuf_in_iovm = %d\n",
416                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
417                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
418                                      " alloc_failed_in_if_needed_cnt = %lu\n",
419                                      devdata->alloc_failed_in_if_needed_cnt);
420                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
421                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
422                                      devdata->alloc_failed_in_repost_rtn_cnt);
423                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
424                  *                   " inner_loop_limit_reached_cnt = %lu\n",
425                  *                   devdata->inner_loop_limit_reached_cnt);
426                  */
427                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
428                                      " found_repost_rcvbuf_cnt = %lu\n",
429                                      devdata->found_repost_rcvbuf_cnt);
430                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
431                                      " repost_found_skb_cnt = %lu\n",
432                                      devdata->repost_found_skb_cnt);
433                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
434                                      " n_repost_deficit = %lu\n",
435                                      devdata->n_repost_deficit);
436                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
437                                      " bad_rcv_buf = %lu\n",
438                                      devdata->bad_rcv_buf);
439                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
440                                      " n_rcv_packets_not_accepted = %lu\n",
441                                      devdata->n_rcv_packets_not_accepted);
442                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
443                                      " interrupts_rcvd = %llu\n",
444                                      devdata->interrupts_rcvd);
445                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
446                                      " interrupts_notme = %llu\n",
447                                      devdata->interrupts_notme);
448                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
449                                      " interrupts_disabled = %llu\n",
450                                      devdata->interrupts_disabled);
451                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
452                                      " busy_cnt = %llu\n",
453                                      devdata->busy_cnt);
454                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
455                                      " flow_control_upper_hits = %llu\n",
456                                      devdata->flow_control_upper_hits);
457                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
458                                      " flow_control_lower_hits = %llu\n",
459                                      devdata->flow_control_lower_hits);
460                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
461                                      " thread_wait_ms = %d\n",
462                                      devdata->thread_wait_ms);
463                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
464                                      " netif_queue = %s\n",
465                                      netif_queue_stopped(devdata->netdev) ?
466                                      "stopped" : "running");
467         }
468         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
469         kfree(vbuf);
470         return bytes_read;
471 }
472
473 static ssize_t enable_ints_write(struct file *file,
474                                  const char __user *buffer,
475                                  size_t count, loff_t *ppos)
476 {
477         char buf[4];
478         int i, new_value;
479         struct visornic_devdata *devdata;
480
481         if (count >= ARRAY_SIZE(buf))
482                 return -EINVAL;
483
484         buf[count] = '\0';
485         if (copy_from_user(buf, buffer, count))
486                 return -EFAULT;
487
488         i = kstrtoint(buf, 10, &new_value);
489         if (i != 0)
490                 return -EFAULT;
491
492         /* set all counts to new_value usually 0 */
493         for (i = 0; i < VISORNICSOPENMAX; i++) {
494                 if (num_visornic_open[i]) {
495                         devdata = netdev_priv(num_visornic_open[i]);
496                         /* TODO update features bit in channel */
497                 }
498         }
499
500         return count;
501 }
502
503 /**
504  *      visornic_serverdown_complete - IOPART went down, need to pause
505  *                                     device
506  *      @work: Work queue it was scheduled on
507  *
508  *      The IO partition has gone down and we need to do some cleanup
509  *      for when it comes back. Treat the IO partition as the link
510  *      being down.
511  *      Returns void.
512  */
513 static void
514 visornic_serverdown_complete(struct work_struct *work)
515 {
516         struct visornic_devdata *devdata;
517         struct net_device *netdev;
518         unsigned long flags;
519         int i = 0, count = 0;
520
521         devdata = container_of(work, struct visornic_devdata,
522                                serverdown_completion);
523         netdev = devdata->netdev;
524
525         /* Stop using datachan */
526         visor_thread_stop(&devdata->threadinfo);
527
528         /* Inform Linux that the link is down */
529         netif_carrier_off(netdev);
530         netif_stop_queue(netdev);
531
532         /* Free the skb for XMITs that haven't been serviced by the server
533          * We shouldn't have to inform Linux about these IOs because they
534          * are "lost in the ethernet"
535          */
536         skb_queue_purge(&devdata->xmitbufhead);
537
538         spin_lock_irqsave(&devdata->priv_lock, flags);
539         /* free rcv buffers */
540         for (i = 0; i < devdata->num_rcv_bufs; i++) {
541                 if (devdata->rcvbuf[i]) {
542                         kfree_skb(devdata->rcvbuf[i]);
543                         devdata->rcvbuf[i] = NULL;
544                         count++;
545                 }
546         }
547         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
548         spin_unlock_irqrestore(&devdata->priv_lock, flags);
549
550         if (devdata->server_down_complete_func)
551                 (*devdata->server_down_complete_func)(devdata->dev, 0);
552
553         devdata->server_down = true;
554         devdata->server_change_state = false;
555         devdata->server_down_complete_func = NULL;
556 }
557
558 /**
559  *      visornic_serverdown - Command has notified us that IOPART is down
560  *      @devdata: device that is being managed by IOPART
561  *
562  *      Schedule the work needed to handle the server down request. Make
563  *      sure we haven't already handled the server change state event.
564  *      Returns 0 if we scheduled the work, negative error code otherwise.
565  */
566 static int
567 visornic_serverdown(struct visornic_devdata *devdata,
568                     visorbus_state_complete_func complete_func)
569 {
570         unsigned long flags;
571
572         spin_lock_irqsave(&devdata->priv_lock, flags);
573         if (!devdata->server_down && !devdata->server_change_state) {
574                 if (devdata->going_away) {
575                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
576                         dev_dbg(&devdata->dev->device,
577                                 "%s aborting because device removal pending\n",
578                                 __func__);
579                         return -ENODEV;
580                 }
581                 devdata->server_change_state = true;
582                 devdata->server_down_complete_func = complete_func;
583                 queue_work(visornic_serverdown_workqueue,
584                            &devdata->serverdown_completion);
585         } else if (devdata->server_change_state) {
586                 dev_dbg(&devdata->dev->device, "%s changing state\n",
587                         __func__);
588                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
589                 return -EINVAL;
590         }
591         spin_unlock_irqrestore(&devdata->priv_lock, flags);
592         return 0;
593 }
594
595 /**
596  *      alloc_rcv_buf   - alloc rcv buffer to be given to the IO Partition.
597  *      @netdev: network adapter the rcv bufs are attached to.
598  *
599  *      Create an sk_buff (rcv_buf) that will be passed to the IO Partition
600  *      so that it can write rcv data into our memory space.
601  *      Return pointer to sk_buff
602  */
603 static struct sk_buff *
604 alloc_rcv_buf(struct net_device *netdev)
605 {
606         struct sk_buff *skb;
607
608         /* NOTE: the first fragment in each rcv buffer is pointed to by
609          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
610          * in length, so the firstfrag is large enough to hold 1514.
611          */
612         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
613         if (!skb)
614                 return NULL;
615         skb->dev = netdev;
616         skb->len = RCVPOST_BUF_SIZE;
617         /* current value of mtu doesn't come into play here; large
618          * packets will just end up using multiple rcv buffers all of
619          * same size
620          */
621         skb->data_len = 0;      /* alloc_skb already zeroes it out;
622                                  * set it here for clarity.
623                                  */
624         return skb;
625 }
626
627 /**
628  *      post_skb        - post a skb to the IO Partition.
629  *      @cmdrsp: cmdrsp packet to be sent to the IO Partition
630  *      @devdata: visornic_devdata to post the skb to
631  *      @skb: skb to give to the IO partition
632  *
633  *      Send the skb to the IO Partition.
634  *      Returns void
635  */
636 static inline void
637 post_skb(struct uiscmdrsp *cmdrsp,
638          struct visornic_devdata *devdata, struct sk_buff *skb)
639 {
640         cmdrsp->net.buf = skb;
641         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
642         cmdrsp->net.rcvpost.frag.pi_off =
643                 (unsigned long)skb->data & PI_PAGE_MASK;
644         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
645         cmdrsp->net.rcvpost.unique_num = devdata->uniquenum;
646
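            /* Only post the buffer if the posted fragment fits entirely
             * within a single page; a buffer that would cross a page
             * boundary is not posted.
             */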
647         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
648                 cmdrsp->net.type = NET_RCV_POST;
649                 cmdrsp->cmdtype = CMD_NET_TYPE;
650                 visorchannel_signalinsert(devdata->dev->visorchannel,
651                                           IOCHAN_TO_IOPART,
652                                           cmdrsp);
653                 atomic_inc(&devdata->num_rcvbuf_in_iovm);
654                 devdata->chstat.sent_post++;
655         }
656 }
657
658 /**
659  *      send_enbdis     - send NET_RCV_ENBDIS to IO Partition
660  *      @netdev: netdevice we are enabling/disabling; passed back as
661  *               context in the response
662  *      @state: enable = 1/disable = 0
663  *      @devdata: visornic device we are enabling/disabling
664  *
665  *      Send the enable/disable message to the IO Partition.
666  *      Returns void
667  */
668 static void
669 send_enbdis(struct net_device *netdev, int state,
670             struct visornic_devdata *devdata)
671 {
672         devdata->cmdrsp_rcv->net.enbdis.enable = state;
673         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
674         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
675         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
676         visorchannel_signalinsert(devdata->dev->visorchannel,
677                                   IOCHAN_TO_IOPART,
678                                   devdata->cmdrsp_rcv);
679         devdata->chstat.sent_enbdis++;
680 }
681
682 /**
683  *      visornic_disable_with_timeout - Disable network adapter
684  *      @netdev: netdevice to disable
685  *      @timeout: timeout to wait for disable
686  *
687  *      Disable the network adapter and inform the IO Partition that we
688  *      are disabled, reclaim memory from rcv bufs.
689  *      Returns 0 on success, negative if the IO Partition fails to
690  *      respond.
691  *
692  */
693 static int
694 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
695 {
696         struct visornic_devdata *devdata = netdev_priv(netdev);
697         int i;
698         unsigned long flags;
699         int wait = 0;
700
701         /* stop the transmit queue so nothing more can be transmitted */
702         netif_stop_queue(netdev);
703
704         /* send a msg telling the other end we are stopping incoming pkts */
705         spin_lock_irqsave(&devdata->priv_lock, flags);
706         devdata->enabled = 0;
707         devdata->enab_dis_acked = 0; /* must wait for ack */
708         spin_unlock_irqrestore(&devdata->priv_lock, flags);
709
710         /* send disable and wait for ack -- don't hold lock when sending
711          * disable because if the queue is full, insert might sleep.
712          */
713         send_enbdis(netdev, 0, devdata);
714
715         /* wait for ack to arrive before we try to free rcv buffers
716          * NOTE: the other end automatically unposts the rcv buffers when
717          * it gets a disable.
718          */
719         spin_lock_irqsave(&devdata->priv_lock, flags);
720         while ((timeout == VISORNIC_INFINITE_RESPONSE_WAIT) ||
721                (wait < timeout)) {
722                 if (devdata->enab_dis_acked)
723                         break;
724                 if (devdata->server_down || devdata->server_change_state) {
725                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
726                         dev_dbg(&netdev->dev, "%s server went away\n",
727                                 __func__);
728                         return -EIO;
729                 }
730                 set_current_state(TASK_INTERRUPTIBLE);
731                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
732                 wait += schedule_timeout(msecs_to_jiffies(10));
733                 spin_lock_irqsave(&devdata->priv_lock, flags);
734         }
735
736         /* Wait for usage to go to 1 (no other users) before freeing
737          * rcv buffers
738          */
739         if (atomic_read(&devdata->usage) > 1) {
740                 while (1) {
741                         set_current_state(TASK_INTERRUPTIBLE);
742                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
743                         schedule_timeout(msecs_to_jiffies(10));
744                         spin_lock_irqsave(&devdata->priv_lock, flags);
745                         if (atomic_read(&devdata->usage))
746                                 break;
747                 }
748         }
749
750         /* we've set enabled to 0, so we can give up the lock. */
751         spin_unlock_irqrestore(&devdata->priv_lock, flags);
752
753         /* Free rcv buffers - other end has automatically unposted them on
754          * disable
755          */
756         for (i = 0; i < devdata->num_rcv_bufs; i++) {
757                 if (devdata->rcvbuf[i]) {
758                         kfree_skb(devdata->rcvbuf[i]);
759                         devdata->rcvbuf[i] = NULL;
760                 }
761         }
762
763         /* remove references from array */
764         for (i = 0; i < VISORNICSOPENMAX; i++)
765                 if (num_visornic_open[i] == netdev) {
766                         num_visornic_open[i] = NULL;
767                         break;
768                 }
769
770         return 0;
771 }
772
773 /**
774  *      init_rcv_bufs  -- initialize receive bufs and send them to the IO Part
775  *      @netdev: struct netdevice
776  *      @devdata: visornic_devdata
777  *
778  *      Allocate rcv buffers and post them to the IO Partition.
779  *      Return 0 for success, and negative for failure.
780  */
781 static int
782 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
783 {
784         int i, count;
785
786         /* allocate a fixed number of receive buffers to post to uisnic;
787          * post them only after we've allocated the required amount
788          */
789         for (i = 0; i < devdata->num_rcv_bufs; i++) {
790                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
791                 if (!devdata->rcvbuf[i])
792                         break; /* if we failed to allocate one let us stop */
793         }
794         if (i == 0) /* couldn't even allocate one -- bail out */
795                 return -ENOMEM;
796         count = i;
797
798         /* Ensure we can alloc 2/3 of the requested number of buffers.
799          * 2/3 is an arbitrary choice; used also in ndis init.c
800          */
801         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
802                 /* free receive buffers we did alloc and then bail out */
803                 for (i = 0; i < count; i++) {
804                         kfree_skb(devdata->rcvbuf[i]);
805                         devdata->rcvbuf[i] = NULL;
806                 }
807                 return -ENOMEM;
808         }
809
810         /* post receive buffers to receive incoming input - without holding
811          * lock - we've not enabled nor started the queue so there shouldn't
812          * be any rcv or xmit activity
813          */
814         for (i = 0; i < count; i++)
815                 post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
816
817         return 0;
818 }
819
820 /**
821  *      visornic_enable_with_timeout    - send enable to IO Part
822  *      @netdev: struct net_device
823  *      @timeout: Time to wait for the ACK from the enable
824  *
825  *      Sends enable to IOVM, inits, and posts receive buffers to IOVM.
826  *      Timeout is defined in msecs (timeout of 0 specifies infinite wait).
827  *      Return 0 for success, negative for failure.
828  */
829 static int
830 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
831 {
832         int i;
833         struct visornic_devdata *devdata = netdev_priv(netdev);
834         unsigned long flags;
835         int wait = 0;
836
837         /* NOTE: the other end automatically unposts the rcv buffers when it
838          * gets a disable.
839          */
840         i = init_rcv_bufs(netdev, devdata);
841         if (i < 0) {
842                 dev_err(&netdev->dev,
843                         "%s failed to init rcv bufs (%d)\n", __func__, i);
844                 return i;
845         }
846
847         spin_lock_irqsave(&devdata->priv_lock, flags);
848         devdata->enabled = 1;
849
850         /* now we're ready, let's send an ENB to uisnic but until we get
851          * an ACK back from uisnic, we'll drop the packets
852          */
853         devdata->n_rcv_packets_not_accepted = 0;
854         spin_unlock_irqrestore(&devdata->priv_lock, flags);
855
856         /* send enable and wait for ack -- don't hold lock when sending enable
857          * because if the queue is full, insert might sleep.
858          */
859         send_enbdis(netdev, 1, devdata);
860
861         spin_lock_irqsave(&devdata->priv_lock, flags);
862         while ((timeout == VISORNIC_INFINITE_RESPONSE_WAIT) ||
863                (wait < timeout)) {
864                 if (devdata->enab_dis_acked)
865                         break;
866                 if (devdata->server_down || devdata->server_change_state) {
867                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
868                         dev_dbg(&netdev->dev, "%s server went away\n",
869                                 __func__);
870                         return -EIO;
871                 }
872                 set_current_state(TASK_INTERRUPTIBLE);
873                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
874                 wait += schedule_timeout(msecs_to_jiffies(10));
875                 spin_lock_irqsave(&devdata->priv_lock, flags);
876         }
877
878         spin_unlock_irqrestore(&devdata->priv_lock, flags);
879
880         if (!devdata->enab_dis_acked) {
881                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
882                 return -EIO;
883         }
884
885         /* find an open slot in the array to save off VisorNic references
886          * for debug
887          */
888         for (i = 0; i < VISORNICSOPENMAX; i++) {
889                 if (!num_visornic_open[i]) {
890                         num_visornic_open[i] = netdev;
891                         break;
892                 }
893         }
894
895         return 0;
896 }
897
898 /**
899  *      visornic_timeout_reset  - handle xmit timeout resets
900  *      @work: work item that scheduled the work
901  *
902  *      Transmit timeouts are typically handled by resetting the
903  *      device. For our virtual NIC we send a disable and enable
904  *      to the IOVM; if it doesn't respond we trigger a serverdown.
905  */
906 static void
907 visornic_timeout_reset(struct work_struct *work)
908 {
909         struct visornic_devdata *devdata;
910         struct net_device *netdev;
911         int response = 0;
912
913         devdata = container_of(work, struct visornic_devdata, timeout_reset);
914         netdev = devdata->netdev;
915
916         netif_stop_queue(netdev);
917         response = visornic_disable_with_timeout(netdev, 100);
918         if (response)
919                 goto call_serverdown;
920
921         response = visornic_enable_with_timeout(netdev, 100);
922         if (response)
923                 goto call_serverdown;
924         netif_wake_queue(netdev);
925
926         return;
927
928 call_serverdown:
929         visornic_serverdown(devdata, NULL);
930 }
931
932 /**
933  *      visornic_open - Enable the visornic device and mark the queue started
934  *      @netdev: netdevice to start
935  *
936  *      Enable the device and start the transmit queue.
937  *      Return 0 for success
938  */
939 static int
940 visornic_open(struct net_device *netdev)
941 {
942         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RESPONSE_WAIT);
943
944         /* start the interface's transmit queue, allowing it to accept
945          * packets for transmission
946          */
947         netif_start_queue(netdev);
948
949         return 0;
950 }
951
952 /**
953  *      visornic_close - Disables the visornic device and stops the queues
954  *      @netdev: netdevice to stop
955  *
956  *      Disable the device and stop the transmit queue.
957  *      Return 0 for success
958  */
959 static int
960 visornic_close(struct net_device *netdev)
961 {
962         netif_stop_queue(netdev);
963         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RESPONSE_WAIT);
964
965         return 0;
966 }
967
968 /**
969  *      visornic_xmit - send a packet to the IO Partition
970  *      @skb: Packet to be sent
971  *      @netdev: net device the packet is being sent from
972  *
973  *      Convert the skb to a cmdrsp so the IO Partition can understand it.
974  *      Send the XMIT command to the IO Partition for processing. This
975  *      function is protected from concurrent calls by a spinlock xmit_lock
976  *      in the net_device struct, but as soon as the function returns it
977  *      can be called again.
978  *      Returns NETDEV_TX_OK for success, NETDEV_TX_BUSY for error.
979  */
980 static int
981 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
982 {
983         struct visornic_devdata *devdata;
984         int len, firstfraglen, padlen;
985         struct uiscmdrsp *cmdrsp = NULL;
986         unsigned long flags;
987
988         devdata = netdev_priv(netdev);
989         spin_lock_irqsave(&devdata->priv_lock, flags);
990
991         if (netif_queue_stopped(netdev) || devdata->server_down ||
992             devdata->server_change_state) {
993                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
994                 devdata->busy_cnt++;
995                 dev_dbg(&netdev->dev,
996                         "%s busy - queue stopped\n", __func__);
997                 return NETDEV_TX_BUSY;
998         }
999
1000         /* sk_buff struct is used to host network data throughout all the
1001          * linux network subsystems
1002          */
1003         len = skb->len;
1004
1005         /* skb->len is the FULL length of data (including fragmentary portion)
1006          * skb->data_len is the length of the fragment portion in frags
1007          * skb->len - skb->data_len is size of the 1st fragment in skb->data
1008          * calculate the length of the first fragment that skb->data is
1009          * pointing to
1010          */
1011         firstfraglen = skb->len - skb->data_len;
1012         if (firstfraglen < ETH_HEADER_SIZE) {
1013                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1014                 devdata->busy_cnt++;
1015                 dev_err(&netdev->dev,
1016                         "%s busy - first frag too small (%d)\n",
1017                         __func__, firstfraglen);
1018                 return NETDEV_TX_BUSY;
1019         }
1020
1021         if ((len < ETH_MIN_PACKET_SIZE) &&
1022             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
1023                 /* pad the packet out to minimum size */
1024                 padlen = ETH_MIN_PACKET_SIZE - len;
1025                 memset(&skb->data[len], 0, padlen);
1026                 skb->tail += padlen;
1027                 skb->len += padlen;
1028                 len += padlen;
1029                 firstfraglen += padlen;
1030         }
1031
1032         cmdrsp = devdata->xmit_cmdrsp;
1033         /* clear cmdrsp */
1034         memset(cmdrsp, 0, SIZEOF_CMDRSP);
1035         cmdrsp->net.type = NET_XMIT;
1036         cmdrsp->cmdtype = CMD_NET_TYPE;
1037
1038         /* save the pointer to skb -- we'll need it for completion */
1039         cmdrsp->net.buf = skb;
1040
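            /* Check how many XMITs are still outstanding (sent_xmit minus
             * got_xmit_done); the second clause of the comparison handles
             * the case where sent_xmit has wrapped past ULONG_MAX while
             * got_xmit_done has not.
             */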
1041         if (((devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done) &&
1042              (devdata->chstat.sent_xmit - devdata->chstat.got_xmit_done >=
1043              devdata->max_outstanding_net_xmits)) ||
1044              ((devdata->chstat.sent_xmit < devdata->chstat.got_xmit_done) &&
1045              (ULONG_MAX - devdata->chstat.got_xmit_done +
1046               devdata->chstat.sent_xmit >=
1047               devdata->max_outstanding_net_xmits))) {
1048                 /* too many NET_XMITs queued over to IOVM - need to wait
1049                  */
1050                 devdata->chstat.reject_count++;
1051                 if (!devdata->queuefullmsg_logged &&
1052                     ((devdata->chstat.reject_count & 0x3ff) == 1))
1053                         devdata->queuefullmsg_logged = 1;
1054                 netif_stop_queue(netdev);
1055                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1056                 devdata->busy_cnt++;
1057                 dev_dbg(&netdev->dev,
1058                         "%s busy - waiting for iovm to catch up\n",
1059                         __func__);
1060                 return NETDEV_TX_BUSY;
1061         }
1062         if (devdata->queuefullmsg_logged)
1063                 devdata->queuefullmsg_logged = 0;
1064
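            /* Pass the checksum-related fields from the skb to the IO
             * Partition: the protocol, the transport/network header offsets
             * relative to skb->data (each with a validity flag), and the
             * checksum value itself.
             */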
1065         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
1066                 cmdrsp->net.xmt.lincsum.valid = 1;
1067                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
1068                 if (skb_transport_header(skb) > skb->data) {
1069                         cmdrsp->net.xmt.lincsum.hrawoff =
1070                                 skb_transport_header(skb) - skb->data;
1071                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
1072                 }
1073                 if (skb_network_header(skb) > skb->data) {
1074                         cmdrsp->net.xmt.lincsum.nhrawoff =
1075                                 skb_network_header(skb) - skb->data;
1076                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
1077                 }
1078                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
1079         } else {
1080                 cmdrsp->net.xmt.lincsum.valid = 0;
1081         }
1082
1083         /* save off the length of the entire data packet */
1084         cmdrsp->net.xmt.len = len;
1085
1086         /* copy ethernet header from first frag into cmdrsp
1087          * - everything else will be passed in frags & DMA'ed
1088          */
1089         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
1090         /* copy frags info - from skb->data we need to only provide access
1091          * beyond eth header
1092          */
1093         cmdrsp->net.xmt.num_frags =
1094                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
1095                                               MAX_PHYS_INFO,
1096                                               cmdrsp->net.xmt.frags);
1097         if (cmdrsp->net.xmt.num_frags < 0) {
1098                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1099                 devdata->busy_cnt++;
1100                 dev_err(&netdev->dev,
1101                         "%s busy - copy frags failed\n", __func__);
1102                 return NETDEV_TX_BUSY;
1103         }
1104
1105         if (!visorchannel_signalinsert(devdata->dev->visorchannel,
1106                                        IOCHAN_TO_IOPART, cmdrsp)) {
1107                 netif_stop_queue(netdev);
1108                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1109                 devdata->busy_cnt++;
1110                 dev_dbg(&netdev->dev,
1111                         "%s busy - signalinsert failed\n", __func__);
1112                 return NETDEV_TX_BUSY;
1113         }
1114
1115         /* Track the skbs that have been sent to the IOVM for XMIT */
1116         skb_queue_head(&devdata->xmitbufhead, skb);
1117
1118         /* set the last transmission start time
1119          * linux doc says: Do not forget to update netdev->trans_start to
1120          * jiffies after each new tx packet is given to the hardware.
1121          */
1122         netdev->trans_start = jiffies;
1123
1124         /* update xmt stats */
1125         devdata->net_stats.tx_packets++;
1126         devdata->net_stats.tx_bytes += skb->len;
1127         devdata->chstat.sent_xmit++;
1128
1129         /* check to see if we have hit the high watermark for
1130          * netif_stop_queue()
1131          */
1132         if (((devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done) &&
1133              (devdata->chstat.sent_xmit - devdata->chstat.got_xmit_done >=
1134               devdata->upper_threshold_net_xmits)) ||
1135             ((devdata->chstat.sent_xmit < devdata->chstat.got_xmit_done) &&
1136              (ULONG_MAX - devdata->chstat.got_xmit_done +
1137               devdata->chstat.sent_xmit >=
1138               devdata->upper_threshold_net_xmits))) {
1139                 /* too many NET_XMITs queued over to IOVM - need to wait */
1140                 netif_stop_queue(netdev); /* calling stop queue - call
1141                                            * netif_wake_queue() after lower
1142                                            * threshold
1143                                            */
1144                 dev_dbg(&netdev->dev,
1145                         "%s busy - invoking iovm flow control\n",
1146                         __func__);
1147                 devdata->flow_control_upper_hits++;
1148         }
1149         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1150
1151         /* skb will be freed when we get back NET_XMIT_DONE */
1152         return NETDEV_TX_OK;
1153 }
1154
1155 /**
1156  *      visornic_get_stats - returns net_stats of the visornic device
1157  *      @netdev: netdevice
1158  *
1159  *      Returns the net_device_stats for the device
1160  */
1161 static struct net_device_stats *
1162 visornic_get_stats(struct net_device *netdev)
1163 {
1164         struct visornic_devdata *devdata = netdev_priv(netdev);
1165
1166         return &devdata->net_stats;
1167 }
1168
1169 /**
1170  *      visornic_ioctl - ioctl function for netdevice.
1171  *      @netdev: netdevice
1172  *      @ifr: ignored
1173  *      @cmd: ignored
1174  *
1175  *      Currently not supported.
1176  *      Returns -EOPNOTSUPP
1177  */
1178 static int
1179 visornic_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
1180 {
1181         return -EOPNOTSUPP;
1182 }
1183
1184 /**
1185  *      visornic_change_mtu - changes mtu of device.
1186  *      @netdev: netdevice
1187  *      @new_mtu: value of new mtu
1188  *
1189  *      MTU cannot be changed by system, must be changed via
1190  *      CONTROLVM message. All vnics and pnics in a switch have
1191  *      to have the same MTU for everything to work.
1192  *      Currently not supported.
1193  *      Returns -EINVAL
1194  */
1195 static int
1196 visornic_change_mtu(struct net_device *netdev, int new_mtu)
1197 {
1198         return -EINVAL;
1199 }
1200
1201 /**
1202  *      visornic_set_multi - changes multicast filtering for the device
1203  *      @netdev: netdevice
1204  *
1205  *      The only flag we currently support is IFF_PROMISC
1206  *      Returns void
1207  */
1208 static void
1209 visornic_set_multi(struct net_device *netdev)
1210 {
1211         struct uiscmdrsp *cmdrsp;
1212         struct visornic_devdata *devdata = netdev_priv(netdev);
1213
1214         /* any filtering changes */
1215         if (devdata->old_flags != netdev->flags) {
1216                 if ((netdev->flags & IFF_PROMISC) !=
1217                     (devdata->old_flags & IFF_PROMISC)) {
1218                         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1219                         if (!cmdrsp)
1220                                 return;
1221                         cmdrsp->cmdtype = CMD_NET_TYPE;
1222                         cmdrsp->net.type = NET_RCV_PROMISC;
1223                         cmdrsp->net.enbdis.context = netdev;
1224                         cmdrsp->net.enbdis.enable =
1225                                 (netdev->flags & IFF_PROMISC);
1226                         visorchannel_signalinsert(devdata->dev->visorchannel,
1227                                                   IOCHAN_TO_IOPART,
1228                                                   cmdrsp);
1229                         kfree(cmdrsp);
1230                 }
1231                 devdata->old_flags = netdev->flags;
1232         }
1233 }
1234
1235 /**
1236  *      visornic_xmit_timeout - request to timeout the xmit
1237  *      @netdev: netdevice whose transmit timed out
1238  *
1239  *      Queue the work and return. Make sure we have not already
1240  *      been informed that the IO Partition is gone; if it is gone
1241  *      we will already have timed out the xmits.
1242  */
1243 static void
1244 visornic_xmit_timeout(struct net_device *netdev)
1245 {
1246         struct visornic_devdata *devdata = netdev_priv(netdev);
1247         unsigned long flags;
1248
1249         spin_lock_irqsave(&devdata->priv_lock, flags);
1250         if (devdata->going_away) {
1251                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1252                 dev_dbg(&devdata->dev->device,
1253                         "%s aborting because device removal pending\n",
1254                         __func__);
1255                 return;
1256         }
1257
1258         /* Ensure that a ServerDown message hasn't been received */
1259         if (!devdata->enabled ||
1260             (devdata->server_down && !devdata->server_change_state)) {
1261                 dev_dbg(&netdev->dev, "%s no processing\n",
1262                         __func__);
1263                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1264                 return;
1265         }
1266         queue_work(visornic_timeout_reset_workqueue, &devdata->timeout_reset);
1267         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1268 }
1269
1270 /**
1271  *      repost_return   - repost rcv bufs that have come back
1272  *      @cmdrsp: io channel command struct to post
1273  *      @devdata: visornic devdata for the device
1274  *      @skb: skb
1275  *      @netdev: netdevice
1276  *
1277  *      Repost rcv buffers that have been returned to us when
1278  *      we are finished with them.
1279  *      Returns 0 for success, negative errno for error.
1280  */
1281 static inline int
1282 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1283               struct sk_buff *skb, struct net_device *netdev)
1284 {
1285         struct net_pkt_rcv copy;
1286         int i = 0, cc, numreposted;
1287         int found_skb = 0;
1288         int status = 0;
1289
1290         copy = cmdrsp->net.rcv;
1291         switch (copy.numrcvbufs) {
1292         case 0:
1293                 devdata->n_rcv0++;
1294                 break;
1295         case 1:
1296                 devdata->n_rcv1++;
1297                 break;
1298         case 2:
1299                 devdata->n_rcv2++;
1300                 break;
1301         default:
1302                 devdata->n_rcvx++;
1303                 break;
1304         }
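        /* For each receive buffer the IO partition handed back, locate its
         * slot in devdata->rcvbuf[], replace it with a freshly allocated
         * skb and post the new buffer back to the IO partition.
         */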
1305         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1306                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1307                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1308                                 continue;
1309
1310                         if ((skb) && devdata->rcvbuf[i] == skb) {
1311                                 devdata->found_repost_rcvbuf_cnt++;
1312                                 found_skb = 1;
1313                                 devdata->repost_found_skb_cnt++;
1314                         }
1315                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1316                         if (!devdata->rcvbuf[i]) {
1317                                 devdata->num_rcv_bufs_could_not_alloc++;
1318                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1319                                 status = -ENOMEM;
1320                                 break;
1321                         }
1322                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1323                         numreposted++;
1324                         break;
1325                 }
1326         }
1327         if (numreposted != copy.numrcvbufs) {
1328                 devdata->n_repost_deficit++;
1329                 status = -EINVAL;
1330         }
1331         if (skb) {
1332                 if (found_skb) {
1333                         kfree_skb(skb);
1334                 } else {
1335                         status = -EINVAL;
1336                         devdata->bad_rcv_buf++;
1337                 }
1338         }
1339         atomic_dec(&devdata->usage);
1340         return status;
1341 }
1342
1343 /**
1344  *      visornic_rx - Handle receive packets coming back from IO Part
1345  *      @cmdrsp: Receive packet returned from IO Part
1346  *
1347  *      Got a receive packet back from the IO Part, handle it and send
1348  *      it up the stack.
1349  *      Returns void
1350  */
1351 static void
1352 visornic_rx(struct uiscmdrsp *cmdrsp)
1353 {
1354         struct visornic_devdata *devdata;
1355         struct sk_buff *skb, *prev, *curr;
1356         struct net_device *netdev;
1357         int cc, currsize, off, status;
1358         struct ethhdr *eth;
1359         unsigned long flags;
1360 #ifdef DEBUG
        int i;
1361         struct phys_info testfrags[MAX_PHYS_INFO];
1362 #endif
1363
1364         /* post new rcv buf to the other end using the cmdrsp we have at hand;
1365          * post it without holding lock - but we'll use the signal lock to
1366          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1367          * is the one we are using to repost, so copy the info we need from it.
1368          */
1369         skb = cmdrsp->net.buf;
1370         netdev = skb->dev;
1371
1372         if (!netdev) {
1373                 /* We must have previously downed this network device and
1374                  * this skb and device is no longer valid. This also means
1375                  * the skb reference was removed from devdata->rcvbuf so no
1376                  * need to search for it.
1377                  * All we can do is free the skb and return.
1378                  * Note: We crash if we try to log this here.
1379                  */
1380                 kfree_skb(skb);
1381                 return;
1382         }
1383
1384         devdata = netdev_priv(netdev);
1385
1386         spin_lock_irqsave(&devdata->priv_lock, flags);
1387         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1388
1389         /* update rcv stats - call it with priv_lock held */
1390         devdata->net_stats.rx_packets++;
1391         devdata->net_stats.rx_bytes += skb->len;
1392
1393         atomic_inc(&devdata->usage);    /* don't want a close to happen before
1394                                          *  we're done here
1395                                          */
1396
1397         /* set length to how much was ACTUALLY received -
1398          * NOTE: rcv_done_len includes actual length of data rcvd
1399          * including ethhdr
1400          */
1401         skb->len = cmdrsp->net.rcv.rcv_done_len;
1402
1403         /* test enabled while holding lock */
1404         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1405                 /* don't process it unless we're in enable mode and until
1406                  * we've gotten an ACK saying the other end got our RCV enable
1407                  */
1408                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1409                 repost_return(cmdrsp, devdata, skb, netdev);
1410                 return;
1411         }
1412
1413         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1414
1415         /* when skb was allocated, skb->dev, skb->data, skb->len and
1416          * skb->data_len were setup. AND, data has already been put into the
1417          * skb (both first frag and in frags pages)
1418          * NOTE: firstfragslen is the amount of data in skb->data and that
1419          * which is not in nr_frags or frag_list. This is now simply
1420          * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the
1421          * first frag & set data_len to show the rest; then see if we have
1422          * to chain the frag_list.
1423          */
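        /* Illustrative example (values assumed, not the actual channel
         * constants): if rcv_done_len were 3000 and RCVPOST_BUF_SIZE were
         * 2048, the first 2048 bytes stay in skb->data and the remaining
         * 952 bytes arrive in additional rcv buffers chained onto
         * frag_list below.
         */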
1424         if (skb->len > RCVPOST_BUF_SIZE) {      /* do PRECAUTIONARY check */
1425                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1426                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1427                                 dev_err(&devdata->netdev->dev,
1428                                         "repost_return failed");
1429                         return;
1430                 }
1431                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1432                 skb->tail += RCVPOST_BUF_SIZE;  /* amount in skb->data */
1433                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;    /* amount that
1434                                                                    will be in
1435                                                                    frag_list */
1436         } else {
1437                 /* data fits in this skb - no chaining - do
1438                  * PRECAUTIONARY check
1439                  */
1440                 if (cmdrsp->net.rcv.numrcvbufs != 1) {  /* should be 1 */
1441                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1442                                 dev_err(&devdata->netdev->dev,
1443                                         "repost_return failed");
1444                         return;
1445                 }
1446                 skb->tail += skb->len;
1447                 skb->data_len = 0;      /* nothing rcvd in frag_list */
1448         }
1449         off = skb_tail_pointer(skb) - skb->data;
1450
1451         /* off is the amount we bumped tail by in the head skb.
1452          * It is used to calculate the size of each chained skb below
1453          * and to index into bufline to continue the copy
1454          * (for chansocktwopc).
1455          * If necessary chain the rcv skbs together.
1456          * NOTE: rcvbuf[0] should be the same as the head skb; we need to
1457          * chain the rest to that one.
1458          * - do PRECAUTIONARY check
1459          */
1460         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1461                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1462                         dev_err(&devdata->netdev->dev, "repost_return failed");
1463                 return;
1464         }
1465
1466         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1467                 /* chain the various rcv buffers into the skb's frag_list. */
1468                 /* Note: off was initialized above  */
1469                 for (cc = 1, prev = NULL;
1470                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1471                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1472                         curr->next = NULL;
1473                         if (!prev)      /* start of list- set head */
1474                                 skb_shinfo(skb)->frag_list = curr;
1475                         else
1476                                 prev->next = curr;
1477                         prev = curr;
1478
1479                         /* should we set skb->len and skb->data_len for each
1480                          * buffer being chained??? can't hurt!
1481                          */
1482                         currsize = min(skb->len - off,
1483                                        (unsigned int)RCVPOST_BUF_SIZE);
1484                         curr->len = currsize;
1485                         curr->tail += currsize;
1486                         curr->data_len = 0;
1487                         off += currsize;
1488                 }
1489 #ifdef DEBUG
1490                 /* assert skb->len == off */
1491                 if (skb->len != off) {
1492                         dev_err(&devdata->netdev->dev,
1493                                 "%s something wrong; skb->len:%d != off:%d\n",
1494                                 netdev->name, skb->len, off);
1495                 }
1496                 /* test code */
1497                 cc = util_copy_fragsinfo_from_skb("rcvchaintest", skb,
1498                                                   RCVPOST_BUF_SIZE,
1499                                                   MAX_PHYS_INFO, testfrags);
1500                 if (cc != cmdrsp->net.rcv.numrcvbufs) {
1501                         dev_err(&devdata->netdev->dev,
1502                                 "**** %s Something wrong; rcvd chain length %d different from one we calculated %d\n",
1503                                 netdev->name, cmdrsp->net.rcv.numrcvbufs, cc);
1504                 }
1505                 for (i = 0; i < cc; i++) {
1506                         dev_info(&devdata->netdev->dev,
1507                                 "test:RCVPOST_BUF_SIZE:%d[%d] pfn:%llu off:0x%x len:%d\n",
1508                                 RCVPOST_BUF_SIZE, i, testfrags[i].pi_pfn,
1509                                 testfrags[i].pi_off, testfrags[i].pi_len);
1510                 }
1511 #endif
1512         }
1513
1514         /* set up packet's protocol type using ethernet header - this
1515          * sets up skb->pkt_type & it also PULLS out the eth header
1516          */
1517         skb->protocol = eth_type_trans(skb, netdev);
1518
1519         eth = eth_hdr(skb);
1520
1521         skb->csum = 0;
1522         skb->ip_summed = CHECKSUM_NONE;
1523
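        /* The do/while(0) below acts as a filter: 'break' accepts the
         * packet and falls through to netif_rx(); reaching the bottom of
         * the loop body drops the packet and reposts the receive buffer.
         */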
1524         do {
1525                 if (netdev->flags & IFF_PROMISC)
1526                         break;  /* accept all packets */
1527                 if (skb->pkt_type == PACKET_BROADCAST) {
1528                         if (netdev->flags & IFF_BROADCAST)
1529                                 break;  /* accept all broadcast packets */
1530                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1531                         if ((netdev->flags & IFF_MULTICAST) &&
1532                             (netdev_mc_count(netdev))) {
1533                                 struct netdev_hw_addr *ha;
1534                                 int found_mc = 0;
1535
1536                                 /* only accept multicast packets that we can
1537                                  * find in our multicast address list
1538                                  */
1539                                 netdev_for_each_mc_addr(ha, netdev) {
1540                                         if (ether_addr_equal(eth->h_dest,
1541                                                              ha->addr)) {
1542                                                 found_mc = 1;
1543                                                 break;
1544                                         }
1545                                 }
1546                                 if (found_mc)
1547                                         break;  /* accept packet, dest
1548                                                    matches a multicast
1549                                                    address */
1550                         }
1551                 } else if (skb->pkt_type == PACKET_HOST) {
1552                         break;  /* accept packet, h_dest must match vnic
1553                                    mac address */
1554                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1555                         /* something is not right */
1556                         dev_err(&devdata->netdev->dev,
1557                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1558                                 netdev->name, eth->h_dest, netdev->dev_addr);
1559                 }
1560                 /* drop packet - don't forward it up to OS */
1561                 devdata->n_rcv_packets_not_accepted++;
1562                 repost_return(cmdrsp, devdata, skb, netdev);
1563                 return;
1564         } while (0);
1565
1566         status = netif_rx(skb);
1567         /* netif_rx returns various values, but in practice most drivers
1568          * ignore the return value
1569          */
1570
1571         skb = NULL;
1572         /*
1573          * whether the packet got dropped or handled, the skb is freed by
1574          * kernel code, so we shouldn't free it. but we should repost a
1575          * new rcv buffer.
1576          */
1577         repost_return(cmdrsp, devdata, skb, netdev);
1578 }
1579
1580 /**
1581  *      devdata_initialize      - Initialize devdata structure
1582  *      @devdata: visornic_devdata structure to initialize
1583  *      @dev: visor_device it belongs to
1584  *
1585  *      Setup initial values for the visornic based on channel and default
1586  *      values.
1587  *      Returns a pointer to the devdata if successful, else NULL
1588  */
1589 static struct visornic_devdata *
1590 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1591 {
1592         int devnum = -1;
1593
1594         if (!devdata)
1595                 return NULL;
1596         memset(devdata, '\0', sizeof(struct visornic_devdata));
1597         spin_lock(&dev_num_pool_lock);
1598         devnum = find_first_zero_bit(dev_num_pool, MAXDEVICES);
1599         if (devnum < MAXDEVICES)
1600                 set_bit(devnum, dev_num_pool);
1601         spin_unlock(&dev_num_pool_lock);
1602         if (devnum >= MAXDEVICES)
1604                 return NULL;
1605         devdata->devnum = devnum;
1606         devdata->dev = dev;
1607         strncpy(devdata->name, dev_name(&dev->device), sizeof(devdata->name));
1608         spin_lock(&lock_all_devices);
1609         list_add_tail(&devdata->list_all, &list_all_devices);
1610         spin_unlock(&lock_all_devices);
1611         return devdata;
1612 }
1613
1614 /**
1615  *      devdata_release - Frees up references in devdata
1616  *      @devdata: struct to clean up
1617  *
1618  *      Frees up references in devdata.
1619  *      Returns void
1620  */
1621 static void devdata_release(struct visornic_devdata *devdata)
1622 {
1623         spin_lock(&dev_num_pool_lock);
1624         clear_bit(devdata->devnum, dev_num_pool);
1625         spin_unlock(&dev_num_pool_lock);
1626         spin_lock(&lock_all_devices);
1627         list_del(&devdata->list_all);
1628         spin_unlock(&lock_all_devices);
1629         kfree(devdata->rcvbuf);
1630         kfree(devdata->cmdrsp_rcv);
1631         kfree(devdata->xmit_cmdrsp);
1632 }
1633
1634 static const struct net_device_ops visornic_dev_ops = {
1635         .ndo_open = visornic_open,
1636         .ndo_stop = visornic_close,
1637         .ndo_start_xmit = visornic_xmit,
1638         .ndo_get_stats = visornic_get_stats,
1639         .ndo_do_ioctl = visornic_ioctl,
1640         .ndo_change_mtu = visornic_change_mtu,
1641         .ndo_tx_timeout = visornic_xmit_timeout,
1642         .ndo_set_rx_mode = visornic_set_multi,
1643 };
1644
1645 /**
1646  *      send_rcv_posts_if_needed - send receive buffers to the IO Partition
1647  *      @devdata: visornic device
1648  *
1649  *      Send receive buffers to the IO Partition.
1650  *      Returns void
1651  */
1652 static void
1653 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1654 {
1655         int i;
1656         struct net_device *netdev;
1657         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1658         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1659
1660         /* don't do this until vnic is marked ready */
1661         if (!(devdata->enabled && devdata->enab_dis_acked))
1662                 return;
1663
1664         netdev = devdata->netdev;
1665         rcv_bufs_allocated = 0;
1666         /* this code is trying to prevent getting stuck here forever,
1667          * but still retry if you can't allocate them all this time.
1668          */
1669         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1670         while (cur_num_rcv_bufs_to_alloc > 0) {
1671                 cur_num_rcv_bufs_to_alloc--;
1672                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1673                         if (devdata->rcvbuf[i])
1674                                 continue;
1675                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1676                         if (!devdata->rcvbuf[i]) {
1677                                 devdata->alloc_failed_in_if_needed_cnt++;
1678                                 break;
1679                         }
1680                         rcv_bufs_allocated++;
1681                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1682                         devdata->chstat.extra_rcvbufs_sent++;
1683                 }
1684         }
1685         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1686 }
1687
1688 /**
1689  *      drain_queue     - drains the response queue
1690  *      @cmdrsp: io channel command response message
1691  *      @devdata: visornic device to drain
1692  *
1693  *      Drain the response queue of any responses from the IO partition.
1694  *      Process the responses as we get them.
1695  *      Returns when response queue is empty or when the thread stops.
1696  */
1697 static void
1698 drain_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
1699 {
1700         unsigned long flags;
1701         struct net_device *netdev;
1702
1703         /* drain queue */
1704         while (1) {
1705                 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1706                  * moment */
1707                 if (!visorchannel_signalremove(devdata->dev->visorchannel,
1708                                                IOCHAN_FROM_IOPART,
1709                                                cmdrsp))
1710                         break; /* queue empty */
1711
1712                 switch (cmdrsp->net.type) {
1713                 case NET_RCV:
1714                         devdata->chstat.got_rcv++;
1715                         /* process incoming packet */
1716                         visornic_rx(cmdrsp);
1717                         break;
1718                 case NET_XMIT_DONE:
1719                         spin_lock_irqsave(&devdata->priv_lock, flags);
1720                         devdata->chstat.got_xmit_done++;
1721                         if (cmdrsp->net.xmtdone.xmt_done_result)
1722                                 devdata->chstat.xmit_fail++;
1723                         /* only call queue wake if we stopped it */
1724                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1725                         /* ASSERT netdev == vnicinfo->netdev; */
1726                         if ((netdev == devdata->netdev) &&
1727                             netif_queue_stopped(netdev)) {
1728                                 /* check to see if we have crossed
1729                                  * the lower watermark for
1730                                  * netif_wake_queue()
1731                                  */
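                                /* The second clause below handles the
                                 * case where sent_xmit has wrapped past
                                 * ULONG_MAX while got_xmit_done has not
                                 * wrapped yet.
                                 */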
1732                                 if (((devdata->chstat.sent_xmit >=
1733                                     devdata->chstat.got_xmit_done) &&
1734                                     (devdata->chstat.sent_xmit -
1735                                     devdata->chstat.got_xmit_done <=
1736                                     devdata->lower_threshold_net_xmits)) ||
1737                                     ((devdata->chstat.sent_xmit <
1738                                     devdata->chstat.got_xmit_done) &&
1739                                     (ULONG_MAX - devdata->chstat.got_xmit_done
1740                                     + devdata->chstat.sent_xmit <=
1741                                     devdata->lower_threshold_net_xmits))) {
1742                                         /* enough NET_XMITs completed
1743                                          * so can restart netif queue
1744                                          */
1745                                         netif_wake_queue(netdev);
1746                                         devdata->flow_control_lower_hits++;
1747                                 }
1748                         }
1749                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1750                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1751                         kfree_skb(cmdrsp->net.buf);
1752                         break;
1753                 case NET_RCV_ENBDIS_ACK:
1754                         devdata->chstat.got_enbdisack++;
1755                         netdev = (struct net_device *)
1756                                  cmdrsp->net.enbdis.context;
1757                         spin_lock_irqsave(&devdata->priv_lock, flags);
1758                         devdata->enab_dis_acked = 1;
1759                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1760
1761                         if (devdata->server_down &&
1762                             devdata->server_change_state) {
1763                                 /* Inform Linux that the link is up */
1764                                 devdata->server_down = false;
1765                                 devdata->server_change_state = false;
1766                                 netif_wake_queue(netdev);
1767                                 netif_carrier_on(netdev);
1768                         }
1769                         break;
1770                 case NET_CONNECT_STATUS:
1771                         netdev = devdata->netdev;
1772                         if (cmdrsp->net.enbdis.enable == 1) {
1773                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1774                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1775                                 spin_unlock_irqrestore(&devdata->priv_lock,
1776                                                        flags);
1777                                 netif_wake_queue(netdev);
1778                                 netif_carrier_on(netdev);
1779                         } else {
1780                                 netif_stop_queue(netdev);
1781                                 netif_carrier_off(netdev);
1782                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1783                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1784                                 spin_unlock_irqrestore(&devdata->priv_lock,
1785                                                        flags);
1786                         }
1787                         break;
1788                 default:
1789                         break;
1790                 }
1791                 /* cmdrsp is now available for reuse  */
1792
1793                 if (kthread_should_stop())
1794                         break;
1795         }
1796 }
1797
1798 /**
1799  *      process_incoming_rsps   - Checks the status of the response queue.
1800  *      @v: void pointer to the visornic devdata
1801  *
1802  *      Main function of the vnic_incoming thread. Periodically check the
1803  *      response queue and drain it if needed.
1804  *      Returns when thread has stopped.
1805  */
1806 static int
1807 process_incoming_rsps(void *v)
1808 {
1809         struct visornic_devdata *devdata = v;
1810         struct uiscmdrsp *cmdrsp = NULL;
1811         const int SZ = SIZEOF_CMDRSP;
1812
1813         cmdrsp = kmalloc(SZ, GFP_ATOMIC);
1814         if (!cmdrsp)
1815                 complete_and_exit(&devdata->threadinfo.has_stopped, 0);
1816
1817         while (1) {
1818                 wait_event_interruptible_timeout(
1819                         devdata->rsp_queue, (atomic_read(
1820                                              &devdata->interrupt_rcvd) == 1),
1821                                 msecs_to_jiffies(devdata->thread_wait_ms));
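                /* We wake either because the IO partition flagged an
                 * interrupt or because the poll interval (thread_wait_ms)
                 * expired; either way, fall through and service the queue.
                 */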
1822
1823                 /* periodically check to see if there are any rcv bufs which
1824                  * need to get sent to the IOSP. This can only happen if
1825                  * we run out of memory when trying to allocate skbs.
1826                  */
1827                 atomic_set(&devdata->interrupt_rcvd, 0);
1828                 send_rcv_posts_if_needed(devdata);
1829                 drain_queue(cmdrsp, devdata);
1830                 if (kthread_should_stop())
1831                         break;
1832         }
1833
1834         kfree(cmdrsp);
1835         complete_and_exit(&devdata->threadinfo.has_stopped, 0);
1836 }
1837
1838 /**
1839  *      visornic_probe  - probe function for visornic devices
1840  *      @dev: The visor device discovered
1841  *
1842  *      Called when visorbus discovers a visornic device on its
1843  *      bus. It creates a new visornic ethernet adapter.
1844  *      Returns 0 or negative for error.
1845  */
1846 static int visornic_probe(struct visor_device *dev)
1847 {
1848         struct visornic_devdata *devdata = NULL;
1849         struct net_device *netdev = NULL;
1850         int err;
1851         int channel_offset = 0;
1852         u64 features;
1853
1854         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1855         if (!netdev) {
1856                 dev_err(&dev->device,
1857                         "%s alloc_etherdev failed\n", __func__);
1858                 return -ENOMEM;
1859         }
1860
1861         netdev->netdev_ops = &visornic_dev_ops;
1862         netdev->watchdog_timeo = (5 * HZ);
1863         SET_NETDEV_DEV(netdev, &dev->device);
1864
1865         /* Get MAC address from channel and read it into the device. */
1866         netdev->addr_len = ETH_ALEN;
1867         channel_offset = offsetof(struct spar_io_channel_protocol,
1868                                   vnic.macaddr);
1869         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1870                                     ETH_ALEN);
1871         if (err < 0) {
1872                 dev_err(&dev->device,
1873                         "%s failed to get mac addr from chan (%d)\n",
1874                         __func__, err);
1875                 goto cleanup_netdev;
1876         }
1877
1878         devdata = devdata_initialize(netdev_priv(netdev), dev);
1879         if (!devdata) {
1880                 dev_err(&dev->device,
1881                         "%s devdata_initialize failed\n", __func__);
1882                 err = -ENOMEM;
1883                 goto cleanup_netdev;
1884         }
1885
1886         devdata->netdev = netdev;
1887         dev_set_drvdata(&dev->device, devdata);
1888         init_waitqueue_head(&devdata->rsp_queue);
1889         spin_lock_init(&devdata->priv_lock);
1890         devdata->enabled = 0; /* not yet */
1891         atomic_set(&devdata->usage, 1);
1892
1893         /* Setup rcv bufs */
1894         channel_offset = offsetof(struct spar_io_channel_protocol,
1895                                   vnic.num_rcv_bufs);
1896         err = visorbus_read_channel(dev, channel_offset,
1897                                     &devdata->num_rcv_bufs, 4);
1898         if (err) {
1899                 dev_err(&dev->device,
1900                         "%s failed to get #rcv bufs from chan (%d)\n",
1901                         __func__, err);
1902                 goto cleanup_netdev;
1903         }
1904
1905         devdata->rcvbuf = kzalloc(sizeof(struct sk_buff *) *
1906                                   devdata->num_rcv_bufs, GFP_KERNEL);
1907         if (!devdata->rcvbuf) {
1908                 err = -ENOMEM;
1909                 goto cleanup_rcvbuf;
1910         }
1911
1912         /* set the net_xmit outstanding threshold */
1913         /* always leave two slots open but you should have 3 at a minimum */
1914         devdata->max_outstanding_net_xmits =
1915                 max(3, ((devdata->num_rcv_bufs / 3) - 2));
1916         devdata->upper_threshold_net_xmits =
1917                 max(2, devdata->max_outstanding_net_xmits - 1);
1918         devdata->lower_threshold_net_xmits =
1919                 max(1, devdata->max_outstanding_net_xmits / 2);
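        /* Illustrative numbers only: with num_rcv_bufs = 64 these work out
         * to max_outstanding_net_xmits = 19, upper_threshold_net_xmits = 18
         * and lower_threshold_net_xmits = 9.
         */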
1920
1921         skb_queue_head_init(&devdata->xmitbufhead);
1922
1923         /* create a cmdrsp we can use to post and unpost rcv buffers */
1924         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1925         if (!devdata->cmdrsp_rcv) {
1926                 err = -ENOMEM;
1927                 goto cleanup_cmdrsp_rcv;
1928         }
1929         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1930         if (!devdata->xmit_cmdrsp) {
1931                 err = -ENOMEM;
1932                 goto cleanup_xmit_cmdrsp;
1933         }
1934         INIT_WORK(&devdata->serverdown_completion,
1935                   visornic_serverdown_complete);
1936         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1937         devdata->server_down = false;
1938         devdata->server_change_state = false;
1939
1940         /* set the default mtu */
1941         channel_offset = offsetof(struct spar_io_channel_protocol,
1942                                   vnic.mtu);
1943         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1944         if (err) {
1945                 dev_err(&dev->device,
1946                         "%s failed to get mtu from chan (%d)\n",
1947                         __func__, err);
1948                 goto cleanup_xmit_cmdrsp;
1949         }
1950
1951         /* TODO: Setup Interrupt information */
1952         /* Let's start our threads to get responses */
1953         channel_offset = offsetof(struct spar_io_channel_protocol,
1954                                   channel_header.features);
1955         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1956         if (err) {
1957                 dev_err(&dev->device,
1958                         "%s failed to get features from chan (%d)\n",
1959                         __func__, err);
1960                 goto cleanup_xmit_cmdrsp;
1961         }
1962
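        /* Advertise that this driver polls the channel rather than relying
         * on interrupts; see process_incoming_rsps().
         */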
1963         features |= ULTRA_IO_CHANNEL_IS_POLLING;
1964         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1965         if (err) {
1966                 dev_err(&dev->device,
1967                         "%s failed to set features in chan (%d)\n",
1968                         __func__, err);
1969                 goto cleanup_xmit_cmdrsp;
1970         }
1971
1972         err = register_netdev(netdev);
1973         if (err) {
1974                 dev_err(&dev->device,
1975                         "%s register_netdev failed (%d)\n", __func__, err);
1976                 goto cleanup_xmit_cmdrsp;
1977         }
1978
1979         /* create debugfs directories */
1980         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1981                                                       visornic_debugfs_dir);
1982         if (!devdata->eth_debugfs_dir) {
1983                 dev_err(&dev->device,
1984                         "%s debugfs_create_dir %s failed\n",
1985                         __func__, netdev->name);
1986                 err = -ENOMEM;
1987                 goto cleanup_xmit_cmdrsp;
1988         }
1989
1990         devdata->thread_wait_ms = 2;
1991         visor_thread_start(&devdata->threadinfo, process_incoming_rsps,
1992                            devdata, "vnic_incoming");
1993
1994         dev_info(&dev->device, "%s success netdev=%s\n",
1995                  __func__, netdev->name);
1996         return 0;
1997
1998 cleanup_xmit_cmdrsp:
1999         kfree(devdata->xmit_cmdrsp);
2000
2001 cleanup_cmdrsp_rcv:
2002         kfree(devdata->cmdrsp_rcv);
2003
2004 cleanup_rcvbuf:
2005         kfree(devdata->rcvbuf);
2006
2007 cleanup_netdev:
2008         free_netdev(netdev);
2009         return err;
2010 }
2011
2012 /**
2013  *      host_side_disappeared   - IO part is gone.
2014  *      @devdata: device object
2015  *
2016  *      IO partition servicing this device is gone, do cleanup
2017  *      Returns void.
2018  */
2019 static void host_side_disappeared(struct visornic_devdata *devdata)
2020 {
2021         unsigned long flags;
2022
2023         spin_lock_irqsave(&devdata->priv_lock, flags);
2024         sprintf(devdata->name, "<dev#%d-history>", devdata->devnum);
2025         devdata->dev = NULL;   /* indicate device destroyed */
2026         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2027 }
2028
2029 /**
2030  *      visornic_remove         - Called when visornic dev goes away
2031  *      @dev: visornic device that is being removed
2032  *
2033  *      Called when DEVICE_DESTROY gets called to remove device.
2034  *      Returns void
2035  */
2036 static void visornic_remove(struct visor_device *dev)
2037 {
2038         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2039         struct net_device *netdev;
2040         unsigned long flags;
2041
2042         if (!devdata) {
2043                 dev_err(&dev->device, "%s no devdata\n", __func__);
2044                 return;
2045         }
2046         spin_lock_irqsave(&devdata->priv_lock, flags);
2047         if (devdata->going_away) {
2048                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2049                 dev_err(&dev->device, "%s already being removed\n", __func__);
2050                 return;
2051         }
2052         devdata->going_away = true;
2053         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2054         netdev = devdata->netdev;
2055         if (!netdev) {
2056                 dev_err(&dev->device, "%s no net device\n", __func__);
2057                 return;
2058         }
2059
2060         /* going_away prevents new items being added to the workqueues */
2061         flush_workqueue(visornic_serverdown_workqueue);
2062         flush_workqueue(visornic_timeout_reset_workqueue);
2063
2064         debugfs_remove_recursive(devdata->eth_debugfs_dir);
2065
2066         unregister_netdev(netdev);  /* this will call visornic_close() */
2067
2068         /* this had to wait until last because visornic_close() /
2069          * visornic_disable_with_timeout() polls waiting for state that is
2070          * only updated by the thread
2071          */
2072         if (devdata->threadinfo.id) {
2073                 visor_thread_stop(&devdata->threadinfo);
2074                 if (devdata->threadinfo.id) {
2075                         dev_err(&dev->device, "%s cannot stop worker thread\n",
2076                                 __func__);
2077                         return;
2078                 }
2079         }
2080
2081         dev_set_drvdata(&dev->device, NULL);
2082         host_side_disappeared(devdata);
2083         devdata_release(devdata);
2084         free_netdev(netdev);
2085 }
2086
2087 /**
2088  *      visornic_pause          - Called when IO Part disappears
2089  *      @dev: visornic device that is being serviced
2090  *      @complete_func: call when finished.
2091  *
2092  *      Called when the IO Partition has gone down. Need to free
2093  *      up resources and wait for IO partition to come back. Mark
2094  *      link as down and don't attempt any DMA. When we have freed
2095  *      memory call the complete_func so that Command knows we are
2096  *      done. If we don't call complete_func, IO part will never
2097  *      come back.
2098  *      Returns 0 for success.
2099  */
2100 static int visornic_pause(struct visor_device *dev,
2101                           visorbus_state_complete_func complete_func)
2102 {
2103         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2104
2105         visornic_serverdown(devdata, complete_func);
2106         return 0;
2107 }
2108
2109 /**
2110  *      visornic_resume         - Called when IO part has recovered
2111  *      @dev: visornic device that is being serviced
2112  *      @complete_func: call when finished
2113  *
2114  *      Called when the IO partition has recovered. Reestablish
2115  *      connection to the IO part and set the link up. Okay to do
2116  *      DMA again.
2117  *      Returns 0 for success.
2118  */
2119 static int visornic_resume(struct visor_device *dev,
2120                            visorbus_state_complete_func complete_func)
2121 {
2122         struct visornic_devdata *devdata;
2123         struct net_device *netdev;
2124         unsigned long flags;
2125
2126         devdata = dev_get_drvdata(&dev->device);
2127         if (!devdata) {
2128                 dev_err(&dev->device, "%s no devdata\n", __func__);
2129                 return -EINVAL;
2130         }
2131
2132         netdev = devdata->netdev;
2133
2134         if (devdata->server_down && !devdata->server_change_state) {
2135                 devdata->server_change_state = true;
2136                 /* Must transition channel to ATTACHED state BEFORE
2137                  * we can start using the device again.
2138                  * TODO: State transitions
2139                  */
2140                 visor_thread_start(&devdata->threadinfo, process_incoming_rsps,
2141                                    devdata, "vnic_incoming");
2142                 init_rcv_bufs(netdev, devdata);
2143                 spin_lock_irqsave(&devdata->priv_lock, flags);
2144                 devdata->enabled = 1;
2145
2146                 /* Now we're ready, let's send an ENB to uisnic but until
2147                  * we get an ACK back from uisnic, we'll drop the packets
2148                  */
2149                 devdata->enab_dis_acked = 0;
2150                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2151
2152                 /* send enable and wait for ack - don't hold lock when
2153                  * sending enable because if the queue is full, the insert
2154                  * might sleep.
2155                  */
2156                 send_enbdis(netdev, 1, devdata);
2157         } else if (devdata->server_change_state) {
2158                 dev_err(&dev->device, "%s server_change_state\n",
2159                         __func__);
2160                 return -EIO;
2161         }
2162
2163         complete_func(dev, 0);
2164         return 0;
2165 }
2166
2167 /**
2168  *      visornic_init   - Init function
2169  *
2170  *      Init function for the visornic driver. Do initial driver setup
2171  *      and wait for devices.
2172  *      Returns 0 for success, negative for error.
2173  */
2174 static int visornic_init(void)
2175 {
2176         struct dentry *ret;
2177         int err = -ENOMEM;
2178
2179         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2180         if (!visornic_debugfs_dir)
2181                 return err;
2182
2183         ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2184                                   &debugfs_info_fops);
2185         if (!ret)
2186                 goto cleanup_debugfs;
2187         ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2188                                   NULL, &debugfs_enable_ints_fops);
2189         if (!ret)
2190                 goto cleanup_debugfs;
2191
2192         /* create workqueue for serverdown completion */
2193         visornic_serverdown_workqueue =
2194                 create_singlethread_workqueue("visornic_serverdown");
2195         if (!visornic_serverdown_workqueue)
2196                 goto cleanup_debugfs;
2197
2198         /* create workqueue for tx timeout reset */
2199         visornic_timeout_reset_workqueue =
2200                 create_singlethread_workqueue("visornic_timeout_reset");
2201         if (!visornic_timeout_reset_workqueue)
2202                 goto cleanup_workqueue;
2203
2204         spin_lock_init(&dev_num_pool_lock);
2205         dev_num_pool = kcalloc(BITS_TO_LONGS(MAXDEVICES),
                                    sizeof(unsigned long), GFP_KERNEL);
2206         if (!dev_num_pool)
2207                 goto cleanup_workqueue;
2208
2209         visorbus_register_visor_driver(&visornic_driver);
2210         return 0;
2211
2212 cleanup_workqueue:
2213         flush_workqueue(visornic_serverdown_workqueue);
2214         destroy_workqueue(visornic_serverdown_workqueue);
2215         if (visornic_timeout_reset_workqueue) {
2216                 flush_workqueue(visornic_timeout_reset_workqueue);
2217                 destroy_workqueue(visornic_timeout_reset_workqueue);
2218         }
2219 cleanup_debugfs:
2220         debugfs_remove_recursive(visornic_debugfs_dir);
2221
2222         return err;
2223 }
2224
2225 /**
2226  *      visornic_cleanup        - driver exit routine
2227  *
2228  *      Unregister driver from the bus and free up memory.
2229  */
2230 static void visornic_cleanup(void)
2231 {
2232         visorbus_unregister_visor_driver(&visornic_driver);
2233
2234         if (visornic_serverdown_workqueue) {
2235                 flush_workqueue(visornic_serverdown_workqueue);
2236                 destroy_workqueue(visornic_serverdown_workqueue);
2237         }
2238         if (visornic_timeout_reset_workqueue) {
2239                 flush_workqueue(visornic_timeout_reset_workqueue);
2240                 destroy_workqueue(visornic_timeout_reset_workqueue);
2241         }
2242         debugfs_remove_recursive(visornic_debugfs_dir);
2243
2244         kfree(dev_num_pool);
2245         dev_num_pool = NULL;
2246 }
2247
2248 module_init(visornic_init);
2249 module_exit(visornic_cleanup);
2250
2251 MODULE_AUTHOR("Unisys");
2252 MODULE_LICENSE("GPL");
2253 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2254 MODULE_VERSION("1.0.0.0");