windows: target Windows 7 and add support for more than 64 CPUs
author Sitsofe Wheeler <sitsofe@yahoo.com>
Sun, 25 Mar 2018 19:56:07 +0000 (20:56 +0100)
committer Sitsofe Wheeler <sitsofe@yahoo.com>
Tue, 27 Mar 2018 19:18:38 +0000 (20:18 +0100)
Introduce support for targeting the build to either Windows XP or
Windows 7 (the default). When targeting Windows 7 this allows us to use
the processor group APIs, which are required to support more than 64
CPUs.

These changes mean if you want a Windows binary that supports versions
of Windows below Windows 7/Windows Server 2008 R2 you will need to
explicitly use the --target-win-ver=xp configure option at build time.
Such builds will lack features (such as the ability to access CPUs
beyond those in fio's default process group) that are dependent on
recent Windows APIs.

Signed-off-by: Sitsofe Wheeler <sitsofe@yahoo.com>
configure
os/os-windows-7.h [new file with mode: 0644]
os/os-windows.h
os/windows/posix.c
os/windows/posix/include/arpa/inet.h
os/windows/posix/include/poll.h
server.c

index f6358630562229029bb409ee98fbe94df939f7d1..b3638807432749414c8ef28a46d1d41e159092bc 100755 (executable)
--- a/configure
+++ b/configure
@@ -167,6 +167,8 @@ for opt do
   ;;
   --build-32bit-win) build_32bit_win="yes"
   ;;
+  --target-win-ver=*) target_win_ver="$optarg"
+  ;;
   --build-static) build_static="yes"
   ;;
   --enable-gfio) gfio_check="yes"
@@ -213,6 +215,7 @@ if test "$show_help" = "yes" ; then
   echo "--cc=                   Specify compiler to use"
   echo "--extra-cflags=         Specify extra CFLAGS to pass to compiler"
   echo "--build-32bit-win       Enable 32-bit build on Windows"
+  echo "--target-win-ver=       Minimum version of Windows to target (XP or 7)"
   echo "--build-static          Build a static fio"
   echo "--esx                   Configure build options for esx"
   echo "--enable-gfio           Enable building of gtk gfio"
@@ -333,7 +336,23 @@ CYGWIN*)
     output_sym "CONFIG_32BIT"
   else
     output_sym "CONFIG_64BIT_LLP64"
+  fi
+
+  # Upper-case the requested version so that e.g. "xp" and "XP" both match
+  target_win_ver=$(echo "$target_win_ver" | tr '[:lower:]' '[:upper:]')
+  if test -z "$target_win_ver"; then
+    # Default Windows API target
+    target_win_ver="7"
+  fi
+  if test "$target_win_ver" = "XP"; then
+    output_sym "CONFIG_WINDOWS_XP"
+  elif test "$target_win_ver" = "7"; then
+    output_sym "CONFIG_WINDOWS_7"
+    CFLAGS="$CFLAGS -D_WIN32_WINNT=0x0601"
+  else
+    fatal "Unknown target Windows version"
   fi
+
   # We need this to be output_sym'd here because this is Windows specific.
   # The regular configure path never sets this config.
   output_sym "CONFIG_WINDOWSAIO"
@@ -498,6 +515,9 @@ fi
 print_config "Operating system" "$targetos"
 print_config "CPU" "$cpu"
 print_config "Big endian" "$bigendian"
+if test ! -z "$target_win_ver"; then
+  print_config "Target Windows version" "$target_win_ver"
+fi
 print_config "Compiler" "$cc"
 print_config "Cross compile" "$cross_compile"
 echo
diff --git a/os/os-windows-7.h b/os/os-windows-7.h
new file mode 100644 (file)
index 0000000..f5ddb8e
--- /dev/null
@@ -0,0 +1,367 @@
+#define FIO_MAX_CPUS           512 /* From Hyper-V 2016's max logical processors */
+#define FIO_CPU_MASK_STRIDE    64 /* CPUs (bits) held by one uint64_t mask row */
+#define FIO_CPU_MASK_ROWS      (FIO_MAX_CPUS / FIO_CPU_MASK_STRIDE) /* rows needed to cover FIO_MAX_CPUS */
+
+typedef struct { /* CPU bitmask split into FIO_CPU_MASK_ROWS rows of FIO_CPU_MASK_STRIDE bits */
+       uint64_t row[FIO_CPU_MASK_ROWS];
+} os_cpu_mask_t;
+
+#define FIO_HAVE_CPU_ONLINE_SYSCONF
+/*
+ * Return all processors regardless of processor group.
+ * The value is whatever GetMaximumProcessorCount() reports for
+ * ALL_PROCESSOR_GROUPS, converted to unsigned int.
+ */
+static inline unsigned int cpus_online(void)
+{
+       return GetMaximumProcessorCount(ALL_PROCESSOR_GROUPS);
+}
+
+/*
+ * Write every row of cpumask to the FD_PROCESS debug output, one line per
+ * row. Rows are uint64_t, so they are formatted with PRIu64: "%lu" expects
+ * an unsigned long, which is only 32 bits wide on Windows.
+ */
+static inline void print_mask(os_cpu_mask_t *cpumask)
+{
+       for (int i = 0; i < FIO_CPU_MASK_ROWS; i++)
+               dprint(FD_PROCESS, "cpumask[%d]=%" PRIu64 "\n", i,
+                      cpumask->row[i]);
+}
+
+/*
+ * Return the index of the least significant set CPU in cpumask or -1 if no
+ * CPUs are set.
+ *
+ * Rows are scanned from row 0 upwards. __builtin_ffsll() yields the 1-based
+ * position of the lowest set bit of a row (0 for an empty row), hence the
+ * "- 1" below.
+ */
+static inline int first_set_cpu(os_cpu_mask_t *cpumask)
+{
+       int cpus_offset = 0;
+
+       for (int row = 0; row < FIO_CPU_MASK_ROWS; row++) {
+               int row_first_cpu = __builtin_ffsll(cpumask->row[row]) - 1;
+
+               /* PRIu64, not %lu: unsigned long is 32 bits on Windows */
+               dprint(FD_PROCESS, "row_first_cpu=%d cpumask->row[%d]=%" PRIu64 "\n",
+                      row_first_cpu, row, cpumask->row[row]);
+               if (row_first_cpu > -1) {
+                       int mask_first_cpu = cpus_offset + row_first_cpu;
+
+                       dprint(FD_PROCESS, "first set cpu in mask is at index %d\n",
+                              mask_first_cpu);
+                       return mask_first_cpu;
+               }
+
+               /* Nothing set in this row: the next row starts one stride on */
+               cpus_offset += FIO_CPU_MASK_STRIDE;
+       }
+
+       return -1;
+}
+
+/*
+ * Return the index of the most significant set CPU in cpumask or -1 if no
+ * CPUs are set.
+ *
+ * Rows are scanned from the last row downwards; inside a row the highest
+ * set bit is found by shifting the row right until it becomes zero.
+ */
+static inline int last_set_cpu(os_cpu_mask_t *cpumask)
+{
+       int cpus_offset = (FIO_CPU_MASK_ROWS - 1) * FIO_CPU_MASK_STRIDE;
+
+       for (int row = FIO_CPU_MASK_ROWS - 1; row >= 0; row--) {
+               int row_last_cpu = -1;
+               uint64_t tmp;
+
+               /* One step per shift: ends at the highest set bit's index */
+               for (tmp = cpumask->row[row]; tmp != 0; tmp >>= 1)
+                       row_last_cpu++;
+
+               /* PRIu64, not %lu: unsigned long is 32 bits on Windows */
+               dprint(FD_PROCESS, "row_last_cpu=%d cpumask->row[%d]=%" PRIu64 "\n",
+                      row_last_cpu, row, cpumask->row[row]);
+               if (row_last_cpu > -1) {
+                       int mask_last_cpu = cpus_offset + row_last_cpu;
+
+                       dprint(FD_PROCESS, "last set cpu in mask is at index %d\n",
+                              mask_last_cpu);
+                       return mask_last_cpu;
+               }
+
+               /* Nothing set in this row: the previous row starts one stride back */
+               cpus_offset -= FIO_CPU_MASK_STRIDE;
+       }
+
+       return -1;
+}
+
+/*
+ * Translate cpumask into a processor group number and a mask that is
+ * relative to the start of that group.
+ *
+ * Returns 0 and fills in *processor_group and *affinity_mask on success.
+ * Returns 1, leaving both outputs untouched, when the mask is empty, when
+ * its first set CPU lies beyond the last active processor group, or when
+ * its set CPUs span more than one processor group.
+ */
+static inline int mask_to_group_mask(os_cpu_mask_t *cpumask, int *processor_group, uint64_t *affinity_mask)
+{
+       WORD online_groups, group, group_size;
+       bool found;
+       int cpus_offset, search_cpu, last_cpu, bit_offset, row, end;
+       uint64_t group_cpumask;
+
+       search_cpu = first_set_cpu(cpumask);
+       if (search_cpu < 0) {
+               log_info("CPU mask doesn't set any CPUs\n");
+               return 1;
+       }
+
+       /* Find processor group first set CPU applies to */
+       online_groups = GetActiveProcessorGroupCount();
+       group = 0;
+       found = false;
+       cpus_offset = 0;
+       group_size = 0;
+       while (!found && group < online_groups) {
+               group_size = GetMaximumProcessorCount(group);
+               dprint(FD_PROCESS, "group=%d group_start=%d group_size=%u search_cpu=%d\n",
+                      group, cpus_offset, group_size, search_cpu);
+               if (cpus_offset + group_size > search_cpu)
+                       found = true;
+               else {
+                       cpus_offset += group_size;
+                       group++;
+               }
+       }
+
+       if (!found) {
+               log_err("CPU mask contains processor beyond last active processor index (%d)\n",
+                        cpus_offset - 1);
+               print_mask(cpumask);
+               return 1;
+       }
+
+       /* Check all the CPUs in the mask apply to ONLY that processor group */
+       last_cpu = last_set_cpu(cpumask);
+       if (last_cpu > (cpus_offset + group_size - 1)) {
+               log_info("CPU mask cannot bind CPUs (e.g. %d, %d) that are "
+                        "in different processor groups\n", search_cpu,
+                        last_cpu);
+               print_mask(cpumask);
+               return 1;
+       }
+
+       /* Extract the current processor group mask from the cpumask */
+       row = cpus_offset / FIO_CPU_MASK_STRIDE;
+       bit_offset = cpus_offset % FIO_CPU_MASK_STRIDE;
+       group_cpumask = cpumask->row[row] >> bit_offset;
+       end = bit_offset + group_size;
+       if (end > FIO_CPU_MASK_STRIDE && (row + 1 < FIO_CPU_MASK_ROWS)) {
+               /* Some of the next row needs to be part of the mask */
+               int needed, needed_shift, needed_mask_shift;
+               uint64_t needed_mask;
+
+               /* needed = number of low bits of the next row that belong here */
+               needed = end - FIO_CPU_MASK_STRIDE;
+               needed_shift = FIO_CPU_MASK_STRIDE - bit_offset;
+               needed_mask_shift = FIO_CPU_MASK_STRIDE - needed;
+               needed_mask = (uint64_t)-1 >> needed_mask_shift;
+               /* needed_mask is a uint64_t so it is printed via PRIu64 */
+               dprint(FD_PROCESS, "bit_offset=%d end=%d needed=%d needed_shift=%d "
+                      "needed_mask=%" PRIu64 " needed_mask_shift=%d\n",
+                      bit_offset, end, needed, needed_shift, needed_mask,
+                      needed_mask_shift);
+               group_cpumask |= (cpumask->row[row + 1] & needed_mask) << needed_shift;
+       }
+       /* Drop any bits that sit above the size of the group */
+       group_cpumask &= (uint64_t)-1 >> (FIO_CPU_MASK_STRIDE - group_size);
+
+       /* Return group and mask */
+       dprint(FD_PROCESS, "Returning group=%d group_mask=%" PRIu64 "\n",
+              group, group_cpumask);
+       *processor_group = group;
+       *affinity_mask = group_cpumask;
+
+       return 0;
+}
+
+/*
+ * Bind a thread to the CPUs in cpumask.
+ *
+ * pid is handed to OpenThread(), i.e. it is used as a thread identifier.
+ * The mask is first converted by mask_to_group_mask(), so every set CPU has
+ * to be in the same processor group.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static inline int fio_setaffinity(int pid, os_cpu_mask_t cpumask)
+{
+       HANDLE handle = NULL;
+       int group, ret;
+       uint64_t group_mask = 0;
+       GROUP_AFFINITY new_group_affinity;
+
+       ret = -1;
+
+       if (mask_to_group_mask(&cpumask, &group, &group_mask) != 0)
+               goto err;
+
+       handle = OpenThread(THREAD_QUERY_INFORMATION | THREAD_SET_INFORMATION,
+                           TRUE, pid);
+       if (handle == NULL) {
+               log_err("fio_setaffinity: failed to get handle for pid %d\n", pid);
+               goto err;
+       }
+
+       /* Set group and mask.
+        * Note: if the GROUP_AFFINITY struct's Reserved members are not
+        * initialised to 0 then SetThreadGroupAffinity will fail with
+        * GetLastError() set to ERROR_INVALID_PARAMETER */
+       new_group_affinity.Mask = (KAFFINITY) group_mask;
+       new_group_affinity.Group = group;
+       new_group_affinity.Reserved[0] = 0;
+       new_group_affinity.Reserved[1] = 0;
+       new_group_affinity.Reserved[2] = 0;
+       if (SetThreadGroupAffinity(handle, &new_group_affinity, NULL) != 0)
+               ret = 0;
+       else
+               /* GetLastError() returns a DWORD (unsigned long), hence %lu */
+               log_err("fio_setaffinity: failed to set thread affinity "
+                        "(pid %d, group %d, mask %" PRIx64 ", "
+                        "GetLastError=%lu)\n", pid, group, group_mask,
+                        (unsigned long) GetLastError());
+
+err:
+       if (handle)
+               CloseHandle(handle);
+       return ret;
+}
+
+/*
+ * Split a CPU index into the mask row that holds it (*row) and the bit
+ * position inside that row (*offset).
+ */
+static inline void cpu_to_row_offset(int cpu, int *row, int *offset)
+{
+       *row = cpu / FIO_CPU_MASK_STRIDE;
+       /*
+        * The offset is the remainder within the row. Shifting cpu left by
+        * (stride * row) gave the wrong bit and an out-of-range shift for
+        * every CPU index at or above FIO_CPU_MASK_STRIDE.
+        */
+       *offset = cpu % FIO_CPU_MASK_STRIDE;
+}
+
+/* Zero every row of mask so that no CPU is selected. Always returns 0. */
+static inline int fio_cpuset_init(os_cpu_mask_t *mask)
+{
+       int row = FIO_CPU_MASK_ROWS;
+
+       while (row-- > 0)
+               mask->row[row] = 0;
+
+       return 0;
+}
+
+/*
+ * fio_getaffinity() should not be called once a fio_setaffinity() call has
+ * been made because fio_setaffinity() may put the process into multiple
+ * processor groups
+ *
+ * Fills *mask with the affinity mask of process pid, moved to the position
+ * of the process's processor group inside the full CPU mask.
+ *
+ * Returns 0 on success and -1 on failure (*mask is only written on
+ * success).
+ */
+static inline int fio_getaffinity(int pid, os_cpu_mask_t *mask)
+{
+       int ret;
+       int row, offset, end, group, group_size, group_start_cpu;
+       DWORD_PTR process_mask, system_mask;
+       HANDLE handle;
+       PUSHORT current_groups;
+       USHORT group_count;
+       WORD online_groups;
+
+       ret = -1;
+       current_groups = NULL;
+       handle = OpenProcess(PROCESS_QUERY_INFORMATION, TRUE, pid);
+       if (handle == NULL) {
+               log_err("fio_getaffinity: failed to get handle for pid %d\n",
+                       pid);
+               goto err;
+       }
+
+       group_count = 1;
+       /*
+        * GetProcessGroupAffinity() seems to expect more than the natural
+        * alignment for a USHORT from the area pointed to by current_groups so
+        * arrange for maximum alignment by allocating via malloc()
+        */
+       current_groups = malloc(sizeof(USHORT));
+       if (!current_groups) {
+               log_err("fio_getaffinity: malloc failed\n");
+               goto err;
+       }
+       if (GetProcessGroupAffinity(handle, &group_count, current_groups) == 0) {
+               /* NB: we also fail here if we are a multi-group process */
+               log_err("fio_getaffinity: failed to get single group affinity for pid %d\n", pid);
+               goto err;
+       }
+       /* process_mask is consumed below, so a failed query must not be ignored */
+       if (GetProcessAffinityMask(handle, &process_mask, &system_mask) == 0) {
+               log_err("fio_getaffinity: failed to get process affinity mask for pid %d\n", pid);
+               goto err;
+       }
+
+       /* Convert group and group relative mask to full CPU mask */
+       online_groups = GetActiveProcessorGroupCount();
+       if (online_groups == 0) {
+               log_err("fio_getaffinity: error retrieving total processor groups\n");
+               goto err;
+       }
+
+       /* Add up the sizes of the groups that come before the current one */
+       group = 0;
+       group_start_cpu = 0;
+       group_size = 0;
+       dprint(FD_PROCESS, "current_groups=%d group_count=%d\n",
+              current_groups[0], group_count);
+       while (true) {
+               group_size = GetMaximumProcessorCount(group);
+               if (group_size == 0) {
+                       log_err("fio_getaffinity: error retrieving size of "
+                               "processor group %d\n", group);
+                       goto err;
+               } else if (group >= current_groups[0] || group >= online_groups)
+                       break;
+               else {
+                       group_start_cpu += group_size;
+                       group++;
+               }
+       }
+
+       if (group != current_groups[0]) {
+               log_err("fio_getaffinity: could not find processor group %d\n",
+                       current_groups[0]);
+               goto err;
+       }
+
+       dprint(FD_PROCESS, "group_start_cpu=%d, group size=%u\n",
+              group_start_cpu, group_size);
+       /*
+        * group_start_cpu + group_size is one past the group's last CPU, so
+        * a group ending exactly at FIO_MAX_CPUS still fits in the mask
+        */
+       if ((group_start_cpu + group_size) > FIO_MAX_CPUS) {
+               log_err("fio_getaffinity failed: current CPU affinity (group "
+                       "%d, group_start_cpu %d, group_size %d) extends "
+                       "beyond mask's highest CPU (%d)\n", group,
+                       group_start_cpu, group_size, FIO_MAX_CPUS - 1);
+               goto err;
+       }
+
+       fio_cpuset_init(mask);
+       row = group_start_cpu / FIO_CPU_MASK_STRIDE;
+       offset = group_start_cpu % FIO_CPU_MASK_STRIDE;
+       mask->row[row] = process_mask;
+       mask->row[row] <<= offset;
+       end = offset + group_size;
+       if (end > FIO_CPU_MASK_STRIDE) {
+               /* The group straddles two rows: place the bits shifted out above */
+               int needed;
+               uint64_t needed_mask;
+
+               /* needed = number of group bits that land in the next row */
+               needed = end - FIO_CPU_MASK_STRIDE;
+               needed_mask = (uint64_t)-1 >> (FIO_CPU_MASK_STRIDE - needed);
+               row++;
+               mask->row[row] = process_mask;
+               /* Discard the low bits that were already stored in the previous row */
+               mask->row[row] >>= FIO_CPU_MASK_STRIDE - offset;
+               mask->row[row] &= needed_mask;
+       }
+       ret = 0;
+
+err:
+       if (handle)
+               CloseHandle(handle);
+       if (current_groups)
+               free(current_groups);
+
+       return ret;
+}
+
+/* Remove cpu from mask by clearing its bit in the row cpu_to_row_offset() picks */
+static inline void fio_cpu_clear(os_cpu_mask_t *mask, int cpu)
+{
+       int r, bit;
+       uint64_t cpu_bit;
+
+       cpu_to_row_offset(cpu, &r, &bit);
+       cpu_bit = 1ULL << bit;
+       mask->row[r] &= ~cpu_bit;
+}
+
+/* Add cpu to mask by setting its bit in the row cpu_to_row_offset() picks */
+static inline void fio_cpu_set(os_cpu_mask_t *mask, int cpu)
+{
+       int r, bit;
+       uint64_t cpu_bit;
+
+       cpu_to_row_offset(cpu, &r, &bit);
+       cpu_bit = 1ULL << bit;
+       mask->row[r] |= cpu_bit;
+}
+
+/* Return 1 when the bit for cpu is set in mask and 0 when it is not */
+static inline int fio_cpu_isset(os_cpu_mask_t *mask, int cpu)
+{
+       int r, bit;
+       uint64_t cpu_bit;
+
+       cpu_to_row_offset(cpu, &r, &bit);
+       cpu_bit = 1ULL << bit;
+
+       return (mask->row[r] & cpu_bit) != 0;
+}
+
+/* Return the number of CPUs set in mask: the sum of hweight64() over all rows */
+static inline int fio_cpu_count(os_cpu_mask_t *mask)
+{
+       int total = 0;
+       int row = 0;
+
+       while (row < FIO_CPU_MASK_ROWS) {
+               total += hweight64(mask->row[row]);
+               row++;
+       }
+
+       return total;
+}
+
+/* Counterpart of fio_cpuset_init(): nothing is done with mask, 0 is returned */
+static inline int fio_cpuset_exit(os_cpu_mask_t *mask)
+{
+       (void) mask;
+
+       return 0;
+}
index ddb752800191d77f679e53bfa641a56a3a214b6e..01f555e1b6fc3be5e99389365edeb65c4bc02008 100644 (file)
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 
 #include "../smalloc.h"
+#include "../debug.h"
 #include "../file.h"
 #include "../log.h"
 #include "../lib/hweight.h"
@@ -191,6 +192,10 @@ static inline int fio_set_sched_idle(void)
        return (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE))? 0 : -1;
 }
 
+#ifdef CONFIG_WINDOWS_XP
 #include "os-windows-xp.h"
+#else
+#include "os-windows-7.h"
+#endif
 
 #endif /* FIO_OS_WINDOWS_H */
index ecc8c40885fc622c43537869f85ea9ec007d2a78..d33250de0f85ab9f0c1ccb6b0df36feb1c805b17 100755 (executable)
@@ -959,6 +959,7 @@ in_addr_t inet_network(const char *cp)
        return hbo;
 }
 
+#ifdef CONFIG_WINDOWS_XP
 const char* inet_ntop(int af, const void *restrict src,
                char *restrict dst, socklen_t size)
 {
@@ -1039,3 +1040,4 @@ int inet_pton(int af, const char *restrict src, void *restrict dst)
 
        return ret;
 }
+#endif /* CONFIG_WINDOWS_XP */
index 30498c673a2b1f0f5ce1f666996d291b33371a03..056f1dd5822a91574dde256a6823accd5d28fc03 100644 (file)
@@ -12,8 +12,10 @@ typedef int in_addr_t;
 
 in_addr_t inet_network(const char *cp);
 
+#ifdef CONFIG_WINDOWS_XP
 const char *inet_ntop(int af, const void *restrict src,
         char *restrict dst, socklen_t size);
 int inet_pton(int af, const char *restrict src, void *restrict dst);
+#endif
 
 #endif /* ARPA_INET_H */
index 21e5699b3d685fc627319df4530cdcc1793a0501..25b8183fdfe45accb9d86b14c55e41afd27b8459 100644 (file)
@@ -1,8 +1,11 @@
 #ifndef POLL_H
 #define POLL_H
 
+#include <winsock2.h>
+
 typedef int nfds_t;
 
+#ifdef CONFIG_WINDOWS_XP
 struct pollfd
 {
        int fd;
@@ -10,11 +13,12 @@ struct pollfd
        short revents;
 };
 
-int poll(struct pollfd fds[], nfds_t nfds, int timeout);
-
 #define POLLOUT        1
 #define POLLIN 2
 #define POLLERR        0
 #define POLLHUP        1
+#endif /* CONFIG_WINDOWS_XP */
+
+int poll(struct pollfd fds[], nfds_t nfds, int timeout);
 
 #endif /* POLL_H */
index d3f69774165f8add2cab5d998989e31866ff173c..90c7057719d200871f1a325a6ad042f8a610b775 100644 (file)
--- a/server.c
+++ b/server.c
@@ -2145,14 +2145,14 @@ static int fio_init_server_ip(void)
 #endif
 
        if (use_ipv6) {
-               const void *src = &saddr_in6.sin6_addr;
+               void *src = &saddr_in6.sin6_addr;
 
                addr = (struct sockaddr *) &saddr_in6;
                socklen = sizeof(saddr_in6);
                saddr_in6.sin6_family = AF_INET6;
                str = inet_ntop(AF_INET6, src, buf, sizeof(buf));
        } else {
-               const void *src = &saddr_in.sin_addr;
+               void *src = &saddr_in.sin_addr;
 
                addr = (struct sockaddr *) &saddr_in;
                socklen = sizeof(saddr_in);
@@ -2220,7 +2220,7 @@ static int fio_init_server_connection(void)
 
        if (!bind_sock) {
                char *p, port[16];
-               const void *src;
+               void *src;
                int af;
 
                if (use_ipv6) {