Commit | Line | Data |
---|---|---|
0c6c0ebb EV |
1 | #!/bin/bash |
2 | ||
# Script arguments flattened into one string; handed to check_args below.
args="$*"

# Block devices to benchmark, filled in by check_args.
drives=""

# Number of io_uring threads; may be raised by compute_nb_threads.
nb_threads=1

# CPU selection state, filled in by detect_first_core.
first_cores_count=0
first_cores=""
taskset_cores=""

# Default options
latency_cmdline=""
0c6c0ebb EV |
12 | |
#######################################
# Print an error message and abort.
# Arguments: $@ - words of the message
# Outputs:   the message on stderr, so that it stays visible even when the
#            caller runs inside a command substitution
# Returns:   never; exits with status 1 (only the subshell when called
#            from within $(...))
#######################################
fatal() {
  echo "$@" >&2
  exit 1
}
17 | ||
# Emit a non-fatal warning on stdout.
# Arguments: $@ - words of the warning, joined into one line
hint() {
  printf 'Warning: %s\n' "$*"
}
21 | ||
# Print a "<item>: <message>" line on stdout.
# Arguments: $1 - item the message is about
#            $2.. - words of the message
info() {
  item="$1"
  shift
  printf '%s: %s\n' "${item}" "$*"
}
27 | ||
# Abort through fatal unless the effective user id is 0.
check_root() {
  if (( EUID != 0 )); then
    fatal "You should be root to run this tool"
  fi
}
31 | ||
#######################################
# Ensure the binaries are present and executable.
# Arguments: $@ - binaries, either as a path or as a command name
# Returns:   0 when every binary is usable; otherwise aborts through fatal
#######################################
check_binary() {
  local bin
  for bin in "$@"; do
    # A path that is directly executable needs no lookup
    [[ -x "${bin}" ]] && continue
    # 'command -v' is a shell builtin, unlike 'which', which may itself be
    # missing from the system
    command -v -- "${bin}" >/dev/null 2>&1 \
      || fatal "${bin} doesn't exists or is not executable"
  done
}
41 | ||
#######################################
# Select the physical core the benchmark is pinned to and list its logical
# CPUs.
# With a single drive, the first CPU of the drive's PCI local_cpulist is
# used; with several drives (or when that CPU cannot be read) CPU 0 is used.
# Globals:   drives (read)
#            first_cores, first_cores_count, taskset_cores (written)
# Returns:   0 on success; aborts through fatal when no CPU is found
#######################################
detect_first_core() {
  local -a drive_list
  local device_name device_dir pci_addr pci_dir core_to_run cpu
  local cpu_to_search="0"

  # drives is a space-separated string, so it has to be split before the
  # devices can be counted ("${#drives[@]}" is always 1 for a string)
  read -r -a drive_list <<< "${drives[*]}"

  if (( ${#drive_list[@]} == 1 )); then
    device_name=$(block_dev_name "${drive_list[0]}")
    device_dir="/sys/block/${device_name}/device/"
    pci_addr=$(cat "${device_dir}/address" 2>/dev/null)
    pci_dir="/sys/bus/pci/devices/${pci_addr}/"
    # local_cpulist looks like "0-15,32-47": keep the first CPU of the first range
    cpu_to_search=$(cut -d"," -f 1 "${pci_dir}/local_cpulist" 2>/dev/null | cut -d"-" -f 1)
    if [[ -z "${cpu_to_search}" ]]; then
      hint "Cannot detect the cpus local to ${drive_list[0]}. Running on the first core."
      cpu_to_search="0"
    fi
  else
    hint 'Passed multiple devices. Running on the first core.'
  fi

  # "socket,core" pair owning the selected logical CPU
  core_to_run=$(lscpu --all -pSOCKET,CORE,CPU | grep ",${cpu_to_search}\$" | cut -d"," -f1-2)
  [[ -n "${core_to_run}" ]] || fatal "Cannot detect first core"

  # Detect which logical cpus belongs to the first physical core
  # If Hyperthreading is enabled, two cores are returned
  first_cores=""
  first_cores_count=0
  # The pattern is anchored on both sides so that e.g. "0,1" does not also
  # select the rows "0,0,1" or "0,10,4"
  while IFS=',' read -r _ _ cpu; do
    first_cores="${first_cores:+${first_cores} }${cpu}"
    first_cores_count=$((first_cores_count + 1))
  done < <(lscpu --all -pSOCKET,CORE,CPU | grep "^${core_to_run},")

  (( first_cores_count > 0 )) || fatal "Cannot detect first core"
  taskset_cores=${first_cores// /,}
}
74 | ||
55875a2c EV |
# Print the help text on stdout and exit with status 0.
usage() {
  # The last empty argument reproduces the blank line that ends the help text
  printf '%s\n' \
    'usage: [options] block_device [other_block_devices]' \
    '' \
    '-h : print help' \
    '-l : enable latency reporting' \
    '' \
    'example:' \
    't/one-core-peak.sh /dev/nvme0n1' \
    't/one-core-peak.sh -l /dev/nvme0n1 /dev/nvme1n1' \
    ''
  exit 0
}
87 | ||
#######################################
# Parse the command line.
# Globals:   latency_cmdline (written when -l is given)
#            drives (written: remaining arguments joined by spaces)
# Arguments: $@ - options followed by at least one block device
# Returns:   0 on success; aborts through fatal on an unknown option or when
#            no drive is given; -h prints the help through usage
#######################################
check_args() {
  local OPTIND OPTARG option
  # The leading ':' selects getopts' silent mode: an unknown option sets
  # option to '?' and stores the offending letter in OPTARG
  while getopts ":hl" option; do
    case "${option}" in
      h) # Show help
        usage
        ;;
      l) # Report latency
        latency_cmdline="1"
        ;;
      *)
        fatal "Unsupported -${OPTARG} option"
        ;;
    esac
  done
  shift $((OPTIND - 1))
  (( $# > 0 )) || fatal "Missing drive(s) as argument"
  drives="$*"
}
107 | ||
#######################################
# Ensure the block device exists.
# Arguments: $1 - path of the device
# Returns:   0 when $1 is a block device; otherwise aborts through fatal
#######################################
check_drive_exists() {
  # Quoted on purpose: an unquoted empty argument turns the test into
  # '[ -b ]', which is always true
  [[ -b "$1" ]] || fatal "$1 is not a valid block device"
}
112 | ||
# Succeed when the arguments, joined together, contain the string "nvme".
# Arguments: $@ - device path(s)
is_nvme() {
  case "$*" in
    *nvme*) return 0 ;;
    *) return 1 ;;
  esac
}
116 | ||
#######################################
# Print a warning if the nvme poll queues aren't enabled.
# Globals:   drives (read)
# Returns:   non-zero when no nvme drive was passed, 0 otherwise
#######################################
check_poll_queue() {
  local poll_queue_file="/sys/module/nvme/parameters/poll_queues"
  local poll_queue

  # drives is a space-separated list, so it is split on purpose
  # shellcheck disable=SC2086
  is_nvme ${drives} || return

  # Nothing can be checked when the parameter is not exposed; reading it
  # anyway would feed an empty value to the comparison below
  [[ -r "${poll_queue_file}" ]] || return 0

  poll_queue=$(cat "${poll_queue_file}")
  if [[ "${poll_queue}" == "0" ]]; then
    hint "For better performance, you should enable nvme poll queues by setting nvme.poll_queues=32 on the kernel commande line"
  fi
}
123 | ||
#######################################
# Strip a leading "/dev/" from a device path.
# Arguments: $1 - device path or name
# Outputs:   the name on stdout, unchanged apart from the removed prefix
#######################################
block_dev_name() {
  # Quoted so that the name is neither word-split nor glob-expanded
  printf '%s\n' "${1#/dev/}"
}
127 | ||
#######################################
# Returns the /sys/block/ directory of a given block device.
# Arguments: $1 - device name, without the /dev/ prefix
# Outputs:   the directory on stdout
# Returns:   0 on success; aborts through fatal when the directory is missing
#######################################
get_sys_block_dir() {
  local device_name=$1
  local sys_block_dir="/sys/block/${device_name}"
  # Callers capture stdout, so the error text is forced to stderr: otherwise
  # it would be stored as if it were the directory
  [[ -d "${sys_block_dir}" ]] || fatal "Cannot find ${sys_block_dir} directory" >&2
  printf '%s\n' "${sys_block_dir}"
}
135 | ||
#######################################
# Ensure io_sched is set to none.
# Arguments: $1 - path of the block device
# Outputs:   an info line when the scheduler has to be changed
# Returns:   0 on success; aborts through fatal when the scheduler file is
#            missing
#######################################
check_io_scheduler() {
  local device_name sys_block_dir sched_file

  device_name=$(block_dev_name "$1")
  sys_block_dir=$(get_sys_block_dir "${device_name}")
  sched_file="${sys_block_dir}/queue/scheduler"
  [[ -f "${sched_file}" ]] || fatal "Cannot find IO scheduler for ${device_name}"

  # The active scheduler is the one shown between brackets
  if ! grep -q '\[none\]' "${sched_file}"; then
    info "${device_name}" "set none as io scheduler"
    echo "none" > "${sched_file}"
  fi
}
149 | ||
#######################################
# Set a /sys/block/<device>/ attribute to the wanted value when it differs.
# Arguments: $1 - path of the block device
#            $2 - attribute, relative to the device's /sys/block directory
#            $3 - wanted value
# Outputs:   an info line when the value was changed, a warning when the
#            write failed
# Returns:   non-zero when the attribute does not exist, 0 otherwise
#######################################
check_sysblock_value() {
  local device_name sys_block_dir target_file content
  local value=$3

  device_name=$(block_dev_name "$1")
  sys_block_dir=$(get_sys_block_dir "${device_name}")
  target_file="${sys_block_dir}/$2"

  # Attributes that are not exposed for this device are skipped
  [[ -f "${target_file}" ]] || return

  content=$(cat "${target_file}" 2>/dev/null)
  [[ "${content}" != "${value}" ]] || return 0

  # The braces make 2>/dev/null also cover a failure to open the file;
  # if/else keeps the warning from firing when only info fails
  if { echo "${value}" > "${target_file}"; } 2>/dev/null; then
    info "${device_name}" "${target_file} set to ${value}."
  else
    hint "${device_name}: Cannot set ${value} on ${target_file}"
  fi
}
161 | ||
# Raise nb_threads to the number of arguments and to first_cores_count,
# whenever either of them is larger than the current value.
# Globals:   nb_threads (read/written), first_cores_count (read)
# Arguments: $@ - one word per device
compute_nb_threads() {
  (( $# > nb_threads )) && nb_threads=$#
  (( first_cores_count > nb_threads )) && nb_threads=${first_cores_count}
}
167 | ||
# Switch the processor scaling governor to "performance" through cpupower.
# Only warns when cpupower reports no scaling driver; aborts through fatal
# when the governor cannot be set.
check_scaling_governor() {
  # LC_ALL=C keeps the "driver:" label untranslated
  driver=$(LC_ALL=C cpupower frequency-info | awk '/driver:/ {print $2}')
  if [ -n "${driver}" ]; then
    cpupower frequency-set -g performance >/dev/null 2>&1 || fatal "Cannot set scaling processor governor"
  else
    hint "Cannot detect processor scaling driver"
  fi
}
176 | ||
# Write "menu" into the cpuidle current_governor file.
# Only warns when that file does not exist; aborts through fatal when the
# write fails.
check_idle_governor() {
  filename="/sys/devices/system/cpu/cpuidle/current_governor"
  if [ -f "${filename}" ]; then
    echo "menu" > ${filename} 2>/dev/null || fatal "Cannot set cpu idle governor to menu"
  else
    hint "Cannot detect cpu idle governor"
  fi
}
185 | ||
#######################################
# Print the identity and PCI placement of an nvme drive and, when the nvme
# tool is available, a second line with its queue, power and temperature
# status.
# Arguments: $1 - path of the nvme block device
# Outputs:   one or two info lines on stdout
#######################################
show_nvme() {
  local device="$1"
  local device_name device_dir pci_addr pci_dir link_speed irq cpus
  local model fw serial status queues ncqa nsqa power_state apste temp
  local numa="off"

  device_name=$(block_dev_name "${device}")
  device_dir="/sys/block/${device_name}/device/"
  pci_addr=$(cat "${device_dir}/address")
  pci_dir="/sys/bus/pci/devices/${pci_addr}/"
  link_speed=$(cat "${pci_dir}/current_link_speed")
  irq=$(cat "${pci_dir}/irq")
  # numa_node may be absent, in which case "off" is reported
  [[ -f "${pci_dir}/numa_node" ]] && numa=$(cat "${pci_dir}/numa_node")
  cpus=$(cat "${pci_dir}/local_cpulist")
  model=$(cat "${device_dir}/model" | xargs) #xargs for trimming spaces
  fw=$(cat "${device_dir}/firmware_rev" | xargs) #xargs for trimming spaces
  serial=$(cat "${device_dir}/serial" | xargs) #xargs for trimming spaces
  info "${device_name}" "MODEL=${model} FW=${fw} serial=${serial} PCI=${pci_addr}@${link_speed} IRQ=${irq} NUMA=${numa} CPUS=${cpus} "

  # The remaining details need the optional nvme tool
  command -v nvme > /dev/null 2>&1 || return 0

  status=""
  # Feature 0x7 reports both queue counts, so it is queried only once
  queues=$(nvme get-feature -H -f 0x7 "${device}" 2>&1)
  ncqa=$(grep NCQA <<< "${queues}" | cut -d ':' -f 2 | xargs)
  [[ -n "${ncqa}" ]] && status="${status}Completion Queues:${ncqa}, "
  nsqa=$(grep NSQA <<< "${queues}" | cut -d ':' -f 2 | xargs)
  [[ -n "${nsqa}" ]] && status="${status}Submission Queues:${nsqa}, "
  power_state=$(nvme get-feature -H -f 0x2 "${device}" 2>&1 | grep PS | cut -d ":" -f 2 | xargs)
  [[ -n "${power_state}" ]] && status="${status}PowerState:${power_state}, "
  apste=$(nvme get-feature -H -f 0xc "${device}" 2>&1 | grep APSTE | cut -d ":" -f 2 | xargs)
  [[ -n "${apste}" ]] && status="${status} Autonomous Power State Transition:${apste}, "
  temp=$(nvme smart-log "${device}" 2>&1 | grep 'temperature' | cut -d ':' -f 2 | xargs)
  [[ -n "${temp}" ]] && status="${status}Temp:${temp}"
  info "${device_name}" "${status}"
}
216 | ||
#######################################
# Print the details of a drive; only nvme drives are described.
# Arguments: $1 - path of the block device
# Returns:   non-zero when the drive is not an nvme one
#######################################
show_device() {
  # show_nvme derives the device name itself, so nothing is computed here
  is_nvme "$1" && show_nvme "$1"
}
221 | ||
f27e8c63 EV |
#######################################
# Print the value of a kernel configuration item as "CONFIG_<item>=<value>",
# or "CONFIG_<item>=N" when it is not set.
# The configuration is read from /boot/config-<release>, falling back to
# /proc/config.gz; nothing is printed when neither file exists.
# Arguments: $1 - item name, without the CONFIG_ prefix
#######################################
show_kernel_config_item() {
  local config_item="CONFIG_$1"
  local config_file status

  config_file="/boot/config-$(uname -r)"
  if [[ ! -f "${config_file}" ]]; then
    config_file='/proc/config.gz'
    [[ -f "${config_file}" ]] || return 0
  fi

  # Anchored so that items merely containing this name are not matched
  status=$(zgrep "^${config_item}=" "${config_file}")
  if [[ -z "${status}" ]]; then
    echo "${config_item}=N"
  else
    # Everything after the first '=' is the value, even if it contains '='
    echo "${config_item}=${status#*=}"
  fi
}
238 | ||
#######################################
# Print a summary of the host: CPU model, memory speed, kernel release,
# selected kernel configuration items, kernel command line, SELinux mode and
# TSC frequency.
# Globals:   latency_cmdline (read/written): when latency reporting was
#            requested it is replaced by the matching t/io_uring options, or
#            cleared when the TSC frequency is unknown
# Outputs:   info lines on stdout
#######################################
show_system() {
  local cpu_model memory_speed kernel config_item tsc

  cpu_model=$(grep -m1 "model name" /proc/cpuinfo | awk '{print substr($0, index($0,$4))}')
  memory_speed=$(dmidecode -t 17 -q | grep -m 1 "Configured Memory Speed: [0-9]" | awk '{print substr($0, index($0,$4))}')
  kernel=$(uname -r)
  info "system" "CPU: ${cpu_model}"
  info "system" "MEMORY: ${memory_speed}"
  info "system" "KERNEL: ${kernel}"
  for config_item in BLK_CGROUP BLK_WBT_MQ HZ RETPOLINE PAGE_TABLE_ISOLATION; do
    info "system" "KERNEL: $(show_kernel_config_item "${config_item}")"
  done
  info "system" "KERNEL: $(cat /proc/cmdline)"
  info "system" "SElinux: $(getenforce)"

  # Keep the last match only, so that tsc is always a single value
  tsc=$(journalctl -k | awk '/tsc: Refined TSC clocksource calibration:/ {v=$11} END {if (v != "") print v}')
  if [[ -n "${tsc}" ]]; then
    info "system" "TSC: ${tsc} Mhz"
    # Dropping the dot turns the MHz value into kHz; the "000" suffix below
    # then yields Hz
    tsc=${tsc//./}
    if [[ -n "${latency_cmdline}" ]]; then
      latency_cmdline="-t1 -T${tsc}000"
    fi
  elif [[ -n "${latency_cmdline}" ]]; then
    # Without this, the "1" flag set by -l would reach t/io_uring as an
    # extra argument
    hint "Cannot detect the TSC frequency, latency reporting is disabled"
    latency_cmdline=""
  fi
}
258 | ||
### MAIN

# Validate the invocation and the environment before touching anything
check_args ${args}
check_root
check_binary t/io_uring lscpu grep taskset cpupower awk tr xargs dmidecode
detect_first_core

separator="##################################################"

info "${separator}"
show_system

# Validate, tune and describe every drive passed on the command line
for drive in ${drives}; do
  check_drive_exists "${drive}"
  check_io_scheduler "${drive}"
  check_sysblock_value "${drive}" "queue/iostats" 0      # Ensure iostats are disabled
  check_sysblock_value "${drive}" "queue/nomerges" 2     # Ensure merge are disabled
  check_sysblock_value "${drive}" "queue/io_poll" 1      # Ensure io_poll is enabled
  check_sysblock_value "${drive}" "queue/wbt_lat_usec" 0 # Disabling wbt lat
  show_device "${drive}"
done

# Host-wide checks and settings
check_poll_queue
compute_nb_threads ${drives}
check_scaling_governor
check_idle_governor

info "${separator}"
echo

# The command line is kept as one string: it is both logged and, through
# word splitting, executed
io_uring_opts="-b512 -d128 -c32 -s32 -p1 -F1 -B1 -n${nb_threads}"
cmdline="taskset -c ${taskset_cores} t/io_uring ${io_uring_opts} ${latency_cmdline} ${drives}"
info "io_uring" "Running ${cmdline}"
${cmdline}