[app][benchmarks] remove all use of floating point to compute speed of bench
The calculation can be approximated with 64-bit integer math.
This commit is contained in:
@@ -5,21 +5,32 @@
|
|||||||
* license that can be found in the LICENSE file or at
|
* license that can be found in the LICENSE file or at
|
||||||
* https://opensource.org/licenses/MIT
|
* https://opensource.org/licenses/MIT
|
||||||
*/
|
*/
|
||||||
#include <sys/types.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <rand.h>
|
|
||||||
#include <lk/err.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <app/tests.h>
|
#include <app/tests.h>
|
||||||
#include <kernel/thread.h>
|
#include <inttypes.h>
|
||||||
|
#include <kernel/event.h>
|
||||||
#include <kernel/mutex.h>
|
#include <kernel/mutex.h>
|
||||||
#include <kernel/semaphore.h>
|
#include <kernel/semaphore.h>
|
||||||
#include <kernel/event.h>
|
#include <kernel/thread.h>
|
||||||
|
#include <lk/err.h>
|
||||||
#include <platform.h>
|
#include <platform.h>
|
||||||
|
#include <rand.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
const size_t BUFSIZE = (1024*1024);
|
// quickly guess how big of a buffer we can try to allocate
|
||||||
const uint ITER = 1024;
|
#if !defined(MEMSIZE) || MEMSIZE > (1024 * 1024)
|
||||||
|
static const size_t BUFSIZE = (size_t)1024 * 1024;
|
||||||
|
static const uint ITER = 1024;
|
||||||
|
#else
|
||||||
|
static const size_t BUFSIZE = (4 * 1024);
|
||||||
|
static const uint ITER = 1024 * 32;
|
||||||
|
#endif
|
||||||
|
// Have to use a define to work around gcc 7.x bug where it thinks
|
||||||
|
// BUFSIZE is not constant.
|
||||||
|
#define TOTAL_SIZE ((uint64_t)BUFSIZE * ITER)
|
||||||
|
|
||||||
__NO_INLINE static void bench_set_overhead(void) {
|
__NO_INLINE static void bench_set_overhead(void) {
|
||||||
uint32_t *buf = malloc(BUFSIZE);
|
uint32_t *buf = malloc(BUFSIZE);
|
||||||
@@ -51,44 +62,50 @@ __NO_INLINE static void bench_memset(void) {
|
|||||||
memset(buf, 0, BUFSIZE);
|
memset(buf, 0, BUFSIZE);
|
||||||
}
|
}
|
||||||
count = arch_cycle_count() - count;
|
count = arch_cycle_count() - count;
|
||||||
|
if (count == 0) {
|
||||||
|
count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t total_bytes = BUFSIZE * ITER;
|
uint64_t bytes_cycle = (TOTAL_SIZE * 1000) / count;
|
||||||
double bytes_cycle = total_bytes / (double)count;
|
printf("took %lu cycles to memset a buffer of size %zu %u times"
|
||||||
printf_float("took %lu cycles to memset a buffer of size %zu %d times (%zu bytes), %f bytes/cycle\n",
|
"(%" PRIu64 " bytes), %" PRIu64 ".%03" PRIu64 " bytes/cycle\n",
|
||||||
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
|
count, BUFSIZE, ITER, TOTAL_SIZE, bytes_cycle / 1000, bytes_cycle % 1000);
|
||||||
|
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define bench_cset(type) \
|
#define bench_cset(type) \
|
||||||
__NO_INLINE static void bench_cset_##type(void) \
|
__NO_INLINE static void bench_cset_##type(void) { \
|
||||||
{ \
|
type *buf = malloc(BUFSIZE); \
|
||||||
type *buf = malloc(BUFSIZE); \
|
if (!buf) { \
|
||||||
if (!buf) { \
|
printf("failed to allocate buffer\n"); \
|
||||||
printf("failed to allocate buffer\n"); \
|
return; \
|
||||||
return; \
|
} \
|
||||||
} \
|
\
|
||||||
\
|
ulong count = arch_cycle_count(); \
|
||||||
ulong count = arch_cycle_count(); \
|
for (uint i = 0; i < ITER; i++) { \
|
||||||
for (uint i = 0; i < ITER; i++) { \
|
for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
|
||||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
|
buf[j] = 0; \
|
||||||
buf[j] = 0; \
|
} \
|
||||||
} \
|
} \
|
||||||
} \
|
count = arch_cycle_count() - count; \
|
||||||
count = arch_cycle_count() - count; \
|
if (count == 0) { \
|
||||||
\
|
count = 1; \
|
||||||
size_t total_bytes = BUFSIZE * ITER; \
|
} \
|
||||||
double bytes_cycle = total_bytes / (double)count; \
|
\
|
||||||
printf_float("took %lu cycles to manually clear a buffer using wordsize %zu of size %zu %u times (%zu bytes), %f bytes/cycle\n", \
|
uint64_t bytes_cycle = (TOTAL_SIZE * 1000) / count; \
|
||||||
count, sizeof(*buf), BUFSIZE, ITER, total_bytes, bytes_cycle); \
|
printf("took %lu cycles to manually clear a buffer using wordsize %zu of size %zu %u times " \
|
||||||
\
|
"(%" PRIu64 " bytes), %" PRIu64 ".%03" PRIu64 " bytes/cycle\n", \
|
||||||
free(buf); \
|
count, sizeof(*buf), BUFSIZE, ITER, TOTAL_SIZE, bytes_cycle / 1000, bytes_cycle % 1000); \
|
||||||
}
|
free(buf); \
|
||||||
|
}
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
bench_cset(uint8_t)
|
bench_cset(uint8_t)
|
||||||
bench_cset(uint16_t)
|
bench_cset(uint16_t)
|
||||||
bench_cset(uint32_t)
|
bench_cset(uint32_t)
|
||||||
bench_cset(uint64_t)
|
bench_cset(uint64_t)
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
__NO_INLINE static void bench_cset_wide(void) {
|
__NO_INLINE static void bench_cset_wide(void) {
|
||||||
uint32_t *buf = malloc(BUFSIZE);
|
uint32_t *buf = malloc(BUFSIZE);
|
||||||
@@ -99,23 +116,26 @@ __NO_INLINE static void bench_cset_wide(void) {
|
|||||||
|
|
||||||
ulong count = arch_cycle_count();
|
ulong count = arch_cycle_count();
|
||||||
for (uint i = 0; i < ITER; i++) {
|
for (uint i = 0; i < ITER; i++) {
|
||||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
for (size_t j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||||
buf[j*8] = 0;
|
buf[j * 8] = 0;
|
||||||
buf[j*8+1] = 0;
|
buf[j * 8 + 1] = 0;
|
||||||
buf[j*8+2] = 0;
|
buf[j * 8 + 2] = 0;
|
||||||
buf[j*8+3] = 0;
|
buf[j * 8 + 3] = 0;
|
||||||
buf[j*8+4] = 0;
|
buf[j * 8 + 4] = 0;
|
||||||
buf[j*8+5] = 0;
|
buf[j * 8 + 5] = 0;
|
||||||
buf[j*8+6] = 0;
|
buf[j * 8 + 6] = 0;
|
||||||
buf[j*8+7] = 0;
|
buf[j * 8 + 7] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = arch_cycle_count() - count;
|
count = arch_cycle_count() - count;
|
||||||
|
if (count == 0) {
|
||||||
|
count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t total_bytes = BUFSIZE * ITER;
|
uint64_t bytes_cycle = (TOTAL_SIZE * 1000) / count;
|
||||||
double bytes_cycle = total_bytes / (double)count;
|
printf("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time "
|
||||||
printf_float("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time (%zu bytes), %f bytes/cycle\n",
|
"(%" PRIu64 " bytes), %" PRIu64 ".%03" PRIu64 " bytes/cycle\n",
|
||||||
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
|
count, BUFSIZE, ITER, TOTAL_SIZE, bytes_cycle / 1000, bytes_cycle % 1000);
|
||||||
|
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
@@ -132,11 +152,15 @@ __NO_INLINE static void bench_memcpy(void) {
|
|||||||
memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
|
memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
|
||||||
}
|
}
|
||||||
count = arch_cycle_count() - count;
|
count = arch_cycle_count() - count;
|
||||||
|
if (count == 0) {
|
||||||
|
count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t total_bytes = (BUFSIZE / 2) * ITER;
|
uint64_t total_bytes = TOTAL_SIZE / 2;
|
||||||
double bytes_cycle = total_bytes / (double)count;
|
uint64_t bytes_cycle = (total_bytes * 1000) / count;
|
||||||
printf_float("took %lu cycles to memcpy a buffer of size %zu %d times (%zu source bytes), %f source bytes/cycle\n",
|
printf("took %lu cycles to memcpy a buffer of size %zu %d times (%" PRIu64 " source bytes), "
|
||||||
count, BUFSIZE / 2, ITER, total_bytes, bytes_cycle);
|
"%" PRIu64 ".%03" PRIu64 " source bytes/cycle\n",
|
||||||
|
count, BUFSIZE / 2, ITER, total_bytes, bytes_cycle / 1000, bytes_cycle % 1000);
|
||||||
|
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
@@ -153,22 +177,23 @@ __NO_INLINE static void arm_bench_cset_stm(void) {
|
|||||||
for (uint i = 0; i < ITER; i++) {
|
for (uint i = 0; i < ITER; i++) {
|
||||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"stm %0, {r0-r7};"
|
"stm %0, {r0-r7};" ::"r"(&buf[j * 8]));
|
||||||
:: "r" (&buf[j*8])
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
count = arch_cycle_count() - count;
|
count = arch_cycle_count() - count;
|
||||||
|
if (count == 0) {
|
||||||
|
count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
size_t total_bytes = BUFSIZE * ITER;
|
uint64_t bytes_cycle = (TOTAL_SIZE * 1000) / count;
|
||||||
double bytes_cycle = total_bytes / (float)count;
|
printf("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time using stm "
|
||||||
printf_float("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time using stm (%zu bytes), %f bytes/cycle\n",
|
"(%" PRIu64 " bytes), %" PRIu64 ".%03" PRIu64 " bytes/cycle\n",
|
||||||
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
|
count, BUFSIZE, ITER, TOTAL_SIZE, bytes_cycle / 1000, bytes_cycle % 1000);
|
||||||
|
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (__CORTEX_M >= 0x03)
|
#if (__CORTEX_M >= 0x03)
|
||||||
__NO_INLINE static void arm_bench_multi_issue(void) {
|
__NO_INLINE static void arm_bench_multi_issue(void) {
|
||||||
ulong cycles;
|
ulong cycles;
|
||||||
uint32_t a = 0, b = 0, c = 0, d = 0, e = 0, f = 0, g = 0, h = 0;
|
uint32_t a = 0, b = 0, c = 0, d = 0, e = 0, f = 0, g = 0, h = 0;
|
||||||
@@ -176,20 +201,21 @@ __NO_INLINE static void arm_bench_multi_issue(void) {
|
|||||||
uint count = ITER;
|
uint count = ITER;
|
||||||
cycles = arch_cycle_count();
|
cycles = arch_cycle_count();
|
||||||
while (count--) {
|
while (count--) {
|
||||||
asm volatile ("");
|
asm volatile("");
|
||||||
asm volatile ("add %0, %0, %0" : "=r" (a) : "r" (a));
|
asm volatile("add %0, %0, %0" : "=r"(a) : "r"(a));
|
||||||
asm volatile ("add %0, %0, %0" : "=r" (b) : "r" (b));
|
asm volatile("add %0, %0, %0" : "=r"(b) : "r"(b));
|
||||||
asm volatile ("and %0, %0, %0" : "=r" (c) : "r" (c));
|
asm volatile("and %0, %0, %0" : "=r"(c) : "r"(c));
|
||||||
asm volatile ("mov %0, %0" : "=r" (d) : "r" (d));
|
asm volatile("mov %0, %0" : "=r"(d) : "r"(d));
|
||||||
asm volatile ("orr %0, %0, %0" : "=r" (e) : "r" (e));
|
asm volatile("orr %0, %0, %0" : "=r"(e) : "r"(e));
|
||||||
asm volatile ("add %0, %0, %0" : "=r" (f) : "r" (f));
|
asm volatile("add %0, %0, %0" : "=r"(f) : "r"(f));
|
||||||
asm volatile ("and %0, %0, %0" : "=r" (g) : "r" (g));
|
asm volatile("and %0, %0, %0" : "=r"(g) : "r"(g));
|
||||||
asm volatile ("mov %0, %0" : "=r" (h) : "r" (h));
|
asm volatile("mov %0, %0" : "=r"(h) : "r"(h));
|
||||||
}
|
}
|
||||||
cycles = arch_cycle_count() - cycles;
|
cycles = arch_cycle_count() - cycles;
|
||||||
|
|
||||||
double cycles_iter = (float)cycles / ITER;
|
ulong cycles_per_iter = (cycles * 1000) / ITER;
|
||||||
printf_float("took %lu cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, cycles_iter);
|
printf("took %lu cycles to issue 8 integer ops (%lu.%03lu cycles/iteration)\n", cycles,
|
||||||
|
cycles_per_iter / 1000, cycles_per_iter % 1000);
|
||||||
#undef ITER
|
#undef ITER
|
||||||
}
|
}
|
||||||
#endif // __CORTEX_M
|
#endif // __CORTEX_M
|
||||||
@@ -251,7 +277,7 @@ int benchmarks(int argc, const console_cmd_args *argv) {
|
|||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
arm_bench_cset_stm();
|
arm_bench_cset_stm();
|
||||||
|
|
||||||
#if (__CORTEX_M >= 0x03)
|
#if (__CORTEX_M >= 0x03)
|
||||||
arm_bench_multi_issue();
|
arm_bench_multi_issue();
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
@@ -261,4 +287,3 @@ int benchmarks(int argc, const console_cmd_args *argv) {
|
|||||||
|
|
||||||
return NO_ERROR;
|
return NO_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,11 +8,11 @@ MODULE_SRCS := \
|
|||||||
$(LOCAL_DIR)/clock_tests.c \
|
$(LOCAL_DIR)/clock_tests.c \
|
||||||
$(LOCAL_DIR)/fibo.c \
|
$(LOCAL_DIR)/fibo.c \
|
||||||
$(LOCAL_DIR)/mem_tests.c \
|
$(LOCAL_DIR)/mem_tests.c \
|
||||||
|
$(LOCAL_DIR)/port_tests.c \
|
||||||
$(LOCAL_DIR)/tests.c \
|
$(LOCAL_DIR)/tests.c \
|
||||||
$(LOCAL_DIR)/thread_tests.c \
|
$(LOCAL_DIR)/thread_tests.c \
|
||||||
$(LOCAL_DIR)/port_tests.c \
|
|
||||||
$(LOCAL_DIR)/v9p_tests.c \
|
|
||||||
$(LOCAL_DIR)/v9fs_tests.c \
|
$(LOCAL_DIR)/v9fs_tests.c \
|
||||||
|
$(LOCAL_DIR)/v9p_tests.c \
|
||||||
|
|
||||||
MODULE_FLOAT_SRCS := \
|
MODULE_FLOAT_SRCS := \
|
||||||
$(LOCAL_DIR)/benchmarks.c \
|
$(LOCAL_DIR)/benchmarks.c \
|
||||||
|
|||||||
Reference in New Issue
Block a user