[app][tests] add an instruction multi-issue test, clean up formatting
This commit is contained in:
@@ -33,69 +33,58 @@
|
||||
#include <kernel/event.h>
|
||||
#include <platform.h>
|
||||
|
||||
#if ARCH_ARM
|
||||
void bench_set_overhead(void)
|
||||
const size_t BUFSIZE = (1024*1024);
|
||||
const uint ITER = 1024;
|
||||
|
||||
__NO_INLINE static void bench_set_overhead(void)
|
||||
{
|
||||
const uint BUFSIZE = 4096;
|
||||
const uint ITER = 4096;
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
printf("buf %p\n", buf);
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
// for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) {
|
||||
__asm__ volatile(
|
||||
"nop"
|
||||
);
|
||||
// }
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
__asm__ volatile("");
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
|
||||
printf("took %u cycles overhead to loop %u times\n",
|
||||
count, ITER);
|
||||
printf("took %u cycles overhead to loop %u times\n",
|
||||
count, ITER);
|
||||
|
||||
free(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void bench_memset(void)
|
||||
__NO_INLINE static void bench_memset(void)
|
||||
{
|
||||
const uint BUFSIZE = 4096;
|
||||
const uint ITER = 4096;
|
||||
void *buf = malloc(BUFSIZE);
|
||||
printf("buf %p\n", buf);
|
||||
void *buf = malloc(BUFSIZE);
|
||||
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
memset(buf, 0, BUFSIZE);
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
memset(buf, 0, BUFSIZE);
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
|
||||
printf("took %u cycles to memset a buffer of size %u %d times (%u bytes)\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER);
|
||||
printf("took %u cycles to memset a buffer of size %u %d times (%u bytes), %f bytes/cycle\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
|
||||
|
||||
free(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
#define bench_cset(type) \
|
||||
void bench_cset_##type(void) \
|
||||
__NO_INLINE static void bench_cset_##type(void) \
|
||||
{ \
|
||||
const uint BUFSIZE = 4096; \
|
||||
const uint ITER = 4096; \
|
||||
type *buf = malloc(BUFSIZE); \
|
||||
printf("buf %p\n", buf); \
|
||||
type *buf = malloc(BUFSIZE); \
|
||||
\
|
||||
uint count = arch_cycle_count(); \
|
||||
for (uint i = 0; i < ITER; i++) { \
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
|
||||
buf[j] = 0; \
|
||||
} \
|
||||
} \
|
||||
count = arch_cycle_count() - count; \
|
||||
uint count = arch_cycle_count(); \
|
||||
for (uint i = 0; i < ITER; i++) { \
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
|
||||
buf[j] = 0; \
|
||||
} \
|
||||
} \
|
||||
count = arch_cycle_count() - count; \
|
||||
\
|
||||
printf("took %u cycles to manually clear a buffer using wordsize %d of size %u %d times (%u bytes)\n", \
|
||||
count, sizeof(*buf), BUFSIZE, ITER, BUFSIZE * ITER); \
|
||||
printf("took %u cycles to manually clear a buffer using wordsize %d of size %u %d times (%u bytes), %f bytes/cycle\n", \
|
||||
count, sizeof(*buf), BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count); \
|
||||
\
|
||||
free(buf); \
|
||||
free(buf); \
|
||||
}
|
||||
|
||||
bench_cset(uint8_t)
|
||||
@@ -103,78 +92,94 @@ bench_cset(uint16_t)
|
||||
bench_cset(uint32_t)
|
||||
bench_cset(uint64_t)
|
||||
|
||||
void bench_cset_wide(void)
|
||||
__NO_INLINE static void bench_cset_wide(void)
|
||||
{
|
||||
const uint BUFSIZE = 4096;
|
||||
const uint ITER = 4096;
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
printf("buf %p\n", buf);
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||
buf[j*8] = 0;
|
||||
buf[j*8+1] = 0;
|
||||
buf[j*8+2] = 0;
|
||||
buf[j*8+3] = 0;
|
||||
buf[j*8+4] = 0;
|
||||
buf[j*8+5] = 0;
|
||||
buf[j*8+6] = 0;
|
||||
buf[j*8+7] = 0;
|
||||
}
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||
buf[j*8] = 0;
|
||||
buf[j*8+1] = 0;
|
||||
buf[j*8+2] = 0;
|
||||
buf[j*8+3] = 0;
|
||||
buf[j*8+4] = 0;
|
||||
buf[j*8+5] = 0;
|
||||
buf[j*8+6] = 0;
|
||||
buf[j*8+7] = 0;
|
||||
}
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
|
||||
printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time (%u bytes)\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER);
|
||||
printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time (%u bytes), %f bytes/cycle\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
|
||||
|
||||
free(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
void bench_cset_stm(void)
|
||||
__NO_INLINE static void bench_memcpy(void)
|
||||
{
|
||||
const uint BUFSIZE = 4096;
|
||||
const uint ITER = 4096;
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
printf("buf %p\n", buf);
|
||||
uint8_t *buf = malloc(BUFSIZE);
|
||||
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||
__asm__ volatile(
|
||||
"stm %0, {r0-r7};"
|
||||
:: "r" (&buf[j*8])
|
||||
);
|
||||
}
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
|
||||
printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time using stm (%u bytes)\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER);
|
||||
printf("took %u cycles to memcpy a buffer of size %u %d times (%u source bytes), %f source bytes/cycle\n",
|
||||
count, BUFSIZE / 2, ITER, BUFSIZE / 2 * ITER, (BUFSIZE / 2 * ITER) / (float)count);
|
||||
|
||||
free(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
|
||||
void bench_memcpy(void)
|
||||
#if ARCH_ARM
|
||||
__NO_INLINE static void arm_bench_cset_stm(void)
|
||||
{
|
||||
const uint BUFSIZE = 4096;
|
||||
const uint ITER = 4096;
|
||||
uint8_t *buf = malloc(BUFSIZE);
|
||||
printf("buf %p\n", buf);
|
||||
uint32_t *buf = malloc(BUFSIZE);
|
||||
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
uint count = arch_cycle_count();
|
||||
for (uint i = 0; i < ITER; i++) {
|
||||
for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
|
||||
__asm__ volatile(
|
||||
"stm %0, {r0-r7};"
|
||||
:: "r" (&buf[j*8])
|
||||
);
|
||||
}
|
||||
}
|
||||
count = arch_cycle_count() - count;
|
||||
|
||||
printf("took %u cycles to memcpy a buffer of size %u %d times (%u bytes)\n",
|
||||
count, BUFSIZE / 2, ITER, BUFSIZE * ITER);
|
||||
printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time using stm (%u bytes), %f bytes/cycle\n",
|
||||
count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
|
||||
|
||||
free(buf);
|
||||
free(buf);
|
||||
}
|
||||
#endif
|
||||
|
||||
__NO_INLINE static void arm_bench_multi_issue(void)
|
||||
{
|
||||
uint32_t cycles;
|
||||
uint32_t a = 0, b = 0, c = 0, d = 0, e = 0, f = 0, g = 0, h = 0;
|
||||
|
||||
#define ITER 1000000
|
||||
uint count = ITER;
|
||||
cycles = arch_cycle_count();
|
||||
while (count--) {
|
||||
asm volatile ("");
|
||||
asm volatile ("add %0, %0, %0" : "=r" (a) : "r" (a));
|
||||
asm volatile ("add %0, %0, %0" : "=r" (b) : "r" (b));
|
||||
asm volatile ("and %0, %0, %0" : "=r" (c) : "r" (c));
|
||||
asm volatile ("mov %0, %0" : "=r" (d) : "r" (d));
|
||||
asm volatile ("orr %0, %0, %0" : "=r" (e) : "r" (e));
|
||||
asm volatile ("add %0, %0, %0" : "=r" (f) : "r" (f));
|
||||
asm volatile ("and %0, %0, %0" : "=r" (g) : "r" (g));
|
||||
asm volatile ("mov %0, %0" : "=r" (h) : "r" (h));
|
||||
}
|
||||
cycles = arch_cycle_count() - cycles;
|
||||
|
||||
printf("took %u cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, (float)cycles / ITER);
|
||||
#undef ITER
|
||||
}
|
||||
#endif // ARCH_ARM
|
||||
|
||||
#if WITH_LIB_LIBM
|
||||
#include <math.h>
|
||||
@@ -215,20 +220,24 @@ __NO_INLINE static void bench_sincos(void)
|
||||
printf("took %u cycles for sqrtf()\n", count);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // WITH_LIB_LIBM
|
||||
|
||||
void benchmarks(void)
|
||||
{
|
||||
bench_set_overhead();
|
||||
bench_memset();
|
||||
bench_memcpy();
|
||||
|
||||
bench_cset_uint8_t();
|
||||
bench_cset_uint16_t();
|
||||
bench_cset_uint32_t();
|
||||
bench_cset_uint64_t();
|
||||
bench_cset_wide();
|
||||
|
||||
#if ARCH_ARM
|
||||
bench_set_overhead();
|
||||
bench_memset();
|
||||
bench_cset_uint8_t();
|
||||
bench_cset_uint16_t();
|
||||
bench_cset_uint32_t();
|
||||
bench_cset_uint64_t();
|
||||
bench_cset_wide();
|
||||
bench_cset_stm();
|
||||
bench_memcpy();
|
||||
arm_bench_cset_stm();
|
||||
|
||||
arm_bench_multi_issue();
|
||||
#endif
|
||||
#if WITH_LIB_LIBM
|
||||
bench_sincos();
|
||||
|
||||
@@ -5,17 +5,19 @@ MODULE := $(LOCAL_DIR)
|
||||
GLOBAL_INCLUDES += $(LOCAL_DIR)/include
|
||||
|
||||
MODULE_SRCS += \
|
||||
$(LOCAL_DIR)/tests.c \
|
||||
$(LOCAL_DIR)/thread_tests.c \
|
||||
$(LOCAL_DIR)/printf_tests.c \
|
||||
$(LOCAL_DIR)/clock_tests.c \
|
||||
$(LOCAL_DIR)/cache_tests.c \
|
||||
$(LOCAL_DIR)/benchmarks.c \
|
||||
$(LOCAL_DIR)/float.c \
|
||||
$(LOCAL_DIR)/float_instructions.S \
|
||||
$(LOCAL_DIR)/float_test_vec.c \
|
||||
$(LOCAL_DIR)/fibo.c \
|
||||
$(LOCAL_DIR)/mem_tests.c \
|
||||
$(LOCAL_DIR)/benchmarks.c \
|
||||
$(LOCAL_DIR)/cache_tests.c \
|
||||
$(LOCAL_DIR)/clock_tests.c \
|
||||
$(LOCAL_DIR)/fibo.c \
|
||||
$(LOCAL_DIR)/float.c \
|
||||
$(LOCAL_DIR)/float_instructions.S \
|
||||
$(LOCAL_DIR)/float_test_vec.c \
|
||||
$(LOCAL_DIR)/mem_tests.c \
|
||||
$(LOCAL_DIR)/printf_tests.c \
|
||||
$(LOCAL_DIR)/tests.c \
|
||||
$(LOCAL_DIR)/thread_tests.c \
|
||||
|
||||
MODULE_ARM_OVERRIDE_SRCS := \
|
||||
|
||||
MODULE_COMPILEFLAGS += -Wno-format
|
||||
|
||||
|
||||
Reference in New Issue
Block a user