[fpu] Implement two versions of all of the printf routines: with and without fpu support

The default printf and family will no longer implement FPU support; a
second copy of the routines will be generated with the _float suffix.

i.e., printf() has no %f support, but printf_float() does.

This is to prevent the default printf from emitting any floating point
instructions when used within core kernel code, which has been an on and
off problem for years, especially on architectures that are eager to use
fpu/vector instructions for regular non-fpu code.

If FPU is not implemented on the arch, the *_float routines will alias
to the integer-only ones.

Perhaps a much more proper solution is to invert this and require every
caller of printf that cannot tolerate fpu codegen (which in mainline is
most of them) to use a _nofloat implementation, but this would touch
practically all printfs in mainline.

This solution acknowledges that most of the code in mainline is
in-kernel support code and doesn't need floating point, except for
perhaps some app/* code, which can already opt in.

This solution can also potentially bloat the size of the binary by
having two complete implementations, though I think in practice the
architectures where the extra few KB of code will matter generally don't
have FPU support, or aren't using it. In the latter case the
link-time-gc should remove unused _float routines.
This commit is contained in:
Travis Geiselbrecht
2025-10-09 05:11:17 +00:00
parent 5016118509
commit 9c67917dd7
10 changed files with 740 additions and 674 deletions

View File

@@ -21,10 +21,9 @@ STATIC_COMMAND_END(accelerometer);
void read_xyz(void) {
position_vector_t pos_vector;
acc_read_xyz(&pos_vector);
printf("X value = %f\n",pos_vector.x);
printf("Y value = %f\n",pos_vector.y);
printf("Z value = %f\n",pos_vector.z);
printf_float("X value = %f\n",pos_vector.x);
printf_float("Y value = %f\n",pos_vector.y);
printf_float("Z value = %f\n",pos_vector.z);
}
APP_START(accelerometer)

View File

@@ -54,7 +54,7 @@ __NO_INLINE static void bench_memset(void) {
size_t total_bytes = BUFSIZE * ITER;
double bytes_cycle = total_bytes / (double)count;
printf("took %lu cycles to memset a buffer of size %zu %d times (%zu bytes), %f bytes/cycle\n",
printf_float("took %lu cycles to memset a buffer of size %zu %d times (%zu bytes), %f bytes/cycle\n",
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
free(buf);
@@ -79,7 +79,7 @@ __NO_INLINE static void bench_cset_##type(void) \
\
size_t total_bytes = BUFSIZE * ITER; \
double bytes_cycle = total_bytes / (double)count; \
printf("took %lu cycles to manually clear a buffer using wordsize %zu of size %zu %u times (%zu bytes), %f bytes/cycle\n", \
printf_float("took %lu cycles to manually clear a buffer using wordsize %zu of size %zu %u times (%zu bytes), %f bytes/cycle\n", \
count, sizeof(*buf), BUFSIZE, ITER, total_bytes, bytes_cycle); \
\
free(buf); \
@@ -114,7 +114,7 @@ __NO_INLINE static void bench_cset_wide(void) {
size_t total_bytes = BUFSIZE * ITER;
double bytes_cycle = total_bytes / (double)count;
printf("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time (%zu bytes), %f bytes/cycle\n",
printf_float("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time (%zu bytes), %f bytes/cycle\n",
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
free(buf);
@@ -135,7 +135,7 @@ __NO_INLINE static void bench_memcpy(void) {
size_t total_bytes = (BUFSIZE / 2) * ITER;
double bytes_cycle = total_bytes / (double)count;
printf("took %lu cycles to memcpy a buffer of size %zu %d times (%zu source bytes), %f source bytes/cycle\n",
printf_float("took %lu cycles to memcpy a buffer of size %zu %d times (%zu source bytes), %f source bytes/cycle\n",
count, BUFSIZE / 2, ITER, total_bytes, bytes_cycle);
free(buf);
@@ -162,7 +162,7 @@ __NO_INLINE static void arm_bench_cset_stm(void) {
size_t total_bytes = BUFSIZE * ITER;
double bytes_cycle = total_bytes / (float)count;
printf("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time using stm (%zu bytes), %f bytes/cycle\n",
printf_float("took %lu cycles to manually clear a buffer of size %zu %d times 8 words at a time using stm (%zu bytes), %f bytes/cycle\n",
count, BUFSIZE, ITER, total_bytes, bytes_cycle);
free(buf);
@@ -189,7 +189,7 @@ __NO_INLINE static void arm_bench_multi_issue(void) {
cycles = arch_cycle_count() - cycles;
double cycles_iter = (float)cycles / ITER;
printf("took %lu cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, cycles_iter);
printf_float("took %lu cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, cycles_iter);
#undef ITER
}
#endif // __CORTEX_M

View File

@@ -114,7 +114,7 @@ static void float_test(void) {
float result = val[i];
uint32_t result_u32;
memcpy(&result_u32, &result, sizeof(result_u32));
printf("float thread %u returns %d, hex val %a, uint32 %#" PRIx32, i, res, (double)result, result_u32);
printf_float("float thread %u returns %d, hex val %a, uint32 %#" PRIx32, i, res, (double)result, result_u32);
if (result_u32 != test_results_32[i]) {
printf("\nfloat thread %u failed, expected %#" PRIx32 "\n", i, test_results_32[i]);
} else {
@@ -124,7 +124,7 @@ static void float_test(void) {
double result = val[i];
uint64_t result_u64;
memcpy(&result_u64, &result, sizeof(result_u64));
printf("float thread %u returns %d, hex val %a, uint64 %#" PRIx64, i, res, result, result_u64);
printf_float("float thread %u returns %d, hex val %a, uint64 %#" PRIx64, i, res, result, result_u64);
if (result_u64 != test_results_64[i]) {
printf("\nfloat thread %u failed, expected %#" PRIx64 "\n", i, test_results_64[i]);
} else {