/* This code was written to support the book, "ARM Assembly for Embedded Applications", by Daniel W. Lewis. Permission is granted to freely share this software provided that this notice is not removed. This software is intended to be used with a run-time library adapted by the author from the STM Cube Library for the 32F429IDISCOVERY board and available for download from http://www.engr.scu.edu/~dlewis/book3. */ #include #include #include #include #include "library.h" #include "graphics.h" typedef struct { float x32 ; // The least-significant 32 bits (returned in S0) float y32 ; // The most-significant 32 bits (returned in S1) } FLOAT2 ; #pragma GCC push_options #pragma GCC optimize ("O0") // ---------------------------------------------------------------------------- // Support routines for FLOAT2 arithmetic to be implemented in assembly // ---------------------------------------------------------------------------- FLOAT2 __attribute__((weak)) QuickTwoSum(float a32, float b32) { // assumes a32 > b32 float s32, e32 ; s32 = a32 + b32 ; e32 = b32 - (s32 - a32) ; // This is NOT always zero! return (FLOAT2) {s32, e32} ; } FLOAT2 __attribute__((weak)) TwoSum(float a32, float b32) { float s32, v32, e32 ; s32 = a32 + b32 ; v32 = s32 - a32 ; // This is NOT the same as b32! e32 = (a32 - (s32 - v32)) + (b32 - v32) ; // This is NOT always zero! return (FLOAT2) {s32, e32} ; } FLOAT2 __attribute__((weak)) Split(float a32) { float p32, ahi32, alo32 ; p32 = 4097.0f * a32 ; ahi32 = p32 - (p32 - a32) ; // This is NOT the same as a32! alo32 = a32 - ahi32 ; return (FLOAT2) {ahi32, alo32} ; } #pragma GCC pop_options typedef struct { int ytop ; int xlft ; double want ; double sum ; unsigned strt ; unsigned stop ; unsigned steps ; } RESULTS ; // Public fonts defined in run-time library typedef struct { const uint8_t * table ; const uint16_t Width ; const uint16_t Height ; } sFONT ; // Series approximations static void SeriesFP32(void) ; static void SeriesFLT2(void) ; static void SeriesFP64(void) ; // Basic arithmetic operations for double floats static FLOAT2 Add64(FLOAT2 a64, FLOAT2 b64) ; static FLOAT2 Sub64(FLOAT2 a64, FLOAT2 b64) ; static FLOAT2 Mul64(FLOAT2 a64, FLOAT2 b64) ; static FLOAT2 Div64(FLOAT2 a64, FLOAT2 b64) ; static int EQ64(FLOAT2 a64, FLOAT2 b64) ; // Additional support routine to implement Add64, Sub64, Mul64, & Div64 static FLOAT2 TwoProd(float a32, float b32) ; // Other functions static double Double(FLOAT2 a64) ; static void Results(char *alg, char *type, RESULTS *results) ; static void SetFontSize(sFONT *Font) ; static int PutStringAt(int row, int col, char *fmt, ...) ; // High-precision definitions of PI and e #pragma GCC push_options #pragma GCC optimize("no-single-precision-constant") static const double __attribute__((unused)) PI = 3.1415926535897932384626433 ; #pragma GCC pop_options extern sFONT Font8, Font12, Font16, Font20, Font24 ; #define LIMIT 500000 #define FONT_HEIGHT 12 #define FONT_WIDTH 7 #define RESULTS_FONT Font12 #define RESULTS_MARGIN 8 #define RESULTS_PADDING 4 #define RESULTS_WIDTH 220 #define RESULTS_HEIGHT (5*FONT_HEIGHT + 2*RESULTS_PADDING) #define ITER_XLFT ((XPIXELS - RESULTS_WIDTH)/2) #define ITER_YTOP 55 #define COLOR_BKGD COLOR_YELLOW #define COLOR_BRDR COLOR_RED #define COLOR_TEXT COLOR_BLACK #define FP32_YTOP (ITER_YTOP + FONT_HEIGHT + RESULTS_MARGIN) #define FP32_XLFT ITER_XLFT #define FLT2_YTOP (FP32_YTOP + RESULTS_HEIGHT + RESULTS_MARGIN) #define FLT2_XLFT ITER_XLFT #define FP64_YTOP (FLT2_YTOP + RESULTS_HEIGHT + RESULTS_MARGIN) #define FP64_XLFT ITER_XLFT int main() { char text[100] ; InitializeHardware(HEADER, "Lab 9D: Extending Precision") ; SetFontSize(&Font12) ; sprintf(text, "Iteration Limit: %u", LIMIT) ; DisplayStringAt(ITER_XLFT, ITER_YTOP, text) ; SeriesFP32() ; SeriesFLT2() ; SeriesFP64() ; return 0 ; } // ---------------------------------------------------------------------------- // Series approximations to PI // ---------------------------------------------------------------------------- static void SeriesFP32(void) { static RESULTS results = {FP32_YTOP, FP32_XLFT} ; float sum, top, num, err ; unsigned step ; sum = 3.0f ; top = 4.0f ; num = 3.0f ; err = 0.0f ; results.want = PI ; results.strt = GetClockCycleCount() ; for (step = 0; step < LIMIT; step++) { float btm = num*num*num - num ; // same as (n-1)*n*(n+1) float y = top / btm - err ; float tmp = sum + y ; if (tmp == sum) break ; err = (tmp - sum) - y ; // This is NOT always zero! sum = tmp ; top = -top ; num += 2.0f ; } results.stop = GetClockCycleCount() ; results.sum = (double) sum ; results.steps = step ; Results("Kahan", "float", &results) ; } static void SeriesFLT2(void) { static RESULTS results = {FLT2_YTOP, FLT2_XLFT} ; FLOAT2 sum, top, num ; unsigned step ; sum = (FLOAT2) { 3.0f, 0.0f} ; top = (FLOAT2) { 4.0f, 0.0f} ; num = (FLOAT2) { 3.0f, 0.0f} ; results.want = PI ; results.strt = GetClockCycleCount() ; for (step = 0; step < LIMIT; step++) { FLOAT2 btm = Sub64(Mul64(num, Mul64(num, num)), num) ; // same as (n-1)*n*(n+1) FLOAT2 tmp = Add64(sum, Div64(top, btm)) ; // This is NOT always zero! if (EQ64(tmp, sum)) break ; sum = tmp ; top.x32 = -top.x32 ; num.x32 += 2.0f ; } results.stop = GetClockCycleCount() ; results.sum = Double(sum) ; results.steps = step ; Results("Normal", "FLOAT2", &results) ; } static void SeriesFP64(void) { static RESULTS results = {FP64_YTOP, FP64_XLFT} ; double sum, top, num, err ; unsigned step ; sum = 3.0 ; top = 4.0 ; num = 3.0 ; err = 0.0 ; results.want = PI ; results.strt = GetClockCycleCount() ; for (step = 0; step < LIMIT; step++) { double btm = num*num*num - num ; // same as (n-1)*n*(n+1) double y = top / btm - err ; double tmp = sum + y ; if (tmp == sum) break ; err = (tmp - sum) - y ; // This is NOT always zero! sum = tmp ; top = -top ; num += 2.0f ; } results.stop = GetClockCycleCount() ; results.sum = sum ; results.steps = step ; Results("Kahan", "double", &results) ; } // ---------------------------------------------------------------------------- // Basic arithmetic operations for double floats // ---------------------------------------------------------------------------- static FLOAT2 Add64(FLOAT2 a64, FLOAT2 b64) { FLOAT2 s64, t64 ; s64 = TwoSum(a64.x32, b64.x32) ; t64 = TwoSum(a64.y32, b64.y32) ; s64.y32 += t64.x32 ; s64 = QuickTwoSum(s64.x32, s64.y32) ; s64.y32 += t64.y32 ; return QuickTwoSum(s64.x32, s64.y32) ; } static FLOAT2 Sub64(FLOAT2 a64, FLOAT2 b64) { b64.x32 = -b64.x32 ; b64.y32 = -b64.y32 ; return Add64(a64, b64) ; } static FLOAT2 Mul64(FLOAT2 a64, FLOAT2 b64) { FLOAT2 p64 ; p64 = TwoProd(a64.x32, b64.x32) ; p64.y32 += a64.x32 * b64.y32 ; p64.y32 += a64.y32 * b64.x32 ; return QuickTwoSum(p64.x32, p64.y32) ; } static FLOAT2 Div64(FLOAT2 a64, FLOAT2 b64) { FLOAT2 q64, e64 ; float i32, d32 ; i32 = 1.0f / b64.x32 ; q64 = (FLOAT2) {a64.x32 * i32, 0.0f} ; d32 = Sub64(a64, Mul64(b64, q64)).x32 ; e64 = TwoProd(i32, d32) ; return Add64(q64, e64) ; } static int EQ64(FLOAT2 a64, FLOAT2 b64) { return *((uint64_t *) &a64) == *((uint64_t *) &b64) ; } // ---------------------------------------------------------------------------- // Additional support routine to implement Add64, Sub64, Mul64, & Div64 // ---------------------------------------------------------------------------- static FLOAT2 TwoProd(float a32, float b32) { FLOAT2 a64, b64 ; float p32, e32 ; p32 = a32 * b32 ; a64 = Split(a32) ; b64 = Split(b32) ; e32 = ((a64.x32*b64.x32 - p32) + a64.x32*b64.y32 + a64.y32*b64.x32) + a64.y32*b64.y32 ; return (FLOAT2) {p32, e32} ; } // ---------------------------------------------------------------------------- // Other functions // ---------------------------------------------------------------------------- static double Double(FLOAT2 a64) { return (double) a64.x32 + (double) a64.y32 ; } static void Results(char *alg, char *type, RESULTS *results) { double err = fabs(results->sum - results->want) ; char sum[20], ref[20] ; int row, col, k ; SetForeground(COLOR_BKGD) ; FillRect(results->xlft, results->ytop, RESULTS_WIDTH, RESULTS_HEIGHT) ; SetForeground(COLOR_BRDR) ; DrawRect(results->xlft, results->ytop, RESULTS_WIDTH, RESULTS_HEIGHT) ; SetFontSize(&RESULTS_FONT) ; SetForeground(COLOR_TEXT) ; SetBackground(COLOR_BKGD) ; sprintf(sum, "%.16lf", results->sum) ; sprintf(ref, "%.16lf", results->want) ; for (k = 0; sum[k] != '\0'; k++) { if (sum[k] != ref[k]) break ; } row = results->ytop + RESULTS_PADDING ; col = results->xlft + RESULTS_PADDING ; row = PutStringAt(row, col, "%s summation using %s:", alg, type) ; PutStringAt(row, col, "sum = %s", sum) ; SetForeground(COLOR_BRDR) ; row = PutStringAt(row, col + (k + 6)*RESULTS_FONT.Width, sum + k) ; SetForeground(COLOR_TEXT) ; row = PutStringAt(row, col, "err = %.16lf", err) ; row = PutStringAt(row, col, "Iterations: %u (%s)", results->steps, (results->steps == LIMIT) ? "Limited" : "Converged") ; row = PutStringAt(row, col, "Clock cycles/iteration: %u", (results->stop - results->strt) / results->steps) ; } static int PutStringAt(int row, int col, char *fmt, ...) { char text[100] ; va_list ap ; va_start(ap, fmt) ; vsprintf(text, fmt, ap) ; va_end(ap) ; DisplayStringAt(col, row, text) ; return row + FONT_HEIGHT ; } static void SetFontSize(sFONT *Font) { extern void BSP_LCD_SetFont(sFONT *) ; BSP_LCD_SetFont(Font) ; }