dubious timing optimizations; try to sleep between frames; skip at constant speed

This commit is contained in:
Andrei Alexeyev 2019-01-09 05:25:10 +02:00
parent 64878d5c67
commit 69079f245a
No known key found for this signature in database
GPG key ID: 363707CD4C7FE8A4
16 changed files with 217 additions and 124 deletions

View file

@ -173,22 +173,12 @@ Timing
less accurate.
**TAISEI_FRAMELIMITER_SLEEP**
| Default: ``0``
| Default: ``3``
If over ``0``, tries to sleep this many milliseconds after every frame
if it was processed quickly enough. This reduces CPU usage by having the
game spend less time in a busy loop, but may hurt framerate stability if
set too high, especially if the high resolution timer is disabled or
not available.
**TAISEI_FRAMELIMITER_SLEEP_EXACT**
| Default: ``1``
If ``1``, the framerate limiter will either try to sleep the exact
amount of time set in ``TAISEI_FRAMELIMITER_SLEEP``, or none at all.
Mitigates the aforementioned framerate stability issues by effectively
making ``TAISEI_FRAMELIMITER_SLEEP`` do nothing if the value is too high
for your system.
If over ``0``, tries to give up processing time to other applications
while waiting for the next frame, if at least ``frame_time / this_value``
amount of time is remaining. Increasing this value reduces CPU usage,
but may harm performance. Set to ``0`` for the v1.2 default behaviour.
**TAISEI_FRAMELIMITER_COMPENSATE**
| Default: ``1``

View file

@ -169,6 +169,8 @@ if not (have_vla and have_complex)
endif
config.set('TAISEI_BUILDCONF_HAVE_TIMESPEC', have_timespec)
config.set('TAISEI_BUILDCONF_HAVE_INT128', cc.sizeof('__int128') == 16)
config.set('TAISEI_BUILDCONF_HAVE_LONG_DOUBLE', cc.sizeof('long double') > 8)
macos_app_bundle = get_option('macos_bundle') and host_machine.system() == 'darwin'

View file

@ -80,7 +80,6 @@
CONFIGDEF_INT (VID_WIDTH, "vid_width", RESX) \
CONFIGDEF_INT (VID_HEIGHT, "vid_height", RESY) \
CONFIGDEF_INT (VID_RESIZABLE, "vid_resizable", 0) \
CONFIGDEF_INT (VID_LATE_SWAP, "vid_late_swap", 0) \
CONFIGDEF_INT (VID_FRAMESKIP, "vid_frameskip", 1) \
CONFIGDEF_INT (VSYNC, "vsync", 0) \
CONFIGDEF_INT (MIXER_CHUNKSIZE, "mixer_chunksize", 1024) \
@ -100,6 +99,7 @@
CONFIGDEF_INT (FXAA, "fxaa", 1) \
CONFIGDEF_INT (POSTPROCESS, "postprocess", 2) \
CONFIGDEF_INT (HEALTHBAR_STYLE, "healthbar_style", 1) \
CONFIGDEF_INT (SKIP_SPEED, "skip_speed", 10) \
KEYDEFS \
CONFIGDEF_INT (GAMEPAD_ENABLED, "gamepad_enabled", 0) \
CONFIGDEF_STRING (GAMEPAD_DEVICE, "gamepad_device", "default") \

View file

@ -286,7 +286,7 @@ void events_emit(TaiseiEvent type, int32_t code, void *data1, void *data2) {
void events_pause_keyrepeat(void) {
// workaround for SDL bug
// https://bugzilla.libsdl.org/show_bug.cgi?id=3287
keyrepeat_paused_until = time_get() + 0.25;
keyrepeat_paused_until = time_get() + HRTIME_RESOLUTION / 4;
}
/*

View file

@ -13,14 +13,15 @@
#include "video.h"
void fpscounter_reset(FPSCounter *fps) {
hrtime_t frametime = 1.0 / FPS;
hrtime_t frametime = HRTIME_RESOLUTION / FPS;
const int log_size = sizeof(fps->frametimes)/sizeof(hrtime_t);
for(int i = 0; i < log_size; ++i) {
fps->frametimes[i] = frametime;
}
fps->fps = 1.0 / frametime;
fps->fps = HRTIME_RESOLUTION / (long double)frametime;
fps->frametime = frametime;
fps->last_update_time = time_get();
}
@ -31,13 +32,14 @@ void fpscounter_update(FPSCounter *fps) {
memmove(fps->frametimes, fps->frametimes + 1, (log_size - 1) * sizeof(hrtime_t));
fps->frametimes[log_size - 1] = frametime;
hrtime_t avg = 0.0;
hrtime_t avg = 0;
for(int i = 0; i < log_size; ++i) {
avg += fps->frametimes[i];
}
fps->fps = 1.0 / (avg / log_size);
fps->fps = HRTIME_RESOLUTION / (avg / (long double)log_size);
fps->frametime = avg / log_size;
fps->last_update_time = time_get();
}
@ -60,20 +62,17 @@ void loop_at_fps(LogicFrameFunc logic_frame, RenderFrameFunc render_frame, void
hrtime_t frame_start_time = time_get();
hrtime_t next_frame_time = frame_start_time;
hrtime_t target_frame_time = ((hrtime_t)1.0) / fps;
hrtime_t target_frame_time = HRTIME_RESOLUTION / fps;
FrameAction rframe_action = RFRAME_SWAP;
FrameAction lframe_action = LFRAME_WAIT;
int32_t delay = env_get("TAISEI_FRAMELIMITER_SLEEP", 0);
bool exact_delay = env_get("TAISEI_FRAMELIMITER_SLEEP_EXACT", 1);
int32_t sleep = env_get("TAISEI_FRAMELIMITER_SLEEP", 3);
bool compensate = env_get("TAISEI_FRAMELIMITER_COMPENSATE", 1);
bool uncapped_rendering_env = env_get("TAISEI_FRAMELIMITER_LOGIC_ONLY", 0);
bool late_swap = config_get_int(CONFIG_VID_LATE_SWAP);
if(global.is_replay_verification) {
uncapped_rendering_env = false;
delay = 0;
}
uint32_t frame_num = 0;
@ -82,29 +81,12 @@ void loop_at_fps(LogicFrameFunc logic_frame, RenderFrameFunc render_frame, void
static uint8_t recursion_detector;
++recursion_detector;
#ifdef SPAM_FPS
hrtime_t frametimes[4096];
int frametimes_idx = 0;
#endif
while(true) {
bool uncapped_rendering = uncapped_rendering_env;
frame_start_time = time_get();
begin_frame:
#ifdef DEBUG
if(gamekeypressed(KEY_FPSLIMIT_OFF)) {
uncapped_rendering = false;
} else {
uncapped_rendering = uncapped_rendering_env;
}
#endif
if(late_swap && rframe_action == RFRAME_SWAP) {
video_swap_buffers();
}
global.fps.busy.last_update_time = time_get();
++frame_num;
@ -132,7 +114,7 @@ begin_frame:
hrtime_t frametime = target_frame_time;
if(lframe_action == LFRAME_SKIP) {
frametime *= 0.1;
frametime /= imax(1, config_get_int(CONFIG_SKIP_SPEED));
}
next_frame_time += frametime;
@ -141,7 +123,7 @@ begin_frame:
if(total > target_frame_time) {
next_frame_time = frame_start_time;
log_debug("Executing logic took too long (%f), giving up", (double)total);
log_debug("Executing logic took too long (%"PRIuTIME"), giving up", total);
}
}
@ -154,8 +136,12 @@ begin_frame:
);
}
} else {
lframe_action = logic_frame(arg);
fpscounter_update(&global.fps.logic);
uint cnt = 0;
do {
lframe_action = logic_frame(arg);
fpscounter_update(&global.fps.logic);
} while(lframe_action == LFRAME_SKIP && ++cnt < config_get_int(CONFIG_SKIP_SPEED));
}
if(taisei_quit_requested()) {
@ -168,35 +154,19 @@ begin_frame:
r_framebuffer_clear(NULL, CLEAR_ALL, RGBA(0, 0, 0, 1), 1);
rframe_action = render_frame(arg);
fpscounter_update(&global.fps.render);
#ifdef SPAM_FPS
frametimes[frametimes_idx++] = *global.fps.render.frametimes;
size_t s = sizeof(frametimes)/sizeof(*frametimes);
if(frametimes_idx == s) {
hrtime_t total = 0;
for(int i = 0; i < s; ++i) {
total += frametimes[i];
}
frametimes_idx = 0;
log_info("%zi frames in %.2fs = %.2f FPS", s, (double)total, (double)(1 / (total / s)));
}
#endif
}
if(lframe_action == LFRAME_STOP) {
break;
}
if(!late_swap && rframe_action == RFRAME_SWAP) {
if(rframe_action == RFRAME_SWAP) {
video_swap_buffers();
}
fpscounter_update(&global.fps.busy);
if(lframe_action == LFRAME_SKIP || uncapped_rendering) {
if(/*lframe_action == LFRAME_SKIP ||*/ uncapped_rendering) {
continue;
}
@ -207,40 +177,31 @@ begin_frame:
#endif
next_frame_time = frame_start_time + target_frame_time;
// next_frame_time = frame_start_time + 2 * target_frame_time - global.fps.logic.frametime;
if(compensate) {
hrtime_t rt = time_get();
hrtime_t diff = rt - next_frame_time;
if(diff >= 0) {
if(rt > next_frame_time) {
// frame took too long...
// try to compensate in the next frame to avoid slowdown
frame_start_time = rt - min(diff, target_frame_time);
frame_start_time = rt - imin(rt - next_frame_time, target_frame_time);
goto begin_frame;
}
}
if(delay > 0) {
int32_t realdelay = delay;
int32_t maxdelay = (int32_t)(1000 * (next_frame_time - time_get()));
if(realdelay > maxdelay) {
if(exact_delay) {
log_debug("Delay of %i ignored. Maximum is %i, TAISEI_FRAMELIMITER_SLEEP_EXACT is active", realdelay, maxdelay);
realdelay = 0;
} else {
log_debug("Delay reduced from %i to %i", realdelay, maxdelay);
realdelay = maxdelay;
}
}
if(realdelay > 0) {
SDL_Delay(realdelay);
if(sleep > 0) {
// CAUTION: All of these casts are important!
while((shrtime_t)next_frame_time - (shrtime_t)time_get() > (shrtime_t)target_frame_time / sleep) {
uint32_t nap_multiplier = 1;
uint32_t nap_divisor = 3;
hrtime_t nap_raw = imax(0, (shrtime_t)next_frame_time - (shrtime_t)time_get());
uint32_t nap_sdl = (nap_multiplier * nap_raw * 1000) / (HRTIME_RESOLUTION * nap_divisor);
nap_sdl = imax(nap_sdl, 1);
SDL_Delay(nap_sdl);
}
}
while(time_get() < next_frame_time) {
continue;
}
while(time_get() < next_frame_time);
}
}

View file

@ -14,6 +14,7 @@
// Rolling frame-time statistics for a single counter.
// Time values are expressed in hrtime_t units.
typedef struct {
hrtime_t frametimes[120]; // size = number of frames to average
double fps; // average fps over the last X frames
hrtime_t frametime; // average frame time over the last X frames
hrtime_t last_update_time; // internal; last time the average was recalculated
} FPSCounter;

View file

@ -16,7 +16,13 @@ static hrtime_t time_current;
static hrtime_t time_offset;
static uint64_t prev_hires_time;
static uint64_t prev_hires_freq;
static SDL_mutex *paranoia;
static uint64_t fast_path_mul;
// Records the performance counter frequency and derives the integer
// multiplier that time_update() uses to skip the 128-bit muldiv.
static inline attr_must_inline void set_freq(uint64_t freq) {
	// Whole hrtime units per counter tick, plus whatever does not divide evenly.
	lldiv_t ratio = lldiv(HRTIME_RESOLUTION, freq);

	// A multiplier of 0 means "no fast path": it is only usable when the
	// resolution is an exact multiple of the counter frequency.
	fast_path_mul = (ratio.rem == 0) ? ratio.quot : 0;
	prev_hires_freq = freq;
}
static void time_update(void) {
bool retry;
@ -28,18 +34,24 @@ static void time_update(void) {
uint64_t cntr = SDL_GetPerformanceCounter();
if(freq != prev_hires_freq) {
log_debug("High resolution timer frequency changed: was %"PRIu64", now %"PRIu64". Saved time offset: %.16Lf", prev_hires_freq, freq, time_offset);
log_debug("High resolution timer frequency changed: was %"PRIu64", now %"PRIu64". Saved time offset: %"PRIuTIME"", prev_hires_freq, freq, time_offset);
time_offset = time_current;
prev_hires_freq = freq;
set_freq(freq);
prev_hires_time = SDL_GetPerformanceCounter();
retry = true;
continue;
}
hrtime_t time_new = time_offset + (hrtime_t)(cntr - prev_hires_time) / freq;
hrtime_t time_new;
if(fast_path_mul) {
time_new = time_offset + (cntr - prev_hires_time) * fast_path_mul;
} else {
time_new = time_offset + umuldiv64(cntr - prev_hires_time, HRTIME_RESOLUTION, freq);
}
if(time_new < time_current) {
log_warn("BUG: time went backwards. Was %.16Lf, now %.16Lf. Possible cause: your OS sucks spherical objects. Attempting to correct this...", time_current, time_new);
log_warn("BUG: time went backwards. Was %"PRIuTIME", now %"PRIuTIME". Possible cause: your OS sucks spherical objects. Attempting to correct this...", time_current, time_new);
time_offset = time_current;
time_current = 0;
prev_hires_time = SDL_GetPerformanceCounter();
@ -54,15 +66,9 @@ void time_init(void) {
use_hires = env_get("TAISEI_HIRES_TIMER", 1);
if(use_hires) {
if(!(paranoia = SDL_CreateMutex())) {
log_warn("Not using the system high resolution timer: SDL_CreateMutex() failed: %s", SDL_GetError());
use_hires = false;
return;
}
log_info("Using the system high resolution timer");
prev_hires_time = SDL_GetPerformanceCounter();
prev_hires_freq = SDL_GetPerformanceFrequency();
set_freq(SDL_GetPerformanceFrequency());
} else {
log_info("Not using the system high resolution timer: disabled by environment");
return;
@ -70,20 +76,15 @@ void time_init(void) {
}
void time_shutdown(void) {
if(paranoia) {
SDL_DestroyMutex(paranoia);
paranoia = NULL;
}
}
hrtime_t time_get(void) {
if(use_hires) {
SDL_LockMutex(paranoia);
assert(is_main_thread());
time_update();
hrtime_t t = time_current;
SDL_UnlockMutex(paranoia);
return t;
return time_current;
}
return SDL_GetTicks() / 1000.0;
return SDL_GetTicks() * (HRTIME_RESOLUTION / 1000);
}

View file

@ -9,7 +9,14 @@
#pragma once
#include "taisei.h"
typedef long double hrtime_t;
typedef uint64_t hrtime_t;
typedef int64_t shrtime_t;
#define PRIuTIME PRIu64
#define PRIdTIME PRId64
#define HRTIME_C(value) UINT64_C(value)
// picoseconds. like super duper accurate, man
#define HRTIME_RESOLUTION HRTIME_C(1000000000000)
void time_init(void);
void time_shutdown(void);

View file

@ -81,8 +81,12 @@ void sdl_log(void *userdata, int category, SDL_LogPriority priority, const char
static void init_sdl(void) {
SDL_version v;
if(SDL_Init(SDL_INIT_EVENTS) < 0)
if(SDL_Init(SDL_INIT_EVENTS) < 0) {
log_fatal("SDL_Init() failed: %s", SDL_GetError());
}
// initialize it
is_main_thread();
/*
* TODO: refine this and make it optional

View file

@ -409,13 +409,6 @@ void options_sub_video(MenuData *parent, void *arg) {
bind_addvalue(b, "off");
bind_addvalue(b, "adaptive");
#if 0
add_menu_entry(m, "Swap buffers", do_nothing,
b = bind_option(CONFIG_VID_LATE_SWAP, bind_common_onoff_get, bind_common_onoff_set)
); bind_addvalue(b, "late");
bind_addvalue(b, "early");
#endif
add_menu_entry(m, "Skip frames", do_nothing,
b = bind_option(CONFIG_VID_FRAMESKIP, bind_common_intplus1_get, bind_common_intplus1_set)
); bind_addvalue(b, "0");

View file

@ -63,8 +63,6 @@ typedef struct ResourceAsyncLoadData {
void *opaque;
} ResourceAsyncLoadData;
static SDL_threadID main_thread_id; // TODO: move this somewhere else
static inline ResourceHandler* get_handler(ResourceType type) {
return *(_handlers + type);
}
@ -118,7 +116,7 @@ static void finish_async_load(InternalResource *ires, ResourceAsyncLoadData *dat
static ResourceStatus wait_for_resource_load(InternalResource *ires, uint32_t want_flags) {
SDL_LockMutex(ires->mutex);
if(ires->async_task != NULL && SDL_ThreadID() == main_thread_id) {
if(ires->async_task != NULL && is_main_thread()) {
assert(ires->status == RES_STATUS_LOADING);
ResourceAsyncLoadData *data;
@ -189,7 +187,7 @@ static void* load_resource_async_task(void *vdata) {
}
static bool resource_asyncload_handler(SDL_Event *evt, void *arg) {
assert(SDL_ThreadID() == main_thread_id);
assert(is_main_thread());
InternalResource *ires = evt->user.data1;
@ -414,8 +412,6 @@ void preload_resources(ResourceType type, ResourceFlags flags, const char *first
}
void init_resources(void) {
main_thread_id = SDL_ThreadID();
for(int i = 0; i < RES_NUMTYPES; ++i) {
ResourceHandler *h = get_handler(i);
alloc_handler(h);

View file

@ -1176,7 +1176,7 @@ void stage_draw_hud_text(struct labels_s* labels) {
static void fill_graph(int num_samples, float *samples, FPSCounter *fps) {
for(int i = 0; i < num_samples; ++i) {
samples[i] = fps->frametimes[i] / (((hrtime_t)2.0)/FPS);
samples[i] = fps->frametimes[i] / (2.0 * (HRTIME_RESOLUTION / (long double)FPS));
if(samples[i] > 1.0) {
samples[i] = 1.0;

View file

@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <SDL_thread.h>
void* memdup(const void *src, size_t size) {
void *data = malloc(size);
@ -29,3 +30,16 @@ void inherit_missing_pointers(uint num, void *dest[num], void *const base[num])
}
}
}
/*
 * Returns true if the calling thread is the main thread.
 *
 * The thread that makes the very first call is remembered as the main thread,
 * so that first call has to be made from the main thread (init_sdl() does so
 * right after SDL_Init()).
 *
 * NOTE(review): the first-call latch is not synchronized; this presumably
 * relies on the first call happening before any other thread is started.
 */
bool is_main_thread(void) {
	static bool initialized = false;
	static SDL_threadID main_thread_id = 0;

	SDL_threadID tid = SDL_ThreadID();

	if(!initialized) {
		main_thread_id = tid;
		// Latch the ID: without this, every caller would re-register itself
		// as the main thread and the function would always return true.
		initialized = true;
	}

	return main_thread_id == tid;
}

View file

@ -11,3 +11,4 @@
void* memdup(const void *src, size_t size);
void inherit_missing_pointers(uint num, void *dest[num], void *const base[num]);
bool is_main_thread(void);

View file

@ -11,6 +11,8 @@
#include "miscmath.h"
#include "assert.h"
#include <stdlib.h>
double approach(double v, double t, double d) {
if(v < t) {
v += d;
@ -218,3 +220,120 @@ uint ipow10(uint n) {
assert(n < sizeof(pow10)/sizeof(*pow10));
return pow10[n];
}
// A 128-bit quantity stored as two 64-bit halves.
// Holds the intermediate product passed from umul_128_64() to udiv_128_64().
typedef struct int128_bits {
uint64_t hi; // upper 64 bits
uint64_t lo; // lower 64 bits
} int128_bits_t;
// Divides the 128-bit value `divident` (hi:lo) by `divisor` with bit-by-bit
// shift-and-subtract long division and stores the 64 quotient bits produced
// by the loop in *out_quotient. The remainder is discarded.
//
// No check is made for a zero divisor or for a quotient wider than 64 bits.
// NOTE(review): presumably callers must guarantee divident.hi < divisor so
// the quotient fits into 64 bits - confirm against the umuldiv64() contract.
static inline attr_must_inline attr_unused
void udiv_128_64(int128_bits_t divident, uint64_t divisor, uint64_t *out_quotient) {
/*
if(!divident.hi) {
*out_quotient = divident.lo / divisor;
return;
}
*/
// `quotient` does double duty: it starts out holding the not yet consumed low
// dividend bits, which leave through its top while quotient bits enter at the
// bottom. `carry` holds the next dividend bit to feed into `remainder`.
uint64_t quotient = divident.lo << 1;
uint64_t remainder = divident.hi;
uint64_t carry = divident.lo >> 63;
uint64_t temp_carry = 0;
for(int i = 0; i < 64; i++) {
// Shift the next dividend bit into the remainder; afterwards `carry` is the
// bit that fell out of the top of `remainder`.
temp_carry = remainder >> 63;
remainder <<= 1;
remainder |= carry;
carry = temp_carry;
if(carry == 0) {
if(remainder >= divisor) {
// Divisor fits: fall through to the subtraction below.
carry = 1;
} else {
// Divisor does not fit: emit a 0 quotient bit (carry is 0 here) and
// pick up the next dividend bit from the top of `quotient`.
temp_carry = quotient >> 63;
quotient <<= 1;
quotient |= carry;
carry = temp_carry;
continue;
}
}
// Reached with carry == 1 on every path, so the second subtraction is a no-op.
// If a bit was shifted out of `remainder` above, the unsigned wrap-around of
// this subtraction accounts for it.
remainder -= divisor;
remainder -= (1 - carry);
// Emit a 1 quotient bit and pick up the next dividend bit.
carry = 1;
temp_carry = quotient >> 63;
quotient <<= 1;
quotient |= carry;
carry = temp_carry;
}
*out_quotient = quotient;
}
// Multiplies two 64-bit unsigned integers and writes the full 128-bit product
// into *result (upper half in result->hi, lower half in result->lo).
static inline attr_must_inline attr_unused
void umul_128_64(uint64_t multiplicant, uint64_t multiplier, int128_bits_t *result) {
#if (defined(__x86_64) || defined(__x86_64__))
	// One widening multiply; the two halves come back in the "a" and "d" registers.
	__asm__ (
		"mulq %3"
		: "=a,a" (result->lo), "=d,d" (result->hi)
		: "%0,0" (multiplicant), "r,m" (multiplier)
		: "cc"
	);
#else
	// Portable schoolbook multiplication on 32-bit halves.
	const uint64_t mask32 = 0xffffffff;

	const uint64_t a_lo = multiplicant & mask32;
	const uint64_t a_hi = multiplicant >> 32;
	const uint64_t b_lo = multiplier & mask32;
	const uint64_t b_hi = multiplier >> 32;

	const uint64_t low_product = a_lo * b_lo;

	// Each cross term absorbs the carry out of the partial sum below it.
	const uint64_t cross_a = (a_hi * b_lo) + (low_product >> 32);
	const uint64_t cross_b = (a_lo * b_hi) + (cross_a & mask32);

	result->hi = (a_hi * b_hi) + (cross_a >> 32) + (cross_b >> 32);
	result->lo = (cross_b << 32) + (low_product & mask32);
#endif
}
// Software fallback for (x * multiplier) / divisor: builds the 128-bit product
// with umul_128_64() and divides it with udiv_128_64().
static inline attr_must_inline attr_unused
uint64_t _umuldiv64_slow(uint64_t x, uint64_t multiplier, uint64_t divisor) {
	int128_bits_t product;
	uint64_t quotient;

	umul_128_64(x, multiplier, &product);
	udiv_128_64(product, divisor, &quotient);

	return quotient;
}
#include "util.h"
// Computes (x * multiplier) / divisor using the best wide intermediate type
// the build configuration offers, falling back to _umuldiv64_slow().
static inline attr_must_inline
uint64_t _umuldiv64(uint64_t x, uint64_t multiplier, uint64_t divisor) {
#if defined(TAISEI_BUILDCONF_HAVE_INT128)
	// Compiler-provided 128-bit integers.
	__extension__ typedef unsigned __int128 uint128_t;
	uint128_t product = (uint128_t)x * (uint128_t)multiplier;
	return product / divisor;
#elif defined(TAISEI_BUILDCONF_HAVE_LONG_DOUBLE)
	// Floating-point intermediate; defining this macro makes umuldiv64()
	// verify this path at runtime before trusting it.
	#define UMULDIV64_SANITY_CHECK
	long double product = (long double)x * (long double)multiplier;
	return product / (long double)divisor;
#else
	return _umuldiv64_slow(x, multiplier, divisor);
#endif
}
// Computes (x * multiplier) / divisor with a wide intermediate product.
//
// When _umuldiv64() was built on the long double path
// (UMULDIV64_SANITY_CHECK is defined), that path is verified once on the
// first call; if it fails the check, every call uses _umuldiv64_slow().
uint64_t umuldiv64(uint64_t x, uint64_t multiplier, uint64_t divisor) {
#ifdef UMULDIV64_SANITY_CHECK
	// -1 = not checked yet, 0 = fast path is unreliable, 1 = fast path is fine.
	// This must be a signed type: plain `char` may be unsigned, in which case
	// the "< 0" test would never fire and the check would be skipped entirely.
	static int sanity = -1;

	if(sanity < 0) {
		sanity = (_umuldiv64(UINT64_MAX, UINT64_MAX, UINT64_MAX) == UINT64_MAX);
	}

	return (sanity ? _umuldiv64 : _umuldiv64_slow)(x, multiplier, divisor);
#else
	return _umuldiv64(x, multiplier, divisor);
#endif
}

View file

@ -39,6 +39,10 @@ uint ipow10(uint n) attr_const;
float normpdf(float x, float sigma) attr_const;
void gaussian_kernel_1d(size_t size, float sigma, float kernel[size]) attr_nonnull(3);
// Compute (a*b)/c with 128-bit intermediate precision.
// If the final result would not fit into 64 bits, the return value is undefined.
uint64_t umuldiv64(uint64_t x, uint64_t multiplier, uint64_t divisor);
#define topow2(x) (_Generic((x), \
uint32_t: topow2_u32, \
uint64_t: topow2_u64, \