desperate optimizations

I even used a profiler
This commit is contained in:
Andrei Alexeyev 2017-11-23 17:27:41 +02:00
parent a3c8e066b7
commit db8dd8ad7c
No known key found for this signature in database
GPG key ID: 363707CD4C7FE8A4
15 changed files with 192 additions and 52 deletions

View file

@ -27,6 +27,21 @@ check_symbol_exists(_POSIX_VERSION "unistd.h" POSIX)
check_symbol_exists(__STDC_NO_COMPLEX__ "unistd.h" COMPLEX_MISSING)
check_symbol_exists(__STDC_NO_VLA__ "unistd.h" VLA_MISSING)
# This doesn't work on gcc...
# check_symbol_exists(_mm_crc32_u8 "immintrin.h" HAVE_CRC32_INTRIN)
# Probe for the _mm_crc32_u8 intrinsic by compiling a small program that calls
# it from a function carrying the sse4.2 target attribute. The outcome is
# stored in HAVE_CRC32_INTRIN.
check_c_source_compiles("
#include <immintrin.h>
__attribute__((target(\"sse4.2\")))
int main(int argc, char **argv) {
return _mm_crc32_u8(0, 42);
}
" HAVE_CRC32_INTRIN)
# Forward a successful probe to the C sources as a preprocessor definition.
if(HAVE_CRC32_INTRIN)
add_definitions(-DHAVE_CRC32_INTRIN)
endif()
if(COMPLEX_MISSING OR VLA_MISSING)
message(FATAL_ERROR "Your C implementation needs to support complex numbers and variable length arrays.")
endif()

View file

@ -209,6 +209,19 @@ void* hashtable_get(Hashtable *ht, void *key) {
return NULL;
}
// Looks up `key` in `ht` and returns the data pointer stored for it, or NULL
// when no element in the key's bucket matches.
// NOTE(review): the "unsafe" suffix presumably means this variant skips
// whatever synchronization hashtable_get performs — confirm before using it
// from more than one thread.
void* hashtable_get_unsafe(Hashtable *ht, void *key) {
	const hash_t key_hash = ht->hash_func(key);
	HashtableElement *node = ht->table[key_hash % ht->table_size];

	while(node != NULL) {
		// The stored hash is compared first; cmp_func only runs on a hash match.
		if(node->hash == key_hash && ht->cmp_func(key, node->key)) {
			return node->data;
		}

		node = node->next;
	}

	return NULL;
}
static bool hashtable_set_internal(Hashtable *ht, HashtableElement **table, size_t table_size, hash_t hash, void *key, void *data) {
bool collisions_updated = false;
size_t idx = hash % table_size;
@ -442,6 +455,10 @@ hash_t hashtable_hashfunc_string(void *vstr) {
return crc32str(0, (const char*)vstr);
}
// String-key hash callback: hashes the NUL-terminated string behind `vstr`
// with crc32str_sse42, using 0 as the starting value.
hash_t hashtable_hashfunc_string_sse42(void *vstr) {
	const char *str = (const char*)vstr;
	return crc32str_sse42(0, str);
}
void hashtable_copyfunc_string(void **dst, void *src) {
*dst = malloc(strlen((char*)src) + 1);
strcpy(*dst, src);
@ -452,7 +469,7 @@ void hashtable_copyfunc_string(void **dst, void *src) {
Hashtable* hashtable_new_stringkeys(size_t size) {
return hashtable_new(size,
hashtable_cmpfunc_string,
hashtable_hashfunc_string,
SDL_HasSSE42() ? hashtable_hashfunc_string_sse42 : hashtable_hashfunc_string,
hashtable_copyfunc_string,
hashtable_freefunc_string
);

View file

@ -40,7 +40,8 @@ typedef void* (*HTIterCallback)(void *key, void *data, void *arg);
Hashtable* hashtable_new(size_t size, HTCmpFunc cmp_func, HTHashFunc hash_func, HTCopyFunc copy_func, HTFreeFunc free_func);
void hashtable_free(Hashtable *ht);
void* hashtable_get(Hashtable *ht, void *key);
void* hashtable_get(Hashtable *ht, void *key) __attribute__((hot));
void* hashtable_get_unsafe(Hashtable *ht, void *key) __attribute__((hot));
void hashtable_set(Hashtable *ht, void *key, void *data);
void hashtable_unset(Hashtable *ht, void *key);
void hashtable_unset_deferred(Hashtable *ht, void *key, ListContainer **list);
@ -53,8 +54,9 @@ void* hashtable_foreach(Hashtable *ht, HTIterCallback callback, void *arg);
HashtableIterator* hashtable_iter(Hashtable *ht);
bool hashtable_iter_next(HashtableIterator *iter, void **out_key, void **out_data);
bool hashtable_cmpfunc_string(void *str1, void *str2);
hash_t hashtable_hashfunc_string(void *vstr);
bool hashtable_cmpfunc_string(void *str1, void *str2) __attribute__((hot));
hash_t hashtable_hashfunc_string(void *vstr) __attribute__((hot));
hash_t hashtable_hashfunc_string_sse42(void *vstr) __attribute__((hot));
void hashtable_copyfunc_string(void **dst, void *src);
#define hashtable_freefunc_string free
Hashtable* hashtable_new_stringkeys(size_t size);

View file

@ -59,8 +59,8 @@ List* list_append(List **dest, List *elem) {
return list_insert(dest, elem);
}
List *end = NULL;
for(List *e = *dest; e; e = e->next) {
List *end = *dest;
for(List *e = (*dest)->next; e; e = e->next) {
end = e;
}
@ -79,12 +79,15 @@ List* list_insert_at_priority(List **list_head, List *elem, int prio, ListPriori
}
List *dest = *list_head;
int dest_prio = prio_func(dest);
int candidate_prio = dest_prio;
for(List *e = dest; e && prio_func(e) <= prio; e = e->next) {
for(List *e = dest->next; e && (candidate_prio = prio_func(e)) <= prio; e = e->next) {
dest = e;
dest_prio = candidate_prio;
}
if(dest == *list_head && prio_func(dest) > prio) {
if(dest == *list_head && dest_prio > prio) {
elem->next = dest;
elem->prev = dest->prev;

View file

@ -21,11 +21,12 @@ typedef struct ListContainer {
typedef void* (*ListForeachCallback)(List **head, List *elem, void *arg);
typedef int (*ListPriorityFunc)(List *elem);
typedef List* (*ListInsertionRule)(List **dest, List *elem);
List* list_insert(List **dest, List *elem);
List* list_push(List **dest, List *elem);
List* list_append(List **dest, List *elem);
List* list_insert_at_priority(List **dest, List *elem, int prio, ListPriorityFunc prio_func);
List* list_insert_at_priority(List **dest, List *elem, int prio, ListPriorityFunc prio_func) __attribute__((hot));
List* list_pop(List **dest);
List* list_unlink(List **dest, List *elem);
void* list_foreach(List **dest, ListForeachCallback callback, void *arg);

View file

@ -64,6 +64,40 @@ static void init_log_file(void) {
log_add_output(lvls_file, vfs_open("storage/log.txt", VFS_MODE_WRITE));
}
#ifdef CRC32_BENCHMARK
// TODO: move this benchmark code somewhere more appropriate
// Benchmark helper: calls `hashfunc` on `str` a fixed number of times and logs
// the final hash value together with the elapsed time reported by time_get().
//
// str      - NUL-terminated string passed to every call
// init     - seed for the first call; each later call is seeded with the
//            previous call's result
// hashfunc - hash implementation under test
static void hash_test_run(const char *str, uint32_t init, uint32_t (*hashfunc)(uint32_t, const char*)) {
	enum { HASH_TEST_ITERATIONS = 341346740 };

	hrtime_t begin = time_get();

	for(int i = 0; i < HASH_TEST_ITERATIONS; ++i) {
		init = hashfunc(init, str);
	}

	// %x consumes an unsigned int; uint32_t is not guaranteed to be that type,
	// so convert explicitly instead of relying on the platform's typedef.
	log_debug("%08x %f", (unsigned int)init, (double)(time_get() - begin));
}
// Runs the CRC32 benchmark: for every sample string, logs the string and then
// times crc32str and crc32str_sse42 on it via hash_test_run. Always returns 1.
static int hash_test(void) {
	static const char *const samples[] = {
		"reimu",
		"sphereness",
		"res/textures/rabu_raibu.png",
	};
	const int sample_count = (int)(sizeof samples / sizeof *samples);

	time_init();

	for(int idx = 0; idx < sample_count; ++idx) {
		const char *s = samples[idx];

		log_info("-> %s", s);
		hash_test_run(s, 0, crc32str);
		hash_test_run(s, 0, crc32str_sse42);
	}

	return 1;
}
#else
// Stand-in used when CRC32_BENCHMARK is not defined: performs no work and
// returns 0.
static int hash_test(void) {
	const int benchmark_result = 0;
	return benchmark_result;
}
#endif
static int run_tests(void) {
if(tsrand_test()) {
return 1;
@ -81,6 +115,10 @@ static int run_tests(void) {
return 1;
}
if(hash_test()) {
return 1;
}
return 0;
}
@ -106,6 +144,26 @@ static void log_lib_versions(void) {
log_info("Using libpng %s", png_get_header_ver(NULL));
}
// Writes a hardware summary to the log at info level, one line per item:
// CPU count, cache line size, the result of each SDL_Has* instruction-set
// query (printed as an integer), and the amount of system RAM in MB.
void log_system_specs(void) {
log_info("CPU count: %d", SDL_GetCPUCount());
// NOTE(review): the two queries below are disabled in the source; the reason
// is not visible here — confirm whether they can be restored or removed.
// log_info("CPU type: %s", SDL_GetCPUType());
// log_info("CPU name: %s", SDL_GetCPUName());
log_info("CacheLine size: %d", SDL_GetCPUCacheLineSize());
// Instruction-set / feature availability as reported by SDL.
log_info("RDTSC: %d", SDL_HasRDTSC());
log_info("Altivec: %d", SDL_HasAltiVec());
log_info("MMX: %d", SDL_HasMMX());
log_info("3DNow: %d", SDL_Has3DNow());
log_info("SSE: %d", SDL_HasSSE());
log_info("SSE2: %d", SDL_HasSSE2());
log_info("SSE3: %d", SDL_HasSSE3());
log_info("SSE4.1: %d", SDL_HasSSE41());
log_info("SSE4.2: %d", SDL_HasSSE42());
log_info("AVX: %d", SDL_HasAVX());
log_info("AVX2: %d", SDL_HasAVX2());
log_info("NEON: %d", SDL_HasNEON());
log_info("RAM: %d MB", SDL_GetSystemRAM());
}
int main(int argc, char **argv) {
setlocale(LC_ALL, "C");
@ -118,7 +176,6 @@ int main(int argc, char **argv) {
return 0;
}
log_info("%s %s", TAISEI_VERSION_FULL, TAISEI_VERSION_BUILD_TYPE);
stage_init_array(); // cli_args depends on this
// commandline arguments should be parsed as early as possible
@ -174,6 +231,8 @@ int main(int argc, char **argv) {
free_cli_action(&a);
vfs_setup(false);
init_log_file();
log_info("%s %s", TAISEI_VERSION_FULL, TAISEI_VERSION_BUILD_TYPE);
log_system_specs();
config_load();

View file

@ -20,6 +20,7 @@ static ProjArgs defaults_proj = {
.type = EnemyProj,
.color = RGB(1, 1, 1),
.color_transform_rule = proj_clrtransform_bullet,
.insertion_rule = proj_insert_sizeprio,
};
static ProjArgs defaults_part = {
@ -29,6 +30,7 @@ static ProjArgs defaults_part = {
.type = Particle,
.color = RGB(1, 1, 1),
.color_transform_rule = proj_clrtransform_particle,
.insertion_rule = list_append,
};
static void process_projectile_args(ProjArgs *args, ProjArgs *defaults) {
@ -66,40 +68,40 @@ static void process_projectile_args(ProjArgs *args, ProjArgs *defaults) {
args->color_transform_rule = defaults->color_transform_rule;
}
if(!args->insertion_rule) {
args->insertion_rule = defaults->insertion_rule;
}
if(!args->max_viewport_dist && (args->type == Particle || args->type >= PlrProj)) {
args->max_viewport_dist = 300;
}
}
static complex projectile_size2(Texture *tex, complex size) {
if(tex) {
return tex->w + I*tex->h;
static double projectile_rect_area(Projectile *p) {
if(p->tex) {
return p->tex->w * p->tex->h;
} else {
return creal(p->size) * cimag(p->size);
}
return size;
}
static complex projectile_size(Projectile *p) {
return projectile_size2(p->tex, p->size);
}
static void projectile_size_split(Projectile *p, double *w, double *h) {
assert(w != NULL);
assert(h != NULL);
complex c = projectile_size(p);
*w = creal(c);
*h = cimag(c);
}
int projectile_prio_rawfunc(Texture *tex, complex size) {
complex s = projectile_size2(tex, size);
return -rint(creal(s) * cimag(s));
static void projectile_size(Projectile *p, double *w, double *h) {
if(p->tex) {
*w = p->tex->w;
*h = p->tex->h;
} else {
*w = creal(p->size);
*h = cimag(p->size);
}
}
int projectile_prio_func(List *vproj) {
Projectile *proj = (Projectile*)vproj;
return projectile_prio_rawfunc(proj->tex, proj->size);
return -rint(projectile_rect_area(proj));
}
// List insertion rule: inserts `elem` into the list at `dest` by priority.
// The element's own priority is obtained from projectile_prio_func, which is
// also handed to list_insert_at_priority as the priority callback.
// Returns whatever list_insert_at_priority returns.
List* proj_insert_sizeprio(List **dest, List *elem) {
	const int elem_prio = projectile_prio_func(elem);
	return list_insert_at_priority(dest, elem, elem_prio, projectile_prio_func);
}
static Projectile* _create_projectile(ProjArgs *args) {
@ -107,12 +109,7 @@ static Projectile* _create_projectile(ProjArgs *args) {
log_fatal("Tried to spawn a projectile while in drawing code");
}
Projectile *p = (Projectile*)list_insert_at_priority(
(List**)args->dest,
malloc(sizeof(Projectile)),
projectile_prio_rawfunc(args->texture_ptr, args->size),
projectile_prio_func
);
Projectile *p = calloc(1, sizeof(Projectile));
p->birthtime = global.frames;
p->pos = p->pos0 = args->pos;
@ -135,7 +132,7 @@ static Projectile* _create_projectile(ProjArgs *args) {
// assert(rule != NULL);
// rule(p, EVENT_BIRTH);
return p;
return (Projectile*)args->insertion_rule((List**)args->dest, (List*)p);
}
Projectile* create_projectile(ProjArgs *args) {
@ -149,7 +146,8 @@ Projectile* create_particle(ProjArgs *args) {
}
#ifdef PROJ_DEBUG
Projectile* _proj_attach_dbginfo(Projectile *p, DebugInfo *dbg) {
Projectile* _proj_attach_dbginfo(Projectile *p, DebugInfo *dbg, const char *callsite_str) {
// log_debug("Spawn: [%s]", callsite_str);
memcpy(&p->debug, dbg, sizeof(DebugInfo));
set_debug_info(dbg);
return p;
@ -176,7 +174,7 @@ void delete_projectiles(Projectile **projs) {
int collision_projectile(Projectile *p) {
if(p->type == EnemyProj) {
double w, h;
projectile_size_split(p, &w, &h);
projectile_size(p, &w, &h);
double angle = carg(global.plr.pos - p->pos) + p->angle;
double projr = sqrt(pow(w/2*cos(angle), 2) + pow(h/2*sin(angle), 2)) * 0.45;
@ -296,7 +294,7 @@ void draw_projectiles(Projectile *projs, ProjPredicate predicate) {
bool projectile_in_viewport(Projectile *proj) {
double w, h;
int e = proj->max_viewport_dist;
projectile_size_split(proj, &w, &h);
projectile_size(proj, &w, &h);
return !(creal(proj->pos) + w/2 + e < 0 || creal(proj->pos) - w/2 - e > VIEWPORT_W
|| cimag(proj->pos) + h/2 + e < 0 || cimag(proj->pos) - h/2 - e > VIEWPORT_H);

View file

@ -90,20 +90,24 @@ typedef struct ProjArgs {
Texture *texture_ptr;
complex size;
int max_viewport_dist;
ListInsertionRule insertion_rule;
} ProjArgs;
Projectile* create_projectile(ProjArgs *args);
Projectile* create_particle(ProjArgs *args);
#ifdef PROJ_DEBUG
Projectile* _proj_attach_dbginfo(Projectile *p, DebugInfo *dbg);
#define PROJECTILE(...) _proj_attach_dbginfo(create_projectile(&(ProjArgs) { __VA_ARGS__ }), _DEBUG_INFO_PTR_)
#define PARTICLE(...) _proj_attach_dbginfo(create_particle(&(ProjArgs) { __VA_ARGS__ }), _DEBUG_INFO_PTR_)
Projectile* _proj_attach_dbginfo(Projectile *p, DebugInfo *dbg, const char *callsite_str);
#define _PROJ_WRAP_SPAWN(p) _proj_attach_dbginfo((p), _DEBUG_INFO_PTR_, #p)
#else
#define PROJECTILE(...) create_projectile(&(ProjArgs) { __VA_ARGS__ })
#define PARTICLE(...) create_particle(&(ProjArgs) { __VA_ARGS__ })
#define _PROJ_WRAP_SPAWN(p) (p)
#endif
#define _PROJ_GENERIC_SPAWN(constructor, ...) _PROJ_WRAP_SPAWN((constructor)((&(ProjArgs) { __VA_ARGS__ })))
#define PROJECTILE(...) _PROJ_GENERIC_SPAWN(create_projectile, __VA_ARGS__)
#define PARTICLE(...) _PROJ_GENERIC_SPAWN(create_particle, __VA_ARGS__)
void delete_projectile(Projectile **dest, Projectile *proj);
void delete_projectiles(Projectile **dest);
void draw_projectiles(Projectile *projs, ProjPredicate predicate);
@ -138,3 +142,5 @@ int blast_timeout(Projectile *p, int t);
void Blast(Projectile *p, int t);
void projectiles_preload(void);
List* proj_insert_sizeprio(List **dest, List *elem) __attribute__((hot));

View file

@ -51,7 +51,7 @@
#define REPLAY_WRITE_DESYNC_CHECKS
#ifdef DEBUG
#define REPLAY_LOAD_DEBUG
// #define REPLAY_LOAD_DEBUG
#endif
typedef struct ReplayEvent {

View file

@ -298,8 +298,22 @@ static Resource* load_resource_finish(void *opaque, ResourceHandler *handler, co
Resource* get_resource(ResourceType type, const char *name, ResourceFlags flags) {
ResourceHandler *handler = get_handler(type);
Resource *res;
resource_wait_for_async_load(handler, name);
Resource *res = hashtable_get_string(handler->mapping, name);
if(flags & RESF_UNSAFE) {
res = hashtable_get_unsafe(handler->mapping, (void*)name);
flags &= ~RESF_UNSAFE;
} else {
res = hashtable_get(handler->mapping, (void*)name);
}
if(res) {
return res;
}
resource_wait_for_async_load(handler, name);
res = hashtable_get(handler->mapping, (void*)name);
if(!res) {
if(!(flags & RESF_PRELOAD)) {

View file

@ -35,6 +35,7 @@ typedef enum ResourceFlags {
RESF_OPTIONAL = 1,
RESF_PERMANENT = 2,
RESF_PRELOAD = 4,
RESF_UNSAFE = 8,
} ResourceFlags;
#define RESF_DEFAULT 0

View file

@ -315,7 +315,7 @@ int uniloc(Shader *sha, const char *name) {
}
Shader* get_shader(const char *name) {
return get_resource(RES_SHADER, name, RESF_DEFAULT)->shader;
return get_resource(RES_SHADER, name, RESF_DEFAULT | RESF_UNSAFE)->shader;
}
Shader* get_shader_optional(const char *name) {

View file

@ -74,7 +74,7 @@ void* load_texture_end(void *opaque, const char *path, unsigned int flags) {
}
Texture* get_tex(const char *name) {
return get_resource(RES_TEXTURE, name, RESF_DEFAULT)->texture;
return get_resource(RES_TEXTURE, name, RESF_DEFAULT | RESF_UNSAFE)->texture;
}
Texture* prefix_get_tex(const char *name, const char *prefix) {

View file

@ -709,6 +709,20 @@ uint32_t crc32str(uint32_t crc, const char *str) {
return crc ^ ~0U;
}
#ifdef HAVE_CRC32_INTRIN
#include <immintrin.h>
// Hashes the NUL-terminated string `str` one byte at a time with the
// _mm_crc32_u8 intrinsic, starting from `crc`. The accumulated value is
// returned as-is, so an empty string yields `crc` unchanged.
__attribute__((target("sse4.2")))
uint32_t crc32str_sse42(uint32_t crc, const char *str) {
	for(const uint8_t *byte = (const uint8_t*)str; *byte != 0; ++byte) {
		crc = _mm_crc32_u8(crc, *byte);
	}

	return crc;
}
#endif
#ifdef DEBUG
bool _in_draw_code;

View file

@ -137,7 +137,17 @@ void tsfprintf(FILE *out, const char *restrict fmt, ...) __attribute__((format(F
int getenvint(const char *v, int defaultval) __attribute__((pure));
void png_setup_error_handlers(png_structp png);
uint32_t crc32str(uint32_t crc, const char *str);
uint32_t crc32str(uint32_t crc, const char *str) __attribute__((hot, pure));
// Opt-out switch: when DISABLE_CRC32_INTRIN is defined, HAVE_CRC32_INTRIN is
// undefined again for everything that follows this point.
#if defined(HAVE_CRC32_INTRIN) && defined(DISABLE_CRC32_INTRIN)
#undef HAVE_CRC32_INTRIN
#endif
// With HAVE_CRC32_INTRIN set, crc32str_sse42 is declared as a function of its
// own; otherwise the name is defined as an alias for crc32str, so code may
// refer to crc32str_sse42 in either configuration.
#ifdef HAVE_CRC32_INTRIN
uint32_t crc32str_sse42(uint32_t crc, const char *str) __attribute__((hot, pure));
#else
#define crc32str_sse42 crc32str
#endif
noreturn void _ts_assert_fail(const char *cond, const char *func, const char *file, int line, bool use_log);