summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Gediminas Jakutis <gediminas@varciai.lt> 2021-03-10 15:04:13 +0200
committerGravatar Gediminas Jakutis <gediminas@varciai.lt> 2021-03-10 15:33:58 +0200
commit8788b9b33ec46e3f96170fb35a70a03addbf9671 (patch)
tree44b620061ca427f436e89c7beb80022febde1e58
parentd7e2af2582660e3ed4ce824c33a21ffbf9ed4c6f (diff)
downloadalgos-ld1-8788b9b33ec46e3f96170fb35a70a03addbf9671.tar.gz
algos-ld1-8788b9b33ec46e3f96170fb35a70a03addbf9671.tar.bz2
algos-ld1-8788b9b33ec46e3f96170fb35a70a03addbf9671.zip
refactor && improve.
Make code more reusable and generic while preparing to add uncached ops. Signed-off-by: Gediminas Jakutis <gediminas@varciai.lt>
-rw-r--r--src/cache.c97
-rw-r--r--src/cache.h7
-rw-r--r--src/datagen.c9
-rw-r--r--src/datagen.h4
-rw-r--r--src/defs.h4
-rw-r--r--src/main.c8
-rw-r--r--src/mergesort.c16
-rw-r--r--src/meson.build2
-rw-r--r--src/stream.c34
-rw-r--r--src/stream.h8
-rw-r--r--src/util.c65
-rw-r--r--src/util.h5
12 files changed, 140 insertions, 119 deletions
diff --git a/src/cache.c b/src/cache.c
index af33f30..2b24dd0 100644
--- a/src/cache.c
+++ b/src/cache.c
@@ -33,6 +33,7 @@ int cache_populate(struct stream * const in)
int ret = 0;
size_t i;
struct entry_l *tmp;
+ struct entry_l tmp_store;
try(in->settings->access != cached, err, EINVAL, "cannot populate cache: stream is uncached");
try(!in->cache, err, EINVAL, "stream has no cache allocated");
@@ -41,7 +42,7 @@ int cache_populate(struct stream * const in)
if (in->type == stream_randread) {
for (i = 0; i < in->n && !ret; ++i) {
errno = 0;
- tmp = in->get(in);
+ tmp = in->get(in, &tmp_store);
if (tmp) { /* non-cache reads CAN fail */
in->put(in, tmp);
} else {
@@ -84,6 +85,10 @@ err:
return ret;
}
+/* Optimized copying routine for cached array streams.
+ * The layout let's us basically just use memcpy instead of slow and wasteful
+ * get-put loop, all while completely avoiding the inefficient get-loop for seeking.
+ */
int cache_block_copy(struct stream * const restrict src, struct stream * const restrict dest)
{
int ret = 0;
@@ -100,88 +105,20 @@ err:
return ret;
}
-int cache_list_copy(struct stream * const restrict src, struct stream * const restrict dest)
-{
- int ret = 0;
- size_t ss;
-
- try(src->settings->access != cached || dest->settings->access != cached, err, EINVAL, "cannot cache-copy between uncached streams");
- try(!src->cache, err, EINVAL, "no cache to transfer");
- try(src->n < dest->settings->to, err, EINVAL, "invalid copy size");
- try(!dest->cache, err, EINVAL, "no cache to transfer to");
-
- ss = dest->settings->ss;
-
- /* skip over to start position */
- while (ss--) {
- src->get(src);
- }
-
- do {
- dest->put(dest, src->get(src));
- } while (dest->index < (dest->n - 1));
-
- src->rewind(src);
- dest->rewind(dest);
-
-err:
- return ret;
-}
-
-int cache_split(struct stream * const src, struct stream * const A, struct stream * const B)
-{
- int ret = 0;
- struct settings tmp_settings;
-
- try(src->n < 2, err, EINVAL, "cannot split single element stream.");
-
- /* setting up minimal stream basics */
- A->n = src->n / 2;
- B->n = src->n / 2 + (src->n & 1ul);
- A->index = B->index = 0;
- A->type = B->type = stream_cache; /* disallow any stream operations other than split/merge on children */
- A->fd = B->fd = -1; /* if we're splitting, these are for holding cache only */
- /* we only care about these three functions for these temporary streams */
- A->get = B->get = src->get;
- A->put = B->put = src->put;
- A->split = B->split = src->split;
- A->rewind = B->rewind = src->rewind;
- A->copy = B->copy = src->copy;
-
- tmp_settings = *src->settings;
- A->settings = B->settings = &tmp_settings;
-
- /* setting up A */
- tmp_settings.ss = 0;
- tmp_settings.to = A->n;
- try_s((ret = cache_create(A)), err);
- try_s((ret = src->copy(src, A)), err);
-
- /* setting up B */
- tmp_settings.ss = A->n;
- tmp_settings.to = src->n;
- try_s((ret = cache_create(B)), err);
- try_s((ret = src->copy(src, B)), err);
-
- A->settings = B->settings = src->settings;
-
-err:
- return ret;
-}
-
-struct entry_l *cached_get_array(struct stream * const in)
+struct entry_l *cached_get_array(struct stream * const in, struct entry_l * const store)
{
struct entry_l *ret = NULL;
if (in->index < in->n) {
- ret = (struct entry_l *)(in->cache_a + in->index);
+ store->val = in->cache_a[in->index].val;
++in->index;
+ ret = store;
}
return ret;
}
-struct entry_l *cached_get_list(struct stream * const in)
+struct entry_l *cached_get_list(struct stream * const in, struct entry_l * const store)
{
struct entry_l *ret = NULL;
@@ -189,6 +126,8 @@ struct entry_l *cached_get_list(struct stream * const in)
ret = in->cnode;
in->cnode = ret->next ? ret->next : in->cnode;
++in->index;
+ *store = *ret;
+ ret = store;
}
return ret;
@@ -226,7 +165,7 @@ int cached_put_list(struct stream * const restrict in, const struct entry_l * co
in->cnode = in->cache_l;
in->cnode->val = node->val;
in->cnode->next = NULL;
- in->index = 0;
+ in->index = 1;
} else {
in->cnode->next = in->cnode + 1; /* lol says librin, lmao */
in->cnode = in->cnode->next;
@@ -239,16 +178,6 @@ err:
return ret;
}
-int cache_rewind(struct stream * const restrict in)
-{
- int ret = 0;
-
- in->cnode = in->cnode ? in->cache_l : NULL;
- in->index = 0;
-
- return ret;
-}
-
static int cache_readfile(struct stream * const in)
{
ssize_t ret = 0;
diff --git a/src/cache.h b/src/cache.h
index 6255122..8c61bc3 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -16,13 +16,10 @@ int cache_destroy(struct stream * const in);
/* BLOCKMANIP */
int cache_transfer(struct stream * const src, struct stream * const dest);
int cache_block_copy(struct stream * const restrict src, struct stream * const restrict dest);
-int cache_list_copy(struct stream * const restrict src, struct stream * const restrict dest);
-int cache_split(struct stream * const src, struct stream * const A, struct stream * const B);
-int cache_rewind(struct stream * const restrict in);
/* GET */
-struct entry_l *cached_get_array(struct stream * const in);
-struct entry_l *cached_get_list(struct stream * const in);
+struct entry_l *cached_get_array(struct stream * const in, struct entry_l * const store);
+struct entry_l *cached_get_list(struct stream * const in, struct entry_l * const store);
/* PUT */
int cached_put_array(struct stream * const in, const struct entry_l * const data);
diff --git a/src/datagen.c b/src/datagen.c
index 52f1896..3dc2a98 100644
--- a/src/datagen.c
+++ b/src/datagen.c
@@ -10,7 +10,7 @@
static struct entry_l buf;
-struct entry_l *gen_get_array(struct stream * const in)
+struct entry_l *gen_get_array(struct stream * const in, struct entry_l * const store)
{
struct entry_l *ret;
int errn;
@@ -18,7 +18,8 @@ struct entry_l *gen_get_array(struct stream * const in)
(void) in; /* lol says librin, lmao */
if (getrandom(&buf.val, sizeof(buf.val), 0ul) == sizeof(buf.val)) {
- ret = &buf;
+ *store = buf;
+ ret = store;
} else {
errn = errno;
ret = NULL;
@@ -28,11 +29,11 @@ struct entry_l *gen_get_array(struct stream * const in)
return ret;
}
-struct entry_l *gen_get_list(struct stream * const in)
+struct entry_l *gen_get_list(struct stream * const in, struct entry_l * const store)
{
struct entry_l *ret;
- try_s((ret = gen_get_array(in)), err);
+ try_s((ret = gen_get_array(in, store)), err);
ret->next = NULL;
err:
diff --git a/src/datagen.h b/src/datagen.h
index 92a7878..92ed49a 100644
--- a/src/datagen.h
+++ b/src/datagen.h
@@ -7,7 +7,7 @@
#include "defs.h"
-struct entry_l *gen_get_array(struct stream * const in);
-struct entry_l *gen_get_list(struct stream * const in);
+struct entry_l *gen_get_array(struct stream * const in, struct entry_l * const store);
+struct entry_l *gen_get_list(struct stream * const in, struct entry_l * const store);
#endif /* ALGOS_DATAGEN_H_INCLUDED */
diff --git a/src/defs.h b/src/defs.h
index 20621ef..522eed4 100644
--- a/src/defs.h
+++ b/src/defs.h
@@ -140,10 +140,8 @@ struct stream {
struct entry_l *cnode; /* "current" node */
union cachewrap;
struct settings *settings;
- struct entry_l *(*get)(struct stream * const);
+ struct entry_l *(*get)(struct stream * const, struct entry_l * const);
int (*put)(struct stream * const, struct entry_l const * const);
- int (*split)(struct stream * const, struct stream * const, struct stream * const);
- int (*rewind)(struct stream * restrict const);
int (*copy)(struct stream * restrict const, struct stream * restrict const);
};
diff --git a/src/main.c b/src/main.c
index 0122a3d..30f1973 100644
--- a/src/main.c
+++ b/src/main.c
@@ -68,7 +68,7 @@ int main(int argc, char **argv)
if (settings.format == array) {
try_s((ret = cache_block_copy(&file_in, &file_out)), out);
} else { /* settings.format == list */
- try_s((ret = cache_list_copy(&file_in, &file_out)), out);
+ try_s((ret = stream_copy_range(&file_in, &file_out)), out);
}
break;
case mode_generate:
@@ -204,15 +204,11 @@ static int load_io_functions(struct settings const * const s, struct stream * co
if (s->format == array) {
in->get = cached_get_array;
in->put = cached_put_array;
- in->split = cache_split;
- in->rewind = cache_rewind;
in->copy = cache_block_copy;
} else { /* if (s->format == list */
in->get = cached_get_list;
in->put = cached_put_list;
- in->split = cache_split;
- in->rewind = cache_rewind;
- in->copy = cache_list_copy;
+ in->copy = stream_copy_range;
}
}
} else {
diff --git a/src/mergesort.c b/src/mergesort.c
index 053e702..aa05f8e 100644
--- a/src/mergesort.c
+++ b/src/mergesort.c
@@ -23,11 +23,11 @@ int merge_sort(struct stream * const src, struct stream * const dest)
try_s((ret = stream_shallow_copy(src, dest)), err);
if (src->n > 1) {
- src->split(src, tmp, tmp + 2); /* split stream in half */
+ split(src, tmp, tmp + 2); /* split stream in half */
merge_sort(tmp, tmp + 1); /* recurse-sort first half */
merge_sort(tmp + 2, tmp + 3); /* recurse-sort second half */
merge(dest, tmp + 1, tmp + 3); /* merge the two halves back */
- dest->rewind(dest);
+ stream_rewind(dest);
/* you can't spell -funroll-loops without fun, wee~! */
stream_close(tmp);
@@ -49,17 +49,19 @@ static int merge(struct stream * const dest, struct stream * const A, struct str
int ret = 0;
struct entry_l *a;
struct entry_l *b;
+ struct entry_l a_store;
+ struct entry_l b_store;
- a = A->get(A);
- b = B->get(B);
+ a = A->get(A, &a_store);
+ b = B->get(B, &b_store);
while (a || b) {
- if (a && (!b || is_less_equal(a->val, b->val))) {
+ if (a && (!b || is_less_equal(a_store.val, b_store.val))) {
dest->put(dest, a);
- a = A->get(A);
+ a = A->get(A, &a_store);
} else {
dest->put(dest, b);
- b = B->get(B);
+ b = B->get(B, &b_store);
}
}
diff --git a/src/meson.build b/src/meson.build
index 7f391ab..5d4dd69 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -4,7 +4,7 @@ source_files = [
'stream.c',
'main.c',
'mergesort.c',
- 'util.h',
+ 'util.c',
]
header_files = [
diff --git a/src/stream.c b/src/stream.c
index 83235a5..b7bc39e 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -16,6 +16,7 @@
#include "defs.h"
#include "datagen.h"
#include "cache.h"
+#include "util.h"
static int stream_open_out(struct stream * const in);
static int stream_open_lite(struct stream * const in);
@@ -65,7 +66,9 @@ int stream_close(struct stream * const in)
char path[PATH_MAX];
struct stat st;
- try_s((ret = stream_flush(in)), err);
+ if (in->settings->access == cached) {
+ try_s((ret = stream_flush(in)), err);
+ }
snprintf(path, PATH_MAX, "/proc/self/fd/%i", in->fd);
@@ -96,6 +99,33 @@ err:
return ret;
}
+/* generic, naïve and slow copy routine when an optimized one cannot be used */
+int stream_copy_range(struct stream * const restrict src, struct stream * const restrict dest)
+{
+ int ret = 0;
+ size_t ss;
+ struct entry_l tmp_store;
+
+ try(src->n < dest->settings->to, err, EINVAL, "invalid copy size");
+
+ ss = dest->settings->ss;
+
+ /* skip over to start position */
+ while (ss--) {
+ src->get(src, &tmp_store);
+ }
+
+ do {
+ dest->put(dest, src->get(src, &tmp_store));
+ } while (dest->index < (dest->n));
+
+ stream_rewind(src);
+ stream_rewind(dest);
+
+err:
+ return ret;
+}
+
int stream_shallow_copy(struct stream const * const restrict src, struct stream * const dest)
{
int ret = 0;
@@ -106,8 +136,6 @@ int stream_shallow_copy(struct stream const * const restrict src, struct stream
dest->get = src->get;
dest->put = src->put;
- dest->split = src->split;
- dest->rewind = src->rewind;
dest->copy = src->copy;
dest->type = src->settings->access == cached ? stream_cache : stream_lite;
diff --git a/src/stream.h b/src/stream.h
index d208bdb..6288d32 100644
--- a/src/stream.h
+++ b/src/stream.h
@@ -10,11 +10,11 @@
int stream_open(struct stream * const in);
int stream_close(struct stream * const in);
-int direct_get_array(struct stream * const restrict in, ssize_t idx, struct entry_l * const data);
-int direct_get_list(struct stream * const restrict in, ssize_t idx, struct entry_l * const data);
-int direct_put_array(struct stream * const restrict in, ssize_t idx, const struct entry_l * const data);
-int direct_put_list(struct stream * const restrict in, ssize_t idx, const struct entry_l * const data);
+/* uncached blockmanip */
+int stream_copy_range(struct stream * const restrict src, struct stream * const restrict dest);
+
+/* misc */
int stream_shallow_copy(struct stream const * const restrict src, struct stream * const dest);
#endif /* ALGOS_IO_H_INCLUDED */
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..47bf7a7
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,65 @@
+#include <errno.h>
+#include "defs.h"
+#include "util.h"
+#include "cache.h"
+#include "stream.h"
+
+int stream_rewind(struct stream * const restrict in)
+{
+ int ret = 0;
+
+ in->cnode = in->cnode ? in->cache_l : NULL;
+ in->index = 0;
+
+ return ret;
+}
+
+int split(struct stream * const src, struct stream * const A, struct stream * const B)
+{
+ int ret = 0;
+ struct settings tmp_settings[2];
+
+ try(src->n < 2, err, EINVAL, "cannot split single element stream.");
+
+ /* setting up minimal stream basics */
+ A->n = src->n / 2;
+ B->n = src->n / 2 + (src->n & 1ul);
+ A->index = B->index = 0;
+ /* we only care about these three functions for these temporary streams */
+ A->get = B->get = src->get;
+ A->put = B->put = src->put;
+ A->copy = B->copy = src->copy;
+
+ tmp_settings[0] = *src->settings;
+ tmp_settings[1] = *src->settings;
+ A->settings = tmp_settings;
+ B->settings = tmp_settings + 1;
+
+ /* setting up A */
+ tmp_settings[0].ss = 0;
+ tmp_settings[0].to = A->n;
+ /* setting up B */
+ tmp_settings[1].ss = A->n;
+ tmp_settings[1].to = src->n;
+
+ if (src->settings->access == cached) {
+ A->fd = B->fd = -1; /* if we're splitting, these are for holding cache only */
+ A->type = B->type = stream_cache;
+ try_s((ret = cache_create(A)), err);
+ try_s((ret = src->copy(src, A)), err);
+ try_s((ret = cache_create(B)), err);
+ try_s((ret = src->copy(src, B)), err);
+ } else {
+ A->name = B->name = src->name;
+ A->type = B->type = stream_lite;
+ try_s((ret = stream_open(A)), err);
+ try_s((ret = src->copy(src, A)), err);
+ try_s((ret = stream_open(B)), err);
+ try_s((ret = src->copy(src, B)), err);
+ }
+
+ A->settings = B->settings = src->settings;
+
+err:
+ return ret;
+}
diff --git a/src/util.h b/src/util.h
index af819f4..f2a84f9 100644
--- a/src/util.h
+++ b/src/util.h
@@ -7,6 +7,11 @@
#include "defs.h"
+int stream_rewind(struct stream * const restrict in);
+int split(struct stream * const src, struct stream * const A, struct stream * const B);
+
+/* inline crap below */
+
static inline int is_less_equal(const field a, const field b) __attribute__((always_inline));
static inline int is_less_equal(const field a, const field b)