endif
else
MK_CPPFLAGS += -DNDEBUG
- MK_CFLAGS += -O3
- MK_CXXFLAGS += -O3
- MK_NVCCFLAGS += -O3
+ MK_CFLAGS += -O3 -g
+ MK_CXXFLAGS += -O3 -g
+ MK_NVCCFLAGS += -O3 -g
endif
ifdef LLAMA_SANITIZE_THREAD
} else if (type == GGML_TYPE_I8) {
v = (float) *(int8_t *) &data[i];
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
printf("%12.4f", v);
sum += v;
}
else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
- exit(1); //GGML_ASSERT(false);
+ exit(1); //GGML_ABORT("fatal error");
}
if (m_params.verbosity > 1) {
printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
}
else if (e.values.size() != (size_t)src1->ne[0]) {
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]);
- exit(1); //GGML_ASSERT(false);
+ exit(1); //GGML_ABORT("fatal error");
}
++e.ncall;
if (m_params.verbosity > 1) {
case JSON: return "json";
case MARKDOWN: return "md";
case SQL: return "sql";
- default: GGML_ASSERT(!"invalid output format");
+ default: GGML_ABORT("invalid output format");
}
}
case LLAMA_SPLIT_MODE_NONE: return "none";
case LLAMA_SPLIT_MODE_LAYER: return "layer";
case LLAMA_SPLIT_MODE_ROW: return "row";
- default: GGML_ASSERT(!"invalid split mode");
+ default: GGML_ABORT("invalid split mode");
}
}
case SQL:
return std::unique_ptr<printer>(new sql_printer());
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
int main(int argc, char ** argv) {
embeddings = peg_0;
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
printf(">");
return;
}
- GGML_ASSERT(false && "MultiByteToWideChar() failed in an unexpected way.");
+ GGML_ABORT("MultiByteToWideChar() failed in an unexpected way.");
}
LPWSTR wstr = (LPWSTR) calloc(length_needed+1, sizeof(*wstr));
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
-#define GGML_ASSERT(x) \
- do { \
- if (!(x)) { \
- fflush(stdout); \
- fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
- ggml_print_backtrace(); \
- abort(); \
- } \
- } while (0)
-
#ifndef NDEBUG
-#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__)
#define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define GGML_UNREACHABLE() ((void) 0)
#endif
+#ifdef __cplusplus
+#define GGML_NORETURN [[noreturn]]
+#elif defined(_MSC_VER)
+#define GGML_NORETURN __declspec(noreturn)
+#else
+#define GGML_NORETURN _Noreturn
+#endif
+
+#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
+#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
+
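For context on the hunk above: GGML_ABORT() forwards __FILE__, __LINE__ and a printf-style message to the new ggml_abort() function, and GGML_ASSERT() is now a thin wrapper around it. A minimal usage sketch (check_dims and n_dims are illustrative names, not part of this patch):

static void check_dims(int n_dims) {
    GGML_ASSERT(n_dims > 0);  // on failure expands to GGML_ABORT("GGML_ASSERT(%s) failed", "n_dims > 0")
    if (n_dims > GGML_MAX_DIMS) {
        // formatted message; file and line are prepended by the macro via ggml_abort()
        GGML_ABORT("unsupported n_dims = %d (max %d)", n_dims, GGML_MAX_DIMS);
    }
}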
// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
extern "C" {
#endif
+ GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+ GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
+
enum ggml_status {
GGML_STATUS_ALLOC_FAILED = -2,
GGML_STATUS_FAILED = -1,
GGML_CGRAPH_EVAL_ORDER_COUNT
};
+ typedef uint32_t ggml_bitset_t;
+
struct ggml_hash_set {
size_t size;
+ ggml_bitset_t * used;
struct ggml_tensor ** keys;
};
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;
- struct ggml_hash_set visited_hash_table;
+ struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
};
GGML_API int64_t ggml_cycles(void);
GGML_API int64_t ggml_cycles_per_ms(void);
- GGML_API void ggml_print_backtrace(void);
-
// accepts a UTF-8 path, even on Windows
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
- GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
- GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+ GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
+ GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
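As the comments above note, ggml_graph_plan() sizes the work buffer that the caller must provide before ggml_graph_compute(). A minimal sketch of that call sequence, assuming cgraph is an already-built struct ggml_cgraph * (error handling trimmed):

struct ggml_cplan plan = ggml_graph_plan(cgraph, /*n_threads =*/ 4);
uint8_t * work = NULL;
if (plan.work_size > 0) {
    work = malloc(plan.work_size);   // caller owns the work buffer
    plan.work_data = work;
}
enum ggml_status status = ggml_graph_compute(cgraph, &plan);
GGML_ASSERT(status == GGML_STATUS_SUCCESS);
free(work);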
if (talloc->offset + size > ggml_backend_buffer_get_size(talloc->buffer)) {
fprintf(stderr, "%s: not enough space in the buffer to allocate %s (needed %zu, available %zu)\n",
__func__, tensor->name, size, ggml_backend_buffer_get_size(talloc->buffer) - talloc->offset);
- GGML_ASSERT(!"not enough space in the buffer");
- return;
+ GGML_ABORT("not enough space in the buffer");
}
void * addr = (char *)ggml_backend_buffer_get_base(talloc->buffer) + talloc->offset;
return;
}
}
- GGML_ASSERT(!"out of allocated_tensors");
+ GGML_ABORT("out of allocated_tensors");
}
static void remove_allocated_tensor(struct ggml_dyn_tallocr * alloc, size_t offset, const struct ggml_tensor * tensor) {
for (int i = 0; i < 1024; i++) {
return;
}
}
- fprintf(stderr, "tried to free tensor %s not found\n", tensor->name);
- GGML_ASSERT(!"tensor not found");
+ GGML_ABORT("tried to free tensor %s not found\n", tensor->name);
}
#endif
// this should never happen
fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
__func__, size, max_avail);
- GGML_ASSERT(!"not enough space in the buffer");
- GGML_UNREACHABLE();
+ GGML_ABORT("not enough space in the buffer");
}
}
}
}
- free(galloc->hash_set.keys);
+ ggml_hash_set_free(&galloc->hash_set);
free(galloc->hash_values);
free(galloc->bufts);
free(galloc->buffers);
typedef struct ggml_gallocr * ggml_gallocr_t;
static struct hash_node * ggml_gallocr_hash_get(ggml_gallocr_t galloc, struct ggml_tensor * t) {
- size_t i = ggml_hash_find_or_insert(galloc->hash_set, t);
+ size_t i = ggml_hash_find_or_insert(&galloc->hash_set, t);
return &galloc->hash_values[i];
}
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
// clear hash tables
- memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
- memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
+ ggml_hash_set_reset(&galloc->hash_set);
+ memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
// allocate leafs
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
}
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
- size_t hash_size = graph->visited_hash_table.size;
+ size_t min_hash_size = graph->n_nodes + graph->n_leafs;
+ // add 25% margin to avoid hash collisions
+ min_hash_size += min_hash_size / 4;
// initialize hash table
- if (galloc->hash_set.size < hash_size) {
- free(galloc->hash_set.keys);
- free(galloc->hash_values);
- galloc->hash_set.size = hash_size;
- galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
- galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
+ if (galloc->hash_set.size < min_hash_size) {
+ ggml_hash_set_free(&galloc->hash_set);
+ galloc->hash_set = ggml_hash_set_new(min_hash_size);
GGML_ASSERT(galloc->hash_set.keys != NULL);
+
+ free(galloc->hash_values);
+ galloc->hash_values = malloc(sizeof(struct hash_node) * galloc->hash_set.size);
GGML_ASSERT(galloc->hash_values != NULL);
- } else {
- // reset hash table
- memset(galloc->hash_set.keys, 0, sizeof(struct ggml_tensor *) * galloc->hash_set.size);
- memset(galloc->hash_values, 0, sizeof(struct hash_node) * galloc->hash_set.size);
}
// reset allocators
}
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
- ggml_backend_buffer_type_t buft = talloc->buffer_id != -1 ? galloc->bufts[talloc->buffer_id] : NULL;
- size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(buft, node);
+ size_t node_size = (node->data || node->view_src) ? 0 : ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
return talloc->size_max >= node_size;
}
ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
ggml_gallocr_t galloc;
- // hash keys of the nodes in the graph
- struct ggml_hash_set hash_set;
- // hash values
- int * tensor_backend_id;
- struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
+ // hash map of the nodes in the graph
+ struct ggml_hash_set hash_set;
+ int * hv_tensor_backend_ids; // [hash_set.size]
+ struct ggml_tensor ** hv_tensor_copies; // [hash_set.size][n_backends][n_copies]
int * node_backend_ids; // [graph_size]
int * leaf_backend_ids; // [graph_size]
int * prev_leaf_backend_ids; // [graph_size]
// copy of the graph with modified inputs
- struct ggml_cgraph * graph;
+ struct ggml_cgraph graph;
// graph splits
struct ggml_backend_sched_split * splits;
ggml_backend_sched_eval_callback callback_eval;
void * callback_eval_user_data;
- bool debug;
+ char * context_buffer;
+ size_t context_buffer_size;
- // align context_buffer to GGML_MEM_ALIGN
-#ifdef _MSC_VER
- __declspec(align(GGML_MEM_ALIGN))
-#else
- __attribute__((aligned(GGML_MEM_ALIGN)))
-#endif
- char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
+ bool debug;
};
-#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
-#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
+#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
+#define tensor_backend_id(tensor) sched->hv_tensor_backend_ids[hash_id(tensor)]
+#define tensor_id_copy(id, backend_id, copy_id) sched->hv_tensor_copies[(id) * sched->n_backends * sched->n_copies + (backend_id) * sched->n_copies + (copy_id)]
+#define tensor_copy(tensor, backend_id, copy_id) tensor_id_copy(hash_id(tensor), backend_id, copy_id)
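The two macros above replace the old fixed-size tensor_copies[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES] table with one flat allocation sized by the actual backend and copy counts. The index math tensor_id_copy(id, backend_id, copy_id) expands to is shown below (the concrete numbers are only an illustration):

// with n_backends = 4 and n_copies = 2, tensor_id_copy(10, 3, 1) reads slot
//   10*4*2 + 3*2 + 1 = 87
// of hv_tensor_copies, which holds hash_set.size * n_backends * n_copies pointers
struct ggml_tensor * cpy = sched->hv_tensor_copies[id         * sched->n_backends * sched->n_copies +
                                                   backend_id * sched->n_copies +
                                                   copy_id];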
// returns the priority of the backend, lower id is higher priority
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
return cur_backend_id;
}
- // assign nodes that use weights to the backend of the weights
// operations with weights are preferably run on the same backend as the weights
for (int i = 0; i < GGML_MAX_SRC; i++) {
const struct ggml_tensor * src = tensor->src[i];
sched->is_reset = false;
struct ggml_init_params params = {
- /* .mem_size = */ sizeof(sched->context_buffer),
+ /* .mem_size = */ sched->context_buffer_size,
/* .mem_buffer = */ sched->context_buffer,
/* .no_alloc = */ true
};
sched->ctx = ggml_init(params);
if (sched->ctx == NULL) {
- fprintf(stderr, "%s: failed to initialize context\n", __func__);
- GGML_ASSERT(false);
+ GGML_ABORT("%s: failed to initialize context\n", __func__);
}
// pass 1: assign backends to ops with pre-allocated inputs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
int * leaf_backend_id = &tensor_backend_id(leaf);
- if (*leaf_backend_id != -1) {
- // do not overwrite user assignments
- continue;
+ // do not overwrite user assignments
+ if (*leaf_backend_id == -1) {
+ *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
}
- *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
}
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
int * node_backend_id = &tensor_backend_id(node);
- if (*node_backend_id != -1) {
- // do not overwrite user assignments
- continue;
- }
- *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
- // src
- for (int j = 0; j < GGML_MAX_SRC; j++) {
- struct ggml_tensor * src = node->src[j];
- if (src == NULL) {
+ // do not overwrite user assignments
+ if (*node_backend_id == -1) {
+ *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
+
+#if 0
+ // src
+ if (node->op == GGML_OP_NONE) {
continue;
}
- int * src_backend_id = &tensor_backend_id(src);
- if (*src_backend_id == -1) {
- *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
+
+ for (int j = 0; j < GGML_MAX_SRC; j++) {
+ struct ggml_tensor * src = node->src[j];
+ if (src == NULL) {
+ continue;
+ }
+ int * src_backend_id = &tensor_backend_id(src);
+ if (*src_backend_id == -1) {
+ *src_backend_id = ggml_backend_sched_backend_id_from_cur(sched, src);
+ }
}
+#endif
}
}
}
}
- // pass 4: split graph, find tensors that need to be copied
+ // pass 5: split graph, find tensors that need to be copied
{
int i_split = 0;
struct ggml_backend_sched_split * split = &sched->splits[0];
// find the backend of the first split, skipping view ops
- for (int i = 0; i < graph->n_nodes; i++) {
+ int i = 0;
+ for (; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
if (!ggml_is_view_op(node->op)) {
split->backend_id = tensor_backend_id(node);
}
split->i_start = 0;
split->n_inputs = 0;
- memset(split->inputs, 0, sizeof(split->inputs)); //HACK
int cur_backend_id = split->backend_id;
- for (int i = 0; i < graph->n_nodes; i++) {
+ for (; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
const int node_backend_id = tensor_backend_id(node);
- GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now
+ assert(node_backend_id != -1); // all nodes should be assigned by now
// check if we should start a new split based on the sources of the current node
bool need_new_split = false;
// by starting a new split, the memory of the previously offloaded weights can be reused
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
int src_backend_id = tensor_backend_id(src);
- if (src_backend_id != -1 && src_backend_id != cur_backend_id) {
+ if (src_backend_id != cur_backend_id) {
need_new_split = true;
break;
}
// FIXME: count the number of inputs instead of only checking when full
if (split->n_inputs == GGML_SCHED_MAX_SPLIT_INPUTS) {
const size_t id = hash_id(src);
- int src_backend_id = sched->tensor_backend_id[id];
+ int src_backend_id = sched->hv_tensor_backend_ids[id];
bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
- if (src_backend_id != cur_backend_id && sched->tensor_copies[hash_id(src)][cur_backend_id][0] == NULL && !supported) {
+ if (src_backend_id != cur_backend_id && tensor_id_copy(id, cur_backend_id, 0) == NULL && !supported) {
//printf("starting new split because of too many inputs: node %s, input %s\n", node->name, src->name);
need_new_split = true;
break;
continue;
}
- const int src_backend_id = tensor_backend_id(src);
+ size_t src_id = hash_id(src);
+ const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
assert(src_backend_id != -1); // all inputs should be assigned by now
if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
- size_t id = hash_id(src);
- if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
+ if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
ggml_backend_t backend = sched->backends[src_backend_id];
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy;
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
- sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
+ tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy");
}
int n_graph_inputs = sched->n_graph_inputs++;
}
}
- bool supported = ggml_backend_sched_buffer_supported(sched, src, cur_backend_id);
- if (src_backend_id != cur_backend_id && !supported) {
+ if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
// create a copy of the input in the split's backend
- const size_t id = hash_id(src);
- if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
+ if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) {
ggml_backend_t backend = sched->backends[cur_backend_id];
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
- sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
+ tensor_id_copy(src_id, cur_backend_id, c) = tensor_copy;
SET_CAUSE(tensor_copy, "4.cpy");
}
int n_inputs = split->n_inputs++;
GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
split->inputs[n_inputs] = src;
}
- node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
+ node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy);
}
}
}
ggml_backend_sched_print_assignments(sched, graph);
}
- // swap node_backend_ids and leaf_backend_ids and prevs
+    // swap node_backend_ids and leaf_backend_ids with prevs
{
int * tmp = sched->node_backend_ids;
sched->node_backend_ids = sched->prev_node_backend_ids;
sched->prev_leaf_backend_ids = tmp;
}
- // create copies of the graph for each split
- // TODO: avoid this copy
- struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2, false);
+ int graph_size = graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
+ if (sched->graph.size < graph_size) {
+ sched->graph.size = graph_size;
+ sched->graph.nodes = realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
+ sched->graph.leafs = realloc(sched->graph.leafs, graph_size * sizeof(struct ggml_tensor *));
+ GGML_ASSERT(sched->graph.nodes != NULL);
+ GGML_ASSERT(sched->graph.leafs != NULL);
+ }
+ sched->graph.n_nodes = 0;
+ sched->graph.n_leafs = 0;
+
+ struct ggml_cgraph * graph_copy = &sched->graph;
+
for (int i = 0; i < sched->n_splits; i++) {
struct ggml_backend_sched_split * split = &sched->splits[i];
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
struct ggml_tensor * input = split->inputs[j];
const size_t input_id = hash_id(input);
- struct ggml_tensor * input_cpy = sched->tensor_copies[input_id][split->backend_id][sched->cur_copy];
+ struct ggml_tensor * input_cpy = tensor_id_copy(input_id, split->backend_id, sched->cur_copy);
// add a dependency to the input source so that it is not freed before the copy is done
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
input_dep->src[0] = input;
- sched->node_backend_ids[graph_copy->n_nodes] = sched->tensor_backend_id[input_id];
+ sched->node_backend_ids[graph_copy->n_nodes] = sched->hv_tensor_backend_ids[input_id];
graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
// add a dependency to the input copy so that it is allocated at the start of the split
size_t id = hash_id(input);
int backend_id = tensor_backend_id(input);
for (int c = 0; c < sched->n_copies; c++) {
- struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+ struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
}
struct ggml_tensor * input = split->inputs[j];
size_t id = hash_id(input);
for (int c = 0; c < sched->n_copies; c++) {
- struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
+ struct ggml_tensor * input_cpy = tensor_id_copy(id, backend_id, c);
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
}
sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
}
-
- sched->graph = graph_copy;
}
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
bool backend_ids_changed = false;
- for (int i = 0; i < sched->graph->n_nodes; i++) {
+ for (int i = 0; i < sched->graph.n_nodes; i++) {
if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
backend_ids_changed = true;
}
}
if (!backend_ids_changed) {
- for (int i = 0; i < sched->graph->n_leafs; i++) {
+ for (int i = 0; i < sched->graph.n_leafs; i++) {
if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
backend_ids_changed = true;
}
// allocate graph
- if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
+ if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
// the re-allocation may cause the split inputs to be moved to a different address
ggml_backend_sched_synchronize(sched);
#ifndef NDEBUG
- fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
+ fprintf(stderr, "%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
#endif
- ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
- if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
+ ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
+ if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
fprintf(stderr, "%s: failed to allocate graph\n", __func__);
return false;
}
for (int j = 0; j < split->n_inputs; j++) {
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
struct ggml_tensor * input = split->inputs[j];
- struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
+ struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
if (input->flags & GGML_TENSOR_FLAG_INPUT) {
// inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched));
sched->debug = getenv("GGML_SCHED_DEBUG") != NULL;
+ sched->n_backends = n_backends;
+ sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
// initialize hash table
- sched->hash_set = ggml_hash_set_new(graph_size);
- sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0]));
- sched->tensor_copies = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0]));
+ // FIXME: needs to be size*2 to account for leafs (do it in graph_split instead)
+ sched->hash_set = ggml_hash_set_new(graph_size);
+ sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
+ sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
- sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
- sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
+ sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
+ sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
- sched->n_backends = n_backends;
-
- sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
+ sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
+ sched->context_buffer = malloc(sched->context_buffer_size);
const int initial_splits_capacity = 16;
sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0]));
}
ggml_gallocr_free(sched->galloc);
ggml_free(sched->ctx);
+ ggml_hash_set_free(&sched->hash_set);
free(sched->splits);
- free(sched->hash_set.keys);
- free(sched->tensor_backend_id);
- free(sched->tensor_copies);
+ free(sched->hv_tensor_backend_ids);
+ free(sched->hv_tensor_copies);
free(sched->node_backend_ids);
free(sched->leaf_backend_ids);
free(sched->prev_node_backend_ids);
free(sched->prev_leaf_backend_ids);
+ free(sched->context_buffer);
+ free(sched->graph.nodes);
+ free(sched->graph.leafs);
free(sched);
}
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
// reset state for the next run
if (!sched->is_reset) {
- size_t hash_size = sched->hash_set.size;
- memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
- memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
- memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
-
+ ggml_hash_set_reset(&sched->hash_set);
+ memset(sched->hv_tensor_backend_ids, -1, sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
+ memset(sched->hv_tensor_copies, 0, sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
sched->is_reset = true;
}
sched->is_alloc = false;
}
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
- GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes);
+ GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
ggml_backend_sched_split_graph(sched, measure_graph);
- // TODO: extract this to a separate function
- if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
+ if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
return false;
}
}
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
- GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes);
+ GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
ggml_backend_sched_split_graph(sched, graph);
+
if (!ggml_backend_sched_alloc_splits(sched)) {
return false;
}
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
tensor_backend_id(node) = backend_index;
SET_CAUSE(node, "usr");
+ sched->is_reset = false;
}
ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
GGML_ASSERT(src != NULL);
GGML_ASSERT(src->data && "graph must be allocated");
- size_t id = ggml_hash_insert(hash_set, src);
- if (id == GGML_HASHTABLE_ALREADY_EXISTS) {
- return node_copies[ggml_hash_find(hash_set, src)];
+ size_t id = ggml_hash_insert(&hash_set, src);
+ if (id == GGML_HASHSET_ALREADY_EXISTS) {
+ return node_copies[ggml_hash_find(&hash_set, src)];
}
struct ggml_tensor * dst = ggml_dup_tensor_layout(src->data && !src->view_src ? ctx_allocated : ctx_unallocated, src);
return dst;
}
-static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
+static void graph_copy_init_tensor(struct ggml_hash_set * hash_set, struct ggml_tensor ** node_copies, bool * node_init, struct ggml_tensor * src) {
size_t id = ggml_hash_find(hash_set, src);
if (node_init[id]) {
return;
}
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
- struct ggml_hash_set hash_set = {
- /* .size = */ graph->visited_hash_table.size,
- /* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT
- };
+ struct ggml_hash_set hash_set = ggml_hash_set_new(graph->visited_hash_set.size);
struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
bool * node_init = calloc(hash_set.size, sizeof(node_init[0]));
if (ctx_allocated == NULL || ctx_unallocated == NULL) {
fprintf(stderr, "failed to allocate context for graph copy\n");
- free(hash_set.keys);
+ ggml_hash_set_free(&hash_set);
free(node_copies);
free(node_init);
ggml_free(ctx_allocated);
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx_allocated, backend);
if (buffer == NULL) {
fprintf(stderr, "failed to allocate buffer for graph copy\n");
- free(hash_set.keys);
+ ggml_hash_set_free(&hash_set);
free(node_copies);
free(node_init);
ggml_free(ctx_allocated);
// copy data and init views
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
- graph_copy_init_tensor(hash_set, node_copies, node_init, node);
+ graph_copy_init_tensor(&hash_set, node_copies, node_init, node);
}
// build graph copy
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(ctx_allocated, graph->size, false);
for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i];
- struct ggml_tensor * node_copy = node_copies[ggml_hash_find(hash_set, node)];
+ struct ggml_tensor * node_copy = node_copies[ggml_hash_find(&hash_set, node)];
graph_copy->nodes[i] = node_copy;
}
graph_copy->n_nodes = graph->n_nodes;
- free(hash_set.keys);
+ ggml_hash_set_free(&hash_set);
free(node_copies);
free(node_init);
break;
default:
- fprintf(stderr, "%s: unsupported op %s\n", __func__, ggml_op_desc(node));
- GGML_ASSERT(false);
+ GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
}
}
file, line);
GGML_CANN_LOG_ERROR(" %s\n", stmt);
// abort with GGML_ASSERT to get a stack trace
- GGML_ASSERT(!"CANN error");
+ GGML_ABORT("CANN error");
}
/**
// memory should always buffered. these memory may still needed by
// tasks in stream.
// TODO, fix me.
- GGML_ASSERT(!"Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
+ GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n");
}
};
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
(aclrtEvent)event->context));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
ggml_cann_max_pool2d(ctx, dst);
break;
case GGML_OP_POOL_COUNT:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
((ggml_tensor*)dst->extra)->nb);
return;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (dst->type == GGML_TYPE_F32) {
if (ggml_are_same_shape(src, dst)) {
((ggml_tensor*)dst->extra)->nb);
return;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// TODO
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else if (src->type == GGML_TYPE_F32) {
// TODO: if (src0->type == dst->type && ne00 == ne0 && nb00 == type_size
// && nb0 == type_size)
((ggml_tensor*)dst->extra)->nb);
return;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else {
// TODO: dst not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
if (dst->type == GGML_TYPE_F16) {
((ggml_tensor*)dst->extra)->nb);
return;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
// TODO
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else {
if (ggml_are_same_shape(src, dst)) {
cann_copy(ctx, acl_src, acl_dst);
ACL_CHECK(aclDestroyTensor(acl_dst));
return;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
((ggml_tensor*)dst->extra)->nb);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
ggml_cann_mul_mat_q8_0(ctx, dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
GGML_CUDA_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func, file, line);
GGML_CUDA_LOG_ERROR(" %s\n", stmt);
// abort with GGML_ASSERT to get a stack trace
- GGML_ASSERT(!"CUDA error");
+ GGML_ABORT("CUDA error");
}
// this is faster on Windows
CUDA_CHECK(ggml_cuda_cpy_tensor_2d(
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (quantize_src1 && !src1_is_contiguous) {
CUDA_CHECK(cudaLaunchHostFunc(cuda_ctx->stream(), wait_fn, event));
#endif
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
} else if (order == GGML_SORT_ORDER_DESC) {
k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
} else {
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
#ifdef __CUDA_ARCH__
#define NO_DEVICE_CODE no_device_code(__FILE__, __LINE__, __FUNCTION__, __CUDA_ARCH__, STRINGIZE(__CUDA_ARCH_LIST__))
#else
-#define NO_DEVICE_CODE //GGML_ASSERT(false && "NO_DEVICE_CODE not valid in host code.")
+#define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.")
#endif // __CUDA_ARCH__
static __device__ __forceinline__ float warp_reduce_sum(float x) {
} else {
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
} else {
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
convert_mul_mat_vec_f16_cuda(src0_dd_i, src1_dfloat, dst_dd_i, ne00, row_diff, stream);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
fprintf(stderr, "By default only f16 KV cache is supported.\n");
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else if (D == 128) {
fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
fprintf(stderr, "Supported combinations:\n");
fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n");
fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n");
fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else {
fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
fprintf(stderr, "Only f16 is supported.\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
} break;
default: {
- GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+ GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
} break;
}
}
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
} break;
default: {
- GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
+ GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
} break;
}
}
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
} else {
// ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
// break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
return;
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
return;
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
break;
default:
// TODO: k-quants
- fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
- GGML_ASSERT(false);
+ GGML_ABORT("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
break;
}
}
mul_mat_q_case<GGML_TYPE_IQ4_NL>(ctx, args, stream);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
case GGML_TYPE_IQ4_NL:
return MMQ_Q8_1_DS_LAYOUT_D4;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
break;
default:
fprintf(stderr, "mmq_x_best=%d\n", mmq_x_best);
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
rows_per_cuda_block = 2;
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
mul_mat_vec_q<type, 8><<<block_nums, block_dims, 0, stream>>>(vx, vy, dst, ncols_x, nrows_x, nrows_y, nrows_dst);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
mul_mat_vec_iq3_s_q8_1_cuda(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_padded_row_size, src1_ncols, nrows_dst, stream);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
<<<num_blocks, block_size, 0, stream>>>(x, vy, kx0, kx1, kx0_padded);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
attn_factor, corr_dims, freq_factors, stream
);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else {
if (src0->type == GGML_TYPE_F32) {
attn_factor, corr_dims, freq_factors, stream
);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif
-#define GGML_HASHTABLE_FULL ((size_t)-1)
-#define GGML_HASHTABLE_ALREADY_EXISTS ((size_t)-2)
+// bitset
+
+static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
+#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
+#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
+
+static size_t ggml_bitset_size(size_t n) {
+ return (n + BITSET_MASK) >> BITSET_SHR;
+}
+
+static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
+ return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
+}
+
+static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
+ bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
+}
+
+static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
+ bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
+}
+
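A small sketch of how the bitset helpers above compose (the local names n and used are illustrative); note that ggml_bitset_size() returns a count of 32-bit words, so the allocation multiplies by sizeof(ggml_bitset_t):

const size_t n = 1000;                                                        // slots to track
ggml_bitset_t * used = calloc(ggml_bitset_size(n), sizeof(ggml_bitset_t));    // (1000 + 31) >> 5 = 32 words
ggml_bitset_set(used, 123);
GGML_ASSERT( ggml_bitset_get(used, 123));
ggml_bitset_clear(used, 123);
GGML_ASSERT(!ggml_bitset_get(used, 123));
free(used);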
+// hash set
+
+#define GGML_HASHSET_FULL ((size_t)-1)
+#define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
struct ggml_hash_set ggml_hash_set_new(size_t size);
+void ggml_hash_set_free(struct ggml_hash_set * hash_set);
+
+// returns the minimum size for a hash set that can hold min_sz elements
+size_t ggml_hash_size(size_t min_sz);
-bool ggml_hash_contains (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// remove all elements from the hash set
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
-// returns GGML_HASHTABLE_FULL if table is full, otherwise the current index of the key or where it should be inserted
-size_t ggml_hash_find (const struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// returns true if key is in the hash set
+static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
-// returns GGML_HASHTABLE_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
-size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml_tensor * key);
+// returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
+static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+// returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
+static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
// return index, asserts if table is full
-size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
+static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
+
+// hash function for ggml_tensor
+static inline size_t ggml_hash(const struct ggml_tensor * p) {
+ // the last 4 bits are always zero due to alignment
+ return (size_t)(uintptr_t)p >> 4;
+}
+
+static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+ size_t h = ggml_hash(key) % hash_set->size;
+
+ // linear probing
+ size_t i = h;
+ while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
+ i = (i + 1) % hash_set->size;
+ if (i == h) {
+ // visited all hash table entries -> not found
+ return GGML_HASHSET_FULL;
+ }
+ }
+ return i;
+}
+
+static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+ size_t i = ggml_hash_find(hash_set, key);
+ return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
+}
+
+static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+ size_t h = ggml_hash(key) % hash_set->size;
+
+ // linear probing
+ size_t i = h;
+ do {
+ if (!ggml_bitset_get(hash_set->used, i)) {
+ ggml_bitset_set(hash_set->used, i);
+ hash_set->keys[i] = key;
+ return i;
+ }
+ if (hash_set->keys[i] == key) {
+ return GGML_HASHSET_ALREADY_EXISTS;
+ }
+ i = (i + 1) % hash_set->size;
+ } while (i != h);
+
+    // visited all hash table entries -> table is full
+ GGML_ABORT("fatal error");
+}
+
+static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
+ size_t h = ggml_hash(key) % hash_set->size;
+
+ // linear probing
+ size_t i = h;
+ do {
+ if (!ggml_bitset_get(hash_set->used, i)) {
+ ggml_bitset_set(hash_set->used, i);
+ hash_set->keys[i] = key;
+ return i;
+ }
+ if (hash_set->keys[i] == key) {
+ return i;
+ }
+ i = (i + 1) % hash_set->size;
+ } while (i != h);
+
+    // visited all hash table entries -> table is full
+ GGML_ABORT("fatal error");
+}
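Taken together, the functions above switch ggml_hash_set from pass-by-value to pass-by-pointer and track occupancy with the used bitset instead of NULL keys, so a reset only needs to clear size/32 words rather than size pointers. A usage sketch (t0 stands for any struct ggml_tensor *; that ggml_hash_set_reset() merely clears the bitset is an assumption consistent with this design, since its body is not part of this excerpt):

struct ggml_hash_set hs = ggml_hash_set_new(graph->n_nodes + graph->n_leafs);

const size_t i0 = ggml_hash_insert(&hs, t0);                       // slot index of the new key
GGML_ASSERT(i0 != GGML_HASHSET_ALREADY_EXISTS);
GGML_ASSERT(ggml_hash_insert(&hs, t0) == GGML_HASHSET_ALREADY_EXISTS);
GGML_ASSERT(ggml_hash_contains(&hs, t0));

ggml_hash_set_reset(&hs);                                          // cheap: clears only the 'used' bitset
GGML_ASSERT(!ggml_hash_contains(&hs, t0));

ggml_hash_set_free(&hs);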
#ifdef __cplusplus
}
}
if ((a % b) != 0) {
fprintf(stderr, "((%u %% %u) == %u) != 0\n", a, b, a % b);
- GGML_ASSERT(!"safe_divide result would've had remainder");
+ GGML_ABORT("safe_divide result would've had remainder");
}
return a / b;
}
if (!ggml_vk_supports_op(dst)) {
fprintf(stderr, "%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
- GGML_ASSERT(!"unsupported op");
+ GGML_ABORT("unsupported op");
}
const int32_t ne00 = src0 ? src0->ne[0] : 0;
default:
{
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
} break;
continue;
not_implemented: {}
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
- //GGML_ASSERT(false);
+ //GGML_ABORT("fatal error");
}
// Evaluate sequence
NSError * error = nil;
if (![[MTLCaptureManager sharedCaptureManager] startCaptureWithDescriptor:descriptor error:&error]) {
GGML_METAL_LOG_ERROR("%s: error: unable to start capture '%s'\n", __func__, [[error localizedDescription] UTF8String]);
- GGML_ASSERT(!"capture failed");
+ GGML_ABORT("capture failed");
}
}
if (!ggml_metal_supports_op(ctx, dst)) {
GGML_METAL_LOG_ERROR("%s: error: unsupported op '%s'\n", __func__, ggml_op_desc(dst));
- GGML_ASSERT(!"unsupported op");
+ GGML_ABORT("unsupported op");
}
if (should_capture) {
case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD_ROW].pipeline; break;
case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_ROW].pipeline; break;
case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV_ROW].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
}
bcast_row = true;
case GGML_OP_ADD: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ADD].pipeline; break;
case GGML_OP_MUL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL].pipeline; break;
case GGML_OP_DIV: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_DIV].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
}
}
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_F16].pipeline; break;
case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I32].pipeline; break;
case GGML_TYPE_I16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REPEAT_I16].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
}
[encoder setComputePipelineState:pipeline];
default:
{
GGML_METAL_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_OP_SQR:
case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ1_M_F32 ].pipeline; break;
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_NL_F32 ].pipeline; break;
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_IQ4_XS_F32 ].pipeline; break;
- default: GGML_ASSERT(false && "MUL MAT-MAT not implemented");
+ default: GGML_ABORT("MUL MAT-MAT not implemented");
}
[encoder setComputePipelineState:pipeline];
default:
{
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src0t);
- GGML_ASSERT(false && "not implemented");
+ GGML_ABORT("not implemented");
}
};
case GGML_TYPE_IQ1_M: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ1_M_F32 ].pipeline; break;
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_NL_F32 ].pipeline; break;
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_IQ4_XS_F32 ].pipeline; break;
- default: GGML_ASSERT(false && "MUL_MAT_ID not implemented");
+ default: GGML_ABORT("MUL_MAT_ID not implemented");
}
[encoder setComputePipelineState:pipeline];
default:
{
GGML_METAL_LOG_ERROR("Asserting on type %d\n", (int)src2t);
- GGML_ASSERT(false && "not implemented");
+ GGML_ABORT("not implemented");
}
};
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL ].pipeline; break;
case GGML_TYPE_IQ4_XS: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS ].pipeline; break;
case GGML_TYPE_I32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GET_ROWS_I32 ].pipeline; break;
- default: GGML_ASSERT(false && "not implemented");
+ default: GGML_ABORT("not implemented");
}
[encoder setComputePipelineState:pipeline];
switch (src0->type) {
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F32].pipeline; break;
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NORM_F16].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
};
} else {
switch (src0->type) {
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F32].pipeline; break;
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ROPE_NEOX_F16].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
};
}
switch (dst->type) {
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F32].pipeline; break;
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F16].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
};
[encoder setComputePipelineState:pipeline];
switch (order) {
case GGML_SORT_ORDER_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break;
case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
};
[encoder setComputePipelineState:pipeline];
{
GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
GGML_METAL_LOG_ERROR("add template specialization for this size\n");
- GGML_ASSERT(false && "add template specialization for this size");
+ GGML_ABORT("add template specialization for this size");
}
}
} else {
{
GGML_METAL_LOG_ERROR("unsupported size: %lld\n", ne00);
GGML_METAL_LOG_ERROR("add template specialization for this size\n");
- GGML_ASSERT(false && "add template specialization for this size");
+ GGML_ABORT("add template specialization for this size");
}
}
}
case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_0].pipeline; break;
case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_Q5_1].pipeline; break;
case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F32_IQ4_NL].pipeline; break;
- default: GGML_ASSERT(false && "not implemented");
+ default: GGML_ABORT("not implemented");
};
} break;
case GGML_TYPE_F16:
switch (dstt) {
case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F32].pipeline; break;
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CPY_F16_F16].pipeline; break;
- default: GGML_ASSERT(false && "not implemented");
+ default: GGML_ABORT("not implemented");
};
} break;
- default: GGML_ASSERT(false && "not implemented");
+ default: GGML_ABORT("not implemented");
}
[encoder setComputePipelineState:pipeline];
default:
{
GGML_METAL_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
printf("Oops: found point %u not on grid:", u);
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
printf("\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
q2[2*ib+1] |= (block_signs[k] << 7*k);
printf("Oops: found point %u not on grid:", u);
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
printf("\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
q2[2*ib+k] = grid_index | (block_signs[k] << 9);
}
printf("Oops: found point %u not on grid:", u);
for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
printf("\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (grid_size == 256) {
q3[8*ib+k] = grid_index;
printf("Oops: found point %u not on grid:", u);
for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
printf("\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
qs[k] = grid_index & 255;
qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8));
printf("Oops: found point %u not on grid:", u);
for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
printf("\n");
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int i8 = 2*ib + k;
y[ibl].qs[i8] = grid_index & 255;
}
if (nbytes % ggml_type_size(type) != 0) {
- fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
+ fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type));
return false;
}
});
});
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
// GGML_SYCL_DEBUG("current device index %d\n", id);
src_ptr = (char *) extra->data_device[id];
} else {
- // GGML_SYCL_DEBUG("GGML_ASSERT(false)\n");
- GGML_ASSERT(false);
+        // GGML_SYCL_DEBUG("GGML_ABORT(\"fatal error\")\n");
+ GGML_ABORT("fatal error");
}
char * dst_ptr = (char *) dst;
default:
// TODO: k-quants
fprintf(stderr, "%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
} else {
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
ggml_type_name(dst->type), ggml_type_name(src0->type), ggml_type_name(src1->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
case GGML_TYPE_Q6_K:
return 64;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (convert_src1_to_q8_1 && !src1_is_contiguous) {
} else {
fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
ggml_type_name(src0->type), ggml_type_name(src1->type));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
(void) dst;
const char* msg) {
fprintf(stderr, "SYCL error: %s: %s\n", stmt, msg);
fprintf(stderr, " in function %s at %s:%d\n", func, file, line);
- GGML_ASSERT(!"SYCL error");
+ GGML_ABORT("SYCL error");
}
#define SYCL_CHECK(err) \
break;
default:
printf("ggml_sycl_op_dequantize_mul_mat_vec unsupported GGML_TYPE %d\n", src0->type);
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
if (backend == "opencl:cpu") return 4;
if (backend == "opencl:acc") return 5;
printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
static bool compare_backend(std::string &backend1, std::string &backend2) {
return convert_backend_index(backend1) < convert_backend_index(backend2);
mmq_y = MMQ_Y_Q4_0_PASCAL;
nwarps = NWARPS_Q4_0_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q4_1_PASCAL;
nwarps = NWARPS_Q4_1_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q5_0_PASCAL;
nwarps = NWARPS_Q5_0_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q5_1_PASCAL;
nwarps = NWARPS_Q5_1_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q8_0_PASCAL;
nwarps = NWARPS_Q8_0_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q2_K_PASCAL;
nwarps = NWARPS_Q2_K_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q3_K_PASCAL;
nwarps = NWARPS_Q3_K_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q4_K_PASCAL;
nwarps = NWARPS_Q4_K_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q5_K_PASCAL;
nwarps = NWARPS_Q5_K_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
mmq_y = MMQ_Y_Q6_K_PASCAL;
nwarps = NWARPS_Q6_K_PASCAL;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const int block_num_x = (nrows_x + mmq_y - 1) / mmq_y;
ggml_mul_mat_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, src1_ncols, src1_padded_row_size, nrows_dst, stream);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
mul_mat_vec_iq4_xs_q8_1_sycl(src0_dd_i, src1_ddq_i_bs, dst_dd_i_bs, ne00, row_diff, stream);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
break;
}
}
attn_factor, corr_dims, freq_factors, main_stream
);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else {
if (src0->type == GGML_TYPE_F32) {
attn_factor, corr_dims, freq_factors, main_stream
);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
// Make sure at least one device exists
if (devices.empty()) {
std::cerr << "ggml_vulkan: Error: No devices found." << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// Default to using all dedicated GPUs
// Buffer is already mapped
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
std::cerr << "ggml_vulkan: buffer_write_nc_async dst buffer is host_visible. Use synchronous write." << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// Check if src is pinned memory
vk_buffer buf;
staging = ctx->device->sync_staging;
staging_offset = 0;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
// Buffer is already mapped
if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) {
std::cerr << "ggml_vulkan: buffer_write_async dst buffer is host_visible. Use synchronous write." << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// Check if src is pinned memory
vk_buffer buf = nullptr;
staging_buffer = dst->device->sync_staging;
staging_offset = 0;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
staging_buffer = src->device->sync_staging;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
std::cerr << "Missing CPY op for types: " << ggml_type_name(from) << " " << ggml_type_name(to) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context * subctx, vk_pipeline pipeline, const ggml_tensor * tensor, vk_subbuffer&& in, vk_subbuffer&& out) {
const bool qy_needs_dequant = (src1->type != GGML_TYPE_F16 && !y_f32_kernel) || y_non_contig;
if (mmp == nullptr) {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// Not implemented
std::cerr << " and " << ggml_type_name(src1->type);
}
std::cerr << " to " << ggml_type_name(dst->type) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
op_func(ctx, subctx, src0, src1, dst);
} else if (type == GGML_TYPE_F16) {
val = ggml_fp16_to_fp32(*((const ggml_fp16_t *) data + i2*ne1*ne0 + idx1*ne0 + idx0));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
fprintf(stderr, "% 7.2f ", val);
} else {
p = ctx->device->pipeline_matmul_f16->a_s;
shname = "F16_ALIGNED_S";
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else if (shader_size == 1) {
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
p = ctx->device->pipeline_matmul_f16->a_m;
shname = "F16_ALIGNED_M";
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else if (shader_size == 2) {
if (std::is_same<float, X_TYPE>() && std::is_same<float, Y_TYPE>()) {
p = ctx->device->pipeline_matmul_f16->a_l;
shname = "F16_ALIGNED_L";
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else {
GGML_ASSERT(0);
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
x[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
for (size_t i = 0; i < y_ne; i++) {
// y[i] = ggml_fp32_to_fp16((rand() / (float)RAND_MAX) * 2.0f - 1.0f);
y[i] = ggml_fp32_to_fp16((i % k == i / k) ? 1.0f : 0.0f);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
} else if (std::is_same<ggml_fp16_t, X_TYPE>()) {
src0_type = GGML_TYPE_F16;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (std::is_same<float, Y_TYPE>()) {
src1_type = GGML_TYPE_F32;
} else if (std::is_same<ggml_fp16_t, Y_TYPE>()) {
src1_type = GGML_TYPE_F16;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
ggml_tensor * src0_ggml = ggml_new_tensor_3d(ggml_ctx, src0_type, k, m, batch);
} else if (tensor->type == GGML_TYPE_F16) {
val = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) tensor->data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
fprintf(stderr, "% 7.2f ", val);
} else {
std::cerr << std::endl;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
#endif
if (ctx->prealloc_x == nullptr || (ctx->prealloc_size_x > 0 && ctx->prealloc_x->size < ctx->prealloc_size_x)) {
break;
default:
std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
return;
}
} else if (tensor->type == GGML_TYPE_I32) {
val = *(const int32_t *) ((const char *) data + i3*tensor->nb[3] + i2*tensor->nb[2] + idx1*tensor->nb[1] + idx0*tensor->nb[0]);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
fprintf(stderr, "% 7.2f ", val);
} else {
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
}
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
}
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
}
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (vk_output_tensor > 0 && vk_output_tensor == check_counter) {
break;
default:
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) {
if (src1 == nullptr) {
tensor_clone = ggml_sum_rows(ggml_ctx, src0_clone);
} else {
std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx);
}
} else {
std::cerr << "Missing debug code for type " << ggml_type_name(tensor->type) << std::endl;
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if ((std::isnan(correct) != std::isnan(result)) || (std::isinf(correct) != std::isinf(result)) || !buffer_size_fit) {
std::cerr << std::endl;
std::vector<const ggml_tensor *> done;
ggml_vk_print_graph_origin(tensor, done);
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
if (first_error[0] == -1 && std::fabs(correct - result) > 0.1f) {
first_error[0] = i0;
std::cerr << std::endl;
std::vector<const ggml_tensor *> done;
ggml_vk_print_graph_origin(tensor, done);
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
} else {
std::cerr << check_counter << " " << tensor->name << " op=" << ggml_op_name(tensor->op) << " avg_err=" << avg_err << std::endl;
}
#include <sys/wait.h>
-void ggml_print_backtrace(void) {
- /*
- #include <execinfo.h>
- #include <dlfcn.h>
-
+#if defined(__linux__)
+#include <execinfo.h>
+static void ggml_print_backtrace_symbols(void) {
void * trace[100];
-
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
-
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
- */
+}
+#else
+static void ggml_print_backtrace_symbols(void) {
+ // platform not supported
+}
+#endif
- // backtrack_symbols does not show line numbers, use gdb instead
+static void ggml_print_backtrace(void) {
char attach[32];
snprintf(attach, sizeof(attach), "attach %d", getpid());
int pid = fork();
if (pid == 0) {
+ // try gdb
execlp("gdb", "gdb", "--batch",
"-ex", "set style enabled on",
"-ex", attach,
"-ex", "detach",
"-ex", "quit",
(char *) NULL);
+ // try lldb
+ execlp("lldb", "lldb", "--batch",
+ "-o", "bt",
+ "-o", "quit",
+ "-p", attach,
+ (char *) NULL);
+ exit(EXIT_FAILURE);
} else {
- waitpid(pid, NULL, 0);
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
+ // gdb failed, fallback to backtrace_symbols
+ ggml_print_backtrace_symbols();
+ }
+ }
}
}
#else
-void ggml_print_backtrace(void) {
+static void ggml_print_backtrace(void) {
// platform not supported
}
#endif
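// Sketch of the failure chain implemented above (comments only, matching the code as shown):
// ggml_abort() below calls ggml_print_backtrace(), which fork()s a child that first tries to
// exec gdb in batch mode attached to the parent pid; if execlp("gdb", ...) fails, control falls
// through to execlp("lldb", ...) with equivalent batch commands, and if that also fails the
// child exits with EXIT_FAILURE. The parent waitpid()s and falls back to
// ggml_print_backtrace_symbols() (backtrace_symbols_fd on __linux__, a no-op elsewhere) only
// when the child exited with EXIT_FAILURE, i.e. when neither debugger could be launched or the
// attach itself failed.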
+void ggml_abort(const char * file, int line, const char * fmt, ...) {
+ fflush(stdout);
+
+ fprintf(stderr, "%s:%d: ", file, line);
+
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "\n");
+
+ ggml_print_backtrace();
+ abort();
+}
+
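// Minimal usage sketch (illustrative only, not part of the patch; assumes "ggml.h" is included
// for enum ggml_type and GGML_ABORT). GGML_ABORT forwards its printf-style arguments to
// ggml_abort() above, which flushes stdout, prints file:line and the formatted message, emits a
// backtrace and then abort()s, so an unreachable switch branch can report the offending value:

static void sketch_check_type(enum ggml_type type) { // hypothetical helper, for illustration
    switch (type) {
        case GGML_TYPE_F32:
        case GGML_TYPE_F16:
            break;
        default:
            GGML_ABORT("unsupported type %d", (int) type); // never returns
    }
}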
#define GGML_DEBUG 0
#define GGML_GELU_FP16
#define GGML_GELU_QUICK_FP16
break;
}
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
return NULL;
}
return aligned_memory;
void * result = malloc(size);
if (result == NULL) {
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
return result;
}
void * result = calloc(num, size);
if (result == NULL) {
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
return result;
}
}
// assert that pointer is aligned to GGML_MEM_ALIGN
-#define ggml_assert_aligned(ptr) \
+#define GGML_ASSERT_ALIGNED(ptr) \
GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
////////////////////////////////////////////////////////////////////////////////
GGML_ASSERT(ctx->mem_buffer != NULL);
- ggml_assert_aligned(ctx->mem_buffer);
+ GGML_ASSERT_ALIGNED(ctx->mem_buffer);
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
.type = type,
};
- ggml_assert_aligned(mem_buffer + obj_new->offs);
+ GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
if (obj_cur != NULL) {
obj_cur->next = obj_new;
#endif
// TODO: this should not be needed as long as we don't rely on aligned SIMD loads
- //ggml_assert_aligned(result->data);
+ //GGML_ASSERT_ALIGNED(result->data);
for (int i = 0; i < n_dims; i++) {
result->ne[i] = ne[i];
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
return tensor;
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
return tensor;
}
default:
{
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
-
- return 0.0f;
}
void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
case GGML_TYPE_F32:
return ((float *) data)[0];
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
-
- return 0.0f;
}
void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
}
default:
{
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
-
- return 0.0f;
}
void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
case GGML_TYPE_F32:
return ((float *) data)[0];
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
-
- return 0.0f;
}
void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
}
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
- strncpy(tensor->name, name, sizeof(tensor->name) - 1);
- tensor->name[sizeof(tensor->name) - 1] = '\0';
+ size_t i;
+ for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
+ tensor->name[i] = name[i];
+ }
+ tensor->name[i] = '\0';
return tensor;
}
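// Note on the loop above: it reproduces strncpy's truncate-to-fit behavior while always
// NUL-terminating, without strncpy's zero-padding of the remainder of the buffer; presumably
// (the patch does not say) this also avoids compiler truncation warnings such as
// -Wstringop-truncation at call sites with short fixed-size name buffers.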
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
is_node = true;
}
bool is_node = false;
if (!inplace && (a->grad)) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (!inplace && (a->grad)) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
if (b->grad) {
// gradient propagation is not supported
- //GGML_ASSERT(false);
+ //GGML_ABORT("fatal error");
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (timesteps->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
struct ggml_tensor * v,
struct ggml_tensor * d,
bool masked) {
- GGML_ASSERT(false && "TODO: adapt to ggml_flash_attn_ext() changes");
+ GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
GGML_ASSERT(ggml_can_mul_mat(k, q));
// TODO: check if vT can be multiplied by (k*qT)
bool is_node = false;
if (s->grad || x->grad || c->grad || sq->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
bool is_node = false;
if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
return;
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
return;
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
ggml_compute_forward_add_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_F16:
ggml_compute_forward_add_f16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_BF16:
ggml_compute_forward_add_bf16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_Q4_0:
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
ggml_compute_forward_add1_f16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_BF16:
ggml_compute_forward_add1_bf16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_Q4_0:
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
case GGML_TYPE_Q4_0_8_8:
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
// ggml_compute_forward_leaky_relu
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
case GGML_TYPE_F16:
{
- GGML_ASSERT(false); // todo
+ GGML_ABORT("fatal error"); // todo
// ggml_compute_forward_out_prod_f16_f32(params, dst);
- } break;
+ }
case GGML_TYPE_F32:
{
ggml_compute_forward_out_prod_f32(params, dst);
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
case GGML_TYPE_Q4_0_8_8:
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
//static bool first = true;
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
//static bool first = true;
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
switch (op) {
case GGML_OP_POOL_AVG: drow[i] = 0; break;
case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
for (int ki = 0; ki < k; ++ki) {
switch (op) {
case GGML_OP_POOL_AVG: drow[i] += srow[j]; break;
case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
++j;
}
switch (op) {
case GGML_OP_POOL_AVG: drow[i] /= k; break;
case GGML_OP_POOL_MAX: break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
switch (op) {
case GGML_OP_POOL_AVG: *out = 0; break;
case GGML_OP_POOL_MAX: *out = -FLT_MAX; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
const int ix = offset0 + ox * s0;
switch (op) {
case GGML_OP_POOL_AVG: *out += srow[j]; break;
case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
}
switch (op) {
case GGML_OP_POOL_AVG: *out /= ka; break;
case GGML_OP_POOL_MAX: break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
////////////////////////////////////////////////////////////////////////////////
-static size_t ggml_hash_size(size_t min_sz) {
+struct ggml_hash_set ggml_hash_set_new(size_t size) {
+ size = ggml_hash_size(size);
+ struct ggml_hash_set result;
+ result.size = size;
+ result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
+ result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
+ return result;
+}
+
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+ memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
+}
+
+void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+ GGML_FREE(hash_set->used);
+ GGML_FREE(hash_set->keys);
+}
+
+size_t ggml_hash_size(size_t min_sz) {
// next primes after powers of two
static const size_t primes[] = {
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
};
static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
- // find the smallest prime that is larger or equal to min_sz
+ // find the smallest prime that is larger than or equal to min_sz
size_t l = 0;
size_t r = n_primes;
while (l < r) {
return sz;
}
-static size_t ggml_hash(const void * p) {
- return (size_t)p;
-}
-
-size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t h = ggml_hash(key) % hash_set.size;
-
- // linear probing
- size_t i = h;
- while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
- i = (i + 1) % hash_set.size;
- if (i == h) {
- // visited all hash table entries -> not found
- return GGML_HASHTABLE_FULL;
- }
- }
- return i;
-}
-
-bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
- return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
-}
-
-size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
-
- GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
- if (hash_set.keys[i] == key) {
- return GGML_HASHTABLE_ALREADY_EXISTS;
- }
-
- // insert
- GGML_ASSERT(hash_set.keys[i] == NULL);
- hash_set.keys[i] = key;
- return i;
-}
-
-size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
-
- GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
- hash_set.keys[i] = key;
- return i;
-}
-
-struct ggml_hash_set ggml_hash_set_new(size_t size) {
- size = ggml_hash_size(size);
- struct ggml_hash_set result;
- result.size = size;
- result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
- memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
- return result;
-}
-
-static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
- GGML_FREE(hash_set.keys);
-}
-
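// Usage sketch of the reworked hash set (illustrative; ggml_hash_find/insert/contains are
// assumed to keep their previous semantics but now take a pointer to the set and use the
// GGML_HASHSET_FULL / GGML_HASHSET_ALREADY_EXISTS sentinels, as the call sites below show).

static void hash_set_usage_sketch(struct ggml_tensor * t) { // hypothetical helper
    struct ggml_hash_set set = ggml_hash_set_new(64); // rounded up to the next prime (67) by ggml_hash_size()
    if (ggml_hash_insert(&set, t) != GGML_HASHSET_ALREADY_EXISTS) {
        // first time this tensor is seen
    }
    GGML_ASSERT(ggml_hash_contains(&set, t));
    ggml_hash_set_reset(&set); // cheap clear: only the 'used' bitset is zeroed, keys are left stale
    ggml_hash_set_free(&set);
}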
struct hash_map {
struct ggml_hash_set set;
struct ggml_tensor ** vals;
static struct hash_map * ggml_new_hash_map(size_t size) {
struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
result->set = ggml_hash_set_new(size);
- result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
- memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
+ result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
return result;
}
static void ggml_hash_map_free(struct hash_map * map) {
- ggml_hash_set_free(map->set);
+ ggml_hash_set_free(&map->set);
GGML_FREE(map->vals);
GGML_FREE(map);
}
return node;
}
- if (!ggml_hash_contains(graph->visited_hash_table, node)) {
+ if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
return node;
}
return node;
}
- size_t i = ggml_hash_find(replacements->set, node);
- GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full
+ size_t i = ggml_hash_find(&replacements->set, node);
+ GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that not full
if (replacements->set.keys[i] == node) {
return replacements->vals[i];
}
// insert checkpoints in replacements
for (int i = 0; i < n_checkpoints; ++i) {
- size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
- GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full
+ size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
+ GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that not full
GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
replacements->set.keys[k] = checkpoints[i];
replacements->vals[k] = checkpoints[i];
// functions to change gradients considering the case that input a might be initial gradient with zero value
-static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return b;
} else {
}
}
-static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
}
}
-static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return ggml_repeat(ctx, b, a);
} else {
}
}
-static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return ggml_neg(ctx, b);
} else {
}
}
-static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
+static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
struct ggml_tensor * src0 = tensor->src[0];
struct ggml_tensor * src1 = tensor->src[1];
struct ggml_tensor * src2 = tensor->src[2];
case GGML_OP_MEAN:
case GGML_OP_ARGMAX:
{
- GGML_ASSERT(false); // TODO: implement
- } break;
+ GGML_ABORT("fatal error"); // TODO: implement
+ }
case GGML_OP_REPEAT:
{
// necessary for llama
} break;
case GGML_OP_CONCAT:
{
- GGML_ASSERT(false); // TODO: implement
- } break;
+ GGML_ABORT("fatal error"); // TODO: implement
+ }
case GGML_OP_SILU_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_NORM:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_RMS_NORM:
{
// necessary for llama
} break;
case GGML_OP_RMS_NORM_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_GROUP_NORM:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_MUL_MAT:
{
// https://cs231n.github.io/optimization-2/#staged
} break;
case GGML_OP_MUL_MAT_ID:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_OUT_PROD:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_SCALE:
{
// necessary for llama
} break;
case GGML_OP_GET_ROWS_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_DIAG:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_DIAG_MASK_INF:
{
// necessary for llama
} break;
case GGML_OP_SOFT_MAX_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ROPE:
{
// necessary for llama
} break;
case GGML_OP_CLAMP:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_CONV_TRANSPOSE_1D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_IM2COL:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_CONV_TRANSPOSE_2D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_POOL_1D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_POOL_2D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_UPSCALE:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_PAD:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ARANGE:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_TIMESTEP_EMBEDDING:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ARGSORT:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_LEAKY_RELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_FLASH_ATTN_EXT:
{
struct ggml_tensor * flash_grad = NULL;
} break;
case GGML_OP_FLASH_ATTN_BACK:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_SSM_CONV:
case GGML_OP_SSM_SCAN:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_WIN_PART:
case GGML_OP_WIN_UNPART:
case GGML_OP_UNARY:
} break;
case GGML_UNARY_OP_TANH:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_ELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_RELU:
{
if (src0->grad) {
} break;
case GGML_UNARY_OP_SIGMOID:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_GELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_GELU_QUICK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_SILU:
{
// necessary for llama
}
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_OP_GET_REL_POS:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_CROSS_ENTROPY_LOSS:
{
if (src0->grad) {
} break;
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_NONE:
{
// nop
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
for (int i = 0; i < GGML_MAX_SRC; ++i) {
}
// check if already visited
- if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
+ if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
return;
}
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
for (int i = 0; i < gf->n_nodes; i++) {
if (gf->grads[i]) {
- ggml_hash_insert(zero_table, gf->grads[i]);
+ ggml_hash_insert(&zero_table, gf->grads[i]);
}
}
// inplace operations to add gradients are not created by ggml_compute_backward
// use allocator to automatically make inplace operations
if (node->grad) {
- ggml_compute_backward(ctx, node, zero_table);
+ ggml_compute_backward(ctx, node, &zero_table);
}
}
}
}
- ggml_hash_set_free(zero_table);
+ ggml_hash_set_free(&zero_table);
+}
+
+static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
+ void * ptr = *p;
+ ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
+ *p = (void *) ((char *) ptr + size);
+ return ptr;
}
static size_t ggml_graph_nbytes(size_t size, bool grads) {
- size_t nbytes = sizeof(struct ggml_cgraph);
- nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes
+ size_t hash_size = ggml_hash_size(size * 2);
+ void * p = 0;
+ incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
+ incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
if (grads) {
- nbytes += size * sizeof(struct ggml_tensor *); // grads
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
}
- nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set
+ incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
+
+ size_t nbytes = (size_t) p;
return nbytes;
}
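// How the computation above works (comments only): ggml_graph_nbytes() "dry-runs" the layout by
// starting from p = 0 and letting incr_ptr_aligned() round p up to each member's alignment
// before advancing it by that member's size, so the final value of p is the total allocation
// size, padding included. The graph allocation code below performs the exact same sequence of
// bumps starting from the real base pointer (cgraph + 1), which is what its assert checks
// against obj_size.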
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
- struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
-
+ // the size of the hash table is doubled since it needs to hold both nodes and leafs
size_t hash_size = ggml_hash_size(size * 2);
- struct ggml_tensor ** nodes_ptr = data_start;
- struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
- struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
- struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
- // check that we allocated the correct amount of memory
- assert(obj_size == (size_t) (
- (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
+ void * p = cgraph + 1;
+
+ struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
+ ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
- memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
+ // check that we allocated the correct amount of memory
+ assert(obj_size == (size_t)((char *)p - (char *)cgraph));
*cgraph = (struct ggml_cgraph) {
/*.size =*/ size,
/*.nodes =*/ nodes_ptr,
/*.grads =*/ grads_ptr,
/*.leafs =*/ leafs_ptr,
- /*.hash_table =*/ { hash_size, hash_keys_ptr },
+ /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
};
+ ggml_hash_set_reset(&cgraph->visited_hash_set);
+
return cgraph;
}
/*.nodes =*/ cgraph0->nodes + i0,
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
/*.leafs =*/ NULL,
- /*.hash_table =*/ { 0, NULL },
+ /*.hash_table =*/ { 0, NULL, NULL },
/*.order =*/ cgraph0->order,
};
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
GGML_ASSERT(dst->size >= src->n_leafs);
GGML_ASSERT(dst->size >= src->n_nodes);
- GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
+ GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
dst->n_leafs = src->n_leafs;
dst->n_nodes = src->n_nodes;
}
}
- for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
- if (src->visited_hash_table.keys[i]) {
- ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
+ for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
+ if (src->visited_hash_set.keys[i]) {
+ ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
}
}
}
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
cgraph->n_leafs = 0;
cgraph->n_nodes = 0;
- memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *));
+ ggml_hash_set_reset(&cgraph->visited_hash_set);
}
//
n_tasks = n_threads;
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
break;
case GGML_OP_SILU_BACK:
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
default:
{
fprintf(stderr, "%s: op not implemented: ", __func__);
} else {
fprintf(stderr, "%d\n", node->op);
}
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
assert(n_tasks > 0);
cur += sizeof(float)*ne00*ne01*ne02;
cur += sizeof(float)*ne10*ne11;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_OP_CONV_TRANSPOSE_2D:
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
default:
break;
}
(*step) *= width;
}
- GGML_ASSERT(false && "line search failed");
+ GGML_ABORT("line search failed");
- return GGML_LINESEARCH_FAIL;
+ //return GGML_LINESEARCH_FAIL;
}
static enum ggml_opt_result ggml_opt_lbfgs(
step[0] = 1.0;
}
- GGML_ASSERT(false && "lbfgs failed");
+ GGML_ABORT("lbfgs failed");
- return GGML_OPT_RESULT_DID_NOT_CONVERGE;
+ //return GGML_OPT_RESULT_DID_NOT_CONVERGE;
}
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
}
} break;
case GGUF_TYPE_ARRAY:
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
} break;
- default: GGML_ASSERT(false && "invalid type");
+ default: GGML_ABORT("invalid type");
}
if (!ok) {
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
- GGML_ASSERT(false && "nested arrays not supported");
+ GGML_ABORT("nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
} break;
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
}
}
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
if (gguf_find_tensor(ctx, tensor->name) != -1) {
- GGML_ASSERT(false && "duplicated tensor name");
+ GGML_ABORT("duplicated tensor name");
}
const int idx = ctx->header.n_tensors;
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
- GGML_ASSERT(false && "tensor not found");
+ GGML_ABORT("tensor not found");
}
ctx->infos[idx].type = type;
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
- GGML_ASSERT(false && "tensor not found");
+ GGML_ABORT("tensor not found");
}
ctx->infos[idx].data = data;
}
} break;
case GGUF_TYPE_ARRAY:
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
} break;
- default: GGML_ASSERT(false && "invalid type");
+ default: GGML_ABORT("invalid type");
}
}
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = ggml_fopen(fname, "wb");
if (!file) {
- GGML_ASSERT(false && "failed to open file for writing");
+ GGML_ABORT("failed to open file for writing");
}
struct gguf_buf buf = gguf_buf_init(16*1024);
// end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
// (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
// those
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
return;
}
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
const std::string & piece = vocab->cache_token_to_piece.at(token);
return strtol(buf.c_str(), NULL, 16);
}
case LLAMA_VOCAB_TYPE_BPE: {
- GGML_ASSERT(false);
- return unicode_utf8_to_byte(token_data.text); // TODO: why is this here after GGML_ASSERT?
+ GGML_ABORT("fatal error");
+ //return unicode_utf8_to_byte(token_data.text); // TODO: why is this here after GGML_ABORT?
}
case LLAMA_VOCAB_TYPE_WPM: {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
} break;
case LLAMA_VOCAB_TYPE_NONE:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
return output;
return vocab.token_to_id.at(unicode_byte_to_utf8(ch));
}
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
break;
}
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
return n_head_arr[il];
}
- GGML_ASSERT(false);
- return 0;
+ GGML_ABORT("fatal error");
}
uint32_t n_head_kv(uint32_t il = 0) const {
return n_head_kv_arr[il];
}
- GGML_ASSERT(false);
- return 0;
+ GGML_ABORT("fatal error");
}
uint32_t n_ff(uint32_t il = 0) const {
return n_ff_arr[il];
}
- GGML_ASSERT(false);
- return 0;
+ GGML_ABORT("fatal error");
}
uint32_t n_gqa(uint32_t il = 0) const {
cb(gate, "ffn_moe_gelu", il);
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
ggml_tensor * par = ggml_mul(ctx, up, gate); // [n_ff, n_expert_used, n_tokens]
} break;
default:
{
- GGML_ASSERT(false && "unknown pooling type");
- } break;
+ GGML_ABORT("unknown pooling type");
+ }
}
cb(cur, "result_embd_pooled", -1);
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd/n_head, n_head, n_tokens);
break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
cb(Qcur, "Qcur", il);
cb(Kcur, "Kcur", il);
switch (model.type) {
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
- default: GGML_ASSERT(false);
+ default: GGML_ABORT("fatal error");
};
cb(Qcur, "Qcur_scaled", il);
result = llm.build_jais();
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
// add on pooling layer
} break;
case LLAMA_POOLING_TYPE_UNSPECIFIED:
{
- GGML_ASSERT(false && "unknown pooling type");
- } break;
+ GGML_ABORT("unknown pooling type");
+ }
}
}
n_outputs_prev += lctx.n_outputs;
// apply K-shift if needed
if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
if (lctx.model.arch == LLM_ARCH_DEEPSEEK2) { // not supported due to MLA
- GGML_ASSERT(false && "Deepseek2 does not support K-shift");
+ GGML_ABORT("Deepseek2 does not support K-shift");
}
{
} else if (ggml_is_quantized(tensor->type)) {
qtype.to_float(tensor->data, f32_output, nelements);
} else {
- GGML_ASSERT(false); // unreachable
+ GGML_ABORT("fatal error"); // unreachable
}
return;
}
// all model arches should be listed explicitly here
case LLM_ARCH_UNKNOWN:
- GGML_ASSERT(false && "unknown architecture");
- break;
+ GGML_ABORT("unknown architecture");
}
return LLAMA_ROPE_TYPE_NONE;
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: invalid logits id %d, reason: %s\n", __func__, i, err.what());
#ifndef NDEBUG
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
#endif
return nullptr;
}
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what());
#ifndef NDEBUG
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
#endif
return nullptr;
}
// This is going to create some weird integers though.
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
tt.to_float(&buf[i], vq.data(), bs);
tv.insert(tv.end(), vq.begin(), vq.end());
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
}
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
}
return true;
}
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
return false;
}
for (auto s : samplers_sequence) {
switch (s){
case 'k': llama_sample_top_k (nullptr, &candidates_p, top_k, 1); break;
- case 'f': GGML_ASSERT(false && "tail_free test not implemented"); break;
- case 'y': GGML_ASSERT(false && "typical test not implemented"); break;
+ case 'f': GGML_ABORT("tail_free test not implemented"); break;
+ case 'y': GGML_ABORT("typical test not implemented"); break;
case 'p': llama_sample_top_p (nullptr, &candidates_p, top_p, 1); break;
case 'm': llama_sample_min_p (nullptr, &candidates_p, min_p, 1); break;
- case 't': GGML_ASSERT(false && "temperature test not implemented"); break;
- default : GGML_ASSERT(false && "Unknown sampler"); break;
+ case 't': GGML_ABORT("temperature test not implemented"); break;
+ default : GGML_ABORT("Unknown sampler"); break;
}
llama_sample_softmax(nullptr, &candidates_p); // make sure tokens are sorted for tests
GGML_ASSERT(candidates_p.data[0].id == max_token_id);
GGML_ASSERT(candidates_p.data[expected_size-1].id == min_token_id);
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}