From: Marcel Petrick Date: Thu, 5 Mar 2026 07:50:21 +0000 (+0100) Subject: chore : correct typos [no ci] (llama/20041) X-Git-Tag: v0.9.8~78 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c01b2ea4e3f0c536fc24cbeedb8b4458cba9e4ee;p=pkg%2Fggml%2Fsources%2Fggml chore : correct typos [no ci] (llama/20041) * fix(docs): correct typos found during code review Non-functional changes only: - Fixed minor spelling mistakes in comments - Corrected typos in user-facing strings - No variables, logic, or functional code was modified. Signed-off-by: Marcel Petrick * Update docs/backend/CANN.md Co-authored-by: Aaron Teo * Revert "Auxiliary commit to revert individual files from 846d1c301281178efbc6ce6060ad34c1ebe45af8" This reverts commit 02fcf0c7db661d5ff3eff96b2b2db9fdb7213256. * Update tests/test-backend-ops.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-backend-ops.cpp Co-authored-by: Sigbjørn Skjæret --------- Signed-off-by: Marcel Petrick Co-authored-by: Aaron Teo Co-authored-by: Sigbjørn Skjæret --- diff --git a/include/ggml-backend.h b/include/ggml-backend.h index a9d17786..9fd3f7f3 100644 --- a/include/ggml-backend.h +++ b/include/ggml-backend.h @@ -259,7 +259,7 @@ extern "C" { Example usage: // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned - // preferrably to run on the same backend as the buffer + // preferably to run on the same backend as the buffer ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false, true); diff --git a/include/ggml-opt.h b/include/ggml-opt.h index 4703a05a..1c2ed79b 100644 --- a/include/ggml-opt.h +++ b/include/ggml-opt.h @@ -138,7 +138,7 @@ extern "C" { GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); - // set gradients to zero, initilize loss, and optionally reset the optimizer + // set gradients to zero, initialize loss, and optionally reset the optimizer GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically diff --git a/include/ggml.h b/include/ggml.h index fcc51f1f..784d6920 100644 --- a/include/ggml.h +++ b/include/ggml.h @@ -2575,7 +2575,7 @@ extern "C" { struct ggml_tensor * grad, struct ggml_tensor * sgd_params); // alpha, weight decay - // build forward mutiple tensors and select one of them for computing + // build forward multiple tensors and select one of them for computing // this is useful for creating graphs that have constant topology but compute different things based on the input // ref: https://github.com/ggml-org/llama.cpp/pull/18550 // diff --git a/src/ggml-cpu/amx/mmq.cpp b/src/ggml-cpu/amx/mmq.cpp index b5aca766..93a6d397 100644 --- a/src/ggml-cpu/amx/mmq.cpp +++ b/src/ggml-cpu/amx/mmq.cpp @@ -195,7 +195,7 @@ struct tile_config_t{ // will be needed. // // Here another commonly used pattern 1-3-3 is skipped, as it is mostly used when m <=16; -// and the sinlge batch gemm (m=1) has a special fast path with `avx512-vnni`. +// and the single batch gemm (m=1) has a special fast path with `avx512-vnni`. // // ref: https://www.intel.com/content/www/us/en/developer/articles/code-sample/ // advanced-matrix-extensions-intrinsics-functions.html @@ -1379,8 +1379,8 @@ struct tinygemm_kernel_vnni 4 #if _WIN32_WINNT >= 0x0602 diff --git a/src/ggml-cpu/llamafile/sgemm.cpp b/src/ggml-cpu/llamafile/sgemm.cpp index da412fd0..5fd452a0 100644 --- a/src/ggml-cpu/llamafile/sgemm.cpp +++ b/src/ggml-cpu/llamafile/sgemm.cpp @@ -533,7 +533,7 @@ class tinyBLAS { if constexpr (RN > 1) { return mnpack(m, n, SIZE_N, BN); } else { - GGML_LOG_ERROR("mnpack<%d, %d> bloc size not supported\n", RM, (int)SIZE_N); + GGML_LOG_ERROR("mnpack<%d, %d> block size not supported\n", RM, (int)SIZE_N); GGML_ASSERT(false); // we have miss something. } } @@ -711,7 +711,7 @@ class tinyBLAS_RVV { if constexpr (RN > 1) { return mnpack(m, n, SIZE_N, BN); } else { - GGML_LOG_ERROR("mnpack<%d, %d> bloc size not supported\n", RM, (int)SIZE_N); + GGML_LOG_ERROR("mnpack<%d, %d> block size not supported\n", RM, (int)SIZE_N); GGML_ASSERT(false); // we have miss something. } } diff --git a/src/ggml-cpu/ops.cpp b/src/ggml-cpu/ops.cpp index b7a70e06..ca1b3059 100644 --- a/src/ggml-cpu/ops.cpp +++ b/src/ggml-cpu/ops.cpp @@ -375,7 +375,7 @@ static void ggml_compute_forward_dup_bytes( const size_t rs = ne00 * type_size; if (nb00 == type_size) { - // src0 is contigous on first dimension, copy by rows + // src0 is contiguous on first dimension, copy by rows for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { id += rs * ir0; @@ -1795,7 +1795,7 @@ void ggml_compute_forward_repeat( { ggml_compute_forward_repeat_f32(params, dst); } break; - // TODO: templateify the implemenation and support for I64 + // TODO: templateify the implementation and support for I64 // ref https://github.com/ggml-org/llama.cpp/pull/14274#discussion_r2169492225 //case GGML_TYPE_I64: // { diff --git a/src/ggml-cpu/repack.cpp b/src/ggml-cpu/repack.cpp index 5edba421..02c3cc31 100644 --- a/src/ggml-cpu/repack.cpp +++ b/src/ggml-cpu/repack.cpp @@ -3032,7 +3032,7 @@ template src[1])); - size = GGML_PAD(size, sizeof(int64_t)); // + padding for next bloc. + size = GGML_PAD(size, sizeof(int64_t)); // + padding for next block. const int64_t ne02 = op->src[0]->ne[2]; // n_as, n_expert const int64_t ne12 = op->src[1]->ne[2]; // n_tokens @@ -3297,7 +3297,7 @@ template wdata; auto * wdata_src1_end = (char *)wdata + GGML_PAD(nbw3, sizeof(int64_t)); - // total of [n_as][ne12 + 1] elemets of type mmid_row_mapping (2*int32_t = int64_t) + // total of [n_as][ne12 + 1] elements of type mmid_row_mapping (2*int32_t = int64_t) auto * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as] struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *) (matrix_row_counts + n_as); // [n_as][ne12] diff --git a/src/ggml-cuda/fattn-mma-f16.cuh b/src/ggml-cuda/fattn-mma-f16.cuh index beb7e32e..fff70c8e 100644 --- a/src/ggml-cuda/fattn-mma-f16.cuh +++ b/src/ggml-cuda/fattn-mma-f16.cuh @@ -1215,7 +1215,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile( } // If attention sinks are used, potentially re-scale if KQ_max is small. - // Also add the sink as a value to KQ_rowsum, this is done after synchonization of KQ_rowsum + // Also add the sink as a value to KQ_rowsum, this is done after synchronization of KQ_rowsum // so it's being done unconditionally for every thread. if (!is_fixup && (np == 1 || threadIdx.y % np == 0) && sinks_f) { float KQ_max_scale[cols_per_thread]; diff --git a/src/ggml-cuda/fattn-vec.cuh b/src/ggml-cuda/fattn-vec.cuh index 3f4a78cc..7cbe3263 100644 --- a/src/ggml-cuda/fattn-vec.cuh +++ b/src/ggml-cuda/fattn-vec.cuh @@ -10,7 +10,7 @@ static constexpr __device__ int ggml_cuda_fattn_vec_get_nthreads_device() { return 128; } -// Currenlty llvm with the amdgcn target does not support unrolling loops +// Currently llvm with the amdgcn target does not support unrolling loops // that contain a break that can not be resolved at compile time. #ifdef __clang__ #pragma clang diagnostic push diff --git a/src/ggml-cuda/fattn-wmma-f16.cuh b/src/ggml-cuda/fattn-wmma-f16.cuh index cd3bfd40..aaf711a6 100644 --- a/src/ggml-cuda/fattn-wmma-f16.cuh +++ b/src/ggml-cuda/fattn-wmma-f16.cuh @@ -18,7 +18,7 @@ #if defined(RDNA4) && ROCWMMA_VERSION_MAJOR > 1 #define GGML_USE_WMMA_FATTN #elif defined(RDNA4) -#warning "rocwmma fattn is not suported on RDNA4 on rocwmma < v2.0.0, expect degraded performance" +#warning "rocwmma fattn is not supported on RDNA4 on rocwmma < v2.0.0, expect degraded performance" #endif // defined(RDNA4) && ROCWMMA_VERSION_MAJOR > 1 #endif // defined(GGML_HIP_ROCWMMA_FATTN) diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu index 7e6d3303..b56e3d50 100644 --- a/src/ggml-cuda/ggml-cuda.cu +++ b/src/ggml-cuda/ggml-cuda.cu @@ -3330,7 +3330,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, return false; } - //rms_norm kernel assumes contigous rows + //rms_norm kernel assumes contiguous rows if (!ggml_is_contiguous_rows(mul->src[0]) || !ggml_is_contiguous_rows(mul->src[1])) { return false; } diff --git a/src/ggml-cuda/quantize.cu b/src/ggml-cuda/quantize.cu index a8c68e44..4300ffc1 100644 --- a/src/ggml-cuda/quantize.cu +++ b/src/ggml-cuda/quantize.cu @@ -235,7 +235,7 @@ static __global__ void quantize_mmq_q8_1( q.z = roundf(xi.z*d_inv); q.w = roundf(xi.w*d_inv); - // Write back 4 int8 values as a single 32 bit value for better memroy bandwidth: + // Write back 4 int8 values as a single 32 bit value for better memory bandwidth: char4 * yqs4 = (char4 *) y[ib].qs; yqs4[iqs/4] = q; diff --git a/src/ggml-cuda/softmax.cu b/src/ggml-cuda/softmax.cu index dc06d069..285c0e95 100644 --- a/src/ggml-cuda/softmax.cu +++ b/src/ggml-cuda/softmax.cu @@ -46,7 +46,7 @@ struct soft_max_params { }; // When ncols_template == 0 the bounds for the loops in this function are not known and can't be unrolled. -// As we want to keep pragma unroll for all other cases we supress the clang transformation warning here. +// As we want to keep pragma unroll for all other cases we suppress the clang transformation warning here. #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" diff --git a/src/ggml-cuda/solve_tri.cu b/src/ggml-cuda/solve_tri.cu index 177ffc26..07ca33f5 100644 --- a/src/ggml-cuda/solve_tri.cu +++ b/src/ggml-cuda/solve_tri.cu @@ -83,7 +83,7 @@ static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx, // ====================== // When ncols_template == 0 the bounds for the loops in this function are not // known and can't be unrolled. As we want to keep pragma unroll for all other -// cases we supress the clang transformation warning here. +// cases we suppress the clang transformation warning here. #ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wpass-failed" diff --git a/src/ggml-hexagon/ggml-hexagon.cpp b/src/ggml-hexagon/ggml-hexagon.cpp index 7a44443a..3006e217 100644 --- a/src/ggml-hexagon/ggml-hexagon.cpp +++ b/src/ggml-hexagon/ggml-hexagon.cpp @@ -139,7 +139,7 @@ struct ggml_hexagon_session { }; void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_buffer *bufs, uint32_t n_bufs, bool sync) { - // Bump pending flag (cleared in the session::flush once we get the responce) + // Bump pending flag (cleared in the session::flush once we get the response) this->op_pending++; // atomic inc int err = dspqueue_write(this->queue, @@ -443,7 +443,7 @@ static void repack_row_q4x4x2(uint8_t * y, const block_q4_0 * x, int64_t k) { // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Repack the scales ggml_half * d = (ggml_half *) (y_d + i * dblk_size); @@ -503,7 +503,7 @@ static void unpack_row_q4x4x2(block_q4_0 * x, const uint8_t * y, int64_t k) { // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales const ggml_half * d = (const ggml_half *) (y_d + i * dblk_size); @@ -552,7 +552,7 @@ static void init_row_q4x4x2(block_q4_0 * x, int64_t k) { // Init the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales x[i * 8 + 0].d = 0; @@ -770,7 +770,7 @@ static void repack_row_q8x4x2(uint8_t * y, const block_q8_0 * x, int64_t k) { // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Repack the scales ggml_half * d = (ggml_half *) (y_d + i * dblk_size); @@ -829,7 +829,7 @@ static void unpack_row_q8x4x2(block_q8_0 * x, const uint8_t * y, int64_t k) { // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales const ggml_half * d = (const ggml_half *) (y_d + i * dblk_size); @@ -878,7 +878,7 @@ static void init_row_q8x4x2(block_q8_0 * x, int64_t k) { // Init the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_Q8_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales x[i * 8 + 0].d = 0; @@ -1120,7 +1120,7 @@ static void repack_row_mxfp4x4x2(uint8_t * y, const block_mxfp4 * x, int64_t k) // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_MXFP4x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Repack the scales uint8_t * e = (uint8_t *) (y_e + i * eblk_size); @@ -1180,7 +1180,7 @@ static void unpack_row_mxfp4x4x2(block_mxfp4 * x, const uint8_t * y, int64_t k) // Repack the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_MXFP4_0x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales const uint8_t * e = (const uint8_t *) (y_e + i * eblk_size); @@ -1229,7 +1229,7 @@ static void init_row_mxfp4x4x2(block_mxfp4 * x, int64_t k) { // Init the scales // Note: Do not combine with the loop above. For tensor sizes not multiple of 256 (QK_MXFP4x4x2) - // the last block is truncated and overriden by the scales. + // the last block is truncated and overridden by the scales. for (int i = 0; i < nb; i++) { // Unpack the scales x[i * 8 + 0].e = 0; @@ -2670,7 +2670,7 @@ static std::vector ggml_hexagon_graph_optimize_reorder(const std::vectorn_jobs); unsigned int i = atomic_fetch_add(&pool->next_job, 1); if (i >= n) { - // Spurios wakeup + // Spurious wakeup continue; } diff --git a/src/ggml-metal/ggml-metal-device.m b/src/ggml-metal/ggml-metal-device.m index 3db7f126..4cce414a 100644 --- a/src/ggml-metal/ggml-metal-device.m +++ b/src/ggml-metal/ggml-metal-device.m @@ -1281,7 +1281,7 @@ struct ggml_metal_buffer { bool use_residency_sets; // optional MTLResidencySet - // note: cannot use explicity "id" here because it is not available on certain OSes + // note: cannot use explicitly "id" here because it is not available on certain OSes id rset; // pointers to global device diff --git a/src/ggml-metal/ggml-metal-ops.cpp b/src/ggml-metal/ggml-metal-ops.cpp index 3d5db0b7..b3390352 100644 --- a/src/ggml-metal/ggml-metal-ops.cpp +++ b/src/ggml-metal/ggml-metal-ops.cpp @@ -631,7 +631,7 @@ int ggml_metal_op_acc(ggml_metal_op_t ctx, int idx) { const bool inplace = (bool) ((const int32_t *) op->op_params)[4]; if (!inplace) { - // run a separete kernel to cpy src->dst + // run a separate kernel to cpy src->dst // not sure how to avoid this // TODO: make a simpler cpy_bytes kernel @@ -1644,7 +1644,7 @@ int ggml_metal_op_set(ggml_metal_op_t ctx, int idx) { const bool inplace = (bool) ((const int32_t *) op->op_params)[4]; if (!inplace) { - // run a separete kernel to cpy src->dst + // run a separate kernel to cpy src->dst // not sure how to avoid this // TODO: make a simpler cpy_bytes kernel @@ -2005,7 +2005,7 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) { const int16_t r0ptg = nypsg*nsg; // num src0 rows per threadgroup int16_t r1ptg = 4; // num src1 rows per threadgroup - // note: not sure how optimal are those across all different hardware. there might be someting cleverer + // note: not sure how optimal are those across all different hardware. there might be something cleverer switch (ne11) { case 2: r1ptg = 2; break; diff --git a/src/ggml-metal/ggml-metal.cpp b/src/ggml-metal/ggml-metal.cpp index 1c705362..9382ce53 100644 --- a/src/ggml-metal/ggml-metal.cpp +++ b/src/ggml-metal/ggml-metal.cpp @@ -14,7 +14,7 @@ #define GGML_METAL_MAX_DEVICES 16 // number of Metal devices -// note: can be overriden with GGML_METAL_DEVICES env to simulate virtual devices +// note: can be overridden with GGML_METAL_DEVICES env to simulate virtual devices static int g_devices = 1; //////////////////////////////////////////////////////////////////////////////// diff --git a/src/ggml-metal/ggml-metal.metal b/src/ggml-metal/ggml-metal.metal index 6c349aa0..a58e641a 100644 --- a/src/ggml-metal/ggml-metal.metal +++ b/src/ggml-metal/ggml-metal.metal @@ -4218,7 +4218,7 @@ kernel void kernel_im2col( template [[host_name("kernel_im2col_f32")]] kernel im2col_t kernel_im2col; template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col; -// TODO: obolete -- remove +// TODO: obsolete -- remove //typedef void (im2col_ext_t)( // constant ggml_metal_kargs_im2col & args, // device const float * x, diff --git a/src/ggml-opencl/ggml-opencl.cpp b/src/ggml-opencl/ggml-opencl.cpp index a4403a5c..7af032ce 100644 --- a/src/ggml-opencl/ggml-opencl.cpp +++ b/src/ggml-opencl/ggml-opencl.cpp @@ -313,7 +313,7 @@ struct ProfilingInfo { cl_ulong cmd_duration_ns; // The time for the kernel to complete - COMPLETE - END cl_ulong cmd_complete_duration_ns; - // Total time to finish the kernel - COMPELTE - QUEUED + // Total time to finish the kernel - COMPLETE - QUEUED cl_ulong cmd_total_duration_ns; // Global and local work sizes. size_t global_size[3]; @@ -2555,7 +2555,7 @@ static std::vector ggml_opencl_probe_devices(ggml_backend_r cl_platform_id platform_ids[NPLAT]; if (clGetPlatformIDs(NPLAT, platform_ids, &n_platforms) != CL_SUCCESS) { - GGML_LOG_ERROR("ggml_opencl: plaform IDs not available.\n"); + GGML_LOG_ERROR("ggml_opencl: platform IDs not available.\n"); return found_devices; } @@ -3339,7 +3339,7 @@ static void ggml_backend_opencl_synchronize(ggml_backend_t backend) { CL_CHECK(clReleaseEvent(evt)); } -// Syncronizes the 'backend_ctx's device with others so that commands +// Synchronizes the 'backend_ctx's device with others so that commands // enqueued to it won't start until commands in the other devices have // completed. static void sync_with_other_backends(ggml_backend_opencl_context * backend_ctx) { @@ -3997,7 +3997,7 @@ struct ggml_backend_opencl_buffer_context { // The buffer_context is initially created by ggml_backend_buft_alloc_buffer // before any tensor is initialized (at the beginning of alloc_tensor_range). - // Hence, there is alway a buffer object in this vector. When each tensor is + // Hence, there is always a buffer object in this vector. When each tensor is // being initialized, this original buffer object will be released if both // flattening and small allocation are enabled, and additional buffer // objects will be created in init_tensor to represent flattened quantized @@ -4132,7 +4132,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, //GGML_ASSERT(offset == 0); // We create subbuffers from the original tensor buffer for scales and - // quants - i.e., scales and quants are aliases into the buffer obejct + // quants - i.e., scales and quants are aliases into the buffer object // that backs the original tensor. This is a cleaner way to adapt to the // new memory management. // In the old code, we allocate new buffers for scales and quants diff --git a/src/ggml-sycl/common.hpp b/src/ggml-sycl/common.hpp index 519638fd..04c9e1d7 100644 --- a/src/ggml-sycl/common.hpp +++ b/src/ggml-sycl/common.hpp @@ -76,10 +76,10 @@ extern int g_ggml_sycl_prioritize_dmmv; #define __SYCL_ARCH__ DPCT_COMPATIBILITY_TEMP -#define VER_4VEC 610 // todo for hardward optimize. -#define VER_GEN9 700 // todo for hardward optimize. -#define VER_GEN12 1000000 // todo for hardward optimize. -#define VER_GEN13 (VER_GEN12 + 1030) // todo for hardward optimize. +#define VER_4VEC 610 // todo for hardware optimize. +#define VER_GEN9 700 // todo for hardware optimize. +#define VER_GEN12 1000000 // todo for hardware optimize. +#define VER_GEN13 (VER_GEN12 + 1030) // todo for hardware optimize. #define GGML_SYCL_MAX_NODES 8192 // TODO: adapt to hardwares diff --git a/src/ggml-sycl/quants.hpp b/src/ggml-sycl/quants.hpp index d0d5ac9a..14490fea 100644 --- a/src/ggml-sycl/quants.hpp +++ b/src/ggml-sycl/quants.hpp @@ -29,7 +29,7 @@ namespace ggml_sycl_reordered { // [qs0, qs1, qs2, ..., qsN] [d0, d1, d2, ..., dN] // // Notes: out-of-bounds qs will run into d values -// Aligment relies on the allocated size of qs +// Alignment relies on the allocated size of qs template struct block_q_t; diff --git a/src/ggml-sycl/softmax.cpp b/src/ggml-sycl/softmax.cpp index b41124ac..15d92e5e 100644 --- a/src/ggml-sycl/softmax.cpp +++ b/src/ggml-sycl/softmax.cpp @@ -37,7 +37,7 @@ struct soft_max_params { }; // When ncols_template == 0 the bounds for the loops in this function are not known and can't be unrolled. -// As we want to keep pragma unroll for all other cases we supress the clang transformation warning here. +// As we want to keep pragma unroll for all other cases we suppress the clang transformation warning here. #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" diff --git a/src/ggml-vulkan/CMakeLists.txt b/src/ggml-vulkan/CMakeLists.txt index de01336c..715a263a 100644 --- a/src/ggml-vulkan/CMakeLists.txt +++ b/src/ggml-vulkan/CMakeLists.txt @@ -90,7 +90,7 @@ if (Vulkan_FOUND) target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build - # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector + # Possibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0) endif() diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 0ac21cdc..7c6938d4 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1868,9 +1868,9 @@ struct test_case { }; -// ################################### -// ## Section 2: GGML Op Defintions ## -// ################################### +// #################################### +// ## Section 2: GGML Op Definitions ## +// #################################### // The following is an example showing the bare minimum for creating a test for a GGML op. @@ -6222,7 +6222,7 @@ struct test_flash_attn_ext : public test_case { void initialize_tensors(ggml_context * ctx) override { for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { if (strcmp(t->name, "s") == 0) { - // make the sink values more noticable in order to trigger a test failure when the implementation is wrong + // make the sink values more noticeable in order to trigger a test failure when the implementation is wrong init_tensor_uniform(t, -10.0f, 10.0f); } else if (strcmp(t->name, "m") == 0) { init_tensor_kq_mask(t);