From: Daniel Bevenius Date: Wed, 1 Oct 2025 07:13:34 +0000 (+0200) Subject: bindings-java : disable flash attention by default (#3445) X-Git-Tag: upstream/1.8.2~70 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2a5686966944a3fbf192678757afd7120d25732f;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp bindings-java : disable flash attention by default (#3445) This commit disables flash-attention for the Java binding test so that the testFullTranscribe test passes. Without this change the test was failing because the actual output no longer matches the expected output after the flash-attention change: ```console but was: ``` An alternative would be to update the expected output, but it felt better to keep the existing expected output and disable flash-attention rather than change the expectation to match the new behavior. --- diff --git a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java index 4bcdb6b0..66ec5d70 100644 --- a/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java +++ b/bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java @@ -20,7 +20,7 @@ public class WhisperContextParams extends Structure { /** Use GPU for inference (default = true) */ public CBool use_gpu; - /** Use flash attention (default = false) */ + /** Use flash attention (default = true) */ public CBool flash_attn; /** CUDA device to use (default = 0) */ diff --git a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java index bf37e519..e5b22cf8 100644 --- a/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java +++ b/bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java @@ -4,6 +4,7 @@ import static org.junit.jupiter.api.Assertions.*; import 
io.github.ggerganov.whispercpp.bean.WhisperSegment; import io.github.ggerganov.whispercpp.params.CBool; +import io.github.ggerganov.whispercpp.params.WhisperContextParams; import io.github.ggerganov.whispercpp.params.WhisperFullParams; import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy; import org.junit.jupiter.api.BeforeAll; @@ -25,7 +26,9 @@ class WhisperCppTest { //String modelName = "../../models/ggml-tiny.bin"; String modelName = "../../models/ggml-tiny.en.bin"; try { - whisper.initContext(modelName); + WhisperContextParams.ByValue contextParams = whisper.getContextDefaultParams(); + contextParams.useFlashAttn(false); // Disable flash attention + whisper.initContext(modelName, contextParams); //whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY); //whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH); modelInitialised = true;