// select which compute units (CPU, GPU, Neural Engine) the Core ML model may run on
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
- config.computeUnits = MLComputeUnitsCPUAndGPU;
+ // config.computeUnits = MLComputeUnitsCPUAndGPU;
// config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
- //config.computeUnits = MLComputeUnitsAll;
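+ // MLComputeUnitsAll lets Core ML choose among the CPU, GPU and Apple Neural Engine at load time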
+ config.computeUnits = MLComputeUnitsAll;
const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
x = block(x)
x = self.ln_post(x)
-
- # """
- # TODO:
- # I think we need to transpose the result here to make it fit whisper.cpp memory order.
- # However, even doing this, the results are still wrong. Kind of less wrong compared to
- # not transposing, but still wrong.
-
- # Also, I don't know why the original OpenAI implementation does not need to transpose
-
- # transpose to (batch_size, n_ctx, n_state)
- # x : torch.Tensor, shape = (batch_size, n_state, 1, n_ctx)
-
- # """
- # x = x.transpose(1,3)
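+ # (batch_size, n_state, 1, n_ctx) -> (batch_size, n_ctx, n_state), the memory order whisper.cpp expects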
+ x = x.squeeze(2).transpose(1, 2)
return x
echo $mpath
python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
else
- python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
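+ # --optimize-ane converts the encoder to a layout that maps well onto the Apple Neural Engine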
+ python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
fi
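# compile the generated .mlpackage into a .mlmodelc bundle that Core ML loads at runtime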
xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/