// select which compute units (CPU, GPU, Neural Engine) the Core ML model may run on
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
- config.computeUnits = MLComputeUnitsCPUAndGPU;
+ // config.computeUnits = MLComputeUnitsCPUAndGPU;
// config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
- //config.computeUnits = MLComputeUnitsAll;
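+ // MLComputeUnitsAll lets Core ML choose among the CPU, GPU and Apple Neural Engine at load time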
+ config.computeUnits = MLComputeUnitsAll;
const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
x = block(x)
x = self.ln_post(x)
-
- # """
- # TODO:
- # I think we need to transpose the result here to make it fit whisper.cpp memory order.
- # However, even doing this, the results are still wrong. Kind of less wrong compared to
- # not transposing, but still wrong.
-
- # Also, I don't know why the original OpenAI implementation does not need to transpose
-
- # transpose to (batch_size, n_ctx, n_state)
- # x : torch.Tensor, shape = (batch_size, n_state, 1, n_ctx)
-
- # """
- # x = x.transpose(1,3)
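+ # (batch_size, n_state, 1, n_ctx) -> (batch_size, n_ctx, n_state), the memory order whisper.cpp expects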
+ x = x.squeeze(2).transpose(1, 2)
return x
echo $mpath
python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
else
- python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True
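+ # --optimize-ane converts the encoder to a layout that maps well onto the Apple Neural Engine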
+ python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True
fi
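# compile the generated .mlpackage into a .mlmodelc bundle that Core ML loads at runtime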
xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/