CANN: Use smart pointers to manage ACL objects (llama/17238)

author hipudding <redacted>

Mon, 17 Nov 2025 00:43:59 +0000 (08:43 +0800)

committer Georgi Gerganov <redacted>

Mon, 17 Nov 2025 19:05:46 +0000 (21:05 +0200)
author hipudding <redacted>
Mon, 17 Nov 2025 00:43:59 +0000 (08:43 +0800)
committer Georgi Gerganov <redacted>
Mon, 17 Nov 2025 19:05:46 +0000 (21:05 +0200)
diff --git a/ggml/src/ggml-cann/Doxyfile b/ggml/src/ggml-cann/Doxyfile

deleted file mode 100755 (executable)

index 3290a48..0000000
--- a/ggml/src/ggml-cann/Doxyfile
+++ /dev/null
@@ -1,2579 +0,0 @@
-# Doxyfile 1.8.17
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a double hash (##) is considered a comment and is placed in
-# front of the TAG it is preceding.
-#
-# All text after a single hash (#) is considered a comment and will be ignored.
-# The format is:
-# TAG = value [value, ...]
-# For lists, items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (\" \").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the configuration
-# file that follow. The default is UTF-8 which is also the encoding used for all
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the
-# iconv built into libc) for the transcoding. See
-# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
-# The default value is: UTF-8.
-
-DOXYFILE_ENCODING      = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
-# double-quotes, unless you are using Doxywizard) that should identify the
-# project for which the documentation is generated. This name is used in the
-# title of most generated pages and in a few other places.
-# The default value is: My Project.
-
-PROJECT_NAME           = "ggml"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
-# could be handy for archiving the generated documentation or if some version
-# control system is used.
-
-PROJECT_NUMBER         =
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give viewer a
-# quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF          = "Tensor library for machine learning"
-
-# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
-# in the documentation. The maximum height of the logo should not exceed 55
-# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
-# the logo to the output directory.
-
-PROJECT_LOGO           =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
-# into which the generated documentation will be written. If a relative path is
-# entered, it will be relative to the location where doxygen was started. If
-# left blank the current directory will be used.
-
-OUTPUT_DIRECTORY       = docs
-
-# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
-# option can be useful when feeding doxygen a huge amount of source files, where
-# putting all generated files in the same directory would otherwise cause
-# performance problems for the file system.
-# The default value is: NO.
-
-CREATE_SUBDIRS         = NO
-
-# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
-# characters to appear in the names of generated files. If set to NO, non-ASCII
-# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
-# U+3044.
-# The default value is: NO.
-
-ALLOW_UNICODE_NAMES    = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
-# The default value is: English.
-
-OUTPUT_LANGUAGE        = English
-
-# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all generated output in the proper direction.
-# Possible values are: None, LTR, RTL and Context.
-# The default value is: None.
-
-OUTPUT_TEXT_DIRECTION  = None
-
-# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
-# descriptions after the members that are listed in the file and class
-# documentation (similar to Javadoc). Set to NO to disable this.
-# The default value is: YES.
-
-BRIEF_MEMBER_DESC      = YES
-
-# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
-# description of a member or function before the detailed description.
-#
-# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-# The default value is: YES.
-
-REPEAT_BRIEF           = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator that is
-# used to form the text in various listings. Each string in this list, if found
-# as the leading text of the brief description, will be stripped from the text
-# and the result, after processing the whole list, is used as the annotated
-# text. Otherwise, the brief description is used as-is. If left blank, the
-# following values are used ($name is automatically replaced with the name of
-# the entity): The $name class, The $name widget, The $name file, is, provides,
-# specifies, contains, represents, a, an and the.
-
-ABBREVIATE_BRIEF       = "The $name class" \
-                         "The $name widget" \
-                         "The $name file" \
-                         is \
-                         provides \
-                         specifies \
-                         contains \
-                         represents \
-                         a \
-                         an \
-                         the
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# doxygen will generate a detailed section even if there is only a brief
-# description.
-# The default value is: NO.
-
-ALWAYS_DETAILED_SEC    = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-# The default value is: NO.
-
-INLINE_INHERITED_MEMB  = NO
-
-# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
-# before file names in the file list and in the header files. If set to NO the
-# shortest path that makes the file name unique will be used.
-# The default value is: YES.
-
-FULL_PATH_NAMES        = YES
-
-# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
-# Stripping is only done if one of the specified strings matches the left-hand
-# part of the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the path to
-# strip.
-#
-# Note that you can specify absolute paths here, but also relative paths, which
-# will be relative from the directory where doxygen is started.
-# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-
-STRIP_FROM_PATH        =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
-# path mentioned in the documentation of a class, which tells the reader which
-# header file to include in order to use a class. If left blank only the name of
-# the header file containing the class definition is used. Otherwise one should
-# specify the list of include paths that are normally passed to the compiler
-# using the -I flag.
-
-STRIP_FROM_INC_PATH    =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
-# less readable) file names. This can be useful if your file system doesn't
-# support long names like on DOS, Mac, or CD-ROM.
-# The default value is: NO.
-
-SHORT_NAMES            = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
-# first line (until the first dot) of a Javadoc-style comment as the brief
-# description. If set to NO, the Javadoc-style will behave just like regular Qt-
-# style comments (thus requiring an explicit @brief command for a brief
-# description.)
-# The default value is: NO.
-
-JAVADOC_AUTOBRIEF      = NO
-
-# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
-# such as
-# /***************
-# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
-# Javadoc-style will behave just like regular comments and it will not be
-# interpreted by doxygen.
-# The default value is: NO.
-
-JAVADOC_BANNER         = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
-# line (until the first dot) of a Qt-style comment as the brief description. If
-# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
-# requiring an explicit \brief command for a brief description.)
-# The default value is: NO.
-
-QT_AUTOBRIEF           = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
-# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
-# a brief description. This used to be the default behavior. The new default is
-# to treat a multi-line C++ comment block as a detailed description. Set this
-# tag to YES if you prefer the old behavior instead.
-#
-# Note that setting this tag to YES also means that rational rose comments are
-# not recognized any more.
-# The default value is: NO.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
-# documentation from any documented member that it re-implements.
-# The default value is: YES.
-
-INHERIT_DOCS           = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
-# page for each member. If set to NO, the documentation of a member will be part
-# of the file/class/namespace that contains it.
-# The default value is: NO.
-
-SEPARATE_MEMBER_PAGES  = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
-# uses this value to replace tabs by spaces in code fragments.
-# Minimum value: 1, maximum value: 16, default value: 4.
-
-TAB_SIZE               = 4
-
-# This tag can be used to specify a number of aliases that act as commands in
-# the documentation. An alias has the form:
-# name=value
-# For example adding
-# "sideeffect=@par Side Effects:\n"
-# will allow you to put the command \sideeffect (or @sideeffect) in the
-# documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines (in the resulting output). You can put ^^ in the value part of an
-# alias to insert a newline as if a physical newline was in the original file.
-# When you need a literal { or } or , in the value part of an alias you have to
-# escape them by means of a backslash (\), this can lead to conflicts with the
-# commands \{ and \} for these it is advised to use the version @{ and @} or use
-# a double escape (\\{ and \\})
-
-ALIASES                =
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
-# only. Doxygen will then generate output that is more tailored for C. For
-# instance, some of the names that are used will be different. The list of all
-# members will be omitted, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_FOR_C  = NO
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
-# Python sources only. Doxygen will then generate output that is more tailored
-# for that language. For instance, namespaces will be presented as packages,
-# qualified scopes will look different, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_JAVA   = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources. Doxygen will then generate output that is tailored for Fortran.
-# The default value is: NO.
-
-OPTIMIZE_FOR_FORTRAN   = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for VHDL.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_VHDL   = NO
-
-# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
-# sources only. Doxygen will then generate output that is more tailored for that
-# language. For instance, namespaces will be presented as modules, types will be
-# separated into more groups, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_SLICE  = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
-# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice,
-# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
-# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
-# tries to guess whether the code is fixed or free formatted code, this is the
-# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat
-# .inc files as Fortran files (default is PHP), and .f files as C (default is
-# Fortran), use: inc=Fortran f=C.
-#
-# Note: For files without extension you can use no_extension as a placeholder.
-#
-# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
-
-EXTENSION_MAPPING      =
-
-# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
-# according to the Markdown format, which allows for more readable
-# documentation. See https://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you can
-# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
-# case of backward compatibility issues.
-# The default value is: YES.
-
-MARKDOWN_SUPPORT       = YES
-
-# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
-# to that level are automatically included in the table of contents, even if
-# they do not have an id attribute.
-# Note: This feature currently applies only to Markdown headings.
-# Minimum value: 0, maximum value: 99, default value: 5.
-# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
-
-TOC_INCLUDE_HEADINGS   = 5
-
-# When enabled doxygen tries to link words that correspond to documented
-# classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by putting a % sign in front of the word or
-# globally by setting AUTOLINK_SUPPORT to NO.
-# The default value is: YES.
-
-AUTOLINK_SUPPORT       = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should set this
-# tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string);
-# versus func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-# The default value is: NO.
-
-BUILTIN_STL_SUPPORT    = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-# The default value is: NO.
-
-CPP_CLI_SUPPORT        = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
-# will parse them like normal C++ but will assume all classes use public instead
-# of private inheritance when no explicit protection keyword is present.
-# The default value is: NO.
-
-SIP_SUPPORT            = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate
-# getter and setter methods for a property. Setting this option to YES will make
-# doxygen replace the get and set methods by a property in the documentation.
-# This will only work if the methods are indeed getting or setting a simple
-# type. If this is not the case, or you want to show the methods anyway, you
-# should set this option to NO.
-# The default value is: YES.
-
-IDL_PROPERTY_SUPPORT   = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-# The default value is: NO.
-
-DISTRIBUTE_GROUP_DOC   = NO
-
-# If one adds a struct or class to a group and this option is enabled, then also
-# any nested class or struct is added to the same group. By default this option
-# is disabled and one has to add nested compounds explicitly via \ingroup.
-# The default value is: NO.
-
-GROUP_NESTED_COMPOUNDS = NO
-
-# Set the SUBGROUPING tag to YES to allow class member groups of the same type
-# (for instance a group of public functions) to be put as a subgroup of that
-# type (e.g. under the Public Functions section). Set it to NO to prevent
-# subgrouping. Alternatively, this can be done per class using the
-# \nosubgrouping command.
-# The default value is: YES.
-
-SUBGROUPING            = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
-# are shown inside the group in which they are included (e.g. using \ingroup)
-# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
-# and RTF).
-#
-# Note that this feature does not work in combination with
-# SEPARATE_MEMBER_PAGES.
-# The default value is: NO.
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
-# with only public data fields or simple typedef fields will be shown inline in
-# the documentation of the scope in which they are defined (i.e. file,
-# namespace, or group documentation), provided this scope is documented. If set
-# to NO, structs, classes, and unions are shown on a separate page (for HTML and
-# Man pages) or section (for LaTeX and RTF).
-# The default value is: NO.
-
-INLINE_SIMPLE_STRUCTS  = NO
-
-# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
-# enum is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically be
-# useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-# The default value is: NO.
-
-TYPEDEF_HIDES_STRUCT   = NO
-
-# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
-# cache is used to resolve symbols given their name and scope. Since this can be
-# an expensive process and often the same symbol appears multiple times in the
-# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
-# doxygen will become slower. If the cache is too large, memory is wasted. The
-# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
-# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
-# symbols. At the end of a run doxygen will report the cache usage and suggest
-# the optimal cache size from a speed point of view.
-# Minimum value: 0, maximum value: 9, default value: 0.
-
-LOOKUP_CACHE_SIZE      = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
-# documentation are documented, even if no documentation was available. Private
-# class members and static file members will be hidden unless the
-# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
-# Note: This will also disable the warnings about undocumented members that are
-# normally produced when WARNINGS is set to YES.
-# The default value is: NO.
-
-EXTRACT_ALL            = YES
-
-# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
-# be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIVATE        = YES
-
-# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
-# methods of a class will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIV_VIRTUAL   = YES
-
-# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
-# scope will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PACKAGE        = YES
-
-# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
-# included in the documentation.
-# The default value is: NO.
-
-EXTRACT_STATIC         = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO,
-# only classes defined in header files are included. Does not have any effect
-# for Java sources.
-# The default value is: YES.
-
-EXTRACT_LOCAL_CLASSES  = YES
-
-# This flag is only useful for Objective-C code. If set to YES, local methods,
-# which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO, only methods in the interface are
-# included.
-# The default value is: NO.
-
-EXTRACT_LOCAL_METHODS  = YES
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base name of
-# the file that contains the anonymous namespace. By default anonymous namespaces
-# are hidden.
-# The default value is: NO.
-
-EXTRACT_ANON_NSPACES   = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
-# undocumented members inside documented classes or files. If set to NO these
-# members will be included in the various overviews, but no documentation
-# section is generated. This option has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_MEMBERS     = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy. If set
-# to NO, these classes will be included in the various overviews. This option
-# has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_CLASSES     = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# declarations. If set to NO, these declarations will be included in the
-# documentation.
-# The default value is: NO.
-
-HIDE_FRIEND_COMPOUNDS  = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO, these
-# blocks will be appended to the function's detailed documentation block.
-# The default value is: NO.
-
-HIDE_IN_BODY_DOCS      = NO
-
-# The INTERNAL_DOCS tag determines if documentation that is typed after a
-# \internal command is included. If the tag is set to NO then the documentation
-# will be excluded. Set it to YES to include the internal documentation.
-# The default value is: NO.
-
-INTERNAL_DOCS          = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
-
-CASE_SENSE_NAMES       = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES, the
-# scope will be hidden.
-# The default value is: NO.
-
-HIDE_SCOPE_NAMES       = NO
-
-# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
-# append additional text to a page's title, such as Class Reference. If set to
-# YES the compound reference will be hidden.
-# The default value is: NO.
-
-HIDE_COMPOUND_REFERENCE= NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
-# the files that are included by a file in the documentation of that file.
-# The default value is: YES.
-
-SHOW_INCLUDE_FILES     = YES
-
-# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
-# grouped member an include statement to the documentation, telling the reader
-# which file to include in order to use the member.
-# The default value is: NO.
-
-SHOW_GROUPED_MEMB_INC  = NO
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
-# files with double quotes in the documentation rather than with sharp brackets.
-# The default value is: NO.
-
-FORCE_LOCAL_INCLUDES   = NO
-
-# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
-# documentation for inline members.
-# The default value is: YES.
-
-INLINE_INFO            = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
-# (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order.
-# The default value is: YES.
-
-SORT_MEMBER_DOCS       = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
-# descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order. Note that
-# this will also influence the order of the classes in the class list.
-# The default value is: NO.
-
-SORT_BRIEF_DOCS        = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
-# (brief and detailed) documentation of class members so that constructors and
-# destructors are listed first. If set to NO the constructors will appear in the
-# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
-# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
-# member documentation.
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
-# detailed member documentation.
-# The default value is: NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
-# of group names into alphabetical order. If set to NO the group names will
-# appear in their defined order.
-# The default value is: NO.
-
-SORT_GROUP_NAMES       = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
-# fully-qualified names, including namespaces. If set to NO, the class list will
-# be sorted only by class name, not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the alphabetical
-# list.
-# The default value is: NO.
-
-SORT_BY_SCOPE_NAME     = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
-# type resolution of all parameters of a function it will reject a match between
-# the prototype and the implementation of a member function even if there is
-# only one candidate or it is obvious which candidate to choose by doing a
-# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
-# accept a match between prototype and implementation in such cases.
-# The default value is: NO.
-
-STRICT_PROTO_MATCHING  = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
-# list. This list is created by putting \todo commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TODOLIST      = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
-# list. This list is created by putting \test commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TESTLIST      = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
-# list. This list is created by putting \bug commands in the documentation.
-# The default value is: YES.
-
-GENERATE_BUGLIST       = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
-# the deprecated list. This list is created by putting \deprecated commands in
-# the documentation.
-# The default value is: YES.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional documentation
-# sections, marked by \if <section_label> ... \endif and \cond <section_label>
-# ... \endcond blocks.
-
-ENABLED_SECTIONS       =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
-# initial value of a variable or macro / define can have for it to appear in the
-# documentation. If the initializer consists of more lines than specified here
-# it will be hidden. Use a value of 0 to hide initializers completely. The
-# appearance of the value of individual variables and macros / defines can be
-# controlled using \showinitializer or \hideinitializer command in the
-# documentation regardless of this setting.
-# Minimum value: 0, maximum value: 10000, default value: 30.
-
-MAX_INITIALIZER_LINES  = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
-# the bottom of the documentation of classes and structs. If set to YES, the
-# list will mention the files that were used to generate the documentation.
-# The default value is: YES.
-
-SHOW_USED_FILES        = YES
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
-# will remove the Files entry from the Quick Index and from the Folder Tree View
-# (if specified).
-# The default value is: YES.
-
-SHOW_FILES             = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
-# page. This will remove the Namespaces entry from the Quick Index and from the
-# Folder Tree View (if specified).
-# The default value is: YES.
-
-SHOW_NAMESPACES        = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command command input-file, where command is the value of the
-# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
-# by doxygen. Whatever the program writes to standard output is used as the file
-# version. For an example see the documentation.
-
-FILE_VERSION_FILTER    =
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option. You can
-# optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
-#
-# Note that if you run doxygen from a directory containing a file called
-# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
-# tag is left empty.
-
-LAYOUT_FILE            =
-
-# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
-# the reference definitions. This must be a list of .bib files. The .bib
-# extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
-# For LaTeX the style of the bibliography can be controlled using
-# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
-# search path. See also \cite for info how to create references.
-
-CITE_BIB_FILES         =
-
-#---------------------------------------------------------------------------
-# Configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated to
-# standard output by doxygen. If QUIET is set to YES this implies that the
-# messages are off.
-# The default value is: NO.
-
-QUIET                  = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
-# this implies that the warnings are on.
-#
-# Tip: Turn warnings on while writing the documentation.
-# The default value is: YES.
-
-WARNINGS               = YES
-
-# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
-# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
-# will automatically be disabled.
-# The default value is: YES.
-
-WARN_IF_UNDOCUMENTED   = YES
-
-# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
-# The default value is: YES.
-
-WARN_IF_DOC_ERROR      = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
-# are documented, but have no documentation for their parameters or return
-# value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation. If
-# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
-# The default value is: NO.
-
-WARN_NO_PARAMDOC       = NO
-
-# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
-# The default value is: NO.
-
-WARN_AS_ERROR          = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that doxygen
-# can produce. The string should contain the $file, $line, and $text tags, which
-# will be replaced by the file and line number from which the warning originated
-# and the warning text. Optionally the format may contain $version, which will
-# be replaced by the version of the file (if it could be obtained via
-# FILE_VERSION_FILTER)
-# The default value is: $file:$line: $text.
-
-WARN_FORMAT            = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning and error
-# messages should be written. If left blank the output is written to standard
-# error (stderr).
-
-WARN_LOGFILE           =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag is used to specify the files and/or directories that contain
-# documented source files. You may enter file names like myfile.cpp or
-# directories like /usr/src/myproject. Separate the files or directories with
-# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
-# Note: If this tag is empty the current directory is searched.
-
-INPUT                  =
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
-# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
-# The default value is: UTF-8.
-
-INPUT_ENCODING         = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
-# *.h) to filter out the source-files in the directories.
-#
-# Note that for custom extensions or not directly supported extensions you also
-# need to set EXTENSION_MAPPING for the extension otherwise the files are not
-# read by doxygen.
-#
-# If left blank the following patterns are tested: *.c, *.cc, *.cxx, *.cpp,
-# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
-# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
-# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
-# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd,
-# *.vhdl, *.ucf, *.qsf and *.ice.
-
-FILE_PATTERNS          = *.c \
-                         *.cc \
-                         *.cxx \
-                         *.cpp \
-                         *.c++ \
-                         *.java \
-                         *.ii \
-                         *.ixx \
-                         *.ipp \
-                         *.i++ \
-                         *.inl \
-                         *.idl \
-                         *.ddl \
-                         *.odl \
-                         *.h \
-                         *.hh \
-                         *.hxx \
-                         *.hpp \
-                         *.h++ \
-                         *.cs \
-                         *.d \
-                         *.php \
-                         *.php4 \
-                         *.php5 \
-                         *.phtml \
-                         *.inc \
-                         *.m \
-                         *.markdown \
-                         *.md \
-                         *.mm \
-                         *.dox \
-                         *.doc \
-                         *.txt \
-                         *.py \
-                         *.pyw \
-                         *.f90 \
-                         *.f95 \
-                         *.f03 \
-                         *.f08 \
-                         *.f \
-                         *.for \
-                         *.tcl \
-                         *.vhd \
-                         *.vhdl \
-                         *.ucf \
-                         *.qsf \
-                         *.ice
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories should
-# be searched for input files as well.
-# The default value is: NO.
-
-RECURSIVE              = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-#
-# Note that relative paths are relative to the directory from which doxygen is
-# run.
-
-EXCLUDE                =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix file system feature) are excluded
-# from the input.
-# The default value is: NO.
-
-EXCLUDE_SYMLINKS       = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories.
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories for example use the pattern */test/*
-
-EXCLUDE_PATTERNS       =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards here are matched against symbol names, not file
-# paths; to exclude all test directories by path (for example with the pattern
-# */test/*) use EXCLUDE_PATTERNS instead.
-
-EXCLUDE_SYMBOLS        =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or directories
-# that contain example code fragments that are included (see the \include
-# command).
-
-EXAMPLE_PATH           =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank all
-# files are included.
-
-EXAMPLE_PATTERNS       = *
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude commands
-# irrespective of the value of the RECURSIVE tag.
-# The default value is: NO.
-
-EXAMPLE_RECURSIVE      = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or directories
-# that contain images that are to be included in the documentation (see the
-# \image command).
-
-IMAGE_PATH             =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command:
-#
-# <filter> <input-file>
-#
-# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
-# name of an input file. Doxygen will then use the output that the filter
-# program writes to standard output. If FILTER_PATTERNS is specified, this tag
-# will be ignored.
-#
-# Note that the filter must not add or remove lines; it is applied before the
-# code is scanned, but not when the output code is generated. If lines are added
-# or removed, the anchors will not be placed correctly.
-#
-# Note that for custom extensions or not directly supported extensions you also
-# need to set EXTENSION_MAPPING for the extension otherwise the files are not
-# properly processed by doxygen.
-
-INPUT_FILTER           =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form: pattern=filter
-# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
-# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
-# patterns match the file name, INPUT_FILTER is applied.
-#
-# Note that for custom extensions or not directly supported extensions you also
-# need to set EXTENSION_MAPPING for the extension otherwise the files are not
-# properly processed by doxygen.
-
-FILTER_PATTERNS        =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will also be used to filter the input files that are used for
-# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
-# The default value is: NO.
-
-FILTER_SOURCE_FILES    = NO
-
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
-# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
-# it is also possible to disable source filtering for a specific pattern using
-# *.ext= (so without naming a filter).
-# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
-
-FILTER_SOURCE_PATTERNS =
-
-# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
-# is part of the input, its contents will be placed on the main page
-# (index.html). This can be useful if you have a project on for instance GitHub
-# and want to reuse the introduction page also for the doxygen output.
-
-USE_MDFILE_AS_MAINPAGE =
-
-#---------------------------------------------------------------------------
-# Configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
-# generated. Documented entities will be cross-referenced with these sources.
-#
-# Note: To get rid of all source code in the generated output, make sure that
-# also VERBATIM_HEADERS is set to NO.
-# The default value is: NO.
-
-SOURCE_BROWSER         = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body of functions,
-# classes and enums directly into the documentation.
-# The default value is: NO.
-
-INLINE_SOURCES         = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
-# special comment blocks from generated source code fragments. Normal C, C++ and
-# Fortran comments will always remain visible.
-# The default value is: YES.
-
-STRIP_CODE_COMMENTS    = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# entity all documented functions referencing it will be listed.
-# The default value is: NO.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES then for each documented function
-# all documented entities called/used by that function will be listed.
-# The default value is: NO.
-
-REFERENCES_RELATION    = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
-# to YES then the hyperlinks from functions in REFERENCES_RELATION and
-# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
-# link to the documentation.
-# The default value is: YES.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
-# source code will show a tooltip with additional information such as prototype,
-# brief description and links to the definition and documentation. Since this
-# will make the HTML file larger and loading of large files a bit slower, you
-# can opt to disable this feature.
-# The default value is: YES.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-SOURCE_TOOLTIPS        = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code will
-# point to the HTML generated by the htags(1) tool instead of doxygen built-in
-# source browser. The htags tool is part of GNU's global source tagging system
-# (see https://www.gnu.org/software/global/global.html). You will need version
-# 4.8.6 or higher.
-#
-# To use it do the following:
-# - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
-# - Make sure the INPUT points to the root of the source tree
-# - Run doxygen as normal
-#
-# Doxygen will invoke htags (and that will in turn invoke gtags), so these
-# tools must be available from the command line (i.e. in the search path).
-#
-# The result: instead of the source browser generated by doxygen, the links to
-# source code will now point to the output of htags.
-# The default value is: NO.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-USE_HTAGS              = NO
-
-# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
-# verbatim copy of the header file for each class for which an include is
-# specified. Set to NO to disable this.
-# See also: Section \class.
-# The default value is: YES.
-
-VERBATIM_HEADERS       = YES
-
-# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
-# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
-# cost of reduced performance. This can be particularly helpful with template
-# rich C++ code for which doxygen's built-in parser lacks the necessary type
-# information.
-# Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse_libclang=ON option for CMake.
-# The default value is: NO.
-
-CLANG_ASSISTED_PARSING = NO
-
-# If clang assisted parsing is enabled you can provide the compiler with command
-# line options that you would normally use when invoking the compiler. Note that
-# the include paths will already be set by doxygen for the files and directories
-# specified with INPUT and INCLUDE_PATH.
-# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
-
-CLANG_OPTIONS          =
-
-# If clang assisted parsing is enabled you can provide the clang parser with the
-# path to the compilation database (see:
-# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
-# were built. This is equivalent to specifying the "-p" option to a clang tool,
-# such as clang-check. These options will then be passed to the parser.
-# Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse_libclang=ON option for CMake.
-
-CLANG_DATABASE_PATH    =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
-# compounds will be generated. Enable this if the project contains a lot of
-# classes, structs, unions or interfaces.
-# The default value is: YES.
-
-ALPHABETICAL_INDEX     = YES
-
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-IGNORE_PREFIX          =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
-# The default value is: YES.
-
-GENERATE_HTML          = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_OUTPUT            = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
-# generated HTML page (for example: .htm, .php, .asp).
-# The default value is: .html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FILE_EXTENSION    = .html
-
-# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
-# each generated HTML page. If the tag is left blank doxygen will generate a
-# standard header.
-#
-# To get valid HTML, the header file must include any scripts and style sheets
-# that doxygen needs, which depend on the configuration options used (e.g.
-# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
-# default header using
-# doxygen -w html new_header.html new_footer.html new_stylesheet.css
-# YourConfigFile
-# and then modify the file new_header.html. See also section "Doxygen usage"
-# for information on how to generate the default header that doxygen normally
-# uses.
-# Note: The header is subject to change so you typically have to regenerate the
-# default header when upgrading to a newer version of doxygen. For a description
-# of the possible markers and block names see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_HEADER            =
-
-# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
-# generated HTML page. If the tag is left blank doxygen will generate a standard
-# footer. See HTML_HEADER for more information on how to generate a default
-# footer and what special commands can be used inside the footer. See also
-# section "Doxygen usage" for information on how to generate the default footer
-# that doxygen normally uses.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FOOTER            =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
-# sheet that is used by each HTML page. It can be used to fine-tune the look of
-# the HTML output. If left blank doxygen will generate a default style sheet.
-# See also section "Doxygen usage" for information on how to generate the style
-# sheet that doxygen normally uses.
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
-# it is more robust and this tag (HTML_STYLESHEET) will in the future become
-# obsolete.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_STYLESHEET        =
-
-# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
-# cascading style sheets that are included after the standard style sheets
-# created by doxygen. Using this option one can overrule certain style aspects.
-# This is preferred over using HTML_STYLESHEET since it does not replace the
-# standard style sheet and is therefore more robust against future updates.
-# Doxygen will copy the style sheet files to the output directory.
-# Note: The order of the extra style sheet files is of importance (e.g. the last
-# style sheet in the list overrules the setting of the previous ones in the
-# list). For an example see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_STYLESHEET  =
-
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the HTML output directory. Note
-# that these files will be copied to the base HTML output directory. Use the
-# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
-# files will be copied as-is; there are no commands or markers available.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_FILES       =
-
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
-# will adjust the colors in the style sheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
-# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
-# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
-# purple, and 360 is red again.
-# Minimum value: 0, maximum value: 359, default value: 220.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_HUE    = 220
-
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
-# value of 255 will produce the most vivid colors.
-# Minimum value: 0, maximum value: 255, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_SAT    = 100
-
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
-# luminance component of the colors in the HTML output. Values below 100
-# gradually make the output lighter, whereas values above 100 make the output
-# darker. The value divided by 100 is the actual gamma applied, so 80 represents
-# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
-# change the gamma.
-# Minimum value: 40, maximum value: 240, default value: 80.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_GAMMA  = 80
-
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to YES can help to show when doxygen was last run and thus if the
-# documentation is up to date.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP         = NO
-
-# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
-# documentation will contain a main index with vertical navigation menus that
-# are dynamically created via JavaScript. If disabled, the navigation index will
-# consist of multiple levels of tabs that are statically embedded in every HTML
-# page. Disable this option to support browsers that do not have JavaScript,
-# like the Qt help browser.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_DYNAMIC_MENUS     = YES
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_DYNAMIC_SECTIONS  = NO
-
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
-# shown in the various tree structured indices initially; the user can expand
-# and collapse entries dynamically later on. Doxygen will expand the tree to
-# such a level that at most the specified number of entries are visible (unless
-# a fully collapsed tree already exceeds this amount). So setting the number of
-# entries to 1 will produce a fully collapsed tree by default. 0 is a special
-# value representing an infinite number of entries and will result in a fully
-# expanded tree by default.
-# Minimum value: 0, maximum value: 9999, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_INDEX_NUM_ENTRIES = 100
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files will be
-# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See
-# https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html
-# for more information.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_DOCSET        = NO
-
-# This tag determines the name of the docset feed. A documentation feed provides
-# an umbrella under which multiple documentation sets from a single provider
-# (such as a company or product suite) can be grouped.
-# The default value is: Doxygen generated docs.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-
-# This tag specifies a string that should uniquely identify the documentation
-# set bundle. This should be a reverse domain-name style string, e.g.
-# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-
-# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
-# the documentation publisher. This should be a reverse domain-name style
-# string, e.g. com.mycompany.MyDocSet.documentation.
-# The default value is: org.doxygen.Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-
-# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
-# The default value is: Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_NAME  = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
-# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
-# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
-#
-# The HTML Help Workshop contains a compiler that can convert all HTML output
-# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
-# files are now used as the Windows 98 help format, and will replace the old
-# Windows help format (.hlp) on all Windows platforms in the future. Compressed
-# HTML files also contain an index, a table of contents, and you can search for
-# words in the documentation. The HTML workshop also contains a viewer for
-# compressed HTML files.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_HTMLHELP      = NO
-
-# The CHM_FILE tag can be used to specify the file name of the resulting .chm
-# file. You can add a path in front of the file if the result should not be
-# written to the html output directory.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_FILE               =
-
-# The HHC_LOCATION tag can be used to specify the location (absolute path
-# including file name) of the HTML help compiler (hhc.exe). If non-empty,
-# doxygen will try to run the HTML help compiler on the generated index.hhp.
-# The file has to be specified with full path.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-HHC_LOCATION           =
-
-# The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-GENERATE_CHI           = NO
-
-# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
-# and project file content.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_INDEX_ENCODING     =
-
-# The BINARY_TOC flag controls whether a binary table of contents is generated
-# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
-# enables the Previous and Next buttons.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-BINARY_TOC             = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members to
-# the table of contents of the HTML help documentation and to the tree view.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-TOC_EXPAND             = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
-# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
-# (.qch) of the generated HTML documentation.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_QHP           = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
-# the file name of the resulting .qch file. The path specified is relative to
-# the HTML output folder.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QCH_FILE               =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
-# Project output. For more information please see Qt Help Project / Namespace
-# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_NAMESPACE          = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
-# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
-# The default value is: doc.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_VIRTUAL_FOLDER     = doc
-
-# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
-# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_NAME   =
-
-# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_ATTRS  =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's filter section matches. Qt Help Project / Filter Attributes (see:
-# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_SECT_FILTER_ATTRS  =
-
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHG_LOCATION           =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
-# generated; together with the HTML files, they form an Eclipse help plugin. To
-# install this plugin and make it available under the help contents menu in
-# Eclipse, the contents of the directory containing the HTML and XML files needs
-# to be copied into the plugins directory of eclipse. The name of the directory
-# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
-# After copying Eclipse needs to be restarted before the help appears.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_ECLIPSEHELP   = NO
-
-# A unique identifier for the Eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have this
-# name. Each documentation set should have its own identifier.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
-
-ECLIPSE_DOC_ID         = org.doxygen.Project
-
-# If you want full control over the layout of the generated HTML pages it might
-# be necessary to disable the index and replace it with your own. The
-# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
-# of each HTML page. A value of NO enables the index and the value YES disables
-# it. Since the tabs in the index contain the same information as the navigation
-# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-DISABLE_INDEX          = NO
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information. If the tag
-# value is set to YES, a side panel will be generated containing a tree-like
-# index structure (just like the one that is generated for HTML Help). For this
-# to work a browser that supports JavaScript, DHTML, CSS and frames is required
-# (i.e. any modern browser). Windows users are probably better off using the
-# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_TREEVIEW      = NO
-
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
-# doxygen will group on one line in the generated HTML documentation.
-#
-# Note that a value of 0 will completely suppress the enum values from appearing
-# in the overview section.
-# Minimum value: 0, maximum value: 20, default value: 4.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-ENUM_VALUES_PER_LINE   = 4
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
-# to set the initial width (in pixels) of the frame in which the tree is shown.
-# Minimum value: 0, maximum value: 1500, default value: 250.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-TREEVIEW_WIDTH         = 250
-
-# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
-# external symbols imported via tag files in a separate window.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-EXT_LINKS_IN_WINDOW    = NO
-
-# Use this tag to change the font size of LaTeX formulas included as images in
-# the HTML documentation. When you change the font size after a successful
-# doxygen run you need to manually remove any form_*.png images from the HTML
-# output directory to force them to be regenerated.
-# Minimum value: 8, maximum value: 50, default value: 10.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_FONTSIZE       = 10
-
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes take effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT    = YES
-
-# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
-# to create new LaTeX commands to be used in formulas as building blocks. See
-# the section "Including formulas" for details.
-
-FORMULA_MACROFILE      =
-
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# https://www.mathjax.org) which uses client side JavaScript for the rendering
-# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
-# installed or if you want formulas to look prettier in the HTML output. When
-# enabled you may also need to install MathJax separately and configure the path
-# to it using the MATHJAX_RELPATH option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-USE_MATHJAX            = YES
-
-# When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
-# Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
-# The default value is: HTML-CSS.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_FORMAT         = HTML-CSS
-
-# When MathJax is enabled you need to specify the location relative to the HTML
-# output directory using the MATHJAX_RELPATH option. The destination directory
-# should contain the MathJax.js script. For instance, if the mathjax directory
-# is located at the same level as the HTML output directory, then
-# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
-# Content Delivery Network so you can quickly see the result without installing
-# MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from https://www.mathjax.org before deployment.
-# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
-
-# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
-# extension names that should be enabled during MathJax rendering. For example
-# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_EXTENSIONS     =
-
-# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
-# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
-# example see the documentation.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_CODEFILE       =
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
-# the HTML output. The underlying search engine uses javascript and DHTML and
-# should work on any modern browser. Note that when using HTML help
-# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
-# there is already a search function so this one should typically be disabled.
-# For large projects the javascript based search engine can be slow, then
-# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
-# search using the keyboard; to jump to the search box use <access key> + S
-# (what the <access key> is depends on the OS and browser, but it is typically
-# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
-# key> to jump into the search results window, the results can be navigated
-# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
-# the search. The filter options can be selected when the cursor is inside the
-# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
-# to select a filter and <Enter> or <escape> to activate or cancel the filter
-# option.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-SEARCHENGINE           = YES
-
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using JavaScript. There
-# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
-# setting. When disabled, doxygen will generate a PHP script for searching and
-# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
-# and searching needs to be provided by external tools. See the section
-# "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SERVER_BASED_SEARCH    = NO
-
-# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
-# script for searching. Instead the search results are written to an XML file
-# which needs to be processed by an external indexer. Doxygen will invoke an
-# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
-# search results.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
-#
-# See the section "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH        = NO
-
-# The SEARCHENGINE_URL should point to a search engine hosted by a web server
-# which will return the search results when EXTERNAL_SEARCH is enabled.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHENGINE_URL       =
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
-# search data is written to a file for indexing by an external tool. With the
-# SEARCHDATA_FILE tag the name of this file can be specified.
-# The default file is: searchdata.xml.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHDATA_FILE        = searchdata.xml
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
-# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
-# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
-# projects and redirect the results back to the right project.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH_ID     =
-
-# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
-# projects other than the one defined by this configuration file, but that are
-# all added to the same external search index. Each project needs to have a
-# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
-# to a relative location where the documentation can be found. The format is:
-# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTRA_SEARCH_MAPPINGS  =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
-# The default value is: YES.
-
-GENERATE_LATEX         = YES
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: latex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_OUTPUT           = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked.
-#
-# Note that when not enabling USE_PDFLATEX the default is latex; when enabling
-# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is
-# chosen this is overwritten by pdflatex. For specific output languages the
-# default can have been set differently, this depends on the implementation of
-# the output language.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_CMD_NAME         =
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
-# index for LaTeX.
-# Note: This tag is used in the Makefile / make.bat.
-# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
-# (.tex).
-# The default file is: makeindex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-MAKEINDEX_CMD_NAME     = makeindex
-
-# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
-# generate index for LaTeX. In case there is no backslash (\) as first character
-# it will be automatically added in the LaTeX code.
-# Note: This tag is used in the generated output file (.tex).
-# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
-# The default value is: makeindex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_MAKEINDEX_CMD    = makeindex
-
-# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-COMPACT_LATEX          = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used by the
-# printer.
-# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
-# 14 inches) and executive (7.25 x 10.5 inches).
-# The default value is: a4.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PAPER_TYPE             = a4
-
-# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
-# that should be included in the LaTeX output. The package can be specified just
-# by its name or with the correct syntax as to be used with the LaTeX
-# \usepackage command. To get the times font for instance you can specify :
-# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
-# To use the option intlimits with the amsmath package you can specify:
-# EXTRA_PACKAGES=[intlimits]{amsmath}
-# If left blank no extra packages will be included.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-EXTRA_PACKAGES         =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
-# generated LaTeX document. The header should contain everything until the first
-# chapter. If it is left blank doxygen will generate a standard header. See
-# section "Doxygen usage" for information on how to let doxygen write the
-# default header to a separate file.
-#
-# Note: Only use a user-defined header if you know what you are doing! The
-# following commands have a special meaning inside the header: $title,
-# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
-# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
-# string, for the replacement values of the other commands the user is referred
-# to HTML_HEADER.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HEADER           =
-
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
-# generated LaTeX document. The footer should contain everything after the last
-# chapter. If it is left blank doxygen will generate a standard footer. See
-# LATEX_HEADER for more information on how to generate a default footer and what
-# special commands can be used inside the footer.
-#
-# Note: Only use a user-defined footer if you know what you are doing!
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_FOOTER           =
-
-# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
-# LaTeX style sheets that are included after the standard style sheets created
-# by doxygen. Using this option one can overrule certain style aspects. Doxygen
-# will copy the style sheet files to the output directory.
-# Note: The order of the extra style sheet files is of importance (e.g. the last
-# style sheet in the list overrules the setting of the previous ones in the
-# list).
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_EXTRA_STYLESHEET =
-
-# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the LATEX_OUTPUT output
-# directory. Note that the files will be copied as-is; there are no commands or
-# markers available.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_EXTRA_FILES      =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
-# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
-# contain links (just like the HTML output) instead of page references. This
-# makes the output suitable for online browsing using a PDF viewer.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PDF_HYPERLINKS         = YES
-
-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES, to get a
-# higher quality PDF documentation.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-USE_PDFLATEX           = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep running
-# if errors occur, instead of asking the user for help. This option is also used
-# when generating formulas in HTML.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BATCHMODE        = NO
-
-# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
-# index chapters (such as File Index, Compound Index, etc.) in the output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HIDE_INDICES     = NO
-
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE      = NO
-
-# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
-# bibliography, e.g. plainnat, or ieeetr. See
-# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
-# The default value is: plain.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BIB_STYLE        = plain
-
-# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_TIMESTAMP        = NO
-
-# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
-# path from which the emoji images will be read. If a relative path is entered,
-# it will be relative to the LATEX_OUTPUT directory. If left blank the
-# LATEX_OUTPUT directory will be used.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_EMOJI_DIRECTORY  =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
-# RTF output is optimized for Word 97 and may not look too pretty with other RTF
-# readers/editors.
-# The default value is: NO.
-
-GENERATE_RTF           = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: rtf.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_OUTPUT             = rtf
-
-# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-COMPACT_RTF            = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
-# contain hyperlink fields. The RTF file will contain links (just like the HTML
-# output) instead of page references. This makes the output suitable for online
-# browsing using Word or some other Word compatible readers that support those
-# fields.
-#
-# Note: WordPad (write) and others do not support links.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_HYPERLINKS         = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# configuration file, i.e. a series of assignments. You only have to provide
-# replacements, missing definitions are set to their default value.
-#
-# See also section "Doxygen usage" for information on how to generate the
-# default style sheet that doxygen normally uses.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_STYLESHEET_FILE    =
-
-# Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's configuration file. A template extensions file can be
-# generated using doxygen -e rtf extensionFile.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_EXTENSIONS_FILE    =
-
-# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
-# with syntax highlighting in the RTF output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_SOURCE_CODE        = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
-# classes and files.
-# The default value is: NO.
-
-GENERATE_MAN           = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it. A directory man3 will be created inside the directory specified by
-# MAN_OUTPUT.
-# The default directory is: man.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_OUTPUT             = man
-
-# The MAN_EXTENSION tag determines the extension that is added to the generated
-# man pages. In case the manual section does not start with a number, the number
-# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
-# optional.
-# The default value is: .3.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_EXTENSION          = .3
-
-# The MAN_SUBDIR tag determines the name of the directory created within
-# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
-# MAN_EXTENSION with the initial . removed.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_SUBDIR             =
-
-# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
-# will generate one additional man file for each entity documented in the real
-# man page(s). These additional files only source the real man page, but without
-# them the man command would be unable to find the correct page.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_LINKS              = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
-# captures the structure of the code including all documentation.
-# The default value is: NO.
-
-GENERATE_XML           = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: xml.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_OUTPUT             = xml
-
-# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
-# listings (including syntax highlighting and cross-referencing information) to
-# the XML output. Note that enabling this will significantly increase the size
-# of the XML output.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_PROGRAMLISTING     = YES
-
-# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
-# namespace members in file scope as well, matching the HTML output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_NS_MEMB_FILE_SCOPE = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the DOCBOOK output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
-# that can be used to generate PDF.
-# The default value is: NO.
-
-GENERATE_DOCBOOK       = NO
-
-# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
-# front of it.
-# The default directory is: docbook.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_OUTPUT         = docbook
-
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
-# program listings (including syntax highlighting and cross-referencing
-# information) to the DOCBOOK output. Note that enabling this will significantly
-# increase the size of the DOCBOOK output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_PROGRAMLISTING = NO
-
-#---------------------------------------------------------------------------
-# Configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
-# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
-# the structure of the code including all documentation. Note that this feature
-# is still experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_AUTOGEN_DEF   = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
-# file that captures the structure of the code including all documentation.
-#
-# Note that this feature is still experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_PERLMOD       = NO
-
-# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
-# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
-# output from the Perl module output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_LATEX          = NO
-
-# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
-# formatted so it can be parsed by a human reader. This is useful if you want to
-# understand what is going on. On the other hand, if this tag is set to NO, the
-# size of the Perl module output will be much smaller and Perl will parse it
-# just the same.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_PRETTY         = YES
-
-# The names of the make variables in the generated doxyrules.make file are
-# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
-# so different doxyrules.make files included by the same Makefile don't
-# overwrite each other's variables.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
-# C-preprocessor directives found in the sources and include files.
-# The default value is: YES.
-
-ENABLE_PREPROCESSING   = YES
-
-# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
-# in the source code. If set to NO, only conditional compilation will be
-# performed. Macro expansion can be done in a controlled way by setting
-# EXPAND_ONLY_PREDEF to YES.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-MACRO_EXPANSION        = NO
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
-# the macro expansion is limited to the macros specified with the PREDEFINED and
-# EXPAND_AS_DEFINED tags.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_ONLY_PREDEF     = NO
-
-# If the SEARCH_INCLUDES tag is set to YES, the include files in the
-# INCLUDE_PATH will be searched if a #include is found.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SEARCH_INCLUDES        = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by the
-# preprocessor.
-# This tag requires that the tag SEARCH_INCLUDES is set to YES.
-
-INCLUDE_PATH           =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will be
-# used.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-INCLUDE_FILE_PATTERNS  =
-
-# The PREDEFINED tag can be used to specify one or more macro names that are
-# defined before the preprocessor is started (similar to the -D option of e.g.
-# gcc). The argument of the tag is a list of macros of the form: name or
-# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
-# is assumed. To prevent a macro definition from being undefined via #undef or
-# recursively expanded use the := operator instead of the = operator.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-PREDEFINED             =
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
-# tag can be used to specify a list of macro names that should be expanded. The
-# macro definition that is found in the sources will be used. Use the PREDEFINED
-# tag if you want to use a different macro definition that overrules the
-# definition found in the source code.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_AS_DEFINED      =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
-# remove all references to function-like macros that are alone on a line, have
-# an all uppercase name, and do not end with a semicolon. Such function macros
-# are typically used for boiler-plate code, and will confuse the parser if not
-# removed.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SKIP_FUNCTION_MACROS   = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES tag can be used to specify one or more tag files. For each tag
-# file the location of the external documentation should be added. The format of
-# a tag file without this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where loc1 and loc2 can be relative or absolute paths or URLs. See the
-# section "Linking to external documentation" for more information about the use
-# of tag files.
-# Note: Each tag file must have a unique name (where the name does NOT include
-# the path). If a tag file is not located in the directory in which doxygen is
-# run, you must also specify the path to the tagfile here.
-
-TAGFILES               =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
-# tag file that is based on the input files it reads. See section "Linking to
-# external documentation" for more information about the usage of tag files.
-
-GENERATE_TAGFILE       =
-
-# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
-# the class index. If set to NO, only the inherited external classes will be
-# listed.
-# The default value is: NO.
-
-ALLEXTERNALS           = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will be
-# listed.
-# The default value is: YES.
-
-EXTERNAL_GROUPS        = YES
-
-# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
-# the related pages index. If set to NO, only the current project's pages will
-# be listed.
-# The default value is: YES.
-
-EXTERNAL_PAGES         = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS         = YES
-
-# You can include diagrams made with dia in doxygen documentation. Doxygen will
-# then run dia to produce the diagram and insert it in the documentation. The
-# DIA_PATH tag allows you to specify the directory where the dia binary resides.
-# If left empty dia is assumed to be found in the default search path.
-
-DIA_PATH               =
-
-# If set to YES the inheritance and collaboration graphs will hide inheritance
-# and usage relations if the target is undocumented or is not a class.
-# The default value is: YES.
-
-HIDE_UNDOC_RELATIONS   = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz (see:
-# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
-# Bell Labs. The other options in this section have no effect if this option is
-# set to NO
-# The default value is: YES.
-
-HAVE_DOT               = YES
-
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
-# to run in parallel. When set to 0 doxygen will base this on the number of
-# processors available in the system. You can set it explicitly to a value
-# larger than 0 to get control over the balance between CPU load and processing
-# speed.
-# Minimum value: 0, maximum value: 32, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_NUM_THREADS        = 0
-
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTNAME           = Helvetica
-
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTSIZE           = 10
-
-# By default doxygen will tell dot to use the default font as specified with
-# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
-# the path where dot can find it using this tag.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTPATH           =
-
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CLASS_GRAPH            = YES
-
-# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
-# graph for each documented class showing the direct and indirect implementation
-# dependencies (inheritance, containment, and class references variables) of the
-# class with other documented classes.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-COLLABORATION_GRAPH    = YES
-
-# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
-# groups, showing the direct groups dependencies.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GROUP_GRAPHS           = YES
-
-# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LOOK               = NO
-
-# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
-# class node. If there are many fields or methods and many nodes the graph may
-# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
-# number of items for each type to make the size more manageable. Set this to 0
-# for no limit. Note that the threshold may be exceeded by 50% before the limit
-# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
-# but if the number exceeds 15, the total amount of fields shown is limited to
-# 10.
-# Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LIMIT_NUM_FIELDS   = 10
-
-# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
-# collaboration graphs will show the relations between templates and their
-# instances.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-TEMPLATE_RELATIONS     = NO
-
-# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
-# YES then doxygen will generate a graph for each documented file showing the
-# direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDE_GRAPH          = YES
-
-# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
-# set to YES then doxygen will generate a graph for each documented file showing
-# the direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDED_BY_GRAPH      = YES
-
-# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable call graphs for selected
-# functions only using the \callgraph command. Disabling a call graph can be
-# accomplished by means of the command \hidecallgraph.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALL_GRAPH             = NO
-
-# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable caller graphs for selected
-# functions only using the \callergraph command. Disabling a caller graph can be
-# accomplished by means of the command \hidecallergraph.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALLER_GRAPH           = NO
-
-# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
-# hierarchy of all classes instead of a textual one.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GRAPHICAL_HIERARCHY    = YES
-
-# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
-# dependencies a directory has on other directories in a graphical way. The
-# dependency relations are determined by the #include relations between the
-# files in the directories.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DIRECTORY_GRAPH        = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. For an explanation of the image formats see the section
-# output formats in the documentation of the dot tool (Graphviz (see:
-# http://www.graphviz.org/)).
-# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
-# to make the SVG files visible in IE 9+ (other browsers do not have this
-# requirement).
-# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,
-# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,
-# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,
-# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
-# png:gdiplus:gdiplus.
-# The default value is: png.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_IMAGE_FORMAT       = png
-
-# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
-# enable generation of interactive SVG images that allow zooming and panning.
-#
-# Note that this requires a modern browser other than Internet Explorer. Tested
-# and working are Firefox, Chrome, Safari, and Opera.
-# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
-# the SVG files visible. Older versions of IE do not have SVG support.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INTERACTIVE_SVG        = NO
-
-# The DOT_PATH tag can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_PATH               =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the \dotfile
-# command).
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOTFILE_DIRS           =
-
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the \mscfile
-# command).
-
-MSCFILE_DIRS           =
-
-# The DIAFILE_DIRS tag can be used to specify one or more directories that
-# contain dia files that are included in the documentation (see the \diafile
-# command).
-
-DIAFILE_DIRS           =
-
-# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
-# path where java can find the plantuml.jar file. If left blank, it is assumed
-# PlantUML is not used or called during a preprocessing step. Doxygen will
-# generate a warning when it encounters a \startuml command in this case and
-# will not generate output for the diagram.
-
-PLANTUML_JAR_PATH      =
-
-# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
-# configuration file for plantuml.
-
-PLANTUML_CFG_FILE      =
-
-# When using plantuml, the specified paths are searched for files specified by
-# the !include statement in a plantuml block.
-
-PLANTUML_INCLUDE_PATH  =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
-# that will be shown in the graph. If the number of nodes in a graph becomes
-# larger than this value, doxygen will truncate the graph, which is visualized
-# by representing a node as a red box. Note that doxygen if the number of direct
-# children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
-# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-# Minimum value: 0, maximum value: 10000, default value: 50.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_GRAPH_MAX_NODES    = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
-# generated by dot. A depth value of 3 means that only nodes reachable from the
-# root by following a path via at most 3 edges will be shown. Nodes that lay
-# further from the root node will be omitted. Note that setting this option to 1
-# or 2 may greatly reduce the computation time needed for large code bases. Also
-# note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-# Minimum value: 0, maximum value: 1000, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-MAX_DOT_GRAPH_DEPTH    = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT        = NO
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10) support
-# this, this feature is disabled by default.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_MULTI_TARGETS      = NO
-
-# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
-# explaining the meaning of the various boxes and arrows in the dot generated
-# graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GENERATE_LEGEND        = YES
-
-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
-# files that are used to generate the various graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_CLEANUP            = YES
diff --git a/ggml/src/ggml-cann/acl_tensor.cpp b/ggml/src/ggml-cann/acl_tensor.cpp

index 8958ebcd787045057c50321be0eca3a757d88791..7b7042a1f5486a01ac26fa469ec3e5cf5622d102 100644 (file)
--- a/ggml/src/ggml-cann/acl_tensor.cpp
+++ b/ggml/src/ggml-cann/acl_tensor.cpp
@@ -48,15 +48,14 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
          default:
              return ACL_DT_UNDEFINED;
      }
-    return ACL_DT_UNDEFINED;
  }
  
-aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
-                                    int64_t *           ne,
-                                    size_t *            nb,
-                                    int64_t             dims,
-                                    aclFormat           format,
-                                    size_t              offset) {
+acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
+                                       int64_t *           ne,
+                                       size_t *            nb,
+                                       int64_t             dims,
+                                       aclFormat           format,
+                                       size_t              offset) {
      // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
      // added.
      int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
@@ -87,10 +86,20 @@ aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
      std::reverse(acl_ne, acl_ne + final_dims);
      std::reverse(acl_stride, acl_stride + final_dims);
  
-    aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
-                                             elem_offset, format, &acl_storage_len, 1, tensor->data);
+    aclTensor * raw = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, elem_offset,
+                                      format, &acl_storage_len, 1, tensor->data);
  
-    return acl_tensor;
+    return acl_tensor_ptr(raw);
+}
+
+acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size) {
+    aclIntArray * raw = aclCreateIntArray(value, size);
+    return acl_int_array_ptr(raw);
+}
+
+acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType) {
+    aclScalar * raw = aclCreateScalar(value, dataType);
+    return acl_scalar_ptr(raw);
  }
  
  bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {
diff --git a/ggml/src/ggml-cann/acl_tensor.h b/ggml/src/ggml-cann/acl_tensor.h

index cb17ebcc1bbe296f2dafa5606a7670db12a08383..7deac383420c0d7bae5f35f9c12a833d08fc5aca 100644 (file)
--- a/ggml/src/ggml-cann/acl_tensor.h
+++ b/ggml/src/ggml-cann/acl_tensor.h
@@ -23,11 +23,12 @@
  #ifndef CANN_ACL_TENSOR_H
  #define CANN_ACL_TENSOR_H
  
-#include <algorithm>
-#include <cstring>
+#include "common.h"
  
  #include <aclnn/aclnn_base.h>
-#include "common.h"
+
+#include <algorithm>
+#include <cstring>
  
  /**
   * @brief      Maps a ggml_type to its corresponding aclDataType.
@@ -43,6 +44,20 @@
   */
  aclDataType ggml_cann_type_mapping(ggml_type type);
  
+// Deleter for ACL objects: calls DestroyFunc on a non-null pointer and checks the result with ACL_CHECK.
+template <typename T, aclError (*DestroyFunc)(const T *)> struct acl_deleter {
+    void operator()(T * ptr) const noexcept {
+        if (ptr) {
+            ACL_CHECK(DestroyFunc(ptr));
+        }
+    }
+};
+
+using acl_tensor_ptr      = std::unique_ptr<aclTensor, acl_deleter<aclTensor, aclDestroyTensor>>;
+using acl_int_array_ptr   = std::unique_ptr<aclIntArray, acl_deleter<aclIntArray, aclDestroyIntArray>>;
+using acl_scalar_ptr      = std::unique_ptr<aclScalar, acl_deleter<aclScalar, aclDestroyScalar>>;
+using acl_tensor_list_ptr = std::unique_ptr<aclTensorList, acl_deleter<aclTensorList, aclDestroyTensorList>>;
+
  /**
   * @brief   Creates an ACL tensor from a ggml_tensor with optional shape.
   *
@@ -62,12 +77,12 @@ aclDataType ggml_cann_type_mapping(ggml_type type);
   * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0.
   * @return  Pointer to the created ACL tensor.
   */
-aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
-                                    int64_t *           ne     = nullptr,
-                                    size_t *            nb     = nullptr,
-                                    int64_t             dims   = 0,
-                                    aclFormat           format = ACL_FORMAT_ND,
-                                    size_t              offset = 0);
+acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
+                                       int64_t *           ne     = nullptr,
+                                       size_t *            nb     = nullptr,
+                                       int64_t             dims   = 0,
+                                       aclFormat           format = ACL_FORMAT_ND,
+                                       size_t              offset = 0);
  
  /**
   * @brief   Template for creating an ACL tensor from provided parameters. typename TYPE
@@ -90,14 +105,14 @@ aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
   * @return  Pointer to the created ACL tensor.
   */
  template <typename TYPE>
-aclTensor * ggml_cann_create_tensor(void *      data_ptr,
-                                    aclDataType dtype,
-                                    TYPE        type_size,
-                                    int64_t *   ne,
-                                    TYPE *      nb,
-                                    int64_t     dims,
-                                    aclFormat   format = ACL_FORMAT_ND,
-                                    size_t      offset = 0) {
+acl_tensor_ptr ggml_cann_create_tensor(void *      data_ptr,
+                                       aclDataType dtype,
+                                       TYPE        type_size,
+                                       int64_t *   ne,
+                                       TYPE *      nb,
+                                       int64_t     dims,
+                                       aclFormat   format = ACL_FORMAT_ND,
+                                       size_t      offset = 0) {
      int64_t tmp_ne[GGML_MAX_DIMS * 2];
      int64_t tmp_stride[GGML_MAX_DIMS * 2];
  
@@ -114,10 +129,75 @@ aclTensor * ggml_cann_create_tensor(void *      data_ptr,
      std::reverse(tmp_ne, tmp_ne + dims);
      std::reverse(tmp_stride, tmp_stride + dims);
  
-    aclTensor * acl_tensor =
+    aclTensor * raw =
          aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr);
  
-    return acl_tensor;
+    return acl_tensor_ptr(raw);
+}
+
+/**
+ * @brief Create an ACL int array resource wrapped in a smart pointer.
+ *
+ * This function constructs an aclIntArray from the provided int64_t values
+ * and returns it as an acl_int_array_ptr (a std::unique_ptr with a custom
+ * deleter). The returned pointer owns the ACL resource and will automatically
+ * destroy it via aclDestroyIntArray().
+ *
+ * @param value  Pointer to the int64_t elements.
+ * @param size   Number of elements in value.
+ *
+ * @return A smart pointer managing the created ACL int array.
+ */
+acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size);
+
+/**
+ * @brief Create an ACL scalar resource wrapped in a smart pointer.
+ *
+ * This function constructs an aclScalar from the raw value pointer and ACL
+ * data type, then returns it as an acl_scalar_ptr (a std::unique_ptr with
+ * a custom deleter). The returned pointer owns the ACL scalar and will
+ * automatically destroy it via aclDestroyScalar().
+ *
+ * @param value     Pointer to the raw scalar memory.
+ * @param dataType  ACL data type of the scalar.
+ *
+ * @return A smart pointer managing the created ACL scalar.
+ */
+acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType);
+
+/**
+ * @brief Create an ACL tensor list from multiple tensor smart pointers.
+ *
+ * This function accepts a variadic list of acl_tensor_ptr (a unique_ptr with
+ * custom deleter) and produces an aclTensorList using aclCreateTensorList().
+ *
+ * The lifecycle management of the tensor objects changes as follows:
+ *  - aclCreateTensorList() takes ownership of the tensors
+ *  - Each input smart pointer releases ownership using release()
+ *  - As a result, the tensors will NOT be destroyed by unique_ptr
+ *  - Instead, they will be destroyed when aclDestroyTensorList() is called
+ *
+ * This ensures correct ownership transfer and prevents double-free situations.
+ *
+ * @tparam acl_tensor_ptr  Variadic template parameter; each argument must be
+ *                          a unique_ptr-like type supporting get() and release().
+ *
+ * @param tensors  Variadic list of acl_tensor_ptr objects. Ownership of
+ *                         each tensor is transferred away from these smart pointers.
+ *
+ * @return A smart pointer (acl_tensor_list_ptr) owning the created ACL tensor list.
+ *
+ * @note This implementation is C++11 compatible. The ownership-release process is
+ *       executed using a pack expansion inside an initializer list.
+ */
+template <typename... acl_tensor_ptr> acl_tensor_list_ptr ggml_cann_create_tensor_list(acl_tensor_ptr &&... tensors) {
+    aclTensor *     raw_tensors[] = { tensors.get()... };
+    aclTensorList * raw           = aclCreateTensorList(raw_tensors, sizeof...(tensors));
+    // The aclTensorList now owns the aclTensor objects and will destroy them,
+    // so release ownership here without destroying the tensors.
+    int             dummy[]       = { (tensors.release(), 0)... };
+    GGML_UNUSED(dummy);
+    return acl_tensor_list_ptr(raw);
  }
  
  /**
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp

index 6d8b4a5f0ebf0bbf838eb1444962e4882c65badd..c8d98546358f63c84d33abf5c62748e3af0467ac 100644 (file)
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -22,56 +22,58 @@
  
  #include "aclnn_ops.h"
  
+#include "ggml-impl.h"
+#include "ggml.h"
+
+#include <aclnnop/aclnn_add.h>
  #include <aclnnop/aclnn_addcdiv.h>
+#include <aclnnop/aclnn_argmax.h>
  #include <aclnnop/aclnn_avgpool2d.h>
  #include <aclnnop/aclnn_batch_matmul.h>
  #include <aclnnop/aclnn_cast.h>
+#include <aclnnop/aclnn_clamp.h>
  #include <aclnnop/aclnn_constant_pad_nd.h>
+#include <aclnnop/aclnn_convolution.h>
  #include <aclnnop/aclnn_copy.h>
  #include <aclnnop/aclnn_div.h>
+#include <aclnnop/aclnn_elu.h>
  #include <aclnnop/aclnn_embedding.h>
+#include <aclnnop/aclnn_eq_tensor.h>
  #include <aclnnop/aclnn_exp.h>
  #include <aclnnop/aclnn_fill_scalar.h>
+#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
  #include <aclnnop/aclnn_group_norm.h>
+#include <aclnnop/aclnn_grouped_matmul_v3.h>
+#include <aclnnop/aclnn_gt_scalar.h>
+#include <aclnnop/aclnn_im2col.h>
+#include <aclnnop/aclnn_index_copy.h>
  #include <aclnnop/aclnn_index_fill_tensor.h>
+#include <aclnnop/aclnn_index_select.h>
  #include <aclnnop/aclnn_layer_norm.h>
+#include <aclnnop/aclnn_log.h>
  #include <aclnnop/aclnn_matmul.h>
  #include <aclnnop/aclnn_max_pool.h>
+#include <aclnnop/aclnn_mean.h>
  #include <aclnnop/aclnn_mm.h>
+#include <aclnnop/aclnn_mul.h>
  #include <aclnnop/aclnn_permute.h>
+#include <aclnnop/aclnn_pow.h>
  #include <aclnnop/aclnn_pow_tensor_tensor.h>
  #include <aclnnop/aclnn_reduce_sum.h>
+#include <aclnnop/aclnn_reflection_pad1d.h>
  #include <aclnnop/aclnn_repeat.h>
  #include <aclnnop/aclnn_repeat_interleave.h>
+#include <aclnnop/aclnn_rms_norm.h>
  #include <aclnnop/aclnn_roll.h>
  #include <aclnnop/aclnn_softmax.h>
+#include <aclnnop/aclnn_sub.h>
+#include <aclnnop/aclnn_sum.h>
+#include <aclnnop/aclnn_threshold.h>
  #include <aclnnop/aclnn_tril.h>
  #include <aclnnop/aclnn_triu.h>
  #include <aclnnop/aclnn_upsample_nearest_2d.h>
  #include <aclnnop/aclnn_weight_quant_batch_matmul_v2.h>
-#include <aclnnop/aclnn_argmax.h>
-#include <aclnnop/aclnn_sum.h>
-#include <aclnnop/aclnn_rms_norm.h>
-#include <aclnnop/aclnn_im2col.h>
-#include <aclnnop/aclnn_add.h>
-#include <aclnnop/aclnn_sub.h>
-#include <aclnnop/aclnn_mul.h>
-#include <aclnnop/aclnn_div.h>
-#include <aclnnop/aclnn_convolution.h>
-#include <aclnnop/aclnn_elu.h>
-#include <aclnnop/aclnn_log.h>
-#include <aclnnop/aclnn_mean.h>
-#include <aclnnop/aclnn_reflection_pad1d.h>
-#include <aclnnop/aclnn_eq_tensor.h>
-#include <aclnnop/aclnn_gt_scalar.h>
-#include <aclnnop/aclnn_pow.h>
-#include <aclnnop/aclnn_grouped_matmul_v3.h>
-#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
  #include <aclnnop/aclnn_zero.h>
-#include <aclnnop/aclnn_index_copy.h>
-#include <aclnnop/aclnn_index_select.h>
-#include <aclnnop/aclnn_clamp.h>
-#include <aclnnop/aclnn_threshold.h>
  #include <float.h>
  
  #include <cmath>
@@ -79,30 +81,27 @@
  #include <exception>
  #include <vector>
  
-#include "ggml-impl.h"
-#include "ggml.h"
-
  #define GGML_COMMON_DECL_C
  
  #include "../ggml-common.h"
  
-void bcast_shape(ggml_tensor * src0,
-                 ggml_tensor * src1,
-                 ggml_tensor * dst,
-                 aclTensor **  acl_src0,
-                 aclTensor **  acl_src1,
-                 aclTensor **  acl_dst) {
+void bcast_shape(ggml_tensor *    src0,
+                 ggml_tensor *    src1,
+                 ggml_tensor *    dst,
+                 acl_tensor_ptr & acl_src0,
+                 acl_tensor_ptr & acl_src1,
+                 acl_tensor_ptr & acl_dst) {
      GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_can_repeat(src1, src0));
      // Need bcast
      if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
          BCAST_SHAPE(src0, src1)
-        *acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
-        *acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
-        *acl_dst  = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
+        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
+        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
+        acl_dst  = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
      } else {
-        *acl_src0 = ggml_cann_create_tensor(src0);
-        *acl_src1 = ggml_cann_create_tensor(src1);
-        *acl_dst  = ggml_cann_create_tensor(dst);
+        acl_src0 = ggml_cann_create_tensor(src0);
+        acl_src1 = ggml_cann_create_tensor(src1);
+        acl_dst  = ggml_cann_create_tensor(dst);
      }
  }
  
@@ -111,11 +110,10 @@ void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTenso
                          ggml_tensor *                                                              dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    unary_op(ctx, acl_src, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst);
+    unary_op(ctx, acl_src.get(), acl_dst.get());
  }
  
  void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
@@ -128,8 +126,8 @@ void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, ac
      GGML_ASSERT(ggml_is_contiguous_1(dst));
      const int32_t swapped = ggml_get_op_params_i32(dst, 1);
  
-    aclTensor * acl_dst  = ggml_cann_create_tensor(dst);
-    aclTensor * acl_src0 = nullptr, *acl_src1 = nullptr;
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src0, acl_src1;
      if (src1) {
          GGML_ASSERT(ggml_is_contiguous_1(src1));
          GGML_ASSERT(src0->type == src1->type);
@@ -146,10 +144,8 @@ void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, ac
          }
      }
  
-    unary_op(ctx, acl_src0, acl_dst);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, acl_dst, acl_src1);
-
-    ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
+    unary_op(ctx, acl_src0.get(), acl_dst.get());
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, acl_dst.get(), acl_src1.get());
  }
  
  /**
@@ -167,10 +163,9 @@ static void aclnn_repeat(ggml_backend_cann_context & ctx,
                           aclTensor *                 acl_dst,
                           int64_t *                   repeat_array) {
      // repeat tensor along each dim with repeat_array
-    aclIntArray * repeats = aclCreateIntArray(repeat_array, GGML_MAX_DIMS);
+    acl_int_array_ptr repeats = ggml_cann_create_int_array(repeat_array, GGML_MAX_DIMS);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Repeat, acl_src, repeats, acl_dst);
-    ggml_cann_release_resources(ctx, repeats);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Repeat, acl_src, repeats.get(), acl_dst);
  }
  
  /**
@@ -197,36 +192,33 @@ void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
      GGML_ASSERT(ggml_can_repeat(src, dst));
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      int64_t repeatsArray[] = { dst->ne[3] / src->ne[3], dst->ne[2] / src->ne[2], dst->ne[1] / src->ne[1],
                                 dst->ne[0] / src->ne[0] };
  
-    aclnn_repeat(ctx, acl_src, acl_dst, repeatsArray);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst);
+    aclnn_repeat(ctx, acl_src.get(), acl_dst.get(), repeatsArray);
  }
  
  void aclnn_add(ggml_backend_cann_context & ctx, aclTensor * acl_src0, aclTensor * acl_src1, aclTensor * acl_dst) {
-    float       alphaValue = 1.0f;
-    aclScalar * alpha      = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    float          alphaValue = 1.0f;
+    acl_scalar_ptr alpha      = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
      if (acl_dst != nullptr) {
-        GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0, acl_src1, alpha, acl_dst);
+        GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0, acl_src1, alpha.get(), acl_dst);
      } else {
-        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_src0, acl_src1, alpha);
+        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_src0, acl_src1, alpha.get());
      }
-    ggml_cann_release_resources(ctx, alpha);
  }
  
  void aclnn_sub(ggml_backend_cann_context & ctx, aclTensor * acl_src0, aclTensor * acl_src1, aclTensor * acl_dst) {
-    float       alphaValue = 1.0f;
-    aclScalar * alpha      = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    float          alphaValue = 1.0f;
+    acl_scalar_ptr alpha      = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
      if (acl_dst != nullptr) {
-        GGML_CANN_CALL_ACLNN_OP(ctx, Sub, acl_src0, acl_src1, alpha, acl_dst);
+        GGML_CANN_CALL_ACLNN_OP(ctx, Sub, acl_src0, acl_src1, alpha.get(), acl_dst);
      } else {
-        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceSub, acl_src0, acl_src1, alpha);
+        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceSub, acl_src0, acl_src1, alpha.get());
      }
-    ggml_cann_release_resources(ctx, alpha);
  }
  
  void aclnn_mul(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_other, aclTensor * acl_dst) {
@@ -272,30 +264,25 @@ static void aclnn_muls(ggml_backend_cann_context & ctx,
                         float                       scale,
                         aclTensor *                 acl_dst,
                         bool                        inplace) {
-    aclScalar * acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_scale = ggml_cann_create_scalar(&scale, aclDataType::ACL_FLOAT);
      if (inplace) {
-        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_src, acl_scale);
+        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_src, acl_scale.get());
      } else {
-        GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src, acl_scale, acl_dst);
+        GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src, acl_scale.get(), acl_dst);
      }
-    ggml_cann_release_resources(ctx, acl_scale);
  }
  
  void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    GGML_ASSERT(src->type == GGML_TYPE_F32);
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      float negative_slope;
      memcpy(&negative_slope, dst->op_params, sizeof(float));
-    aclScalar * acl_negative_slope = aclCreateScalar(&negative_slope, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_negative_slope = ggml_cann_create_scalar(&negative_slope, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, LeakyRelu, acl_src, acl_negative_slope, acl_dst);
-    ggml_cann_release_resources(ctx, acl_negative_slope, acl_src, acl_dst);
+    GGML_CANN_CALL_ACLNN_OP(ctx, LeakyRelu, acl_src.get(), acl_negative_slope.get(), acl_dst.get());
  }
  
  /**
@@ -316,22 +303,19 @@ static void aclnn_concat(ggml_backend_cann_context & ctx,
  }
  
  void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
-    ggml_tensor * src0     = dst->src[0];
-    ggml_tensor * src1     = dst->src[1];
-    aclTensor *   acl_src0 = ggml_cann_create_tensor(src0);
-    aclTensor *   acl_src1 = ggml_cann_create_tensor(src1);
-    aclTensor *   acl_dst  = ggml_cann_create_tensor(dst);
+    ggml_tensor *  src0     = dst->src[0];
+    ggml_tensor *  src1     = dst->src[1];
+    acl_tensor_ptr acl_src0 = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_src1 = ggml_cann_create_tensor(src1);
+    acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst);
  
      const int32_t dim = ggml_get_op_params_i32(dst, 0);
  
      GGML_ASSERT(dim >= 0 && dim < 4);
      int32_t acl_dim = 3 - dim;
  
-    aclTensor *     tensors[]   = { acl_src0, acl_src1 };
-    aclTensorList * tensor_list = aclCreateTensorList(tensors, 2);
-    aclnn_concat(ctx, tensor_list, acl_dst, acl_dim);
-
-    ggml_cann_release_resources(ctx, tensor_list, acl_dst);
+    acl_tensor_list_ptr tensor_list = ggml_cann_create_tensor_list(acl_src0, acl_src1);
+    aclnn_concat(ctx, tensor_list.get(), acl_dst.get(), acl_dim);
  }
  
  /**
@@ -360,18 +344,17 @@ static void aclnn_arange(ggml_backend_cann_context & ctx,
      int64_t steps = (int64_t) std::ceil((stop - start) / step);
      GGML_ASSERT(n_elements == steps);
  
-    aclScalar * acl_start = aclCreateScalar(&start, aclDataType::ACL_FLOAT);
-    aclScalar * acl_end   = aclCreateScalar(&stop, aclDataType::ACL_FLOAT);
-    aclScalar * acl_step  = aclCreateScalar(&step, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_start = ggml_cann_create_scalar(&start, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_end   = ggml_cann_create_scalar(&stop, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_step  = ggml_cann_create_scalar(&step, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Arange, acl_start, acl_end, acl_step, acl_dst);
-    ggml_cann_release_resources(ctx, acl_start, acl_end, acl_step);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Arange, acl_start.get(), acl_end.get(), acl_step.get(), acl_dst);
  }
  
  void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      GGML_ASSERT(dst->type == GGML_TYPE_F32);
  
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      int64_t n_elements = ggml_nelements(dst);
      float   start;
@@ -381,8 +364,7 @@ void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      memcpy(&stop, (float *) dst->op_params + 1, sizeof(float));
      memcpy(&step, (float *) dst->op_params + 2, sizeof(float));
  
-    aclnn_arange(ctx, acl_dst, start, stop, step, n_elements);
-    ggml_cann_release_resources(ctx, acl_dst);
+    aclnn_arange(ctx, acl_dst.get(), start, stop, step, n_elements);
  }
  
  void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -393,14 +375,13 @@ void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      memcpy(&min, dst->op_params, sizeof(float));
      memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    aclScalar * acl_min = aclCreateScalar(&min, aclDataType::ACL_FLOAT);
-    aclScalar * acl_max = aclCreateScalar(&max, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_min = ggml_cann_create_scalar(&min, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr acl_max = ggml_cann_create_scalar(&max, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Clamp, acl_src, acl_min, acl_max, acl_dst);
-    ggml_cann_release_resources(ctx, acl_min, acl_max, acl_src, acl_dst);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Clamp, acl_src.get(), acl_min.get(), acl_max.get(), acl_dst.get());
  }
  
  void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -410,71 +391,69 @@ void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      float v;
      memcpy(&v, dst->op_params, sizeof(float));
  
-    aclScalar * scale   = aclCreateScalar(&v, aclDataType::ACL_FLOAT);
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_scalar_ptr scale   = ggml_cann_create_scalar(&v, aclDataType::ACL_FLOAT);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src, scale, acl_dst);
-    ggml_cann_release_resources(ctx, scale, acl_src, acl_dst);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_src.get(), scale.get(), acl_dst.get());
  }
  
  void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor *        src   = dst->src[0];
      enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];
  
-    aclTensor *          acl_src = ggml_cann_create_tensor(src);
-    aclTensor *          acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr       acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr       acl_dst = ggml_cann_create_tensor(dst);
      ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), ggml_nelements(dst) * sizeof(int64_t));
      void *               buffer = temp_buffer_allocator.get();
-    aclTensor *          tmp_tensor =
+    acl_tensor_ptr       tmp_tensor =
          ggml_cann_create_tensor(buffer, ACL_INT64, ggml_type_size(dst->type), dst->ne, dst->nb, GGML_MAX_DIMS);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Argsort, acl_src, -1, (order == GGML_SORT_ORDER_DESC ? true : false), tmp_tensor);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Cast, tmp_tensor, ggml_cann_type_mapping(dst->type), acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, tmp_tensor, acl_dst);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Argsort, acl_src.get(), -1, (order == GGML_SORT_ORDER_DESC ? true : false),
+                            tmp_tensor.get());
+    GGML_CANN_CALL_ACLNN_OP(ctx, Cast, tmp_tensor.get(), ggml_cann_type_mapping(dst->type), acl_dst.get());
  }
  
  void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      float eps;
      memcpy(&eps, dst->op_params, sizeof(float));
  
      std::vector<int64_t> normData = { dst->ne[0] };
-    aclIntArray *        norm     = aclCreateIntArray(normData.data(), normData.size());
-    GGML_CANN_CALL_ACLNN_OP(ctx, LayerNorm, acl_src, norm, nullptr, nullptr, eps, acl_dst, nullptr, nullptr);
-    ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
+    acl_int_array_ptr    norm     = ggml_cann_create_int_array(normData.data(), normData.size());
+    GGML_CANN_CALL_ACLNN_OP(ctx, LayerNorm, acl_src.get(), norm.get(), nullptr, nullptr, eps, acl_dst.get(), nullptr,
+                            nullptr);
  }
  
  void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    size_t  type_size = ggml_type_size(src->type);
-    int64_t n_bytes   = src->ne[3]* src->ne[2]* src->ne[1]* type_size;
+    size_t               type_size = ggml_type_size(src->type);
+    int64_t              n_bytes   = src->ne[3] * src->ne[2] * src->ne[1] * type_size;
      ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
-    void *               buffer       = temp_buffer_allocator.get();
+    void *               buffer = temp_buffer_allocator.get();
  
-    int64_t div_ne[] = {1, src->ne[1], src->ne[2], src->ne[3]};
+    int64_t div_ne[] = { 1, src->ne[1], src->ne[2], src->ne[3] };
      size_t  div_nb[GGML_MAX_DIMS];
      div_nb[0] = sizeof(float);
      for (int i = 1; i < GGML_MAX_DIMS; ++i) {
          div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
      }
-    aclTensor *          acl_div      = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, div_ne, div_nb, GGML_MAX_DIMS);
+    acl_tensor_ptr acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, div_ne, div_nb, GGML_MAX_DIMS);
  
-    std::vector<int64_t> norm_dims = { 3 };
-    aclIntArray * dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());
+    std::vector<int64_t> norm_dims  = { 3 };
+    acl_int_array_ptr    dims_array = ggml_cann_create_int_array(norm_dims.data(), norm_dims.size());
  
-    float p_value = 2.0f;
-    aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
-    ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
+    float          p_value  = 2.0f;
+    acl_scalar_ptr p_scalar = ggml_cann_create_scalar(&p_value, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src.get(), p_scalar.get(), dims_array.get(), true, acl_div.get());
+    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src.get(), acl_div.get(), acl_dst.get());
  }
  
  void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -484,90 +463,94 @@ void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor *
      const int64_t nc = src0->ne[0];
      const int64_t nr = ggml_nrows(src0);
  
-    int64_t logits_ne[] = {nc, nr};
-    size_t logits_nb[2];
-    logits_nb[0] = ggml_type_size(src0->type);
-    logits_nb[1] = logits_nb[0] * logits_ne[0];
-    aclTensor * acl_logits = ggml_cann_create_tensor(src0->data, ACL_FLOAT, sizeof(float), logits_ne, logits_nb, 2);
+    int64_t logits_ne[] = { nc, nr };
+    size_t  logits_nb[2];
+    logits_nb[0]              = ggml_type_size(src0->type);
+    logits_nb[1]              = logits_nb[0] * logits_ne[0];
+    acl_tensor_ptr acl_logits = ggml_cann_create_tensor(src0->data, ACL_FLOAT, sizeof(float), logits_ne, logits_nb, 2);
  
-    size_t log_softmax_type_size = sizeof(float);
-    int64_t log_softmax_n_bytes = nr * nc * log_softmax_type_size;
+    size_t               log_softmax_type_size = sizeof(float);
+    int64_t              log_softmax_n_bytes   = nr * nc * log_softmax_type_size;
      ggml_cann_pool_alloc log_softmax_allocator(ctx.pool(), log_softmax_n_bytes);
-    void * log_softmax_buffer = log_softmax_allocator.get();
+    void *               log_softmax_buffer = log_softmax_allocator.get();
  
-    int64_t log_softmax_ne[] = {nc, nr};
-    size_t log_softmax_nb[2];
-    log_softmax_nb[0] = log_softmax_type_size;
-    log_softmax_nb[1] = log_softmax_nb[0] * log_softmax_ne[0];
-    aclTensor * acl_log_softmax = ggml_cann_create_tensor(log_softmax_buffer, ACL_FLOAT, log_softmax_type_size, log_softmax_ne, log_softmax_nb, 2);
+    int64_t log_softmax_ne[] = { nc, nr };
+    size_t  log_softmax_nb[2];
+    log_softmax_nb[0]              = log_softmax_type_size;
+    log_softmax_nb[1]              = log_softmax_nb[0] * log_softmax_ne[0];
+    acl_tensor_ptr acl_log_softmax = ggml_cann_create_tensor(log_softmax_buffer, ACL_FLOAT, log_softmax_type_size,
+                                                             log_softmax_ne, log_softmax_nb, 2);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, LogSoftmax, acl_logits, 1, acl_log_softmax);
+    GGML_CANN_CALL_ACLNN_OP(ctx, LogSoftmax, acl_logits.get(), 1, acl_log_softmax.get());
  
-    int64_t labels_ne[] = {nc, nr};
-    size_t labels_nb[2];
-    labels_nb[0] = ggml_type_size(src1->type);
-    labels_nb[1] = labels_nb[0] * labels_ne[0];
-    aclTensor * acl_labels = ggml_cann_create_tensor(src1->data, ACL_FLOAT, sizeof(float), labels_ne, labels_nb, 2);
+    int64_t labels_ne[] = { nc, nr };
+    size_t  labels_nb[2];
+    labels_nb[0]              = ggml_type_size(src1->type);
+    labels_nb[1]              = labels_nb[0] * labels_ne[0];
+    acl_tensor_ptr acl_labels = ggml_cann_create_tensor(src1->data, ACL_FLOAT, sizeof(float), labels_ne, labels_nb, 2);
  
-    size_t mul_type_size = sizeof(float);
-    int64_t mul_n_bytes = nr * nc * mul_type_size;
+    size_t               mul_type_size = sizeof(float);
+    int64_t              mul_n_bytes   = nr * nc * mul_type_size;
      ggml_cann_pool_alloc mul_allocator(ctx.pool(), mul_n_bytes);
-    void * mul_buffer = mul_allocator.get();
+    void *               mul_buffer = mul_allocator.get();
  
-    int64_t mul_ne[] = {nc, nr};
-    size_t mul_nb[2];
-    mul_nb[0] = mul_type_size;
-    mul_nb[1] = mul_nb[0] * mul_ne[0];
-    aclTensor * acl_mul_result = ggml_cann_create_tensor(mul_buffer, ACL_FLOAT, mul_type_size, mul_ne, mul_nb, 2);
+    int64_t mul_ne[] = { nc, nr };
+    size_t  mul_nb[2];
+    mul_nb[0]                     = mul_type_size;
+    mul_nb[1]                     = mul_nb[0] * mul_ne[0];
+    acl_tensor_ptr acl_mul_result = ggml_cann_create_tensor(mul_buffer, ACL_FLOAT, mul_type_size, mul_ne, mul_nb, 2);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_log_softmax, acl_labels, acl_mul_result);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_log_softmax.get(), acl_labels.get(), acl_mul_result.get());
  
-    size_t sum_per_sample_type_size = sizeof(float);
-    int64_t sum_per_sample_n_bytes = nr * sum_per_sample_type_size;
+    size_t               sum_per_sample_type_size = sizeof(float);
+    int64_t              sum_per_sample_n_bytes   = nr * sum_per_sample_type_size;
      ggml_cann_pool_alloc sum_per_sample_allocator(ctx.pool(), sum_per_sample_n_bytes);
-    void * sum_per_sample_buffer = sum_per_sample_allocator.get();
+    void *               sum_per_sample_buffer = sum_per_sample_allocator.get();
  
-    int64_t sum_per_sample_ne[] = {nr};
-    size_t sum_per_sample_nb[1];
-    sum_per_sample_nb[0] = sum_per_sample_type_size;
-    aclTensor * acl_sum_per_sample = ggml_cann_create_tensor(sum_per_sample_buffer, ACL_FLOAT, sum_per_sample_type_size, sum_per_sample_ne, sum_per_sample_nb, 1);
+    int64_t sum_per_sample_ne[] = { nr };
+    size_t  sum_per_sample_nb[1];
+    sum_per_sample_nb[0]              = sum_per_sample_type_size;
+    acl_tensor_ptr acl_sum_per_sample = ggml_cann_create_tensor(
+        sum_per_sample_buffer, ACL_FLOAT, sum_per_sample_type_size, sum_per_sample_ne, sum_per_sample_nb, 1);
  
-    std::vector<int64_t> sum_dims = {1};
-    aclIntArray * dims_array = aclCreateIntArray(sum_dims.data(), sum_dims.size());
-    bool keep_dims = false;
+    std::vector<int64_t> sum_dims   = { 1 };
+    acl_int_array_ptr    dims_array = ggml_cann_create_int_array(sum_dims.data(), sum_dims.size());
+    bool                 keep_dims  = false;
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_mul_result, dims_array, keep_dims, ACL_FLOAT, acl_sum_per_sample);
+    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_mul_result.get(), dims_array.get(), keep_dims, ACL_FLOAT,
+                            acl_sum_per_sample.get());
  
-    size_t total_sum_type_size = sizeof(float);
-    int64_t total_sum_n_bytes = 1 * total_sum_type_size;
+    size_t               total_sum_type_size = sizeof(float);
+    int64_t              total_sum_n_bytes   = 1 * total_sum_type_size;
      ggml_cann_pool_alloc total_sum_allocator(ctx.pool(), total_sum_n_bytes);
-    void * total_sum_buffer = total_sum_allocator.get();
+    void *               total_sum_buffer = total_sum_allocator.get();
  
-    int64_t total_sum_ne[] = {1};
-    size_t total_sum_nb[1];
+    int64_t total_sum_ne[] = { 1 };
+    size_t  total_sum_nb[1];
      total_sum_nb[0] = total_sum_type_size;
  
-    aclTensor * acl_total_sum = ggml_cann_create_tensor(total_sum_buffer, ACL_FLOAT, total_sum_type_size, total_sum_ne, total_sum_nb, 1);
-
-    std::vector<int64_t> total_sum_dims = {0};
-    aclIntArray * total_sum_dims_array = aclCreateIntArray(total_sum_dims.data(), total_sum_dims.size());
+    acl_tensor_ptr acl_total_sum =
+        ggml_cann_create_tensor(total_sum_buffer, ACL_FLOAT, total_sum_type_size, total_sum_ne, total_sum_nb, 1);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_sum_per_sample, total_sum_dims_array, keep_dims, ACL_FLOAT, acl_total_sum);
+    std::vector<int64_t> total_sum_dims    = { 0 };
+    acl_int_array_ptr total_sum_dims_array = ggml_cann_create_int_array(total_sum_dims.data(), total_sum_dims.size());
  
-    float value = -1.0f / static_cast<float>(nr);
-    aclScalar * scale_factor = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst->data, ACL_FLOAT, sizeof(float), total_sum_ne, total_sum_nb, 1);
+    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_sum_per_sample.get(), total_sum_dims_array.get(), keep_dims, ACL_FLOAT,
+                            acl_total_sum.get());
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_total_sum, scale_factor, acl_dst);
+    float          value        = -1.0f / static_cast<float>(nr);
+    acl_scalar_ptr scale_factor = ggml_cann_create_scalar(&value, aclDataType::ACL_FLOAT);
+    acl_tensor_ptr acl_dst =
+        ggml_cann_create_tensor(dst->data, ACL_FLOAT, sizeof(float), total_sum_ne, total_sum_nb, 1);
  
-    ggml_cann_release_resources(ctx, acl_logits, acl_log_softmax, acl_labels, acl_mul_result, acl_sum_per_sample, acl_total_sum, acl_dst, scale_factor, dims_array, total_sum_dims_array);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_total_sum.get(), scale_factor.get(), acl_dst.get());
  }
  
  void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      int n_groups = dst->op_params[0];
  
@@ -585,13 +568,12 @@ void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
  
      ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes * 2);
      void *               buffer       = temp_buffer_allocator.get();
-    aclTensor *          acl_mean_out = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
-    aclTensor *          acl_rstd_out =
+    acl_tensor_ptr       acl_mean_out = ggml_cann_create_tensor(buffer, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
+    acl_tensor_ptr       acl_rstd_out =
          ggml_cann_create_tensor((char *) buffer + n_bytes, ACL_FLOAT, type_size, ne, nb, ACL_FORMAT_ND);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, GroupNorm, acl_src, nullptr, nullptr, N, C, HxW, n_groups, eps, acl_dst, acl_mean_out,
-                            acl_rstd_out);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, acl_mean_out, acl_rstd_out);
+    GGML_CANN_CALL_ACLNN_OP(ctx, GroupNorm, acl_src.get(), nullptr, nullptr, N, C, HxW, n_groups, eps, acl_dst.get(),
+                            acl_mean_out.get(), acl_rstd_out.get());
  }
  
  void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -606,24 +588,24 @@ void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
  
      size_t param_nb[] = { ggml_element_size(src0), nb1, nb2, nb3 };
  
-    aclTensor * acl_dst  = ggml_cann_create_tensor(dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
-    aclTensor * acl_src1 = ggml_cann_create_tensor(src1);
+    acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
+    acl_tensor_ptr acl_src1 = ggml_cann_create_tensor(src1);
  
-    aclScalar * alpha      = nullptr;
-    float       alphaValue = 1.0f;
-    alpha                  = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr alpha      = nullptr;
+    float          alphaValue = 1.0f;
+    alpha                     = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
  
      if (!inplace) {
          size_t cpy_size = ggml_nbytes(dst);
-        ggml_cann_async_memcpy(ctx, dst->data, src0->data, cpy_size, ACL_MEMCPY_DEVICE_TO_DEVICE);
-        aclTensor * acl_src0 = ggml_cann_create_tensor(src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
+        ACL_CHECK(
+            aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size, ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
+        acl_tensor_ptr acl_src0 =
+            ggml_cann_create_tensor(src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
  
-        GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0, acl_src1, alpha, acl_dst);
-        ggml_cann_release_resources(ctx, acl_src0);
+        GGML_CANN_CALL_ACLNN_OP(ctx, Add, acl_src0.get(), acl_src1.get(), alpha.get(), acl_dst.get());
      } else {
-        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst, acl_src1, alpha);
+        GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst.get(), acl_src1.get(), alpha.get());
      }
-    ggml_cann_release_resources(ctx, acl_src1, acl_dst);
  }
  
  /**
@@ -638,13 +620,13 @@ void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
   */
  static void aclnn_reduce_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst, int64_t * dim, size_t dim_size) {
      GGML_ASSERT(dst->ne[0] == 1);
-    ggml_tensor * src         = dst->src[0];
-    aclTensor *   acl_src     = ggml_cann_create_tensor(src);
-    aclTensor *   acl_dst     = ggml_cann_create_tensor(dst);
-    aclIntArray * reduce_dims = aclCreateIntArray(dim, dim_size);
+    ggml_tensor *     src         = dst->src[0];
+    acl_tensor_ptr    acl_src     = ggml_cann_create_tensor(src);
+    acl_tensor_ptr    acl_dst     = ggml_cann_create_tensor(dst);
+    acl_int_array_ptr reduce_dims = ggml_cann_create_int_array(dim, dim_size);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_src, reduce_dims, true, ggml_cann_type_mapping(dst->type), acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, reduce_dims);
+    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_src.get(), reduce_dims.get(), true, ggml_cann_type_mapping(dst->type),
+                            acl_dst.get());
  }
  
  void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -658,15 +640,14 @@ void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
  }
  
  void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
-    ggml_tensor * src     = dst->src[0];
-    aclTensor *   acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
-    aclTensor *   acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    ggml_tensor *  src     = dst->src[0];
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
  
      std::vector<int64_t> output_size{ dst->ne[1], dst->ne[0] };
-    auto                 output_size_array = aclCreateIntArray(output_size.data(), 2);
+    acl_int_array_ptr    output_size_array = ggml_cann_create_int_array(output_size.data(), 2);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, UpsampleNearest2d, acl_src, output_size_array, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, output_size_array);
+    GGML_CANN_CALL_ACLNN_OP(ctx, UpsampleNearest2d, acl_src.get(), output_size_array.get(), acl_dst.get());
  }
  
  /**
@@ -688,17 +669,16 @@ static void aclnn_pad(ggml_backend_cann_context & ctx,
                        aclTensor *                 acl_dst,
                        int64_t *                   paddings,
                        float                       value = 0.0f) {
-    aclIntArray * acl_pad   = aclCreateIntArray(paddings, GGML_MAX_DIMS * 2);
-    aclScalar *   acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
+    acl_int_array_ptr acl_pad   = ggml_cann_create_int_array(paddings, GGML_MAX_DIMS * 2);
+    acl_scalar_ptr    acl_value = ggml_cann_create_scalar(&value, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, ConstantPadNd, acl_src, acl_pad, acl_value, acl_dst);
-    ggml_cann_release_resources(ctx, acl_pad, acl_value);
+    GGML_CANN_CALL_ACLNN_OP(ctx, ConstantPadNd, acl_src, acl_pad.get(), acl_value.get(), acl_dst);
  }
  
  void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
-    ggml_tensor * src     = dst->src[0];
-    aclTensor *   acl_src = ggml_cann_create_tensor(src);
-    aclTensor *   acl_dst = ggml_cann_create_tensor(dst);
+    ggml_tensor *  src     = dst->src[0];
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      // padding: value in the array means how much distance will be padding.
      // the position of elements in the array means which dirction to padding,
@@ -714,8 +694,7 @@ void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      const int32_t rp3 = ggml_get_op_params_i32(dst, 7);
  
      int64_t paddings[] = { lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3 };
-    aclnn_pad(ctx, acl_src, acl_dst, paddings);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst);
+    aclnn_pad(ctx, acl_src.get(), acl_dst.get(), paddings);
  }
  
  /**
@@ -735,8 +714,8 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
      GGML_ASSERT(src->type == GGML_TYPE_F32);
      GGML_ASSERT(dst->type == GGML_TYPE_F32);
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
  
      const int32_t * opts = (const int32_t *) dst->op_params;
      const int       k0   = opts[1];
@@ -750,9 +729,9 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
      std::vector<int64_t> stride_dims      = { s1, s0 };
      std::vector<int64_t> padding_avg_dims = { p1, p0 };  // (padH, padW)
  
-    auto * kernel_size  = aclCreateIntArray(kernel_dims.data(), 2);
-    auto * strides      = aclCreateIntArray(stride_dims.data(), 2);
-    auto * paddings_avg = aclCreateIntArray(padding_avg_dims.data(), 2);
+    acl_int_array_ptr kernel_size  = ggml_cann_create_int_array(kernel_dims.data(), 2);
+    acl_int_array_ptr strides      = ggml_cann_create_int_array(stride_dims.data(), 2);
+    acl_int_array_ptr paddings_avg = ggml_cann_create_int_array(padding_avg_dims.data(), 2);
  
      bool    ceil_mode         = false;
      bool    count_include_pad = true;
@@ -762,9 +741,8 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
      cube_math_type = 1;
  #endif
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, AvgPool2d, acl_src, kernel_size, strides, paddings_avg, ceil_mode, count_include_pad,
-                            divisor_override, cube_math_type, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, kernel_size, strides, paddings_avg);
+    GGML_CANN_CALL_ACLNN_OP(ctx, AvgPool2d, acl_src.get(), kernel_size.get(), strides.get(), paddings_avg.get(),
+                            ceil_mode, count_include_pad, divisor_override, cube_math_type, acl_dst.get());
  }
  
  /**
@@ -784,8 +762,8 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
      GGML_ASSERT(src->type == GGML_TYPE_F32);
      GGML_ASSERT(dst->type == GGML_TYPE_F32);
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
  
      const int32_t * opts = (const int32_t *) dst->op_params;
      const int       k0   = opts[1];
@@ -805,13 +783,13 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
  
      ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), ggml_nbytes(src) + p0 * 2 + p1 * 2 * src->nb[1]);
      void *               buffer = temp_buffer_allocator.get();
-    aclTensor * tmp_tensor      = ggml_cann_create_tensor(buffer, ACL_FLOAT, ggml_element_size(src), temp_ne, temp_nb,
+    acl_tensor_ptr tmp_tensor   = ggml_cann_create_tensor(buffer, ACL_FLOAT, ggml_element_size(src), temp_ne, temp_nb,
                                                            GGML_MAX_DIMS, ACL_FORMAT_NCHW);
  
      // pad: see padding in ggml_cann_pad()
      int64_t paddings[] = { p0, p0, p1, p1, 0, 0, 0, 0 };
      float   value      = -FLT_MAX;
-    aclnn_pad(ctx, acl_src, tmp_tensor, paddings, value);
+    aclnn_pad(ctx, acl_src.get(), tmp_tensor.get(), paddings, value);
  
      // max_pool
      std::vector<int64_t> kernel_dims      = { k1, k0 };
@@ -819,16 +797,15 @@ static void ggml_cann_max_pool2d(ggml_backend_cann_context & ctx, ggml_tensor *
      // padding_max_dims: [dim0_start, dim0_end, dim1_start, dim1_end]
      std::vector<int64_t> padding_max_dims = { 0, 0, 0, 0 };
      std::vector<int64_t> dilation_size    = { 1, 1 };
-    auto *               kernel_size      = aclCreateIntArray(kernel_dims.data(), 2);
-    auto *               strides          = aclCreateIntArray(stride_dims.data(), 2);
-    auto *               paddings_max     = aclCreateIntArray(padding_max_dims.data(), 4);
-    auto *               dilations        = aclCreateIntArray(dilation_size.data(), 2);
+    acl_int_array_ptr    kernel_size      = ggml_cann_create_int_array(kernel_dims.data(), 2);
+    acl_int_array_ptr    strides          = ggml_cann_create_int_array(stride_dims.data(), 2);
+    acl_int_array_ptr    paddings_max     = ggml_cann_create_int_array(padding_max_dims.data(), 4);
+    acl_int_array_ptr    dilations        = ggml_cann_create_int_array(dilation_size.data(), 2);
  
      bool    ceil_mode = false;
      int64_t auto_pads = 0;
-    GGML_CANN_CALL_ACLNN_OP(ctx, MaxPool, tmp_tensor, kernel_size, strides, auto_pads, paddings_max, dilations,
-                            ceil_mode, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, tmp_tensor, kernel_size, strides, paddings_max, dilations);
+    GGML_CANN_CALL_ACLNN_OP(ctx, MaxPool, tmp_tensor.get(), kernel_size.get(), strides.get(), auto_pads,
+                            paddings_max.get(), dilations.get(), ceil_mode, acl_dst.get());
  }
  
  void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -865,19 +842,18 @@ void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
  
      if (ggml_are_same_shape(src0, dst)) {
-        aclTensor * acl_src = ggml_cann_create_tensor(src0);
-        aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+        acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
+        acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
          if (dst->type == src0->type) {
-            cann_copy(ctx, acl_src, acl_dst);
+            cann_copy(ctx, acl_src.get(), acl_dst.get());
          } else {
-            aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
+            aclnn_cast(ctx, acl_src.get(), acl_dst.get(), ggml_cann_type_mapping(dst->type));
          }
-        ggml_cann_release_resources(ctx, acl_src, acl_dst);
      } else {
          void *               src_trans_buffer = src0->data;
          ggml_cann_pool_alloc src_buffer_allocator;
          if (!ggml_is_contiguous(src0)) {
-            aclTensor * acl_src = ggml_cann_create_tensor(src0);
+            acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
              src_buffer_allocator.alloc(ctx.pool(), ggml_nelements(src0) * ggml_type_size(src0->type));
              src_trans_buffer = src_buffer_allocator.get();
              size_t src_trans_nb[GGML_MAX_DIMS];
@@ -885,11 +861,10 @@ void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
              for (int i = 1; i < GGML_MAX_DIMS; i++) {
                  src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
              }
-            aclTensor * src_trans_tensor =
+            acl_tensor_ptr src_trans_tensor =
                  ggml_cann_create_tensor(src_trans_buffer, ggml_cann_type_mapping(src0->type),
                                          ggml_type_size(src0->type), src0->ne, src_trans_nb, GGML_MAX_DIMS);
-            cann_copy(ctx, acl_src, src_trans_tensor);
-            ggml_cann_release_resources(ctx, acl_src, src_trans_tensor);
+            cann_copy(ctx, acl_src.get(), src_trans_tensor.get());
          }
  
          size_t src_reshape_nb[GGML_MAX_DIMS];
@@ -898,19 +873,17 @@ void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
              src_reshape_nb[i] = src_reshape_nb[i - 1] * dst->ne[i - 1];
          }
  
-        aclTensor * trans_acl_src =
+        acl_tensor_ptr trans_acl_src =
              ggml_cann_create_tensor(src_trans_buffer, ggml_cann_type_mapping(src0->type), ggml_type_size(src0->type),
                                      dst->ne, src_reshape_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
-        aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+        acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
          if (dst->type == src0->type) {
-            cann_copy(ctx, trans_acl_src, acl_dst);
+            cann_copy(ctx, trans_acl_src.get(), acl_dst.get());
          } else {
-            aclnn_cast(ctx, trans_acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
+            aclnn_cast(ctx, trans_acl_src.get(), acl_dst.get(), ggml_cann_type_mapping(dst->type));
          }
-        ggml_cann_release_resources(ctx, trans_acl_src, acl_dst);
      }
-    return;
  }
  
  /**
@@ -927,23 +900,23 @@ void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
   * @param dims The number of dimensions of the tensor.
   * @param type The data type of the tensor.
   * @param type_size The size of each element in the tensor data type.
- * @return An ACL tensor initialized with zeros.
+ * @return A tensor smart pointer initialized with zeros.
   */
-static aclTensor * aclnn_zero(ggml_backend_cann_context & ctx,
-                              void *                      buffer,
-                              size_t                      n_bytes,
-                              int64_t *                   ne,
-                              int64_t                     dims,
-                              aclDataType                 type,
-                              size_t                      type_size) {
+static acl_tensor_ptr aclnn_zero(ggml_backend_cann_context & ctx,
+                                 void *                      buffer,
+                                 size_t                      n_bytes,
+                                 int64_t *                   ne,
+                                 int64_t                     dims,
+                                 aclDataType                 type,
+                                 size_t                      type_size) {
      size_t nb[GGML_MAX_DIMS];
      nb[0] = type_size;
      for (int i = 1; i < dims; i++) {
          nb[i] = nb[i - 1] * ne[i - 1];
      }
  
-    aclTensor * zero = ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
+    acl_tensor_ptr zero = ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero.get());
      return zero;
      GGML_UNUSED(n_bytes);
  }
@@ -964,21 +937,21 @@ static aclTensor * aclnn_zero(ggml_backend_cann_context & ctx,
   * @param type_size The size of each element in the tensor data type.
   * @param value The value to be used for initializing the tensor (default
   * is 1.0).
- * @return An ACL tensor initialized with value.
+ * @return A tensor smart pointer initialized with value.
   */
-static aclTensor * aclnn_values(ggml_backend_cann_context & ctx,
-                                void *                      buffer,
-                                size_t                      n_bytes,
-                                int64_t *                   ne,
-                                int64_t                     dims,
-                                aclDataType                 type,
-                                size_t                      type_size,
-                                float                       value = 1.0f) {
-    aclTensor * acl_tensor = aclnn_zero(ctx, buffer, n_bytes, ne, dims, type, type_size);
-    float       alpha_host = 1.0f;
-    aclScalar * alpha      = aclCreateScalar(&alpha_host, aclDataType::ACL_FLOAT);
-    aclScalar * other      = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdds, acl_tensor, other, alpha);
+static acl_tensor_ptr aclnn_values(ggml_backend_cann_context & ctx,
+                                   void *                      buffer,
+                                   size_t                      n_bytes,
+                                   int64_t *                   ne,
+                                   int64_t                     dims,
+                                   aclDataType                 type,
+                                   size_t                      type_size,
+                                   float                       value = 1.0f) {
+    acl_tensor_ptr acl_tensor = aclnn_zero(ctx, buffer, n_bytes, ne, dims, type, type_size);
+    float          alpha_host = 1.0f;
+    acl_scalar_ptr alpha      = ggml_cann_create_scalar(&alpha_host, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr other      = ggml_cann_create_scalar(&value, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdds, acl_tensor.get(), other.get(), alpha.get());
      return acl_tensor;
  }
  
@@ -993,9 +966,8 @@ static aclTensor * aclnn_values(ggml_backend_cann_context & ctx,
   * @param acl_dst The destination tensor to be filled with the scalar value.
   */
  static void aclnn_fill_scalar(ggml_backend_cann_context & ctx, float scalar, aclTensor * acl_dst) {
-    auto acl_scalar = aclCreateScalar(&scalar, aclDataType::ACL_FLOAT);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceFillScalar, acl_dst, acl_scalar);
-    ggml_cann_release_resources(ctx, acl_scalar);
+    acl_scalar_ptr acl_scalar = ggml_cann_create_scalar(&scalar, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceFillScalar, acl_dst, acl_scalar.get());
  }
  
  /**
@@ -1018,16 +990,16 @@ static void aclnn_fill_scalar(ggml_backend_cann_context & ctx, float scalar, acl
   * @param dims          The number of tensor dimensions.
   * @param value         The scalar value used to fill the tensor (supports zero
   *                      initialization via memset or arbitrary values via fill_scalar).
- * @return              An aclTensor pointer created from the cached buffer.
+ * @return              A tensor smart pointer created from the cached buffer.
   */
-static aclTensor * get_cache_acl_tensor(ggml_backend_cann_context & ctx,
-                                        void **                     buffer,
-                                        int64_t &                   cache_element,
-                                        int64_t *                   ne,
-                                        size_t *                    nb,
-                                        ggml_type                   dtype,
-                                        int64_t                     dims,
-                                        float                       value) {
+static acl_tensor_ptr get_cache_acl_tensor(ggml_backend_cann_context & ctx,
+                                           void **                     buffer,
+                                           int64_t &                   cache_element,
+                                           int64_t *                   ne,
+                                           size_t *                    nb,
+                                           ggml_type                   dtype,
+                                           int64_t                     dims,
+                                           float                       value) {
      // Calculate total number of elements
      int64_t n_element = 1;
      for (int i = 0; i < dims; i++) {
@@ -1046,12 +1018,11 @@ static aclTensor * get_cache_acl_tensor(ggml_backend_cann_context & ctx,
          cache_element = n_element;
  
          // Initialize cache
-        int64_t     pool_ne[1] = { n_element };
-        size_t      pool_nb[1] = { ggml_type_size(dtype) };
-        aclTensor * acl_value =
+        int64_t        pool_ne[1] = { n_element };
+        size_t         pool_nb[1] = { ggml_type_size(dtype) };
+        acl_tensor_ptr acl_value =
              ggml_cann_create_tensor(*buffer, ggml_cann_type_mapping(dtype), ggml_type_size(dtype), pool_ne, pool_nb, 1);
-        aclnn_fill_scalar(ctx, value, acl_value);
-        ggml_cann_release_resources(ctx, acl_value);
+        aclnn_fill_scalar(ctx, value, acl_value.get());
      }
  
      return ggml_cann_create_tensor(*buffer, ggml_cann_type_mapping(dtype), ggml_type_size(dtype), ne, nb, dims);
@@ -1060,8 +1031,8 @@ static aclTensor * get_cache_acl_tensor(ggml_backend_cann_context & ctx,
  void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      float eps;
      memcpy(&eps, dst->op_params, sizeof(float));
@@ -1073,10 +1044,10 @@ void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      for (int i = 1; i < GGML_MAX_DIMS; i++) {
          acl_gamma_nb[i] = acl_gamma_nb[i - 1] * src->ne[i - 1];
      }
-    aclTensor * acl_gamma = get_cache_acl_tensor(ctx, &ctx.rms_norm_one_tensor_cache.cache,
-                                                 ctx.rms_norm_one_tensor_cache.size, src->ne, acl_gamma_nb, dst->type,
-                                                 1,    // dims
-                                                 1.0f  // value
+    acl_tensor_ptr acl_gamma = get_cache_acl_tensor(
+        ctx, &ctx.rms_norm_one_tensor_cache.cache, ctx.rms_norm_one_tensor_cache.size, src->ne, acl_gamma_nb, dst->type,
+        1,    // dims
+        1.0f  // value
      );
  
      // build rstd.
@@ -1087,41 +1058,38 @@ void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      for (int i = 1; i < GGML_MAX_DIMS - 1; i++) {
          acl_rstd_nb[i] = acl_rstd_nb[i - 1] * acl_rstd_ne[i - 1];
      }
-    aclTensor * acl_rstd =
+    acl_tensor_ptr acl_rstd =
          get_cache_acl_tensor(ctx, &ctx.rms_norm_zero_tensor_cache.cache, ctx.rms_norm_zero_tensor_cache.size,
                               acl_rstd_ne, acl_rstd_nb, GGML_TYPE_F32, GGML_MAX_DIMS - 1,
                               0.0f  // value
          );
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, RmsNorm, acl_src, acl_gamma, eps, acl_dst, acl_rstd);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, acl_gamma, acl_rstd);
+    GGML_CANN_CALL_ACLNN_OP(ctx, RmsNorm, acl_src.get(), acl_gamma.get(), eps, acl_dst.get(), acl_rstd.get());
  }
  
  // TODO: performace is low.
  void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
      const int n_past = ((int32_t *) dst->op_params)[0];
  
      ggml_cann_pool_alloc one_tensor_allocator(ctx.pool(), ggml_nbytes(src));
      void *               buffer = one_tensor_allocator.get();
  
-    aclTensor * mask_tensor = ggml_cann_create_tensor(buffer, ggml_cann_type_mapping(src->type),
-                                                      ggml_type_size(src->type), src->ne, src->nb, GGML_MAX_DIMS);
+    acl_tensor_ptr mask_tensor = ggml_cann_create_tensor(buffer, ggml_cann_type_mapping(src->type),
+                                                         ggml_type_size(src->type), src->ne, src->nb, GGML_MAX_DIMS);
  
-    aclnn_fill_scalar(ctx, value, mask_tensor);
+    aclnn_fill_scalar(ctx, value, mask_tensor.get());
  
-    aclScalar * alpha      = nullptr;
-    float       alphaValue = 1.0f;
-    alpha                  = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    float          alphaValue = 1.0f;
+    acl_scalar_ptr alpha      = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceTriu, mask_tensor, n_past + 1);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Tril, acl_src, n_past + 1, acl_dst);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst, mask_tensor, alpha);
-    ggml_cann_release_resources(ctx, alpha, acl_src, acl_dst, mask_tensor);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceTriu, mask_tensor.get(), n_past + 1);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Tril, acl_src.get(), n_past + 1, acl_dst.get());
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdd, acl_dst.get(), mask_tensor.get(), alpha.get());
  }
  
  /**
@@ -1144,9 +1112,8 @@ static void aclnn_permute(ggml_backend_cann_context & ctx,
                            aclTensor *                 acl_dst,
                            int64_t *                   new_dim,
                            uint64_t                    dims) {
-    aclIntArray * acl_dims = aclCreateIntArray(new_dim, dims);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Permute, acl_src, acl_dims, acl_dst);
-    ggml_cann_release_resources(ctx, acl_dims);
+    acl_int_array_ptr acl_dims = ggml_cann_create_int_array(new_dim, dims);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Permute, acl_src, acl_dims.get(), acl_dst);
  }
  
  static void ggml_cann_im2col_2d_post_process(ggml_backend_cann_context & ctx,
@@ -1155,18 +1122,16 @@ static void ggml_cann_im2col_2d_post_process(ggml_backend_cann_context & ctx,
                                               aclTensor *                 tmp_cast_tensor,
                                               aclTensor *                 tmp_im2col_tensor) {
      // Permute: [N, IC * KH * KW, OW * OH] -> [N, OW * OH, IC * KH * KW]
-    int64_t     dst_ne[] = { dst->ne[0], dst->ne[1] * dst->ne[2], dst->ne[3] };
-    size_t      dst_nb[] = { dst->nb[0], dst->nb[1], dst->nb[3] };
-    aclTensor * acl_dst  = ggml_cann_create_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);
+    int64_t        dst_ne[] = { dst->ne[0], dst->ne[1] * dst->ne[2], dst->ne[3] };
+    size_t         dst_nb[] = { dst->nb[0], dst->nb[1], dst->nb[3] };
+    acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst, dst_ne, dst_nb, GGML_MAX_DIMS - 1);
  
      int64_t permute_dim[] = { 0, 2, 1 };
      if (src1->type != dst->type) {
-        aclnn_permute(ctx, tmp_cast_tensor, acl_dst, permute_dim, 3);
+        aclnn_permute(ctx, tmp_cast_tensor, acl_dst.get(), permute_dim, 3);
      } else {
-        aclnn_permute(ctx, tmp_im2col_tensor, acl_dst, permute_dim, 3);
+        aclnn_permute(ctx, tmp_im2col_tensor, acl_dst.get(), permute_dim, 3);
      }
-
-    ggml_cann_release_resources(ctx, acl_dst);
  }
  
  static void ggml_cann_im2col_1d_post_process(ggml_backend_cann_context &  ctx,
@@ -1201,15 +1166,15 @@ static void ggml_cann_im2col_1d_post_process(ggml_backend_cann_context &  ctx,
          tmp_permute_nb[i] = tmp_permute_nb[i - 1] * tmp_permute_ne[i - 1];
      }
  
-    aclTensor * tmp_permute_tensor =
+    acl_tensor_ptr tmp_permute_tensor =
          ggml_cann_create_tensor(tmp_permute_buffer, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
                                  tmp_permute_ne, tmp_permute_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
  
      int64_t permute_dim[] = { 0, 2, 1 };
      if (src1->type != dst->type) {
-        aclnn_permute(ctx, tmp_cast_tensor, tmp_permute_tensor, permute_dim, 3);
+        aclnn_permute(ctx, tmp_cast_tensor, tmp_permute_tensor.get(), permute_dim, 3);
      } else {
-        aclnn_permute(ctx, tmp_im2col_tensor, tmp_permute_tensor, permute_dim, 3);
+        aclnn_permute(ctx, tmp_im2col_tensor, tmp_permute_tensor.get(), permute_dim, 3);
      }
  
      // number of times the kernel moves in W dimension
@@ -1220,25 +1185,24 @@ static void ggml_cann_im2col_1d_post_process(ggml_backend_cann_context &  ctx,
      // memory copy with offset to restore 1D im2col from 2d
      if (IC > 1) {
          offset          = IC * KH * KW * n_step_w * ggml_type_size(dst->type);
-        size_t size_cpy = KH * KW * ggml_type_size(dst->type);
+        size_t cpy_size = KH * KW * ggml_type_size(dst->type);
  
          for (int c = 0; c < IC; c++) {
              cur_permute_buffer = (char *) tmp_permute_buffer + offset + KH * KW * c * ggml_type_size(dst->type);
              cur_dst_buffer     = (char *) dst->data + c * KH * KW * n_step_w * ggml_type_size(dst->type);
  
              for (int i = 0; i < n_step_w; i++) {
-                ggml_cann_async_memcpy(ctx, cur_dst_buffer, cur_permute_buffer, size_cpy, ACL_MEMCPY_DEVICE_TO_DEVICE);
+                ACL_CHECK(aclrtMemcpyAsync(cur_dst_buffer, cpy_size, cur_permute_buffer, cpy_size,
+                                           ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
                  cur_dst_buffer     = (char *) cur_dst_buffer + KH * KW * ggml_type_size(dst->type);
                  cur_permute_buffer = (char *) cur_permute_buffer + KH * KW * IC * ggml_type_size(dst->type);
              }
          }
      } else {
          offset = KH * KW * n_step_w * ggml_type_size(dst->type);  // equal to ggml_nbytes(dst)
-        ggml_cann_async_memcpy(ctx, dst->data, (char *) tmp_permute_buffer + offset, offset,
-                               ACL_MEMCPY_DEVICE_TO_DEVICE);
+        ACL_CHECK(aclrtMemcpyAsync(dst->data, offset, (char *) tmp_permute_buffer + offset, offset,
+                                   ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
      }
-
-    ggml_cann_release_resources(ctx, tmp_permute_tensor);
  }
  
  void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -1270,9 +1234,9 @@ void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      const int64_t n_bytes_factor = is_2D ? 1 : 3;
  
      // im2col: [N,C,H,W] -> [N, IC * KH * KW, OW * OH * n_bytes_factor]
-    aclTensor * acl_src1        = ggml_cann_create_tensor(src1);
-    int64_t     tmp_im2col_ne[] = { OW * OH * n_bytes_factor, IC * KH * KW, N };
-    size_t      tmp_im2col_nb[GGML_MAX_DIMS - 1];
+    acl_tensor_ptr acl_src1        = ggml_cann_create_tensor(src1);
+    int64_t        tmp_im2col_ne[] = { OW * OH * n_bytes_factor, IC * KH * KW, N };
+    size_t         tmp_im2col_nb[GGML_MAX_DIMS - 1];
  
      tmp_im2col_nb[0] = ggml_type_size(src1->type);
      for (int i = 1; i < GGML_MAX_DIMS - 1; i++) {
@@ -1285,7 +1249,7 @@ void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_cann_pool_alloc im2col_allocator(ctx.pool(), ggml_nelements(dst) * ggml_element_size(src1) * n_bytes_factor);
      void *               tmp_im2col_buffer = im2col_allocator.get();
  
-    aclTensor * tmp_im2col_tensor =
+    acl_tensor_ptr tmp_im2col_tensor =
          ggml_cann_create_tensor(tmp_im2col_buffer, ggml_cann_type_mapping(src1->type), ggml_type_size(src1->type),
                                  tmp_im2col_ne, tmp_im2col_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
  
@@ -1293,14 +1257,15 @@ void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      std::vector<int64_t> dilation_size = { d1, d0 };
      std::vector<int64_t> padding_dims  = { p1, p0 };
      std::vector<int64_t> stride_dims   = { s1, s0 };
-    auto *               kernel_size   = aclCreateIntArray(kernel_dims.data(), 2);
-    auto *               dilations     = aclCreateIntArray(dilation_size.data(), 2);
-    auto *               paddings      = aclCreateIntArray(padding_dims.data(), 2);
-    auto *               strides       = aclCreateIntArray(stride_dims.data(), 2);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Im2col, acl_src1, kernel_size, dilations, paddings, strides, tmp_im2col_tensor);
+    acl_int_array_ptr    kernel_size   = ggml_cann_create_int_array(kernel_dims.data(), 2);
+    acl_int_array_ptr    dilations     = ggml_cann_create_int_array(dilation_size.data(), 2);
+    acl_int_array_ptr    paddings      = ggml_cann_create_int_array(padding_dims.data(), 2);
+    acl_int_array_ptr    strides       = ggml_cann_create_int_array(stride_dims.data(), 2);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Im2col, acl_src1.get(), kernel_size.get(), dilations.get(), paddings.get(),
+                            strides.get(), tmp_im2col_tensor.get());
  
      // Cast if dst is f16.
-    aclTensor *          tmp_cast_tensor = nullptr;
+    acl_tensor_ptr       tmp_cast_tensor;
      ggml_cann_pool_alloc tmp_cast_allocator(ctx.pool());
      void *               tmp_cast_buffer = nullptr;
      if (src1->type != dst->type) {
@@ -1315,19 +1280,17 @@ void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
          tmp_cast_tensor =
              ggml_cann_create_tensor(tmp_cast_buffer, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
                                      tmp_im2col_ne, temp_cast_nb, GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
-        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, ggml_cann_type_mapping(dst->type));
+        aclnn_cast(ctx, tmp_im2col_tensor.get(), tmp_cast_tensor.get(), ggml_cann_type_mapping(dst->type));
      }
  
      // post-processing
      if (is_2D) {
-        ggml_cann_im2col_2d_post_process(ctx, dst, src1, tmp_cast_tensor, tmp_im2col_tensor);
+        ggml_cann_im2col_2d_post_process(ctx, dst, src1, tmp_cast_tensor.get(), tmp_im2col_tensor.get());
      } else {
          std::vector<int64_t> im2col_op_params = { KH, KW, IW, IC, N, OH, OW, s0, p0, d0, n_bytes_factor };
-        ggml_cann_im2col_1d_post_process(ctx, dst, src1, tmp_cast_tensor, tmp_im2col_tensor, im2col_op_params);
+        ggml_cann_im2col_1d_post_process(ctx, dst, src1, tmp_cast_tensor.get(), tmp_im2col_tensor.get(),
+                                         im2col_op_params);
      }
-
-    ggml_cann_release_resources(ctx, acl_src1, tmp_im2col_tensor, tmp_cast_tensor, kernel_size, dilations, paddings,
-                                strides);
  }
  
  /**
@@ -1373,7 +1336,7 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
      const int max_period = dst->op_params[1];
      int       half       = dim / 2;
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
  
      // arange: [0, ..., half)
      float   start             = 0;
@@ -1385,17 +1348,17 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
  
      ggml_cann_pool_alloc arange_allocator(ctx.pool(), half * sizeof(dst->type));
      void *               tmp_arange_buffer = arange_allocator.get();
-    aclTensor *          tmp_arange_tensor =
+    acl_tensor_ptr       tmp_arange_tensor =
          ggml_cann_create_tensor(tmp_arange_buffer, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
                                  tmp_arange_ne, tmp_arange_nb, GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
  
-    aclnn_arange(ctx, tmp_arange_tensor, start, stop, step, n_elements_arange);
+    aclnn_arange(ctx, tmp_arange_tensor.get(), start, stop, step, n_elements_arange);
  
      // freq
      float freq_param = -logf(max_period) / half;
      bool  inplace    = true;
-    aclnn_muls(ctx, tmp_arange_tensor, freq_param, nullptr, inplace);
-    aclnn_exp(ctx, tmp_arange_tensor);
+    aclnn_muls(ctx, tmp_arange_tensor.get(), freq_param, nullptr, inplace);
+    aclnn_exp(ctx, tmp_arange_tensor.get());
  
      // permute: src [0,1,2,3]->[0,1,3,2]
      int64_t tmp_permute_ne[] = { src->ne[1], src->ne[0], src->ne[2], src->ne[3] };
@@ -1407,12 +1370,12 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
  
      ggml_cann_pool_alloc permute_allocator(ctx.pool(), ggml_nbytes(src));
      void *               tmp_permute_buffer = permute_allocator.get();
-    aclTensor *          tmp_permute_tensor =
+    acl_tensor_ptr       tmp_permute_tensor =
          ggml_cann_create_tensor(tmp_permute_buffer, ggml_cann_type_mapping(src->type), ggml_type_size(src->type),
                                  tmp_permute_ne, tmp_permute_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
      int64_t permute_dim[] = { 0, 1, 3, 2 };
      int64_t num_dims      = 4;
-    aclnn_permute(ctx, acl_src, tmp_permute_tensor, permute_dim, num_dims);
+    aclnn_permute(ctx, acl_src.get(), tmp_permute_tensor.get(), permute_dim, num_dims);
  
      // timestep * freq
      int64_t tmp_mul_ne[] = { src->ne[1] * half, src->ne[0], src->ne[2], src->ne[3] };
@@ -1426,40 +1389,34 @@ void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor *
  
      ggml_cann_pool_alloc mul_allocator(ctx.pool(), mul_nelements * ggml_type_size(src->type));
      void *               tmp_mul_buffer = mul_allocator.get();
-    aclTensor *          tmp_mul_tensor =
+    acl_tensor_ptr       tmp_mul_tensor =
          ggml_cann_create_tensor(tmp_mul_buffer, ggml_cann_type_mapping(src->type), ggml_type_size(src->type),
                                  tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_mul(ctx, tmp_permute_tensor, tmp_arange_tensor, tmp_mul_tensor);
+    aclnn_mul(ctx, tmp_permute_tensor.get(), tmp_arange_tensor.get(), tmp_mul_tensor.get());
  
      // cos
      ggml_cann_pool_alloc cos_allocator(ctx.pool(), mul_nelements * ggml_type_size(src->type));
      void *               tmp_cos_buffer = cos_allocator.get();
-    aclTensor *          tmp_cos_tensor =
+    acl_tensor_ptr       tmp_cos_tensor =
          ggml_cann_create_tensor(tmp_cos_buffer, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
                                  tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
  
-    aclnn_cos(ctx, tmp_mul_tensor, tmp_cos_tensor);
+    aclnn_cos(ctx, tmp_mul_tensor.get(), tmp_cos_tensor.get());
  
      // sin
      ggml_cann_pool_alloc sin_allocator(ctx.pool(), mul_nelements * ggml_type_size(src->type));
      void *               tmp_sin_buffer = sin_allocator.get();
-    aclTensor *          tmp_sin_tensor =
+    acl_tensor_ptr       tmp_sin_tensor =
          ggml_cann_create_tensor(tmp_sin_buffer, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
                                  tmp_mul_ne, tmp_mul_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
  
-    aclnn_sin(ctx, tmp_mul_tensor, tmp_sin_tensor);
+    aclnn_sin(ctx, tmp_mul_tensor.get(), tmp_sin_tensor.get());
  
      // concat
-    int64_t         concat_dim  = 3;
-    aclTensor *     acl_dst     = ggml_cann_create_tensor(dst);
-    aclTensor *     tensors[]   = { tmp_cos_tensor, tmp_sin_tensor };
-    aclTensorList * tensor_list = aclCreateTensorList(tensors, 2);
-    aclnn_concat(ctx, tensor_list, acl_dst, concat_dim);
-
-    // release
-    // segmentation fault when delete both tensorList and his elements.
-    ggml_cann_release_resources(ctx, tensor_list, acl_src, tmp_arange_tensor, tmp_permute_tensor, tmp_mul_tensor,
-                                acl_dst);
+    int64_t             concat_dim  = 3;
+    acl_tensor_ptr      acl_dst     = ggml_cann_create_tensor(dst);
+    acl_tensor_list_ptr tensor_list = ggml_cann_create_tensor_list(tmp_cos_tensor, tmp_sin_tensor);
+    aclnn_concat(ctx, tensor_list.get(), acl_dst.get(), concat_dim);
  }
  
  /**
@@ -1520,15 +1477,14 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context & ctx,
      ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * type_size);
      void *               arange_buffer = arange_allocator.get();
  
-    aclTensor * arange_tensor = ggml_cann_create_tensor(arange_buffer, acl_type, type_size, ne, nb, 1);
-    aclnn_arange(ctx, arange_tensor, start, stop, step, size);
+    acl_tensor_ptr arange_tensor = ggml_cann_create_tensor(arange_buffer, acl_type, type_size, ne, nb, 1);
+    aclnn_arange(ctx, arange_tensor.get(), start, stop, step, size);
  
-    aclTensor * slope_tensor = ggml_cann_create_tensor(slope_buffer, acl_type, type_size, ne, nb, 1);
+    acl_tensor_ptr slope_tensor = ggml_cann_create_tensor(slope_buffer, acl_type, type_size, ne, nb, 1);
  
-    aclScalar * sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr sc = ggml_cann_create_scalar(&m, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, sc, arange_tensor, slope_tensor);
-    ggml_cann_release_resources(ctx, sc, arange_tensor, slope_tensor);
+    GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, sc.get(), arange_tensor.get(), slope_tensor.get());
  }
  
  /**
@@ -1649,13 +1605,13 @@ static void aclnn_add_alibi(ggml_backend_cann_context & ctx,
          slope_nb[i] = slope_nb[i - 1] * slope_ne[i - 1];
      }
  
-    aclTensor * acl_slope =
+    acl_tensor_ptr acl_slope =
          ggml_cann_create_tensor(slope_buffer, ACL_FLOAT, sizeof(float), slope_ne, slope_nb, GGML_MAX_DIMS + 2);
-    aclTensor * acl_mask = ggml_cann_create_tensor(mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
+    acl_tensor_ptr acl_mask = ggml_cann_create_tensor(mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
  
      // write data into dst_ptr using only the shape information of the dst tensor.
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst_ptr, ggml_cann_type_mapping(dst->type), ggml_type_size(dst->type),
-                                                  dst_ne, dst_nb, GGML_MAX_DIMS + 2);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst_ptr, ggml_cann_type_mapping(dst->type),
+                                                     ggml_type_size(dst->type), dst_ne, dst_nb, GGML_MAX_DIMS + 2);
  
      if (max_bias > 0.0f) {
          int64_t bias_ne[] = { mask->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], 1 };
@@ -1664,16 +1620,14 @@ static void aclnn_add_alibi(ggml_backend_cann_context & ctx,
          for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
              bias_nb[i] = bias_nb[i - 1] * bias_ne[i - 1];
          }
-        aclTensor * bias_tensor =
+        acl_tensor_ptr bias_tensor =
              ggml_cann_create_tensor(bias_buffer, ACL_FLOAT, sizeof(float), bias_ne, bias_nb, GGML_MAX_DIMS + 2);
  
-        aclnn_mul(ctx, acl_slope, acl_mask, bias_tensor);
-        aclnn_add(ctx, acl_dst, bias_tensor);
-        ggml_cann_release_resources(ctx, bias_tensor);
+        aclnn_mul(ctx, acl_slope.get(), acl_mask.get(), bias_tensor.get());
+        aclnn_add(ctx, acl_dst.get(), bias_tensor.get());
      } else {
-        aclnn_add(ctx, acl_dst, acl_mask);
+        aclnn_add(ctx, acl_dst.get(), acl_mask.get());
      }
-    ggml_cann_release_resources(ctx, acl_slope, acl_mask, acl_dst);
  }
  
  void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -1702,8 +1656,8 @@ void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
      ggml_tensor * src1 = dst->src[1];  // mask
  
-    aclTensor * acl_src0 = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst  = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src0 = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst);
  
      float scale    = 1.0f;
      float max_bias = 0.0f;
@@ -1712,21 +1666,20 @@ void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
  
      // input mul scale
-    aclScalar *          acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
+    acl_scalar_ptr       acl_scale = ggml_cann_create_scalar(&scale, aclDataType::ACL_FLOAT);
      ggml_cann_pool_alloc src_tensor_allocator(ctx.pool(), ggml_nbytes(src0));
      void *               src_tensor_buffer = src_tensor_allocator.get();
-    aclTensor *          softmax_tensor = ggml_cann_create_tensor(src_tensor_buffer, ggml_cann_type_mapping(src0->type),
+    acl_tensor_ptr       softmax_tensor = ggml_cann_create_tensor(src_tensor_buffer, ggml_cann_type_mapping(src0->type),
                                                                    ggml_element_size(src0), src0->ne, src0->nb, GGML_MAX_DIMS);
  
-    aclnn_muls(ctx, acl_src0, scale, softmax_tensor, false);
+    aclnn_muls(ctx, acl_src0.get(), scale, softmax_tensor.get(), false);
  
      // mask
      if (src1) {
          aclnn_add_alibi(ctx, src1, src0, src_tensor_buffer, max_bias);
      }
      // softmax
-    aclnn_softmax(ctx, softmax_tensor, 3, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src0, acl_dst, acl_scale, softmax_tensor);
+    aclnn_softmax(ctx, softmax_tensor.get(), 3, acl_dst.get());
  }
  
  /**
@@ -1760,21 +1713,20 @@ static void aclnn_index_select_4d(ggml_backend_cann_context & ctx,
      for (int64_t i = 0; i < src_ne[3]; i++) {
          for (int64_t j = 0; j < src_ne[2]; j++) {
              // src
-            aclTensor * acl_src_tensor =
+            acl_tensor_ptr acl_src_tensor =
                  ggml_cann_create_tensor((char *) src_buffer + i * src_nb[3] + j * src_nb[2],
                                          ggml_cann_type_mapping(type), ggml_type_size(type), src_ne, src_nb, 2);
  
              // index
-            aclTensor * acl_index = ggml_cann_create_tensor(
+            acl_tensor_ptr acl_index = ggml_cann_create_tensor(
                  (char *) index->data + (i % index->ne[2]) * index->nb[2] + (j % index->ne[1]) * index->nb[1],
                  ggml_cann_type_mapping(index->type), ggml_element_size(index), index->ne, index->nb, 1);
  
              // out
-            aclTensor * acl_out =
+            acl_tensor_ptr acl_out =
                  ggml_cann_create_tensor((char *) dst_buffer + i * dst_nb[3] + j * dst_nb[2],
                                          ggml_cann_type_mapping(type), ggml_type_size(type), dst_ne, dst_nb, 2);
-            GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, acl_src_tensor, 0, acl_index, acl_out);
-            ggml_cann_release_resources(ctx, acl_src_tensor, acl_index, acl_out);
+            GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, acl_src_tensor.get(), 0, acl_index.get(), acl_out.get());
          }
      }
  }
@@ -1811,21 +1763,20 @@ static void aclnn_index_copy_4d(ggml_backend_cann_context & ctx,
      for (int64_t i = 0; i < src_ne[3]; i++) {
          for (int64_t j = 0; j < src_ne[2]; j++) {
              // src
-            aclTensor * acl_src_tensor =
+            acl_tensor_ptr acl_src_tensor =
                  ggml_cann_create_tensor((char *) src_buffer + i * src_nb[3] + j * src_nb[2],
                                          ggml_cann_type_mapping(type), ggml_type_size(type), src_ne, src_nb, 2);
  
              // index
-            aclTensor * acl_index = ggml_cann_create_tensor(
+            acl_tensor_ptr acl_index = ggml_cann_create_tensor(
                  (char *) index->data + (i % index->ne[2]) * index->nb[2] + (j % index->ne[1]) * index->nb[1],
                  ggml_cann_type_mapping(index->type), ggml_element_size(index), index->ne, index->nb, 1);
  
              // out
-            aclTensor * acl_out =
+            acl_tensor_ptr acl_out =
                  ggml_cann_create_tensor((char *) dst_buffer + i * dst_nb[3] + j * dst_nb[2],
                                          ggml_cann_type_mapping(type), ggml_type_size(type), dst_ne, dst_nb, 2);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexCopy, acl_out, 0, acl_index, acl_src_tensor);
-            ggml_cann_release_resources(ctx, acl_src_tensor, acl_index, acl_out);
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexCopy, acl_out.get(), 0, acl_index.get(), acl_src_tensor.get());
          }
      }
  }
@@ -1843,7 +1794,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                  aclnn_index_select_4d(ctx, src0->data, src0->ne, src0->nb, dst->data, dst->ne, dst->nb, src1,
                                        dst->type);
              } else {
-                aclTensor *          acl_src0 = ggml_cann_create_tensor(src0);
+                acl_tensor_ptr       acl_src0 = ggml_cann_create_tensor(src0);
                  ggml_cann_pool_alloc src_buffer_allocator(ctx.pool(), ggml_nelements(src0) * ggml_element_size(dst));
                  void *               src_trans_buffer = src_buffer_allocator.get();
                  size_t               src_trans_nb[GGML_MAX_DIMS];
@@ -1851,13 +1802,12 @@ void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                  for (int i = 1; i < GGML_MAX_DIMS; i++) {
                      src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
                  }
-                aclTensor * src_trans_tensor =
+                acl_tensor_ptr src_trans_tensor =
                      ggml_cann_create_tensor(src_trans_buffer, ggml_cann_type_mapping(dst->type),
                                              ggml_type_size(dst->type), src0->ne, src_trans_nb, GGML_MAX_DIMS);
-                aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
+                aclnn_cast(ctx, acl_src0.get(), src_trans_tensor.get(), ggml_cann_type_mapping(dst->type));
                  aclnn_index_select_4d(ctx, src_trans_buffer, src0->ne, src_trans_nb, dst->data, dst->ne, dst->nb, src1,
                                        dst->type);
-                ggml_cann_release_resources(ctx, acl_src0, src_trans_tensor);
              }
              break;
          case GGML_TYPE_Q8_0:
@@ -1893,15 +1843,15 @@ void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                  scale_offset = ggml_nelements(src0) * sizeof(int8_t);
                  ggml_cann_pool_alloc dequant_buffer_allocator(ctx.pool(),
                                                                ggml_nelements(src0) * ggml_type_size(dst->type));
-                aclTensor * acl_weight_tensor = ggml_cann_create_tensor(src0->data, ACL_INT8, sizeof(int8_t), weight_ne,
-                                                                        weight_nb, GGML_MAX_DIMS + 1);
-                aclTensor * acl_scale_tensor =
+                acl_tensor_ptr       acl_weight_tensor = ggml_cann_create_tensor(src0->data, ACL_INT8, sizeof(int8_t),
+                                                                                 weight_ne, weight_nb, GGML_MAX_DIMS + 1);
+                acl_tensor_ptr       acl_scale_tensor =
                      ggml_cann_create_tensor(src0->data, ACL_FLOAT16, sizeof(uint16_t), scale_ne, scale_nb,
                                              GGML_MAX_DIMS + 1, ACL_FORMAT_ND, scale_offset);
-                aclTensor * dequant_tensor =
+                acl_tensor_ptr dequant_tensor =
                      ggml_cann_create_tensor(dequant_buffer_allocator.get(), ggml_cann_type_mapping(dst->type),
                                              ggml_type_size(dst->type), dequant_ne, dequant_nb, GGML_MAX_DIMS + 1);
-                aclnn_mul(ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
+                aclnn_mul(ctx, acl_weight_tensor.get(), acl_scale_tensor.get(), dequant_tensor.get());
                  dequant_nb[0] = ggml_type_size(dst->type);
                  dequant_ne    = src0->ne;
                  for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -1909,8 +1859,6 @@ void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                  }
                  aclnn_index_select_4d(ctx, dequant_buffer_allocator.get(), dequant_ne, dequant_nb, dst->data, dst->ne,
                                        dst->nb, src1, dst->type);
-
-                ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
                  break;
              }
          default:
@@ -1931,7 +1879,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
              }
          case GGML_TYPE_F16:
              {
-                aclTensor *          acl_src0 = ggml_cann_create_tensor(src0);
+                acl_tensor_ptr       acl_src0 = ggml_cann_create_tensor(src0);
                  ggml_cann_pool_alloc src_buffer_allocator(ctx.pool(), ggml_nelements(src0) * sizeof(uint16_t));
                  void *               src_trans_buffer = src_buffer_allocator.get();
                  size_t               src_trans_nb[GGML_MAX_DIMS];
@@ -1939,12 +1887,11 @@ void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                  for (int i = 1; i < GGML_MAX_DIMS; i++) {
                      src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
                  }
-                aclTensor * src_trans_tensor = ggml_cann_create_tensor(
+                acl_tensor_ptr src_trans_tensor = ggml_cann_create_tensor(
                      src_trans_buffer, ACL_FLOAT16, ggml_type_size(dst->type), src0->ne, src_trans_nb, GGML_MAX_DIMS);
-                aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
+                aclnn_cast(ctx, acl_src0.get(), src_trans_tensor.get(), ggml_cann_type_mapping(dst->type));
                  aclnn_index_copy_4d(ctx, src_trans_buffer, src0->ne, src_trans_nb, dst->data, dst->ne, dst->nb, src1,
                                      dst->type);
-                ggml_cann_release_resources(ctx, acl_src0, src_trans_tensor);
                  break;
              }
          default:
@@ -2006,12 +1953,12 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context & ctx, ggml_tensor *
          }
      }
  
-    aclTensor * acl_input_tensor = ggml_cann_create_tensor(input, bcast_input_ne, bcast_input_nb, n_dims);
-    int64_t     transpose_ne[]   = { bcast_weight_ne[1], bcast_weight_ne[0], bcast_weight_ne[2],
-                                     bcast_weight_ne[3], bcast_weight_ne[4], bcast_weight_ne[5] };
-    size_t      transpose_nb[]   = { bcast_weight_nb[1], bcast_weight_nb[0], bcast_weight_nb[2],
-                                     bcast_weight_nb[3], bcast_weight_nb[4], bcast_weight_nb[5] };
-    aclTensor * acl_weight_tensor;
+    acl_tensor_ptr acl_input_tensor = ggml_cann_create_tensor(input, bcast_input_ne, bcast_input_nb, n_dims);
+    int64_t        transpose_ne[]   = { bcast_weight_ne[1], bcast_weight_ne[0], bcast_weight_ne[2],
+                                        bcast_weight_ne[3], bcast_weight_ne[4], bcast_weight_ne[5] };
+    size_t         transpose_nb[]   = { bcast_weight_nb[1], bcast_weight_nb[0], bcast_weight_nb[2],
+                                        bcast_weight_nb[3], bcast_weight_nb[4], bcast_weight_nb[5] };
+    acl_tensor_ptr acl_weight_tensor;
  
      // Only check env once.
      static bool weight_to_nz = parse_bool(get_env("GGML_CANN_WEIGHT_NZ").value_or("on"));
@@ -2020,23 +1967,22 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context & ctx, ggml_tensor *
      } else {
          acl_weight_tensor = ggml_cann_create_tensor(weight, transpose_ne, transpose_nb, n_dims, ACL_FORMAT_ND);
      }
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst, bcast_dst_ne, bcast_dst_nb, n_dims);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, bcast_dst_ne, bcast_dst_nb, n_dims);
  
      switch (n_dims) {
          case 2:
-            GGML_CANN_CALL_ACLNN_OP(ctx, Mm, acl_input_tensor, acl_weight_tensor, acl_dst, 2);
+            GGML_CANN_CALL_ACLNN_OP(ctx, Mm, acl_input_tensor.get(), acl_weight_tensor.get(), acl_dst.get(), 2);
              break;
          case 3:
-            GGML_CANN_CALL_ACLNN_OP(ctx, BatchMatMul, acl_input_tensor, acl_weight_tensor, acl_dst, 2);
+            GGML_CANN_CALL_ACLNN_OP(ctx, BatchMatMul, acl_input_tensor.get(), acl_weight_tensor.get(), acl_dst.get(),
+                                    2);
              break;
          default:
              // ALLOW_FP32_DOWN_PRECISION, when input is
              // fp32, atlas a2 will transpose it to HFLOAT32.
-            GGML_CANN_CALL_ACLNN_OP(ctx, Matmul, acl_input_tensor, acl_weight_tensor, acl_dst, 1);
+            GGML_CANN_CALL_ACLNN_OP(ctx, Matmul, acl_input_tensor.get(), acl_weight_tensor.get(), acl_dst.get(), 1);
              break;
      }
-
-    ggml_cann_release_resources(ctx, acl_weight_tensor, acl_input_tensor, acl_dst);
  }
  
  /**
@@ -2088,8 +2034,8 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
  
      // case in
      if (src1->type != GGML_TYPE_F16) {
-        aclTensor * acl_src1_tensor = ggml_cann_create_tensor(src1);
-        input_buffer                = input_alloctor.alloc(ggml_nelements(src1) * input_elem_size);
+        acl_tensor_ptr acl_src1_tensor = ggml_cann_create_tensor(src1);
+        input_buffer                   = input_alloctor.alloc(ggml_nelements(src1) * input_elem_size);
  
          int64_t * input_cast_ne = src1->ne;
          size_t    input_cast_nb[GGML_MAX_DIMS];
@@ -2098,10 +2044,9 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
              input_cast_nb[i] = input_cast_nb[i - 1] * input_cast_ne[i - 1];
          }
  
-        aclTensor * acl_input_tensor = ggml_cann_create_tensor(input_buffer, ACL_FLOAT16, input_elem_size,
-                                                               input_cast_ne, input_cast_nb, GGML_MAX_DIMS);
-        aclnn_cast(ctx, acl_src1_tensor, acl_input_tensor, ACL_FLOAT16);
-        ggml_cann_release_resources(ctx, acl_input_tensor, acl_src1_tensor);
+        acl_tensor_ptr acl_input_tensor = ggml_cann_create_tensor(input_buffer, ACL_FLOAT16, input_elem_size,
+                                                                  input_cast_ne, input_cast_nb, GGML_MAX_DIMS);
+        aclnn_cast(ctx, acl_src1_tensor.get(), acl_input_tensor.get(), ACL_FLOAT16);
      }
  
      // output
@@ -2123,8 +2068,8 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
              int64_t batch1 = (n1 * src1->ne[2]) + c1;
              int64_t batch0 = (n0 * src0->ne[2]) + c0;
  
-            aclTensor * acl_input_tensor = ggml_cann_create_tensor((char *) input_buffer + batch1 * input_stride,
-                                                                   ACL_FLOAT16, input_elem_size, input_ne, input_nb, 2);
+            acl_tensor_ptr acl_input_tensor = ggml_cann_create_tensor(
+                (char *) input_buffer + batch1 * input_stride, ACL_FLOAT16, input_elem_size, input_ne, input_nb, 2);
  
              // first split
              int64_t weight_ne_offset = 0;
@@ -2134,23 +2079,22 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
              int64_t output_ne_offset = 0;
              int64_t output_ne[2]     = { weight_ne[0], dst->ne[1] };
  
-            aclTensor * acl_weight_tensor =
+            acl_tensor_ptr acl_weight_tensor =
                  ggml_cann_create_tensor((char *) src0->data + batch0 * weight_stride, ggml_cann_type_mapping(type),
                                          weight_elem_size, weight_ne, weight_nb, 2, ACL_FORMAT_ND, weight_ne_offset);
-            aclTensor * acl_scale_tensor =
+            acl_tensor_ptr acl_scale_tensor =
                  ggml_cann_create_tensor(scale_offset + batch0 * scale_stride, ACL_FLOAT16, scale_elem_size, scale_ne,
                                          scale_nb, 2, ACL_FORMAT_ND, scale_ne_offset);
-            aclTensor * acl_output_tensor =
+            acl_tensor_ptr acl_output_tensor =
                  ggml_cann_create_tensor((char *) output_buffer + batch1 * output_stride, ACL_FLOAT16, output_elem_size,
                                          output_ne, output_nb, 2, ACL_FORMAT_ND, output_ne_offset);
              int64_t antiquantGroupSize = 0;
              if (src0->ne[0] > QK8_0) {
                  antiquantGroupSize = QK8_0;
              }
-            GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor, acl_weight_tensor,
-                                    acl_scale_tensor, nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
-                                    acl_output_tensor);
-            ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, acl_output_tensor);
+            GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor.get(), acl_weight_tensor.get(),
+                                    acl_scale_tensor.get(), nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
+                                    acl_output_tensor.get());
  
              // other splits
              for (int64_t split = 1; split < split_size; split++) {
@@ -2171,13 +2115,10 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
                  acl_output_tensor =
                      ggml_cann_create_tensor((char *) output_buffer + batch1 * output_stride, ACL_FLOAT16,
                                              output_elem_size, output_ne, output_nb, 2, ACL_FORMAT_ND, output_ne_offset);
-                GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor, acl_weight_tensor,
-                                        acl_scale_tensor, nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
-                                        acl_output_tensor);
-                ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, acl_output_tensor);
+                GGML_CANN_CALL_ACLNN_OP(ctx, WeightQuantBatchMatmulV2, acl_input_tensor.get(), acl_weight_tensor.get(),
+                                        acl_scale_tensor.get(), nullptr, nullptr, nullptr, nullptr, antiquantGroupSize,
+                                        acl_output_tensor.get());
              }
-
-            ggml_cann_release_resources(ctx, acl_input_tensor);
          }
      }
  
@@ -2190,12 +2131,10 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context & ctx, ggml_tensor
              output_cast_nb[i] = output_cast_nb[i - 1] * output_cast_ne[i - 1];
          }
  
-        aclTensor * acl_output_tensor = ggml_cann_create_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
-                                                                output_cast_ne, output_cast_nb, GGML_MAX_DIMS);
-        aclTensor * acl_dst_tensor    = ggml_cann_create_tensor(dst);
-        aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
-
-        ggml_cann_release_resources(ctx, acl_output_tensor, acl_dst_tensor);
+        acl_tensor_ptr acl_output_tensor = ggml_cann_create_tensor(output_buffer, ACL_FLOAT16, output_elem_size,
+                                                                   output_cast_ne, output_cast_nb, GGML_MAX_DIMS);
+        acl_tensor_ptr acl_dst_tensor    = ggml_cann_create_tensor(dst);
+        aclnn_cast(ctx, acl_output_tensor.get(), acl_dst_tensor.get(), ggml_cann_type_mapping(dst->type));
      }
  }
  
@@ -2237,10 +2176,9 @@ static void aclnn_roll(ggml_backend_cann_context & ctx,
                         aclTensor *                 acl_dst,
                         int64_t *                   shifts,
                         int64_t *                   dims) {
-    aclIntArray * acl_shifts = aclCreateIntArray(shifts, 1);
-    aclIntArray * acl_dims   = aclCreateIntArray(dims, 1);
-    GGML_CANN_CALL_ACLNN_OP(ctx, Roll, acl_src, acl_shifts, acl_dims, acl_dst);
-    ggml_cann_release_resources(ctx, acl_shifts, acl_dims);
+    acl_int_array_ptr acl_shifts = ggml_cann_create_int_array(shifts, 1);
+    acl_int_array_ptr acl_dims   = ggml_cann_create_int_array(dims, 1);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Roll, acl_src, acl_shifts.get(), acl_dims.get(), acl_dst);
  }
  
  /**
@@ -2262,10 +2200,9 @@ static void aclnn_index_fill_tensor(ggml_backend_cann_context & ctx,
                                      int64_t *                   index,
                                      int64_t                     index_num,
                                      float                       value) {
-    aclIntArray * acl_index = aclCreateIntArray(index, index_num);
-    aclScalar *   acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexFillTensor, acl_src, dim, acl_index, acl_value);
-    ggml_cann_release_resources(ctx, acl_index, acl_value);
+    acl_int_array_ptr acl_index = ggml_cann_create_int_array(index, index_num);
+    acl_scalar_ptr    acl_value = ggml_cann_create_scalar(&value, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceIndexFillTensor, acl_src, dim, acl_index.get(), acl_value.get());
  }
  
  /**
@@ -2334,7 +2271,7 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      }
  
      // theta_scale arange, [0,1,...,ne00/2 - 1]
-    aclTensor * acl_theta_scale_tensor = nullptr;
+    acl_tensor_ptr acl_theta_scale_tensor;
      // cache theta scale
      if (ctx.rope_cache.theta_scale_length != theta_scale_length ||
          // theta_scale and freq_scale should not change during the current token inference process,
@@ -2355,32 +2292,33 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
          float step       = 1;
          float stop       = theta_scale_length;
          float n_elements = theta_scale_length;
-        aclnn_arange(ctx, acl_theta_scale_tensor, start, stop, step, n_elements);
+        aclnn_arange(ctx, acl_theta_scale_tensor.get(), start, stop, step, n_elements);
  
          ggml_cann_pool_alloc yarn_ramp_allocator(ctx.pool());
-        aclTensor *          acl_yarn_ramp_tensor = nullptr;
+        acl_tensor_ptr       acl_yarn_ramp_tensor;
          if (ext_factor != 0) {
              // -rope_yarn_ramp
              // const float y = (i0 / 2 - low) / MAX(0.001f, high - low);
              // return MIN(1, MAX(0, y)) - 1;
              yarn_ramp_allocator.alloc(theta_scale_length * sizeof(float));
              void * yarn_ramp_buffer = yarn_ramp_allocator.get();
-            acl_yarn_ramp_tensor   = ggml_cann_create_tensor(yarn_ramp_buffer, ACL_FLOAT, sizeof(float), theta_scale_ne,
-                                                             theta_scale_nb, 1);
-            float       zero_value = 0, one_value = 1;
-            float       denom_safe_value = MAX(0.001f, corr_dims[1] - corr_dims[0]);
-            aclScalar * low              = aclCreateScalar(&corr_dims[0], aclDataType::ACL_FLOAT);
-            aclScalar * zero             = aclCreateScalar(&zero_value, aclDataType::ACL_FLOAT);
-            aclScalar * one              = aclCreateScalar(&one_value, aclDataType::ACL_FLOAT);
-            aclScalar * denom_safe       = aclCreateScalar(&denom_safe_value, aclDataType::ACL_FLOAT);
-            aclScalar * ext_factor_sc    = aclCreateScalar(&ext_factor, aclDataType::ACL_FLOAT);
-
-            GGML_CANN_CALL_ACLNN_OP(ctx, Subs, acl_theta_scale_tensor, low, one, acl_yarn_ramp_tensor);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceDivs, acl_yarn_ramp_tensor, denom_safe);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceThreshold, acl_yarn_ramp_tensor, zero, zero);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceClampMax, acl_yarn_ramp_tensor, one);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceSubs, acl_yarn_ramp_tensor, one, one);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_yarn_ramp_tensor, ext_factor_sc);
+            acl_yarn_ramp_tensor =
+                ggml_cann_create_tensor(yarn_ramp_buffer, ACL_FLOAT, sizeof(float), theta_scale_ne, theta_scale_nb, 1);
+            float          zero_value = 0, one_value = 1;
+            float          denom_safe_value = MAX(0.001f, corr_dims[1] - corr_dims[0]);
+            acl_scalar_ptr low              = ggml_cann_create_scalar(&corr_dims[0], aclDataType::ACL_FLOAT);
+            acl_scalar_ptr zero             = ggml_cann_create_scalar(&zero_value, aclDataType::ACL_FLOAT);
+            acl_scalar_ptr one              = ggml_cann_create_scalar(&one_value, aclDataType::ACL_FLOAT);
+            acl_scalar_ptr denom_safe       = ggml_cann_create_scalar(&denom_safe_value, aclDataType::ACL_FLOAT);
+            acl_scalar_ptr ext_factor_sc    = ggml_cann_create_scalar(&ext_factor, aclDataType::ACL_FLOAT);
+
+            GGML_CANN_CALL_ACLNN_OP(ctx, Subs, acl_theta_scale_tensor.get(), low.get(), one.get(),
+                                    acl_yarn_ramp_tensor.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceDivs, acl_yarn_ramp_tensor.get(), denom_safe.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceThreshold, acl_yarn_ramp_tensor.get(), zero.get(), zero.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceClampMax, acl_yarn_ramp_tensor.get(), one.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceSubs, acl_yarn_ramp_tensor.get(), one.get(), one.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_yarn_ramp_tensor.get(), ext_factor_sc.get());
  
              // theta_interp = freq_scale * theta_extrap;
              // theta = theta_interp * (1 - ramp_mix) + theta_extrap * ramp_mix;
@@ -2390,26 +2328,23 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
              //
              // we cache (freq_scale - freq_scale * ramp_mix + ramp_mix), Considering that the rope_yarn_ramp here is the inverse
              // cache freq_scale + (freq_scale - 1) * ramp_mix
-            float       freq_scale_1    = freq_scale - 1;
-            aclScalar * freq_scale_sc   = aclCreateScalar(&freq_scale, aclDataType::ACL_FLOAT);
-            aclScalar * freq_scale_1_sc = aclCreateScalar(&freq_scale_1, aclDataType::ACL_FLOAT);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_yarn_ramp_tensor, freq_scale_1_sc);
-            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdds, acl_yarn_ramp_tensor, freq_scale_sc, one);
-
-            ggml_cann_release_resources(ctx, low, zero, one, denom_safe, ext_factor_sc, freq_scale_sc, freq_scale_1_sc);
+            float          freq_scale_1    = freq_scale - 1;
+            acl_scalar_ptr freq_scale_sc   = ggml_cann_create_scalar(&freq_scale, aclDataType::ACL_FLOAT);
+            acl_scalar_ptr freq_scale_1_sc = ggml_cann_create_scalar(&freq_scale_1, aclDataType::ACL_FLOAT);
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMuls, acl_yarn_ramp_tensor.get(), freq_scale_1_sc.get());
+            GGML_CANN_CALL_ACLNN_OP(ctx, InplaceAdds, acl_yarn_ramp_tensor.get(), freq_scale_sc.get(), one.get());
          }
  
          // power
-        aclScalar * acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
-        GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, acl_theta_scale, acl_theta_scale_tensor, acl_theta_scale_tensor);
+        acl_scalar_ptr acl_theta_scale = ggml_cann_create_scalar(&theta_scale, aclDataType::ACL_FLOAT);
+        GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, acl_theta_scale.get(), acl_theta_scale_tensor.get(),
+                                acl_theta_scale_tensor.get());
  
          if (ext_factor != 0) {
-            aclnn_mul(ctx, acl_theta_scale_tensor, acl_yarn_ramp_tensor);
+            aclnn_mul(ctx, acl_theta_scale_tensor.get(), acl_yarn_ramp_tensor.get());
          } else if (freq_scale != 1) {
-            aclnn_muls(ctx, acl_theta_scale_tensor, freq_scale, nullptr, true);
+            aclnn_muls(ctx, acl_theta_scale_tensor.get(), freq_scale, nullptr, true);
          }
-
-        ggml_cann_release_resources(ctx, acl_yarn_ramp_tensor, acl_theta_scale);
      } else {
          // use cache
          acl_theta_scale_tensor = ggml_cann_create_tensor(ctx.rope_cache.theta_scale_cache, ACL_FLOAT, sizeof(float),
@@ -2420,15 +2355,14 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      // freq_factors
      if (src2) {
          freq_fac_res_allocator.alloc(theta_scale_length * sizeof(float));
-        void *      freq_fac_res_ptr = freq_fac_res_allocator.get();
-        aclTensor * acl_freq_factors_tensor =
+        void *         freq_fac_res_ptr = freq_fac_res_allocator.get();
+        acl_tensor_ptr acl_freq_factors_tensor =
              ggml_cann_create_tensor(src2->data, ggml_cann_type_mapping(src2->type), ggml_type_size(src2->type),
                                      theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
-        aclTensor * acl_freq_fac_res_tensor = ggml_cann_create_tensor(freq_fac_res_ptr, ACL_FLOAT, sizeof(float),
-                                                                      theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
-        aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
+        acl_tensor_ptr acl_freq_fac_res_tensor = ggml_cann_create_tensor(freq_fac_res_ptr, ACL_FLOAT, sizeof(float),
+                                                                         theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
+        aclnn_div(ctx, acl_theta_scale_tensor.get(), acl_freq_factors_tensor.get(), acl_freq_fac_res_tensor.get());
          std::swap(acl_theta_scale_tensor, acl_freq_fac_res_tensor);
-        ggml_cann_release_resources(ctx, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
      }
  
      // init sin_repeat && cos_repeat, only to accelerate first layer on each device
@@ -2448,7 +2382,7 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      }
  
      // position
-    aclTensor * acl_position_tensor =
+    acl_tensor_ptr acl_position_tensor =
          ggml_cann_create_tensor(src1->data, ggml_cann_type_mapping(src1->type), ggml_type_size(src1->type), position_ne,
                                  position_nb, GGML_MAX_DIMS);
  
@@ -2457,22 +2391,22 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      ggml_cann_pool_alloc theta_allocator(ctx.pool(), theta_length * sizeof(float));
      void *               theta_buffer = theta_allocator.get();
  
-    aclTensor * acl_theta_tensor =
+    acl_tensor_ptr acl_theta_tensor =
          ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float), theta_ne, theta_nb, GGML_MAX_DIMS);
-    aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor, acl_theta_tensor);
+    aclnn_mul(ctx, acl_position_tensor.get(), acl_theta_scale_tensor.get(), acl_theta_tensor.get());
  
      // sin/cos
      ggml_cann_pool_alloc sin_allocator(ctx.pool(), theta_length * sizeof(float));
      void *               sin_buffer = sin_allocator.get();
-    aclTensor *          acl_sin_tensor =
+    acl_tensor_ptr       acl_sin_tensor =
          ggml_cann_create_tensor(sin_buffer, ACL_FLOAT, sizeof(float), theta_ne, theta_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
+    aclnn_sin(ctx, acl_theta_tensor.get(), acl_sin_tensor.get());
  
      ggml_cann_pool_alloc cos_allocator(ctx.pool(), theta_length * sizeof(float));
      void *               cos_buffer = cos_allocator.get();
-    aclTensor *          acl_cos_tensor =
+    acl_tensor_ptr       acl_cos_tensor =
          ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float), theta_ne, theta_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
+    aclnn_cos(ctx, acl_theta_tensor.get(), acl_cos_tensor.get());
  
      if (ext_factor != 0) {
          attn_factor *= 1.0f + 0.1f * logf(1.0f / freq_scale);
@@ -2480,8 +2414,8 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
  
      // attn_factor
      if (attn_factor != 1) {
-        aclnn_muls(ctx, acl_sin_tensor, attn_factor, nullptr, true);
-        aclnn_muls(ctx, acl_cos_tensor, attn_factor, nullptr, true);
+        aclnn_muls(ctx, acl_sin_tensor.get(), attn_factor, nullptr, true);
+        aclnn_muls(ctx, acl_cos_tensor.get(), attn_factor, nullptr, true);
      }
  
      int64_t sin_reshape_ne[4] = { src0->ne[0], 1, src0->ne[2], 1 };
@@ -2490,22 +2424,22 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      for (int i = 1; i < GGML_MAX_DIMS; i++) {
          sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
      }
-    aclTensor * acl_sin_repeat_tensor = ggml_cann_create_tensor(ctx.rope_cache.sin_cache, ACL_FLOAT, sizeof(float),
-                                                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
-    aclTensor * acl_cos_repeat_tensor = ggml_cann_create_tensor(ctx.rope_cache.cos_cache, ACL_FLOAT, sizeof(float),
-                                                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
+    acl_tensor_ptr acl_sin_repeat_tensor = ggml_cann_create_tensor(ctx.rope_cache.sin_cache, ACL_FLOAT, sizeof(float),
+                                                                   sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
+    acl_tensor_ptr acl_cos_repeat_tensor = ggml_cann_create_tensor(ctx.rope_cache.cos_cache, ACL_FLOAT, sizeof(float),
+                                                                   sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
  
      // repeat
      if (is_neox) {
          int64_t repeatsArray[] = { 1, 1, 1, 2 };
-        aclnn_repeat(ctx, acl_sin_tensor, acl_sin_repeat_tensor, repeatsArray);
-        aclnn_repeat(ctx, acl_cos_tensor, acl_cos_repeat_tensor, repeatsArray);
+        aclnn_repeat(ctx, acl_sin_tensor.get(), acl_sin_repeat_tensor.get(), repeatsArray);
+        aclnn_repeat(ctx, acl_cos_tensor.get(), acl_cos_repeat_tensor.get(), repeatsArray);
      } else {
          int64_t num_repeats = 2;
          int64_t dim         = 3;
          int64_t output_size = theta_scale_length * num_repeats;
-        aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim, num_repeats, output_size);
-        aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim, num_repeats, output_size);
+        aclnn_repeat_interleave(ctx, acl_sin_tensor.get(), acl_sin_repeat_tensor.get(), dim, num_repeats, output_size);
+        aclnn_repeat_interleave(ctx, acl_cos_tensor.get(), acl_cos_repeat_tensor.get(), dim, num_repeats, output_size);
      }
  
      // Other layers use cache except first layer.
@@ -2515,9 +2449,6 @@ static void aclnn_cache_init(ggml_backend_cann_context & ctx,
      ctx.rope_cache.freq_scale  = freq_scale;
      ctx.rope_cache.attn_factor = attn_factor;
      ctx.rope_cache.is_neox     = is_neox;
-
-    ggml_cann_release_resources(ctx, acl_theta_scale_tensor, acl_position_tensor, acl_theta_tensor, acl_sin_tensor,
-                                acl_sin_repeat_tensor, acl_cos_tensor, acl_cos_repeat_tensor);
  }
  
  #ifdef __cplusplus
@@ -2578,20 +2509,20 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      for (int i = 1; i < GGML_MAX_DIMS; i++) {
          sin_reshape_nb[i] = sin_reshape_nb[i - 1] * sin_reshape_ne[i - 1];
      }
-    aclTensor * acl_sin_reshape_tensor = ggml_cann_create_tensor(ctx.rope_cache.sin_cache, ACL_FLOAT, sizeof(float),
-                                                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
-    aclTensor * acl_cos_reshape_tensor = ggml_cann_create_tensor(ctx.rope_cache.cos_cache, ACL_FLOAT, sizeof(float),
-                                                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
+    acl_tensor_ptr acl_sin_reshape_tensor = ggml_cann_create_tensor(ctx.rope_cache.sin_cache, ACL_FLOAT, sizeof(float),
+                                                                    sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
+    acl_tensor_ptr acl_cos_reshape_tensor = ggml_cann_create_tensor(ctx.rope_cache.cos_cache, ACL_FLOAT, sizeof(float),
+                                                                    sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
  #ifdef ASCEND_310P
      // Special ROPE operation for 310P
  
      // roll input
      void *               input_roll_buffer;
-    aclTensor *          acl_minus_one_tensor;
+    acl_tensor_ptr       acl_minus_one_tensor;
      void *               minus_one_scale_buffer = nullptr;
      ggml_cann_pool_alloc roll_allocator(ctx.pool(), ggml_nbytes(src0));
      ggml_cann_pool_alloc minus_one_scale_allocator(ctx.pool(), sizeof(float) * src0->ne[0]);
@@ -2604,17 +2535,16 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
          for (int i = 1; i < GGML_MAX_DIMS; i++) {
              input_roll_nb[i] = input_roll_nb[i - 1] * input_roll_ne[i - 1];
          }
-        aclTensor * acl_input_roll_tensor =
+        acl_tensor_ptr acl_input_roll_tensor =
              ggml_cann_create_tensor(input_roll_buffer, ggml_cann_type_mapping(src0->type), ggml_type_size(src0->type),
                                      input_roll_ne, input_roll_nb, GGML_MAX_DIMS);
-        aclTensor * acl_input_tensor =
+        acl_tensor_ptr acl_input_tensor =
              ggml_cann_create_tensor(src0->data, ggml_cann_type_mapping(src0->type), ggml_type_size(src0->type),
                                      input_roll_ne, input_roll_nb, GGML_MAX_DIMS);
  
          int64_t shifts[] = { 1 };
          int64_t dims[]   = { 3 };
-        aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
-        ggml_cann_release_resources(ctx, acl_input_roll_tensor, acl_input_tensor);
+        aclnn_roll(ctx, acl_input_tensor.get(), acl_input_roll_tensor.get(), shifts, dims);
  
          // init [-1, 1, -1, 1, ...]
          minus_one_scale_buffer = minus_one_scale_allocator.get();
@@ -2639,16 +2569,15 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
          // roll input: [q0,q1,q2,...] ->
          // [q_half,q_half+1,...,q_end,q0,q1,...q_half-1]
          input_roll_buffer = roll_allocator.get();
-        aclTensor * acl_input_roll_tensor =
+        acl_tensor_ptr acl_input_roll_tensor =
              ggml_cann_create_tensor(input_roll_buffer, ggml_cann_type_mapping(src0->type), ggml_type_size(src0->type),
                                      src0->ne, src0->nb, GGML_MAX_DIMS);
-        aclTensor * acl_input_tensor = ggml_cann_create_tensor(src0);
+        acl_tensor_ptr acl_input_tensor = ggml_cann_create_tensor(src0);
  
          int64_t shifts[] = { src0->ne[0] / 2 };
          int64_t dims[]   = { 3 };
-        aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
+        aclnn_roll(ctx, acl_input_tensor.get(), acl_input_roll_tensor.get(), shifts, dims);
  
-        ggml_cann_release_resources(ctx, acl_input_roll_tensor, acl_input_tensor);
          // init [-1, -1, -1, 1, 1，1，...]
          minus_one_scale_buffer  = minus_one_scale_allocator.get();
          int64_t minus_one_ne[4] = { src0->ne[0], 1, 1, 1 };
@@ -2666,12 +2595,11 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
          for (int i = 1; i < GGML_MAX_DIMS; i++) {
              first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1];
          }
-        aclTensor * acl_first_half_tensor = ggml_cann_create_tensor(minus_one_scale_buffer, ACL_FLOAT, sizeof(float),
-                                                                    first_half_ne, first_half_nb, GGML_MAX_DIMS);
-        bool        inplace               = true;
-        float       scale                 = -1;
-        aclnn_muls(ctx, acl_first_half_tensor, scale, nullptr, inplace);
-        ggml_cann_release_resources(ctx, acl_first_half_tensor);
+        acl_tensor_ptr acl_first_half_tensor = ggml_cann_create_tensor(minus_one_scale_buffer, ACL_FLOAT, sizeof(float),
+                                                                       first_half_ne, first_half_nb, GGML_MAX_DIMS);
+        bool           inplace               = true;
+        float          scale                 = -1;
+        aclnn_muls(ctx, acl_first_half_tensor.get(), scale, nullptr, inplace);
      }
  
      // TODO: n_dims < ne0
@@ -2685,10 +2613,10 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      for (int i = 1; i < GGML_MAX_DIMS; i++) {
          input_nb[i] = input_nb[i - 1] * src0->ne[i - 1];
      }
-    aclTensor * acl_input_roll_mul_scale_tensor =
+    acl_tensor_ptr acl_input_roll_mul_scale_tensor =
          ggml_cann_create_tensor(input_roll_mul_scale_buffer, ggml_cann_type_mapping(src0->type),
                                  ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
-    aclTensor * acl_input_roll_reshape_tensor =
+    acl_tensor_ptr acl_input_roll_reshape_tensor =
          ggml_cann_create_tensor(input_roll_buffer, ggml_cann_type_mapping(src0->type), ggml_type_size(src0->type),
                                  src0->ne, input_nb, GGML_MAX_DIMS);
  
@@ -2709,25 +2637,21 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
          }
          ggml_cann_pool_alloc fp32_allocator1(ctx.pool(), ggml_nelements(dst) * sizeof(float));
          void *               input_fp32_buffer1 = fp32_allocator1.get();
-        aclTensor * input_fp32_tensor1 = ggml_cann_create_tensor(input_fp32_buffer1, ACL_FLOAT, sizeof(float), dst->ne,
-                                                                 input_fp32_nb, GGML_MAX_DIMS);
+        acl_tensor_ptr       input_fp32_tensor1 = ggml_cann_create_tensor(input_fp32_buffer1, ACL_FLOAT, sizeof(float),
+                                                                          dst->ne, input_fp32_nb, GGML_MAX_DIMS);
          ggml_cann_pool_alloc fp32_allocator2(ctx.pool(), ggml_nelements(dst) * sizeof(float));
          void *               input_fp32_buffer2 = fp32_allocator2.get();
-        aclTensor * input_fp32_tensor2 = ggml_cann_create_tensor(input_fp32_buffer2, ACL_FLOAT, sizeof(float), dst->ne,
-                                                                 input_fp32_nb, GGML_MAX_DIMS);
+        acl_tensor_ptr       input_fp32_tensor2 = ggml_cann_create_tensor(input_fp32_buffer2, ACL_FLOAT, sizeof(float),
+                                                                          dst->ne, input_fp32_nb, GGML_MAX_DIMS);
  
          ggml_cann_pool_alloc fp32_allocator(ctx.pool(), ggml_nelements(dst) * sizeof(float));
-        output_fp32_buffer             = fp32_allocator.get();
-        aclTensor * output_fp32_tensor = ggml_cann_create_tensor(output_fp32_buffer, ACL_FLOAT, sizeof(float), dst->ne,
-                                                                 input_fp32_nb, GGML_MAX_DIMS);
-        aclnn_mul(ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
-        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor, input_fp32_tensor2);
-        aclnn_add(ctx, input_fp32_tensor1, input_fp32_tensor2, output_fp32_tensor);
-        aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16);
-
-        ggml_cann_release_resources(ctx, input_fp32_tensor1, input_fp32_tensor2, output_fp32_tensor,
-                                    acl_sin_reshape_tensor, acl_minus_one_tensor, acl_input_roll_mul_scale_tensor,
-                                    acl_input_roll_reshape_tensor, acl_src);
+        output_fp32_buffer                = fp32_allocator.get();
+        acl_tensor_ptr output_fp32_tensor = ggml_cann_create_tensor(output_fp32_buffer, ACL_FLOAT, sizeof(float),
+                                                                    dst->ne, input_fp32_nb, GGML_MAX_DIMS);
+        aclnn_mul(ctx, acl_src.get(), acl_cos_reshape_tensor.get(), input_fp32_tensor1.get());
+        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor.get(), acl_sin_reshape_tensor.get(), input_fp32_tensor2.get());
+        aclnn_add(ctx, input_fp32_tensor1.get(), input_fp32_tensor2.get(), output_fp32_tensor.get());
+        aclnn_cast(ctx, output_fp32_tensor.get(), acl_dst.get(), ACL_FLOAT16);
      }
      return;
  #endif
@@ -2738,8 +2662,8 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      switch (src0->type) {
          case GGML_TYPE_F32:
              {
-                GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
-                                        acl_sin_reshape_tensor, acl_mode, acl_dst);
+                GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src.get(), acl_cos_reshape_tensor.get(),
+                                        acl_sin_reshape_tensor.get(), acl_mode, acl_dst.get());
                  break;
              }
          case GGML_TYPE_F16:
@@ -2755,37 +2679,33 @@ void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
                      src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
                  }
  
-                aclTensor * acl_src_trans_tensor = ggml_cann_create_tensor(src_trans_buffer, ACL_FLOAT, sizeof(float),
-                                                                           src0->ne, src_trans_nb, GGML_MAX_DIMS);
-                aclTensor * acl_dst_trans_tensor = ggml_cann_create_tensor(dst_trans_buffer, ACL_FLOAT, sizeof(float),
-                                                                           dst->ne, src_trans_nb, GGML_MAX_DIMS);
+                acl_tensor_ptr acl_src_trans_tensor = ggml_cann_create_tensor(
+                    src_trans_buffer, ACL_FLOAT, sizeof(float), src0->ne, src_trans_nb, GGML_MAX_DIMS);
+                acl_tensor_ptr acl_dst_trans_tensor = ggml_cann_create_tensor(
+                    dst_trans_buffer, ACL_FLOAT, sizeof(float), dst->ne, src_trans_nb, GGML_MAX_DIMS);
  
-                aclnn_cast(ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
+                aclnn_cast(ctx, acl_src.get(), acl_src_trans_tensor.get(), ACL_FLOAT);
  
-                GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src_trans_tensor, acl_cos_reshape_tensor,
-                                        acl_sin_reshape_tensor, acl_mode, acl_dst_trans_tensor);
+                GGML_CANN_CALL_ACLNN_OP(ctx, RotaryPositionEmbedding, acl_src_trans_tensor.get(),
+                                        acl_cos_reshape_tensor.get(), acl_sin_reshape_tensor.get(), acl_mode,
+                                        acl_dst_trans_tensor.get());
  
-                aclnn_cast(ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);
-
-                ggml_cann_release_resources(ctx, acl_src_trans_tensor, acl_dst_trans_tensor);
+                aclnn_cast(ctx, acl_dst_trans_tensor.get(), acl_dst.get(), ACL_FLOAT16);
                  break;
              }
          default:
              GGML_ABORT("Unsupported tensor type for GGML_OP_ROPE");
              break;
      }
-    ggml_cann_release_resources(ctx, acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_src, acl_dst);
  }
  
  void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);
-
-    GGML_CANN_CALL_ACLNN_OP(ctx, ArgMax, acl_src, 3, false, acl_dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);
  
-    ggml_cann_release_resources(ctx, acl_src, acl_dst);
+    GGML_CANN_CALL_ACLNN_OP(ctx, ArgMax, acl_src.get(), 3, false, acl_dst.get());
  }
  
  void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
@@ -2795,108 +2715,95 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor *
      // stride
      int64_t s0 = ((const int32_t *) (dst->op_params))[0];
  
-    aclTensor * acl_input  = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
-    aclTensor * acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
-    aclTensor * acl_dst    = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
+    acl_tensor_ptr acl_input  = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
+    acl_tensor_ptr acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
+    acl_tensor_ptr acl_dst    = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
  
      int64_t strideVal[1];
-    strideVal[0]                = s0;
-    aclIntArray * stride        = aclCreateIntArray(strideVal, 1);
-    int64_t       paddingVal[]  = { 0 };
-    aclIntArray * padding       = aclCreateIntArray(paddingVal, 1);
-    int64_t       dilationVal[] = { 1 };
-    aclIntArray * dilation      = aclCreateIntArray(dilationVal, 1);
-    int8_t        cubeMathType  = 0;
+    strideVal[0]                    = s0;
+    acl_int_array_ptr stride        = ggml_cann_create_int_array(strideVal, 1);
+    int64_t           paddingVal[]  = { 0 };
+    acl_int_array_ptr padding       = ggml_cann_create_int_array(paddingVal, 1);
+    int64_t           dilationVal[] = { 1 };
+    acl_int_array_ptr dilation      = ggml_cann_create_int_array(dilationVal, 1);
+    int8_t            cubeMathType  = 0;
  
  #ifdef ASCEND_310P
      cubeMathType = 1;
  #endif
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Convolution, acl_input, acl_weight, nullptr, stride, padding, dilation, true, padding,
-                            1, acl_dst, cubeMathType);
-
-    ggml_cann_release_resources(ctx, acl_weight, acl_dst, stride, padding, dilation);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Convolution, acl_input.get(), acl_weight.get(), nullptr, stride.get(), padding.get(),
+                            dilation.get(), true, padding.get(), 1, acl_dst.get(), cubeMathType);
  }
  
  void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
  
-    aclTensor * acl_input = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst   = ggml_cann_create_tensor(dst);
-
-    float       alphaValue = 1.0f;
-    aclScalar * alpha      = nullptr;
-    alpha                  = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    acl_tensor_ptr acl_input = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst   = ggml_cann_create_tensor(dst);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Elu, acl_input, alpha, alpha, alpha, acl_dst);
+    float          alphaValue = 1.0f;
+    acl_scalar_ptr alpha      = nullptr;
+    alpha                     = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
  
-    ggml_cann_release_resources(ctx, acl_input, acl_dst, alpha);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Elu, acl_input.get(), alpha.get(), alpha.get(), alpha.get(), acl_dst.get());
  }
  
  void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
-
-    int64_t       reduceDimValue[] = { 3 };
-    aclIntArray * reduceDim        = aclCreateIntArray(reduceDimValue, 1);
-    bool          keepDim          = true;
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, Mean, acl_src, reduceDim, keepDim, ACL_FLOAT, acl_dst);
+    int64_t           reduceDimValue[] = { 3 };
+    acl_int_array_ptr reduceDim        = ggml_cann_create_int_array(reduceDimValue, 1);
+    bool              keepDim          = true;
  
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, reduceDim);
+    GGML_CANN_CALL_ACLNN_OP(ctx, Mean, acl_src.get(), reduceDim.get(), keepDim, ACL_FLOAT, acl_dst.get());
  }
  
  void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
-    ggml_tensor * src0             = dst->src[0];
-    int32_t *     opts             = (int32_t *) dst->op_params;
-    int64_t       paddingsArray[2] = { opts[0], opts[1] };
-    aclIntArray * paddings         = aclCreateIntArray(paddingsArray, 2);
+    ggml_tensor *     src0             = dst->src[0];
+    int32_t *         opts             = (int32_t *) dst->op_params;
+    int64_t           paddingsArray[2] = { opts[0], opts[1] };
+    acl_int_array_ptr paddings         = ggml_cann_create_int_array(paddingsArray, 2);
  
      for (int64_t i = 0; i < src0->ne[3]; i++) {
-        aclTensor * acl_src =
+        acl_tensor_ptr acl_src =
              ggml_cann_create_tensor((char *) src0->data + i * src0->ne[3], ggml_cann_type_mapping(src0->type),
                                      ggml_element_size(src0), src0->ne, src0->nb, 3);
  
-        aclTensor * acl_dst =
+        acl_tensor_ptr acl_dst =
              ggml_cann_create_tensor((char *) dst->data + i * src0->ne[3], ggml_cann_type_mapping(dst->type),
                                      ggml_element_size(dst), dst->ne, dst->nb, 3);
  
-        GGML_CANN_CALL_ACLNN_OP(ctx, ReflectionPad1d, acl_src, paddings, acl_dst);
-
-        ggml_cann_release_resources(ctx, acl_src, acl_dst);
+        GGML_CANN_CALL_ACLNN_OP(ctx, ReflectionPad1d, acl_src.get(), paddings.get(), acl_dst.get());
      }
-    ggml_cann_release_resources(ctx, paddings);
  }
  
  void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
      ggml_tensor * src1 = dst->src[1];
  
-    aclTensor * acl_self  = ggml_cann_create_tensor(src0);
-    aclTensor * acl_other = ggml_cann_create_tensor(src1);
+    acl_tensor_ptr acl_self  = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_other = ggml_cann_create_tensor(src1);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceEqTensor, acl_self, acl_other);
+    GGML_CANN_CALL_ACLNN_OP(ctx, InplaceEqTensor, acl_self.get(), acl_other.get());
  
      ggml_cann_sum(ctx, dst);
-
-    ggml_cann_release_resources(ctx, acl_self, acl_other);
  }
  
  void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src0 = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src0);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src0);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    float       alphaValue = 0.0f;
-    aclScalar * alpha      = nullptr;
-    alpha                  = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    float          alphaValue = 0.0f;
+    acl_scalar_ptr alpha      = nullptr;
+    alpha                     = ggml_cann_create_scalar(&alphaValue, aclDataType::ACL_FLOAT);
  
-    GGML_CANN_CALL_ACLNN_OP(ctx, GtScalar, acl_src, alpha, acl_dst);
-
-    ggml_cann_release_resources(ctx, acl_src, acl_dst, alpha);
+    GGML_CANN_CALL_ACLNN_OP(ctx, GtScalar, acl_src.get(), alpha.get(), acl_dst.get());
  }
  
  /**
@@ -2933,8 +2840,8 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context & ctx, ggml_tensor
      ggml_cann_pool_alloc export_allocator(ctx.pool(), src0->ne[0] * src0->ne[1] * ids->ne[0] * ggml_element_size(src0));
      void *               export_ptr = export_allocator.get();
      for (int64_t i = 0; i < batch; i++) {
-        aclTensor * select_index  = ggml_cann_create_tensor(ids, ids->ne, ids->nb, 1, ACL_FORMAT_ND, i * ids->nb[1]);
-        aclTensor * export_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3);
+        acl_tensor_ptr select_index  = ggml_cann_create_tensor(ids, ids->ne, ids->nb, 1, ACL_FORMAT_ND, i * ids->nb[1]);
+        acl_tensor_ptr export_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3);
  
          int64_t select_export_ne[] = { src0->ne[0], src0->ne[1], ids->ne[0] };
          size_t  select_export_nb[3];
@@ -2943,30 +2850,27 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context & ctx, ggml_tensor
              select_export_nb[k] = select_export_nb[k - 1] * select_export_ne[k - 1];
          }
  
-        aclTensor * select_export =
+        acl_tensor_ptr select_export =
              ggml_cann_create_tensor(export_ptr, ggml_cann_type_mapping(src0->type), ggml_element_size(src0),
                                      select_export_ne, select_export_nb, 3);
-        GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, export_weight, 0, select_index, select_export);
+        GGML_CANN_CALL_ACLNN_OP(ctx, IndexSelect, export_weight.get(), 0, select_index.get(), select_export.get());
  
-        int64_t     select_transpose_ne[] = { select_export_ne[1], select_export_ne[0], select_export_ne[2] };
-        size_t      select_transpose_nb[] = { select_export_nb[1], select_export_nb[0], select_export_nb[2] };
-        aclTensor * select_export_transpose =
+        int64_t        select_transpose_ne[] = { select_export_ne[1], select_export_ne[0], select_export_ne[2] };
+        size_t         select_transpose_nb[] = { select_export_nb[1], select_export_nb[0], select_export_nb[2] };
+        acl_tensor_ptr select_export_transpose =
              ggml_cann_create_tensor(export_ptr, ggml_cann_type_mapping(src0->type), ggml_element_size(src0),
                                      select_transpose_ne, select_transpose_nb, 3);
  
-        int64_t     active_tensor_ne[] = { src1->ne[0], 1, src1->ne[1] };
-        size_t      active_tensor_nb[] = { src1->nb[0], src1->nb[1], src1->nb[1] };
-        aclTensor * active_tensor =
+        int64_t        active_tensor_ne[] = { src1->ne[0], 1, src1->ne[1] };
+        size_t         active_tensor_nb[] = { src1->nb[0], src1->nb[1], src1->nb[1] };
+        acl_tensor_ptr active_tensor =
              ggml_cann_create_tensor(src1, active_tensor_ne, active_tensor_nb, 3, ACL_FORMAT_ND, i * src1->nb[2]);
  
-        int64_t     dst_ne[] = { dst->ne[0], 1, dst->ne[1] };
-        size_t      dst_nb[] = { dst->nb[0], dst->nb[1], dst->nb[1] };
-        aclTensor * acl_dst  = ggml_cann_create_tensor(dst, dst_ne, dst_nb, 3, ACL_FORMAT_ND, i * dst->nb[2]);
-
-        GGML_CANN_CALL_ACLNN_OP(ctx, BatchMatMul, active_tensor, select_export_transpose, acl_dst, 2);
+        int64_t        dst_ne[] = { dst->ne[0], 1, dst->ne[1] };
+        size_t         dst_nb[] = { dst->nb[0], dst->nb[1], dst->nb[1] };
+        acl_tensor_ptr acl_dst  = ggml_cann_create_tensor(dst, dst_ne, dst_nb, 3, ACL_FORMAT_ND, i * dst->nb[2]);
  
-        ggml_cann_release_resources(ctx, select_index, export_weight, select_export, active_tensor, acl_dst,
-                                    select_export_transpose);
+        GGML_CANN_CALL_ACLNN_OP(ctx, BatchMatMul, active_tensor.get(), select_export_transpose.get(), acl_dst.get(), 2);
      }
  }
  
@@ -3007,7 +2911,8 @@ static void ggml_cann_mul_mat_id_quant(ggml_backend_cann_context & ctx, ggml_ten
      int64_t n_ids = ids->ne[0];  // K
  
      std::vector<char> ids_host(ggml_nbytes(ids));
-    ggml_cann_async_memcpy(ctx, ids_host.data(), ids->data, ggml_nbytes(ids), ACL_MEMCPY_DEVICE_TO_HOST);
+    ACL_CHECK(aclrtMemcpyAsync(ids_host.data(), ggml_nbytes(ids), ids->data, ggml_nbytes(ids),
+                               ACL_MEMCPY_DEVICE_TO_HOST, ctx.stream()));
      ACL_CHECK(aclrtSynchronizeStream(ctx.stream()));
  
      char * src0_original = (char *) src0->data;
@@ -3078,9 +2983,11 @@ static void ggml_cann_mul_mat_id_quant(ggml_backend_cann_context & ctx, ggml_ten
              void * dst_tmp_ptr   = dst_original + i1 * nb1 + i2 * nb2;
  
              // mem cpy
-            ggml_cann_async_memcpy(ctx, weight_buffer, src0_tmp_ptr, weight_stride, ACL_MEMCPY_DEVICE_TO_DEVICE);
+            ACL_CHECK(aclrtMemcpyAsync(weight_buffer, weight_stride, src0_tmp_ptr, weight_stride,
+                                       ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
              void * scale_buffer = (char *) weight_buffer + weight_stride;
-            ggml_cann_async_memcpy(ctx, scale_buffer, scale_tmp_ptr, scale_stride, ACL_MEMCPY_DEVICE_TO_DEVICE);
+            ACL_CHECK(aclrtMemcpyAsync(scale_buffer, scale_stride, scale_tmp_ptr, scale_stride,
+                                       ACL_MEMCPY_DEVICE_TO_DEVICE, ctx.stream()));
  
              src0_row.data  = weight_buffer;
              src1_row.data  = src1_tmp_ptr;
@@ -3155,17 +3062,18 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
          size_t faElemSize = sizeof(uint16_t);
          auto   faDataType = ACL_FLOAT16;  //ACL_BF16;
  
-        aclTensor * acl_src0_f16_tensor = nullptr;
-        aclTensor * acl_src1_f16_tensor = nullptr;
-        aclTensor * acl_src2_f16_tensor = nullptr;
+        acl_tensor_ptr acl_q_tensor = nullptr;
+        acl_tensor_ptr acl_k_tensor = nullptr;
+        acl_tensor_ptr acl_v_tensor = nullptr;
  
          // Step 1: cast the src0 (Query) to fp16 if needed
          ggml_cann_pool_alloc src0_f16_allocator(ctx.pool());
          void *               src0_f16_buffer = nullptr;
  
          if (ggml_cann_type_mapping(src0->type) != faDataType) {
-            aclTensor * acl_src0_f32_tensor = ggml_cann_create_tensor(src0, src0_bsnd_ne, src0_bsnd_nb, GGML_MAX_DIMS);
-            src0_f16_buffer                 = src0_f16_allocator.alloc(ggml_nelements(src0) * faElemSize);
+            acl_tensor_ptr acl_src0_f32_tensor =
+                ggml_cann_create_tensor(src0, src0_bsnd_ne, src0_bsnd_nb, GGML_MAX_DIMS);
+            src0_f16_buffer = src0_f16_allocator.alloc(ggml_nelements(src0) * faElemSize);
  
              int64_t * src0_f16_ne = src0_bsnd_ne;
              size_t    src0_f16_nb[GGML_MAX_DIMS];
@@ -3174,23 +3082,22 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
                  src0_f16_nb[i] = src0_f16_nb[i - 1] * src0_f16_ne[i - 1];
              }
  
-            acl_src0_f16_tensor = ggml_cann_create_tensor(src0_f16_buffer, faDataType, faElemSize, src0_f16_ne,
-                                                          src0_f16_nb, GGML_MAX_DIMS);
-            aclnn_cast(ctx, acl_src0_f32_tensor, acl_src0_f16_tensor, faDataType);
-            ggml_cann_release_resources(ctx, acl_src0_f32_tensor);
+            acl_q_tensor = ggml_cann_create_tensor(src0_f16_buffer, faDataType, faElemSize, src0_f16_ne, src0_f16_nb,
+                                                   GGML_MAX_DIMS);
+            aclnn_cast(ctx, acl_src0_f32_tensor.get(), acl_q_tensor.get(), faDataType);
          } else {
-            acl_src0_f16_tensor = ggml_cann_create_tensor(src0, src0_bsnd_ne, src0_bsnd_nb, GGML_MAX_DIMS);
+            acl_q_tensor = ggml_cann_create_tensor(src0, src0_bsnd_ne, src0_bsnd_nb, GGML_MAX_DIMS);
          }
  
          // Step 2: create the acl tensors for src1 (Key), src2 (Value),
          //         and the direct output from FusedInferAttention
  
-        acl_src1_f16_tensor = ggml_cann_create_tensor(src1, src1_bsnd_ne, src1_bsnd_nb, GGML_MAX_DIMS);
-        acl_src2_f16_tensor = ggml_cann_create_tensor(src2, src2_bsnd_ne, src2_bsnd_nb, GGML_MAX_DIMS);
+        acl_k_tensor = ggml_cann_create_tensor(src1, src1_bsnd_ne, src1_bsnd_nb, GGML_MAX_DIMS);
+        acl_v_tensor = ggml_cann_create_tensor(src2, src2_bsnd_ne, src2_bsnd_nb, GGML_MAX_DIMS);
  
          // Step 3: create the PSEShift tensor if needed
          //         this tensor is considered as mask (f16) in the llama.cpp
-        aclTensor *          bcast_pse_tensor = nullptr;
+        acl_tensor_ptr       bcast_pse_tensor;
          ggml_cann_pool_alloc bcast_pse_allocator(ctx.pool());
          if (src3 != nullptr) {
              // Construct the truncated pse tensor (common for prefill/decode)
@@ -3202,8 +3109,8 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
              };
              size_t * trunc_pse_nb = src3->nb;
  
-            aclTensor * acl_mask_f16_trunc_tensor = ggml_cann_create_tensor(src3->data, ACL_FLOAT16, sizeof(uint16_t),
-                                                                            trunc_pse_ne, trunc_pse_nb, GGML_MAX_DIMS);
+            acl_tensor_ptr acl_mask_f16_trunc_tensor = ggml_cann_create_tensor(
+                src3->data, ACL_FLOAT16, sizeof(uint16_t), trunc_pse_ne, trunc_pse_nb, GGML_MAX_DIMS);
  
              int64_t bcast_pse_ne[GGML_MAX_DIMS];
              size_t  bcast_pse_nb[GGML_MAX_DIMS];
@@ -3222,7 +3129,6 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
                  bcast_pse_tensor = ggml_cann_create_tensor(src3->data, ACL_FLOAT16, sizeof(uint16_t), bcast_pse_ne,
                                                             bcast_pse_nb, GGML_MAX_DIMS);
  
-                ggml_cann_release_resources(ctx, acl_mask_f16_trunc_tensor);
              } else {
                  bcast_pse_nb[0] = sizeof(uint16_t);
                  for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -3236,7 +3142,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
                                                             bcast_pse_ne, bcast_pse_nb, GGML_MAX_DIMS);
  
                  int64_t repeats[] = { 1, src0->ne[2], 1, 1 };
-                aclnn_repeat(ctx, acl_mask_f16_trunc_tensor, bcast_pse_tensor, repeats);
+                aclnn_repeat(ctx, acl_mask_f16_trunc_tensor.get(), bcast_pse_tensor.get(), repeats);
  
                  // alibi
                  // Compute the slope if needed. Derived from ggml_cann_softmax().
@@ -3252,21 +3158,15 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
                      slope_nb[i] = slope_nb[i - 1] * slope_ne[0];
                  }
  
-                aclTensor * slope_tensor = ggml_cann_create_tensor(slope_buffer, ACL_FLOAT16, sizeof(uint16_t),
-                                                                   slope_ne, slope_nb, GGML_MAX_DIMS);
-                GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, bcast_pse_tensor, slope_tensor);
-
-                ggml_cann_release_resources(ctx, slope_tensor, acl_mask_f16_trunc_tensor);
+                acl_tensor_ptr slope_tensor = ggml_cann_create_tensor(slope_buffer, ACL_FLOAT16, sizeof(uint16_t),
+                                                                      slope_ne, slope_nb, GGML_MAX_DIMS);
+                GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, bcast_pse_tensor.get(), slope_tensor.get());
              }
          }
  
          // Step 4: set the inputs for FusedInferAttention.
-        int             kvTensorNum       = 1;
-        aclTensor *     acl_q_tensor      = acl_src0_f16_tensor;
-        aclTensor *     acl_k_tensors[]   = { acl_src1_f16_tensor };
-        aclTensor *     acl_v_tensors[]   = { acl_src2_f16_tensor };
-        aclTensorList * acl_k_tensor_list = aclCreateTensorList(acl_k_tensors, kvTensorNum);
-        aclTensorList * acl_v_tensor_list = aclCreateTensorList(acl_v_tensors, kvTensorNum);
+        acl_tensor_list_ptr acl_k_tensor_list = ggml_cann_create_tensor_list(acl_k_tensor);
+        acl_tensor_list_ptr acl_v_tensor_list = ggml_cann_create_tensor_list(acl_v_tensor);
  
          int64_t numHeads           = src0->ne[2];  // N
          int64_t numKeyValueHeads   = src1->ne[2];
@@ -3283,8 +3183,8 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
          int64_t valueAntiquantMode = 0;
  
          GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
-        aclTensor *          fa_dst_tensor  = nullptr;
-        aclTensor *          acl_dst_tensor = nullptr;
+        acl_tensor_ptr       fa_dst_tensor;
+        acl_tensor_ptr       acl_dst_tensor;
          ggml_cann_pool_alloc out_f16_allocator(ctx.pool());
          if (dst->type == GGML_TYPE_F32) {
              void * out_f16_buffer = out_f16_allocator.alloc(ggml_nelements(dst) * faElemSize);
@@ -3302,9 +3202,9 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
              fa_dst_tensor = ggml_cann_create_tensor(dst);
          }
  
-        GGML_CANN_CALL_ACLNN_OP(ctx, FusedInferAttentionScoreV2, acl_q_tensor, acl_k_tensor_list,
-                                acl_v_tensor_list,                     // q, k, v
-                                bcast_pse_tensor, nullptr,             // pse, mask
+        GGML_CANN_CALL_ACLNN_OP(ctx, FusedInferAttentionScoreV2, acl_q_tensor.get(), acl_k_tensor_list.get(),
+                                acl_v_tensor_list.get(),               // q, k, v
+                                bcast_pse_tensor.get(), nullptr,       // pse, mask
                                  nullptr, nullptr,                      // actSeqLen, actSeqLenkv
                                  nullptr, nullptr,                      // deqScale1, quantScale1
                                  nullptr, nullptr, nullptr,             // deqScale2, quantScale2, quantOffset2
@@ -3322,19 +3222,15 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst
                                  blockSize, antiquantMode,              // blockSize, antiquantMode
                                  softmaxLseFlag,                        // softmaxLseFlag
                                  keyAntiquantMode, valueAntiquantMode,  // keyAntiqMode, valueAntiqMode
-                                fa_dst_tensor,                         // attentionOut
+                                fa_dst_tensor.get(),                   // attentionOut
                                  nullptr                                // softmaxLse
          );
  
          if (dst->type == GGML_TYPE_F32) {
              // Step 6: post-processing, permute and cast to f32
-            aclTensor * acl_dst_tensor = ggml_cann_create_tensor(dst);
-            aclnn_cast(ctx, fa_dst_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
+            acl_tensor_ptr acl_dst_tensor = ggml_cann_create_tensor(dst);
+            aclnn_cast(ctx, fa_dst_tensor.get(), acl_dst_tensor.get(), ggml_cann_type_mapping(dst->type));
          }
-
-        ggml_cann_release_resources(ctx, acl_src0_f16_tensor, acl_k_tensor_list, acl_v_tensor_list, fa_dst_tensor,
-                                    acl_dst_tensor, bcast_pse_tensor);
-
      } else {
          GGML_ABORT("Function is not implemented.");
      }
diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h

index c1ea1b153fc80dea8c5d5b5f9ca0f5663161fe45..a6c2eb1226b1e5592ffc6f19e61bff2bee7569f9 100644 (file)
--- a/ggml/src/ggml-cann/aclnn_ops.h
+++ b/ggml/src/ggml-cann/aclnn_ops.h
@@ -23,33 +23,35 @@
  #ifndef CANN_ACLNN_OPS
  #define CANN_ACLNN_OPS
  
-#include <unordered_set>
-#include <functional>
+#include "acl_tensor.h"
+#include "common.h"
+
  #include <aclnnop/aclnn_abs.h>
-#include <aclnnop/aclnn_neg.h>
-#include <aclnnop/aclnn_exp.h>
  #include <aclnnop/aclnn_arange.h>
  #include <aclnnop/aclnn_argsort.h>
  #include <aclnnop/aclnn_cat.h>
  #include <aclnnop/aclnn_clamp.h>
+#include <aclnnop/aclnn_cos.h>
+#include <aclnnop/aclnn_exp.h>
  #include <aclnnop/aclnn_gelu.h>
  #include <aclnnop/aclnn_gelu_v2.h>
-#include <aclnnop/aclnn_sigmoid.h>
  #include <aclnnop/aclnn_hardsigmoid.h>
  #include <aclnnop/aclnn_hardswish.h>
  #include <aclnnop/aclnn_leaky_relu.h>
+#include <aclnnop/aclnn_log.h>
+#include <aclnnop/aclnn_logsoftmax.h>
+#include <aclnnop/aclnn_neg.h>
+#include <aclnnop/aclnn_norm.h>
  #include <aclnnop/aclnn_relu.h>
+#include <aclnnop/aclnn_sigmoid.h>
+#include <aclnnop/aclnn_sign.h>
  #include <aclnnop/aclnn_silu.h>
-#include <aclnnop/aclnn_tanh.h>
-#include <aclnnop/aclnn_sqrt.h>
  #include <aclnnop/aclnn_sin.h>
-#include <aclnnop/aclnn_cos.h>
-#include <aclnnop/aclnn_log.h>
-#include <aclnnop/aclnn_sign.h>
-#include <aclnnop/aclnn_norm.h>
-#include <aclnnop/aclnn_logsoftmax.h>
-#include "acl_tensor.h"
-#include "common.h"
+#include <aclnnop/aclnn_sqrt.h>
+#include <aclnnop/aclnn_tanh.h>
+
+#include <functional>
+#include <unordered_set>
  
  /**
   * @brief   Repeats a ggml tensor along each dimension to match the dimensions
@@ -688,12 +690,12 @@ void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor *
   * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
   * @param acl_dst  Output pointer to the created ACL tensor corresponding to dst.
   */
-void bcast_shape(ggml_tensor * src0,
-                 ggml_tensor * src1,
-                 ggml_tensor * dst,
-                 aclTensor **  acl_src0,
-                 aclTensor **  acl_src1,
-                 aclTensor **  acl_dst);
+void bcast_shape(ggml_tensor *    src0,
+                 ggml_tensor *    src1,
+                 ggml_tensor *    dst,
+                 acl_tensor_ptr & acl_src0,
+                 acl_tensor_ptr & acl_src1,
+                 acl_tensor_ptr & acl_dst);
  
  /**
   * @brief   Computes the 1D transposed convolution (deconvolution) of a ggml
@@ -873,83 +875,6 @@ template <typename... Args> void register_acl_resources(std::vector<any_acl_reso
      (vec.emplace_back(make_acl_resource(args)), ...);
  }
  
-/**
- * @brief Task class that wraps the execution of an aclnn function call.
- */
-class aclnn_task : public cann_task {
-  public:
-    aclnn_task(aclnn_func_t    aclnn_func,
-               void *          workspace_addr,
-               uint64_t        workspace_size,
-               aclOpExecutor * executor,
-               aclrtStream     stream) :
-        aclnn_func_(aclnn_func),
-        workspace_addr_(workspace_addr),
-        workspace_size_(workspace_size),
-        executor_(executor),
-        stream_(stream) {}
-
-    virtual void run_task() override { ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); }
-  private:
-    aclnn_func_t    aclnn_func_;
-    void *          workspace_addr_;
-    uint64_t        workspace_size_;
-    aclOpExecutor * executor_;
-    aclrtStream     stream_;
-};
-
-/**
- * @brief Task class that releases ACL resources after usage.
- */
-class release_resource_task : public cann_task {
-  public:
-    release_resource_task(std::vector<any_acl_resource> && resources) { resource_ = std::move(resources); }
-
-    virtual void run_task() override { resource_.clear(); }
-  private:
-    std::vector<any_acl_resource> resource_;
-};
-
-/**
- * @brief Task class for performing asynchronous memory copy operations.
- */
-class async_memcpy_task : public cann_task {
-  public:
-    async_memcpy_task(void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream) :
-        dst_(dst),
-        src_(src),
-        size_(size),
-        kind_(kind),
-        stream_(stream) {}
-
-    virtual void run_task() override { ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); }
-  private:
-    void *          dst_;
-    const void *    src_;
-    size_t          size_;
-    aclrtMemcpyKind kind_;
-    aclrtStream     stream_;
-};
-
-/**
- * @brief Task class for performing asynchronous memory set operations.
- */
-class async_memset_task : public cann_task {
-  public:
-    async_memset_task(void * buffer, size_t size, int32_t value, aclrtStream stream) :
-        buffer_(buffer),
-        size_(size),
-        value_(value),
-        stream_(stream) {}
-
-    virtual void run_task() override { ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); }
-  private:
-    void *      buffer_;
-    size_t      size_;
-    int32_t     value_;
-    aclrtStream stream_;
-};
-
  /**
   * @brief Launches an asynchronous task using the memory allocator.
   *
@@ -968,95 +893,20 @@ class async_memset_task : public cann_task {
   * same stream are executed in queue order.
   */
  
-#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                                                  \
-    do {                                                                                                            \
-        uint64_t        workspaceSize = 0;                                                                          \
-        aclOpExecutor * executor;                                                                                   \
-        void *          workspaceAddr = nullptr;                                                                    \
-        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));                        \
-        /* workspace should alloced in main thread to keep malloc order when using vmm. */                          \
-        if (workspaceSize > 0) {                                                                                    \
-            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);                                    \
-            workspaceAddr = workspace_allocator.get();                                                              \
-        }                                                                                                           \
-        if (CTX.async_mode) {                                                                                       \
-            auto task =                                                                                             \
-                std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, executor, CTX.stream()); \
-            CTX.task_queue.submit_task(std::move(task));                                                            \
-        } else {                                                                                                    \
-            ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));                        \
-        }                                                                                                           \
+#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                           \
+    do {                                                                                     \
+        uint64_t        workspaceSize = 0;                                                   \
+        aclOpExecutor * executor;                                                            \
+        void *          workspaceAddr = nullptr;                                             \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+        /* workspace should alloced in main thread to keep malloc order when using vmm. */   \
+        if (workspaceSize > 0) {                                                             \
+            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);             \
+            workspaceAddr = workspace_allocator.get();                                       \
+        }                                                                                    \
+        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));     \
      } while (0)
  
-/**
- * @brief Registers and releases multiple ACL resources, optionally deferring the release
- *        using a task.
- *
- * @tparam Args Types of the ACL resources.
- * @param ctx Backend context which manages task submission and async mode.
- * @param args Pointers to ACL resources to be released.
- */
-template <typename... Args> void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
-    std::vector<any_acl_resource> resources;
-    register_acl_resources(resources, std::forward<Args>(args)...);
-    if (ctx.async_mode) {
-        auto task = std::make_unique<release_resource_task>(std::move(resources));
-        ctx.task_queue.submit_task(std::move(task));
-    }
-}
-
-/**
- * @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
- *
- * @param ctx Backend context containing stream and async configuration.
- * @param dst Destination memory address.
- * @param src Source memory address.
- * @param len Size of memory to copy (in bytes).
- * @param kind Type of memory copy (host-to-device, device-to-host, etc).
- */
-inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx,
-                                   void *                      dst,
-                                   const void *                src,
-                                   size_t                      len,
-                                   aclrtMemcpyKind             kind) {
-    if (ctx.async_mode) {
-        auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
-        ctx.task_queue.submit_task(std::move(task));
-    } else {
-        ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx.stream()));
-    }
-}
-
-inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx,
-                                   void *                      dst,
-                                   const void *                src,
-                                   size_t                      len,
-                                   aclrtMemcpyKind             kind) {
-    if (ctx->async_mode) {
-        auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
-        ctx->task_queue.submit_task(std::move(task));
-    } else {
-        ACL_CHECK(aclrtMemcpyAsync(dst, len, src, len, kind, ctx->stream()));
-    }
-}
-
-/**
- * @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
- *
- * @param ctx Backend context containing stream and async configuration.
- * @param buffer Memory buffer to be set.
- * @param size Size of the memory buffer (in bytes).
- * @param value Value to set in the buffer.
- */
-inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, size_t size, int value) {
-    if (ctx.async_mode) {
-        auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
-        ctx.task_queue.submit_task(std::move(task));
-    } else {
-        ACL_CHECK(aclrtMemsetAsync(buffer, size, value, size, ctx.stream()));
-    }
-}
-
  /**
   * @brief   Performs sparse expert-based matrix multiplication using the CANN backend.
   *
@@ -1129,15 +979,11 @@ template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & c
      ggml_tensor * src0 = dst->src[0];
      ggml_tensor * src1 = dst->src[1];
  
-    aclTensor * acl_src0;
-    aclTensor * acl_src1;
-    aclTensor * acl_dst;
+    acl_tensor_ptr acl_src0, acl_src1, acl_dst;
  
      // Need bcast
-    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
-    binary_op(ctx, acl_src0, acl_src1, acl_dst);
-
-    ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
+    bcast_shape(src0, src1, dst, acl_src0, acl_src1, acl_dst);
+    binary_op(ctx, acl_src0.get(), acl_src1.get(), acl_dst.get());
  }
  
  /**
@@ -1147,7 +993,7 @@ template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & c
   * and stores the result in the destination tensor.
   *
   * @tparam unary_op A callable with the signature:
- *         void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
+ *         void(ggml_backend_cann_context&, aclTensor *, aclTensor *)
   *         where the first aclTensor is the source and the second is the destination.
   * @param ctx The CANN backend context for managing resources and execution.
   * @param dst The destination tensor. Its src[0] is treated as the input tensor.
@@ -1156,11 +1002,10 @@ template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
  void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
      ggml_tensor * src = dst->src[0];
  
-    aclTensor * acl_src = ggml_cann_create_tensor(src);
-    aclTensor * acl_dst = ggml_cann_create_tensor(dst);
+    acl_tensor_ptr acl_src = ggml_cann_create_tensor(src);
+    acl_tensor_ptr acl_dst = ggml_cann_create_tensor(dst);
  
-    unary_op(ctx, acl_src, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src, acl_dst);
+    unary_op(ctx, acl_src.get(), acl_dst.get());
  }
  
  /**
diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h

index e87dbcf329ff28036bb9000b5c06808c31f53c5e..d4ef24eaa7b0562a75d0530eb96a1ff08fa5546b 100644 (file)
--- a/ggml/src/ggml-cann/common.h
+++ b/ggml/src/ggml-cann/common.h
@@ -23,26 +23,26 @@
  #ifndef CANN_COMMON_H
  #define CANN_COMMON_H
  
+#include "../ggml-impl.h"
+#include "../include/ggml-cann.h"
+#include "../include/ggml.h"
+
  #include <acl/acl.h>
+#include <unistd.h>
  
+#include <atomic>
+#include <condition_variable>
  #include <cstdio>
+#include <functional>
  #include <iostream>
+#include <list>
  #include <map>
  #include <memory>
-#include <string>
-#include <vector>
-#include <atomic>
-#include <condition_variable>
  #include <mutex>
-#include <thread>
-#include <unistd.h>
-#include <functional>
  #include <optional>
-#include <list>
-
-#include "../include/ggml-cann.h"
-#include "../include/ggml.h"
-#include "../ggml-impl.h"
+#include <string>
+#include <thread>
+#include <vector>
  
  #define MATRIX_ROW_PADDING    512
  #define GGML_CANN_MAX_STREAMS 8
@@ -214,130 +214,6 @@ struct ggml_cann_pool_alloc {
      ggml_cann_pool_alloc & operator=(ggml_cann_pool_alloc &&) = delete;
  };
  
-/**
- * @brief Function pointer type for ACLNN operator calls.
- */
-using aclnn_func_t = aclnnStatus (*)(void *, uint64_t, aclOpExecutor *, aclrtStream);
-
-/**
- * @brief Base class for all CANN tasks to be submitted to the task queue.
- *
- * Users should override the run_task() method with actual task logic.
- */
-class cann_task {
-  public:
-    virtual void run_task() {}
-};
-
-/**
- * @brief A lock-free ring-buffer based task queue for asynchronously executing cann_task instances.
- */
-class cann_task_queue {
-  public:
-    /**
-     * @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
-     *
-     * @param capacity Queue capacity. Must be a power of 2.
-     * @param device Target device ID (used for context setting).
-     */
-    explicit cann_task_queue(size_t capacity, int32_t device) :
-        buffer_(capacity),
-        capacity_(capacity),
-        head_(0),
-        tail_(0),
-        running_(false),
-        device_(device) {
-        GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
-        mask_ = capacity_ - 1;
-    }
-
-    /**
-     * @brief Attempts to enqueue a task into the queue.
-     *
-     * @param item Unique pointer to the task.
-     * @return true if the task was successfully enqueued, false if the queue was full.
-     */
-    bool enqueue(std::unique_ptr<cann_task> && item) {
-        size_t next_tail = (tail_ + 1) & mask_;
-
-        if (next_tail == head_) {
-            return false;
-        }
-
-        buffer_[tail_] = std::move(item);
-        std::atomic_thread_fence(std::memory_order_release);
-        tail_ = next_tail;
-
-        return true;
-    }
-
-    /**
-     * @brief Submits a task to the queue, and starts the worker thread if not already running.
-     *
-     * @param task Task to be submitted.
-     */
-    void submit_task(std::unique_ptr<cann_task> && task) {
-        while (!enqueue(std::move(task))) {
-            std::this_thread::yield();
-            continue;
-        }
-
-        if (!running_) {
-            running_ = true;
-            thread_  = std::thread(&cann_task_queue::execute, this);
-        }
-    }
-
-    /**
-     * @brief Waits until the queue is completely empty and no tasks are being processed.
-     */
-    void wait() {
-        while (running_ && head_ != tail_) {
-            std::this_thread::yield();
-            continue;
-        }
-    }
-
-    /**
-     * @brief Stops the task queue and joins the worker thread.
-     */
-    void stop() {
-        running_ = false;
-        if (thread_.joinable()) {
-            thread_.join();
-        }
-    }
-
-  private:
-    /**
-     * @brief Worker thread function that continuously dequeues and executes tasks.
-     */
-    void execute() {
-        ggml_cann_set_device(device_);
-
-        while (running_) {
-            if (head_ == tail_) {
-                std::this_thread::yield();
-                continue;
-            }
-
-            std::atomic_thread_fence(std::memory_order_acquire);
-            buffer_[head_]->run_task();
-            buffer_[head_].reset();
-            head_ = (head_ + 1) & mask_;
-        }
-    }
-
-    std::vector<std::unique_ptr<cann_task>> buffer_;
-    const size_t                            capacity_;
-    size_t                                  mask_;
-    size_t                                  head_;
-    size_t                                  tail_;
-    bool                                    running_;
-    std::thread                             thread_;
-    int32_t                                 device_;
-};
-
  #ifdef USE_ACL_GRAPH
  struct ggml_graph_node_properties {
      // dst tensor
@@ -474,7 +350,6 @@ struct ggml_backend_cann_context {
      ggml_cann_graph_lru_cache graph_lru_cache;
      bool                      acl_graph_mode = true;
  #endif
-    cann_task_queue        task_queue;
      bool                   async_mode;
      // Rope Cache
      ggml_cann_rope_cache   rope_cache;
@@ -488,15 +363,10 @@ struct ggml_backend_cann_context {
       * @brief Constructor for initializing the context with a given device.
       * @param device Device ID.
       */
-    explicit ggml_backend_cann_context(int device) :
-        device(device),
-        name("CANN" + std::to_string(device)),
-        task_queue(1024, device) {
+    explicit ggml_backend_cann_context(int device) : device(device), name("CANN" + std::to_string(device)) {
          ggml_cann_set_device(device);
          description = aclrtGetSocName();
  
-        async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
-        GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF");
  #ifdef USE_ACL_GRAPH
          acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on"));
          GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER",
@@ -509,7 +379,6 @@ struct ggml_backend_cann_context {
       */
      ~ggml_backend_cann_context() {
          ggml_cann_set_device(device);
-        task_queue.stop();
          if (copy_event != nullptr) {
              ACL_CHECK(aclrtDestroyEvent(copy_event));
          }
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp

index da7aede702a54573dfff74191d46693d2a21c41d..9576dcb6e87560479dd140e4e6cc10cf83859fb2 100644 (file)
--- a/ggml/src/ggml-cann/ggml-cann.cpp
+++ b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -22,24 +22,24 @@
  
  #include "ggml-cann.h"
  
+#include "ggml-backend-impl.h"
+#include "ggml-cann/aclnn_ops.h"
+#include "ggml-cann/common.h"
+#include "ggml-impl.h"
+#include "ggml.h"
+
  #include <acl/acl.h>
-#include <stdarg.h>
  #include <aclnnop/aclnn_trans_matmul_weight.h>
+#include <stdarg.h>
  
+#include <chrono>
  #include <cmath>
  #include <cstdio>
  #include <cstring>
  #include <mutex>
+#include <optional>
  #include <queue>
-#include <chrono>
  #include <unordered_set>
-#include <optional>
-
-#include "ggml-impl.h"
-#include "ggml-backend-impl.h"
-#include "ggml-cann/aclnn_ops.h"
-#include "ggml-cann/common.h"
-#include "ggml.h"
  
  #define GGML_COMMON_DECL_C
  
@@ -1177,19 +1177,18 @@ static ggml_cann_nz_workspace g_nz_workspaces[GGML_CANN_MAX_DEVICES];
   *       across calls. This reduces overhead from repeated memory allocation and deallocation.
   */
  static void weight_format_to_nz(ggml_tensor * tensor, size_t offset, int device) {
-    aclTensor * weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne, tensor->nb, 2, ACL_FORMAT_ND, offset);
-    uint64_t    workspaceSize    = 0;
+    acl_tensor_ptr weightTransposed = ggml_cann_create_tensor(tensor, tensor->ne, tensor->nb, 2, ACL_FORMAT_ND, offset);
+    uint64_t       workspaceSize    = 0;
      aclOpExecutor * executor;
  
      // TransMatmulWeight
-    ACL_CHECK(aclnnTransMatmulWeightGetWorkspaceSize(weightTransposed, &workspaceSize, &executor));
+    ACL_CHECK(aclnnTransMatmulWeightGetWorkspaceSize(weightTransposed.get(), &workspaceSize, &executor));
      // Avoid frequent malloc/free of the workspace.
      g_nz_workspaces[device].realloc(workspaceSize);
  
      void * g_nz_workspace = g_nz_workspaces[device].get();
  
      ACL_CHECK(aclnnTransMatmulWeight(g_nz_workspace, workspaceSize, executor, nullptr));
-    ACL_CHECK(aclDestroyTensor(weightTransposed));
  }
  
  // TODO: need handle tensor which has paddings.
@@ -1641,7 +1640,7 @@ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
                             /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
                             },
          /* .device   = */
-         ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
+        ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
          /* .context  = */ nullptr,
      };
  
@@ -1949,7 +1948,8 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
      GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) && "unsupported buffer type");
      GGML_ASSERT(!ggml_is_quantized(tensor->type));
  
-    ggml_cann_async_memcpy(cann_ctx, (char *) tensor->data + offset, data, size, ACL_MEMCPY_HOST_TO_DEVICE);
+    ACL_CHECK(aclrtMemcpyAsync((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE,
+                               cann_ctx->stream()));
  }
  
  /**
@@ -1974,7 +1974,8 @@ static void ggml_backend_cann_get_tensor_async(ggml_backend_t      backend,
      GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) && "unsupported buffer type");
      GGML_ASSERT(!ggml_is_quantized(tensor->type));
  
-    ggml_cann_async_memcpy(cann_ctx, data, (char *) tensor->data + offset, size, ACL_MEMCPY_DEVICE_TO_HOST);
+    ACL_CHECK(aclrtMemcpyAsync(data, size, (char *) tensor->data + offset, size, ACL_MEMCPY_DEVICE_TO_HOST,
+                               cann_ctx->stream()));
  }
  
  /**
@@ -2035,7 +2036,6 @@ static bool ggml_backend_cann_cpy_tensor_async(ggml_backend_t      backend_src,
          ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_dst->device, 0));
  
          // wait for task_queue empty to keep task order.
-        cann_ctx_src->task_queue.wait();
          ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size, ACL_MEMCPY_DEVICE_TO_DEVICE,
                                     cann_ctx_src->stream()));
          // record event on src stream after the copy
@@ -2068,7 +2068,6 @@ static bool ggml_backend_cann_cpy_tensor_async(ggml_backend_t      backend_src,
   */
  static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
      ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *) backend->context;
-    cann_ctx->task_queue.wait();
      ggml_cann_set_device(cann_ctx->device);
      ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
  }
@@ -2485,6 +2484,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
                  if (mode & GGML_ROPE_TYPE_VISION) {
                      return false;
                  }
+                if (op->src[0]->ne[0] > 896) {
+                    return false;
+                }
  #ifdef ASCEND_310P
                  if (!ggml_is_contiguous(op->src[0])) {
                      return false;
@@ -2521,10 +2523,11 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
                  // value of paddingW should be at most half of kernelW
                  return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
              }
+        case GGML_OP_SUM:
+            return ggml_is_contiguous_rows(op->src[0]);
          case GGML_OP_L2_NORM:
          case GGML_OP_CROSS_ENTROPY_LOSS:
          case GGML_OP_DUP:
-        case GGML_OP_SUM:
          case GGML_OP_IM2COL:
          case GGML_OP_CONCAT:
          case GGML_OP_REPEAT:
author	hipudding <redacted>
	Mon, 17 Nov 2025 00:43:59 +0000 (08:43 +0800)
committer	Georgi Gerganov <redacted>
	Mon, 17 Nov 2025 19:05:46 +0000 (21:05 +0200)
ggml/src/ggml-cann/Doxyfile	[deleted file]	patch \| blob \| history
ggml/src/ggml-cann/acl_tensor.cpp		patch \| blob \| history
ggml/src/ggml-cann/acl_tensor.h		patch \| blob \| history
ggml/src/ggml-cann/aclnn_ops.cpp		patch \| blob \| history
ggml/src/ggml-cann/aclnn_ops.h		patch \| blob \| history
ggml/src/ggml-cann/common.h		patch \| blob \| history
ggml/src/ggml-cann/ggml-cann.cpp		patch \| blob \| history