Converting a PyTorch-Trained Model to ONNX

Method 1

Load the Pipeline

Load a FeatureExtractionPipeline from the transformers library. This step is not strictly required if you already know all the input shapes and the configuration of the model you want to use, but the helper functions in convert_graph_to_onnx greatly simplify preparing a custom model for export. The variables produced here are passed to the torch.onnx.export call below.

import torch
import onnx
from transformers import FeatureExtractionPipeline, AutoModel, AutoTokenizer, convert_graph_to_onnx

model_access = "my_model_dir"
model_pipeline = FeatureExtractionPipeline(
        model=AutoModel.from_pretrained(model_access),
        tokenizer=AutoTokenizer.from_pretrained(model_access, use_fast=True),
        framework="pt",
        device=-1)

config = model_pipeline.model.config
tokenizer = model_pipeline.tokenizer

with torch.no_grad():
    input_names, output_names, dynamic_axes, tokens = convert_graph_to_onnx.infer_shapes(model_pipeline, "pt")
    ordered_input_names, model_args = convert_graph_to_onnx.ensure_valid_input(
            model_pipeline.model, tokens, input_names)

# If you want to expose more outputs, modify dynamic_axes and output_names accordingly.
del dynamic_axes["output_0"]  # Delete unused output
del dynamic_axes["output_1"]  # Delete unused output

output_names = ["output"]
dynamic_axes["output"] = {0: 'batch'}

# Export the model to ONNX
model = torch.load("best_model.pth")  # the fine-tuned model saved with torch.save
model.eval()
output = "best_model.onnx"
torch.onnx.export(
        model,
        model_args,
        f=output,
        input_names=ordered_input_names,  # keep names in the argument order expected by forward()
        output_names=output_names,
        dynamic_axes=dynamic_axes,
        do_constant_folding=True,
        use_external_data_format=False,
        enable_onnx_checker=True,
        opset_version=11)

# Check the ONNX model
onnx_model = onnx.load(output)
onnx.checker.check_model(onnx_model)
print('The model is checked!')
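
The structural check above only validates the graph. To confirm the export is numerically faithful, it can also help to compare the ONNX Runtime output against the original PyTorch output. A minimal sketch, assuming the variables from the export step (model, model_args, ordered_input_names) are still in scope and everything is on CPU:

import numpy as np
import onnxruntime as rt

sess = rt.InferenceSession(output)
ort_inputs = {name: arg.cpu().numpy() for name, arg in zip(ordered_input_names, model_args)}
ort_out = sess.run(None, ort_inputs)[0]

with torch.no_grad():
    torch_out = model(*model_args)[0]

# compare the first output tensor within a small numerical tolerance
np.testing.assert_allclose(torch_out.cpu().numpy(), ort_out, rtol=1e-3, atol=1e-4)
print('PyTorch and ONNX Runtime outputs match')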

Load the ONNX model for inference

import numpy as np
from transformers import AutoTokenizer
import onnxruntime as rt
import time

onnx_model_path = "best_model.onnx"
model_path = "best-checkpoint"  # my_model_dir
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
span = "输入"  # 输入的文本

opt = rt.SessionOptions()
sess = rt.InferenceSession(onnx_model_path, opt)  # load the ONNX model with the session options
t0 = time.perf_counter()
model_input = tokenizer.encode_plus(span)
model_input = {name: np.atleast_2d(value) for name, value in model_input.items()}
onnx_result = sess.run(None, model_input)
onnx_result = onnx_result[0]
onnx_result = np.argmax(onnx_result, axis=-1)
print(time.perf_counter() - t0)
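
Since the input axes inferred above (batch and sequence) were exported as dynamic, the same session can also serve a batch of texts. A sketch, assuming the sess, tokenizer and np objects above and a classification-style output (matching the argmax post-processing):

texts = ["第一句", "第二句"]  # hypothetical example inputs
batch = tokenizer(texts, padding=True)
batch = {name: np.asarray(values, dtype=np.int64) for name, values in batch.items()}
batch_output = sess.run(None, batch)[0]
print(np.argmax(batch_output, axis=-1))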

Method 2

pt2onnx

import torch
from transformers import BertTokenizer, BertForSequenceClassification
import onnx

def pt2onnx_bert():
    pretrained_model = '../model/bert-base-chinese'
    onnx_path = 'api/onnx/bert-base-chinese-cls.onnx'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = BertTokenizer.from_pretrained(pretrained_model)
    model = BertForSequenceClassification.from_pretrained(pretrained_model)
    model.eval()
    model.to(device)

    input_names = ['input_ids', 'attention_mask', 'token_type_ids']
    outputs_names = ['output']
    # Build a dummy input for tracing
    inputs = '输入'
    encode_dict = tokenizer.encode_plus(text=inputs,
                                        max_length=512,
                                        pad_to_max_length=True,
                                        return_tensors='pt',
                                        return_token_type_ids=True,
                                        return_attention_mask=True)

    # move the dummy inputs to the same device as the model
    input_ids = encode_dict['input_ids'].to(device)
    attention_mask = encode_dict['attention_mask'].to(device)
    token_type_ids = encode_dict['token_type_ids'].to(device)

    dummy_input = {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'token_type_ids': token_type_ids
    }
    with torch.no_grad():
        torch.onnx.export(model=model,
                          args=tuple(dummy_input.values()),
                          f=onnx_path,
                          opset_version=11,
                          input_names=input_names,
                          output_names=outputs_names,
                          dynamic_axes={'input_ids': {0: 'batch_size'},
                                        'attention_mask': {0: 'batch_size'},
                                        'token_type_ids': {0: 'batch_size'},
                                        'output': {0: 'batch_size'}}
                          )
    # Validate the exported graph
    onnx.checker.check_model(onnx_path)
    print('The ONNX model is checked!')
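
Note that with the export above only the batch dimension is dynamic; the sequence length is fixed at 512 because the dummy input is padded to max_length. If variable-length inputs are needed, the second axis can also be declared dynamic. A sketch of the modified dynamic_axes (the axis names are just labels):

dynamic_axes = {'input_ids': {0: 'batch_size', 1: 'seq_len'},
                'attention_mask': {0: 'batch_size', 1: 'seq_len'},
                'token_type_ids': {0: 'batch_size', 1: 'seq_len'},
                'output': {0: 'batch_size'}}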

Inference

import numpy as np
from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_all_providers

def create_model_for_provider(model_path: str, provider: str) -> InferenceSession:
    assert provider in get_all_providers(), f"provider {provider} not found, {get_all_providers()}"

    options = SessionOptions()
    # control the number of threads (0 lets ONNX Runtime choose)
    options.intra_op_num_threads = 0
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session = InferenceSession(model_path, options, providers=[provider])
    return session

def onnx_inference(x):
    # onnx_model_path, tokenizer, MAX_SEQ_LEN and id2label are assumed to be defined elsewhere
    # use 'CPUExecutionProvider' here for CPU-only inference
    ort_session = create_model_for_provider(onnx_model_path, 'CUDAExecutionProvider')
    encode_dict = tokenizer.batch_encode_plus(batch_text_or_text_pairs=x,
                                              max_length=MAX_SEQ_LEN,
                                              pad_to_max_length=True,
                                              return_tensors='pt',
                                              return_token_type_ids=True,  # the exported graph expects token_type_ids
                                              return_attention_mask=True)

    inputs = {k: v.numpy() for k, v in encode_dict.items()}
    outputs = ort_session.run(None, inputs)
    # post-processing depends on the model; here, argmax over classification logits
    outputs = outputs[0]
    outputs = np.argmax(outputs, axis=-1)
    result = []
    for out in outputs:
        result.append(id2label[out])
    return result
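
A usage sketch, assuming the export from the previous step; the values of onnx_model_path, MAX_SEQ_LEN and id2label below are placeholders to be replaced with your own:

from transformers import BertTokenizer

onnx_model_path = 'api/onnx/bert-base-chinese-cls.onnx'
MAX_SEQ_LEN = 512
tokenizer = BertTokenizer.from_pretrained('../model/bert-base-chinese')
id2label = {0: 'label_0', 1: 'label_1'}  # hypothetical mapping from class index to label name

print(onnx_inference(['输入1', '输入2']))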

Compiling Python with Cython

from distutils.core import setup
from Cython.Build import cythonize
from distutils.extension import Extension

extensions = [
    Extension(name='name1',
              sources=['source file'],
              include_dirs=['head files'],
              libraries=['library names'],
              library_dirs=['library directories']),
    Extension(name='name2',
              sources=['source file'],
              include_dirs=['head files'],
              libraries=['library names'],
              library_dirs=['library directories'])
]
# Place setup.py in the same directory as the source files
# With the configuration above, two shared objects are produced: name1.so and name2.so
# Command: python setup.py build_ext --inplace, which generates both the C files and the .so files
setup(
    name='name',
    # language_level=3 targets Python 3
    ext_modules=cythonize(extensions, language_level=3)
)
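
For the common case of compiling plain .py files that need no extra C headers or libraries, the Extension entries can be omitted and file names passed to cythonize directly. A minimal sketch, assuming hypothetical module files foo.py and bar.py next to setup.py:

from distutils.core import setup
from Cython.Build import cythonize

setup(
    name='myproject',
    ext_modules=cythonize(['foo.py', 'bar.py'], language_level=3)
)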

References

https://github.com/oborchers/sentence-transformers/blob/72da7293186700bff4fed8e81adcfe40a03be1d2/examples/onnx_inference/onnx_inference.ipynb

https://zhuanlan.zhihu.com/p/422290231
