Python API
Section titled “Python API”

注意: この API はプレビュー版であり、変更される可能性があります。
インストールとインポート
Section titled “インストールとインポート”

Python APIはonnxruntime-genai Pythonパッケージによって提供されます。

pip install onnxruntime-genai

import onnxruntime_genai

Model クラス
Section titled “Model クラス”

モデルのロード

Section titled “モデルのロード”

onnxruntime_genai.Model(config_path: str) -> Model
onnxruntime_genai.Model(config: onnxruntime_genai.Config) -> Model

- type: モデルタイプを文字列として返します。

  model = onnxruntime_genai.Model("config.json")
  print(model.type)

- device_type: デバイスタイプを文字列として返します。

  print(model.device_type)

- create_multimodal_processor() -> MultiModalProcessor

  processor = model.create_multimodal_processor()
Config クラス
Section titled “Config クラス”

onnxruntime_genai.Config(config_path: str) -> Config

- append_provider(provider: str)

  config = onnxruntime_genai.Config("config.json")
  config.append_provider("CUDAExecutionProvider")

- set_provider_option(option: str, value: str)

  config.set_provider_option("device_id", "0")

- clear_providers()

  config.clear_providers()
GeneratorParams クラス
Section titled “GeneratorParams クラス”

onnxruntime_genai.GeneratorParams(model: Model) -> GeneratorParams

- set_inputs(named_tensors: NamedTensors)

  params = onnxruntime_genai.GeneratorParams(model)
  named_tensors = onnxruntime_genai.NamedTensors()
  params.set_inputs(named_tensors)

- set_model_input(name: str, value: numpy.ndarray)

  import numpy as np
  params.set_model_input("input_ids", np.array([1, 2, 3], dtype=np.int32))

- try_graph_capture_with_max_batch_size(max_batch_size: int)

  params.try_graph_capture_with_max_batch_size(8)

- set_search_options(**options)

  params.set_search_options(temperature=0.7, top_p=0.9)

- set_guidance(type: str, data: str)

  params.set_guidance("prefix", "昔々あるところに")
Generator クラス
Section titled “Generator クラス”

onnxruntime_genai.Generator(model: Model, params: GeneratorParams) -> Generator

- is_done() -> bool

  generator = onnxruntime_genai.Generator(model, params)
  done = generator.is_done()

- get_output(name: str) -> numpy.ndarray

  output = generator.get_output("output_ids")

- append_tokens(tokens: numpy.ndarray[int32])

  generator.append_tokens(np.array([4, 5], dtype=np.int32))

- append_tokens(tokens: onnxruntime_genai.Tensor)

  tensor = onnxruntime_genai.Tensor(np.array([4, 5], dtype=np.int32))
  generator.append_tokens(tensor)

- get_logits() -> numpy.ndarray[float32]

  logits = generator.get_logits()

- set_logits(new_logits: numpy.ndarray[float32])

  generator.set_logits(np.zeros_like(logits))

- generate_next_token()

  generator.generate_next_token()

- rewind_to(new_length: int)

  generator.rewind_to(2)

- get_next_tokens() -> numpy.ndarray[int32]

  next_tokens = generator.get_next_tokens()

- get_sequence(index: int) -> numpy.ndarray[int32]

  sequence = generator.get_sequence(0)

- set_active_adapter(adapters: onnxruntime_genai.Adapters, adapter_name: str)

  adapters = onnxruntime_genai.Adapters(model)
  generator.set_active_adapter(adapters, "adapter_name")
Tokenizer クラス
Section titled “Tokenizer クラス”

onnxruntime_genai.Tokenizer(model: Model) -> Tokenizer

- encode(text: str) -> numpy.ndarray[int32]

  tokenizer = onnxruntime_genai.Tokenizer(model)
  tokens = tokenizer.encode("こんにちは世界")

- to_token_id(text: str) -> int

  token_id = tokenizer.to_token_id("こんにちは")

- decode(tokens: numpy.ndarray[int32]) -> str

  text = tokenizer.decode(tokens)

- apply_chat_template(template_str: str, messages: str, tools: str = None, add_generation_prompt: bool = False) -> str

  chat = tokenizer.apply_chat_template("{user}: {message}", messages="こんにちは!", add_generation_prompt=True)

- encode_batch(texts: list[str]) -> onnxruntime_genai.Tensor

  batch_tensor = tokenizer.encode_batch(["こんにちは", "世界"])

- decode_batch(tokens: onnxruntime_genai.Tensor) -> list[str]

  texts = tokenizer.decode_batch(batch_tensor)

- create_stream() -> TokenizerStream

  stream = tokenizer.create_stream()
TokenizerStream クラス
Section titled “TokenizerStream クラス”

onnxruntime_genai.TokenizerStream(tokenizer: Tokenizer) -> TokenizerStream

- decode(token: int32) -> str

  token_str = stream.decode(123)
NamedTensors クラス
Section titled “NamedTensors クラス”

onnxruntime_genai.NamedTensors() -> NamedTensors

- __getitem__(name: str) -> onnxruntime_genai.Tensor

  tensor = named_tensors["input_ids"]

- __setitem__(name: str, value: numpy.ndarray or onnxruntime_genai.Tensor)

  named_tensors["input_ids"] = np.array([1, 2, 3], dtype=np.int32)

- __contains__(name: str) -> bool

  exists = "input_ids" in named_tensors

- __delitem__(name: str)

  del named_tensors["input_ids"]

- __len__() -> int

  length = len(named_tensors)

- keys() -> list[str]

  keys = named_tensors.keys()
Tensor クラス
Section titled “Tensor クラス”

onnxruntime_genai.Tensor(array: numpy.ndarray) -> Tensor

- shape() -> list[int]

  tensor = onnxruntime_genai.Tensor(np.array([1, 2, 3]))
  print(tensor.shape())

- type() -> int

  print(tensor.type())

- data() -> memoryview

  data = tensor.data()

- as_numpy() -> numpy.ndarray

  arr = tensor.as_numpy()
Adapters クラス
Section titled “Adapters クラス”

onnxruntime_genai.Adapters(model: Model) -> Adapters

- unload(adapter_name: str)

  adapters.unload("adapter_name")

- load(file: str, name: str)

  adapters.load("adapter_file.bin", "adapter_name")
MultiModalProcessor クラス
Section titled “MultiModalProcessor クラス”

onnxruntime_genai.MultiModalProcessor(model: Model) -> MultiModalProcessor

- __call__(prompt: str = None, images: Images = None, audios: Audios = None) -> onnxruntime_genai.Tensor

  result = processor(prompt="この画像を説明してください", images=onnxruntime_genai.Images.open("image.png"))

- create_stream() -> TokenizerStream

  stream = processor.create_stream()

- decode(tokens: numpy.ndarray[int32]) -> str

  text = processor.decode(tokens)
Images クラス
Section titled “Images クラス”

onnxruntime_genai.Images.open(*image_paths: str) -> Images
onnxruntime_genai.Images.open_bytes(*image_datas: bytes) -> Images

images = onnxruntime_genai.Images.open("image1.png", "image2.jpg")

with open("image1.png", "rb") as f:
    images_bytes = onnxruntime_genai.Images.open_bytes(f.read())

Audios クラス

Section titled “Audios クラス”

onnxruntime_genai.Audios.open(*audio_paths: str) -> Audios
onnxruntime_genai.Audios.open_bytes(*audio_datas: bytes) -> Audios

audios = onnxruntime_genai.Audios.open("audio1.wav")

with open("audio1.wav", "rb") as f:
    audios_bytes = onnxruntime_genai.Audios.open_bytes(f.read())

ユーティリティ関数
Section titled “ユーティリティ関数”

- onnxruntime_genai.set_log_options(**options)

  onnxruntime_genai.set_log_options(verbose=True)

- onnxruntime_genai.is_cuda_available() -> bool

  print(onnxruntime_genai.is_cuda_available())

- onnxruntime_genai.is_dml_available() -> bool

  print(onnxruntime_genai.is_dml_available())

- onnxruntime_genai.is_rocm_available() -> bool

  print(onnxruntime_genai.is_rocm_available())

- onnxruntime_genai.is_webgpu_available() -> bool

  print(onnxruntime_genai.is_webgpu_available())

- onnxruntime_genai.is_qnn_available() -> bool

  print(onnxruntime_genai.is_qnn_available())

- onnxruntime_genai.is_openvino_available() -> bool

  print(onnxruntime_genai.is_openvino_available())

- onnxruntime_genai.set_current_gpu_device_id(device_id: int)

  onnxruntime_genai.set_current_gpu_device_id(0)

- onnxruntime_genai.get_current_gpu_device_id() -> int

  print(onnxruntime_genai.get_current_gpu_device_id())