Skip to content

C++ API

Note: This API is in preview and is subject to change.

This document describes the C++ API for ONNX Runtime GenAI.
Below are the main classes and methods, with code snippets and descriptions for each.


Creates a model from a model configuration directory (optionally with runtime settings), or from an existing config object.

auto model = OgaModel::Create("path/to/model_dir");
auto model2 = OgaModel::Create("path/to/model_dir", *settings);
auto model3 = OgaModel::Create(*config);

Gets the type of the model.

auto type = model->GetType();

Gets the device type used by the model.

auto device_type = model->GetDeviceType();

Creates a configuration object from a config path.

auto config = OgaConfig::Create("path/to/model_dir");

Clears all providers from the configuration.

config->ClearProviders();

Appends a provider to the configuration.

config->AppendProvider("CUDAExecutionProvider");

Sets a provider option in the configuration.

config->SetProviderOption("CUDAExecutionProvider", "device_id", "0");

Overlays a JSON string onto the configuration.

config->Overlay("{\"option\": \"value\"}");

Creates a runtime settings object.

auto settings = OgaRuntimeSettings::Create();

Sets a named handle in the runtime settings.

settings->SetHandle("custom_handle", handle_ptr);

Creates a tokenizer for the given model.

auto tokenizer = OgaTokenizer::Create(*model);

Encodes a string and adds the encoded sequence of tokens to the provided OgaSequences.

auto sequences = OgaSequences::Create();
tokenizer->Encode("Hello world", *sequences);

Encodes a batch of strings.

const char* texts[] = {"Hello", "World"};
auto tensor = tokenizer->EncodeBatch(texts, 2);

Converts a string to its corresponding token ID.

int32_t token_id = tokenizer->ToTokenId("Hello");

Decodes a sequence of tokens into a string.

auto str = tokenizer->Decode(tokens, token_count);

Applies a chat template to messages and tools. The final boolean argument controls whether a generation prompt is added to the result.

auto templated = tokenizer->ApplyChatTemplate("template", "messages", "tools", true);

Decodes a batch of token sequences.

auto decoded = tokenizer->DecodeBatch(*tensor);

Creates a tokenizer stream for incremental decoding.

auto stream = OgaTokenizerStream::Create(*tokenizer);

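Decodes a single token in the stream. If the token completes a piece of displayable text, that chunk is returned; the caller is responsible for concatenating the chunks to form the full output. The returned pointer should be treated as valid only until the next call to Decode on the same stream.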

const char* chunk = stream->Decode(token);

Creates an empty OgaSequences object.

auto sequences = OgaSequences::Create();

Returns the number of sequences.

size_t n = sequences->Count();

Returns the number of tokens in the sequence at the given index.

size_t tokens = sequences->SequenceCount(0);

Returns a pointer to the token data for the sequence at the given index.

const int32_t* data = sequences->SequenceData(0);

Appends a sequence of tokens as a new sequence, or appends a single token to the sequence at the given index.

sequences->Append(tokens, token_count);
sequences->Append(token, sequence_index);

Creates generator parameters for the given model.

auto params = OgaGeneratorParams::Create(*model);

Sets a numeric search option.

params->SetSearchOption("max_length", 128);

Sets a boolean search option.

params->SetSearchOptionBool("do_sample", true);

Sets an additional model input.

params->SetModelInput("input_name", *tensor);

Sets named tensors as inputs.

params->SetInputs(*named_tensors);

Sets guidance data.

params->SetGuidance("type", "data");

Creates a generator from the given model and parameters.

auto generator = OgaGenerator::Create(*model, *params);

Checks if generation is complete.

bool done = generator->IsDone();

Appends token sequences to the generator.

generator->AppendTokenSequences(*sequences);

Appends tokens to the generator.

generator->AppendTokens(tokens, token_count);

Checks if the session is terminated.

bool terminated = generator->IsSessionTerminated();

Generates the next token.

generator->GenerateNextToken();

Rewinds the sequence to a new length.

generator->RewindTo(new_length);

Sets a runtime option.

generator->SetRuntimeOption("terminate_session", "1");

Returns the number of tokens in the sequence at the given index.

size_t count = generator->GetSequenceCount(0);

Returns a pointer to the sequence data at the given index.

const int32_t* data = generator->GetSequenceData(0);

Gets a named output tensor.

auto tensor = generator->GetOutput("output_name");

Gets the logits tensor.

auto logits = generator->GetLogits();

Sets the logits tensor.

generator->SetLogits(*tensor);

Sets the active adapter for the generator.

generator->SetActiveAdapter(*adapters, "adapter_name");

Creates a tensor from a buffer.

auto tensor = OgaTensor::Create(data, shape, shape_dims_count, element_type);

Returns the element type of the tensor.

auto type = tensor->Type();

Returns the shape of the tensor.

auto shape = tensor->Shape();

Returns a pointer to the tensor data.

void* data = tensor->Data();

Loads images from file paths or memory buffers.

std::vector<const char*> image_paths = {"img1.png", "img2.png"};
auto images = OgaImages::Load(image_paths);
auto images2 = OgaImages::Load(image_data_ptrs, image_sizes, count);

Loads audios from file paths or memory buffers.

std::vector<const char*> audio_paths = {"audio1.wav", "audio2.wav"};
auto audios = OgaAudios::Load(audio_paths);
auto audios2 = OgaAudios::Load(audio_data_ptrs, audio_sizes, count);

Creates a named tensors object.

auto named_tensors = OgaNamedTensors::Create();

Gets a tensor by name.

auto tensor = named_tensors->Get("input_name");

Sets a tensor by name.

named_tensors->Set("input_name", *tensor);

Deletes a tensor by name.

named_tensors->Delete("input_name");

Returns the number of named tensors.

size_t count = named_tensors->Count();

Gets the names of all tensors.

auto names = named_tensors->GetNames();

Creates an adapters manager for the given model.

auto adapters = OgaAdapters::Create(*model);

Loads an adapter from file.

adapters->LoadAdapter("adapter_file_path", "adapter_name");

Unloads an adapter by name.

adapters->UnloadAdapter("adapter_name");

Creates a multi-modal processor for the given model.

auto processor = OgaMultiModalProcessor::Create(*model);

Processes images and returns named tensors.

auto named_tensors = processor->ProcessImages("prompt", images.get());

Processes audios and returns named tensors.

auto named_tensors = processor->ProcessAudios(audios.get());

Processes both images and audios.

auto named_tensors = processor->ProcessImagesAndAudios("prompt", images.get(), audios.get());

Decodes a sequence of tokens into a string.

auto str = processor->Decode(tokens, token_count);

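Manages the lifetime of the global Oga runtime. Declare an OgaHandle at the start of the program (before creating any other Oga objects) so that the runtime is shut down cleanly when the handle goes out of scope.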

OgaHandle handle;

Sets a boolean logging option.

Oga::SetLogBool("option_name", true);

Sets a string logging option.

Oga::SetLogString("option_name", "value");

Sets the current GPU device ID.

Oga::SetCurrentGpuDeviceId(0);

Gets the current GPU device ID.

int id = Oga::GetCurrentGpuDeviceId();