Skip to content

Facade Crate

The pmetal crate is the Rust facade for PMetal. It re-exports the core sub-crates behind feature flags so applications can depend on one crate while still using the lower-level APIs directly.

Inference

use pmetal::data::Tokenizer;
use pmetal::data::chat_templates::{detect_chat_template, Message};
use pmetal::hub::resolve_model_path;
use pmetal::models::{generate_cached_async, DynamicModel, GenerationConfig};

// End-to-end text generation example: resolve a model, format a chat prompt,
// tokenize it, run cached generation, and print the decoded completion.
// NOTE: the snippet uses `.await` and `?`, so it has to live inside an async
// function whose return type can carry the propagated errors.
// Identifier handed to `resolve_model_path` and `detect_chat_template`;
// presumably a Hub-style repo id — confirm against the `pmetal::hub` docs.
let model_id = "Qwen/Qwen3-0.6B";
let model_path = resolve_model_path(model_id, None, None).await?;

// Build the prompt text from a single user message using the detected chat
// template, then encode it into the token ids fed to the model.
let tokenizer = Tokenizer::from_model_dir(&model_path)?;
let template = detect_chat_template(&model_path, model_id);
let formatted = template.apply(&[Message::user("What is 2+2?")]).text;
let input_ids = tokenizer.encode_with_special_tokens(&formatted)?;

let mut model = DynamicModel::load(&model_path)?;
// Cache is sized as prompt length + 256; the 256 matches the first argument
// of `GenerationConfig::sampling` below (presumably the max number of new
// tokens — confirm).
let mut cache = model.create_cache(input_ids.len() + 256);
// `.as_mut()` is called on this value in the closure below, so it is
// presumably optional (e.g. absent for models without Mamba layers) — confirm.
let mut mamba_cache = model.create_mamba_cache();

let output = generate_cached_async(
    // Forward-pass callback: the closure mutably borrows `model` and
    // `mamba_cache`, while the cache it receives is supplied by the generator.
    |input, cache| {
        model.forward_with_hybrid_cache(input, None, Some(cache), mamba_cache.as_mut())
    },
    &input_ids,
    // Arguments are (256, 0.7); presumably (max new tokens, temperature) — confirm.
    GenerationConfig::sampling(256, 0.7),
    &mut cache,
)?;

// Skip the first `input_ids.len()` entries of `token_ids` so that only the
// tokens after the prompt-length prefix are decoded and printed.
let generated = &output.token_ids[input_ids.len()..];
let text = tokenizer.decode(generated)?;
println!("{text}");

Manual LoRA Fine-Tuning

use pmetal::prelude::*;
use std::path::PathBuf;

// Manual LoRA fine-tuning example: load a tokenizer and a JSONL dataset, wrap
// the base model with LoRA adapters, run the training loop, and save the
// resulting LoRA weights.
// NOTE: the snippet uses `?`, so it has to live inside a function whose return
// type can carry the propagated errors.
let model_path = PathBuf::from("./Qwen3-0.6B");
let dataset_path = "train.jsonl";
let output_dir = PathBuf::from("./output");

let tokenizer = Tokenizer::from_model_dir(&model_path)?;
// The model directory path is passed for both arguments here (the second as
// its lossy string form); the inference example passes a model id in that
// second position instead.
let chat_template = pmetal::data::chat_templates::detect_chat_template(
    &model_path,
    &model_path.to_string_lossy(),
);
// 2048 is the same value used for `max_seq_len` in both configs below;
// presumably the maximum tokenized sequence length — confirm. The meaning of
// the trailing `None` is not visible from this snippet.
let train_dataset = TrainingDataset::from_jsonl_tokenized(
    dataset_path,
    &tokenizer,
    DatasetFormat::Auto,
    2048,
    Some(&chat_template),
    None,
)?;

// Only `r` and `alpha` are overridden; every other LoRA setting comes from
// `Default::default()`.
let lora_config = pmetal::core::LoraConfig {
    r: 16,
    alpha: 32.0,
    ..Default::default()
};
let model = DynamicLoraModel::from_pretrained(&model_path, lora_config)?;

// `batch_size` and `max_seq_len` are set in both `training` and `dataloader`
// with identical values; presumably they must stay in sync — confirm.
let loop_config = TrainingLoopConfig {
    training: pmetal::core::TrainingConfig {
        learning_rate: 2e-4,
        batch_size: 1,
        num_epochs: 3,
        max_seq_len: 2048,
        output_dir: output_dir.to_string_lossy().to_string(),
        ..Default::default()
    },
    dataloader: DataLoaderConfig {
        batch_size: 1,
        max_seq_len: 2048,
        shuffle: true,
        seed: 42,
        // Falls back to token id 0 when the tokenizer reports no pad token.
        pad_token_id: tokenizer.pad_token_id().unwrap_or(0),
        drop_last: false,
        ..Default::default()
    },
    // Enabled here and paired with the `run_packed` call below.
    use_sequence_packing: true,
    ..Default::default()
};

let mut training_loop = TrainingLoop::new(loop_config);
// `run_packed` takes the model by value and returns a model, which shadows the
// original binding; the two `None` arguments are left unset in this example.
let model = training_loop.run_packed(model, train_dataset, None, None)?;
let weights_path = output_dir.join("lora_weights.safetensors");
model.save_lora_weights(&weights_path)?;

Re-Exported Modules

Module	Source crate
`pmetal::core`	`pmetal-core`
`pmetal::data`	`pmetal-data`
`pmetal::distributed`	`pmetal-distributed`
`pmetal::distill`	`pmetal-distill`
`pmetal::gguf`	`pmetal-gguf`
`pmetal::hub`	`pmetal-hub`
`pmetal::lora`	`pmetal-lora`
`pmetal::merge`	`pmetal-merge`
`pmetal::metal`	`pmetal-metal`
`pmetal::mhc`	`pmetal-mhc`
`pmetal::mlx`	`pmetal-mlx`
`pmetal::models`	`pmetal-models`
`pmetal::trainer`	`pmetal-trainer`
`pmetal::vocoder`	`pmetal-vocoder`

See Also

Advanced SDK Usage — Lower-level crate APIs
Python SDK — Python equivalent