Facade Crate
The pmetal crate is the Rust facade for PMetal. It re-exports the core sub-crates behind feature flags so applications can depend on one crate while still using the lower-level APIs directly.
Inference
```rust
use pmetal::data::Tokenizer;
use pmetal::data::chat_templates::{detect_chat_template, Message};
use pmetal::hub::resolve_model_path;
use pmetal::models::{generate_cached_async, DynamicModel, GenerationConfig};

let model_id = "Qwen/Qwen3-0.6B";
let model_path = resolve_model_path(model_id, None, None).await?;

let tokenizer = Tokenizer::from_model_dir(&model_path)?;
let template = detect_chat_template(&model_path, model_id);
let formatted = template.apply(&[Message::user("What is 2+2?")]).text;
let input_ids = tokenizer.encode_with_special_tokens(&formatted)?;

let mut model = DynamicModel::load(&model_path)?;
let mut cache = model.create_cache(input_ids.len() + 256);
let mut mamba_cache = model.create_mamba_cache();

let output = generate_cached_async(
    |input, cache| {
        model.forward_with_hybrid_cache(input, None, Some(cache), mamba_cache.as_mut())
    },
    &input_ids,
    GenerationConfig::sampling(256, 0.7),
    &mut cache,
)?;

let generated = &output.token_ids[input_ids.len()..];
let text = tokenizer.decode(generated)?;
println!("{text}");
```

Manual LoRA Fine-Tuning
```rust
use pmetal::prelude::*;
use std::path::PathBuf;

let model_path = PathBuf::from("./Qwen3-0.6B");
let dataset_path = "train.jsonl";
let output_dir = PathBuf::from("./output");

let tokenizer = Tokenizer::from_model_dir(&model_path)?;
let chat_template = pmetal::data::chat_templates::detect_chat_template(
    &model_path,
    &model_path.to_string_lossy(),
);
let train_dataset = TrainingDataset::from_jsonl_tokenized(
    dataset_path,
    &tokenizer,
    DatasetFormat::Auto,
    2048,
    Some(&chat_template),
    None,
)?;

let lora_config = pmetal::core::LoraConfig {
    r: 16,
    alpha: 32.0,
    ..Default::default()
};
let model = DynamicLoraModel::from_pretrained(&model_path, lora_config)?;

let loop_config = TrainingLoopConfig {
    training: pmetal::core::TrainingConfig {
        learning_rate: 2e-4,
        batch_size: 1,
        num_epochs: 3,
        max_seq_len: 2048,
        output_dir: output_dir.to_string_lossy().to_string(),
        ..Default::default()
    },
    dataloader: DataLoaderConfig {
        batch_size: 1,
        max_seq_len: 2048,
        shuffle: true,
        seed: 42,
        pad_token_id: tokenizer.pad_token_id().unwrap_or(0),
        drop_last: false,
        ..Default::default()
    },
    use_sequence_packing: true,
    ..Default::default()
};

let mut training_loop = TrainingLoop::new(loop_config);
let model = training_loop.run_packed(model, train_dataset, None, None)?;
let weights_path = output_dir.join("lora_weights.safetensors");
model.save_lora_weights(&weights_path)?;
```

Re-Exported Modules
| Module | Source crate |
|---|---|
| pmetal::core | pmetal-core |
| pmetal::data | pmetal-data |
| pmetal::distributed | pmetal-distributed |
| pmetal::distill | pmetal-distill |
| pmetal::gguf | pmetal-gguf |
| pmetal::hub | pmetal-hub |
| pmetal::lora | pmetal-lora |
| pmetal::merge | pmetal-merge |
| pmetal::metal | pmetal-metal |
| pmetal::mhc | pmetal-mhc |
| pmetal::mlx | pmetal-mlx |
| pmetal::models | pmetal-models |
| pmetal::trainer | pmetal-trainer |
| pmetal::vocoder | pmetal-vocoder |
See Also
- Advanced SDK Usage — Lower-level crate APIs
- Python SDK — Python equivalent