Example: Basic chat app
This example builds a minimal iOS or macOS chat app that loads a local text model and streams multi-turn replies.
Prerequisites
- Edge Kit added to your Xcode project with Swift Package Manager.
- A local text model directory on the device or simulator.
- iOS 17 or macOS 14 or later.
For the baseline developer path, use Qwen3.5 9B 4bit. For lower-latency iteration on constrained devices, choose a smaller supported model and keep the same app wiring.
Complete code
Create a new SwiftUI app target, add Edge Kit, and replace the app code with the following:
import EdgeInference
import SwiftUI
/// Application entry point for the sample.
///
/// Declares a single window group whose root view is `ContentView`.
@main
struct BasicChatExampleApp: App {
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
/// Root view of the sample.
///
/// Stacks four rows: the model-directory field with its Load button, the
/// scrolling transcript, the message composer, and a status line with a
/// Clear button. All state lives in `ChatViewModel`.
struct ContentView: View {
    @StateObject private var model = ChatViewModel()

    var body: some View {
        VStack(spacing: 12) {
            modelRow
            transcript
            composer
            statusRow
        }
        .padding()
    }

    /// Editable model directory plus the button that triggers `loadModel()`.
    private var modelRow: some View {
        HStack {
            TextField("Model directory", text: $model.modelPath)
                .textFieldStyle(.roundedBorder)
            Button(model.isLoaded ? "Loaded" : "Load") {
                Task { await model.loadModel() }
            }
            // Disabled during generation as well as during loading, so a
            // (re)load cannot be started while a reply is still streaming
            // from the same engine.
            .disabled(model.isLoading || model.isGenerating)
        }
    }

    /// Scrolling list of turns that keeps the newest turn in view.
    private var transcript: some View {
        ScrollViewReader { proxy in
            ScrollView {
                LazyVStack(alignment: .leading, spacing: 12) {
                    ForEach(model.turns) { turn in
                        MessageBubble(turn: turn)
                            .id(turn.id)
                    }
                }
                .frame(maxWidth: .infinity, alignment: .leading)
                .padding(.vertical, 8)
            }
            // The last turn's text changes on every streamed chunk, so this
            // follows the reply as it grows.
            .onChange(of: model.turns.last?.text) {
                if let id = model.turns.last?.id {
                    proxy.scrollTo(id, anchor: .bottom)
                }
            }
        }
    }

    /// Multi-line message field plus the Send button.
    private var composer: some View {
        HStack(alignment: .bottom) {
            TextField("Message", text: $model.input, axis: .vertical)
                .textFieldStyle(.roundedBorder)
                .lineLimit(1...4)
            Button("Send") {
                Task { await model.send() }
            }
            // Also blocked while a (re)load is in flight: `isLoaded` can still
            // be true from a previous load while the engine is being reloaded.
            .disabled(!model.canSend || model.isLoading)
        }
    }

    /// Status caption and the button that resets the conversation.
    private var statusRow: some View {
        HStack {
            Text(model.status)
                .font(.caption)
                .foregroundStyle(.secondary)
            Spacer()
            Button("Clear") {
                model.clear()
            }
            .disabled(model.isGenerating)
        }
    }
}
/// Displays a single chat turn as a tinted, rounded bubble with a small
/// speaker label above the message text.
struct MessageBubble: View {
    let turn: ChatTurn

    /// Whether this turn was written by the user (as opposed to the assistant).
    private var isUser: Bool { turn.role == .user }

    /// Message text, or a placeholder while the turn is still empty.
    private var displayedText: String {
        if turn.text.isEmpty {
            return "..."
        }
        return turn.text
    }

    /// Background tint: blue for the user, gray for the assistant.
    private var tint: Color {
        isUser ? Color.blue.opacity(0.12) : Color.gray.opacity(0.12)
    }

    var body: some View {
        VStack(alignment: .leading, spacing: 4) {
            // The literal ternary stays inline so `Text` resolves to the same
            // initializer as before.
            Text(isUser ? "You" : "Assistant")
                .font(.caption)
                .foregroundStyle(.secondary)
            Text(displayedText)
                .textSelection(.enabled)
        }
        .frame(maxWidth: .infinity, alignment: .leading)
        .padding(10)
        .background(tint)
        .clipShape(RoundedRectangle(cornerRadius: 8))
    }
}
/// One rendered entry in the transcript.
///
/// `id` and `role` are fixed at creation; `text` is mutable so a turn can be
/// updated in place after it has been added to a list.
struct ChatTurn: Identifiable, Equatable {
    /// Who authored the turn.
    enum Role {
        case user, assistant
    }

    let id: UUID
    let role: Role
    var text: String

    /// Creates a turn.
    /// - Parameters:
    ///   - id: Stable identity for the turn; a fresh `UUID` when omitted.
    ///   - role: The author of the turn.
    ///   - text: The initial message text.
    init(id: UUID = UUID(), role: Role, text: String) {
        self.text = text
        self.role = role
        self.id = id
    }
}
/// Main-actor view model for the chat screen.
///
/// Owns the `LLMEngine`, the model-facing `[ChatMessage]` history, and the
/// UI-facing `[ChatTurn]` list. The three busy/ready flags (`isLoading`,
/// `isGenerating`, `isLoaded`) are checked by every entry point, because an
/// `await` inside one method lets other main-actor work (for example another
/// button tap) run before that method resumes.
@MainActor
final class ChatViewModel: ObservableObject {
    /// System prompt that seeds every conversation. Defined once so the
    /// initial history and `clear()` cannot drift apart.
    private static let systemPrompt = "You are a concise, helpful assistant."

    @Published var modelPath = "\(NSHomeDirectory())/Models/Qwen3.5-9B-4bit"
    @Published var input = ""
    @Published var turns: [ChatTurn] = []
    @Published var status = "Load a model to begin."
    @Published var isLoading = false
    @Published var isGenerating = false
    @Published var isLoaded = false

    private let engine = LLMEngine()

    /// Messages sent to the model; only updated after a reply completes.
    private var history: [ChatMessage] = [.system(ChatViewModel.systemPrompt)]

    /// `true` when there is non-blank input and the engine is loaded and idle.
    var canSend: Bool {
        isLoaded
            && !isLoading
            && !isGenerating
            && !input.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    }

    /// Loads the model found at `modelPath` into the engine.
    ///
    /// Does nothing while another load or a generation is in progress.
    /// Updates `isLoaded` and `status` to reflect the outcome.
    func loadModel() async {
        // Refuse to (re)load while a reply is streaming from the same engine.
        guard !isLoading, !isGenerating else { return }

        // An empty path would otherwise be turned into a file URL and handed
        // to the engine; report it directly instead.
        let path = modelPath.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !path.isEmpty else {
            status = "Enter a model directory."
            return
        }

        isLoading = true
        status = "Loading model..."
        defer { isLoading = false }

        do {
            try await engine.loadLocal(directory: URL(fileURLWithPath: path))
            isLoaded = true
            status = "Ready"
        } catch {
            isLoaded = false
            status = "Load failed: \(error.localizedDescription)"
        }
    }

    /// Sends the current input and streams the reply into a new assistant turn.
    ///
    /// Does nothing when the input is blank, no model is loaded, or the engine
    /// is busy loading or generating. On failure the assistant turn shows the
    /// error and `history` is left unchanged, so the failed exchange is not
    /// included in later requests.
    func send() async {
        let text = input.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !text.isEmpty, isLoaded, !isLoading, !isGenerating else { return }

        // Claim the generation slot before the first suspension point so a
        // second call cannot start while this one is awaiting chunks.
        isGenerating = true
        status = "Generating..."
        defer { isGenerating = false }

        input = ""
        turns.append(ChatTurn(role: .user, text: text))
        let assistantID = UUID()
        turns.append(ChatTurn(id: assistantID, role: .assistant, text: ""))

        let request = history + [.user(text)]
        var response = ""

        do {
            let parameters = EdgeGenerateParameters(maxTokens: 512)
            for try await chunk in engine.generate(
                messages: request,
                parameters: parameters
            ) {
                response += chunk.text
                updateAssistant(id: assistantID, text: response)
            }
            // Commit the exchange only once the stream has finished cleanly.
            history = request + [.assistant(response)]
            status = "Ready"
        } catch {
            updateAssistant(id: assistantID, text: "Error: \(error.localizedDescription)")
            status = "Generation failed"
        }
    }

    /// Starts a new conversation: clears the engine's prompt cache, resets the
    /// history to the system prompt, and empties the transcript.
    ///
    /// Ignored while a reply is being generated; otherwise the in-flight
    /// `send()` would write the old conversation back into `history` when it
    /// finishes.
    func clear() {
        guard !isGenerating else { return }
        engine.clearPromptCache()
        history = [.system(Self.systemPrompt)]
        turns.removeAll()
        status = isLoaded ? "Ready" : "Load a model to begin."
    }

    /// Replaces the text of the turn with the given id, if it is still present.
    private func updateAssistant(id: UUID, text: String) {
        guard let index = turns.firstIndex(where: { $0.id == id }) else { return }
        turns[index].text = text
    }
}
Note
If your model lives inside the app bundle, replace the editable `modelPath` with a URL obtained from `Bundle.main.url(forResource:withExtension:)`.
Key concepts
- `LLMEngine` is `@MainActor`; keep UI state and engine calls on the main actor.
- `generate(messages:)` returns an async sequence of `GenerateChunk` values.
- Keep a separate `[ChatMessage]` history for the model and a UI-specific `ChatTurn` array for rendering.
- Reuse one engine for a conversation so prompt cache reuse can work.
- Call `clearPromptCache()` when the user starts a new conversation.
Next steps
- Add image input with Vision chat.
- See the EdgeInference API reference.