boxabirds · April 23, 2026 03:18
diff --git a/qwen3.6-35b-dfast-mac.sh b/qwen3.6-35b-dfast-mac.sh
 #!/bin/bash
 # ================================================
 # Qwen3.6-35B-A3B + DFlash (24 GB MacBook edition)
 # One-time setup → forever simple commands
 # ================================================

 set -e

 echo "🚀 Setting up Qwen3.6-35B-A3B + DFlash drafter for your 24 GB MacBook..."

 # 1. Create isolated Python environment
 VENV="$HOME/.qwen-dflash-venv"
 # Test for the activate script, not just the directory: an interrupted first
 # run can leave the directory behind without a usable environment, and the
 # `source` below would then fail on every later run.
 if [[ ! -f "$VENV/bin/activate" ]]; then
   echo "🐍 Creating Python venv..."
   python3 -m venv "$VENV"
 fi

 echo "📦 Installing dflash-mlx (the Apple Silicon DFlash port)..."
 # shellcheck source=/dev/null
 source "$VENV/bin/activate"
 # Run pip through the activated interpreter so pip can replace itself cleanly.
 python -m pip install --upgrade pip
 # --upgrade makes a re-run refresh packages that are already installed;
 # without it pip leaves any existing version untouched.
 python -m pip install --upgrade dflash-mlx mlx-lm huggingface_hub

 # 2. Pre-download the exact models that fit in 24 GB
 echo "📥 Pre-downloading 4-bit target model + DFlash drafter (~22–23 GB total)..."

 # fetch_model REPO_ID
 #   Downloads the weight (*.safetensors) and JSON files of one Hugging Face
 #   repository into the local cache via huggingface_hub.snapshot_download.
 #   "*.json" already matches config.json, so it is not listed separately.
 #   Arguments: $1 - repository id, e.g. "org/name"
 #   Returns:   exit status of the Python interpreter
 fetch_model() {
   # Kept as a single-line program: a multi-line `python -c` body is
   # indentation-sensitive and breaks as soon as the script is re-indented.
   python -c \
     'import sys; from huggingface_hub import snapshot_download; snapshot_download(sys.argv[1], allow_patterns=["*.safetensors", "*.json"])' \
     "$1"
 }

 echo "→ Target model (mlx-community 4-bit)..."
 fetch_model "mlx-community/Qwen3.6-35B-A3B-4bit"
 echo "→ DFlash drafter (z-lab)..."
 fetch_model "z-lab/Qwen3.6-35B-A3B-DFlash"
 echo "✅ Models ready!"

 # 3. Create permanent easy commands (aliases)
 #
 # install_qwen_aliases [RC_FILE]
 #   Appends the qwen-chat / qwen-server aliases to RC_FILE unless the marker
 #   comment is already present, so running the installer repeatedly never
 #   stacks duplicate alias blocks.
 #   Arguments: $1 - shell rc file to update (default: $HOME/.zshrc)
 #   Returns:   0 when the aliases are already present or were appended,
 #              non-zero if the append failed
 install_qwen_aliases() {
   local rc_file="${1:-$HOME/.zshrc}"
   local marker='# === Qwen3.6-35B-A3B + DFlash (24 GB MacBook) ==='
   local activate='source ~/.qwen-dflash-venv/bin/activate'
   local model='mlx-community/Qwen3.6-35B-A3B-4bit'

   if [[ -f "$rc_file" ]] && grep -qF -- "$marker" "$rc_file"; then
     return 0
   fi

   # printf instead of a here-document: a `<<` terminator must sit at column 0,
   # so an indented EOF line would never close the document.
   printf '%s\n' \
     '' \
     "$marker" \
     "alias qwen-chat='${activate} && dflash --model ${model} --chat'" \
     "alias qwen-server='${activate} && dflash-serve --model ${model} --port 8000'" \
     >> "$rc_file"
 }

 echo "🔧 Adding qwen-chat and qwen-server commands..."
 install_qwen_aliases

 # ~/.zshrc is intentionally not sourced here. This script runs under bash with
 # `set -e`, so zsh-only syntax in that file could abort the installer, and
 # aliases defined in this process never reach the user's interactive shell.
 echo "👉 Open a new terminal (or run: source ~/.zshrc) to start using them."

 # Closing banner: one printf per topic, each argument printed on its own line.
 printf '%s\n' \
   "" \
   "🎉 SETUP COMPLETE!" \
   "" \
   "=== How your colleague uses it ===" \
   ""

 # The two entry points created by the alias step.
 printf '%s\n' \
   "1. Fast interactive chat (recommended for coding):" \
   "   qwen-chat" \
   "" \
   "2. OpenAI-compatible server (for Cursor, VS Code, Windsurf, etc.):" \
   "   qwen-server" \
   "   → Then point your IDE to http://localhost:8000/v1 (any API key or blank)" \
   ""

 # Memory and performance guidance for 24 GB machines.
 printf '%s\n' \
   "💡 24 GB MacBook tips (very important):" \
   "   • Close ALL browsers and heavy apps before running" \
   "   • Start with 4k–8k context (type /context 4096 in chat if needed)" \
   "   • Expected speed with DFlash: 130–200+ tokens/sec" \
   "   • First run may take 30–60 seconds while it warms up" \
   ""

 # Sign-off.
 printf '%s\n' \
   "Just type qwen-chat and you’re coding with a frontier model at 2× speed. ⚡" \
   "Run this script again anytime to update."
	#!/bin/bash
	# ================================================
	# Qwen3.6-35B-A3B + DFlash (24 GB MacBook edition)
	# One-time setup → forever simple commands
	# ================================================

	set -e

	echo "🚀 Setting up Qwen3.6-35B-A3B + DFlash drafter for your 24 GB MacBook..."

	# 1. Create isolated Python environment
	VENV="$HOME/.qwen-dflash-venv"
	# Test for the activate script, not just the directory: an interrupted first
	# run can leave the directory behind without a usable environment, and the
	# `source` below would then fail on every later run.
	if [[ ! -f "$VENV/bin/activate" ]]; then
	  echo "🐍 Creating Python venv..."
	  python3 -m venv "$VENV"
	fi

	echo "📦 Installing dflash-mlx (the Apple Silicon DFlash port)..."
	# shellcheck source=/dev/null
	source "$VENV/bin/activate"
	# Run pip through the activated interpreter so pip can replace itself cleanly.
	python -m pip install --upgrade pip
	# --upgrade makes a re-run refresh packages that are already installed;
	# without it pip leaves any existing version untouched.
	python -m pip install --upgrade dflash-mlx mlx-lm huggingface_hub

	# 2. Pre-download the exact models that fit in 24 GB
	echo "📥 Pre-downloading 4-bit target model + DFlash drafter (~22–23 GB total)..."

	# fetch_model REPO_ID
	#   Downloads the weight (*.safetensors) and JSON files of one Hugging Face
	#   repository into the local cache via huggingface_hub.snapshot_download.
	#   The patterns are globs: without the leading "*" they would only match
	#   files literally named ".safetensors" / ".json" and fetch nothing useful.
	#   "*.json" already matches config.json, so it is not listed separately.
	#   Arguments: $1 - repository id, e.g. "org/name"
	#   Returns:   exit status of the Python interpreter
	fetch_model() {
	  # Kept as a single-line program: a multi-line `python -c` body is
	  # indentation-sensitive and breaks as soon as the script is re-indented.
	  python -c \
	    'import sys; from huggingface_hub import snapshot_download; snapshot_download(sys.argv[1], allow_patterns=["*.safetensors", "*.json"])' \
	    "$1"
	}

	echo "→ Target model (mlx-community 4-bit)..."
	fetch_model "mlx-community/Qwen3.6-35B-A3B-4bit"
	echo "→ DFlash drafter (z-lab)..."
	fetch_model "z-lab/Qwen3.6-35B-A3B-DFlash"
	echo "✅ Models ready!"

	# 3. Create permanent easy commands (aliases)
	#
	# install_qwen_aliases [RC_FILE]
	#   Appends the qwen-chat / qwen-server aliases to RC_FILE unless the marker
	#   comment is already present, so running the installer repeatedly never
	#   stacks duplicate alias blocks.
	#   Arguments: $1 - shell rc file to update (default: $HOME/.zshrc)
	#   Returns:   0 when the aliases are already present or were appended,
	#              non-zero if the append failed
	install_qwen_aliases() {
	  local rc_file="${1:-$HOME/.zshrc}"
	  local marker='# === Qwen3.6-35B-A3B + DFlash (24 GB MacBook) ==='
	  local activate='source ~/.qwen-dflash-venv/bin/activate'
	  local model='mlx-community/Qwen3.6-35B-A3B-4bit'

	  if [[ -f "$rc_file" ]] && grep -qF -- "$marker" "$rc_file"; then
	    return 0
	  fi

	  # printf instead of a here-document: with `<<` (as opposed to `<<-`) a
	  # tab-indented EOF line never closes the document, which would swallow
	  # the remainder of the script into the rc file.
	  printf '%s\n' \
	    '' \
	    "$marker" \
	    "alias qwen-chat='${activate} && dflash --model ${model} --chat'" \
	    "alias qwen-server='${activate} && dflash-serve --model ${model} --port 8000'" \
	    >> "$rc_file"
	}

	echo "🔧 Adding qwen-chat and qwen-server commands..."
	install_qwen_aliases

	# ~/.zshrc is intentionally not sourced here. This script runs under bash with
	# `set -e`, so zsh-only syntax in that file could abort the installer, and
	# aliases defined in this process never reach the user's interactive shell.
	echo "👉 Open a new terminal (or run: source ~/.zshrc) to start using them."

	# Closing banner: one printf per topic, each argument printed on its own line.
	printf '%s\n' \
	  "" \
	  "🎉 SETUP COMPLETE!" \
	  "" \
	  "=== How your colleague uses it ===" \
	  ""

	# The two entry points created by the alias step.
	printf '%s\n' \
	  "1. Fast interactive chat (recommended for coding):" \
	  " qwen-chat" \
	  "" \
	  "2. OpenAI-compatible server (for Cursor, VS Code, Windsurf, etc.):" \
	  " qwen-server" \
	  " → Then point your IDE to http://localhost:8000/v1 (any API key or blank)" \
	  ""

	# Memory and performance guidance for 24 GB machines.
	printf '%s\n' \
	  "💡 24 GB MacBook tips (very important):" \
	  " • Close ALL browsers and heavy apps before running" \
	  " • Start with 4k–8k context (type /context 4096 in chat if needed)" \
	  " • Expected speed with DFlash: 130–200+ tokens/sec" \
	  " • First run may take 30–60 seconds while it warms up" \
	  ""

	# Sign-off.
	printf '%s\n' \
	  "Just type qwen-chat and you’re coding with a frontier model at 2× speed. ⚡" \
	  "Run this script again anytime to update."
No results found