Created
April 23, 2026 03:18
-
-
Save boxabirds/316e954fdfd39ca067494aff4a206076 to your computer and use it in GitHub Desktop.
24GB Mac local AI coding config
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# ================================================
# Qwen3.6-35B-A3B + DFlash (24 GB MacBook edition)
# One-time setup → forever simple commands
# ================================================
#
# What this script does:
#   1. Creates (or repairs) a Python venv at ~/.qwen-dflash-venv and installs
#      dflash-mlx, mlx-lm and huggingface_hub into it.
#   2. Pre-downloads the 4-bit target model and the DFlash drafter with
#      huggingface_hub.snapshot_download.
#   3. Appends the qwen-chat / qwen-server aliases to ~/.zshrc, once.
#
# Re-running is safe: packages are upgraded and the alias block is not
# duplicated.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

echo "🚀 Setting up Qwen3.6-35B-A3B + DFlash drafter for your 24 GB MacBook..."

command -v python3 >/dev/null || die "python3 not found on PATH"

# 1. Create isolated Python environment
VENV="$HOME/.qwen-dflash-venv"
VENV_PY="$VENV/bin/python"
# Test for the interpreter rather than the directory so that a half-created
# venv from an interrupted earlier run gets rebuilt instead of reused.
if [[ ! -x "$VENV_PY" ]]; then
  echo "🐍 Creating Python venv..."
  python3 -m venv "$VENV"
fi

echo "📦 Installing dflash-mlx (the Apple Silicon DFlash port)..."
# Call the venv's interpreter directly instead of sourcing bin/activate:
# same packages end up in the venv, without mutating this shell's environment.
"$VENV_PY" -m pip install --upgrade pip
# --upgrade so that running the script again really does update the packages.
"$VENV_PY" -m pip install --upgrade dflash-mlx mlx-lm huggingface_hub

# 2. Pre-download the exact models that fit in 24 GB
echo "📥 Pre-downloading 4-bit target model + DFlash drafter (~22–23 GB total)..."
"$VENV_PY" -c '
from huggingface_hub import snapshot_download
print("→ Target model (mlx-community 4-bit)...")
snapshot_download("mlx-community/Qwen3.6-35B-A3B-4bit", allow_patterns=["*.safetensors", "config.json", "*.json"])
print("→ DFlash drafter (z-lab)...")
snapshot_download("z-lab/Qwen3.6-35B-A3B-DFlash", allow_patterns=["*.safetensors", "config.json", "*.json"])
print("✅ Models ready!")
'

# 3. Create permanent easy commands (aliases)
ZSHRC="$HOME/.zshrc"
ALIAS_MARKER='# === Qwen3.6-35B-A3B + DFlash (24 GB MacBook) ==='
# Append only when the marker line is absent, so repeated runs do not stack
# duplicate alias blocks. A missing ~/.zshrc counts as "absent".
if grep -qF -- "$ALIAS_MARKER" "$ZSHRC" 2>/dev/null; then
  echo "🔧 qwen-chat and qwen-server commands already present in $ZSHRC"
else
  echo "🔧 Adding qwen-chat and qwen-server commands..."
  cat >> "$ZSHRC" << 'EOF'

# === Qwen3.6-35B-A3B + DFlash (24 GB MacBook) ===
alias qwen-chat='source ~/.qwen-dflash-venv/bin/activate && dflash --model mlx-community/Qwen3.6-35B-A3B-4bit --chat'
alias qwen-server='source ~/.qwen-dflash-venv/bin/activate && dflash-serve --model mlx-community/Qwen3.6-35B-A3B-4bit --port 8000'
EOF
fi

# ~/.zshrc is deliberately NOT sourced here: it is a zsh file (zsh-only syntax
# would abort this bash script under `set -e`), and sourcing it in this child
# process could not change the user's interactive shell anyway.

echo ""
echo "🎉 SETUP COMPLETE!"
echo ""
echo "Open a new terminal (or run: source ~/.zshrc) to enable the new commands."
echo ""
echo "=== How your colleague uses it ==="
echo ""
echo "1. Fast interactive chat (recommended for coding):"
echo " qwen-chat"
echo ""
echo "2. OpenAI-compatible server (for Cursor, VS Code, Windsurf, etc.):"
echo " qwen-server"
echo " → Then point your IDE to http://localhost:8000/v1 (any API key or blank)"
echo ""
echo "💡 24 GB MacBook tips (very important):"
echo " • Close ALL browsers and heavy apps before running"
echo " • Start with 4k–8k context (type /context 4096 in chat if needed)"
echo " • Expected speed with DFlash: 130–200+ tokens/sec"
echo " • First run may take 30–60 seconds while it warms up"
echo ""
echo "Just type qwen-chat and you’re coding with a frontier model at 2× speed. ⚡"
echo "Run this script again anytime to update."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment