The startup script launches the llama.cpp server in the background, waits for it to come up, and then starts the FastAPI front end:

```bash
# Start the llama.cpp server in the background on port 5006.
/app/llama.cpp/build/bin/llama-server \
    -m /app/models/${ORPHEUS_MODEL_NAME} \
    --host 0.0.0.0 \
    --port 5006 \
    --ctx-size ${ORPHEUS_MAX_TOKENS:-512} \
    --n-predict ${ORPHEUS_MAX_TOKENS:-512} \
    --threads ${LLAMA_CPU_THREADS:-6} \
    --threads-batch ${LLAMA_CPU_THREADS:-6} \
    --rope-scaling linear \
    --no-mmap \
    --no-slots &

# Wait briefly so llama-server can start before the API comes up
# (see the readiness sketch below for a more robust alternative).
sleep 5
```
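A fixed `sleep 5` can lose the race against a slow model load. As a sketch (not part of the original script), the wait could instead poll llama-server's built-in `/health` endpoint, which returns HTTP 200 once the model is loaded; the 60-second cap here is an arbitrary choice:

```bash
# Poll llama-server until /health reports ready, up to ~60 seconds.
# (Sketch: replaces the fixed `sleep 5` above; the timeout is arbitrary.)
for _ in $(seq 1 60); do
  if curl -sf http://127.0.0.1:5006/health >/dev/null; then
    break
  fi
  sleep 1
done
```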
With llama-server running, the script starts the FastAPI application in the foreground:

```bash
# Start the FastAPI server on port 5005 with request logging enabled.
uvicorn orpheus.app:app \
    --host 0.0.0.0 \
    --port 5005 \
    --log-level info
```
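Once both processes are up, a quick smoke test can confirm that each port is answering. `/health` is llama.cpp's readiness endpoint; `/docs` is FastAPI's auto-generated OpenAPI page, served by default unless `orpheus.app` disables it:

```bash
# llama.cpp server: returns HTTP 200 once the model has finished loading.
curl -i http://127.0.0.1:5006/health

# FastAPI app: /docs is FastAPI's default interactive API documentation.
curl -i http://127.0.0.1:5005/docs
```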