prithivMLmods committed
Commit de9769e · verified · 1 Parent(s): 54a0ae6

Update app.py

Files changed (1): app.py (+400 -277)
app.py CHANGED
@@ -2,91 +2,179 @@ import os
  import io
  import cv2
  import time
  import torch
  import shutil
  import base64
  import tempfile
  import numpy as np
  import gradio as gr
  from PIL import Image
  from typing import *
  from datetime import datetime

- # --- Environment Configuration ---
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
- os.environ["ATTN_BACKEND"] = "flash_attn_3" # Ensure you have flash-attn installed, or set to 'xformers'/'flash_attn'
  os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'

- # --- Hugging Face Spaces / GPU Setup ---
- import spaces
- from diffusers import DiffusionPipeline
-
- # --- TRELLIS Imports ---
- # (Assumes running from root of TRELLIS repo)
  from trellis2.modules.sparse import SparseTensor
  from trellis2.pipelines import Trellis2ImageTo3DPipeline
  from trellis2.renderers import EnvMap
  from trellis2.utils import render_utils
  import o_voxel

- # --- Background Removal ---
- # We use rembg locally for stability instead of an API call
- try:
-     from rembg import remove
- except ImportError:
-     print("Please install rembg: pip install rembg")

- # =========================================
- # MODEL LOADING
- # =========================================

- print(">>> Loading Z-Image-Turbo Pipeline...")
- z_image_pipe = DiffusionPipeline.from_pretrained(
      "Tongyi-MAI/Z-Image-Turbo",
      torch_dtype=torch.bfloat16,
      low_cpu_mem_usage=False,
  )
- z_image_pipe.to("cuda")
- print(">>> Z-Image-Turbo Loaded!")

- print(">>> Loading TRELLIS.2 Pipeline...")
- trellis_pipeline = Trellis2ImageTo3DPipeline.from_pretrained(
-     "microsoft/TRELLIS.2-4B",
-     torch_dtype=torch.float16
- )
- trellis_pipeline.cuda()

- # Load EnvMap for rendering previews
- try:
-     envmap = EnvMap.from_file("assets/app/envmap.exr")
- except:
-     print("Warning: envmap.exr not found in assets/app/. Rendering might look flat.")
-     envmap = None
-
- print(">>> TRELLIS.2 Loaded!")

- # =========================================
- # CONSTANTS & UTILS
- # =========================================

- MAX_SEED = np.iinfo(np.int32).max
- TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
- os.makedirs(TMP_DIR, exist_ok=True)

- # Pre-load Icons for HTML Previewer
- MODES = [
-     {"name": "Normal", "icon_path": "assets/app/normal.png", "render_key": "normal"},
-     {"name": "Clay render", "icon_path": "assets/app/clay.png", "render_key": "clay"},
-     {"name": "Base color", "icon_path": "assets/app/basecolor.png", "render_key": "base_color"},
-     {"name": "HDRI forest", "icon_path": "assets/app/hdri_forest.png", "render_key": "shaded_forest"},
-     {"name": "HDRI sunset", "icon_path": "assets/app/hdri_sunset.png", "render_key": "shaded_sunset"},
-     {"name": "HDRI courtyard", "icon_path": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
- ]
- STEPS = 8
- DEFAULT_MODE = 3
- DEFAULT_STEP = 3

  def image_to_base64(image):
      buffered = io.BytesIO()
@@ -95,48 +183,59 @@ def image_to_base64(image):
      img_str = base64.b64encode(buffered.getvalue()).decode()
      return f"data:image/jpeg;base64,{img_str}"

- # Load icons into memory as base64 to avoid path issues in HTML
- for mode in MODES:
-     if os.path.exists(mode['icon_path']):
-         with open(mode['icon_path'], "rb") as f:
-             mode['icon_base64'] = f"data:image/png;base64,{base64.b64encode(f.read()).decode()}"
-     else:
-         # Fallback empty image if asset missing
-         mode['icon_base64'] = ""

  def preprocess_image(input_img: Image.Image) -> Image.Image:
-     """Preprocess: Resize, Remove Background, Center Crop."""
-     # 1. Resize if too large
      max_size = max(input_img.size)
      scale = min(1, 1024 / max_size)
      if scale < 1:
          input_img = input_img.resize((int(input_img.width * scale), int(input_img.height * scale)), Image.Resampling.LANCZOS)

-     # 2. Remove Background (if no alpha)
-     if input_img.mode != 'RGBA':
-         input_img = remove(input_img)
      else:
-         # Check if alpha is fully opaque
-         alpha = np.array(input_img)[:, :, 3]
-         if np.all(alpha == 255):
-             input_img = remove(input_img)
-
-     # 3. Crop to content
-     output_np = np.array(input_img)
      alpha = output_np[:, :, 3]
      bbox = np.argwhere(alpha > 0.8 * 255)
-     if len(bbox) == 0: return input_img # Empty image
-
      bbox = np.min(bbox[:, 1]), np.min(bbox[:, 0]), np.max(bbox[:, 1]), np.max(bbox[:, 0])
      center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
      size = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
-     size = int(size * 1.1) # Add some padding
-
      bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
-     output = input_img.crop(bbox)

-     # 4. Composite on white (optional for 3D logic, but TRELLIS likes alpha)
-     # Keeping alpha channel for TRELLIS
      return output

  def pack_state(latents: Tuple[SparseTensor, SparseTensor, int]) -> dict:
@@ -156,69 +255,74 @@ def unpack_state(state: dict) -> Tuple[SparseTensor, SparseTensor, int]:
      tex_slat = shape_slat.replace(torch.from_numpy(state['tex_slat_feats']).cuda())
      return shape_slat, tex_slat, state['res']

- # =========================================
- # GRADIO LOGIC
- # =========================================

- @spaces.GPU(duration=60)
- def generate_z_image(prompt, height, width, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
-     """Step 1: Text to Image"""
      if randomize_seed:
          seed = torch.randint(0, 2**32 - 1, (1,)).item()

      generator = torch.Generator("cuda").manual_seed(int(seed))
-
-     print(f"Generating image for: {prompt}")
-     image = z_image_pipe(
          prompt=prompt,
          height=int(height),
          width=int(width),
-         num_inference_steps=int(steps),
-         guidance_scale=0.0, # Turbo usually uses 0 or low guidance
          generator=generator,
      ).images[0]

      return image, seed

- @spaces.GPU(duration=180)
- def generate_3d_trellis(
      image: Image.Image,
      seed: int,
-     resolution: str = "1024",
-     # Advanced Params with defaults
-     ss_guidance_strength=7.5, ss_sampling_steps=12,
-     slat_guidance_strength=3.0, slat_sampling_steps=12,
-     req: gr.Request = None,
-     progress=gr.Progress(track_tqdm=True)
- ):
-     """Step 2: Image to 3D"""
-     if image is None:
-         raise gr.Error("Please generate or upload an image first.")
-
-     # Preprocess
-     processed_image = preprocess_image(image)
-
-     # Run Pipeline
-     # Using simplified params for the UI, mapping to full pipeline args
-     outputs, latents = trellis_pipeline.run(
-         processed_image,
          seed=seed,
-         preprocess_image=False, # We did it manually
          sparse_structure_sampler_params={
              "steps": ss_sampling_steps,
              "guidance_strength": ss_guidance_strength,
-             "guidance_rescale": 0.0, "rescale_t": 0.0,
          },
          shape_slat_sampler_params={
-             "steps": slat_sampling_steps,
-             "guidance_strength": slat_guidance_strength,
-             "guidance_rescale": 0.0, "rescale_t": 0.0,
          },
          tex_slat_sampler_params={
-             "steps": slat_sampling_steps,
-             "guidance_strength": slat_guidance_strength,
-             "guidance_rescale": 0.0, "rescale_t": 0.0,
          },
          pipeline_type={
              "512": "512",
@@ -229,34 +333,59 @@ def generate_3d_trellis(
      )

      mesh = outputs[0]
-     # Simplify for visualization
-     mesh.simplify(16777216)

-     # Render Preview (Spinning view)
-     images_render = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
      state = pack_state(latents)
      torch.cuda.empty_cache()
-
-     # --- Build HTML ---
      images_html = ""
      for m_idx, mode in enumerate(MODES):
-         key = mode['render_key']
-         if key not in images_render: continue
-
          for s_idx in range(STEPS):
              unique_id = f"view-m{m_idx}-s{s_idx}"
              is_visible = (m_idx == DEFAULT_MODE and s_idx == DEFAULT_STEP)
              vis_class = "visible" if is_visible else ""
-             img_b64 = image_to_base64(Image.fromarray(images_render[key][s_idx]))
-             images_html += f'<img id="{unique_id}" class="previewer-main-image {vis_class}" src="{img_b64}" loading="eager">'
-
      btns_html = ""
      for idx, mode in enumerate(MODES):
          active_class = "active" if idx == DEFAULT_MODE else ""
-         btns_html += f'<img src="{mode["icon_base64"]}" class="mode-btn {active_class}" onclick="selectMode({idx})" title="{mode["name"]}">'
-
      full_html = f"""
      <div class="previewer-container">
          <div class="display-row">{images_html}</div>
          <div class="mode-row" id="btn-group">{btns_html}</div>
          <div class="slider-row">
@@ -264,32 +393,31 @@ def generate_3d_trellis(
          </div>
      </div>
      """
-
      return state, full_html

- @spaces.GPU(duration=60)
- def extract_glb(state: dict, mesh_simplify: float, texture_size: int, req: gr.Request):
-     """Step 3: Export GLB"""
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-     os.makedirs(user_dir, exist_ok=True)
-
      shape_slat, tex_slat, res = unpack_state(state)
-     mesh = trellis_pipeline.decode_latent(shape_slat, tex_slat, res)[0]
-
-     # Decimation logic
-     # Approximate face count vs float 0-1
-     target_faces = int(mesh_simplify * 100000) # Simple mapping

      glb = o_voxel.postprocess.to_glb(
          vertices=mesh.vertices,
          faces=mesh.faces,
          attr_volume=mesh.attrs,
          coords=mesh.coords,
-         attr_layout=trellis_pipeline.pbr_attr_layout,
          grid_size=res,
          aabb=[[-0.5, -0.5, -0.5], [0.5, 0.5, 0.5]],
-         decimation_target=target_faces,
-         texture_size=int(texture_size),
          remesh=True,
          remesh_band=1,
          remesh_project=0,
@@ -297,151 +425,146 @@ def extract_glb(state: dict, mesh_simplify: float, texture_size: int, req: gr.Re
      )

      now = datetime.now()
-     timestamp = now.strftime("%Y-%m-%dT%H%M%S")
-     glb_path = os.path.join(user_dir, f'trellis_output_{timestamp}.glb')
      glb.export(glb_path, extension_webp=True)
      torch.cuda.empty_cache()

-     return glb_path
-
-
- # =========================================
- # CSS & JS
- # =========================================
-
- css = """
- .previewer-container {
-     width: 100%; height: 600px; display: flex; flex-direction: column; align-items: center; justify-content: center;
-     background: var(--background-fill-secondary); border-radius: 8px; padding: 20px;
- }
- .display-row { flex-grow: 1; width: 100%; display: flex; justify-content: center; align-items: center; overflow: hidden; }
- .previewer-main-image { max-width: 100%; max-height: 100%; object-fit: contain; display: none; }
- .previewer-main-image.visible { display: block; }
- .mode-row { display: flex; gap: 10px; margin: 10px 0; }
- .mode-btn { width: 30px; height: 30px; border-radius: 50%; cursor: pointer; opacity: 0.6; border: 2px solid transparent; }
- .mode-btn:hover { opacity: 1; transform: scale(1.1); }
- .mode-btn.active { opacity: 1; border-color: var(--color-accent); transform: scale(1.1); }
- .slider-row { width: 80%; }
- input[type=range] { width: 100%; }
- """
-
- head_js = """
- <script>
- function refreshView(mode, step) {
-     const allImgs = document.querySelectorAll('.previewer-main-image');
-     let currentMode = mode;
-     let currentStep = step;
-
-     // Find current state if args are -1
-     if (currentMode === -1 || currentStep === -1) {
-         for (let img of allImgs) {
-             if (img.classList.contains('visible')) {
-                 const parts = img.id.split('-');
-                 if (currentMode === -1) currentMode = parseInt(parts[1].substring(1));
-                 if (currentStep === -1) currentStep = parseInt(parts[2].substring(1));
-                 break;
-             }
-         }
-     }
-     if (currentMode === -1) currentMode = 3;
-     if (currentStep === -1) currentStep = 3;
-
-     allImgs.forEach(img => img.classList.remove('visible'));
-     const targetId = `view-m${currentMode}-s${currentStep}`;
-     const target = document.getElementById(targetId);
-     if (target) target.classList.add('visible');
-
-     const allBtns = document.querySelectorAll('.mode-btn');
-     allBtns.forEach((btn, idx) => {
-         if(idx === currentMode) btn.classList.add('active');
-         else btn.classList.remove('active');
-     });
- }
-
- function selectMode(mode) { refreshView(mode, -1); }
- function onSliderChange(val) { refreshView(-1, parseInt(val)); }
- </script>
- """
-
- # =========================================
- # APP LAYOUT
- # =========================================
-
- with gr.Blocks(title="Z-Image-Turbo + TRELLIS 2", css=css, head=head_js) as demo:
-     gr.Markdown("# 🧊 Text to 3D with Z-Image-Turbo + TRELLIS.2")
-
-     # Session state
-     trellis_state = gr.State()
-
      with gr.Row():
-         # --- LEFT COLUMN: Text to Image ---
-         with gr.Column(scale=1):
-             gr.Markdown("### 1. Generate Image")
-             prompt_input = gr.Textbox(label="Prompt", placeholder="A detailed 3D render of a futuristic robot helmet...")

-             with gr.Accordion("Image Settings", open=False):
                  with gr.Row():
-                     height_in = gr.Number(label="Height", value=1024)
-                     width_in = gr.Number(label="Width", value=1024)
-                 steps_in = gr.Slider(label="Steps", minimum=1, maximum=50, value=4, step=1)
-                 seed_in = gr.Number(label="Seed", value=0)
-                 random_seed = gr.Checkbox(label="Randomize Seed", value=True)
-
-             gen_img_btn = gr.Button("Generate Image", variant="primary")
-
-             output_image = gr.Image(label="Generated Image", type="pil", interactive=False)
-
-         # --- RIGHT COLUMN: Image to 3D ---
-         with gr.Column(scale=2):
-             gr.Markdown("### 2. Generate 3D")
-
-             with gr.Accordion("TRELLIS Settings", open=False):
-                 seed_3d = gr.Number(label="3D Seed", value=0)
-                 res_3d = gr.Dropdown(label="Resolution", choices=["512", "1024", "1536"], value="1024")
-
-             gen_3d_btn = gr.Button("To 3D 🧊", variant="primary")

-             # HTML Previewer
-             html_output = gr.HTML(label="3D Preview", value="<div style='height:600px; display:flex; align-items:center; justify-content:center; color:gray;'>Generate 3D to view preview</div>")

-             gr.Markdown("### 3. Export")
-             with gr.Row():
-                 simplify_slider = gr.Slider(label="Mesh Density (Face Count)", minimum=0.1, maximum=2.0, value=0.9)
-                 tex_size_drop = gr.Dropdown(label="Texture Size", choices=[1024, 2048, 4096], value=2048)
-             export_btn = gr.Button("Export GLB")
-
-             glb_output = gr.File(label="Download GLB")
-
-     # --- Event Wiring ---

-     # 1. Text to Image
      gen_img_btn.click(
          fn=generate_z_image,
-         inputs=[prompt_input, height_in, width_in, steps_in, seed_in, random_seed],
-         outputs=[output_image, seed_in]
      )
-
-     # 2. Image to 3D
-     gen_3d_btn.click(
-         fn=generate_3d_trellis,
-         inputs=[output_image, seed_3d, res_3d],
-         outputs=[trellis_state, html_output]
      )
-
-     # 3. Export
-     export_btn.click(
-         fn=extract_glb,
-         inputs=[trellis_state, simplify_slider, tex_size_drop],
-         outputs=[glb_output]
      )

-     def on_load(req: gr.Request):
-         # Setup session dir
-         if req:
-             user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-             os.makedirs(user_dir, exist_ok=True)

-     demo.load(on_load)

  if __name__ == "__main__":
-     demo.queue().launch(show_api=False, share=True)
@@ -2,91 +2,179 @@ import os
  import io
  import cv2
  import time
+ import math
  import torch
+ import shlex
  import shutil
  import base64
+ import random
  import tempfile
  import numpy as np
  import gradio as gr
+ import spaces
  from PIL import Image
  from typing import *
  from datetime import datetime
+ from gradio_client import Client, handle_file
+ from diffusers import DiffusionPipeline

+ # --- TRELLIS Imports ---
+ # Ensure these env vars are set before importing trellis2 modules
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+ os.environ["ATTN_BACKEND"] = "flash_attn_3"
+ # Adjust path if needed or keep relative
  os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'

  from trellis2.modules.sparse import SparseTensor
  from trellis2.pipelines import Trellis2ImageTo3DPipeline
  from trellis2.renderers import EnvMap
  from trellis2.utils import render_utils
  import o_voxel

+ # ==========================================
+ # 1. HTML/CSS/JS CONFIGURATION
+ # ==========================================
+
+ MAX_SEED = np.iinfo(np.int32).max
+ TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
+
+ MODES = [
+     {"name": "Normal", "icon": "assets/app/normal.png", "render_key": "normal"},
+     {"name": "Clay render", "icon": "assets/app/clay.png", "render_key": "clay"},
+     {"name": "Base color", "icon": "assets/app/basecolor.png", "render_key": "base_color"},
+     {"name": "HDRI forest", "icon": "assets/app/hdri_forest.png", "render_key": "shaded_forest"},
+     {"name": "HDRI sunset", "icon": "assets/app/hdri_sunset.png", "render_key": "shaded_sunset"},
+     {"name": "HDRI courtyard", "icon": "assets/app/hdri_courtyard.png", "render_key": "shaded_courtyard"},
+ ]
+ STEPS = 8
+ DEFAULT_MODE = 3
+ DEFAULT_STEP = 3
+
+ css = """
+ .stepper-wrapper { padding: 0; }
+ .stepper-container { padding: 0; align-items: center; }
+ .step-button { flex-direction: row; }
+ .step-connector { transform: none; }
+ .step-number { width: 16px; height: 16px; }
+ .step-label { position: relative; bottom: 0; }
+ .previewer-container {
+     position: relative; font-family: sans-serif; width: 100%; height: 722px;
+     margin: 0 auto; padding: 20px; display: flex; flex-direction: column;
+     align-items: center; justify-content: center;
+ }
+ .previewer-container .tips-icon {
+     position: absolute; right: 10px; top: 10px; z-index: 10; border-radius: 10px;
+     color: #fff; background-color: var(--color-accent); padding: 3px 6px; user-select: none;
+ }
+ .previewer-container .tips-text {
+     position: absolute; right: 10px; top: 50px; color: #fff; background-color: var(--color-accent);
+     border-radius: 10px; padding: 6px; text-align: left; max-width: 300px; z-index: 10;
+     transition: all 0.3s; opacity: 0%; user-select: none;
+ }
+ .tips-icon:hover + .tips-text { display: block; opacity: 100%; }
+ .previewer-container .mode-row { width: 100%; display: flex; gap: 8px; justify-content: center; margin-bottom: 20px; flex-wrap: wrap; }
+ .previewer-container .mode-btn { width: 24px; height: 24px; border-radius: 50%; cursor: pointer; opacity: 0.5; transition: all 0.2s; border: 2px solid #ddd; object-fit: cover; }
+ .previewer-container .mode-btn:hover { opacity: 0.9; transform: scale(1.1); }
+ .previewer-container .mode-btn.active { opacity: 1; border-color: var(--color-accent); transform: scale(1.1); }
+ .previewer-container .display-row { margin-bottom: 20px; min-height: 400px; width: 100%; flex-grow: 1; display: flex; justify-content: center; align-items: center; }
+ .previewer-container .previewer-main-image { max-width: 100%; max-height: 100%; flex-grow: 1; object-fit: contain; display: none; }
+ .previewer-container .previewer-main-image.visible { display: block; }
+ .previewer-container .slider-row { width: 100%; display: flex; flex-direction: column; align-items: center; gap: 10px; padding: 0 10px; }
+ .previewer-container input[type=range] { -webkit-appearance: none; width: 100%; max-width: 400px; background: transparent; }
+ .previewer-container input[type=range]::-webkit-slider-runnable-track { width: 100%; height: 8px; cursor: pointer; background: #ddd; border-radius: 5px; }
+ .previewer-container input[type=range]::-webkit-slider-thumb { height: 20px; width: 20px; border-radius: 50%; background: var(--color-accent); cursor: pointer; -webkit-appearance: none; margin-top: -6px; box-shadow: 0 2px 5px rgba(0,0,0,0.2); transition: transform 0.1s; }
+ .previewer-container input[type=range]::-webkit-slider-thumb:hover { transform: scale(1.2); }
+ .gradio-container .padded:has(.previewer-container) { padding: 0 !important; }
+ .gradio-container:has(.previewer-container) [data-testid="block-label"] { position: absolute; top: 0; left: 0; }
+ """
+
+ head = """
+ <script>
+ function refreshView(mode, step) {
+     const allImgs = document.querySelectorAll('.previewer-main-image');
+     for (let i = 0; i < allImgs.length; i++) {
+         const img = allImgs[i];
+         if (img.classList.contains('visible')) {
+             const id = img.id;
+             const [_, m, s] = id.split('-');
+             if (mode === -1) mode = parseInt(m.slice(1));
+             if (step === -1) step = parseInt(s.slice(1));
+             break;
+         }
+     }
+     allImgs.forEach(img => img.classList.remove('visible'));
+     const targetId = 'view-m' + mode + '-s' + step;
+     const targetImg = document.getElementById(targetId);
+     if (targetImg) targetImg.classList.add('visible');
+     const allBtns = document.querySelectorAll('.mode-btn');
+     allBtns.forEach((btn, idx) => {
+         if (idx === mode) btn.classList.add('active');
+         else btn.classList.remove('active');
+     });
+ }
+ function selectMode(mode) { refreshView(mode, -1); }
+ function onSliderChange(val) { refreshView(-1, parseInt(val)); }
+ </script>
+ """
+
+ empty_html = """
+ <div class="previewer-container">
+     <svg style="opacity: .5; height: var(--size-5); color: var(--body-text-color);"
+         xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather feather-image"><rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect><circle cx="8.5" cy="8.5" r="1.5"></circle><polyline points="21 15 16 10 5 21"></polyline></svg>
+ </div>
+ """

+ # ==========================================
+ # 2. MODEL LOADING
+ # ==========================================

+ print("Loading Z-Image-Turbo pipeline...")
+ # Load Z-Image Pipeline
+ z_pipe = DiffusionPipeline.from_pretrained(
      "Tongyi-MAI/Z-Image-Turbo",
      torch_dtype=torch.bfloat16,
      low_cpu_mem_usage=False,
  )
+ z_pipe.to("cuda")

+ print("Loading TRELLIS.2 pipeline...")
+ # Load TRELLIS Pipeline
+ trellis_pipe = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
+ trellis_pipe.rembg_model = None
+ trellis_pipe.low_vram = False
+ trellis_pipe.cuda()

+ # Load RMBG Client
+ print("Loading RMBG Client...")
+ rmbg_client = Client("briaai/BRIA-RMBG-2.0")

+ # Load HDRI Maps (Ensure assets folder exists)
+ try:
+     envmap = {
+         'forest': EnvMap(torch.tensor(
+             cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB),
+             dtype=torch.float32, device='cuda'
+         )),
+         'sunset': EnvMap(torch.tensor(
+             cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB),
+             dtype=torch.float32, device='cuda'
+         )),
+         'courtyard': EnvMap(torch.tensor(
+             cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB),
+             dtype=torch.float32, device='cuda'
+         )),
+     }
+ except Exception as e:
+     print(f"Warning: Could not load HDRI maps. Check 'assets/hdri' folder. Error: {e}")
+     envmap = {}

+ print("All models loaded!")

+ # ==========================================
+ # 3. HELPER FUNCTIONS
+ # ==========================================

  def image_to_base64(image):
      buffered = io.BytesIO()
@@ -95,48 +183,59 @@ def image_to_base64(image):
      img_str = base64.b64encode(buffered.getvalue()).decode()
      return f"data:image/jpeg;base64,{img_str}"

+ def start_session(req: gr.Request):
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     os.makedirs(user_dir, exist_ok=True)
+
+ def end_session(req: gr.Request):
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     if os.path.exists(user_dir):
+         shutil.rmtree(user_dir)
+
+ def remove_background(input_img: Image.Image) -> Image.Image:
+     with tempfile.NamedTemporaryFile(suffix='.png') as f:
+         input_img = input_img.convert('RGB')
+         input_img.save(f.name)
+         # Using Gradio Client for Bria RMBG
+         output = rmbg_client.predict(handle_file(f.name), api_name="/image")[0][0]
+     output = Image.open(output)
+     return output
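Background removal now goes through the briaai/BRIA-RMBG-2.0 Space via gradio_client, whereas the deleted code ran rembg locally "for stability instead of an API call". If the remote Space is down or rate-limited, a local fallback along the old path may still be worth keeping. The sketch below is illustrative only, not part of this commit, and assumes rembg is installed (pip install rembg):

# Hypothetical fallback, not in this commit: prefer the local rembg model the
# old code used, and only call the remote Space when rembg is unavailable.
def remove_background_with_fallback(input_img: Image.Image) -> Image.Image:
    try:
        from rembg import remove  # pip install rembg
        return remove(input_img.convert('RGB'))  # returns an RGBA PIL image
    except ImportError:
        return remove_background(input_img)  # remote BRIA-RMBG-2.0 Space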

  def preprocess_image(input_img: Image.Image) -> Image.Image:
+     """Preprocess the input image: Resize and Remove Background if needed."""
+     has_alpha = False
+     if input_img.mode == 'RGBA':
+         alpha = np.array(input_img)[:, :, 3]
+         if not np.all(alpha == 255):
+             has_alpha = True
+
      max_size = max(input_img.size)
      scale = min(1, 1024 / max_size)
      if scale < 1:
          input_img = input_img.resize((int(input_img.width * scale), int(input_img.height * scale)), Image.Resampling.LANCZOS)

+     if has_alpha:
+         output = input_img
      else:
+         output = remove_background(input_img)
+
+     output_np = np.array(output)
      alpha = output_np[:, :, 3]
      bbox = np.argwhere(alpha > 0.8 * 255)
+     if bbox.size == 0:
+         return output # Return original if empty
+
      bbox = np.min(bbox[:, 1]), np.min(bbox[:, 0]), np.max(bbox[:, 1]), np.max(bbox[:, 0])
      center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
      size = max(bbox[2] - bbox[0], bbox[3] - bbox[1])
+     size = int(size * 1) # margin
      bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
+     output = output.crop(bbox)

+     # Normalize
+     output = np.array(output).astype(np.float32) / 255
+     output = output[:, :, :3] * output[:, :, 3:4]
+     output = Image.fromarray((output * 255).astype(np.uint8))
      return output
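The new "Normalize" block at the end of preprocess_image premultiplies RGB by alpha, i.e. it composites the cutout onto black and returns an opaque RGB image. A standalone illustration of that arithmetic (the pixel value is chosen only for demonstration):

# Standalone sketch of the premultiply-onto-black step above (illustrative only).
import numpy as np

rgba = np.array([[[200, 100, 50, 128]]], dtype=np.uint8)  # one half-transparent pixel
x = rgba.astype(np.float32) / 255
rgb = x[:, :, :3] * x[:, :, 3:4]      # scale RGB by alpha -> composite onto black
print((rgb * 255).astype(np.uint8))   # [[[100  50  25]]]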

  def pack_state(latents: Tuple[SparseTensor, SparseTensor, int]) -> dict:
@@ -156,69 +255,74 @@ def unpack_state(state: dict) -> Tuple[SparseTensor, SparseTensor, int]:
      tex_slat = shape_slat.replace(torch.from_numpy(state['tex_slat_feats']).cuda())
      return shape_slat, tex_slat, state['res']
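pack_state's body falls inside a collapsed region of this diff; only the tail of unpack_state is visible above. Judging from the keys unpack_state reads ('shape_slat_feats', 'tex_slat_feats', 'res'), a minimal sketch could look like the following. The 'shape_slat_coords' key and the .coords/.feats attributes are assumptions, not confirmed by these hunks:

# Hypothetical reconstruction; the committed body is collapsed in this diff.
# Assumes SparseTensor exposes .coords and .feats, mirroring unpack_state.
def pack_state(latents):
    shape_slat, tex_slat, res = latents
    return {
        'shape_slat_coords': shape_slat.coords.cpu().numpy(),  # assumed key
        'shape_slat_feats': shape_slat.feats.cpu().numpy(),
        'tex_slat_feats': tex_slat.feats.cpu().numpy(),
        'res': res,
    }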

+ def get_seed(randomize_seed: bool, seed: int) -> int:
+     return np.random.randint(0, MAX_SEED) if randomize_seed else seed

+ # ==========================================
+ # 4. CORE GENERATION FUNCTIONS
+ # ==========================================

+ @spaces.GPU
+ def generate_z_image(prompt, height, width, num_inference_steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
+     """Generate image using Z-Image-Turbo"""
      if randomize_seed:
          seed = torch.randint(0, 2**32 - 1, (1,)).item()

      generator = torch.Generator("cuda").manual_seed(int(seed))
+     image = z_pipe(
          prompt=prompt,
          height=int(height),
          width=int(width),
+         num_inference_steps=int(num_inference_steps),
+         guidance_scale=0.0,
          generator=generator,
      ).images[0]

      return image, seed

+ @spaces.GPU(duration=120)
+ def generate_trellis_3d(
      image: Image.Image,
      seed: int,
+     resolution: str,
+     ss_guidance_strength: float,
+     ss_guidance_rescale: float,
+     ss_sampling_steps: int,
+     ss_rescale_t: float,
+     shape_slat_guidance_strength: float,
+     shape_slat_guidance_rescale: float,
+     shape_slat_sampling_steps: int,
+     shape_slat_rescale_t: float,
+     tex_slat_guidance_strength: float,
+     tex_slat_guidance_rescale: float,
+     tex_slat_sampling_steps: int,
+     tex_slat_rescale_t: float,
+     req: gr.Request,
+     progress=gr.Progress(track_tqdm=True),
+ ) -> str:
+
+     # Run pipeline
+     outputs, latents = trellis_pipe.run(
+         image,
          seed=seed,
+         preprocess_image=False, # We handle preprocessing in the UI/before calling
          sparse_structure_sampler_params={
              "steps": ss_sampling_steps,
              "guidance_strength": ss_guidance_strength,
+             "guidance_rescale": ss_guidance_rescale,
+             "rescale_t": ss_rescale_t,
          },
          shape_slat_sampler_params={
+             "steps": shape_slat_sampling_steps,
+             "guidance_strength": shape_slat_guidance_strength,
+             "guidance_rescale": shape_slat_guidance_rescale,
+             "rescale_t": shape_slat_rescale_t,
          },
          tex_slat_sampler_params={
+             "steps": tex_slat_sampling_steps,
+             "guidance_strength": tex_slat_guidance_strength,
+             "guidance_rescale": tex_slat_guidance_rescale,
+             "rescale_t": tex_slat_rescale_t,
          },
          pipeline_type={
              "512": "512",
@@ -229,34 +333,59 @@ def generate_3d_trellis(
      )

      mesh = outputs[0]
+     mesh.simplify(16777216) # nvdiffrast limit

+     # Render Preview Images
+     if not envmap:
+         # Fallback if maps missing
+         print("Envmap missing, rendering basic")
+         images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS)
+     else:
+         images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
+
      state = pack_state(latents)
      torch.cuda.empty_cache()
+
+     # --- HTML Construction ---
      images_html = ""
      for m_idx, mode in enumerate(MODES):
+         # Check if render key exists (in case hdri missing)
+         if mode['render_key'] not in images:
+             continue
+
          for s_idx in range(STEPS):
              unique_id = f"view-m{m_idx}-s{s_idx}"
              is_visible = (m_idx == DEFAULT_MODE and s_idx == DEFAULT_STEP)
              vis_class = "visible" if is_visible else ""
+             img_base64 = image_to_base64(Image.fromarray(images[mode['render_key']][s_idx]))
+
+             images_html += f"""
+                 <img id="{unique_id}"
+                     class="previewer-main-image {vis_class}"
+                     src="{img_base64}"
+                     loading="eager">
+             """
+
      btns_html = ""
      for idx, mode in enumerate(MODES):
+         if mode['render_key'] not in images: continue
          active_class = "active" if idx == DEFAULT_MODE else ""
+         btns_html += f"""
+             <img src="{mode['icon_base64']}"
+                 class="mode-btn {active_class}"
+                 onclick="selectMode({idx})"
+                 title="{mode['name']}">
+         """
+
      full_html = f"""
      <div class="previewer-container">
+         <div class="tips-wrapper">
+             <div class="tips-icon">💡Tips</div>
+             <div class="tips-text">
+                 <p>● <b>Render Mode</b> - Click buttons to switch render modes.</p>
+                 <p>● <b>View Angle</b> - Drag slider to rotate.</p>
+             </div>
+         </div>
          <div class="display-row">{images_html}</div>
          <div class="mode-row" id="btn-group">{btns_html}</div>
          <div class="slider-row">
@@ -264,32 +393,31 @@ def generate_3d_trellis(
          </div>
      </div>
      """
      return state, full_html
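One thing to watch in this revision: btns_html reads mode['icon_base64'], but the new MODES entries only define an "icon" path, and the base64 preload loop from the deleted code does not reappear in these hunks. Unless that preload happens in a collapsed region, a step like the old loop (with 'icon_path' renamed to the new 'icon' key) is still needed before the buttons can render:

# Preload loop carried over from the deleted code, key renamed to 'icon';
# btns_html above expects mode['icon_base64'] to exist.
for mode in MODES:
    if os.path.exists(mode['icon']):
        with open(mode['icon'], "rb") as f:
            mode['icon_base64'] = f"data:image/png;base64,{base64.b64encode(f.read()).decode()}"
    else:
        mode['icon_base64'] = ""  # fallback when the asset is missing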

+ @spaces.GPU(duration=120)
+ def extract_glb(
+     state: dict,
+     decimation_target: int,
+     texture_size: int,
+     req: gr.Request,
+     progress=gr.Progress(track_tqdm=True),
+ ) -> Tuple[str, str]:
      user_dir = os.path.join(TMP_DIR, str(req.session_hash))
      shape_slat, tex_slat, res = unpack_state(state)
+     mesh = trellis_pipe.decode_latent(shape_slat, tex_slat, res)[0]
+     mesh.simplify(16777216)

      glb = o_voxel.postprocess.to_glb(
          vertices=mesh.vertices,
          faces=mesh.faces,
          attr_volume=mesh.attrs,
          coords=mesh.coords,
+         attr_layout=trellis_pipe.pbr_attr_layout,
          grid_size=res,
          aabb=[[-0.5, -0.5, -0.5], [0.5, 0.5, 0.5]],
+         decimation_target=decimation_target,
+         texture_size=texture_size,
          remesh=True,
          remesh_band=1,
          remesh_project=0,
@@ -297,151 +425,146 @@ def extract_glb(state: dict, mesh_simplify: float, texture_size: int, req: gr.Re
      )

      now = datetime.now()
+     timestamp = now.strftime("%Y-%m-%dT%H%M%S") + f".{now.microsecond // 1000:03d}"
+     os.makedirs(user_dir, exist_ok=True)
+     glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
      glb.export(glb_path, extension_webp=True)
      torch.cuda.empty_cache()
+     return glb_path, glb_path
+
+ # ==========================================
+ # 5. GRADIO APP INTERFACE
+ # ==========================================
+
+ with gr.Blocks(delete_cache=(600, 600), css=css, head=head) as demo:
+     gr.Markdown("""
+     # Z-Image-Turbo + TRELLIS.2: Text to 3D
+     Step 1: Generate an image from text.
+     Step 2: Convert that image into a 3D Asset.
+     """)

      with gr.Row():
+         # --- LEFT COLUMN: INPUTS ---
+         with gr.Column(scale=1, min_width=360):

+             # --- Z-Image Section ---
+             with gr.Group():
+                 gr.Markdown("### 1. Text to Image (Z-Image)")
+                 prompt = gr.Textbox(label="Prompt", placeholder="A stylized 3d render of a cute robot...", lines=2)
                  with gr.Row():
+                     img_width = gr.Number(label="Width", value=1024, precision=0)
+                     img_height = gr.Number(label="Height", value=1024, precision=0)
+                 img_steps = gr.Slider(1, 10, value=4, step=1, label="Steps")
+                 img_seed = gr.Number(value=42, label="Seed", precision=0)
+                 img_rand_seed = gr.Checkbox(label="Randomize Seed", value=True)
+
+             gen_img_btn = gr.Button("Generate Image", variant="primary")
+
+             # --- Intermediate Image ---
+             image_prompt = gr.Image(label="Generated Image (Input for 3D)", format="png", image_mode="RGBA", type="pil", height=400)
+
+             preprocess_btn = gr.Button("Remove Background (Preprocess)", variant="secondary")
+
+             # --- TRELLIS Section ---
+             with gr.Group():
+                 gr.Markdown("### 2. Image to 3D (TRELLIS)")
+                 resolution = gr.Radio(["512", "1024", "1536"], label="3D Resolution", value="1024")
+                 trellis_seed = gr.Slider(0, MAX_SEED, label="3D Seed", value=0, step=1)
+                 trellis_rand_seed = gr.Checkbox(label="Randomize 3D Seed", value=True)
+
+             gen_3d_btn = gr.Button("Generate 3D Model", variant="primary")

+             # Advanced Settings
+             with gr.Accordion(label="Advanced 3D Settings", open=False):
+                 decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
+                 texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
+
+                 gr.Markdown("Stage 1: Sparse Structure")
+                 with gr.Row():
+                     ss_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance", value=7.5, step=0.1)
+                     ss_sampling_steps = gr.Slider(1, 50, label="Steps", value=12, step=1)
+                 gr.Markdown("Stage 2: Shape")
+                 with gr.Row():
+                     shape_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance", value=7.5, step=0.1)
+                     shape_slat_sampling_steps = gr.Slider(1, 50, label="Steps", value=12, step=1)
+                 gr.Markdown("Stage 3: Material")
+                 with gr.Row():
+                     tex_slat_guidance_strength = gr.Slider(1.0, 10.0, label="Guidance", value=1.0, step=0.1)
+                     tex_slat_sampling_steps = gr.Slider(1, 50, label="Steps", value=12, step=1)
+
+             # Hidden params kept for compatibility
+             ss_guidance_rescale = gr.Number(value=0.7, visible=False)
+             ss_rescale_t = gr.Number(value=5.0, visible=False)
+             shape_slat_guidance_rescale = gr.Number(value=0.5, visible=False)
+             shape_slat_rescale_t = gr.Number(value=3.0, visible=False)
+             tex_slat_guidance_rescale = gr.Number(value=0.0, visible=False)
+             tex_slat_rescale_t = gr.Number(value=3.0, visible=False)
+
+         # --- RIGHT COLUMN: OUTPUTS ---
+         with gr.Column(scale=10):
+             with gr.Walkthrough(selected=0) as walkthrough:
+                 with gr.Step("Preview", id=0):
+                     preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
+                     extract_btn = gr.Button("Extract GLB")
+                 with gr.Step("Extract", id=1):
+                     glb_output = gr.Model3D(label="Extracted GLB", height=724, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0))
+                     download_btn = gr.DownloadButton(label="Download GLB")
+
+     # State for the 3D generation latent
+     output_buf = gr.State()
+
+     # --- EVENT HANDLERS ---

+     demo.load(start_session)
+     demo.unload(end_session)
+
+     # 1. Generate Image
      gen_img_btn.click(
          fn=generate_z_image,
+         inputs=[prompt, img_height, img_width, img_steps, img_seed, img_rand_seed],
+         outputs=[image_prompt, img_seed] # Update image and show used seed
      )
+
+     # 2. Preprocess Image (Remove BG)
+     preprocess_btn.click(
+         fn=preprocess_image,
+         inputs=[image_prompt],
+         outputs=[image_prompt]
      )
+
+     # Auto-preprocess on upload as well (optional, from original code)
+     image_prompt.upload(
+         preprocess_image,
+         inputs=[image_prompt],
+         outputs=[image_prompt],
      )

+     # 3. Generate 3D
+     gen_3d_btn.click(
+         get_seed,
+         inputs=[trellis_rand_seed, trellis_seed],
+         outputs=[trellis_seed],
+     ).then(
+         lambda: gr.Walkthrough(selected=0), outputs=walkthrough
+     ).then(
+         generate_trellis_3d,
+         inputs=[
+             image_prompt, trellis_seed, resolution,
+             ss_guidance_strength, ss_guidance_rescale, ss_sampling_steps, ss_rescale_t,
+             shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t,
+             tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t,
+         ],
+         outputs=[output_buf, preview_output],
+     )

+     # 4. Extract GLB
+     extract_btn.click(
+         lambda: gr.Walkthrough(selected=1), outputs=walkthrough
+     ).then(
+         extract_glb,
+         inputs=[output_buf, decimation_target, texture_size],
+         outputs=[glb_output, download_btn],
+     )

  if __name__ == "__main__":
+     demo.launch()