AI-Hypercomputer · ninatu · May 15, 2026
@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -41,6 +41,11 @@ revision: ''
 weights_dtype: 'bfloat16'
 # This sets the layer's dtype in the model. Ex: nn.Dense(dtype=activations_dtype)
 activations_dtype: 'bfloat16'
+# torch dtype name (e.g. 'float32', 'bfloat16') used when loading the text_encoder; set independently of weights_dtype.
+text_encoder_dtype: 'float32'
+
+# Whether to compile the text_encoder with torch.compile
+compile_text_encoder: False
 
 # Replicates vae across devices instead of using the model's sharding annotations for sharding.
 replicate_vae: False

@@ -270,13 +270,15 @@ def __init__(
 
   @classmethod
   def load_text_encoder(cls, config: HyperParameters):
-    torch_dtype = getattr(torch, str(config.weights_dtype), torch.float32)
+    text_encoder_dtype = getattr(config, "text_encoder_dtype", "float32")  # configs without the key load in float32
+    torch_dtype = getattr(torch, str(text_encoder_dtype), torch.float32)  # names that are not torch attributes fall back to float32
     text_encoder = UMT5EncoderModel.from_pretrained(
         config.pretrained_model_name_or_path,
         subfolder="text_encoder",
         torch_dtype=torch_dtype,
     )
-    text_encoder = torch.compile(text_encoder)
+    if getattr(config, "compile_text_encoder", True):  # NOTE(review): missing key => compile, but the YAML configs set False; confirm
+      text_encoder = torch.compile(text_encoder)
     return text_encoder
 
   @classmethod