From 95495d62ccb240d0f330bd4fc996da5d426c8e83 Mon Sep 17 00:00:00 2001 From: kabachuha Date: Sun, 20 Apr 2025 21:34:58 +0300 Subject: [PATCH 1/3] add last frame support --- nodes.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/nodes.py b/nodes.py index 9d63102..b5318eb 100644 --- a/nodes.py +++ b/nodes.py @@ -289,6 +289,8 @@ def INPUT_TYPES(s): }, "optional": { "start_latent": ("LATENT", {"tooltip": "init Latents to use for image2video"} ), + "end_latent": ("LATENT", {"tooltip": "end Latents to use for image2video"} ), + "end_image_embeds": ("CLIP_VISION_OUTPUT", {"tooltip": "end Image's clip embeds"} ), "initial_samples": ("LATENT", {"tooltip": "init Latents to use for video2video"} ), "denoise_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } @@ -299,8 +301,8 @@ def INPUT_TYPES(s): FUNCTION = "process" CATEGORY = "FramePackWrapper" - def process(self, model, shift, positive, negative, latent_window_size, use_teacache, total_second_length, teacache_rel_l1_thresh, image_embeds, steps, cfg, - guidance_scale, seed, sampler, gpu_memory_preservation, start_latent=None, initial_samples=None, denoise_strength=1.0): + def process(self, model, shift, positive, negative, latent_window_size, use_teacache, total_second_length, teacache_rel_l1_thresh, image_embeds, steps, cfg, + guidance_scale, seed, sampler, gpu_memory_preservation, start_latent=None, end_latent=None, end_image_embeds=None, initial_samples=None, denoise_strength=1.0): total_latent_sections = (total_second_length * 30) / (latent_window_size * 4) total_latent_sections = int(max(round(total_latent_sections), 1)) print("total_latent_sections: ", total_latent_sections) @@ -318,11 +320,20 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac start_latent = start_latent["samples"] * vae_scaling_factor if initial_samples is not None: initial_samples = initial_samples["samples"] * vae_scaling_factor + if end_latent is not None: + end_latent = end_latent["samples"] * vae_scaling_factor + has_end_image = end_latent is not None print("start_latent", start_latent.shape) B, C, T, H, W = start_latent.shape image_encoder_last_hidden_state = image_embeds["last_hidden_state"].to(base_dtype).to(device) + if has_end_image: + assert end_image_embeds is not None + end_image_encoder_last_hidden_state = end_image_embeds["last_hidden_state"].to(base_dtype).to(device) + # Combine both image embeddings or use a weighted approach + image_encoder_last_hidden_state = (image_encoder_last_hidden_state + end_image_encoder_last_hidden_state) / 2 + llama_vec = positive[0][0].to(base_dtype).to(device) clip_l_pooler = positive[0][1]["pooled_output"].to(base_dtype).to(device) @@ -373,9 +384,10 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac for latent_padding in latent_paddings: print(f"latent_padding: {latent_padding}") is_last_section = latent_padding == 0 + is_first_section = latent_padding == latent_paddings[0] latent_padding_size = latent_padding * latent_window_size - print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}') + print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}, is_first_section = {is_first_section}') indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0) clean_latent_indices_pre, blank_indices, latent_indices, clean_latent_indices_post, clean_latent_2x_indices, clean_latent_4x_indices = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1) @@ -385,6 +397,11 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2) clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2) + # Use end image latent for the first section if provided + if has_end_image and is_first_section: + clean_latents_post = end_latent.to(history_latents) + clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2) + #vid2vid if initial_samples is not None: @@ -455,7 +472,7 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac total_generated_latent_frames += int(generated_latents.shape[2]) history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2) - real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :] + real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :] if is_last_section: break From 05354f7b5a2227e45a87f6535a1166de5ea63860 Mon Sep 17 00:00:00 2001 From: kabachuha Date: Mon, 21 Apr 2025 21:27:52 +0300 Subject: [PATCH 2/3] add time interpolation between image embeds --- nodes.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/nodes.py b/nodes.py index b5318eb..5bc7539 100644 --- a/nodes.py +++ b/nodes.py @@ -291,6 +291,8 @@ def INPUT_TYPES(s): "start_latent": ("LATENT", {"tooltip": "init Latents to use for image2video"} ), "end_latent": ("LATENT", {"tooltip": "end Latents to use for image2video"} ), "end_image_embeds": ("CLIP_VISION_OUTPUT", {"tooltip": "end Image's clip embeds"} ), + "embed_interpolation": (["weighted_average", "linear"], {"default": 'linear', "tooltip": "Image embedding interpolation type. If linear, will smoothly interpolate with time, else it'll be weighted average with the specified weight."}), + "start_embed_strength": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01, "tooltip": "Weighted average constant for image embed interpolation. If end image is not set, the embed's strength won't be affected"}), "initial_samples": ("LATENT", {"tooltip": "init Latents to use for video2video"} ), "denoise_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } @@ -302,7 +304,7 @@ def INPUT_TYPES(s): CATEGORY = "FramePackWrapper" def process(self, model, shift, positive, negative, latent_window_size, use_teacache, total_second_length, teacache_rel_l1_thresh, image_embeds, steps, cfg, - guidance_scale, seed, sampler, gpu_memory_preservation, start_latent=None, end_latent=None, end_image_embeds=None, initial_samples=None, denoise_strength=1.0): + guidance_scale, seed, sampler, gpu_memory_preservation, start_latent=None, end_latent=None, end_image_embeds=None, embed_interpolation="linear", start_embed_strength=1.0, initial_samples=None, denoise_strength=1.0): total_latent_sections = (total_second_length * 30) / (latent_window_size * 4) total_latent_sections = int(max(round(total_latent_sections), 1)) print("total_latent_sections: ", total_latent_sections) @@ -326,13 +328,13 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac print("start_latent", start_latent.shape) B, C, T, H, W = start_latent.shape - image_encoder_last_hidden_state = image_embeds["last_hidden_state"].to(base_dtype).to(device) + start_image_encoder_last_hidden_state = image_embeds["last_hidden_state"].to(base_dtype).to(device) if has_end_image: assert end_image_embeds is not None end_image_encoder_last_hidden_state = end_image_embeds["last_hidden_state"].to(base_dtype).to(device) - # Combine both image embeddings or use a weighted approach - image_encoder_last_hidden_state = (image_encoder_last_hidden_state + end_image_encoder_last_hidden_state) / 2 + else: + end_image_encoder_last_hidden_state = torch.zeros_like(start_image_encoder_last_hidden_state) llama_vec = positive[0][0].to(base_dtype).to(device) clip_l_pooler = positive[0][1]["pooled_output"].to(base_dtype).to(device) @@ -380,13 +382,20 @@ def process(self, model, shift, positive, negative, latent_window_size, use_teac # use `latent_paddings = list(reversed(range(total_latent_sections)))` to compare latent_paddings = [3] + [2] * (total_latent_sections - 3) + [1, 0] latent_paddings_list = latent_paddings.copy() - - for latent_padding in latent_paddings: + + for i, latent_padding in enumerate(latent_paddings): print(f"latent_padding: {latent_padding}") is_last_section = latent_padding == 0 is_first_section = latent_padding == latent_paddings[0] latent_padding_size = latent_padding * latent_window_size + if embed_interpolation == "linear": + frac = 1 - i / (total_latent_sections - 1) # going backwards + else: + frac = start_embed_strength if has_end_image else 1.0 + + image_encoder_last_hidden_state = start_image_encoder_last_hidden_state * frac + (1 - frac) * end_image_encoder_last_hidden_state + print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}, is_first_section = {is_first_section}') indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0) From 4b959f46b6900244edc7bcf35662919db0ec18dc Mon Sep 17 00:00:00 2001 From: kijai <40791699+kijai@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:30:34 +0300 Subject: [PATCH 3/3] Update framepack_hv_example.json --- example_workflows/framepack_hv_example.json | 1563 +++++++++++++------ 1 file changed, 1103 insertions(+), 460 deletions(-) diff --git a/example_workflows/framepack_hv_example.json b/example_workflows/framepack_hv_example.json index 1830e91..db6bffe 100644 --- a/example_workflows/framepack_hv_example.json +++ b/example_workflows/framepack_hv_example.json @@ -1,19 +1,60 @@ { "id": "ce2cb810-7775-4564-8928-dd5bed1053cd", "revision": 0, - "last_node_id": 55, - "last_link_id": 130, + "last_node_id": 69, + "last_link_id": 158, "nodes": [ { - "id": 12, - "type": "VAELoader", + "id": 15, + "type": "ConditioningZeroOut", "pos": [ - 570.5363159179688, - -282.70068359375 + 1346.0872802734375, + 263.21856689453125 ], "size": [ - 469.0488586425781, - 58 + 317.4000244140625, + 26 + ], + "flags": { + "collapsed": true + }, + "order": 18, + "mode": 0, + "inputs": [ + { + "name": "conditioning", + "type": "CONDITIONING", + "link": 118 + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 108 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.28", + "Node name for S&R": "ConditioningZeroOut" + }, + "widgets_values": [], + "color": "#332922", + "bgcolor": "#593930" + }, + { + "id": 13, + "type": "DualCLIPLoader", + "pos": [ + 320.9956359863281, + 166.8336181640625 + ], + "size": [ + 340.2243957519531, + 130 ], "flags": {}, "order": 0, @@ -21,137 +62,153 @@ "inputs": [], "outputs": [ { - "name": "VAE", - "type": "VAE", + "name": "CLIP", + "type": "CLIP", "links": [ - 22, - 62 + 102 ] } ], "properties": { "cnr_id": "comfy-core", "ver": "0.3.28", - "Node name for S&R": "VAELoader" + "Node name for S&R": "DualCLIPLoader" }, "widgets_values": [ - "hyvid\\hunyuan_video_vae_bf16_repack.safetensors" + "clip_l.safetensors", + "llava_llama3_fp16.safetensors", + "hunyuan_video", + "default" ], - "color": "#322", - "bgcolor": "#533" + "color": "#432", + "bgcolor": "#653" }, { - "id": 33, - "type": "VAEDecodeTiled", + "id": 54, + "type": "DownloadAndLoadFramePackModel", "pos": [ - 2181.271484375, - -292.61370849609375 + 1256.5235595703125, + -277.76226806640625 ], "size": [ 315, - 150 + 130 ], "flags": {}, - "order": 16, - "mode": 0, + "order": 1, + "mode": 4, "inputs": [ { - "name": "samples", - "type": "LATENT", - "link": 85 - }, - { - "name": "vae", - "type": "VAE", - "link": 62 + "name": "compile_args", + "shape": 7, + "type": "FRAMEPACKCOMPILEARGS", + "link": null } ], "outputs": [ { - "name": "IMAGE", - "type": "IMAGE", - "links": [ - 96 - ] + "name": "model", + "type": "FramePackMODEL", + "links": null } ], "properties": { - "cnr_id": "comfy-core", - "ver": "0.3.28", - "Node name for S&R": "VAEDecodeTiled" + "aux_id": "kijai/ComfyUI-FramePackWrapper", + "ver": "49fe507eca8246cc9d08a8093892f40c1180e88f", + "Node name for S&R": "DownloadAndLoadFramePackModel" }, "widgets_values": [ - 256, - 64, - 64, - 8 + "lllyasviel/FramePackI2V_HY", + "bf16", + "disabled", + "sdpa" + ] + }, + { + "id": 55, + "type": "MarkdownNote", + "pos": [ + 567.05908203125, + -628.8865966796875 ], - "color": "#322", - "bgcolor": "#533" + "size": [ + 459.8609619140625, + 285.9714660644531 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": {}, + "widgets_values": [ + "Model links:\n\n[https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_fp8_e4m3fn.safetensors](https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_fp8_e4m3fn.safetensors)\n\n[https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_bf16.safetensors](https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_bf16.safetensors)\n\nsigclip:\n\n[https://huggingface.co/Comfy-Org/sigclip_vision_384/tree/main](https://huggingface.co/Comfy-Org/sigclip_vision_384/tree/main)\n\ntext encoder and VAE:\n\n[https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files)" + ], + "color": "#432", + "bgcolor": "#653" }, { - "id": 20, - "type": "VAEEncode", + "id": 52, + "type": "LoadFramePackModel", "pos": [ - 1329.880859375, - 701.230224609375 + 1253.046630859375, + -82.57657623291016 ], "size": [ - 210, - 46 + 480.7601013183594, + 130 ], "flags": {}, - "order": 14, + "order": 3, "mode": 0, "inputs": [ { - "name": "pixels", - "type": "IMAGE", - "link": 117 - }, - { - "name": "vae", - "type": "VAE", - "link": 22 + "name": "compile_args", + "shape": 7, + "type": "FRAMEPACKCOMPILEARGS", + "link": null } ], "outputs": [ { - "name": "LATENT", - "type": "LATENT", + "name": "model", + "type": "FramePackMODEL", "links": [ - 86 + 129 ] } ], "properties": { - "cnr_id": "comfy-core", - "ver": "0.3.28", - "Node name for S&R": "VAEEncode" + "aux_id": "kijai/ComfyUI-FramePackWrapper", + "ver": "49fe507eca8246cc9d08a8093892f40c1180e88f", + "Node name for S&R": "LoadFramePackModel" }, - "widgets_values": [], - "color": "#322", - "bgcolor": "#533" + "widgets_values": [ + "Hyvid\\FramePackI2V_HY_fp8_e4m3fn.safetensors", + "bf16", + "fp8_e4m3fn", + "sdpa" + ] }, { "id": 17, "type": "CLIPVisionEncode", "pos": [ - 1228.9832763671875, - 525.7402954101562 + 1545.9541015625, + 359.1331481933594 ], "size": [ 380.4000244140625, 78 ], "flags": {}, - "order": 13, + "order": 23, "mode": 0, "inputs": [ { "name": "clip_vision", "type": "CLIP_VISION", - "link": 18 + "link": 149 }, { "name": "image", @@ -164,7 +221,7 @@ "name": "CLIP_VISION_OUTPUT", "type": "CLIP_VISION_OUTPUT", "links": [ - 83 + 141 ] } ], @@ -180,173 +237,207 @@ "bgcolor": "#355" }, { - "id": 18, - "type": "CLIPVisionLoader", + "id": 39, + "type": "FramePackSampler", "pos": [ - 511.98028564453125, - 530.965576171875 + 2292.58837890625, + 194.90232849121094 ], "size": [ - 388.87139892578125, - 58 + 365.07305908203125, + 814.6473388671875 ], "flags": {}, - "order": 1, + "order": 27, "mode": 0, - "inputs": [], + "inputs": [ + { + "name": "model", + "type": "FramePackMODEL", + "link": 129 + }, + { + "name": "positive", + "type": "CONDITIONING", + "link": 114 + }, + { + "name": "negative", + "type": "CONDITIONING", + "link": 108 + }, + { + "name": "image_embeds", + "type": "CLIP_VISION_OUTPUT", + "link": 141 + }, + { + "name": "start_latent", + "shape": 7, + "type": "LATENT", + "link": 86 + }, + { + "name": "end_latent", + "shape": 7, + "type": "LATENT", + "link": 147 + }, + { + "name": "end_image_embeds", + "shape": 7, + "type": "CLIP_VISION_OUTPUT", + "link": 132 + }, + { + "name": "initial_samples", + "shape": 7, + "type": "LATENT", + "link": null + } + ], "outputs": [ { - "name": "CLIP_VISION", - "type": "CLIP_VISION", + "name": "samples", + "type": "LATENT", "links": [ - 18 + 85 ] } ], "properties": { - "cnr_id": "comfy-core", - "ver": "0.3.28", - "Node name for S&R": "CLIPVisionLoader" + "aux_id": "kijai/ComfyUI-FramePackWrapper", + "ver": "8e5ec6b7f3acf88255c5d93d062079f18b43aa2b", + "Node name for S&R": "FramePackSampler" }, "widgets_values": [ - "sigclip_vision_patch14_384.safetensors" - ], - "color": "#2a363b", - "bgcolor": "#3f5159" - }, - { - "id": 27, - "type": "FramePackTorchCompileSettings", - "pos": [ - 528.2340087890625, - -143.91505432128906 - ], - "size": [ - 531.5999755859375, - 202 + 30, + true, + 0.15, + 1, + 10, + 0, + 47, + "fixed", + 9, + 5, + 6, + "unipc_bh1", + "weighted_average", + 0.5, + 1, + "" + ] + }, + { + "id": 64, + "type": "GetNode", + "pos": [ + 1554.2071533203125, + 486.79547119140625 ], - "flags": {}, - "order": 2, + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 4, "mode": 0, "inputs": [], "outputs": [ { - "name": "torch_compile_args", - "type": "FRAMEPACKCOMPILEARGS", - "links": [] + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 149 + ] } ], - "properties": { - "aux_id": "lllyasviel/FramePack", - "ver": "0e5fe5d7ca13c76fb8e13708f4b92e7c7a34f20c", - "Node name for S&R": "FramePackTorchCompileSettings" - }, + "title": "Get_ClipVisionModle", + "properties": {}, "widgets_values": [ - "inductor", - false, - "default", - false, - 64, - true, - true - ] + "ClipVisionModle" + ], + "color": "#233", + "bgcolor": "#355" }, { - "id": 23, - "type": "VHS_VideoCombine", + "id": 48, + "type": "GetImageSizeAndCount", "pos": [ - 2723.8759765625, - -376.53961181640625 + 1259.2060546875, + 626.8657836914062 ], "size": [ - 908.428955078125, - 1283.1883544921875 + 277.20001220703125, + 86 ], "flags": {}, - "order": 18, + "order": 21, "mode": 0, "inputs": [ { - "name": "images", + "name": "image", "type": "IMAGE", - "link": 97 - }, + "link": 125 + } + ], + "outputs": [ { - "name": "audio", - "shape": 7, - "type": "AUDIO", - "link": null + "name": "image", + "type": "IMAGE", + "links": [ + 116, + 156 + ] }, { - "name": "meta_batch", - "shape": 7, - "type": "VHS_BatchManager", - "link": null + "label": "704 width", + "name": "width", + "type": "INT", + "links": null }, { - "name": "vae", - "shape": 7, - "type": "VAE", - "link": null - } - ], - "outputs": [ + "label": "544 height", + "name": "height", + "type": "INT", + "links": null + }, { - "name": "Filenames", - "type": "VHS_FILENAMES", + "label": "1 count", + "name": "count", + "type": "INT", "links": null } ], "properties": { - "cnr_id": "comfyui-videohelpersuite", - "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8", - "Node name for S&R": "VHS_VideoCombine" + "cnr_id": "comfyui-kjnodes", + "ver": "8ecf5cd05e0a1012087b0da90eea9a13674668db", + "Node name for S&R": "GetImageSizeAndCount" }, - "widgets_values": { - "frame_rate": 30, - "loop_count": 0, - "filename_prefix": "FramePack", - "format": "video/h264-mp4", - "pix_fmt": "yuv420p", - "crf": 19, - "save_metadata": true, - "trim_to_audio": false, - "pingpong": false, - "save_output": false, - "videopreview": { - "hidden": false, - "paused": false, - "params": { - "filename": "FramePack_00057.mp4", - "subfolder": "", - "type": "temp", - "format": "video/h264-mp4", - "frame_rate": 24, - "workflow": "FramePack_00057.png", - "fullpath": "N:\\AI\\ComfyUI\\temp\\FramePack_00057.mp4" - } - } - } + "widgets_values": [] }, { - "id": 48, + "id": 60, "type": "GetImageSizeAndCount", "pos": [ - 1266.1427001953125, - 844.8764038085938 + 1279.781494140625, + 1060.245361328125 ], "size": [ 277.20001220703125, 86 ], "flags": {}, - "order": 12, + "order": 22, "mode": 0, "inputs": [ { "name": "image", "type": "IMAGE", - "link": 125 + "link": 139 } ], "outputs": [ @@ -354,18 +445,18 @@ "name": "image", "type": "IMAGE", "links": [ - 116, - 117 + 151, + 152 ] }, { - "label": "608 width", + "label": "704 width", "name": "width", "type": "INT", "links": null }, { - "label": "640 height", + "label": "544 height", "name": "height", "type": "INT", "links": null @@ -381,277 +472,401 @@ "cnr_id": "comfyui-kjnodes", "ver": "8ecf5cd05e0a1012087b0da90eea9a13674668db", "Node name for S&R": "GetImageSizeAndCount" - }, - "widgets_values": [] + } }, { - "id": 15, - "type": "ConditioningZeroOut", + "id": 12, + "type": "VAELoader", "pos": [ - 1346.0872802734375, - 263.21856689453125 + 570.5363159179688, + -282.70068359375 ], "size": [ - 317.4000244140625, - 26 + 469.0488586425781, + 58 ], - "flags": { - "collapsed": true - }, - "order": 10, + "flags": {}, + "order": 5, "mode": 0, - "inputs": [ - { - "name": "conditioning", - "type": "CONDITIONING", - "link": 118 - } - ], + "inputs": [], "outputs": [ { - "name": "CONDITIONING", - "type": "CONDITIONING", + "name": "VAE", + "type": "VAE", "links": [ - 108 + 153 ] } ], "properties": { "cnr_id": "comfy-core", "ver": "0.3.28", - "Node name for S&R": "ConditioningZeroOut" + "Node name for S&R": "VAELoader" }, - "widgets_values": [], - "color": "#332922", - "bgcolor": "#593930" + "widgets_values": [ + "hyvid\\hunyuan_video_vae_bf16_repack.safetensors" + ], + "color": "#322", + "bgcolor": "#533" }, { - "id": 39, - "type": "FramePackSampler", + "id": 66, + "type": "SetNode", "pos": [ - 1822.7847900390625, - 167.60594177246094 + 1083.503173828125, + -358.4913330078125 ], "size": [ - 393, - 852.631591796875 + 210, + 60 ], - "flags": {}, + "flags": { + "collapsed": true + }, "order": 15, "mode": 0, "inputs": [ { - "name": "model", - "type": "FramePackMODEL", - "link": 129 - }, - { - "name": "positive", - "type": "CONDITIONING", - "link": 114 - }, - { - "name": "negative", - "type": "CONDITIONING", - "link": 108 - }, - { - "name": "image_embeds", - "type": "CLIP_VISION_OUTPUT", - "link": 83 - }, - { - "name": "start_latent", - "shape": 7, - "type": "LATENT", - "link": 86 - }, - { - "name": "initial_samples", - "shape": 7, - "type": "LATENT", - "link": null + "name": "VAE", + "type": "VAE", + "link": 153 } ], "outputs": [ { - "name": "samples", - "type": "LATENT", - "links": [ - 85 - ] + "name": "*", + "type": "*", + "links": null } ], + "title": "Set_VAE", "properties": { - "aux_id": "kijai/ComfyUI-FramePackWrapper", - "ver": "8e5ec6b7f3acf88255c5d93d062079f18b43aa2b", - "Node name for S&R": "FramePackSampler" + "previousName": "VAE" }, "widgets_values": [ - 30, - true, - 0.15, - 1, - 10, - 0, - 47, - "fixed", - 9, - 5, - 6, - "unipc_bh1", - 1, - "" - ] + "VAE" + ], + "color": "#322", + "bgcolor": "#533" }, { - "id": 47, - "type": "CLIPTextEncode", + "id": 20, + "type": "VAEEncode", "pos": [ - 715.3054809570312, - 127.73457336425781 + 1733.111083984375, + 633.30419921875 ], "size": [ - 400, - 200 + 210, + 46 ], "flags": {}, - "order": 8, + "order": 24, "mode": 0, "inputs": [ { - "name": "clip", - "type": "CLIP", - "link": 102 - } - ], + "name": "pixels", + "type": "IMAGE", + "link": 156 + }, + { + "name": "vae", + "type": "VAE", + "link": 155 + } + ], "outputs": [ { - "name": "CONDITIONING", - "type": "CONDITIONING", + "name": "LATENT", + "type": "LATENT", "links": [ - 114, - 118 + 86 ] } ], "properties": { "cnr_id": "comfy-core", "ver": "0.3.28", - "Node name for S&R": "CLIPTextEncode" + "Node name for S&R": "VAEEncode" + }, + "widgets_values": [], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 68, + "type": "GetNode", + "pos": [ + 1729.60693359375, + 734.5352172851562 + ], + "size": [ + 210, + 34 + ], + "flags": { + "collapsed": true }, + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 155 + ] + } + ], + "title": "Get_VAE", + "properties": {}, "widgets_values": [ - "old man gets up and takes a walk on the beach" + "VAE" ], - "color": "#232", - "bgcolor": "#353" + "color": "#322", + "bgcolor": "#533" }, { - "id": 13, - "type": "DualCLIPLoader", + "id": 62, + "type": "VAEEncode", "pos": [ - 320.9956359863281, - 166.8336181640625 + 1612.563232421875, + 1048.6236572265625 ], "size": [ - 340.2243957519531, - 130 + 210, + 46 ], "flags": {}, - "order": 3, + "order": 26, "mode": 0, - "inputs": [], + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 152 + }, + { + "name": "vae", + "type": "VAE", + "link": 158 + } + ], "outputs": [ { - "name": "CLIP", - "type": "CLIP", + "name": "LATENT", + "type": "LATENT", "links": [ - 102 + 147 ] } ], "properties": { "cnr_id": "comfy-core", "ver": "0.3.28", - "Node name for S&R": "DualCLIPLoader" + "Node name for S&R": "VAEEncode" }, - "widgets_values": [ - "clip_l.safetensors", - "llava_llama3_fp16.safetensors", - "hunyuan_video", - "default" - ], - "color": "#432", - "bgcolor": "#653" + "color": "#322", + "bgcolor": "#533" }, { - "id": 44, - "type": "GetImageSizeAndCount", + "id": 57, + "type": "CLIPVisionEncode", "pos": [ - 2225.53564453125, - -78.62096405029297 + 1600.4202880859375, + 1181.36767578125 ], "size": [ - 277.20001220703125, - 86 + 380.4000244140625, + 78 ], "flags": {}, - "order": 17, + "order": 25, "mode": 0, "inputs": [ + { + "name": "clip_vision", + "type": "CLIP_VISION", + "link": 150 + }, { "name": "image", "type": "IMAGE", - "link": 96 + "link": 151 } ], "outputs": [ { - "name": "image", - "type": "IMAGE", + "name": "CLIP_VISION_OUTPUT", + "type": "CLIP_VISION_OUTPUT", "links": [ - 97 + 132 ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.29", + "Node name for S&R": "CLIPVisionEncode" + }, + "widgets_values": [ + "center" + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 69, + "type": "GetNode", + "pos": [ + 1619.6104736328125, + 1137.854736328125 + ], + "size": [ + 210, + 34 + ], + "flags": { + "collapsed": true + }, + "order": 7, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 158 + ] + } + ], + "title": "Get_VAE", + "properties": {}, + "widgets_values": [ + "VAE" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 65, + "type": "GetNode", + "pos": [ + 1604.746337890625, + 1306.3175048828125 + ], + "size": [ + 210, + 34 + ], + "flags": { + "collapsed": true + }, + "order": 8, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 150 + ] + } + ], + "title": "Get_ClipVisionModle", + "properties": {}, + "widgets_values": [ + "ClipVisionModle" + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 59, + "type": "ImageResize+", + "pos": [ + 908.9832763671875, + 1062.01123046875 + ], + "size": [ + 315, + 218 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 138 }, { - "label": "608 width", "name": "width", "type": "INT", - "links": null + "widget": { + "name": "width" + }, + "link": 136 }, { - "label": "640 height", "name": "height", "type": "INT", + "widget": { + "name": "height" + }, + "link": 137 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 139 + ] + }, + { + "name": "width", + "type": "INT", "links": null }, { - "label": "145 count", - "name": "count", + "name": "height", "type": "INT", "links": null } ], "properties": { - "cnr_id": "comfyui-kjnodes", - "ver": "8ecf5cd05e0a1012087b0da90eea9a13674668db", - "Node name for S&R": "GetImageSizeAndCount" + "aux_id": "kijai/ComfyUI_essentials", + "ver": "76e9d1e4399bd025ce8b12c290753d58f9f53e93", + "Node name for S&R": "ImageResize+" }, - "widgets_values": [] + "widgets_values": [ + 512, + 512, + "lanczos", + "stretch", + "always", + 0 + ] }, { "id": 50, "type": "ImageResize+", "pos": [ - 921.9315795898438, - 701.9561767578125 + 907.2653198242188, + 593.743896484375 ], "size": [ 315, 218 ], "flags": {}, - "order": 11, + "order": 19, "mode": 0, "inputs": [ { @@ -710,18 +925,18 @@ ] }, { - "id": 19, + "id": 58, "type": "LoadImage", "pos": [ - 221.91770935058594, - 702.9730834960938 + 190.07057189941406, + 1060.399169921875 ], "size": [ 315, 314 ], "flags": {}, - "order": 4, + "order": 9, "mode": 0, "inputs": [], "outputs": [ @@ -729,8 +944,7 @@ "name": "IMAGE", "type": "IMAGE", "links": [ - 122, - 126 + 138 ] }, { @@ -739,13 +953,14 @@ "links": null } ], + "title": "Load Image: End", "properties": { "cnr_id": "comfy-core", "ver": "0.3.28", "Node name for S&R": "LoadImage" }, "widgets_values": [ - "oldman_upscaled.png", + "sd3stag.png", "image" ] }, @@ -753,15 +968,15 @@ "id": 51, "type": "FramePackFindNearestBucket", "pos": [ - 578.9364624023438, - 773.94677734375 + 550.0997314453125, + 887.411376953125 ], "size": [ 315, 78 ], "flags": {}, - "order": 9, + "order": 16, "mode": 0, "inputs": [ { @@ -775,14 +990,16 @@ "name": "width", "type": "INT", "links": [ - 128 + 128, + 136 ] }, { "name": "height", "type": "INT", "links": [ - 127 + 127, + 137 ] } ], @@ -796,146 +1013,417 @@ ] }, { - "id": 54, - "type": "DownloadAndLoadFramePackModel", + "id": 19, + "type": "LoadImage", + "pos": [ + 184.2612762451172, + 591.6886596679688 + ], + "size": [ + 315, + 314 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 122, + 126 + ] + }, + { + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "title": "Load Image: Start", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.28", + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "sd3stag.png", + "image" + ] + }, + { + "id": 18, + "type": "CLIPVisionLoader", + "pos": [ + 33.149566650390625, + 23.595293045043945 + ], + "size": [ + 388.87139892578125, + 58 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "links": [ + 148 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.28", + "Node name for S&R": "CLIPVisionLoader" + }, + "widgets_values": [ + "sigclip_vision_patch14_384.safetensors" + ], + "color": "#2a363b", + "bgcolor": "#3f5159" + }, + { + "id": 63, + "type": "SetNode", + "pos": [ + 247.1346435546875, + -28.502397537231445 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 17, + "mode": 0, + "inputs": [ + { + "name": "CLIP_VISION", + "type": "CLIP_VISION", + "link": 148 + } + ], + "outputs": [ + { + "name": "*", + "type": "*", + "links": null + } + ], + "title": "Set_ClipVisionModle", + "properties": { + "previousName": "ClipVisionModle" + }, + "widgets_values": [ + "ClipVisionModle" + ], + "color": "#233", + "bgcolor": "#355" + }, + { + "id": 27, + "type": "FramePackTorchCompileSettings", + "pos": [ + 623.3660278320312, + -140.94215393066406 + ], + "size": [ + 531.5999755859375, + 202 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "torch_compile_args", + "type": "FRAMEPACKCOMPILEARGS", + "links": [] + } + ], + "properties": { + "aux_id": "lllyasviel/FramePack", + "ver": "0e5fe5d7ca13c76fb8e13708f4b92e7c7a34f20c", + "Node name for S&R": "FramePackTorchCompileSettings" + }, + "widgets_values": [ + "inductor", + false, + "default", + false, + 64, + true, + true + ] + }, + { + "id": 33, + "type": "VAEDecodeTiled", + "pos": [ + 2328.923828125, + -22.08228874206543 + ], + "size": [ + 315, + 150 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 85 + }, + { + "name": "vae", + "type": "VAE", + "link": 154 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 96 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.28", + "Node name for S&R": "VAEDecodeTiled" + }, + "widgets_values": [ + 256, + 64, + 64, + 8 + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 67, + "type": "GetNode", + "pos": [ + 2342.01806640625, + -76.06847381591797 + ], + "size": [ + 210, + 60 + ], + "flags": { + "collapsed": true + }, + "order": 13, + "mode": 0, + "inputs": [], + "outputs": [ + { + "name": "VAE", + "type": "VAE", + "links": [ + 154 + ] + } + ], + "title": "Get_VAE", + "properties": {}, + "widgets_values": [ + "VAE" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 23, + "type": "VHS_VideoCombine", + "pos": [ + 2726.849853515625, + -29.90264129638672 + ], + "size": [ + 908.428955078125, + 1034.5133056640625 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 97 + }, + { + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": null + }, + { + "name": "meta_batch", + "shape": 7, + "type": "VHS_BatchManager", + "link": null + }, + { + "name": "vae", + "shape": 7, + "type": "VAE", + "link": null + } + ], + "outputs": [ + { + "name": "Filenames", + "type": "VHS_FILENAMES", + "links": null + } + ], + "properties": { + "cnr_id": "comfyui-videohelpersuite", + "ver": "0a75c7958fe320efcb052f1d9f8451fd20c730a8", + "Node name for S&R": "VHS_VideoCombine" + }, + "widgets_values": { + "frame_rate": 30, + "loop_count": 0, + "filename_prefix": "FramePack", + "format": "video/h264-mp4", + "pix_fmt": "yuv420p", + "crf": 19, + "save_metadata": true, + "trim_to_audio": false, + "pingpong": false, + "save_output": false, + "videopreview": { + "hidden": false, + "paused": false, + "params": { + "filename": "FramePack_00001.mp4", + "subfolder": "", + "type": "temp", + "format": "video/h264-mp4", + "frame_rate": 30, + "workflow": "FramePack_00001.png", + "fullpath": "N:\\AI\\ComfyUI\\temp\\FramePack_00001.mp4" + } + } + } + }, + { + "id": 44, + "type": "GetImageSizeAndCount", "pos": [ - 1256.5235595703125, - -277.76226806640625 + 2501.023193359375, + -178.70773315429688 ], "size": [ - 315, - 130 + 277.20001220703125, + 86 ], "flags": {}, - "order": 5, - "mode": 4, + "order": 29, + "mode": 0, "inputs": [ { - "name": "compile_args", - "shape": 7, - "type": "FRAMEPACKCOMPILEARGS", - "link": null + "name": "image", + "type": "IMAGE", + "link": 96 } ], "outputs": [ { - "name": "model", - "type": "FramePackMODEL", + "name": "image", + "type": "IMAGE", + "links": [ + 97 + ] + }, + { + "label": "704 width", + "name": "width", + "type": "INT", + "links": null + }, + { + "label": "544 height", + "name": "height", + "type": "INT", + "links": null + }, + { + "label": "145 count", + "name": "count", + "type": "INT", "links": null } ], "properties": { - "aux_id": "kijai/ComfyUI-FramePackWrapper", - "ver": "49fe507eca8246cc9d08a8093892f40c1180e88f", - "Node name for S&R": "DownloadAndLoadFramePackModel" + "cnr_id": "comfyui-kjnodes", + "ver": "8ecf5cd05e0a1012087b0da90eea9a13674668db", + "Node name for S&R": "GetImageSizeAndCount" }, - "widgets_values": [ - "lllyasviel/FramePackI2V_HY", - "bf16", - "disabled", - "sdpa" - ] - }, - { - "id": 55, - "type": "MarkdownNote", - "pos": [ - 567.05908203125, - -628.8865966796875 - ], - "size": [ - 459.8609619140625, - 285.9714660644531 - ], - "flags": {}, - "order": 6, - "mode": 0, - "inputs": [], - "outputs": [], - "properties": {}, - "widgets_values": [ - "Model links:\n\n[https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_fp8_e4m3fn.safetensors](https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_fp8_e4m3fn.safetensors)\n\n[https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_bf16.safetensors](https://huggingface.co/Kijai/HunyuanVideo_comfy/blob/main/FramePackI2V_HY_bf16.safetensors)\n\nsigclip:\n\n[https://huggingface.co/Comfy-Org/sigclip_vision_384/tree/main](https://huggingface.co/Comfy-Org/sigclip_vision_384/tree/main)\n\ntext encoder and VAE:\n\n[https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files](https://huggingface.co/Comfy-Org/HunyuanVideo_repackaged/tree/main/split_files)" - ], - "color": "#432", - "bgcolor": "#653" + "widgets_values": [] }, { - "id": 52, - "type": "LoadFramePackModel", + "id": 47, + "type": "CLIPTextEncode", "pos": [ - 1253.046630859375, - -82.57657623291016 + 715.3054809570312, + 127.73457336425781 ], "size": [ - 480.7601013183594, - 130 + 400, + 200 ], "flags": {}, - "order": 7, + "order": 14, "mode": 0, "inputs": [ { - "name": "compile_args", - "shape": 7, - "type": "FRAMEPACKCOMPILEARGS", - "link": null + "name": "clip", + "type": "CLIP", + "link": 102 } ], "outputs": [ { - "name": "model", - "type": "FramePackMODEL", + "name": "CONDITIONING", + "type": "CONDITIONING", "links": [ - 129 + 114, + 118 ] } ], "properties": { - "aux_id": "kijai/ComfyUI-FramePackWrapper", - "ver": "49fe507eca8246cc9d08a8093892f40c1180e88f", - "Node name for S&R": "LoadFramePackModel" + "cnr_id": "comfy-core", + "ver": "0.3.28", + "Node name for S&R": "CLIPTextEncode" }, "widgets_values": [ - "Hyvid\\FramePackI2V_HY_fp8_e4m3fn.safetensors", - "bf16", - "fp8_e4m3fn", - "sdpa" - ] + "majestig stag in a forest" + ], + "color": "#232", + "bgcolor": "#353" } ], "links": [ - [ - 18, - 18, - 0, - 17, - 0, - "CLIP_VISION" - ], - [ - 22, - 12, - 0, - 20, - 1, - "VAE" - ], - [ - 62, - 12, - 0, - 33, - 1, - "VAE" - ], - [ - 83, - 17, - 0, - 39, - 3, - "CLIP_VISION_OUTPUT" - ], [ 85, 39, @@ -1000,14 +1488,6 @@ 1, "IMAGE" ], - [ - 117, - 48, - 0, - 20, - 0, - "IMAGE" - ], [ 118, 47, @@ -1063,23 +1543,186 @@ 39, 0, "FramePackMODEL" + ], + [ + 132, + 57, + 0, + 39, + 6, + "CLIP_VISION_OUTPUT" + ], + [ + 136, + 51, + 0, + 59, + 1, + "INT" + ], + [ + 137, + 51, + 1, + 59, + 2, + "INT" + ], + [ + 138, + 58, + 0, + 59, + 0, + "IMAGE" + ], + [ + 139, + 59, + 0, + 60, + 0, + "IMAGE" + ], + [ + 141, + 17, + 0, + 39, + 3, + "CLIP_VISION_OUTPUT" + ], + [ + 147, + 62, + 0, + 39, + 5, + "LATENT" + ], + [ + 148, + 18, + 0, + 63, + 0, + "*" + ], + [ + 149, + 64, + 0, + 17, + 0, + "CLIP_VISION" + ], + [ + 150, + 65, + 0, + 57, + 0, + "CLIP_VISION" + ], + [ + 151, + 60, + 0, + 57, + 1, + "IMAGE" + ], + [ + 152, + 60, + 0, + 62, + 0, + "IMAGE" + ], + [ + 153, + 12, + 0, + 66, + 0, + "*" + ], + [ + 154, + 67, + 0, + 33, + 1, + "VAE" + ], + [ + 155, + 68, + 0, + 20, + 1, + "VAE" + ], + [ + 156, + 48, + 0, + 20, + 0, + "IMAGE" + ], + [ + 158, + 69, + 0, + 62, + 1, + "VAE" ] ], - "groups": [], + "groups": [ + { + "id": 1, + "title": "End Image", + "bounding": [ + 12.77297592163086, + 999.1203002929688, + 2038.674560546875, + 412.9618225097656 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Start Image", + "bounding": [ + 11.781991958618164, + 531.3884887695312, + 2032.7288818359375, + 442.6904602050781 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], "config": {}, "extra": { "ds": { - "scale": 0.6727499949325823, + "scale": 0.6115909044841659, "offset": [ - -101.98335175553812, - 539.7905569391395 + 486.2830381478698, + 430.32571185324866 ] }, - "frontendVersion": "1.16.7", + "frontendVersion": "1.17.3", "VHS_latentpreview": true, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true }, "version": 0.4 -} +} \ No newline at end of file