Merge branch 'main' into manager-v4

Dr.Lt.Data
2025-11-26 22:14:11 +09:00
22 changed files with 35584 additions and 11170 deletions


@@ -7,13 +7,15 @@ import concurrent
import datetime
import concurrent.futures
import requests
import warnings
import argparse
builtin_nodes = set()
import sys
from urllib.parse import urlparse
from github import Github
from github import Github, Auth
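# Illustrative sketch (not part of this commit): the widened import enables
# PyGithub's newer auth style; the positional Github(token) form it replaces
# is deprecated in favor of an Auth object. Assumes a GITHUB_TOKEN
# environment variable is set.
#
#   import os
#   from github import Github, Auth
#   g = Github(auth=Auth.Token(os.environ["GITHUB_TOKEN"]))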
def download_url(url, dest_folder, filename=None):
@@ -39,26 +41,51 @@ def download_url(url, dest_folder, filename=None):
raise Exception(f"Failed to download file from {url}")
# prepare temp dir
if len(sys.argv) > 1:
temp_dir = sys.argv[1]
else:
temp_dir = os.path.join(os.getcwd(), ".tmp")
def parse_arguments():
"""Parse command-line arguments"""
parser = argparse.ArgumentParser(
description='ComfyUI Manager Node Scanner',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog='''
Examples:
# Standard mode
python3 scanner.py
python3 scanner.py --skip-update
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
# Scan-only mode
python3 scanner.py --scan-only temp-urls-clean.list
python3 scanner.py --scan-only urls.list --temp-dir /custom/temp
python3 scanner.py --scan-only urls.list --skip-update
'''
)
parser.add_argument('--scan-only', type=str, metavar='URL_LIST_FILE',
help='Scan-only mode: provide URL list file (one URL per line)')
parser.add_argument('--temp-dir', type=str, metavar='DIR',
help='Temporary directory for cloned repositories')
parser.add_argument('--skip-update', action='store_true',
help='Skip git clone/pull operations')
parser.add_argument('--skip-stat-update', action='store_true',
help='Skip GitHub stats collection')
parser.add_argument('--skip-all', action='store_true',
help='Skip all update operations')
# Backward compatibility: positional argument for temp_dir
parser.add_argument('temp_dir_positional', nargs='?', metavar='TEMP_DIR',
help='(Legacy) Temporary directory path')
args = parser.parse_args()
return args
skip_update = '--skip-update' in sys.argv or '--skip-all' in sys.argv
skip_stat_update = '--skip-stat-update' in sys.argv or '--skip-all' in sys.argv
if not skip_stat_update:
g = Github(os.environ.get('GITHUB_TOKEN'))
else:
g = None
print(f"TEMP DIR: {temp_dir}")
# Module-level variables (will be set in main if running as script)
args = None
scan_only_mode = False
url_list_file = None
temp_dir = None
skip_update = False
skip_stat_update = True
g = None
parse_cnt = 0
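# Illustrative sketch of how the new flags resolve; the parser is rebuilt
# inline to mirror parse_arguments() above, and the values are placeholders.
import argparse

_demo = argparse.ArgumentParser()
_demo.add_argument('--scan-only', type=str)
_demo.add_argument('--skip-update', action='store_true')
_demo.add_argument('--skip-all', action='store_true')
_demo.add_argument('temp_dir_positional', nargs='?')

_args = _demo.parse_args(['--scan-only', 'urls.list', '--skip-all'])
assert _args.scan_only == 'urls.list'
assert _args.skip_update or _args.skip_all  # --skip-all implies skip_update downstream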
@@ -73,12 +100,22 @@ def extract_nodes(code_text):
parse_cnt += 1
code_text = re.sub(r'\\[^"\']', '', code_text)
parsed_code = ast.parse(code_text)
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=SyntaxWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
parsed_code = ast.parse(code_text)
# Support both ast.Assign and ast.AnnAssign (for type-annotated assignments)
assignments = (node for node in parsed_code.body if isinstance(node, (ast.Assign, ast.AnnAssign)))
assignments = (node for node in parsed_code.body if isinstance(node, ast.Assign))
for assignment in assignments:
if isinstance(assignment.targets[0], ast.Name) and assignment.targets[0].id in ['NODE_CONFIG', 'NODE_CLASS_MAPPINGS']:
# Handle ast.AnnAssign (e.g., NODE_CLASS_MAPPINGS: Type = {...})
if isinstance(assignment, ast.AnnAssign):
if isinstance(assignment.target, ast.Name) and assignment.target.id in ['NODE_CONFIG', 'NODE_CLASS_MAPPINGS']:
node_class_mappings = assignment.value
break
# Handle ast.Assign (e.g., NODE_CLASS_MAPPINGS = {...})
elif isinstance(assignment.targets[0], ast.Name) and assignment.targets[0].id in ['NODE_CONFIG', 'NODE_CLASS_MAPPINGS']:
node_class_mappings = assignment.value
break
else:
@@ -90,7 +127,7 @@ def extract_nodes(code_text):
for key in node_class_mappings.keys:
if key is not None and isinstance(key.value, str):
s.add(key.value.strip())
return s
else:
return set()
@@ -98,6 +135,99 @@ def extract_nodes(code_text):
return set()
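# Illustrative sketch of the widened AST walk: a type-annotated
# NODE_CLASS_MAPPINGS assignment (ast.AnnAssign) now yields its keys just
# like a plain assignment (ast.Assign). The class name is a placeholder;
# the source is only parsed, never executed.
_src = 'NODE_CLASS_MAPPINGS: dict = {"LoadImageX": LoadImageX}'
for _node in ast.parse(_src).body:
    if isinstance(_node, ast.AnnAssign):
        _target, _value = _node.target, _node.value
    elif isinstance(_node, ast.Assign):
        _target, _value = _node.targets[0], _node.value
    else:
        continue
    if isinstance(_target, ast.Name) and _target.id == 'NODE_CLASS_MAPPINGS':
        print({k.value for k in _value.keys if isinstance(k, ast.Constant)})  # {'LoadImageX'}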
def has_comfy_node_base(class_node):
"""Check if class inherits from io.ComfyNode or ComfyNode"""
for base in class_node.bases:
# Case 1: ComfyNode
if isinstance(base, ast.Name) and base.id == 'ComfyNode':
return True
# Case 2: io.ComfyNode
elif isinstance(base, ast.Attribute):
if base.attr == 'ComfyNode':
return True
return False
def extract_keyword_value(call_node, keyword):
"""
Extract string value of keyword argument
Schema(node_id="MyNode") -> "MyNode"
"""
for kw in call_node.keywords:
if kw.arg == keyword:
# ast.Constant (Python 3.8+)
if isinstance(kw.value, ast.Constant):
if isinstance(kw.value.value, str):
return kw.value.value
# ast.Str (pre-Python 3.8 AST) - suppress deprecation warning
else:
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=DeprecationWarning)
if hasattr(ast, 'Str') and isinstance(kw.value, ast.Str):
return kw.value.s
return None
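# Illustrative sketch: keyword extraction on a freshly parsed call
# (the Schema name here is just text being parsed, not an import).
_call = ast.parse('Schema(node_id="MyNode")').body[0].value
assert extract_keyword_value(_call, 'node_id') == 'MyNode'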
def is_schema_call(call_node):
"""Check if ast.Call is io.Schema() or Schema()"""
func = call_node.func
if isinstance(func, ast.Name) and func.id == 'Schema':
return True
elif isinstance(func, ast.Attribute) and func.attr == 'Schema':
return True
return False
def extract_node_id_from_schema(class_node):
"""
Extract node_id from define_schema() method
"""
for item in class_node.body:
if isinstance(item, ast.FunctionDef) and item.name == 'define_schema':
# Walk through function body
for stmt in ast.walk(item):
if isinstance(stmt, ast.Call):
# Check if it's Schema() call
if is_schema_call(stmt):
node_id = extract_keyword_value(stmt, 'node_id')
if node_id:
return node_id
return None
def extract_v3_nodes(code_text):
"""
Extract V3 node IDs using AST parsing
Returns: set of node_id strings
"""
global parse_cnt
try:
if parse_cnt % 100 == 0:
print(".", end="", flush=True)
parse_cnt += 1
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=SyntaxWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
tree = ast.parse(code_text)
except (SyntaxError, UnicodeDecodeError):
return set()
nodes = set()
# Find io.ComfyNode subclasses
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef):
# Check if inherits from ComfyNode
if has_comfy_node_base(node):
node_id = extract_node_id_from_schema(node)
if node_id:
nodes.add(node_id)
return nodes
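# Illustrative end-to-end check of the V3 pipeline above on a minimal
# io.ComfyNode subclass; the class body is placeholder source that is only
# parsed, so neither io nor Schema needs to be importable.
_v3_src = '''
class MyNode(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(node_id="MyNode", display_name="My Node")
'''
print(extract_v3_nodes(_v3_src))  # {'MyNode'}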
# scan
def scan_in_file(filename, is_builtin=False):
global builtin_nodes
@@ -105,13 +235,18 @@ def scan_in_file(filename, is_builtin=False):
with open(filename, encoding='utf-8', errors='ignore') as file:
code = file.read()
pattern = r"_CLASS_MAPPINGS\s*=\s*{([^}]*)}"
# Support type annotations (e.g., NODE_CLASS_MAPPINGS: Type = {...}) and line continuations (\)
pattern = r"_CLASS_MAPPINGS\s*(?::\s*\w+\s*)?=\s*(?:\\\s*)?{([^}]*)}"
regex = re.compile(pattern, re.MULTILINE | re.DOTALL)
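# e.g. the widened pattern now also matches:
#   NODE_CLASS_MAPPINGS: dict = \
#   {"SomeNode": SomeNode}
# whereas the old pattern required a bare '= {'.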
nodes = set()
class_dict = {}
# V1 nodes detection
nodes |= extract_nodes(code)
# V3 nodes detection
nodes |= extract_v3_nodes(code)
code = re.sub(r'^#.*?$', '', code, flags=re.MULTILINE)
def extract_keys(pattern, code):
@@ -208,6 +343,53 @@ def get_nodes(target_dir):
return py_files, directories
def get_urls_from_list_file(list_file):
"""
Read URLs from list file for scan-only mode
Args:
list_file (str): Path to URL list file (one URL per line)
Returns:
list of tuples: [(url, "", None, None), ...]
Format: (url, title, preemptions, nodename_pattern)
- title: Empty string
- preemptions: None
- nodename_pattern: None
File format:
https://github.com/owner/repo1
https://github.com/owner/repo2
# Comments starting with # are ignored
Raises:
FileNotFoundError: If list_file does not exist
"""
if not os.path.exists(list_file):
raise FileNotFoundError(f"URL list file not found: {list_file}")
urls = []
with open(list_file, 'r', encoding='utf-8') as f:
for line_num, line in enumerate(f, 1):
line = line.strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
continue
# Validate URL format (basic check)
if not (line.startswith('http://') or line.startswith('https://')):
print(f"WARNING: Line {line_num} is not a valid URL: {line}")
continue
# Add URL with empty metadata
# (url, title, preemptions, nodename_pattern)
urls.append((line, "", None, None))
print(f"Loaded {len(urls)} URLs from {list_file}")
return urls
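# Illustrative usage sketch for the loader above; writes a throwaway list
# file (the URLs are placeholders) and reads it back.
import tempfile

_list_path = os.path.join(tempfile.mkdtemp(), 'urls.list')
with open(_list_path, 'w', encoding='utf-8') as _f:
    _f.write('# comment\nhttps://github.com/owner/repo1\nnot-a-url\n')
print(get_urls_from_list_file(_list_path))
# prints a WARNING for 'not-a-url'; result: [('https://github.com/owner/repo1', '', None, None)]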
def get_git_urls_from_json(json_file):
with open(json_file, encoding='utf-8') as file:
data = json.load(file)
@@ -264,13 +446,43 @@ def clone_or_pull_git_repository(git_url):
print(f"Failed to clone '{repo_name}': {e}")
def update_custom_nodes():
def update_custom_nodes(scan_only_mode=False, url_list_file=None):
"""
Update custom nodes by cloning/pulling repositories
Args:
scan_only_mode (bool): If True, use URL list file instead of custom-node-list.json
url_list_file (str): Path to URL list file (required if scan_only_mode=True)
Returns:
dict: node_info mapping {repo_name: (url, title, preemptions, node_pattern)}
"""
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
node_info = {}
git_url_titles_preemptions = get_git_urls_from_json('custom-node-list.json')
# Select URL source based on mode
if scan_only_mode:
if not url_list_file:
raise ValueError("url_list_file is required in scan-only mode")
git_url_titles_preemptions = get_urls_from_list_file(url_list_file)
print("\n[Scan-Only Mode]")
print(f" - URL source: {url_list_file}")
print(" - GitHub stats: DISABLED")
print(f" - Git clone/pull: {'ENABLED' if not skip_update else 'DISABLED'}")
print(" - Metadata: EMPTY")
else:
if not os.path.exists('custom-node-list.json'):
raise FileNotFoundError("custom-node-list.json not found")
git_url_titles_preemptions = get_git_urls_from_json('custom-node-list.json')
print("\n[Standard Mode]")
print(" - URL source: custom-node-list.json")
print(f" - GitHub stats: {'ENABLED' if not skip_stat_update else 'DISABLED'}")
print(f" - Git clone/pull: {'ENABLED' if not skip_update else 'DISABLED'}")
print(" - Metadata: FULL")
def process_git_url_title(url, title, preemptions, node_pattern):
name = os.path.basename(url)
@@ -382,46 +594,59 @@ def update_custom_nodes():
if not skip_stat_update:
process_git_stats(git_url_titles_preemptions)
# Git clone/pull for all repositories
with concurrent.futures.ThreadPoolExecutor(11) as executor:
for url, title, preemptions, node_pattern in git_url_titles_preemptions:
executor.submit(process_git_url_title, url, title, preemptions, node_pattern)
py_url_titles_and_pattern = get_py_urls_from_json('custom-node-list.json')
# .py file download (skip in scan-only mode - only process git repos)
if not scan_only_mode:
py_url_titles_and_pattern = get_py_urls_from_json('custom-node-list.json')
def download_and_store_info(url_title_preemptions_and_pattern):
url, title, preemptions, node_pattern = url_title_preemptions_and_pattern
name = os.path.basename(url)
if name.endswith(".py"):
node_info[name] = (url, title, preemptions, node_pattern)
def download_and_store_info(url_title_preemptions_and_pattern):
url, title, preemptions, node_pattern = url_title_preemptions_and_pattern
name = os.path.basename(url)
if name.endswith(".py"):
node_info[name] = (url, title, preemptions, node_pattern)
try:
download_url(url, temp_dir)
except Exception:
print(f"[ERROR] Cannot download '{url}'")
with concurrent.futures.ThreadPoolExecutor(10) as executor:
executor.map(download_and_store_info, py_url_titles_and_pattern)
with concurrent.futures.ThreadPoolExecutor(10) as executor:
executor.map(download_and_store_info, py_url_titles_and_pattern)
return node_info
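# Illustrative sketch of the bounded fan-out pattern used for cloning above,
# with a stand-in work function and placeholder URLs; the with-block waits
# for all submitted tasks on exit.
def _work(url):
    print('processing', url)

_urls = [f'https://example.com/repo{i}' for i in range(3)]
with concurrent.futures.ThreadPoolExecutor(11) as _pool:
    for _u in _urls:
        _pool.submit(_work, _u)
# executor.map(_work, _urls) is the variant used for the .py downloads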
def gen_json(node_info):
def gen_json(node_info, scan_only_mode=False):
"""
Generate extension-node-map.json from scanned node information
Args:
node_info (dict): Repository metadata mapping
scan_only_mode (bool): If True, exclude metadata from output
"""
# scan from .py file
node_files, node_dirs = get_nodes(temp_dir)
comfyui_path = os.path.abspath(os.path.join(temp_dir, "ComfyUI"))
node_dirs.remove(comfyui_path)
node_dirs = [comfyui_path] + node_dirs
# Only reorder if ComfyUI exists in the list
if comfyui_path in node_dirs:
node_dirs.remove(comfyui_path)
node_dirs = [comfyui_path] + node_dirs
data = {}
for dirname in node_dirs:
py_files = get_py_file_paths(dirname)
metadata = {}
nodes = set()
for py in py_files:
nodes_in_file, metadata_in_file = scan_in_file(py, dirname == "ComfyUI")
nodes.update(nodes_in_file)
# Include metadata from .py files in both modes
metadata.update(metadata_in_file)
dirname = os.path.basename(dirname)
@@ -436,17 +661,28 @@ def gen_json(node_info):
if dirname in node_info:
git_url, title, preemptions, node_pattern = node_info[dirname]
metadata['title_aux'] = title
# Conditionally add metadata based on mode
if not scan_only_mode:
# Standard mode: include all metadata
metadata['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if preemptions is not None:
metadata['preemptions'] = preemptions
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
# Scan-only mode: metadata remains empty
data[git_url] = (nodes, metadata)
else:
print(f"WARN: {dirname} is removed from custom-node-list.json")
# Scan-only mode: Repository not in node_info (expected behavior)
# Construct URL from dirname (author_repo format)
if '_' in dirname:
parts = dirname.split('_', 1)
git_url = f"https://github.com/{parts[0]}/{parts[1]}"
data[git_url] = (nodes, metadata)
else:
print(f"WARN: {dirname} is removed from custom-node-list.json")
for file in node_files:
nodes, metadata = scan_in_file(file)
@@ -459,13 +695,16 @@ def gen_json(node_info):
if file in node_info:
url, title, preemptions, node_pattern = node_info[file]
metadata['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
# Conditionally add metadata based on mode
if not scan_only_mode:
metadata['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if node_pattern is not None:
metadata['nodename_pattern'] = node_pattern
data[url] = (nodes, metadata)
else:
@@ -477,6 +716,10 @@ def gen_json(node_info):
for extension in extensions:
node_list_json_path = os.path.join(temp_dir, extension, 'node_list.json')
if os.path.exists(node_list_json_path):
# Skip if extension not in node_info (scan-only mode with limited URLs)
if extension not in node_info:
continue
git_url, title, preemptions, node_pattern = node_info[extension]
with open(node_list_json_path, 'r', encoding='utf-8') as f:
@@ -506,14 +749,16 @@ def gen_json(node_info):
print("------------------------------------------------------")
node_list_json = {}
metadata_in_url['title_aux'] = title
# Conditionally add metadata based on mode
if not scan_only_mode:
metadata_in_url['title_aux'] = title
if preemptions is not None:
metadata['preemptions'] = preemptions
if preemptions is not None:
metadata_in_url['preemptions'] = preemptions
if node_pattern is not None:
metadata_in_url['nodename_pattern'] = node_pattern
if node_pattern is not None:
metadata_in_url['nodename_pattern'] = node_pattern
nodes = list(nodes)
nodes.sort()
data[git_url] = (nodes, metadata_in_url)
@@ -523,12 +768,53 @@ def gen_json(node_info):
json.dump(data, file, indent=4, sort_keys=True)
print("### ComfyUI Manager Node Scanner ###")
if __name__ == "__main__":
# Parse arguments
args = parse_arguments()
print("\n# Updating extensions\n")
updated_node_info = update_custom_nodes()
# Determine mode
scan_only_mode = args.scan_only is not None
url_list_file = args.scan_only if scan_only_mode else None
print("\n# 'extension-node-map.json' file is generated.\n")
gen_json(updated_node_info)
# Determine temp_dir
if args.temp_dir:
temp_dir = args.temp_dir
elif args.temp_dir_positional:
temp_dir = args.temp_dir_positional
else:
temp_dir = os.path.join(os.getcwd(), ".tmp")
print("\nDONE.\n")
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
# Determine skip flags
skip_update = args.skip_update or args.skip_all
skip_stat_update = args.skip_stat_update or args.skip_all or scan_only_mode
if not skip_stat_update:
auth = Auth.Token(os.environ.get('GITHUB_TOKEN'))
g = Github(auth=auth)
else:
g = None
print("### ComfyUI Manager Node Scanner ###")
if scan_only_mode:
print(f"\n# [Scan-Only Mode] Processing URL list: {url_list_file}\n")
else:
print("\n# [Standard Mode] Updating extensions\n")
# Update/clone repositories and collect node info
updated_node_info = update_custom_nodes(scan_only_mode, url_list_file)
print("\n# Generating 'extension-node-map.json'...\n")
# Generate extension-node-map.json
gen_json(updated_node_info, scan_only_mode)
print("\n✅ DONE.\n")
if scan_only_mode:
print("Output: extension-node-map.json (node mappings only)")
else:
print("Output: extension-node-map.json (full metadata)")