UPDATE: HTML/MD improvements.

- pulled ESM modules for local loading.
- added downshow module for better HTML to MD conversion.
This commit is contained in:
korutech-ai
2024-08-28 07:44:59 +12:00
parent 3af7dd8a4f
commit d07d013b30
3 changed files with 2734 additions and 28 deletions

231
web/downshow.js Normal file
View File

@@ -0,0 +1,231 @@
/**
* downshow.js -- A javascript library to convert HTML to markdown.
*
* Copyright (c) 2013 Alex Cornejo.
*
* Original Markdown Copyright (c) 2004-2005 John Gruber
* <http://darlingfireball.net/projects/markdown/>
*
* Redistributable under a BSD-style open source license.
*
* downshow has no external dependencies. It has been tested in chrome and
* firefox, it probably works in internet explorer, but YMMV.
*
* Basic Usage:
*
* downshow(document.getElementById('#yourid').innerHTML);
*
* TODO:
* - Remove extra whitespace between words in headers and other places.
*/
(function () {
var doc;
// Use browser DOM with jsdom as a fallback (for node.js)
try {
doc = document;
} catch(e) {
var jsdom = require("jsdom").jsdom;
doc = jsdom("<html><head></head><body></body></html>");
}
/**
* Returns every element in root in their bfs traversal order.
*
* In the process it transforms any nested lists to conform to the w3c
* standard, see: http://www.w3.org/wiki/HTML_lists#Nesting_lists
*/
function bfsOrder(root) {
var inqueue = [root], outqueue = [];
root._bfs_parent = null;
while (inqueue.length > 0) {
var elem = inqueue.shift();
outqueue.push(elem);
var children = elem.childNodes;
var liParent = null;
for (var i=0 ; i<children.length; i++) {
if (children[i].nodeType == 1) {// element node
if (children[i].tagName === 'LI') {
liParent = children[i];
} else if ((children[i].tagName === 'UL' || children[i].tagName === 'OL') && liParent) {
liParent.appendChild(children[i]);
i--;
continue;
}
children[i]._bfs_parent = elem;
inqueue.push(children[i]);
}
}
}
outqueue.shift();
return outqueue;
}
/**
* Remove whitespace and newlines from beginning and end of a sting.
*/
function trim(str) {
return str.replace(/^\s\s*/,'').replace(/\s\s*$/, '');
}
/**
* Remove all newlines and trims the resulting string.
*/
function nltrim(str) {
return str.replace(/\s{2,}/g, ' ').replace(/^\s\s*/,'').replace(/\s\s*$/, '');
}
/**
* Add prefix to the beginning of every line in block.
*/
function prefixBlock(prefix, block, skipEmpty) {
var lines = block.split('\n');
for (var i =0; i<lines.length; i++) {
// Do not prefix empty lines
if (lines[i].length === 0 && skipEmpty === true)
continue;
else
lines[i] = prefix + lines[i];
}
return lines.join('\n');
}
/**
* Set the node's content.
*/
function setContent(node, content, prefix, suffix) {
if (content.length > 0) {
if (prefix && suffix)
node._bfs_text = prefix + content + suffix;
else
node._bfs_text = content;
} else
node._bfs_text = '';
}
/**
* Get a node's content.
*/
function getContent(node) {
var text = '', atom;
for (var i = 0; i<node.childNodes.length; i++) {
if (node.childNodes[i].nodeType === 1) {
atom = node.childNodes[i]._bfs_text;
} else if (node.childNodes[i].nodeType === 3) {
atom = node.childNodes[i].data;
} else
continue;
if (text.match(/[\t ]+$/) && atom.match(/^[\t ]+/)) {
text = text.replace(/[\t ]+$/,'') + ' ' + atom.replace(/^[\t ]+/, '');
} else {
text = text + atom;
}
}
return text;
}
/**
* Process a node in the DOM tree.
* */
function processNode(node) {
if (node.tagName === 'P' || node.tagName === 'DIV' || node.tagName === 'UL' || node.tagName === 'OL' || node.tagName === 'PRE')
setContent(node, getContent(node), '\n\n', '\n\n');
else if (node.tagName === 'BR')
setContent(node, '\n\n');
else if (node.tagName === 'HR')
setContent(node, '\n***\n');
else if (node.tagName === 'H1')
setContent(node, nltrim(getContent(node)), '\n# ', '\n');
else if (node.tagName === 'H2')
setContent(node, nltrim(getContent(node)), '\n## ', '\n');
else if (node.tagName === 'H3')
setContent(node, nltrim(getContent(node)), '\n### ', '\n');
else if (node.tagName === 'H4')
setContent(node, nltrim(getContent(node)), '\n#### ', '\n');
else if (node.tagName === 'H5')
setContent(node, nltrim(getContent(node)), '\n##### ', '\n');
else if (node.tagName === 'H6')
setContent(node, nltrim(getContent(node)), '\n###### ', '\n');
else if (node.tagName === 'B' || node.tagName === 'STRONG')
setContent(node, nltrim(getContent(node)), '**', '**');
else if (node.tagName === 'I' || node.tagName === 'EM')
setContent(node, nltrim(getContent(node)), '_', '_');
else if (node.tagName === 'A') {
var href = node.href ? nltrim(node.href) : '', text = nltrim(getContent(node)) || href, title = node.title ? nltrim(node.title) : '';
if (href.length > 0)
setContent(node, '[' + text + '](' + href + (title ? ' "' + title + '"' : '') + ')');
else
setContent(node, '');
} else if (node.tagName === 'IMG') {
var src = node.getAttribute('src') ? nltrim(node.getAttribute('src')) : '', alt = node.alt ? nltrim(node.alt) : '', caption = node.title ? nltrim(node.title) : '';
if (src.length > 0)
setContent(node, '![' + alt + '](' + src + (caption ? ' "' + caption + '"' : '') + ')');
else
setContent(node, '');
} else if (node.tagName === 'BLOCKQUOTE') {
var block_content = getContent(node);
if (block_content.length > 0)
setContent(node, prefixBlock('> ', block_content), '\n\n', '\n\n');
else
setContent(node, '');
} else if (node.tagName === 'CODE') {
if (node._bfs_parent.tagName === 'PRE' && node._bfs_parent._bfs_parent !== null)
setContent(node, prefixBlock(' ', getContent(node)));
else
setContent(node, nltrim(getContent(node)), '`', '`');
} else if (node.tagName === 'LI') {
var list_content = getContent(node);
if (list_content.length > 0)
if (node._bfs_parent.tagName === 'OL')
setContent(node, trim(prefixBlock(' ', list_content, true)), '1. ', '\n\n');
else
setContent(node, trim(prefixBlock(' ', list_content, true)), '- ', '\n\n');
else
setContent(node, '');
} else
setContent(node, getContent(node));
}
function downshow(html, options) {
var root = doc.createElement('pre');
root.innerHTML = html;
var nodes = bfsOrder(root).reverse(), i;
if (options && options.nodeParser) {
for (i = 0; i<nodes.length; i++) {
var result = options.nodeParser(doc, nodes[i].cloneNode(true));
if (result === false)
processNode(nodes[i]);
else
setContent(nodes[i], result);
}
} else {
for (i = 0; i<nodes.length; i++) {
processNode(nodes[i]);
}
}
return getContent(root)
// remove empty lines between blockquotes
.replace(/(\n(?:> )+[^\n]*)\n+(\n(?:> )+)/g, "$1\n$2")
// remove empty blockquotes
.replace(/\n((?:> )+[ ]*\n)+/g, '\n\n')
// remove extra newlines
.replace(/\n[ \t]*(?:\n[ \t]*)+\n/g,'\n\n')
// remove trailing whitespace
.replace(/\s\s*$/, '')
// convert lists to inline when not using paragraphs
.replace(/^([ \t]*(?:\d+\.|\+|\-)[^\n]*)\n\n+(?=[ \t]*(?:\d+\.|\+|\-|\*)[^\n]*)/gm, "$1\n")
// remove starting newlines
.replace(/^\n\n*/, '');
}
// Export for use in server and client.
if (typeof module !== 'undefined' && typeof module.exports !== 'undefined')
module.exports = downshow;
else if (typeof define === 'function' && define.amd)
define([], function () {return downshow;});
else
window.downshow = downshow;
})();

2498
web/marked.js Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,8 @@ import { app } from "../../scripts/app.js";
import { api } from "../../scripts/api.js";
import { ComfyDialog, $el } from "../../scripts/ui.js";
import { ComfyButton } from "../../scripts/ui/components/button.js";
import { marked } from "https://cdn.jsdelivr.net/npm/marked/lib/marked.esm.js";
import { marked } from "./marked.js";
import("./downshow.js");
function clamp(x, min, max) {
return Math.min(Math.max(x, min), max);
@@ -2707,7 +2708,7 @@ class ModelInfo {
const notesElement = this.elements.tabContents[3]; // TODO: remove magic value
notesElement.innerHTML = "";
const markdown = $el("div", {}, "");
markdown.innerHTML = marked.parse(noteText);
markdown.innerHTML = marked.parse(noteText);
notesElement.append.apply(notesElement,
(() => {
@@ -3175,31 +3176,7 @@ async function getModelInfos(urlText) {
version["description"],
civitaiInfo["description"] !== undefined ? "# " + name : undefined,
civitaiInfo["description"],
].filter(x => x !== undefined).join("\n\n")
.replaceAll("</p><p>", "\n\n")
.replaceAll("<strong>", "**").replaceAll("</strong>", "**")
.replaceAll("<ol>", "\n").replaceAll("</ol>", "\n") // wrong
.replaceAll("<ul>", "\n").replaceAll("</ul>", "\n")
.replaceAll("<li>", "- ").replaceAll("</li>", "\n")
.replaceAll("<em>", "*").replaceAll("</em>", "*")
.replaceAll("<code>", "`").replaceAll("</code>", "`")
.replaceAll("<blockquote", "\n<blockquote").replaceAll("</blockquote>", "\n")
.replaceAll("<br", "\n<br")
.replaceAll("<hr>", "\n\n---\n\n")
.replaceAll("<h1", "\n# <h1").replaceAll("</h1>", "\n")
.replaceAll("<h2", "\n## <h2").replaceAll("</h2>", "\n")
.replaceAll("<h3", "\n### <h3").replaceAll("</h3>", "\n")
.replaceAll("<h4", "\n#### <h4").replaceAll("</h4>", "\n")
.replaceAll("<h5", "\n##### <h5").replaceAll("</h5>", "\n")
.replaceAll("<h6", "\n###### <h6").replaceAll("</h6>", "\n")
.replace(/href="(\S*)">/g, 'href=""> $1 <a href="">')
.replace(/src="(\S*)">/g, 'src=""> $1 <img src="">')
// <script></script>
// <span></span>
.replace(/<[^>]+>/g, "") // quick hack
.replaceAll("&lt;", "<").replaceAll("&gt;", ">")
.replaceAll("&lte;", "<=").replaceAll("&gte;", ">=")
.replaceAll("&amp;", "&");
].filter(x => x !== undefined).join("\n\n");
version["files"].forEach((file) => {
infos.push({
"images": images,
@@ -3207,7 +3184,7 @@ async function getModelInfos(urlText) {
"modelType": type,
"downloadUrl": file["downloadUrl"],
"downloadFilePath": "",
"description": description,
"description": downshow(description),
"details": {
"fileSizeKB": file["sizeKB"],
"fileType": file["type"],