new proxy (still not tested/implemented)

This commit is contained in:
zefie
2025-08-08 14:12:57 -04:00
parent 80f19dbecb
commit 222a77d9a7
3 changed files with 490 additions and 87 deletions

View File

@@ -1212,9 +1212,41 @@ function handleProxy(socket, request_type, request_headers, res, data) {
headers["wtv-trusted"] = false;
if (typeof res.headers['Content-Type'] === 'string' && res.headers['Content-Type'].startsWith("text")) {
// Get the original URL for relative link fixing
const originalUrl = request_headers.request.split(' ')[1];
// Transform HTML content for WebTV compatibility
if (res.headers['Content-Type'].includes('html') &&
minisrv_config.services[request_type]?.use_minifying_proxy !== false) {
try {
const WTVProxy = require('./includes/classes/WTVProxy.js');
const proxy = new WTVProxy(minisrv_config);
let htmlContent = Buffer.concat(data).toString();
// Apply WebTV-specific transformations
const transformOptions = {
removeImages: minisrv_config.services[request_type]?.remove_images || false,
maxImageWidth: minisrv_config.services[request_type]?.max_image_width || 400,
simplifyTables: minisrv_config.services[request_type]?.simplify_tables !== false,
addWTVControls: minisrv_config.services[request_type]?.add_wtv_controls !== false,
maxWidth: minisrv_config.services[request_type]?.max_width || 544
};
htmlContent = proxy.transformHtml(htmlContent, originalUrl, transformOptions);
data = [Buffer.from(htmlContent)];
if (minisrv_config.config.verbosity >= 3) {
console.log(` * HTML transformed for WebTV compatibility (${originalUrl})`);
}
} catch (err) {
console.warn(` * HTML transformation failed: ${err.message}`);
}
}
if (request_type != "http" && request_type != "https") {
// replace http and https links on non http/https protocol (for proto:// for example)
var data_t = data.toString().replaceAll("http://", request_type + "://").replaceAll("https://", request_type + "://");
var data_t = Buffer.concat(data).toString().replaceAll("http://", request_type + "://").replaceAll("https://", request_type + "://");
data = [Buffer.from(data_t)]
}
}

View File

@@ -0,0 +1,457 @@
'use strict';
const { WTVShared } = require("./WTVShared.js");
/**
 * Regex-based HTML down-converter that rewrites modern pages into markup
 * aimed at HTML 3.0/4.0-era clients (WebTV).
 *
 * Everything here works on strings with regular expressions, not a real HTML
 * parser, so attribute values that contain a literal ">" and whitespace inside
 * <pre>/<textarea> are known limitations of the approach.
 */
class WTVMinifyingProxy {
    /**
     * @param {Object} minisrv_config - Server configuration; stored on the
     *   instance and passed to the WTVShared constructor.
     */
    constructor(minisrv_config) {
        this.minisrv_config = minisrv_config;
        this.wtvshared = new WTVShared(this.minisrv_config);

        // HTML 3.0/4.0 compatible tags and attributes.
        // NOTE(review): neither list is consulted by any method of this class yet.
        this.allowedTags = [
            'html', 'head', 'title', 'meta', 'body', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'p', 'br', 'hr', 'div', 'span', 'a', 'img', 'ul', 'ol', 'li', 'table', 'tr',
            'td', 'th', 'tbody', 'thead', 'tfoot', 'form', 'input', 'textarea', 'select',
            'option', 'button', 'b', 'i', 'u', 'strong', 'em', 'center', 'font', 'big',
            'small', 'sub', 'sup', 'pre', 'code', 'blockquote', 'dl', 'dt', 'dd'
        ];
        this.allowedAttributes = [
            'href', 'src', 'alt', 'title', 'width', 'height', 'border', 'align', 'valign',
            'bgcolor', 'color', 'size', 'face', 'target', 'name', 'value', 'type', 'action',
            'method', 'cols', 'rows', 'cellpadding', 'cellspacing', 'nowrap'
        ];

        // CSS properties that have a presentational HTML attribute equivalent.
        this.cssToHtml = {
            'text-align': 'align',
            'vertical-align': 'valign',
            'background-color': 'bgcolor',
            'color': 'color',
            'font-size': 'size',
            'font-family': 'face'
        };
    }

    /**
     * Transform modern HTML to an HTML 3.0/4.0 compatible version.
     *
     * Runs, in order: cleanHtml, convertModernTags, convertCssToAttributes,
     * fixUrls, removeUnsupportedContent, minifyHtml. The result is NOT wrapped
     * in a document skeleton; transformForWebTV does that.
     *
     * @param {string} html - The HTML content to transform
     * @param {string} url - The original URL (for fixing relative links)
     * @returns {string} Transformed HTML
     * @throws {Error} "HTML transformation failed: ..." with the original
     *   error attached as `cause`.
     */
    transformHtml(html, url = '') {
        try {
            let transformed = this.cleanHtml(html);
            transformed = this.convertModernTags(transformed);
            transformed = this.convertCssToAttributes(transformed);
            transformed = this.fixUrls(transformed, url);
            transformed = this.removeUnsupportedContent(transformed);
            return this.minifyHtml(transformed);
        } catch (err) {
            // Keep the original error reachable instead of flattening it to a message.
            throw new Error(`HTML transformation failed: ${err.message}`, { cause: err });
        }
    }

    /**
     * Remove comments, CDATA sections and XML declarations, then collapse every
     * run of whitespace to a single space.
     *
     * @param {string} html
     * @returns {string}
     */
    cleanHtml(html) {
        return html
            // Remove HTML comments
            .replace(/<!--[\s\S]*?-->/g, '')
            // Remove CDATA sections
            .replace(/<!\[CDATA\[[\s\S]*?\]\]>/g, '')
            // Remove XML declarations
            .replace(/<\?xml[^>]*\?>/g, '')
            // Normalize whitespace (also flattens <pre> content - known limitation)
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Rename HTML5 sectioning tags to plain <div>, keeping their attributes.
     *
     * The lookahead requires the tag name to end at whitespace, "/" or ">", so
     * custom elements such as <nav-bar> are not rewritten, and closing tags
     * written with trailing whitespace ("</nav >") are handled.
     *
     * @param {string} html
     * @returns {string}
     */
    convertModernTags(html) {
        const semanticTags = [
            'header', 'footer', 'nav', 'section', 'article',
            'aside', 'main', 'figure', 'figcaption'
        ];
        const pattern = new RegExp(`<(/?)(?:${semanticTags.join('|')})(?=[\\s/>])`, 'gi');
        return html.replace(pattern, '<$1div');
    }

    /**
     * Drop <style> blocks and replace inline style="..." attributes with the
     * equivalent presentational HTML attributes (see cssToHtml).
     *
     * @param {string} html
     * @returns {string}
     */
    convertCssToAttributes(html) {
        // Style sheets cannot be mapped onto attributes, so they are discarded.
        const withoutSheets = html.replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, '');

        // Match each quote style separately so a value like
        // style="font-family: 'Arial'" is consumed as a whole. The lookbehind
        // keeps attributes such as data-style from being treated as style.
        return withoutSheets.replace(
            /(?<![\w-])style\s*=\s*(?:"([^"]*)"|'([^']*)')/gi,
            (match, doubleQuoted, singleQuoted) =>
                this.parseStyleToAttributes(doubleQuoted ?? singleQuoted ?? '')
        );
    }

    /**
     * Convert a CSS declaration list into a space-separated string of HTML
     * attributes. Declarations without a cssToHtml mapping are dropped.
     *
     * @param {string} styleString - e.g. "color: red; font-size: 12px"
     * @returns {string} e.g. 'color="#FF0000" size="3"'
     */
    parseStyleToAttributes(styleString) {
        const attributes = [];
        for (const declaration of String(styleString).split(';')) {
            const colon = declaration.indexOf(':');
            if (colon === -1) continue;

            // CSS property names are case-insensitive.
            const property = declaration.slice(0, colon).trim().toLowerCase();
            const value = declaration.slice(colon + 1).replace(/!important\s*$/i, '').trim();

            // Own-property check: a plain lookup would also hit Object.prototype
            // members such as "constructor".
            if (!value || !Object.prototype.hasOwnProperty.call(this.cssToHtml, property)) continue;

            let htmlValue = value;
            if (property === 'font-size') {
                htmlValue = this.convertFontSize(value);
            } else if (property === 'color' || property === 'background-color') {
                htmlValue = this.convertColor(value);
            } else if (property === 'font-family') {
                // Quoted family names would terminate the HTML attribute early.
                htmlValue = value.replace(/["']/g, '');
            }
            htmlValue = String(htmlValue).replace(/"/g, '&quot;');
            attributes.push(`${this.cssToHtml[property]}="${htmlValue}"`);
        }
        return attributes.join(' ');
    }

    /**
     * Convert a CSS font-size to an HTML <font size> value ('1'-'7').
     *
     * Absolute keywords map through the standard keyword table. Numbers with
     * an absolute unit (or none) go through the numeric thresholds as written.
     * em/rem/% are scaled so that 1em / 100% lands on size 3. Anything that
     * cannot be interpreted (inherit, calc(), ...) yields the default '3'
     * rather than the largest size.
     *
     * @param {string} cssSize
     * @returns {string}
     */
    convertFontSize(cssSize) {
        const text = String(cssSize).trim().toLowerCase();

        const keywords = {
            'xx-small': '1', 'x-small': '1', 'small': '2', 'medium': '3',
            'large': '4', 'x-large': '5', 'xx-large': '6', 'xxx-large': '7',
            'smaller': '2', 'larger': '4'
        };
        if (Object.prototype.hasOwnProperty.call(keywords, text)) return keywords[text];

        const parsed = /^([+-]?(?:\d+\.?\d*|\.\d+))\s*([a-z%]*)/.exec(text);
        if (!parsed) return '3';

        let size = Number.parseFloat(parsed[1]);
        const unit = parsed[2];
        if (unit === 'em' || unit === 'rem') {
            size *= 12;             // 1em -> 12, the threshold value for size 3
        } else if (unit === '%') {
            size = size * 12 / 100; // 100% -> 12
        }

        const thresholds = [[8, '1'], [10, '2'], [12, '3'], [14, '4'], [18, '5'], [24, '6']];
        const bucket = thresholds.find(([limit]) => size <= limit);
        return bucket ? bucket[1] : '7';
    }

    /**
     * Convert a CSS color to something an HTML color attribute accepts.
     *
     * Hex values are returned untouched, known color names become hex, and
     * integer rgb()/rgba() triples become #RRGGBB. Anything else is returned
     * as given (trimmed).
     *
     * @param {string} cssColor
     * @returns {string}
     */
    convertColor(cssColor) {
        const color = String(cssColor).trim();

        // If already in hex format, return as-is
        if (color.startsWith('#')) return color;

        const rgb = /^rgba?\(\s*(\d{1,3})(?:\s*,\s*|\s+)(\d{1,3})(?:\s*,\s*|\s+)(\d{1,3})\s*(?:[,/][^)]*)?\)$/i.exec(color);
        if (rgb) {
            const hex = rgb.slice(1, 4)
                .map((channel) => Math.min(255, Number(channel)).toString(16).padStart(2, '0'))
                .join('');
            return `#${hex.toUpperCase()}`;
        }

        const namedColors = {
            'black': '#000000', 'white': '#FFFFFF', 'red': '#FF0000',
            'green': '#008000', 'blue': '#0000FF', 'yellow': '#FFFF00',
            'cyan': '#00FFFF', 'magenta': '#FF00FF', 'gray': '#808080',
            'grey': '#808080', 'darkgray': '#A9A9A9', 'lightgray': '#D3D3D3'
        };
        const key = color.toLowerCase();
        return Object.prototype.hasOwnProperty.call(namedColors, key) ? namedColors[key] : color;
    }

    /**
     * Rewrite relative src/href attribute values to absolute URLs.
     *
     * Values that already carry a scheme (http:, https:, data:, mailto:,
     * javascript:, ...) and pure fragments (#...) are left alone. A value that
     * cannot be resolved is left untouched without affecting the others.
     *
     * @param {string} html
     * @param {string} baseUrl - URL the page was fetched from; when empty or
     *   unparseable the HTML is returned unchanged.
     * @returns {string}
     */
    fixUrls(html, baseUrl) {
        if (!baseUrl) return html;

        let base;
        try {
            base = new URL(baseUrl);
        } catch (e) {
            // Best effort: without a usable base there is nothing to resolve against.
            return html;
        }

        const hasScheme = /^[a-z][a-z0-9+.-]*:/i;
        return html.replace(
            /(?<![\w-])(src|href)\s*=\s*(?:"([^"]*)"|'([^']*)')/gi,
            (match, attribute, doubleQuoted, singleQuoted) => {
                const value = (doubleQuoted ?? singleQuoted ?? '').trim();
                if (value === '' || value.startsWith('#') || hasScheme.test(value)) return match;
                try {
                    return `${attribute.toLowerCase()}="${new URL(value, base).href}"`;
                } catch (e) {
                    // One malformed link must not prevent the rest from being fixed.
                    return match;
                }
            }
        );
    }

    /**
     * Remove scripts, embedded objects, frames, stylesheet links, most meta
     * tags, event handlers and class/id/data-* attributes.
     *
     * <noscript> wrappers are removed but their content is kept, since the
     * output has no scripts. Meta tags mentioning content-type or charset are
     * kept. Attribute stripping is applied only inside opening tags so that
     * visible text is never altered.
     *
     * @param {string} html
     * @returns {string}
     */
    removeUnsupportedContent(html) {
        const pairedTag = (name) =>
            new RegExp(`<${name}\\b[^>]*>[\\s\\S]*?<\\/${name}\\s*>`, 'gi');

        const withoutElements = html
            .replace(pairedTag('script'), '')
            // Unwrap noscript: keep what is inside, drop the tags themselves
            .replace(/<\/?noscript\b[^>]*>/gi, '')
            .replace(pairedTag('object'), '')
            // Void/unpaired tags: stop at the tag's own ">" so the rest of the
            // document is never swallowed.
            .replace(/<\/?embed\b[^>]*>/gi, '')
            .replace(pairedTag('iframe'), '')
            .replace(/<link\b[^>]*>/gi, '')
            .replace(/<meta\b(?![^>]*(?:content-type|charset))[^>]*>/gi, '');

        // Requiring leading whitespace keeps "content=" from matching as an
        // "on...=" handler.
        const eventHandler = /\s+on\w+\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi;
        const unsupportedAttribute = /\s+(?:class|id|data-[\w-]+)\s*=\s*("[^"]*"|'[^']*'|[^\s>]+)/gi;
        return withoutElements.replace(/<[a-z][^>]*>/gi, (tag) =>
            tag.replace(eventHandler, '').replace(unsupportedAttribute, ''));
    }

    /**
     * Minify HTML without changing how it renders.
     *
     * Whitespace between tags is collapsed to one space rather than removed:
     * "<b>a</b> <i>b</i>" must not become "ab" on screen.
     *
     * @param {string} html
     * @returns {string}
     */
    minifyHtml(html) {
        return html
            // Collapse whitespace between tags
            .replace(/>\s+</g, '> <')
            // Remove leading/trailing whitespace from lines
            .replace(/^\s+|\s+$/gm, '')
            // Collapse multiple spaces to single space
            .replace(/\s{2,}/g, ' ')
            .trim();
    }

    /**
     * Pull the document title out of raw HTML.
     *
     * @private
     * @param {string} html
     * @returns {string} Title with whitespace collapsed, or 'WebTV Page' when
     *   the title is missing or empty.
     */
    extractTitle(html) {
        const titleMatch = /<title\b[^>]*>([\s\S]*?)<\/title\s*>/i.exec(html);
        const title = titleMatch ? titleMatch[1].replace(/\s+/g, ' ').trim() : '';
        return title || 'WebTV Page';
    }

    /**
     * Remove doctype, html/head/body wrappers, title and meta tags from a
     * fragment so it can be nested inside a fresh document skeleton.
     *
     * @private
     * @param {string} fragment
     * @returns {string}
     */
    stripStructuralTags(fragment) {
        return fragment
            .replace(/<!DOCTYPE[^>]*>/gi, '')
            // \b keeps <header> from being mistaken for <head>
            .replace(/<\/?(?:html|head|body)\b[^>]*>/gi, '')
            .replace(/<title\b[^>]*>[\s\S]*?<\/title\s*>/gi, '')
            .replace(/<meta\b[^>]*>/gi, '')
            .trim();
    }

    /**
     * Return the part of a document that belongs inside <body>.
     *
     * Uses the <body> element when present; otherwise everything after
     * </head>, or the whole input, with structural tags stripped.
     *
     * @private
     * @param {string} html
     * @param {boolean} [stripWhenBodyFound=false] - Also strip structural tags
     *   from content found inside <body>.
     * @returns {string}
     */
    extractBodyContent(html, stripWhenBodyFound = false) {
        const bodyMatch = /<body\b[^>]*>([\s\S]*?)<\/body\s*>/i.exec(html);
        if (bodyMatch) {
            return stripWhenBodyFound
                ? this.stripStructuralTags(bodyMatch[1])
                : bodyMatch[1].trim();
        }
        const headEnd = /<\/head\s*>/i.exec(html);
        const remainder = headEnd ? html.substring(headEnd.index + headEnd[0].length) : html;
        return this.stripStructuralTags(remainder);
    }

    /**
     * Build the HTML 4.01 Transitional document skeleton around body content.
     *
     * @private
     * @param {string} title
     * @param {string} bodyContent
     * @returns {string}
     */
    wrapDocument(title, bodyContent) {
        return [
            '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">',
            '<html>',
            '<head>',
            '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">',
            `<title>${title}</title>`,
            '</head>',
            '<body>',
            bodyContent,
            '</body>',
            '</html>'
        ].join('\n');
    }

    /**
     * Ensure valid HTML structure with proper DOCTYPE.
     *
     * Extracts title and body from the input, strips nested structural tags,
     * truncates the body to 32768 characters and wraps it in a fresh skeleton.
     *
     * @param {string} html
     * @returns {string}
     */
    ensureValidStructure(html) {
        const title = this.extractTitle(html);
        let bodyContent = this.extractBodyContent(html, true);

        // If content is too long, truncate intelligently
        if (bodyContent.length > 32768) { // 32KB limit for WebTV
            bodyContent = this.intelligentTruncate(bodyContent, 32768);
        }
        return this.wrapDocument(title, bodyContent);
    }

    /**
     * Truncate content to at most maxLength characters (including the appended
     * notice), cutting at a tag or word boundary.
     *
     * If the cut lands inside a tag, the partial tag is dropped. If it lands
     * in text, the cut backs up to the previous space when that costs less
     * than 20% of the budget and stays after the last complete tag.
     * Elements left open by the cut are not closed.
     *
     * @param {string} content
     * @param {number} maxLength
     * @returns {string}
     */
    intelligentTruncate(content, maxLength) {
        if (content.length <= maxLength) return content;

        const notice = '<p><i>[Content truncated for WebTV compatibility]</i></p>';
        // Reserve room for the notice so the result honours maxLength.
        const budget = Math.max(0, maxLength - notice.length);
        let truncated = content.substring(0, budget);

        const lastOpenTag = truncated.lastIndexOf('<');
        const lastCloseTag = truncated.lastIndexOf('>');
        if (lastOpenTag > lastCloseTag) {
            // Cut landed inside a tag: drop the partial tag entirely.
            truncated = truncated.substring(0, lastOpenTag);
        } else {
            // Cut landed in text: prefer a word boundary.
            const lastSpace = truncated.lastIndexOf(' ');
            if (lastSpace > lastCloseTag && lastSpace > budget * 0.8) {
                truncated = truncated.substring(0, lastSpace);
            }
        }
        return truncated + notice;
    }

    /**
     * Transform HTML specifically for WebTV constraints and return a complete
     * document.
     *
     * @param {string} html - HTML content
     * @param {string} url - Original URL
     * @param {Object} options - Transformation options
     * @param {boolean} [options.simplifyTables=true] - Normalise <table> tags
     * @param {number} [options.maxTableWidth=500] - Width given to tables
     * @param {boolean} [options.removeImages=false] - Drop every <img>
     * @param {number} [options.maxImageWidth=400] - Width limit for images
     * @param {boolean} [options.addWTVControls=true] - Prepend navigation links
     * @param {number} [options.maxContentLength=32768] - Body length limit
     * @param {number} [options.maxWidth=544] - Accepted but currently unused
     * @param {boolean} [options.preserveLinks=true] - Accepted but currently unused
     * @returns {string} WebTV-compatible HTML
     * @throws {Error} Propagated from transformHtml.
     */
    transformForWebTV(html, url = '', options = {}) {
        const defaults = {
            maxWidth: 544,            // WebTV screen width
            maxTableWidth: 500,       // Max table width
            simplifyTables: true,     // Convert complex tables to simple ones
            removeImages: false,      // Whether to remove images entirely
            maxImageWidth: 400,       // Max image width
            preserveLinks: true,      // Keep navigation links
            addWTVControls: true,     // Add WebTV-specific navigation aids
            maxContentLength: 32768   // Max body length in characters
        };
        const config = { ...defaults, ...options };

        // The title is read from the untransformed input.
        const title = this.extractTitle(html);
        const transformed = this.transformHtml(html, url);
        let bodyContent = this.extractBodyContent(transformed, false);

        // WebTV-specific optimizations on body content
        if (config.simplifyTables) {
            bodyContent = this.simplifyTables(bodyContent, config.maxTableWidth);
        }
        if (config.removeImages) {
            bodyContent = bodyContent.replace(/<img\b[^>]*>/gi, '');
        } else {
            bodyContent = this.optimizeImages(bodyContent, config.maxImageWidth);
        }
        if (config.addWTVControls && url) {
            bodyContent = this.addWebTVControls(bodyContent, url);
        }

        // Ensure content isn't too long
        if (bodyContent.length > config.maxContentLength) {
            bodyContent = this.intelligentTruncate(bodyContent, config.maxContentLength);
        }
        return this.wrapDocument(title, bodyContent);
    }

    /**
     * Replace every <table ...> opening tag with a fixed, simple one.
     *
     * @param {string} html
     * @param {number} maxWidth - Value written into the width attribute
     * @returns {string}
     */
    simplifyTables(html, maxWidth) {
        return html.replace(
            /<table\b[^>]*>/gi,
            `<table border="1" cellpadding="2" cellspacing="0" width="${maxWidth}">`
        );
    }

    /**
     * Constrain image widths for WebTV display.
     *
     * Images without a width attribute get width=maxWidth. Images whose
     * numeric pixel width exceeds maxWidth are clamped, and a numeric height
     * is scaled by the same factor. Percentage and other non-numeric widths
     * are left untouched. A self-closing slash stays at the end of the tag.
     *
     * @param {string} html
     * @param {number} maxWidth
     * @returns {string}
     */
    optimizeImages(html, maxWidth) {
        const widthPattern = /(?<![\w-])width\s*=\s*["']?(\d+)["']?(?=\s|$)/i;
        const heightPattern = /(?<![\w-])height\s*=\s*["']?(\d+)["']?(?=\s|$)/i;

        return html.replace(/<img\b([^>]*?)\s*(\/?)>/gi, (match, rawAttrs, selfClosing) => {
            let attrs = rawAttrs;
            if (!/(?<![\w-])width\s*=/i.test(attrs)) {
                attrs += ` width="${maxWidth}"`;
            } else {
                const declared = widthPattern.exec(attrs);
                const width = declared ? Number(declared[1]) : 0;
                if (width > maxWidth) {
                    attrs = attrs
                        .replace(heightPattern, (heightAttr, height) =>
                            `height="${Math.max(1, Math.round(Number(height) * maxWidth / width))}"`)
                        .replace(widthPattern, `width="${maxWidth}"`);
                }
            }
            return `<img${attrs}${selfClosing ? ' /' : ''}>`;
        });
    }

    /**
     * Prepend a small navigation bar (Back / Reload / Original Site) to the
     * body content.
     *
     * @param {string} html - Body content
     * @param {string} originalUrl - Target of the "Original Site" link
     * @returns {string}
     */
    addWebTVControls(html, originalUrl) {
        // The URL is untrusted request data: escape it for an attribute value.
        const safeUrl = String(originalUrl)
            .replace(/&/g, '&amp;')
            .replace(/"/g, '&quot;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');

        // &laquo; instead of a literal arrow: the wrapper declares iso-8859-1,
        // which has no arrow character.
        const controls = [
            '<div align="center">',
            '<font size="2">',
            '<a href="javascript:history.back()">&laquo; Back</a> |',
            '<a href="javascript:location.reload()">Reload</a> |',
            `<a href="${safeUrl}">Original Site</a>`,
            '</font>',
            '</div>',
            '<hr>'
        ].join('\n');

        // Insert controls at the beginning of body content, not after body tag
        return controls + html;
    }
}
module.exports = WTVMinifyingProxy;

View File

@@ -1,86 +0,0 @@
'use strict';
const { WTVShared, clientShowAlert } = require("./WTVShared.js");
/**
 * Legacy text-only HTML reducer: strips a page down to plain 7-bit ASCII
 * markup without images, links, inputs, scripts or styling, re-indents it one
 * tag per line, and wraps it in a minimal document.
 */
class WTVProxy {
    /**
     * @param {Object} minisrv_config - Server configuration; stored on the
     *   instance and passed to the WTVShared constructor.
     */
    constructor(minisrv_config) {
        this.minisrv_config = minisrv_config;
        this.wtvshared = new WTVShared(this.minisrv_config);
    }

    /**
     * Reduce HTML to a small, indented, ASCII-only document.
     *
     * @param {string} html - Source markup
     * @returns {string} Wrapped document, cut to roughly 512 characters
     * @throws {Error} "HTML transformation failed: ..." with the original
     *   error attached as `cause`.
     */
    transformHtml(html) {
        try {
            // Apply existing transformations
            let transformed = html
                .replace(/[^\x20-\x7E\n\r\t]/g, '') // Remove non-ASCII
                .replace(/\s+/g, ' ') // Collapse whitespace
                .replace(/<!--[\s\S]*?-->/g, '') // Remove comments
                .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '') // Remove scripts
                // Void tags: match only up to the tag's own ">" so the removal
                // cannot run on to the last ">" of the document.
                .replace(/<(?:meta|img|input|link|embed)\b[^>]*>/gi, '')
                .replace(/<\/?a\b[^>]*>/gi, '') // Remove links (opening and closing), keep link text
                .replace(/<iframe\b[^<]*(?:(?!<\/iframe>)<[^<]*)*<\/iframe>/gi, '')
                .replace(/<object\b[^<]*(?:(?!<\/object>)<[^<]*)*<\/object>/gi, '')
                .replace(/javascript:/gi, '')
                // Lookbehind keeps e.g. "content=" from being read as an on...= handler
                .replace(/(?<![\w-])on\w+\s*=\s*("[^"]*"|'[^']*'|[^ >]+)/gi, '')
                .replace(/(?<![\w-])style\s*=\s*("[^"]*"|'[^']*'|[^ >]+)/gi, '')
                .replace(/(?<![\w-])class\s*=\s*("[^"]*"|'[^']*'|[^ >]+)/gi, '')
                .replace(/(?<![\w-])id\s*=\s*("[^"]*"|'[^']*'|[^ >]+)/gi, '')
                // Remove wrapper tags whole; stripping only "<div" would leave
                // a stray ">" in the output.
                .replace(/<\/?(?:div|span|section|article|aside|header|footer|nav)\b[^>]*>/gi, '')
                .replace(/FP_preloadImgs\s*\(.*?\)/gi, '');

            // Normalize for processing: one tag or text run per line
            transformed = transformed
                .replace(/>\s+</g, '><') // Remove accidental whitespace between tags
                .replace(/</g, '\n<') // Add newline before each tag
                .replace(/>/g, '>\n') // Add newline after each tag
                .replace(/\n\s*\n/g, '\n'); // Collapse multiple newlines

            // Format with indentation
            const lines = transformed.split('\n');
            let indentLevel = 0;
            const indentSize = 2;
            const formatted = lines.map((line) => {
                const trimmed = line.trim();
                if (trimmed === '') return '';
                const isClosing = /^<\/.+?>/.test(trimmed);
                // Tags that never get a closing partner must not deepen the indent
                const isSelfClosing = /^<.+?\/>$/.test(trimmed) ||
                    /^<hr/i.test(trimmed) || /^<br/i.test(trimmed) ||
                    /^<meta/i.test(trimmed) || /^<img/i.test(trimmed) ||
                    /^<input/i.test(trimmed) || /^<audioscope/i.test(trimmed);
                const isOpening = /^<([a-zA-Z0-9]+)(?!.*\/>).*?>/.test(trimmed) && !isClosing;
                // Dedent before printing a closing tag, indent after an opening one
                if (isClosing) indentLevel = Math.max(indentLevel - 1, 0);
                const indentedLine = ' '.repeat(indentLevel * indentSize) + trimmed;
                if (isOpening && !isSelfClosing) indentLevel++;
                return indentedLine;
            });
            transformed = formatted.join('\n').trim();

            // Wrap in DOCTYPE and HTML structure
            transformed = `<!DOCTYPE html>\n<html>\n <head>\n <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">\n </head>\n <body>\n${transformed}\n </body>\n</html>`;

            // Truncate if necessary
            // NOTE(review): 512 characters includes the wrapper itself, leaving
            // very little page content - confirm this limit is intentional.
            if (transformed.length > 512) {
                transformed = transformed.substring(0, 512);
                // Back up to the last tag start so no partial tag is emitted, then re-close
                transformed = transformed.substring(0, transformed.lastIndexOf('<')) + '\n </body>\n</html>';
            }
            return Buffer.from(transformed, 'ascii').toString('ascii');
        } catch (err) {
            // Keep the original error reachable instead of flattening it to a message.
            throw new Error(`HTML transformation failed: ${err.message}`, { cause: err });
        }
    }
}
module.exports = WTVProxy;