// Host that proxied requests are forwarded to when the path carries no
// encoded host; rewriteURL() also leaves this host un-encoded.
const UPSTREAM = "www.gutenberg.org"
// Path prefix that rewriteURL() puts in front of every proxied path and that
// restoreURL() strips again.
const PATH_PREFIX = "/gutenberg-proxy"
// Delimiters around a non-default upstream host embedded in the path, e.g.
// "/--rw--aleph---gutenberg---org--wr--" (dots are written as "---").
const RW_START = "/--rw--"
const RW_STOP = "--wr--"
// URL of the request currently being handled; assigned by handleRequest() and
// read by rewriteURL().
// NOTE(review): module-level mutable state — if the runtime interleaves several
// requests in one instance these can be overwritten mid-request; confirm.
let g_origin_url
// Prefix that rewriteURL() prepends to root-relative URLs; assigned by
// handleRequest(). (The name is a misspelling of "prefix", kept because other
// functions reference it.)
let g_path_pfrefix
/**
 * Proxy a single incoming request to the upstream Gutenberg host.
 *
 * The proxied URL is decoded with restoreURL(), the request is replayed against
 * the decoded host with its Referer/Origin headers translated back, and the
 * upstream response is returned with its Set-Cookie/Location headers rewritten.
 * HTML, CSS, JavaScript and JSON bodies additionally get their embedded URLs
 * rewritten to point back at this proxy, unless the declared Content-Length
 * exceeds 100 KiB.
 *
 * @param {Request} request - The incoming request from the fetch event.
 * @returns {Promise<Response>} The (possibly rewritten) upstream response, or
 *   an empty 403 when the decoded target is not a gutenberg.org host.
 */
async function handleRequest(request) {
  // NOTE(review): g_origin_url / g_path_pfrefix are module-level and are read
  // by rewriteURL() after this function has awaited; confirm the runtime never
  // interleaves two requests in the same instance.
  g_origin_url = new URL(request.url)

  // The request body is deliberately not read here: consuming it would leave
  // nothing to forward when the request is cloned below (this broke POSTs).
  const {urlString, pathPrefix} = restoreURL(request.url)
  g_path_pfrefix = pathPrefix
  const url = new URL(urlString)
  console.log("request.url", request.url)
  console.log("restore url", urlString)

  // Only gutenberg.org and its real subdomains may be proxied. A plain
  // endsWith("gutenberg.org") would also accept e.g. "evilgutenberg.org".
  const hostname = url.hostname.toLowerCase()
  if (hostname !== "gutenberg.org" && !hostname.endsWith(".gutenberg.org")) {
    return new Response("", {status: 403})
  }

  // Headers of the incoming request are immutable in the Workers runtime, so
  // every header change is made on the forwarded copy.
  const newRequest = new Request(urlString, request)
  const headers = newRequest.headers
  if (url.pathname === "/browse/scores/top") {
    // this page will cause ungzip fail
    headers.delete("Accept-Encoding")
  }
  headers.set("Host", url.host)

  const referer = headers.get("Referer")
  if (referer) {
    const restoredReferer = restoreURL(referer)
    headers.set("Referer", restoredReferer.urlString)
    // For everything except stylesheets, root-relative URLs in the response
    // are prefixed with the referring page's prefix rather than the request's.
    if (!url.pathname.endsWith(".css")) {
      g_path_pfrefix = restoredReferer.pathPrefix
    }
  }

  // "null" is a legal Origin value but not a URL; restoreURL() would throw on it.
  const origin = headers.get("Origin")
  if (origin && origin !== "null") {
    headers.set("Origin", restoreURL(origin).urlString)
  }

  const upstreamResponse = await fetch(newRequest, {cdnProxy: false, redirect: "manual"})
  // Responses returned by fetch() carry immutable headers; re-wrap the response
  // so the header rewrites below do not throw.
  const response = new Response(upstreamResponse.body, upstreamResponse)
  const responseHeaders = response.headers

  // Re-scope cookies to the proxy. getSetCookie() keeps multiple cookies
  // separate; get() would fold them into one comma-joined value.
  const singleCookie = responseHeaders.get("Set-Cookie")
  const cookies = typeof responseHeaders.getSetCookie === "function"
    ? responseHeaders.getSetCookie()
    : (singleCookie ? [singleCookie] : [])
  if (cookies.length > 0) {
    responseHeaders.delete("Set-Cookie")
    for (const cookie of cookies) {
      // No trailing ";" is required, so a final Domain attribute is rewritten
      // too; the hostname is used because a cookie Domain cannot carry a port.
      const rewritten = cookie.replace(/(domain=)[^;]+/gi, (match, key) => key + g_origin_url.hostname)
      responseHeaders.append("Set-Cookie", rewritten)
    }
  }

  // Redirects are fetched with redirect: "manual", so point them at the proxy.
  const location = responseHeaders.get("Location")
  if (location) {
    responseHeaders.set("Location", rewriteURL(location))
  }

  const contentType = getResponseHeader(response, "Content-Type")
  const contentLength = getResponseHeader(response, "Content-Length")
  // Bodies declared larger than 100 KiB are passed through without rewriting.
  if (contentLength && Number(contentLength) > 1024 * 100) {
    return response
  }

  // /gutenberg-proxy/cache/epub/615/pg615-images.html
  if (contentType.includes("text/html") && !url.pathname.startsWith("/files/")) {
    return new HTMLRewriter()
      .on("a", new URLHandler(["href", "data-url", "data-verify-url"]))
      .on("link", new URLHandler(["href"]))
      .on("script", new URLHandler(["src"]))
      .on("iframe", new URLHandler(["src"]))
      .on("input", new URLHandler(["src"]))
      .on("div", new URLHandler(["style", "data-url", "data-status-url"]))
      .on("img", new URLHandler(["src", "data-origin"]))
      .on("form", new URLHandler(["action"]))
      .on("meta", new URLHandler(["content"]))
      .on("span", new URLHandler(["data-verify-url"]))
      .transform(response)
  }

  const isCSS = contentType.includes("text/css")
  const isJS = contentType.includes("application/x-javascript")
  const isJSON = contentType.includes("application/json")
  if (!isCSS && !isJS && !isJSON) {
    return response
  }

  let text = await response.text()
  if (isCSS) {
    text = rewriteText(text, /url\((.*?)\)/g)
  } else if (isJS) {
    // Un-escape "https:\/\/" first so the quoted-URL patterns below can match.
    text = text.replace(/https:\\\/\\\//g, "https://")
    text = rewriteText(text, /'(\/j\/subject\/)'/g)
    text = rewriteText(text, /"https?:(\/\/.*?)"/gi)
    text = rewriteText(text, /'https?:(\/\/.*?)'/gi)
    text = rewriteText(text, /\.get\("(.*?)\"/g)
  } else {
    text = rewriteText(text, /"https?:(\/\/.*?)"/gi)
  }
  // The body length changes after rewriting, so the upstream Content-Length
  // no longer applies.
  responseHeaders.delete("Content-Length")
  return new Response(text, response)
}
/**
 * Read a response header as a lower-cased string.
 *
 * @param {{headers: {get(name: string): (string|null|undefined)}}} response
 *   Object exposing a `headers.get()` lookup.
 * @param {string} headerName - Name of the header to read.
 * @returns {string} The lower-cased header value, or "" when the header is
 *   missing or empty.
 */
function getResponseHeader(response, headerName) {
  const raw = response.headers.get(headerName)
  if (!raw) {
    return ""
  }
  return raw.toLowerCase()
}
/**
 * Rewrite every URL that `reg` finds inside a block of text.
 *
 * For each match, the first capture group is passed through rewriteURL() and
 * substituted back in place. If the part of the match in front of the capture
 * contains an "https:" scheme (as with patterns like /"https?:(\/\/.*?)"/), it
 * is downgraded to "http:".
 *
 * @param {string} text - Text to scan.
 * @param {RegExp} reg - Pattern whose first capture group is the URL (or the
 *   scheme-less "//host/path" part of it). Should carry the `g` flag if every
 *   occurrence is to be rewritten.
 * @returns {string} The text with all captured URLs rewritten.
 */
function rewriteText(text, reg) {
  return text.replace(reg, (match, str) => {
    // Splice by position rather than with String.replace(str, url): a
    // replacement *string* would expand "$&", "$1", ... occurring in the URL.
    const at = match.indexOf(str)
    // Only the scheme in front of the captured URL is downgraded; replacing
    // the first "https" anywhere in the match could corrupt a host or path
    // that merely contains that text.
    const head = match.slice(0, at).replace("https:", "http:")
    const tail = match.slice(at + str.length)
    return head + rewriteURL(str) + tail
  })
}
/**
 * HTMLRewriter handler that points URLs in an element back at the proxy.
 *
 * `element()` rewrites the configured attributes; `text()` rewrites quoted
 * URLs found in the element's text chunks.
 */
class URLHandler {
  /**
   * @param {string[]} attrs - Attribute names to rewrite on matched elements.
   */
  constructor(attrs) {
    this.attrs = attrs
  }

  /**
   * Rewrite quoted absolute and protocol-relative URLs inside a text chunk,
   * replacing the chunk only when something actually changed.
   *
   * @param {{text: string, replace(content: string): void}} text - Text chunk.
   */
  text(text) {
    const before = text.text
    let after = rewriteText(before, /':?(\/\/.*?)'/g)
    after = rewriteText(after, /"https?:(\/\/.*?)"/gi)
    after = rewriteText(after, /'https?:(\/\/.*?)'/gi)
    if (after === before) {
      return
    }
    text.replace(after)
  }

  /**
   * Rewrite each configured attribute that is present and non-empty.
   * The "style" attribute is scanned for `url(...)` references; every other
   * attribute is treated as a single URL.
   *
   * @param {{getAttribute(name: string): (string|null), setAttribute(name: string, value: string): void}} element
   */
  element(element) {
    for (const name of this.attrs) {
      const current = element.getAttribute(name)
      if (current) {
        const rewritten = name === "style"
          ? rewriteText(current, /url\((.*?)\)/g)
          : rewriteURL(current)
        if (rewritten !== current) {
          element.setAttribute(name, rewritten)
        }
      }
    }
  }
}
/**
 * Rewrite an upstream URL so that it is served through this proxy.
 *
 * - Values that start with neither "/" nor "http" are returned untouched.
 * - Root-relative paths ("/x") get the current path prefix (g_path_pfrefix).
 * - Absolute and protocol-relative URLs have their host replaced by
 *   "<proxy host><PATH_PREFIX>"; a host other than UPSTREAM is additionally
 *   encoded into the path between RW_START and RW_STOP with dots written as
 *   "---". "https://" is downgraded to "http://".
 *
 * Reads the module-level g_origin_url and g_path_pfrefix set by handleRequest().
 *
 * @param {string} originURL - URL as found in upstream content or headers.
 * @returns {string} The proxied URL, or the (normalised) input when it cannot
 *   be parsed.
 */
function rewriteURL(originURL) {
  if (!originURL.startsWith("/") && !originURL.startsWith("http")) {
    return originURL
  }
  // Normalise escaped slashes before parsing: HTML-entity encoded ("&#x2F;",
  // "&#47;") and backslash-escaped ("\/").
  // NOTE(review): the first pattern had degraded to `///g`, which starts a line
  // comment and made the file unparseable; entity-encoded slashes are the
  // presumed original intent — confirm.
  let normalized = originURL.replace(/&#x2F;|&#47;/gi, "/").replace(/\\\//g, "/")
  if (normalized.startsWith("https://")) {
    normalized = normalized.replace("https://", "http://")
  }

  let fullURL = normalized
  if (normalized.startsWith("//")) {
    // Protocol-relative: borrow a scheme so that URL() can parse it.
    fullURL = "http:" + normalized
  } else if (normalized.startsWith("/")) {
    return g_path_pfrefix + normalized
  }

  try {
    const url = new URL(fullURL)
    let host = ''
    if (url.host !== UPSTREAM) {
      host = `${RW_START}${url.host.replace(/\./g, "---")}${RW_STOP}`
    }
    const rw = `${g_origin_url.host}${PATH_PREFIX}${host}`
    // Function replacer: the replacement is inserted literally, never
    // interpreted as a "$..." pattern.
    return normalized.replace(url.host, () => rw)
  } catch (e) {
    console.error("rewriter error", e, normalized)
    return normalized
  }
}
/**
 * Translate a proxied URL back into the upstream URL it stands for.
 *
 * The scheme is forced to "https:", the proxy host is replaced by the upstream
 * host, and the proxy path prefix is removed. The upstream host is UPSTREAM
 * unless the path (after PATH_PREFIX) starts with an RW_START…RW_STOP marker,
 * in which case the host is decoded from the marker ("---" back to ".").
 *
 * @param {string} rewritedURL - URL as seen by the proxy (request URL, Referer
 *   or Origin value).
 * @returns {{urlString: string, pathPrefix: string}} The upstream URL and the
 *   path prefix (PATH_PREFIX plus the host marker, if any) that was removed.
 * @throws {TypeError} When `rewritedURL` cannot be parsed by `new URL()`.
 */
function restoreURL(rewritedURL) {
  // A URL ending right at the prefix or at the host marker gets a trailing
  // slash appended before it is parsed.
  const endsBare = rewritedURL.endsWith(PATH_PREFIX) || rewritedURL.endsWith(RW_STOP)
  const input = endsBare ? rewritedURL + "/" : rewritedURL
  const parsed = new URL(input)

  const rest = parsed.pathname.startsWith(PATH_PREFIX)
    ? parsed.pathname.substring(PATH_PREFIX.length)
    : parsed.pathname

  // Defaults apply when the path carries no encoded host.
  let host = UPSTREAM
  let pathPrefix = PATH_PREFIX
  const stop = rest.indexOf(RW_STOP)
  if (rest.startsWith(RW_START) && stop !== -1) {
    pathPrefix = PATH_PREFIX + rest.substring(0, stop + RW_STOP.length)
    host = rest.substring(RW_START.length, stop).replace(/---/g, ".")
  }

  const urlString = input
    .replace(parsed.protocol, "https:")
    .replace(parsed.host, host)
    .replace(pathPrefix, '')
  return {urlString, pathPrefix}
}
// Entry point: answer every fetch event with the result of handleRequest().
addEventListener("fetch", function onFetch(event) {
  event.respondWith(handleRequest(event.request))
})