Files
proxy_tmdb/lib/queryParser.js

88 lines
2.6 KiB
JavaScript

// Replicates the query parsing logic shared by api.php and search.php:
// - extract a year (last (19|20)\d{2} match, ignoring 1080/2160)
// - extract an episode marker (SxxExxx, SxxExx, Sxx, partN, NxN, Exxx)
// - choose movie vs tv accordingly
// - extract titlein from the bytes before the year/episode
import { FILTER_RE } from './titleFilter.js';
// Year candidates: any run of four digits that starts with 19 or 20.
// The pattern is not anchored, so it can also match inside a longer digit run.
// The `g` flag is what lets findAll() enumerate every occurrence in a query.
const YEAR_RE = /(19|20)\d{2}/g;
// Single-pass regex matching the PHP behaviour:
// - S/s and E/e are case-insensitive ([Ss], [Ee]); for "part" only the leading
//   letter is ([Pp]art), the remaining "art" must be lowercase
// - the lowercase 'x' in NxN, and uppercase 'E' in standalone Exxx, are case-sensitive
// Alternatives are tried left to right at each position, and a global scan
// resumes after the end of the previous match, so "S01E02" is consumed whole by
// the first alternative and the trailing "E02" alternative cannot match inside it.
// Alternatives, in order:
//   1. S + 1-2 digits, at most one arbitrary character, E + 1-3 digits
//   2. S + exactly 2 digits (season only)
//   3. "part" + optional literal dot + 1-3 digits
//   4. 1-2 digits, 'x', 1-3 digits
//   5. 'E' + 1-3 digits
// None of the alternatives is anchored, so they can match inside longer words.
const EPISODE_RE =
/[Ss][0-9]{1,2}.?[Ee][0-9]{1,3}|[Ss][0-9]{2}|[Pp]art\.?[0-9]{1,3}|[0-9]{1,2}x[0-9]{1,3}|E[0-9]{1,3}/g;
// PHP uses byte offsets (substr). To stay byte-faithful, work on the UTF-8 bytes.

/**
 * Encode a string as its UTF-8 bytes.
 *
 * @param {string} s - Text to encode.
 * @returns {Buffer} The UTF-8 encoding of `s`.
 */
const utf8 = (s) => Buffer.from(s, 'utf8');

/**
 * Return the text formed by the first `end` UTF-8 bytes of `s`.
 *
 * Uses `subarray()` instead of the deprecated `Buffer#slice()`; both return a
 * view over the same bytes, so the result is unchanged.
 *
 * @param {string} s - Source text.
 * @param {number} end - Exclusive byte offset at which to cut. If it falls
 *   inside a multi-byte sequence, the truncated character decodes as U+FFFD.
 * @returns {string} The decoded prefix.
 */
const sliceBytes = (s, end) => utf8(s).subarray(0, end).toString('utf8');
/**
 * Collect every match of a global regex in `str`, in left-to-right order,
 * together with the UTF-8 byte offset at which each match starts.
 *
 * Built on `String.prototype.matchAll`, which advances past zero-length
 * matches by itself and rejects a regex without the `g` flag instead of
 * looping forever on the same match.
 *
 * @param {RegExp} re - Pattern carrying the `g` flag. Its `lastIndex` is reset
 *   to 0 so the scan always starts at the beginning of `str`.
 * @param {string} str - Text to scan.
 * @returns {{ value: string, byteOffset: number }[]} One entry per match:
 *   the matched text and the byte length of everything before it.
 * @throws {TypeError} If `re` is not a global regex.
 */
function findAll(re, str) {
  // Discard any position left behind by an earlier exec()/test() on this regex;
  // matchAll starts its scan from the regex's current lastIndex.
  re.lastIndex = 0;
  return Array.from(str.matchAll(re), (m) => ({
    value: m[0],
    // m.index counts UTF-16 code units; convert the prefix to a UTF-8 byte count.
    byteOffset: Buffer.byteLength(str.slice(0, m.index), 'utf8'),
  }));
}
/**
 * Remove from `s` whatever FILTER_RE matches and return what is left.
 *
 * FILTER_RE is imported from ./titleFilter.js and is not visible here, so what
 * it matches (and whether it is global) must be checked there.
 *
 * @param {string} s - Candidate title text.
 * @returns {string} `s` with FILTER_RE matches replaced by the empty string.
 */
function stripFilter(s) {
  const remainder = s.replace(FILTER_RE, '');
  return remainder;
}
/**
 * Split a search query into a title prefix, a year and an episode marker.
 *
 * The last year match and the last episode match in the query are considered.
 * The title is the byte-wise prefix of the query that ends where the earlier of
 * the two starts; if that prefix is empty once stripFilter() has been applied,
 * the earlier marker is dropped and the title runs up to the later one.
 *
 * @param {string} query - Raw query text.
 * @returns {null | {type: string, titlein: string, yearin: number, episodein: string}
 *   | {error: string, titlein: string, yearin: number, episodein: string}}
 *   `null` for a falsy query; an object carrying `error` when neither a year
 *   nor an episode marker was kept; otherwise the parsed parts, with `type`
 *   set to 'tv' when an episode marker was kept and 'movie' when not.
 *   `yearin` is 0 and `episodein` is '' when the respective part is absent.
 */
export function parseQuery(query) {
  if (!query) return null;

  // Title candidate: the query's bytes before the given byte offset.
  const titleBefore = (bytePos) => sliceBytes(query, bytePos);
  const isEmptyAfterFilter = (text) => !stripFilter(text);

  // Last year-like match that is not one of the excluded values.
  // NOTE(review): YEAR_RE only yields values starting with 19 or 20, so this
  // exclusion looks unreachable; it is kept to match the documented behaviour.
  const yearHit = findAll(YEAR_RE, query)
    .filter(({ value }) => value !== '1080' && value !== '2160')
    .pop();
  let yearin = yearHit ? parseInt(yearHit.value, 10) : 0;
  const yearpos = yearHit ? yearHit.byteOffset : -1;
  let titlein = yearHit ? titleBefore(yearpos) : '';

  // Last episode marker in the query, if any.
  const episodeHit = findAll(EPISODE_RE, query).pop();
  let episodein = episodeHit ? episodeHit.value : '';
  const episodepos = episodeHit ? episodeHit.byteOffset : -1;

  if (episodein) {
    if (!yearin) {
      titlein = titleBefore(episodepos);
    } else {
      // Cut at whichever marker comes first; fall back to the other one when
      // nothing meaningful precedes the first.
      const yearComesFirst = episodepos > yearpos;
      const firstPos = yearComesFirst ? yearpos : episodepos;
      const secondPos = yearComesFirst ? episodepos : yearpos;

      titlein = titleBefore(firstPos);
      if (isEmptyAfterFilter(titlein)) {
        titlein = titleBefore(secondPos);
        if (yearComesFirst) {
          yearin = 0;
        } else {
          episodein = '';
        }
      }
    }
  }

  if (!yearin && !episodein) {
    return { error: 'Year or episode not found in query', titlein, yearin, episodein };
  }

  return {
    type: episodein ? 'tv' : 'movie',
    titlein,
    yearin,
    episodein,
  };
}