fix word borders in regexps; add regexp flags to config file

This commit is contained in:
soffee 2026-01-26 01:26:24 +03:00
parent 0d6646463b
commit f6af162ce6
4 changed files with 51 additions and 6 deletions

View file

@ -5,6 +5,11 @@ keywords:
# will match plain regex # will match plain regex
- name: woof - name: woof
pattern: 'w[oa]+f' pattern: 'w[oa]+f'
# you can also specify flags for your regex
flags:
global: true
multi_line: false
insensitive: true
# will match regex wrapped with word borders # will match regex wrapped with word borders
# will match 'hi, woof :3', 'woof!', 'i heard a woof' but not 'i like subwoofers' # will match 'hi, woof :3', 'woof!', 'i heard a woof' but not 'i like subwoofers'

View file

@ -1,7 +1,7 @@
{ {
"name": "mtproto_exporter", "name": "mtproto_exporter",
"type": "module", "type": "module",
"version": "1.4.2", "version": "1.5.0",
"packageManager": "pnpm@10.6.5", "packageManager": "pnpm@10.6.5",
"license": "MIT", "license": "MIT",
"scripts": { "scripts": {

View file

@ -94,11 +94,32 @@ export async function readKeywords(filePath: string): Promise<RawKeywordLike[]>
keywords.push(item); keywords.push(item);
} else if (typeof item === "object" && typeof item.name === "string") { } else if (typeof item === "object" && typeof item.name === "string") {
if (typeof item.pattern === "string") { if (typeof item.pattern === "string") {
keywords.push({ let result = {
name: item.name, name: item.name,
pattern: item.pattern, pattern: item.pattern,
word: Boolean(item.word ?? false), word: Boolean(item.word ?? false),
}); flags: {
global: true,
multi_line: false,
insensitive: true,
}
};
if (typeof item.flags === "object") {
if (typeof item.flags.global === "boolean") {
result.flags.global = item.flags.global;
}
if (typeof item.flags.multi_line === "boolean") {
result.flags.multi_line = item.flags.multi_line;
}
if (typeof item.flags.insensitive === "boolean") {
result.flags.insensitive = item.flags.insensitive;
}
}
keywords.push(result);
} }
} }
} }

View file

@ -11,6 +11,11 @@ export interface RawKeywordPattern {
name: string; name: string;
pattern: string; pattern: string;
word: boolean; word: boolean;
flags: {
global: boolean;
multi_line: boolean;
insensitive: boolean;
}
} }
export type RawKeywordLike = string | RawKeywordPattern; export type RawKeywordLike = string | RawKeywordPattern;
@ -26,6 +31,7 @@ export function rawToPatterns(raw: RawKeywordLike[]): KeywordPattern[] {
let pattern; let pattern;
let name; let name;
let addBorders = false; let addBorders = false;
let flags = "giu";
if (typeof keyword === "string") { if (typeof keyword === "string") {
pattern = escapeRegex(keyword); pattern = escapeRegex(keyword);
@ -35,15 +41,28 @@ export function rawToPatterns(raw: RawKeywordLike[]): KeywordPattern[] {
pattern = keyword.pattern; pattern = keyword.pattern;
name = keyword.name; name = keyword.name;
addBorders = keyword.word; addBorders = keyword.word;
flags = "u";
if (keyword.flags.global) {
flags += "g";
}
if (keyword.flags.insensitive) {
flags += "i";
}
if (keyword.flags.multi_line) {
flags += "m";
}
} }
const wordBorder = escapeRegex("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"); const wordBorder = escapeRegex("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
const borderStart = addBorders ? `(?:[${wordBorder}\\s]|^)` : ""; const borderStart = addBorders ? `(?<=[${wordBorder}\\s]|^)` : "";
const borderEnd = addBorders ? `(?:[${wordBorder}\\s]|$)` : ""; const borderEnd = addBorders ? `(?=[${wordBorder}\\s]|$)` : "";
patterns.push({ patterns.push({
name, name,
pattern: new RegExp(borderStart + pattern + borderEnd), pattern: new RegExp(`${borderStart}(?:${pattern})${borderEnd}`, flags),
}); });
} }