add better handling of word patterns
This commit is contained in:
parent
69695e265a
commit
fb5fd57238
7 changed files with 99 additions and 46 deletions
|
|
@ -1,4 +1,19 @@
|
|||
keywords:
|
||||
# will match just word 'meow' (requires word border on both sides)
|
||||
- meow
|
||||
|
||||
# will match plain regex
|
||||
- name: woof
|
||||
pattern: 'w[oa]+f'
|
||||
|
||||
# will match regex wrapped with word borders
|
||||
# will match 'hi, woof :3', 'woof!', 'i heard a woof' but not 'i like subwoofers'
|
||||
- name: woof
|
||||
pattern: 'w[oa]+f'
|
||||
word: true
|
||||
|
||||
# will match any word starting with 'aqua' (aquarium, aquatic, aquaculture, etc...)
|
||||
# requires word border on both sides too
|
||||
- name: aqua
|
||||
pattern: 'aqua.*?'
|
||||
word: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "mtproto_exporter",
|
||||
"type": "module",
|
||||
"version": "1.1.0",
|
||||
"version": "1.2.0",
|
||||
"packageManager": "pnpm@10.6.5",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import type { OptionDefinition } from "command-line-args";
|
||||
import type { KeywordLike } from "./keywords.js";
|
||||
import type { RawKeywordLike } from "./keywords.js";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import cmdline from "command-line-args";
|
||||
import yaml from "js-yaml";
|
||||
|
|
@ -12,7 +12,7 @@ export interface Configuration {
|
|||
watchFile: boolean;
|
||||
includePeers?: number[];
|
||||
excludePeers?: number[];
|
||||
keywords?: KeywordLike[];
|
||||
keywords?: RawKeywordLike[];
|
||||
}
|
||||
|
||||
const optionDefinitions: OptionDefinition[] = [
|
||||
|
|
@ -54,19 +54,22 @@ if (cli["exclude-peers"]) {
|
|||
}
|
||||
}
|
||||
|
||||
export async function readKeywords(filePath: string): Promise<KeywordLike[]> {
|
||||
export async function readKeywords(filePath: string): Promise<RawKeywordLike[]> {
|
||||
const doc = yaml.load(await readFile(filePath, "utf8")) as { keywords?: any[] };
|
||||
|
||||
if (doc.keywords && doc.keywords.constructor.name === "Array") {
|
||||
const keywords: KeywordLike[] = [];
|
||||
const keywords: RawKeywordLike[] = [];
|
||||
for (const item of doc.keywords) {
|
||||
if (typeof item === "string") {
|
||||
keywords.push(item);
|
||||
} else if (typeof item === "object" && item.name && item.pattern) {
|
||||
keywords.push({
|
||||
name: item.name,
|
||||
pattern: new RegExp(item.pattern, "gi"),
|
||||
});
|
||||
} else if (typeof item === "object" && typeof item.name === "string") {
|
||||
if (typeof item.pattern === "string") {
|
||||
keywords.push({
|
||||
name: item.name,
|
||||
pattern: item.pattern,
|
||||
word: Boolean(item.word ?? false),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return keywords;
|
||||
|
|
|
|||
|
|
@ -3,37 +3,55 @@ import { PropagationAction } from "@mtcute/dispatcher";
|
|||
import { Counter } from "prom-client";
|
||||
import { config } from "./config.js";
|
||||
import { peersConfigFilter } from "./filters.js";
|
||||
import { escapeRegex } from "./utils.js";
|
||||
|
||||
interface KeywordPattern {
|
||||
/**
 * A keyword definition in its uncompiled form, where the pattern is still
 * plain regular-expression source text rather than a `RegExp`.
 */
export interface RawKeywordPattern {
|
||||
/** Name of the keyword; carried over unchanged into the compiled pattern. */
name: string;
|
||||
/** Regular-expression source; `rawToPatterns` passes it to `new RegExp`. */
pattern: string;
|
||||
/** When true, `rawToPatterns` wraps the pattern with word borders. */
word: boolean;
|
||||
}
|
||||
|
||||
/**
 * A raw keyword entry: either a plain string (escaped by `rawToPatterns` and
 * matched literally between word borders) or a full pattern definition.
 */
export type RawKeywordLike = string | RawKeywordPattern;
|
||||
|
||||
/** A compiled keyword: a name paired with the expression that detects it. */
export interface KeywordPattern {
|
||||
/** Name of the keyword; `KeywordsCounter` uses it as the `keyword` label value. */
name: string;
|
||||
/** Expression that `KeywordsCounter` matches against the message text. */
pattern: RegExp;
|
||||
}
|
||||
|
||||
export type KeywordLike = string | KeywordPattern;
|
||||
/**
 * Compiles raw keyword definitions into named regular expressions.
 *
 * - A plain string is escaped and matched literally, as a whole word.
 * - A pattern object contributes its `pattern` as regex source; when `word`
 *   is true the pattern must additionally sit between word borders.
 *
 * A word border is the start or end of the text, any whitespace character,
 * or one of the ASCII punctuation characters listed in the function body.
 *
 * Every expression is compiled with the `g` and `i` flags, so that
 * `text.match(pattern)` returns every occurrence (its length is the number
 * of hits) and matching ignores letter case.
 *
 * @param raw - Keyword definitions.
 * @returns One compiled pattern per definition, in the same order.
 * @throws {SyntaxError} If a `pattern` is not valid regular-expression source
 *   (raised by the `RegExp` constructor).
 */
export function rawToPatterns(raw: RawKeywordLike[]): KeywordPattern[] {
    // Loop-invariant, so the border assertions are built only once.
    const wordBorder = escapeRegex("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
    // Lookarounds assert the border without consuming it, so two keywords
    // separated by a single delimiter ("meow meow") are both found.
    const borderStart = `(?<=[${wordBorder}\\s]|^)`;
    const borderEnd = `(?=[${wordBorder}\\s]|$)`;

    const patterns: KeywordPattern[] = [];

    for (const keyword of raw) {
        let source: string;
        let name: string;
        let addBorders: boolean;

        if (typeof keyword === "string") {
            // Plain keywords are literal text and always whole-word.
            source = escapeRegex(keyword);
            name = keyword;
            addBorders = true;
        } else {
            source = keyword.pattern;
            name = keyword.name;
            addBorders = keyword.word;
        }

        // Group the source so that a top-level `|` inside a user pattern
        // cannot detach one alternative from its border assertion.
        const expression = addBorders
            ? `${borderStart}(?:${source})${borderEnd}`
            : source;

        patterns.push({
            name,
            pattern: new RegExp(expression, "gi"),
        });
    }

    return patterns;
}
|
||||
|
||||
export class KeywordsCounter extends Counter {
|
||||
private _dp: Dispatcher;
|
||||
private _keywords: KeywordLike[];
|
||||
constructor(dp: Dispatcher, keywords: KeywordLike[] = []) {
|
||||
private _keywords: KeywordPattern[];
|
||||
constructor(dp: Dispatcher, keywords: KeywordPattern[] = []) {
|
||||
super({
|
||||
name: "messenger_dialog_keywords_count",
|
||||
help: "Number of keywords found in messages since exporter startup",
|
||||
|
|
@ -42,24 +60,18 @@ export class KeywordsCounter extends Counter {
|
|||
this._dp = dp;
|
||||
this._keywords = keywords;
|
||||
|
||||
dp.onNewMessage(peersConfigFilter(config), async (msg) => {
|
||||
this._dp.onNewMessage(peersConfigFilter(config), async (msg) => {
|
||||
for (const kw of this._keywords) {
|
||||
let count;
|
||||
let kwname;
|
||||
if (typeof kw === "string") {
|
||||
const words = msg.text.toLowerCase().split(" ");
|
||||
count = words.filter(w => w === kw).length;
|
||||
kwname = kw;
|
||||
} else {
|
||||
count = (msg.text.match(kw.pattern) || []).length;
|
||||
kwname = kw.name;
|
||||
}
|
||||
const count = (msg.text.match(kw.pattern) ?? []).length;
|
||||
|
||||
// this will prevent from flooding metrics with keywords that had never been triggered yet
|
||||
if (count === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
this.inc({
|
||||
peerId: msg.chat.id,
|
||||
keyword: kwname,
|
||||
keyword: kw.name,
|
||||
}, count);
|
||||
}
|
||||
return PropagationAction.Continue;
|
||||
|
|
@ -70,7 +82,7 @@ export class KeywordsCounter extends Counter {
|
|||
return this._keywords;
|
||||
}
|
||||
|
||||
public setKeywords(keywords: KeywordLike[]) {
|
||||
public setKeywords(keywords: KeywordPattern[]) {
|
||||
this._keywords = keywords;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import { collectDefaultMetrics, Registry } from "prom-client";
|
|||
|
||||
import { config, readKeywords } from "./config.js";
|
||||
import * as env from "./env.js";
|
||||
import { rawToPatterns } from "./keywords.js";
|
||||
import * as metrics from "./metrics.js";
|
||||
import MetricsServer from "./server.js";
|
||||
|
||||
|
|
@ -36,7 +37,7 @@ registry.registerMetric(metrics.newUnreadCountGauge(tg));
|
|||
registry.registerMetric(metrics.newMessagesCounter(dp));
|
||||
|
||||
if (config.keywords) {
|
||||
const counter = new metrics.KeywordsCounter(dp, config.keywords);
|
||||
const counter = new metrics.KeywordsCounter(dp, rawToPatterns(config.keywords));
|
||||
registry.registerMetric(counter);
|
||||
|
||||
if (config.watchFile) {
|
||||
|
|
@ -47,7 +48,7 @@ if (config.keywords) {
|
|||
console.log("[watch-file] Keywords file was updated. Re-reading keywords configuration...");
|
||||
try {
|
||||
config.keywords = await readKeywords(config.keywordsFile);
|
||||
counter.setKeywords(config.keywords);
|
||||
counter.setKeywords(rawToPatterns(config.keywords));
|
||||
} catch (e) {
|
||||
console.error("Failed to read keywords file", config.keywordsFile, e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { Counter, Gauge } from "prom-client";
|
|||
|
||||
import { config } from "./config.js";
|
||||
import { peersConfigBoolFilter, peersConfigFilter } from "./filters.js";
|
||||
import { KeywordsCounter, newWordsCounter } from "./keywords.js";
|
||||
import { KeywordsCounter } from "./keywords.js";
|
||||
|
||||
function newMessagesCounter(dp: Dispatcher) {
|
||||
const counter = new Counter({
|
||||
|
|
@ -67,6 +67,25 @@ function newUnreadCountGauge(tg: TelegramClient) {
|
|||
return gauge;
|
||||
}
|
||||
|
||||
/**
 * Creates the `messenger_dialog_words_count` counter and registers a
 * new-message handler on the dispatcher that increments it once per word.
 *
 * The message text is lower-cased and split on runs of whitespace; empty
 * tokens are discarded, so a message without text contributes nothing.
 *
 * NOTE(review): every distinct word becomes a value of the `word` label, so
 * the number of time series grows with the vocabulary seen.
 *
 * @param dp - Dispatcher whose new-message events feed the counter.
 * @returns The counter, ready to be registered in a metrics registry.
 */
function newWordsCounter(dp: Dispatcher) {
    const counter = new Counter({
        name: "messenger_dialog_words_count",
        help: "Number of words in messages since exporter startup",
        labelNames: ["peerId", "word"],
    });

    dp.onNewMessage(peersConfigFilter(config), async (msg) => {
        // Splitting on a single " " would yield "" for empty text and for
        // repeated, leading or trailing spaces, and would not split on
        // newlines or tabs at all.
        const words = msg.text
            .toLowerCase()
            .split(/\s+/)
            .filter(w => w.length > 0);

        for (const word of words) {
            counter.inc({
                peerId: msg.chat.id,
                word,
            });
        }

        // Let other handlers process the same message.
        return PropagationAction.Continue;
    });

    return counter;
}
|
||||
|
||||
export {
|
||||
KeywordsCounter,
|
||||
newMessagesCounter,
|
||||
|
|
|
|||
3
src/utils.ts
Normal file
3
src/utils.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/**
 * Escapes every character of `text` that has a special meaning in a regular
 * expression, so the result can be embedded in a pattern (or inside a
 * character class) and match the original text literally.
 *
 * The hyphen is emitted as `\x2d` rather than `\-`: the latter is a syntax
 * error outside a character class when the regex uses the `u` or `v` flag,
 * while `\x2d` is accepted in every mode and position.
 *
 * @param text - Literal text to embed in a regular expression.
 * @returns The escaped regular-expression source.
 */
export function escapeRegex(text: string): string {
    return text.replace(
        /[/\-\\^$*+?.()|[\]{}]/g,
        ch => (ch === "-" ? "\\x2d" : `\\${ch}`),
    );
}
|
||||
Loading…
Add table
Reference in a new issue