add better handling of word patterns
This commit is contained in:
parent
69695e265a
commit
fb5fd57238
7 changed files with 99 additions and 46 deletions
|
|
@ -1,4 +1,19 @@
|
||||||
keywords:
|
keywords:
|
||||||
|
# will match just word 'meow' (requires word border on both sides)
|
||||||
- meow
|
- meow
|
||||||
|
|
||||||
|
# will match plain regex
|
||||||
- name: woof
|
- name: woof
|
||||||
pattern: 'w[oa]+f'
|
pattern: 'w[oa]+f'
|
||||||
|
|
||||||
|
# will match regex wrapped with word borders
|
||||||
|
# will match 'hi, woof :3', 'woof!', 'i heard a woof' but not 'i like subwoofers'
|
||||||
|
- name: woof
|
||||||
|
pattern: 'w[oa]+f'
|
||||||
|
word: true
|
||||||
|
|
||||||
|
# will match any word starting with 'aqua' (aquarium, aquatic, aquaculture, etc...)
|
||||||
|
# requires word border on both sides too
|
||||||
|
- name: aqua
|
||||||
|
pattern: 'aqua.*?'
|
||||||
|
word: true
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"name": "mtproto_exporter",
|
"name": "mtproto_exporter",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"version": "1.1.0",
|
"version": "1.2.0",
|
||||||
"packageManager": "pnpm@10.6.5",
|
"packageManager": "pnpm@10.6.5",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import type { OptionDefinition } from "command-line-args";
|
import type { OptionDefinition } from "command-line-args";
|
||||||
import type { KeywordLike } from "./keywords.js";
|
import type { RawKeywordLike } from "./keywords.js";
|
||||||
import { readFile } from "node:fs/promises";
|
import { readFile } from "node:fs/promises";
|
||||||
import cmdline from "command-line-args";
|
import cmdline from "command-line-args";
|
||||||
import yaml from "js-yaml";
|
import yaml from "js-yaml";
|
||||||
|
|
@ -12,7 +12,7 @@ export interface Configuration {
|
||||||
watchFile: boolean;
|
watchFile: boolean;
|
||||||
includePeers?: number[];
|
includePeers?: number[];
|
||||||
excludePeers?: number[];
|
excludePeers?: number[];
|
||||||
keywords?: KeywordLike[];
|
keywords?: RawKeywordLike[];
|
||||||
}
|
}
|
||||||
|
|
||||||
const optionDefinitions: OptionDefinition[] = [
|
const optionDefinitions: OptionDefinition[] = [
|
||||||
|
|
@ -54,21 +54,24 @@ if (cli["exclude-peers"]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function readKeywords(filePath: string): Promise<KeywordLike[]> {
|
export async function readKeywords(filePath: string): Promise<RawKeywordLike[]> {
|
||||||
const doc = yaml.load(await readFile(filePath, "utf8")) as { keywords?: any[] };
|
const doc = yaml.load(await readFile(filePath, "utf8")) as { keywords?: any[] };
|
||||||
|
|
||||||
if (doc.keywords && doc.keywords.constructor.name === "Array") {
|
if (doc.keywords && doc.keywords.constructor.name === "Array") {
|
||||||
const keywords: KeywordLike[] = [];
|
const keywords: RawKeywordLike[] = [];
|
||||||
for (const item of doc.keywords) {
|
for (const item of doc.keywords) {
|
||||||
if (typeof item === "string") {
|
if (typeof item === "string") {
|
||||||
keywords.push(item);
|
keywords.push(item);
|
||||||
} else if (typeof item === "object" && item.name && item.pattern) {
|
} else if (typeof item === "object" && typeof item.name === "string") {
|
||||||
|
if (typeof item.pattern === "string") {
|
||||||
keywords.push({
|
keywords.push({
|
||||||
name: item.name,
|
name: item.name,
|
||||||
pattern: new RegExp(item.pattern, "gi"),
|
pattern: item.pattern,
|
||||||
|
word: Boolean(item.word ?? false),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return keywords;
|
return keywords;
|
||||||
} else {
|
} else {
|
||||||
throw new Error("Keywords file format error: no 'keywords' property, or not an array.");
|
throw new Error("Keywords file format error: no 'keywords' property, or not an array.");
|
||||||
|
|
|
||||||
|
|
@ -3,37 +3,55 @@ import { PropagationAction } from "@mtcute/dispatcher";
|
||||||
import { Counter } from "prom-client";
|
import { Counter } from "prom-client";
|
||||||
import { config } from "./config.js";
|
import { config } from "./config.js";
|
||||||
import { peersConfigFilter } from "./filters.js";
|
import { peersConfigFilter } from "./filters.js";
|
||||||
|
import { escapeRegex } from "./utils.js";
|
||||||
|
|
||||||
interface KeywordPattern {
|
export interface RawKeywordPattern {
|
||||||
|
name: string;
|
||||||
|
pattern: string;
|
||||||
|
word: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type RawKeywordLike = string | RawKeywordPattern;
|
||||||
|
|
||||||
|
export interface KeywordPattern {
|
||||||
name: string;
|
name: string;
|
||||||
pattern: RegExp;
|
pattern: RegExp;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type KeywordLike = string | KeywordPattern;
|
/**
 * Compiles raw keyword definitions into named regular expressions.
 *
 * - A plain string is regex-escaped (so it matches literally), is used as its
 *   own name, and is always matched as a whole word.
 * - A pattern object contributes its `pattern` source as-is under its `name`;
 *   when `word` is true the pattern must also sit between word borders.
 *
 * A word border is the start or end of the text, a whitespace character, or
 * one of the ASCII punctuation characters listed in the body. Borders are
 * zero-width lookarounds, so the delimiter between two adjacent words can
 * serve as the end border of the first and the start border of the second.
 *
 * Every expression is compiled with the "g" and "i" flags, so that
 * `text.match(pattern)` returns one entry per occurrence regardless of case
 * (without "g" it returns a single match plus its capture groups).
 *
 * @param raw Keyword definitions: literal words or `{ name, pattern, word }` objects.
 * @returns Compiled patterns in input order; definitions with an empty
 *          pattern are skipped because they would match at every position.
 * @throws {SyntaxError} If a pattern object holds an invalid regular expression source.
 */
export function rawToPatterns(raw: RawKeywordLike[]): KeywordPattern[] {
    // Loop-invariant border fragments, built once instead of per keyword.
    const wordBorder = escapeRegex("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
    const borderStart = `(?<=[${wordBorder}\\s]|^)`;
    const borderEnd = `(?=[${wordBorder}\\s]|$)`;

    const patterns: KeywordPattern[] = [];
    for (const keyword of raw) {
        let name: string;
        let source: string;
        let addBorders: boolean;

        if (typeof keyword === "string") {
            name = keyword;
            source = escapeRegex(keyword);
            addBorders = true;
        } else {
            name = keyword.name;
            source = keyword.pattern;
            addBorders = keyword.word;
        }

        // An empty expression matches between every pair of characters and
        // would flood the counter, so it is never useful as a keyword.
        if (source === "") {
            continue;
        }

        // Group the user pattern so a top-level alternation such as "cat|dog"
        // stays inside the borders instead of splitting them.
        const grouped = `(?:${source})`;
        patterns.push({
            name,
            pattern: new RegExp(addBorders ? borderStart + grouped + borderEnd : grouped, "gi"),
        });
    }

    return patterns;
}
|
||||||
|
|
||||||
export class KeywordsCounter extends Counter {
|
export class KeywordsCounter extends Counter {
|
||||||
private _dp: Dispatcher;
|
private _dp: Dispatcher;
|
||||||
private _keywords: KeywordLike[];
|
private _keywords: KeywordPattern[];
|
||||||
constructor(dp: Dispatcher, keywords: KeywordLike[] = []) {
|
constructor(dp: Dispatcher, keywords: KeywordPattern[] = []) {
|
||||||
super({
|
super({
|
||||||
name: "messenger_dialog_keywords_count",
|
name: "messenger_dialog_keywords_count",
|
||||||
help: "Number of keywords found in messages since exporter startup",
|
help: "Number of keywords found in messages since exporter startup",
|
||||||
|
|
@ -42,24 +60,18 @@ export class KeywordsCounter extends Counter {
|
||||||
this._dp = dp;
|
this._dp = dp;
|
||||||
this._keywords = keywords;
|
this._keywords = keywords;
|
||||||
|
|
||||||
dp.onNewMessage(peersConfigFilter(config), async (msg) => {
|
this._dp.onNewMessage(peersConfigFilter(config), async (msg) => {
|
||||||
for (const kw of this._keywords) {
|
for (const kw of this._keywords) {
|
||||||
let count;
|
const count = (msg.text.match(kw.pattern) ?? []).length;
|
||||||
let kwname;
|
|
||||||
if (typeof kw === "string") {
|
// skip keywords with no matches so the metrics are not flooded with keywords that have never been triggered
|
||||||
const words = msg.text.toLowerCase().split(" ");
|
|
||||||
count = words.filter(w => w === kw).length;
|
|
||||||
kwname = kw;
|
|
||||||
} else {
|
|
||||||
count = (msg.text.match(kw.pattern) || []).length;
|
|
||||||
kwname = kw.name;
|
|
||||||
}
|
|
||||||
if (count === 0) {
|
if (count === 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.inc({
|
this.inc({
|
||||||
peerId: msg.chat.id,
|
peerId: msg.chat.id,
|
||||||
keyword: kwname,
|
keyword: kw.name,
|
||||||
}, count);
|
}, count);
|
||||||
}
|
}
|
||||||
return PropagationAction.Continue;
|
return PropagationAction.Continue;
|
||||||
|
|
@ -70,7 +82,7 @@ export class KeywordsCounter extends Counter {
|
||||||
return this._keywords;
|
return this._keywords;
|
||||||
}
|
}
|
||||||
|
|
||||||
public setKeywords(keywords: KeywordLike[]) {
|
public setKeywords(keywords: KeywordPattern[]) {
|
||||||
this._keywords = keywords;
|
this._keywords = keywords;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import { collectDefaultMetrics, Registry } from "prom-client";
|
||||||
|
|
||||||
import { config, readKeywords } from "./config.js";
|
import { config, readKeywords } from "./config.js";
|
||||||
import * as env from "./env.js";
|
import * as env from "./env.js";
|
||||||
|
import { rawToPatterns } from "./keywords.js";
|
||||||
import * as metrics from "./metrics.js";
|
import * as metrics from "./metrics.js";
|
||||||
import MetricsServer from "./server.js";
|
import MetricsServer from "./server.js";
|
||||||
|
|
||||||
|
|
@ -36,7 +37,7 @@ registry.registerMetric(metrics.newUnreadCountGauge(tg));
|
||||||
registry.registerMetric(metrics.newMessagesCounter(dp));
|
registry.registerMetric(metrics.newMessagesCounter(dp));
|
||||||
|
|
||||||
if (config.keywords) {
|
if (config.keywords) {
|
||||||
const counter = new metrics.KeywordsCounter(dp, config.keywords);
|
const counter = new metrics.KeywordsCounter(dp, rawToPatterns(config.keywords));
|
||||||
registry.registerMetric(counter);
|
registry.registerMetric(counter);
|
||||||
|
|
||||||
if (config.watchFile) {
|
if (config.watchFile) {
|
||||||
|
|
@ -47,7 +48,7 @@ if (config.keywords) {
|
||||||
console.log("[watch-file] Keywords file was updated. Re-reading keywords configuration...");
|
console.log("[watch-file] Keywords file was updated. Re-reading keywords configuration...");
|
||||||
try {
|
try {
|
||||||
config.keywords = await readKeywords(config.keywordsFile);
|
config.keywords = await readKeywords(config.keywordsFile);
|
||||||
counter.setKeywords(config.keywords);
|
counter.setKeywords(rawToPatterns(config.keywords));
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("Failed to read keywords file", config.keywordsFile, e);
|
console.error("Failed to read keywords file", config.keywordsFile, e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import { Counter, Gauge } from "prom-client";
|
||||||
|
|
||||||
import { config } from "./config.js";
|
import { config } from "./config.js";
|
||||||
import { peersConfigBoolFilter, peersConfigFilter } from "./filters.js";
|
import { peersConfigBoolFilter, peersConfigFilter } from "./filters.js";
|
||||||
import { KeywordsCounter, newWordsCounter } from "./keywords.js";
|
import { KeywordsCounter } from "./keywords.js";
|
||||||
|
|
||||||
function newMessagesCounter(dp: Dispatcher) {
|
function newMessagesCounter(dp: Dispatcher) {
|
||||||
const counter = new Counter({
|
const counter = new Counter({
|
||||||
|
|
@ -67,6 +67,25 @@ function newUnreadCountGauge(tg: TelegramClient) {
|
||||||
return gauge;
|
return gauge;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates the `messenger_dialog_words_count` counter and registers a
 * new-message handler on `dp` that increments it once per word of every
 * message passing `peersConfigFilter(config)`.
 *
 * The message text is lower-cased and split on runs of any whitespace;
 * empty tokens are dropped, so repeated spaces, line breaks and messages
 * with empty text do not produce an empty-string `word` label.
 *
 * @param dp Dispatcher the handler is attached to.
 * @returns The counter, labelled by `peerId` and `word`.
 */
function newWordsCounter(dp: Dispatcher) {
    const counter = new Counter({
        name: "messenger_dialog_words_count",
        help: "Number of words in messages since exporter startup",
        labelNames: ["peerId", "word"],
    });

    dp.onNewMessage(peersConfigFilter(config), async (msg) => {
        const words = msg.text
            .toLowerCase()
            .split(/\s+/)
            .filter(token => token.length > 0);

        for (const word of words) {
            counter.inc({
                peerId: msg.chat.id,
                word,
            });
        }

        // NOTE(review): presumably lets later handlers see the same message — confirm against @mtcute/dispatcher.
        return PropagationAction.Continue;
    });

    return counter;
}
|
||||||
|
|
||||||
export {
|
export {
|
||||||
KeywordsCounter,
|
KeywordsCounter,
|
||||||
newMessagesCounter,
|
newMessagesCounter,
|
||||||
|
|
|
||||||
3
src/utils.ts
Normal file
3
src/utils.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
/**
 * Backslash-escapes regular-expression metacharacters in `text` so the result
 * can be embedded in a regular-expression source and match literally.
 *
 * The characters escaped are: / - \ ^ $ * + ? . ( ) | [ ] { }
 *
 * NOTE(review): `\-` is not a valid escape outside a character class when a
 * pattern is compiled with the `u` or `v` flag — confirm no caller uses the
 * result that way.
 *
 * @param text Literal text to escape.
 * @returns `text` with each listed character prefixed by a backslash.
 */
export function escapeRegex(text: string) {
|
||||||
|
return text.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue