1
0
mirror of https://gitlab.com/MoonTestUse1/AdministrationItDepartmens.git synced 2025-08-14 00:25:46 +02:00

Все подряд

This commit is contained in:
MoonTestUse1
2024-12-31 02:37:57 +06:00
parent 8e53bb6cb2
commit d5780b2eab
3258 changed files with 1087440 additions and 268 deletions

View File

@@ -0,0 +1,6 @@
"""
Several extensions Python is missing, such as a proper class to handle a TCP
communication with support for cancelling the operation, and a utility class
to read arbitrary binary data in a more comfortable way, with int/strings/etc.
"""
from .binaryreader import BinaryReader

View File

@@ -0,0 +1,185 @@
"""
This module contains the BinaryReader utility class.
"""
import os
import time
from datetime import datetime, timezone, timedelta
from io import BytesIO
from struct import unpack
from ..errors import TypeNotFoundError
from ..tl.alltlobjects import tlobjects
from ..tl.core import core_objects
_EPOCH_NAIVE = datetime(*time.gmtime(0)[:6])
_EPOCH = _EPOCH_NAIVE.replace(tzinfo=timezone.utc)
class BinaryReader:
"""
Small utility class to read binary data.
"""
def __init__(self, data):
self.stream = BytesIO(data)
self._last = None # Should come in handy to spot -404 errors
# region Reading
# "All numbers are written as little endian."
# https://core.telegram.org/mtproto
def read_byte(self):
"""Reads a single byte value."""
return self.read(1)[0]
def read_int(self, signed=True):
"""Reads an integer (4 bytes) value."""
return int.from_bytes(self.read(4), byteorder='little', signed=signed)
def read_long(self, signed=True):
"""Reads a long integer (8 bytes) value."""
return int.from_bytes(self.read(8), byteorder='little', signed=signed)
def read_float(self):
"""Reads a real floating point (4 bytes) value."""
return unpack('<f', self.read(4))[0]
def read_double(self):
"""Reads a real floating point (8 bytes) value."""
return unpack('<d', self.read(8))[0]
def read_large_int(self, bits, signed=True):
"""Reads a n-bits long integer value."""
return int.from_bytes(
self.read(bits // 8), byteorder='little', signed=signed)
def read(self, length=-1):
"""Read the given amount of bytes, or -1 to read all remaining."""
result = self.stream.read(length)
if (length >= 0) and (len(result) != length):
raise BufferError(
'No more data left to read (need {}, got {}: {}); last read {}'
.format(length, len(result), repr(result), repr(self._last))
)
self._last = result
return result
def get_bytes(self):
"""Gets the byte array representing the current buffer as a whole."""
return self.stream.getvalue()
# endregion
# region Telegram custom reading
def tgread_bytes(self):
"""
Reads a Telegram-encoded byte array, without the need of
specifying its length.
"""
first_byte = self.read_byte()
if first_byte == 254:
length = self.read_byte() | (self.read_byte() << 8) | (
self.read_byte() << 16)
padding = length % 4
else:
length = first_byte
padding = (length + 1) % 4
data = self.read(length)
if padding > 0:
padding = 4 - padding
self.read(padding)
return data
def tgread_string(self):
"""Reads a Telegram-encoded string."""
return str(self.tgread_bytes(), encoding='utf-8', errors='replace')
def tgread_bool(self):
"""Reads a Telegram boolean value."""
value = self.read_int(signed=False)
if value == 0x997275b5: # boolTrue
return True
elif value == 0xbc799737: # boolFalse
return False
else:
raise RuntimeError('Invalid boolean code {}'.format(hex(value)))
def tgread_date(self):
"""Reads and converts Unix time (used by Telegram)
into a Python datetime object.
"""
value = self.read_int()
return _EPOCH + timedelta(seconds=value)
def tgread_object(self):
"""Reads a Telegram object."""
constructor_id = self.read_int(signed=False)
clazz = tlobjects.get(constructor_id, None)
if clazz is None:
# The class was None, but there's still a
# chance of it being a manually parsed value like bool!
value = constructor_id
if value == 0x997275b5: # boolTrue
return True
elif value == 0xbc799737: # boolFalse
return False
elif value == 0x1cb5c415: # Vector
return [self.tgread_object() for _ in range(self.read_int())]
clazz = core_objects.get(constructor_id, None)
if clazz is None:
# If there was still no luck, give up
self.seek(-4) # Go back
pos = self.tell_position()
error = TypeNotFoundError(constructor_id, self.read())
self.set_position(pos)
raise error
return clazz.from_reader(self)
def tgread_vector(self):
"""Reads a vector (a list) of Telegram objects."""
if 0x1cb5c415 != self.read_int(signed=False):
raise RuntimeError('Invalid constructor code, vector was expected')
count = self.read_int()
return [self.tgread_object() for _ in range(count)]
# endregion
def close(self):
"""Closes the reader, freeing the BytesIO stream."""
self.stream.close()
# region Position related
def tell_position(self):
"""Tells the current position on the stream."""
return self.stream.tell()
def set_position(self, position):
"""Sets the current position on the stream."""
self.stream.seek(position)
def seek(self, offset):
"""
Seeks the stream position given an offset from the current position.
The offset may be negative.
"""
self.stream.seek(offset, os.SEEK_CUR)
# endregion
# region with block
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
# endregion

View File

@@ -0,0 +1,195 @@
"""
Simple HTML -> Telegram entity parser.
"""
import struct
from collections import deque
from html import escape
from html.parser import HTMLParser
from typing import Iterable, Optional, Tuple, List
from ..helpers import add_surrogate, del_surrogate, within_surrogate, strip_text
from ..tl import TLObject
from ..tl.types import (
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
MessageEntityTextUrl, MessageEntityMentionName,
MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote,
TypeMessageEntity
)
class HTMLToTelegramParser(HTMLParser):
def __init__(self):
super().__init__()
self.text = ''
self.entities = []
self._building_entities = {}
self._open_tags = deque()
self._open_tags_meta = deque()
def handle_starttag(self, tag, attrs):
self._open_tags.appendleft(tag)
self._open_tags_meta.appendleft(None)
attrs = dict(attrs)
EntityType = None
args = {}
if tag == 'strong' or tag == 'b':
EntityType = MessageEntityBold
elif tag == 'em' or tag == 'i':
EntityType = MessageEntityItalic
elif tag == 'u':
EntityType = MessageEntityUnderline
elif tag == 'del' or tag == 's':
EntityType = MessageEntityStrike
elif tag == 'blockquote':
EntityType = MessageEntityBlockquote
elif tag == 'code':
try:
# If we're in the middle of a <pre> tag, this <code> tag is
# probably intended for syntax highlighting.
#
# Syntax highlighting is set with
# <code class='language-...'>codeblock</code>
# inside <pre> tags
pre = self._building_entities['pre']
try:
pre.language = attrs['class'][len('language-'):]
except KeyError:
pass
except KeyError:
EntityType = MessageEntityCode
elif tag == 'pre':
EntityType = MessageEntityPre
args['language'] = ''
elif tag == 'a':
try:
url = attrs['href']
except KeyError:
return
if url.startswith('mailto:'):
url = url[len('mailto:'):]
EntityType = MessageEntityEmail
else:
if self.get_starttag_text() == url:
EntityType = MessageEntityUrl
else:
EntityType = MessageEntityTextUrl
args['url'] = del_surrogate(url)
url = None
self._open_tags_meta.popleft()
self._open_tags_meta.appendleft(url)
if EntityType and tag not in self._building_entities:
self._building_entities[tag] = EntityType(
offset=len(self.text),
# The length will be determined when closing the tag.
length=0,
**args)
def handle_data(self, text):
previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ''
if previous_tag == 'a':
url = self._open_tags_meta[0]
if url:
text = url
for tag, entity in self._building_entities.items():
entity.length += len(text)
self.text += text
def handle_endtag(self, tag):
try:
self._open_tags.popleft()
self._open_tags_meta.popleft()
except IndexError:
pass
entity = self._building_entities.pop(tag, None)
if entity:
self.entities.append(entity)
def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
"""
Parses the given HTML message and returns its stripped representation
plus a list of the MessageEntity's that were found.
:param html: the message with HTML to be parsed.
:return: a tuple consisting of (clean message, [message entities]).
"""
if not html:
return html, []
parser = HTMLToTelegramParser()
parser.feed(add_surrogate(html))
text = strip_text(parser.text, parser.entities)
parser.entities.reverse()
parser.entities.sort(key=lambda entity: entity.offset)
return del_surrogate(text), parser.entities
ENTITY_TO_FORMATTER = {
MessageEntityBold: ('<strong>', '</strong>'),
MessageEntityItalic: ('<em>', '</em>'),
MessageEntityCode: ('<code>', '</code>'),
MessageEntityUnderline: ('<u>', '</u>'),
MessageEntityStrike: ('<del>', '</del>'),
MessageEntityBlockquote: ('<blockquote>', '</blockquote>'),
MessageEntityPre: lambda e, _: (
"<pre>\n"
" <code class='language-{}'>\n"
" ".format(e.language), "{}\n"
" </code>\n"
"</pre>"
),
MessageEntityEmail: lambda _, t: ('<a href="mailto:{}">'.format(t), '</a>'),
MessageEntityUrl: lambda _, t: ('<a href="{}">'.format(t), '</a>'),
MessageEntityTextUrl: lambda e, _: ('<a href="{}">'.format(escape(e.url)), '</a>'),
MessageEntityMentionName: lambda e, _: ('<a href="tg://user?id={}">'.format(e.user_id), '</a>'),
}
def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str:
"""
Performs the reverse operation to .parse(), effectively returning HTML
given a normal text and its MessageEntity's.
:param text: the text to be reconverted into HTML.
:param entities: the MessageEntity's applied to the text.
:return: a HTML representation of the combination of both inputs.
"""
if not text:
return text
elif not entities:
return escape(text)
if isinstance(entities, TLObject):
entities = (entities,)
text = add_surrogate(text)
insert_at = []
for i, entity in enumerate(entities):
s = entity.offset
e = entity.offset + entity.length
delimiter = ENTITY_TO_FORMATTER.get(type(entity), None)
if delimiter:
if callable(delimiter):
delimiter = delimiter(entity, text[s:e])
insert_at.append((s, i, delimiter[0]))
insert_at.append((e, -i, delimiter[1]))
insert_at.sort(key=lambda t: (t[0], t[1]))
next_escape_bound = len(text)
while insert_at:
# Same logic as markdown.py
at, _, what = insert_at.pop()
while within_surrogate(text, at):
at += 1
text = text[:at] + what + escape(text[at:next_escape_bound]) + text[next_escape_bound:]
next_escape_bound = at
text = escape(text[:next_escape_bound]) + text[next_escape_bound:]
return del_surrogate(text)

View File

@@ -0,0 +1,197 @@
"""
Simple markdown parser which does not support nesting. Intended primarily
for use within the library, which attempts to handle emojies correctly,
since they seem to count as two characters and it's a bit strange.
"""
import re
import warnings
from ..helpers import add_surrogate, del_surrogate, within_surrogate, strip_text
from ..tl import TLObject
from ..tl.types import (
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
MessageEntityPre, MessageEntityTextUrl, MessageEntityMentionName,
MessageEntityStrike
)
DEFAULT_DELIMITERS = {
'**': MessageEntityBold,
'__': MessageEntityItalic,
'~~': MessageEntityStrike,
'`': MessageEntityCode,
'```': MessageEntityPre
}
DEFAULT_URL_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
DEFAULT_URL_FORMAT = '[{0}]({1})'
def overlap(a, b, x, y):
return max(a, x) < min(b, y)
def parse(message, delimiters=None, url_re=None):
"""
Parses the given markdown message and returns its stripped representation
plus a list of the MessageEntity's that were found.
:param message: the message with markdown-like syntax to be parsed.
:param delimiters: the delimiters to be used, {delimiter: type}.
:param url_re: the URL bytes regex to be used. Must have two groups.
:return: a tuple consisting of (clean message, [message entities]).
"""
if not message:
return message, []
if url_re is None:
url_re = DEFAULT_URL_RE
elif isinstance(url_re, str):
url_re = re.compile(url_re)
if not delimiters:
if delimiters is not None:
return message, []
delimiters = DEFAULT_DELIMITERS
# Build a regex to efficiently test all delimiters at once.
# Note that the largest delimiter should go first, we don't
# want ``` to be interpreted as a single back-tick in a code block.
delim_re = re.compile('|'.join('({})'.format(re.escape(k))
for k in sorted(delimiters, key=len, reverse=True)))
# Cannot use a for loop because we need to skip some indices
i = 0
result = []
# Work on byte level with the utf-16le encoding to get the offsets right.
# The offset will just be half the index we're at.
message = add_surrogate(message)
while i < len(message):
m = delim_re.match(message, pos=i)
# Did we find some delimiter here at `i`?
if m:
delim = next(filter(None, m.groups()))
# +1 to avoid matching right after (e.g. "****")
end = message.find(delim, i + len(delim) + 1)
# Did we find the earliest closing tag?
if end != -1:
# Remove the delimiter from the string
message = ''.join((
message[:i],
message[i + len(delim):end],
message[end + len(delim):]
))
# Check other affected entities
for ent in result:
# If the end is after our start, it is affected
if ent.offset + ent.length > i:
# If the old start is also before ours, it is fully enclosed
if ent.offset <= i:
ent.length -= len(delim) * 2
else:
ent.length -= len(delim)
# Append the found entity
ent = delimiters[delim]
if ent == MessageEntityPre:
result.append(ent(i, end - i - len(delim), '')) # has 'lang'
else:
result.append(ent(i, end - i - len(delim)))
# No nested entities inside code blocks
if ent in (MessageEntityCode, MessageEntityPre):
i = end - len(delim)
continue
elif url_re:
m = url_re.match(message, pos=i)
if m:
# Replace the whole match with only the inline URL text.
message = ''.join((
message[:m.start()],
m.group(1),
message[m.end():]
))
delim_size = m.end() - m.start() - len(m.group())
for ent in result:
# If the end is after our start, it is affected
if ent.offset + ent.length > m.start():
ent.length -= delim_size
result.append(MessageEntityTextUrl(
offset=m.start(), length=len(m.group(1)),
url=del_surrogate(m.group(2))
))
i += len(m.group(1))
continue
i += 1
message = strip_text(message, result)
return del_surrogate(message), result
def unparse(text, entities, delimiters=None, url_fmt=None):
"""
Performs the reverse operation to .parse(), effectively returning
markdown-like syntax given a normal text and its MessageEntity's.
:param text: the text to be reconverted into markdown.
:param entities: the MessageEntity's applied to the text.
:return: a markdown-like text representing the combination of both inputs.
"""
if not text or not entities:
return text
if not delimiters:
if delimiters is not None:
return text
delimiters = DEFAULT_DELIMITERS
if url_fmt is not None:
warnings.warn('url_fmt is deprecated') # since it complicates everything *a lot*
if isinstance(entities, TLObject):
entities = (entities,)
text = add_surrogate(text)
delimiters = {v: k for k, v in delimiters.items()}
insert_at = []
for i, entity in enumerate(entities):
s = entity.offset
e = entity.offset + entity.length
delimiter = delimiters.get(type(entity), None)
if delimiter:
insert_at.append((s, i, delimiter))
insert_at.append((e, -i, delimiter))
else:
url = None
if isinstance(entity, MessageEntityTextUrl):
url = entity.url
elif isinstance(entity, MessageEntityMentionName):
url = 'tg://user?id={}'.format(entity.user_id)
if url:
insert_at.append((s, i, '['))
insert_at.append((e, -i, ']({})'.format(url)))
insert_at.sort(key=lambda t: (t[0], t[1]))
while insert_at:
at, _, what = insert_at.pop()
# If we are in the middle of a surrogate nudge the position by -1.
# Otherwise we would end up with malformed text and fail to encode.
# For example of bad input: "Hi \ud83d\ude1c"
# https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
while within_surrogate(text, at):
at += 1
text = text[:at] + what + text[at:]
return del_surrogate(text)

View File

@@ -0,0 +1,111 @@
import asyncio
import collections
import io
import struct
from ..tl import TLRequest
from ..tl.core.messagecontainer import MessageContainer
from ..tl.core.tlmessage import TLMessage
class MessagePacker:
"""
This class packs `RequestState` as outgoing `TLMessages`.
The purpose of this class is to support putting N `RequestState` into a
queue, and then awaiting for "packed" `TLMessage` in the other end. The
simplest case would be ``State -> TLMessage`` (1-to-1 relationship) but
for efficiency purposes it's ``States -> Container`` (N-to-1).
This addresses several needs: outgoing messages will be smaller, so the
encryption and network overhead also is smaller. It's also a central
point where outgoing requests are put, and where ready-messages are get.
"""
def __init__(self, state, loggers):
self._state = state
self._deque = collections.deque()
self._ready = asyncio.Event()
self._log = loggers[__name__]
def append(self, state):
self._deque.append(state)
self._ready.set()
def extend(self, states):
self._deque.extend(states)
self._ready.set()
async def get(self):
"""
Returns (batch, data) if one or more items could be retrieved.
If the cancellation occurs or only invalid items were in the
queue, (None, None) will be returned instead.
"""
if not self._deque:
self._ready.clear()
await self._ready.wait()
buffer = io.BytesIO()
batch = []
size = 0
# Fill a new batch to return while the size is small enough,
# as long as we don't exceed the maximum length of messages.
while self._deque and len(batch) <= MessageContainer.MAXIMUM_LENGTH:
state = self._deque.popleft()
size += len(state.data) + TLMessage.SIZE_OVERHEAD
if size <= MessageContainer.MAXIMUM_SIZE:
state.msg_id = self._state.write_data_as_message(
buffer, state.data, isinstance(state.request, TLRequest),
after_id=state.after.msg_id if state.after else None
)
batch.append(state)
self._log.debug('Assigned msg_id = %d to %s (%x)',
state.msg_id, state.request.__class__.__name__,
id(state.request))
continue
if batch:
# Put the item back since it can't be sent in this batch
self._deque.appendleft(state)
break
# If a single message exceeds the maximum size, then the
# message payload cannot be sent. Telegram would forcibly
# close the connection; message would never be confirmed.
#
# We don't put the item back because it can never be sent.
# If we did, we would loop again and reach this same path.
# Setting the exception twice results in `InvalidStateError`
# and this method should never return with error, which we
# really want to avoid.
self._log.warning(
'Message payload for %s is too long (%d) and cannot be sent',
state.request.__class__.__name__, len(state.data)
)
state.future.set_exception(
ValueError('Request payload is too big'))
size = 0
continue
if not batch:
return None, None
if len(batch) > 1:
# Inlined code to pack several messages into a container
data = struct.pack(
'<Ii', MessageContainer.CONSTRUCTOR_ID, len(batch)
) + buffer.getvalue()
buffer = io.BytesIO()
container_id = self._state.write_data_as_message(
buffer, data, content_related=False
)
for s in batch:
s.container_id = container_id
data = buffer.getvalue()
return batch, data