utils.py

← Back to explorer
utils.py
#! /usr/bin/python3
# Created by AG on 09-09-2025

import re
import click
from dataclasses import dataclass
from typing import Literal, Dict, Optional, Tuple

##############################################################################
# Line-level patterns shared by the helpers in this module.

# Region markers: a comment line that reads "DOCSTRING START" or
# "DOCSTRING END". The comment may open with `#`, `//` or `/*` (optionally
# closed with `*/` on the same line); matching ignores letter case.
START_SEQUENCE = re.compile(
    r"^\s*(?:#|//|/\*)\s*DOCSTRING\s*START\s*(?:\*/)?\s*$", flags=re.IGNORECASE
)
END_SEQUENCE = re.compile(
    r"^\s*(?:#|//|/\*)\s*DOCSTRING\s*END\s*(?:\*/)?\s*$", flags=re.IGNORECASE
)

# A Python `def` / `async def` whose whole signature sits on one line and
# ends in `):` (no return annotation, no trailing comment).
FUNCTION_SEQUENCE = re.compile(
    r"^\s*(async\s+def|def)\s+\w+\s*\(.*\)\s*:\s*$"
)

# Opening triple quote of a Python docstring; group "q" holds the quotes.
PYTHON_DOCSTRING_SEQUENCE = re.compile(
    r'^\s*(?P<q>["\']{3})'
)

# First and last line of a Python signature that may span several lines:
# the start is `def name(`, the end is `)` + optional `-> type` + `:` +
# optional trailing comment.
FUNCTION_SEQUENCE_START = re.compile(r'^\s*(async\s+def|def)\s+[A-Za-z_][A-Za-z0-9_]*\s*\(')
FUNCTION_SEQUENCE_END = re.compile(
    r'\)\s*(->\s*[^:]+)?\s*:\s*(#.*)?$'
)

# C-family block comment delimiters: `/*` or `/**` at the start of a line,
# and `*/` anywhere in a line.
MULTILANG_COMMENT_OPEN = re.compile(
    r"^\s*/\*\*?"
)
MULTILANG_COMMENT_CLOSE = re.compile(
    r"\*/"
)
##############################################################################


@dataclass(frozen=True)
class LanguageCommentStyling:
    """Describe how a generated documentation comment is written for one language.

    Instances are immutable (``frozen=True``).
    """

    # Text of the line that opens the comment block; an empty string means no
    # opening line is emitted.
    opening_comment: str
    # Text placed in front of every documentation line inside the comment.
    line_prefix: str
    # Text of the line that closes the comment block; an empty string means no
    # closing line is emitted.
    closing_comment: str
    # Where the comment goes relative to the function: on the line after its
    # opening brace, or on the lines directly above its definition.
    comment_positioning: Literal["after_braces", "before_function_definition"]


# Comment conventions keyed by language identifier. The C-family languages
# share one `/* ... */` layout placed after the opening brace; Rust uses `///`
# lines placed above the function definition.
per_language_comment_styling: Dict[str, LanguageCommentStyling] = {
    **{
        language: LanguageCommentStyling(
            opening_comment="/*",
            line_prefix=" * ",
            closing_comment=" */",
            comment_positioning="after_braces",
        )
        for language in ("c", "cpp", "java", "js", "ts")
    },
    "rust": LanguageCommentStyling(
        opening_comment="",
        line_prefix="/// ",
        closing_comment="",
        comment_positioning="before_function_definition",
    ),
}


def find_sequence_pairs(file_lines: list[str]):
    """Yield the line-index pairs of every START/END marker region.

    Args:
        file_lines: The file content as a list of lines (it is indexed line
            by line, so a plain string is not a valid argument).

    Yields:
        ``(start, end)`` tuples where ``file_lines[start]`` matches
        ``START_SEQUENCE`` and ``file_lines[end]`` is the first later line
        matching ``END_SEQUENCE``. Scanning resumes on the line after ``end``.

    If a START marker has no END marker after it, a message is written to
    stderr through ``click.echo`` and the generator stops.
    """
    cursor = 0
    # len() is re-read on every pass (not cached) so that edits the consumer
    # makes to file_lines between yields are still seen.
    while cursor < len(file_lines):
        if not START_SEQUENCE.match(file_lines[cursor]):
            cursor += 1
            continue

        closing = cursor + 1
        while closing < len(file_lines) and \
                not END_SEQUENCE.match(file_lines[closing]):
            closing += 1

        if closing >= len(file_lines):
            click.echo(
                "START sequence has no matching END sequence!", err=True)
            return

        yield (cursor, closing)
        cursor = closing + 1


def function_line(
    file_lines: list[str],
    start_line: int,
    end_line: int
) -> None | int:
    """Find the line on which a Python function signature ends.

    Args:
        file_lines: The file content as a list of lines.
        start_line: Index of the first line to inspect.
        end_line: Index of the last line to inspect (inclusive).

    Returns:
        For a one-line signature matching ``FUNCTION_SEQUENCE``, the index of
        that line. For a signature that starts with ``FUNCTION_SEQUENCE_START``
        the index of the line where the parentheses are balanced again and
        ``FUNCTION_SEQUENCE_END`` matches. ``None`` when no signature is found,
        or when a signature starts but does not end within the range.
    """
    for index in range(start_line, end_line + 1):
        line = file_lines[index]
        if FUNCTION_SEQUENCE.match(line):
            return index

        if not FUNCTION_SEQUENCE_START.match(line):
            continue

        # Multi-line signature: follow the parenthesis balance line by line
        # until it closes on a line that also ends the signature.
        paren_balance = 0
        for closing_index in range(index, end_line + 1):
            candidate = file_lines[closing_index]
            paren_balance += candidate.count('(') - candidate.count(')')
            if paren_balance <= 0 and FUNCTION_SEQUENCE_END.search(candidate):
                return closing_index
        # The signature opened here never closed inside the range; later
        # lines are not examined.
        return None
    return None


def block_has_existing_docstring(
    file_lines: list[str],
    insert_at: int
) -> bool:
    """Tell whether a Python docstring already sits at or after ``insert_at``.

    Blank lines and ``#`` comment lines starting at ``insert_at`` are skipped;
    the first other line decides the result.

    Args:
        file_lines: The file content as a list of lines.
        insert_at: Index of the first line to inspect.

    Returns:
        ``True`` when the first significant line opens with a triple quote
        (``PYTHON_DOCSTRING_SEQUENCE``), ``False`` otherwise or when the end of
        the file is reached first.
    """
    for index in range(insert_at, len(file_lines)):
        stripped = file_lines[index].lstrip()
        # An empty result means the line held only whitespace.
        if not stripped or stripped.startswith("#"):
            continue
        return PYTHON_DOCSTRING_SEQUENCE.match(stripped) is not None
    return False


def multilang_block_has_existing_docstring(
        file_lines: list[str],
        after_brace_line: int
) -> bool:
    """Tell whether a block comment follows the line ``after_brace_line``.

    Scanning starts on the line after ``after_brace_line``; blank lines and
    ``//`` comment lines are skipped, and the first other line decides.

    Returns:
        ``True`` when that line starts with ``/*`` or ``/**``
        (``MULTILANG_COMMENT_OPEN``), otherwise ``False``.
    """
    for position in range(after_brace_line + 1, len(file_lines)):
        candidate = file_lines[position].lstrip()
        # An empty result means the line held only whitespace.
        if not candidate or candidate.startswith("//"):
            continue
        return MULTILANG_COMMENT_OPEN.match(candidate) is not None
    return False


def multilang_strip_existing_docstring(
        file_lines: list[str],
        after_brace_line: int
) -> bool:
    """Delete the block comment that follows ``after_brace_line``, if any.

    Scanning starts on the line after ``after_brace_line``; blank lines and
    ``//`` comment lines are skipped. If the first other line starts with
    ``/*``, every line from there through the line holding the matching ``*/``
    is removed from ``file_lines`` in place.

    The closing ``*/`` is looked for on the opening line first (after the
    ``/*`` itself), so a one-line ``/** ... */`` comment removes only that
    line instead of running on to the next ``*/`` further down the file.

    Args:
        file_lines: The file content as a list of lines; modified in place.
        after_brace_line: Index of the line that precedes the search.

    Returns:
        ``True`` when a comment was removed; ``False`` when no block comment
        is found or it is never closed (nothing is deleted then).
    """
    opener_index = None
    for index in range(after_brace_line + 1, len(file_lines)):
        stripped = file_lines[index].lstrip()
        if not stripped or stripped.startswith("//"):
            continue
        if stripped.startswith("/*"):
            opener_index = index
        break

    if opener_index is None:
        return False

    # Skip the two characters of the opener so that "/*/" is not mistaken
    # for an already closed comment.
    after_opener = file_lines[opener_index].lstrip()[2:]
    if "*/" in after_opener:
        closer_index = opener_index
    else:
        closer_index = next(
            (
                index
                for index in range(opener_index + 1, len(file_lines))
                if "*/" in file_lines[index]
            ),
            None,
        )
        if closer_index is None:
            # Unterminated comment: leave the file untouched.
            return False

    del file_lines[opener_index: closer_index + 1]
    return True


def make_indent_after_function(file_lines: list[str], function_line: int) -> int:
    """Return the number of spaces to indent text placed inside a function.

    The indentation of the first non-blank line after ``function_line`` is
    used when it is deeper than the indentation of ``function_line`` itself;
    otherwise the result is that line's indentation plus four.

    Only leading space characters are counted; tabs are not.

    Args:
        file_lines: The file content as a list of lines.
        function_line: Index of the line the body follows.

    Returns:
        The indentation width, in spaces.
    """
    def leading_spaces(text: str) -> int:
        return len(text) - len(text.lstrip(" "))

    function_indent = leading_spaces(file_lines[function_line])

    for index in range(function_line + 1, len(file_lines)):
        following = file_lines[index]
        if following.strip() == "":
            continue
        # Only the first non-blank line is considered.
        body_indent = leading_spaces(following)
        if body_indent > function_indent:
            return body_indent
        break

    return function_indent + 4


def insert_docstring_in_function(
    file_lines: list[str],
    function_index: int,
    docstring: str
) -> int:
    """Insert ``docstring`` as a triple-quoted block after ``function_index``.

    The text is followed by a blank line and the "Generated by DocNerd"
    footer, wrapped between two lines of triple double quotes, and indented
    with the width reported by ``make_indent_after_function``.

    Blank docstring lines are written without indentation so no
    whitespace-only lines are produced, and the closing quotes follow the
    footer directly. Any triple double quote inside the text is
    backslash-escaped so it cannot end the generated docstring early.

    Args:
        file_lines: The file content as a list of lines; modified in place.
        function_index: Index of the line the docstring is inserted after.
        docstring: The documentation text, without surrounding quotes.

    Returns:
        The number of lines inserted into ``file_lines``.
    """
    indent = " " * make_indent_after_function(
        file_lines=file_lines,
        function_line=function_index
    )

    if not docstring.endswith("\n"):
        docstring += "\n"
    docstring += "\nGenerated by DocNerd"

    # An unescaped closing delimiter in the text would break the source file.
    docstring = docstring.replace('"""', '\\"\\"\\"')

    quotes_line = f"{indent}\"\"\"\n"
    code_block: list[str] = [
        quotes_line,
        *(
            f"{indent}{line}\n" if line.strip() else "\n"
            for line in docstring.splitlines()
        ),
        quotes_line,
    ]

    file_lines[function_index + 1: function_index + 1] = code_block
    return len(code_block)


def strip_existing_docstring(file_lines: list[str], function_index: int) -> bool:
    """Delete the docstring that follows ``function_index``, if there is one.

    Scanning starts on the line after ``function_index``; blank lines and
    ``#`` comment lines are skipped. If the first other line opens with three
    double or three single quotes, every line from there through the line
    holding the matching closing quotes is removed from ``file_lines``.

    The closing quotes are recognised anywhere in a line, including on the
    opening line itself, so one-line docstrings and docstrings whose last
    text line ends with the quotes are removed without touching later code.

    Args:
        file_lines: The file content as a list of lines; modified in place.
        function_index: Index of the line that precedes the search.

    Returns:
        ``True`` when a docstring was removed; ``False`` when none is found
        or it is never closed (nothing is deleted then).
    """
    opening_index = None
    for index in range(function_index + 1, len(file_lines)):
        stripped = file_lines[index].lstrip()
        if not stripped or stripped.startswith("#"):
            continue
        if stripped.startswith(('"""', "'''")):
            opening_index = index
        break

    if opening_index is None:
        return False

    opening_text = file_lines[opening_index].lstrip()
    quote = opening_text[:3]

    if quote in opening_text[3:]:
        # Opened and closed on the same line.
        closing_index = opening_index
    else:
        closing_index = next(
            (
                index
                for index in range(opening_index + 1, len(file_lines))
                if quote in file_lines[index]
            ),
            None,
        )
        if closing_index is None:
            # Unterminated docstring: leave the file untouched.
            return False

    del file_lines[opening_index: closing_index + 1]
    return True


def remove_docstring_sequences(file_path: str) -> None:
    """Rewrite ``file_path`` without its START/END marker lines.

    Every line matching ``START_SEQUENCE`` or ``END_SEQUENCE`` is dropped;
    all other lines are written back unchanged. The file is opened with
    ``newline=""`` for both reading and writing so that the existing line
    endings are kept as they are instead of being translated.

    Args:
        file_path: Path of the UTF-8 text file to rewrite in place.
    """
    with open(file_path, "r", encoding="utf-8", newline="") as file:
        file_lines = file.readlines()

    kept_lines = [
        line for line in file_lines
        if not (START_SEQUENCE.match(line) or END_SEQUENCE.match(line))
    ]

    with open(file_path, "w", encoding="utf-8", newline="") as file:
        file.writelines(kept_lines)


def strip_comments(
    string: str,
    in_block_comment: bool
) -> Tuple[str, bool]:
    """Remove ``//`` and ``/* ... */`` comment text from one line.

    Args:
        string: The line to process.
        in_block_comment: ``True`` when the line starts inside a block
            comment opened on an earlier line.

    Returns:
        A tuple ``(code, in_block_comment)``: the characters of ``string``
        that lie outside comments, and whether a block comment is still open
        at the end of the line.

    Note:
        String literals are not recognised, so comment markers inside quotes
        are treated as comments as well.
    """
    out: list = []
    index: int = 0

    while index < len(string):
        if in_block_comment:
            # Jump straight to the terminator; stepping through the comment
            # two characters at a time could straddle the "*/" and miss it.
            close_at = string.find("*/", index)
            if close_at == -1:
                break
            in_block_comment = False
            index = close_at + 2
            continue

        if string.startswith("//", index):
            # Line comment: the rest of the line is ignored.
            break

        if string.startswith("/*", index):
            in_block_comment = True
            index += 2
            continue

        out.append(string[index])
        index += 1

    return "".join(out), in_block_comment


def find_open_brace_line_index(
    file_lines: list[str],
    start_line: int,
    end_line: int
) -> Optional[int]:
    """Return the index of the first line holding ``{`` outside a comment.

    Lines are passed through ``strip_comments`` one after another, carrying
    the block-comment state from line to line.

    Args:
        file_lines: The file content as a list of lines.
        start_line: Index of the first line to inspect.
        end_line: Index of the last line to inspect (inclusive). Values past
            the end of ``file_lines`` are clamped to the last line, so the
            final line of the file is inspected too.

    Returns:
        The index of the first matching line, or ``None`` when there is none.
    """
    in_block_comment = False
    last_line = min(end_line, len(file_lines) - 1)

    for index in range(start_line, last_line + 1):
        code_block, in_block_comment = strip_comments(
            file_lines[index],
            in_block_comment
        )
        if "{" in code_block:
            return index
    return None


def _leading_whitespace(text: str) -> str:
    """Return the run of whitespace that ``text`` starts with."""
    return text[: len(text) - len(text.lstrip())]


def _body_indent(file_lines: list[str], brace_line: int) -> str:
    """Return the indentation for text placed right after ``brace_line``."""
    brace_indent = _leading_whitespace(file_lines[brace_line])
    for following in file_lines[brace_line + 1:]:
        if not following.strip():
            continue
        # Reuse the indentation of the existing body when it is deeper than
        # the brace line; only the first non-blank line is considered.
        candidate = _leading_whitespace(following)
        if len(candidate) > len(brace_indent):
            return candidate
        break
    return brace_indent + " " * 4


def insert_documentation_block(
    file_lines: list[str],
    language: str,
    function_index: int,
    docstring: str
) -> int:
    """Insert ``docstring`` as a comment block using the language's style.

    The style is looked up in ``per_language_comment_styling``. The text is
    followed by a blank line and the "Generated by DocNerd" footer. For
    ``"after_braces"`` styles the block is placed on the line after the first
    opening brace found from ``function_index`` onwards, indented like the
    body that follows the brace (or one level deeper than the brace line when
    no body line is available). Otherwise it is placed directly above
    ``function_index`` with that line's indentation.

    Comment lines carry no trailing whitespace, and any occurrence of the
    style's closing delimiter inside the text is split with a space so it
    cannot end the comment early.

    Args:
        file_lines: The file content as a list of lines; modified in place.
        language: Key into ``per_language_comment_styling``.
        function_index: Index of the line where the function starts.
        docstring: The documentation text, without comment markers.

    Returns:
        The number of lines inserted, or ``0`` when the language is unknown
        or no opening brace is found.
    """
    style = per_language_comment_styling.get(language)
    if style is None:
        return 0

    if not docstring.endswith("\n"):
        docstring += "\n"
    docstring += "\nGenerated by DocNerd"

    # e.g. "*/" becomes "* /" so the generated comment stays well-formed.
    terminator = style.closing_comment.strip()
    if terminator:
        docstring = docstring.replace(terminator, " ".join(terminator))

    if style.comment_positioning == "after_braces":
        brace_line = find_open_brace_line_index(
            file_lines=file_lines,
            start_line=function_index,
            end_line=len(file_lines) - 1
        )
        if brace_line is None:
            return 0
        indent = _body_indent(file_lines, brace_line)
        insert_at = brace_line + 1
    else:
        indent = _leading_whitespace(file_lines[function_index])
        insert_at = function_index

    block: list[str] = []
    if style.opening_comment:
        block.append(f"{indent}{style.opening_comment}\n")

    # rstrip() the assembled line so blank documentation lines do not end in
    # the padding that follows the line prefix.
    block.extend(
        f"{indent}{style.line_prefix}{text}".rstrip() + "\n"
        for text in docstring.splitlines()
    )

    if style.closing_comment:
        block.append(f"{indent}{style.closing_comment}\n")

    file_lines[insert_at:insert_at] = block
    return len(block)


def multilang_function_line(
        file_lines: list[str],
        start_line: int,
        end_line: int
) -> int | None:

    while start_line <= end_line:
        line = file_lines[start_line]

        if not line.strip() or line.lstrip().startswith(('#', '//')):
            start_line += 1
            continue

        if '(' in line:
            parenthesis_balance = line.count('(') - line.count(')')
            j = start_line
            while j <= end_line:
                if j > start_line:
                    parenthesis_balance += file_lines[j].count(
                        '(') - file_lines[j].count(')')
                if parenthesis_balance <= 0:
                    comment_end_line = j
                    k = comment_end_line
                    while k <= end_line:
                        texts = file_lines[k].strip()
                        if not texts or texts.startswith('//'):
                            k += 1
                            continue
                        if '{' in texts:
                            return comment_end_line
                        if ';' in texts:
                            break
                        k += 1
                    break
                j += 1
        start_line += 1
    return None