debputy.lsp.lsp_generic_deb822

src/debputy/lsp/lsp_generic_deb822.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995	import re from itertools import chain from typing import ( Optional, Union, Tuple, Any, List, cast, TYPE_CHECKING, ) from 
collections.abc import Sequence, Container, Iterable, Iterator, Callable from debputy.linting.lint_util import ( LintState, te_position_to_lsp, with_range_in_continuous_parts, ) from debputy.lsp.config.config_options import DCO_SPELLCHECK_COMMENTS from debputy.lsp.debputy_ls import DebputyLanguageServer from debputy.lsp.lsp_debian_control_reference_data import ( Deb822FileMetadata, Deb822KnownField, StanzaMetadata, F, S, SUBSTVAR_RE, _DEP_RELATION_CLAUSE, ) from debputy.lsp.lsp_features import SEMANTIC_TOKEN_TYPES_IDS from debputy.lsp.quickfixes import propose_correct_text_quick_fix from debputy.lsp.text_util import ( trim_end_of_line_whitespace, SemanticTokensState, ) from debputy.lsp.vendoring._deb822_repro.locatable import ( START_POSITION, Range as TERange, Position as TEPosition, ) from debputy.lsp.vendoring._deb822_repro.parsing import ( Deb822KeyValuePairElement, Deb822ParagraphElement, Deb822FileElement, Deb822CommentElement, Deb822ParsedTokenList, Interpretation, ) from debputy.lsp.vendoring._deb822_repro.tokens import tokenize_deb822_file, Deb822Token from debputy.lsp.vendoring._deb822_repro.types import TokenOrElement from debputy.lsprotocol.types import ( CompletionParams, CompletionList, CompletionItem, Position, MarkupContent, Hover, MarkupKind, HoverParams, FoldingRangeParams, FoldingRange, FoldingRangeKind, SemanticTokensParams, SemanticTokens, TextEdit, MessageType, SemanticTokenTypes, ) from debputy.util import _info, _warn if TYPE_CHECKING: import lsprotocol.types as types else: import debputy.lsprotocol.types as types try: from pygls.server import LanguageServer from pygls.workspace import TextDocument except ImportError: pass _CONTAINS_SPACE_OR_COLON = re.compile(r"[\s:]") def in_range( te_range: TERange, cursor_position: Position, , inclusive_end: bool = False, ) -> bool: cursor_line = cursor_position.line start_pos = te_range.start_pos end_pos = te_range.end_pos if cursor_line < start_pos.line_position or cursor_line > end_pos.line_position: 
return False if start_pos.line_position == end_pos.line_position: start_col = start_pos.cursor_position cursor_col = cursor_position.character end_col = end_pos.cursor_position if inclusive_end: return start_col <= cursor_col <= end_col return start_col <= cursor_col < end_col if cursor_line == end_pos.line_position: return cursor_position.character < end_pos.cursor_position return ( cursor_line > start_pos.line_position or start_pos.cursor_position <= cursor_position.character ) def _field_at_position( stanza: Deb822ParagraphElement, stanza_metadata: S, stanza_range: TERange, position: Position, ) -> tuple[Deb822KeyValuePairElement \| None, F \| None, str, bool]: te_range = TERange(stanza_range.start_pos, stanza_range.start_pos) for token_or_element in stanza.iter_parts(): te_range = token_or_element.size().relative_to(te_range.end_pos) if not in_range(te_range, position): continue if isinstance(token_or_element, Deb822KeyValuePairElement): value_range = token_or_element.value_element.range_in_parent().relative_to( te_range.start_pos ) known_field = stanza_metadata.get(token_or_element.field_name) in_value = in_range(value_range, position) interpreter = ( known_field.field_value_class.interpreter() if known_field is not None else None ) matched_value = "" if in_value and interpreter is not None: interpreted = token_or_element.interpret_as(interpreter) for value_ref in interpreted.iter_value_references(): value_token_range = ( value_ref.locatable.range_in_parent().relative_to( value_range.start_pos ) ) if in_range(value_token_range, position, inclusive_end=True): matched_value = value_ref.value break return token_or_element, known_field, matched_value, in_value return None, None, "", False def _allow_stanza_continuation( token_or_element: TokenOrElement, is_completion: bool, ) -> bool: if not is_completion: return False if token_or_element.is_error or token_or_element.is_comment: return True return ( token_or_element.is_whitespace and 
token_or_element.convert_to_text().count("\n") < 2 ) def _at_cursor( deb822_file: Deb822FileElement, file_metadata: Deb822FileMetadata[S, F], doc: "TextDocument", lines: list[str], client_position: Position, is_completion: bool = False, ) -> tuple[ Position, str \| None, str, bool, S \| None, F \| None, Iterable[Deb822ParagraphElement], ]: server_position = doc.position_codec.position_from_client_units( lines, client_position, ) te_range = TERange( START_POSITION, START_POSITION, ) paragraph_no = -1 previous_stanza: Deb822ParagraphElement \| None = None next_stanza: Deb822ParagraphElement \| None = None current_word = doc.word_at_position(client_position) in_value: bool = False file_iter = iter(deb822_file.iter_parts()) matched_token: TokenOrElement \| None = None matched_field: str \| None = None stanza_metadata: S \| None = None known_field: F \| None = None for token_or_element in file_iter: te_range = token_or_element.size().relative_to(te_range.end_pos) if isinstance(token_or_element, Deb822ParagraphElement): previous_stanza = token_or_element paragraph_no += 1 elif not _allow_stanza_continuation(token_or_element, is_completion): previous_stanza = None if not in_range(te_range, server_position): continue matched_token = token_or_element if isinstance(token_or_element, Deb822ParagraphElement): stanza_metadata = file_metadata.guess_stanza_classification_by_idx( paragraph_no ) kvpair, known_field, current_word, in_value = _field_at_position( token_or_element, stanza_metadata, te_range, server_position, ) if kvpair is not None: matched_field = kvpair.field_name break if matched_token is not None and _allow_stanza_continuation( matched_token, is_completion, ): next_te = next(file_iter, None) if isinstance(next_te, Deb822ParagraphElement): next_stanza = next_te stanza_parts = (p for p in (previous_stanza, next_stanza) if p is not None) if stanza_metadata is None and is_completion: if paragraph_no < 0: paragraph_no = 0 stanza_metadata = 
file_metadata.guess_stanza_classification_by_idx(paragraph_no) return ( server_position, matched_field, current_word, in_value, stanza_metadata, known_field, stanza_parts, ) def deb822_completer( ls: "DebputyLanguageServer", params: CompletionParams, file_metadata: Deb822FileMetadata[Any, Any], ) -> CompletionList \| Sequence[CompletionItem] \| None: doc = ls.workspace.get_text_document(params.text_document.uri) lines = doc.lines lint_state = ls.lint_state(doc) deb822_file = lint_state.parsed_deb822_file_content if not file_metadata.file_metadata_applies_to_file(deb822_file): return None ( server_pos, current_field, word_at_position, in_value, stanza_metadata, known_field, matched_stanzas, ) = _at_cursor( deb822_file, file_metadata, doc, lines, params.position, is_completion=True, ) if lines[server_pos.line].startswith("#"): return items: Sequence[CompletionItem] \| None markdown_kind = ls.completion_item_document_markup( MarkupKind.Markdown, MarkupKind.PlainText ) if in_value: _info(f"Completion for field value {current_field} -- {word_at_position}") if known_field is None: return None value_being_completed = word_at_position items = known_field.value_options_for_completer( lint_state, list(matched_stanzas), value_being_completed, markdown_kind, ) else: _info("Completing field name") assert stanza_metadata is not None items = _complete_field_name( lint_state, stanza_metadata, matched_stanzas, markdown_kind, ) return items def deb822_hover( ls: "DebputyLanguageServer", params: HoverParams, file_metadata: Deb822FileMetadata[S, F], , custom_handler: None \| ( Callable[ [ "DebputyLanguageServer", Position, str \| None, str, F \| None, bool, "TextDocument", list[str], ], Hover \| None, ] ) = None, ) -> Hover \| None: doc = ls.workspace.get_text_document(params.text_document.uri) deb822_file = ls.lint_state(doc).parsed_deb822_file_content if not file_metadata.file_metadata_applies_to_file(deb822_file): return None lines = doc.lines ( server_pos, current_field, 
word_at_position, in_value, _, known_field, _, ) = _at_cursor( deb822_file, file_metadata, doc, lines, params.position, ) if lines[server_pos.line].startswith("#"): return hover_text = None if custom_handler is not None: res = custom_handler( ls, server_pos, current_field, word_at_position, known_field, in_value, doc, lines, ) if isinstance(res, Hover): return res hover_text = res if hover_text is None: if current_field is None: _info("No hover information as we cannot determine which field it is for") return None if known_field is None: return None if in_value: if not known_field.known_values: return None keyword = known_field.known_values.get(word_at_position) if keyword is None: return None hover_text = keyword.long_description_translated(ls) if hover_text is not None: header = "`{VALUE}` (Field: {FIELD_NAME})".format( VALUE=keyword.value, FIELD_NAME=known_field.name, ) hover_text = f"# {header}\n\n{hover_text}" else: hover_text = known_field.long_description_translated(ls) if hover_text is None: hover_text = ( f"No documentation is available for the field {current_field}." 
) hover_text = f"# {known_field.name}\n\n{hover_text}" if hover_text is None: return None return Hover( contents=MarkupContent( kind=ls.hover_markup_format(MarkupKind.Markdown, MarkupKind.PlainText), value=hover_text, ) ) def deb822_token_iter( tokens: Iterable[Deb822Token], ) -> Iterator[tuple[Deb822Token, int, int, int, int]]: line_no = 0 line_offset = 0 for token in tokens: start_line = line_no start_line_offset = line_offset newlines = token.text.count("\n") line_no += newlines text_len = len(token.text) if newlines: if token.text.endswith("\n"): line_offset = 0 else: # -2, one to remove the "\n" and one to get 0-offset line_offset = text_len - token.text.rindex("\n") - 2 else: line_offset += text_len yield token, start_line, start_line_offset, line_no, line_offset def deb822_folding_ranges( ls: "DebputyLanguageServer", params: FoldingRangeParams, file_metadata: Deb822FileMetadata[Any, Any], ) -> Sequence[FoldingRange] \| None: doc = ls.workspace.get_text_document(params.text_document.uri) deb822_file = ls.lint_state(doc).parsed_deb822_file_content if not file_metadata.file_metadata_applies_to_file(deb822_file): return None comment_start = -1 folding_ranges = [] for ( token, start_line, start_offset, end_line, end_offset, ) in deb822_token_iter(deb822_file.iter_tokens()): if token.is_comment: if comment_start < 0: comment_start = start_line elif comment_start > -1: comment_start = -1 folding_range = FoldingRange( comment_start, end_line, kind=FoldingRangeKind.Comment, ) folding_ranges.append(folding_range) return folding_ranges class Deb822SemanticTokensState(SemanticTokensState): __slots__ = ( "file_metadata", "keyword_token_code", "known_value_token_code", "comment_token_code", "substvars_token_code", "operator_token_code", "relation_restriction_token_code", "package_token_code", "allow_overlapping_tokens", ) def __init__( self, ls: "DebputyLanguageServer", doc: "TextDocument", lines: list[str], tokens: list[int], file_metadata: Deb822FileMetadata[Any, Any], 
) -> None: super().__init__(ls, doc, lines, tokens) self.file_metadata = file_metadata self.keyword_token_code = SEMANTIC_TOKEN_TYPES_IDS[SemanticTokenTypes.Keyword] self.known_value_token_code = SEMANTIC_TOKEN_TYPES_IDS[ SemanticTokenTypes.EnumMember ] self.comment_token_code = SEMANTIC_TOKEN_TYPES_IDS[ SemanticTokenTypes.Comment.value ] self.substvars_token_code = SEMANTIC_TOKEN_TYPES_IDS[SemanticTokenTypes.Macro] self.operator_token_code = SEMANTIC_TOKEN_TYPES_IDS[SemanticTokenTypes.Operator] self.relation_restriction_token_code = SEMANTIC_TOKEN_TYPES_IDS[ SemanticTokenTypes.TypeParameter ] self.package_token_code = SEMANTIC_TOKEN_TYPES_IDS[SemanticTokenTypes.Variable] def _emit_tokens_for_comment_element( sem_token_state: Deb822SemanticTokensState, comment_element: Deb822CommentElement, comment_start_line: int, comment_token_code: int, ) -> None: for comment_line_no, comment_token in enumerate( comment_element.iter_parts(), start=comment_start_line, ): assert comment_token.is_comment assert isinstance(comment_token, Deb822Token) sem_token_state.emit_token( Position(comment_line_no, 0), len(comment_token.text.rstrip()), comment_token_code, ) async def scan_for_syntax_errors_and_token_level_diagnostics( deb822_file: Deb822FileElement, lint_state: LintState, ) -> int: first_error = len(lint_state.lines) + 1 spell_checker = lint_state.spellchecker() async for ( token, start_line, start_offset, end_line, end_offset, ) in lint_state.slow_iter(deb822_token_iter(deb822_file.iter_tokens())): if token.is_error: first_error = min(first_error, start_line) token_range = TERange( TEPosition( start_line, start_offset, ), TEPosition( end_line, end_offset, ), ) lint_state.emit_diagnostic( token_range, "Syntax error", "error", "debputy", ) elif token.is_comment: if not lint_state.debputy_config.config_value(DCO_SPELLCHECK_COMMENTS): continue for word, col_pos, end_col_pos in spell_checker.iter_words(token.text): corrections = spell_checker.provide_corrections_for(word) if not 
corrections: continue word_range = TERange.between( TEPosition( start_line, col_pos, ), TEPosition( start_line, end_col_pos, ), ) lint_state.emit_diagnostic( word_range, f'Spelling "{word}"', "spelling", "debputy", quickfixes=[propose_correct_text_quick_fix(c) for c in corrections], enable_non_interactive_auto_fix=False, ) return first_error def _emit_relation_token( sem_token_state: Deb822SemanticTokensState, token_code: int \| None, m: re.Match[str], group_name: str, value_range_te: TERange, ) -> None: token_value = m.group(group_name) token_start = m.start(group_name) if token_value is None or token_start is None: return pos = TEPosition( value_range_te.start_pos.line_position, value_range_te.start_pos.cursor_position + token_start, ) end_pos = TEPosition( pos.line_position, pos.cursor_position + len(token_value), ) _process_value_with_substvars( sem_token_state, token_value, TERange.between(pos, end_pos), token_code, ) async def _deb822_relationship_field_semantic_tokens_full( sem_token_state: Deb822SemanticTokensState, interpretation: Interpretation[Deb822ParsedTokenList[Any, Any]], kvpair: Deb822KeyValuePairElement, value_element_pos: TEPosition, ) -> None: doc = sem_token_state.doc parts = kvpair.interpret_as(interpretation).iter_parts() comment_token_code = sem_token_state.comment_token_code operator_token_code = sem_token_state.operator_token_code relation_restriction_token_code = sem_token_state.relation_restriction_token_code package_token_code = sem_token_state.package_token_code for te in parts: if te.is_whitespace: continue if te.is_separator: continue value_range_in_parent_te = te.range_in_parent() value_range_te = value_range_in_parent_te.relative_to(value_element_pos) value = te.convert_to_text() if te.is_comment: token_type = comment_token_code value = value.rstrip() value_len = doc.position_codec.client_num_units(value) sem_token_state.emit_token( te_position_to_lsp(value_range_te.start_pos), value_len, token_type, ) else: m = 
_DEP_RELATION_CLAUSE.fullmatch(value) _emit_relation_token( sem_token_state, package_token_code, m, "name_arch_qual", value_range_te, ) _emit_relation_token( sem_token_state, operator_token_code, m, "operator", value_range_te, ) _emit_relation_token( sem_token_state, None, m, "version", value_range_te, ) _emit_relation_token( sem_token_state, relation_restriction_token_code, m, "arch_restriction", value_range_te, ) _emit_relation_token( sem_token_state, relation_restriction_token_code, m, "build_profile_restriction", value_range_te, ) async def _deb822_paragraph_semantic_tokens_full( ls: "DebputyLanguageServer", sem_token_state: Deb822SemanticTokensState, stanza: Deb822ParagraphElement, stanza_range_in_file: "TERange", stanza_idx: int, ) -> None: doc = sem_token_state.doc keyword_token_code = sem_token_state.keyword_token_code known_value_token_code = sem_token_state.known_value_token_code comment_token_code = sem_token_state.comment_token_code stanza_position = stanza_range_in_file.start_pos stanza_metadata = sem_token_state.file_metadata.classify_stanza( stanza, stanza_idx=stanza_idx, ) async for kvpair_range, kvpair in ls.slow_iter( with_range_in_continuous_parts( stanza.iter_parts(), start_relative_to=stanza_position, ), yield_every=25, ): if not isinstance(kvpair, Deb822KeyValuePairElement): continue kvpair_position = kvpair_range.start_pos field_start = kvpair.field_token.position_in_parent().relative_to( kvpair_position ) comment = kvpair.comment_element if comment: comment_start_line = field_start.line_position - len(comment) _emit_tokens_for_comment_element( sem_token_state, comment, comment_start_line, comment_token_code, ) field_size = doc.position_codec.client_num_units(kvpair.field_name) sem_token_state.emit_token( te_position_to_lsp(field_start), field_size, keyword_token_code, ) known_field: Deb822KnownField \| None = stanza_metadata.get(kvpair.field_name) value_element_pos = kvpair.value_element.position_in_parent().relative_to( kvpair_position ) if 
known_field is not None: if known_field.spellcheck_value: continue interpretation = known_field.field_value_class.interpreter() if ( getattr(known_field, "is_relationship_field", False) and interpretation is not None ): await _deb822_relationship_field_semantic_tokens_full( sem_token_state, interpretation, kvpair, value_element_pos, ) continue known_values: Container[str] = known_field.known_values or frozenset() field_disallows_substvars = ( known_field.is_substvars_disabled_even_if_allowed_by_stanza ) allow_substvars = ( stanza_metadata.is_substvars_allowed_in_stanza and not field_disallows_substvars ) else: known_values = frozenset() interpretation = None allow_substvars = stanza_metadata.is_substvars_allowed_in_stanza if interpretation is None: for value_line in kvpair.value_element.value_lines: comment_element = value_line.comment_element if comment_element: assert comment_element.position_in_parent().line_position == 0 comment_start_line = ( value_line.position_in_parent() .relative_to(value_element_pos) .line_position ) _emit_tokens_for_comment_element( sem_token_state, comment_element, comment_start_line, comment_token_code, ) continue else: parts = kvpair.interpret_as(interpretation).iter_parts() for te in parts: if te.is_whitespace: continue if te.is_separator: continue value_range_in_parent_te = te.range_in_parent() value_range_te = value_range_in_parent_te.relative_to(value_element_pos) value = te.convert_to_text() if te.is_comment: token_type = comment_token_code value = value.rstrip() elif value in known_values: token_type = known_value_token_code elif allow_substvars and "${" in value: _process_value_with_substvars( sem_token_state, value, value_range_te, None, ) continue else: continue value_len = doc.position_codec.client_num_units(value) sem_token_state.emit_token( te_position_to_lsp(value_range_te.start_pos), value_len, token_type, ) def _split_into_substvars( value: str, base_token_type: int \| None, substvar_token_type: int, ) -> 
Iterable[tuple[str, int \| None]]: i = 0 next_search = i full_value_len = len(value) while i < full_value_len: try: subst_var_start = value.index("${", next_search) subst_var_end = value.index("}", subst_var_start + 2) except ValueError: token = value[i:full_value_len] if token: yield token, base_token_type return subst_var_end += 1 subst_var = value[subst_var_start:subst_var_end] if subst_var != "${}" and not SUBSTVAR_RE.match(subst_var): subst_var = None if subst_var is None: next_search = subst_var_end continue token = value[i:subst_var_start] if token: yield token, base_token_type yield subst_var, substvar_token_type i = subst_var_end next_search = i def _process_value_with_substvars( sem_token_state: Deb822SemanticTokensState, value: str, value_range_te: "TERange", base_token_type: int \| None, ) -> None: pos_codec = sem_token_state.doc.position_codec # TODO: Support overlapping tokens if the editor does. line = value_range_te.start_pos.line_position token_pos = value_range_te.start_pos.cursor_position substvar_token_code = sem_token_state.substvars_token_code for token, token_type in _split_into_substvars( value, base_token_type, substvar_token_code, ): token_len = len(token) if token_type is not None: sem_token_state.emit_token( types.Position(line, token_pos), pos_codec.client_num_units(token), token_type, ) token_pos += token_len def deb822_format_file( lint_state: LintState, file_metadata: Deb822FileMetadata[Any, Any], ) -> Sequence[TextEdit] \| None: deb822_file = lint_state.parsed_deb822_file_content if not file_metadata.file_metadata_applies_to_file(deb822_file): return None effective_preference = lint_state.effective_preference if effective_preference is None: return trim_end_of_line_whitespace(lint_state.position_codec, lint_state.lines) formatter = effective_preference.deb822_formatter() lines = lint_state.lines deb822_file = lint_state.parsed_deb822_file_content if deb822_file is None: _warn("The deb822 result missing failed!?") return None return 
list( file_metadata.reformat( effective_preference, deb822_file, formatter, lint_state.content, lint_state.position_codec, lines, ) ) async def deb822_semantic_tokens_full( ls: "DebputyLanguageServer", request: SemanticTokensParams, file_metadata: Deb822FileMetadata[Any, Any], ) -> SemanticTokens \| None: doc = ls.workspace.get_text_document(request.text_document.uri) deb822_file = ls.lint_state(doc).parsed_deb822_file_content if not file_metadata.file_metadata_applies_to_file(deb822_file): return None position_codec = doc.position_codec lines = doc.lines if deb822_file is None: _warn("The deb822 result missing failed!?") ls.show_message_log( "Internal error; could not get deb822 content!?", MessageType.Warning ) return None tokens: list[int] = [] sem_token_state = Deb822SemanticTokensState( ls, doc, lines, tokens, file_metadata, ) comment_token_code = sem_token_state.comment_token_code stanza_idx = 0 async for part_range, part in ls.slow_iter( with_range_in_continuous_parts(deb822_file.iter_parts()), yield_every=20 ): if part.is_comment: pos = part_range.start_pos sem_token_state.emit_token( te_position_to_lsp(pos), # Avoid trailing newline position_codec.client_num_units(part.convert_to_text().rstrip()), comment_token_code, ) elif isinstance(part, Deb822ParagraphElement): await _deb822_paragraph_semantic_tokens_full( ls, sem_token_state, part, part_range, stanza_idx, ) stanza_idx += 1 if not tokens: return None return SemanticTokens(tokens) def _complete_field_name( lint_state: LintState, stanza_metadata: StanzaMetadata[Any], matched_stanzas: Iterable[Deb822ParagraphElement], markdown_kind: MarkupKind, ) -> Sequence[CompletionItem]: items = [] matched_stanzas = list(matched_stanzas) seen_fields = { stanza_metadata.normalize_field_name(f.lower()) for f in chain.from_iterable( # The typing from python3-debian is not entirely optimal here. 
The iter always return a # `str`, but the provided type is `ParagraphKey` (because `__getitem__` supports those) # and that is not exclusively a `str`. # # So, this cast for now cast("Iterable[str]", s) for s in matched_stanzas ) } for cand_key, cand in stanza_metadata.items(): if stanza_metadata.normalize_field_name(cand_key.lower()) in seen_fields: continue item = cand.complete_field(lint_state, matched_stanzas, markdown_kind) if item is not None: items.append(item) return items

src/debputy/lsp/lsp_generic_deb822.py

import re
from itertools import chain
from typing import (
    Optional,
    Union,
    Tuple,
    Any,
    List,
    cast,
    TYPE_CHECKING,
)
from collections.abc import Sequence, Container, Iterable, Iterator, Callable

from debputy.linting.lint_util import (
    LintState,
    te_position_to_lsp,
    with_range_in_continuous_parts,
)
from debputy.lsp.config.config_options import DCO_SPELLCHECK_COMMENTS
from debputy.lsp.debputy_ls import DebputyLanguageServer
from debputy.lsp.lsp_debian_control_reference_data import (
    Deb822FileMetadata,
    Deb822KnownField,
    StanzaMetadata,
    F,
    S,
    SUBSTVAR_RE,
    _DEP_RELATION_CLAUSE,
)
from debputy.lsp.lsp_features import SEMANTIC_TOKEN_TYPES_IDS
from debputy.lsp.quickfixes import propose_correct_text_quick_fix
from debputy.lsp.text_util import (
    trim_end_of_line_whitespace,
    SemanticTokensState,
)
from debputy.lsp.vendoring._deb822_repro.locatable import (
    START_POSITION,
    Range as TERange,
    Position as TEPosition,
)
from debputy.lsp.vendoring._deb822_repro.parsing import (
    Deb822KeyValuePairElement,
    Deb822ParagraphElement,
    Deb822FileElement,
    Deb822CommentElement,
    Deb822ParsedTokenList,
    Interpretation,
)
from debputy.lsp.vendoring._deb822_repro.tokens import tokenize_deb822_file, Deb822Token
from debputy.lsp.vendoring._deb822_repro.types import TokenOrElement
from debputy.lsprotocol.types import (
    CompletionParams,
    CompletionList,
    CompletionItem,
    Position,
    MarkupContent,
    Hover,
    MarkupKind,
    HoverParams,
    FoldingRangeParams,
    FoldingRange,
    FoldingRangeKind,
    SemanticTokensParams,
    SemanticTokens,
    TextEdit,
    MessageType,
    SemanticTokenTypes,
)
from debputy.util import _info, _warn

if TYPE_CHECKING:
    import lsprotocol.types as types
else:
    import debputy.lsprotocol.types as types


try:
    from pygls.server import LanguageServer
    from pygls.workspace import TextDocument
except ImportError:
    pass


# Matches a single whitespace character or a colon.
_CONTAINS_SPACE_OR_COLON = re.compile(r"[\s:]")


def in_range(
    te_range: TERange,
    cursor_position: Position,
    *,
    inclusive_end: bool = False,
) -> bool:
    """Check whether a cursor position lies inside a range.

    The start of the range is always inclusive. The end of the range is
    exclusive unless `inclusive_end` is set, in which case a cursor placed
    exactly at the end position is also considered inside. This applies to
    single-line and multi-line ranges alike.

    :param te_range: The range to test against. Its `start_pos` and `end_pos`
      are read via `line_position` and `cursor_position`.
    :param cursor_position: The cursor position, read via `line` and
      `character`.
    :param inclusive_end: Whether a cursor exactly at the end of the range
      counts as being inside the range.
    :return: `True` if the cursor is inside the range, otherwise `False`.
    """
    cursor_line = cursor_position.line
    cursor_col = cursor_position.character
    start_pos = te_range.start_pos
    end_pos = te_range.end_pos
    first_line = start_pos.line_position
    last_line = end_pos.line_position

    if cursor_line < first_line or cursor_line > last_line:
        return False

    # On the first line of the range, the cursor must not be before the start.
    if cursor_line == first_line and cursor_col < start_pos.cursor_position:
        return False

    # On the last line of the range, the cursor must not be past the end.
    # Note that the first and the last line can be the same line.
    if cursor_line == last_line:
        end_col = end_pos.cursor_position
        if inclusive_end:
            return cursor_col <= end_col
        return cursor_col < end_col

    # Any line strictly before the last line is covered from here to its end.
    return True


def _field_at_position(
    stanza: Deb822ParagraphElement,
    stanza_metadata: S,
    stanza_range: TERange,
    position: Position,
) -> tuple[Deb822KeyValuePairElement | None, F | None, str, bool]:
    """Locate the key-value pair of a stanza that covers a position.

    :param stanza: The stanza whose parts are searched.
    :param stanza_metadata: Metadata used to look up the field definition by
      the name of the matched field.
    :param stanza_range: The range of the stanza. Only its start position is
      used; it is the origin for the ranges of the stanza's parts.
    :param position: The position to look up.
    :return: A tuple of the matched key-value pair, the field definition from
      the metadata (`None` if the metadata has none), the value under the
      position, and whether the position is inside the value part of the
      field. The value is the empty string unless the position is inside the
      value, the field definition provides an interpreter and one of the
      interpreted value references covers the position (end inclusive).
      If no key-value pair covers the position, `(None, None, "", False)`
      is returned.
    """
    origin = stanza_range.start_pos
    part_range = TERange(origin, origin)
    for part in stanza.iter_parts():
        # Each part starts where the previous part ended.
        part_range = part.size().relative_to(part_range.end_pos)
        covers_position = in_range(part_range, position)
        if not covers_position or not isinstance(part, Deb822KeyValuePairElement):
            continue

        value_range_in_pair = part.value_element.range_in_parent()
        value_range = value_range_in_pair.relative_to(part_range.start_pos)
        field_def = stanza_metadata.get(part.field_name)
        cursor_in_value = in_range(value_range, position)
        if field_def is None:
            interpreter = None
        else:
            interpreter = field_def.field_value_class.interpreter()

        if not cursor_in_value or interpreter is None:
            return part, field_def, "", cursor_in_value

        value_refs = part.interpret_as(interpreter).iter_value_references()
        # Use the first value reference that covers the position. The end is
        # inclusive here, so a cursor right after a value still matches it.
        value_at_cursor = next(
            (
                ref.value
                for ref in value_refs
                if in_range(
                    ref.locatable.range_in_parent().relative_to(
                        value_range.start_pos
                    ),
                    position,
                    inclusive_end=True,
                )
            ),
            "",
        )
        return part, field_def, value_at_cursor, cursor_in_value
    return None, None, "", False


def _allow_stanza_continuation(
    token_or_element: TokenOrElement,
    is_completion: bool,
) -> bool:
    if not is_completion:
        return False
    if token_or_element.is_error or token_or_element.is_comment:
        return True
    return (
        token_or_element.is_whitespace
        and token_or_element.convert_to_text().count("\n") < 2
    )


def _at_cursor(
    deb822_file: Deb822FileElement,
    file_metadata: Deb822FileMetadata[S, F],
    doc: "TextDocument",
    lines: list[str],
    client_position: Position,
    is_completion: bool = False,
) -> tuple[
    Position,
    str | None,
    str,
    bool,
    S | None,
    F | None,
    Iterable[Deb822ParagraphElement],
]:
    """Work out what is under the cursor in a deb822 document.

    :param deb822_file: The parsed file whose parts are walked.
    :param file_metadata: Used to guess the stanza classification by index.
    :param doc: The text document; provides the position codec and the word
      under the cursor.
    :param lines: The lines of the document (passed to the position codec).
    :param client_position: The cursor position in client units.
    :param is_completion: Whether this is for a completion request. When set,
      neighbouring stanzas are kept as context across comments, errors and
      short whitespace, and stanza metadata is always guessed.
    :returns: A tuple of ``(server_position, matched_field, current_word,
      in_value, stanza_metadata, known_field, stanza_parts)``. ``stanza_parts``
      is a one-shot generator over the previous/next stanza (those that are
      not ``None``).
    """
    server_position = doc.position_codec.position_from_client_units(
        lines,
        client_position,
    )
    te_range = TERange(
        START_POSITION,
        START_POSITION,
    )
    paragraph_no = -1
    previous_stanza: Deb822ParagraphElement | None = None
    next_stanza: Deb822ParagraphElement | None = None
    # Default word; replaced by the matched value reference (possibly "") when
    # the cursor is inside a stanza.
    current_word = doc.word_at_position(client_position)
    in_value: bool = False
    # Explicit iterator: the part after the matched one is peeked at below.
    file_iter = iter(deb822_file.iter_parts())
    matched_token: TokenOrElement | None = None
    matched_field: str | None = None
    stanza_metadata: S | None = None
    known_field: F | None = None

    for token_or_element in file_iter:
        # Each part starts where the previous one ended.
        te_range = token_or_element.size().relative_to(te_range.end_pos)
        if isinstance(token_or_element, Deb822ParagraphElement):
            previous_stanza = token_or_element
            paragraph_no += 1
        elif not _allow_stanza_continuation(token_or_element, is_completion):
            # This part separates stanzas; the last stanza is no longer context.
            previous_stanza = None
        if not in_range(te_range, server_position):
            continue
        matched_token = token_or_element
        if isinstance(token_or_element, Deb822ParagraphElement):
            stanza_metadata = file_metadata.guess_stanza_classification_by_idx(
                paragraph_no
            )
            kvpair, known_field, current_word, in_value = _field_at_position(
                token_or_element,
                stanza_metadata,
                te_range,
                server_position,
            )
            if kvpair is not None:
                matched_field = kvpair.field_name
        break

    # If the cursor sits on a part that may continue a stanza, the stanza right
    # after it (if any) is included as context as well.
    if matched_token is not None and _allow_stanza_continuation(
        matched_token,
        is_completion,
    ):
        next_te = next(file_iter, None)
        if isinstance(next_te, Deb822ParagraphElement):
            next_stanza = next_te

    stanza_parts = (p for p in (previous_stanza, next_stanza) if p is not None)

    if stanza_metadata is None and is_completion:
        # Cursor is outside any stanza; fall back to the index of the last
        # stanza seen (or the first stanza if none was seen).
        if paragraph_no < 0:
            paragraph_no = 0
        stanza_metadata = file_metadata.guess_stanza_classification_by_idx(paragraph_no)

    return (
        server_position,
        matched_field,
        current_word,
        in_value,
        stanza_metadata,
        known_field,
        stanza_parts,
    )


def deb822_completer(
    ls: "DebputyLanguageServer",
    params: CompletionParams,
    file_metadata: Deb822FileMetadata[Any, Any],
) -> CompletionList | Sequence[CompletionItem] | None:
    """Provide completion items for the cursor position in a deb822 document.

    :param ls: The language server handling the request.
    :param params: The completion request (document URI and cursor position).
    :param file_metadata: Metadata describing the stanzas and fields of the file.
    :returns: Field value completions when the cursor is in the value of a known
      field, field name completions when it is not in a value, or ``None`` when
      the metadata does not apply to the file, the cursor is on a line starting
      with ``#``, or the field is unknown.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    lint_state = ls.lint_state(doc)
    deb822_file = lint_state.parsed_deb822_file_content
    if not file_metadata.file_metadata_applies_to_file(deb822_file):
        return None

    (
        server_pos,
        current_field,
        word_at_position,
        in_value,
        stanza_metadata,
        known_field,
        matched_stanzas,
    ) = _at_cursor(
        deb822_file,
        file_metadata,
        doc,
        lines,
        params.position,
        is_completion=True,
    )

    # Guard the index: an empty document has no lines at all, and indexing it
    # would raise IndexError instead of offering completions.
    if server_pos.line < len(lines) and lines[server_pos.line].startswith("#"):
        return None

    markdown_kind = ls.completion_item_document_markup(
        MarkupKind.Markdown, MarkupKind.PlainText
    )
    if in_value:
        _info(f"Completion for field value {current_field} -- {word_at_position}")
        if known_field is None:
            return None
        return known_field.value_options_for_completer(
            lint_state,
            # `matched_stanzas` is a one-shot generator; materialize it.
            list(matched_stanzas),
            word_at_position,
            markdown_kind,
        )

    _info("Completing field name")
    assert stanza_metadata is not None
    return _complete_field_name(
        lint_state,
        stanza_metadata,
        matched_stanzas,
        markdown_kind,
    )


def deb822_hover(
    ls: "DebputyLanguageServer",
    params: HoverParams,
    file_metadata: Deb822FileMetadata[S, F],
    *,
    custom_handler: None | (
        Callable[
            [
                "DebputyLanguageServer",
                Position,
                str | None,
                str,
                F | None,
                bool,
                "TextDocument",
                list[str],
            ],
            Hover | None,
        ]
    ) = None,
) -> Hover | None:
    """Provide hover documentation for the cursor position in a deb822 document.

    :param ls: The language server handling the request.
    :param params: The hover request (document URI and cursor position).
    :param file_metadata: Metadata describing the stanzas and fields of the file.
    :param custom_handler: Optional callback consulted first. It receives the
      server, server position, field name, word/value under the cursor, known
      field, the in-value flag, the document and its lines. A ``Hover`` result
      is returned as-is; any other non-``None`` result is used as hover text.
    :returns: The hover content, or ``None`` when there is nothing to show.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    deb822_file = ls.lint_state(doc).parsed_deb822_file_content
    if not file_metadata.file_metadata_applies_to_file(deb822_file):
        return None
    lines = doc.lines
    (
        server_pos,
        current_field,
        word_at_position,
        in_value,
        _,
        known_field,
        _,
    ) = _at_cursor(
        deb822_file,
        file_metadata,
        doc,
        lines,
        params.position,
    )

    # Guard the index: an empty document has no lines at all, and indexing it
    # would raise IndexError instead of simply giving no hover.
    if server_pos.line < len(lines) and lines[server_pos.line].startswith("#"):
        return None

    hover_text = None
    if custom_handler is not None:
        res = custom_handler(
            ls,
            server_pos,
            current_field,
            word_at_position,
            known_field,
            in_value,
            doc,
            lines,
        )
        if isinstance(res, Hover):
            return res
        hover_text = res

    if hover_text is None:
        if current_field is None:
            _info("No hover information as we cannot determine which field it is for")
            return None

        if known_field is None:
            return None
        if in_value:
            # Only values registered as known values of the field have docs.
            if not known_field.known_values:
                return None
            keyword = known_field.known_values.get(word_at_position)
            if keyword is None:
                return None
            hover_text = keyword.long_description_translated(ls)
            if hover_text is not None:
                header = "`{VALUE}` (Field: {FIELD_NAME})".format(
                    VALUE=keyword.value,
                    FIELD_NAME=known_field.name,
                )
                hover_text = f"# {header}\n\n{hover_text}"
        else:
            hover_text = known_field.long_description_translated(ls)
            if hover_text is None:
                hover_text = (
                    f"No documentation is available for the field {current_field}."
                )
            hover_text = f"# {known_field.name}\n\n{hover_text}"

    if hover_text is None:
        return None
    return Hover(
        contents=MarkupContent(
            kind=ls.hover_markup_format(MarkupKind.Markdown, MarkupKind.PlainText),
            value=hover_text,
        )
    )


def deb822_token_iter(
    tokens: Iterable[Deb822Token],
) -> Iterator[tuple[Deb822Token, int, int, int, int]]:
    """Yield each token together with its start and end position.

    Positions are 0-based ``(line, column)`` pairs computed from the token
    texts alone. The end position of a token is exclusive and is therefore
    also the start position of the token that follows it.

    :param tokens: The tokens in document order.
    :returns: An iterator of
      ``(token, start_line, start_offset, end_line, end_offset)`` tuples.
    """
    line_no = 0
    line_offset = 0

    for token in tokens:
        start_line = line_no
        start_line_offset = line_offset

        text = token.text
        _, newline, tail = text.rpartition("\n")
        if newline:
            line_no += text.count("\n")
            # The column restarts after the last newline: it is the number of
            # characters following it (0 when the token ends with a newline).
            line_offset = len(tail)
        else:
            line_offset += len(text)

        yield token, start_line, start_line_offset, line_no, line_offset


def deb822_folding_ranges(
    ls: "DebputyLanguageServer",
    params: FoldingRangeParams,
    file_metadata: Deb822FileMetadata[Any, Any],
) -> Sequence[FoldingRange] | None:
    """Compute folding ranges for runs of comment tokens in a deb822 document.

    :param ls: The language server handling the request.
    :param params: The folding range request (document URI).
    :param file_metadata: Metadata used to check that the file is applicable.
    :returns: One comment folding range per run of comment tokens that is
      followed by a non-comment token, or ``None`` when the metadata does not
      apply to the file.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    deb822_file = ls.lint_state(doc).parsed_deb822_file_content
    if not file_metadata.file_metadata_applies_to_file(deb822_file):
        return None
    comment_start = -1
    folding_ranges = []
    for (
        token,
        start_line,
        _start_offset,
        end_line,
        _end_offset,
    ) in deb822_token_iter(deb822_file.iter_tokens()):
        if token.is_comment:
            if comment_start < 0:
                comment_start = start_line
            continue
        if comment_start < 0:
            continue
        # First non-comment token after a comment run: close the run. The
        # range must be built BEFORE the marker is reset, otherwise it would
        # start at line -1.
        # NOTE(review): `end_line` is the end line of this non-comment token,
        # not of the last comment token - confirm that is the intended end.
        # NOTE(review): a comment run at the very end of the file produces no
        # range, since no non-comment token follows it.
        folding_ranges.append(
            FoldingRange(
                comment_start,
                end_line,
                kind=FoldingRangeKind.Comment,
            )
        )
        comment_start = -1

    return folding_ranges


class Deb822SemanticTokensState(SemanticTokensState):
    """Semantic token state for deb822 files.

    On top of the base state, it carries the file metadata and the numeric
    codes (looked up in ``SEMANTIC_TOKEN_TYPES_IDS``) of the token types used
    when highlighting deb822 content.
    """

    # NOTE: `allow_overlapping_tokens` is declared as a slot but is not
    # assigned by `__init__` below.
    __slots__ = (
        "file_metadata",
        "keyword_token_code",
        "known_value_token_code",
        "comment_token_code",
        "substvars_token_code",
        "operator_token_code",
        "relation_restriction_token_code",
        "package_token_code",
        "allow_overlapping_tokens",
    )

    def __init__(
        self,
        ls: "DebputyLanguageServer",
        doc: "TextDocument",
        lines: list[str],
        tokens: list[int],
        file_metadata: Deb822FileMetadata[Any, Any],
    ) -> None:
        """Initialize the base state and resolve the token type codes.

        :param ls: The language server handling the request.
        :param doc: The document being tokenized.
        :param lines: The lines of the document.
        :param tokens: The list passed on to the base state as its token list.
        :param file_metadata: Metadata describing the stanzas and fields.
        """
        super().__init__(ls, doc, lines, tokens)
        self.file_metadata = file_metadata

        type_ids = SEMANTIC_TOKEN_TYPES_IDS
        kinds = SemanticTokenTypes
        self.keyword_token_code = type_ids[kinds.Keyword]
        self.known_value_token_code = type_ids[kinds.EnumMember]
        self.comment_token_code = type_ids[kinds.Comment.value]
        self.substvars_token_code = type_ids[kinds.Macro]
        self.operator_token_code = type_ids[kinds.Operator]
        self.relation_restriction_token_code = type_ids[kinds.TypeParameter]
        self.package_token_code = type_ids[kinds.Variable]


def _emit_tokens_for_comment_element(
    sem_token_state: Deb822SemanticTokensState,
    comment_element: Deb822CommentElement,
    comment_start_line: int,
    comment_token_code: int,
) -> None:
    """Emit one semantic token per comment token of a comment element.

    Consecutive comment tokens are placed on consecutive lines, all starting at
    column 0; trailing whitespace of the token text is not covered.

    :param sem_token_state: The state that receives the emitted tokens.
    :param comment_element: The comment element whose parts are emitted.
    :param comment_start_line: The line of the first comment token.
    :param comment_token_code: The token type code to emit.
    """
    line_no = comment_start_line
    for comment_token in comment_element.iter_parts():
        assert comment_token.is_comment
        assert isinstance(comment_token, Deb822Token)
        visible_length = len(comment_token.text.rstrip())
        sem_token_state.emit_token(
            Position(line_no, 0),
            visible_length,
            comment_token_code,
        )
        line_no += 1


async def scan_for_syntax_errors_and_token_level_diagnostics(
    deb822_file: Deb822FileElement,
    lint_state: LintState,
) -> int:
    """Emit token-level diagnostics: syntax errors and comment spelling.

    Every error token gets a "Syntax error" diagnostic. Comment tokens are
    spell checked when the ``DCO_SPELLCHECK_COMMENTS`` config value is enabled.

    :param deb822_file: The parsed file whose tokens are scanned.
    :param lint_state: Receives the diagnostics; also provides the spell
      checker, the configuration and the cooperative iteration helper.
    :returns: The start line of the first error token, or the number of lines
      plus one when there is no error token.
    """
    earliest_error_line = len(lint_state.lines) + 1
    spell_checker = lint_state.spellchecker()
    token_stream = lint_state.slow_iter(
        deb822_token_iter(deb822_file.iter_tokens())
    )

    async for token, start_line, start_offset, end_line, end_offset in token_stream:
        if token.is_error:
            if start_line < earliest_error_line:
                earliest_error_line = start_line
            error_start = TEPosition(start_line, start_offset)
            error_end = TEPosition(end_line, end_offset)
            lint_state.emit_diagnostic(
                TERange(error_start, error_end),
                "Syntax error",
                "error",
                "debputy",
            )
            continue

        if not token.is_comment:
            continue
        if not lint_state.debputy_config.config_value(DCO_SPELLCHECK_COMMENTS):
            continue

        for word, col_pos, end_col_pos in spell_checker.iter_words(token.text):
            corrections = spell_checker.provide_corrections_for(word)
            if not corrections:
                continue
            # Word columns are relative to the token text; the token's start
            # line is used for both ends of the range.
            word_start = TEPosition(start_line, col_pos)
            word_end = TEPosition(start_line, end_col_pos)
            quickfixes = [propose_correct_text_quick_fix(c) for c in corrections]
            lint_state.emit_diagnostic(
                TERange.between(word_start, word_end),
                f'Spelling "{word}"',
                "spelling",
                "debputy",
                quickfixes=quickfixes,
                enable_non_interactive_auto_fix=False,
            )
    return earliest_error_line


def _emit_relation_token(
    sem_token_state: Deb822SemanticTokensState,
    token_code: int | None,
    m: re.Match[str],
    group_name: str,
    value_range_te: TERange,
) -> None:
    """Emit the semantic token(s) for one named group of a relation match.

    Nothing is emitted when the group did not take part in the match.

    :param sem_token_state: The state that receives the emitted tokens.
    :param token_code: Token type for the non-substvar text of the group;
      ``None`` means only substvars inside the group are emitted.
    :param m: The match of the relation clause.
    :param group_name: The named group of ``m`` to emit.
    :param value_range_te: Range of the matched text; the group is placed on
      its start line, offset by the group's start within the match.
    """
    text = m.group(group_name)
    offset = m.start(group_name)
    if text is None or offset is None:
        return

    value_start = value_range_te.start_pos
    token_begin = TEPosition(
        value_start.line_position,
        value_start.cursor_position + offset,
    )
    token_finish = TEPosition(
        token_begin.line_position,
        token_begin.cursor_position + len(text),
    )
    _process_value_with_substvars(
        sem_token_state,
        text,
        TERange.between(token_begin, token_finish),
        token_code,
    )


async def _deb822_relationship_field_semantic_tokens_full(
    sem_token_state: Deb822SemanticTokensState,
    interpretation: Interpretation[Deb822ParsedTokenList[Any, Any]],
    kvpair: Deb822KeyValuePairElement,
    value_element_pos: TEPosition,
) -> None:
    """Emit semantic tokens for the value of a relationship field.

    Comments in the value are emitted as comment tokens. Every other
    non-whitespace, non-separator part is matched against
    ``_DEP_RELATION_CLAUSE`` and its named groups are emitted one by one.
    Parts that do not match the pattern produce no tokens.

    :param sem_token_state: The state that receives the emitted tokens.
    :param interpretation: How to interpret the field value into parts.
    :param kvpair: The field being tokenized.
    :param value_element_pos: Position of the value element; part ranges are
      made relative to it.
    """
    doc = sem_token_state.doc
    parts = kvpair.interpret_as(interpretation).iter_parts()
    comment_token_code = sem_token_state.comment_token_code
    relation_restriction_token_code = sem_token_state.relation_restriction_token_code
    # Named group of the relation pattern -> token type, in emission order.
    # `None` means only substvars inside the group are emitted.
    relation_groups: tuple[tuple[str, int | None], ...] = (
        ("name_arch_qual", sem_token_state.package_token_code),
        ("operator", sem_token_state.operator_token_code),
        ("version", None),
        ("arch_restriction", relation_restriction_token_code),
        ("build_profile_restriction", relation_restriction_token_code),
    )

    for te in parts:
        if te.is_whitespace or te.is_separator:
            continue

        value_range_te = te.range_in_parent().relative_to(value_element_pos)
        value = te.convert_to_text()
        if te.is_comment:
            # Do not cover trailing whitespace (such as the newline).
            value = value.rstrip()
            value_len = doc.position_codec.client_num_units(value)
            sem_token_state.emit_token(
                te_position_to_lsp(value_range_te.start_pos),
                value_len,
                comment_token_code,
            )
            continue

        m = _DEP_RELATION_CLAUSE.fullmatch(value)
        if m is None:
            # `fullmatch` returns None for text that is not a relation clause
            # per the pattern; skip it rather than crash on `m.group(...)`.
            continue
        for group_name, token_code in relation_groups:
            _emit_relation_token(
                sem_token_state,
                token_code,
                m,
                group_name,
                value_range_te,
            )


async def _deb822_paragraph_semantic_tokens_full(
    ls: "DebputyLanguageServer",
    sem_token_state: Deb822SemanticTokensState,
    stanza: Deb822ParagraphElement,
    stanza_range_in_file: "TERange",
    stanza_idx: int,
) -> None:
    """Emit semantic tokens for all fields of one stanza.

    For each field: its leading comment (if any) and the field name are
    emitted, followed by tokens for the value depending on what is known about
    the field (relationship field, known values, substvars, comments).

    :param ls: The language server; provides cooperative iteration.
    :param sem_token_state: The state that receives the emitted tokens.
    :param stanza: The stanza to tokenize.
    :param stanza_range_in_file: Range of the stanza in the file; its start is
      the base for all positions computed here.
    :param stanza_idx: Index of the stanza, used for classifying it.
    """
    doc = sem_token_state.doc
    keyword_token_code = sem_token_state.keyword_token_code
    known_value_token_code = sem_token_state.known_value_token_code
    comment_token_code = sem_token_state.comment_token_code

    stanza_position = stanza_range_in_file.start_pos
    stanza_metadata = sem_token_state.file_metadata.classify_stanza(
        stanza,
        stanza_idx=stanza_idx,
    )
    async for kvpair_range, kvpair in ls.slow_iter(
        with_range_in_continuous_parts(
            stanza.iter_parts(),
            start_relative_to=stanza_position,
        ),
        yield_every=25,
    ):
        # Only fields are of interest; other stanza parts are skipped.
        if not isinstance(kvpair, Deb822KeyValuePairElement):
            continue
        kvpair_position = kvpair_range.start_pos
        field_start = kvpair.field_token.position_in_parent().relative_to(
            kvpair_position
        )
        comment = kvpair.comment_element
        if comment:
            # The comment precedes the field name line.
            # NOTE(review): presumably `len(comment)` is the number of comment
            # lines - confirm.
            comment_start_line = field_start.line_position - len(comment)
            _emit_tokens_for_comment_element(
                sem_token_state,
                comment,
                comment_start_line,
                comment_token_code,
            )

        field_size = doc.position_codec.client_num_units(kvpair.field_name)

        # The field name itself is emitted as a keyword.
        sem_token_state.emit_token(
            te_position_to_lsp(field_start),
            field_size,
            keyword_token_code,
        )

        known_field: Deb822KnownField | None = stanza_metadata.get(kvpair.field_name)
        value_element_pos = kvpair.value_element.position_in_parent().relative_to(
            kvpair_position
        )
        if known_field is not None:
            # No value tokens are emitted for fields flagged `spellcheck_value`.
            if known_field.spellcheck_value:
                continue
            interpretation = known_field.field_value_class.interpreter()
            if (
                getattr(known_field, "is_relationship_field", False)
                and interpretation is not None
            ):
                # Relationship fields have their own, more detailed tokenizer.
                await _deb822_relationship_field_semantic_tokens_full(
                    sem_token_state,
                    interpretation,
                    kvpair,
                    value_element_pos,
                )
                continue
            known_values: Container[str] = known_field.known_values or frozenset()
            field_disallows_substvars = (
                known_field.is_substvars_disabled_even_if_allowed_by_stanza
            )
            # Substvars need to be allowed by the stanza and not vetoed by the field.
            allow_substvars = (
                stanza_metadata.is_substvars_allowed_in_stanza
                and not field_disallows_substvars
            )
        else:
            # Unknown field: no known values and no interpretation of the value.
            known_values = frozenset()
            interpretation = None
            allow_substvars = stanza_metadata.is_substvars_allowed_in_stanza

        if interpretation is None:
            # Without an interpretation, only comments inside the value are
            # emitted; the value text itself gets no tokens.
            for value_line in kvpair.value_element.value_lines:
                comment_element = value_line.comment_element
                if comment_element:
                    assert comment_element.position_in_parent().line_position == 0
                    comment_start_line = (
                        value_line.position_in_parent()
                        .relative_to(value_element_pos)
                        .line_position
                    )
                    _emit_tokens_for_comment_element(
                        sem_token_state,
                        comment_element,
                        comment_start_line,
                        comment_token_code,
                    )
            continue
        else:
            parts = kvpair.interpret_as(interpretation).iter_parts()
        for te in parts:
            if te.is_whitespace:
                continue
            if te.is_separator:
                continue
            value_range_in_parent_te = te.range_in_parent()
            value_range_te = value_range_in_parent_te.relative_to(value_element_pos)
            value = te.convert_to_text()
            if te.is_comment:
                token_type = comment_token_code
                # Do not cover trailing whitespace (such as the newline).
                value = value.rstrip()
            elif value in known_values:
                token_type = known_value_token_code
            elif allow_substvars and "${" in value:
                # Only the substvars inside the value are emitted (no base type).
                _process_value_with_substvars(
                    sem_token_state,
                    value,
                    value_range_te,
                    None,
                )
                continue
            else:
                # Plain value without special meaning: no token.
                continue
            value_len = doc.position_codec.client_num_units(value)
            sem_token_state.emit_token(
                te_position_to_lsp(value_range_te.start_pos),
                value_len,
                token_type,
            )


def _split_into_substvars(
    value: str,
    base_token_type: int | None,
    substvar_token_type: int,
) -> Iterable[tuple[str, int | None]]:
    """Split a value into substvar and non-substvar pieces, in order.

    A ``${...}`` span counts as a substvar when it is ``${}`` or matches
    ``SUBSTVAR_RE``; any other ``${...}`` span stays part of the surrounding
    text. The yielded pieces concatenate back to ``value``.

    :param value: The text to split.
    :param base_token_type: Token type paired with the non-substvar pieces.
    :param substvar_token_type: Token type paired with the substvar pieces.
    :returns: ``(text, token_type)`` pairs; empty pieces are not yielded.
    """
    total = len(value)
    emitted_upto = 0
    search_from = 0
    while emitted_upto < total:
        start = value.find("${", search_from)
        end = value.find("}", start + 2) if start >= 0 else -1
        if end < 0:
            # No further (terminated) substvar: the rest is plain text.
            remainder = value[emitted_upto:total]
            if remainder:
                yield remainder, base_token_type
            return

        end += 1
        candidate = value[start:end]
        if candidate == "${}" or SUBSTVAR_RE.match(candidate):
            leading_text = value[emitted_upto:start]
            if leading_text:
                yield leading_text, base_token_type
            yield candidate, substvar_token_type
            emitted_upto = end
            search_from = end
        else:
            # Not a valid substvar: keep it as plain text and search past it.
            search_from = end


def _process_value_with_substvars(
    sem_token_state: Deb822SemanticTokensState,
    value: str,
    value_range_te: "TERange",
    base_token_type: int | None,
) -> None:
    """Emit semantic tokens for a single-line value that may contain substvars.

    The value is split into substvar and non-substvar pieces. Substvars are
    emitted with the substvar token type, and the remaining pieces with
    ``base_token_type`` unless that is ``None``, in which case they are skipped.

    :param sem_token_state: The state that receives the emitted tokens.
    :param value: The text to emit tokens for.
    :param value_range_te: Range of the value; only its start position is used.
    :param base_token_type: Token type for non-substvar text, or ``None``.
    """
    codec = sem_token_state.doc.position_codec

    # TODO: Support overlapping tokens if the editor does.

    start = value_range_te.start_pos
    line_no = start.line_position
    column = start.cursor_position
    pieces = _split_into_substvars(
        value,
        base_token_type,
        sem_token_state.substvars_token_code,
    )
    for piece, piece_type in pieces:
        if piece_type is not None:
            sem_token_state.emit_token(
                types.Position(line_no, column),
                codec.client_num_units(piece),
                piece_type,
            )
        # Skipped pieces still advance the column.
        column += len(piece)


def deb822_format_file(
    lint_state: LintState,
    file_metadata: Deb822FileMetadata[Any, Any],
) -> Sequence[TextEdit] | None:
    deb822_file = lint_state.parsed_deb822_file_content
    if not file_metadata.file_metadata_applies_to_file(deb822_file):
        return None
    effective_preference = lint_state.effective_preference
    if effective_preference is None:
        return trim_end_of_line_whitespace(lint_state.position_codec, lint_state.lines)
    formatter = effective_preference.deb822_formatter()
    lines = lint_state.lines
    deb822_file = lint_state.parsed_deb822_file_content
    if deb822_file is None:
        _warn("The deb822 result missing failed!?")
        return None

    return list(
        file_metadata.reformat(
            effective_preference,
            deb822_file,
            formatter,
            lint_state.content,
            lint_state.position_codec,
            lines,
        )
    )


async def deb822_semantic_tokens_full(
    ls: "DebputyLanguageServer",
    request: SemanticTokensParams,
    file_metadata: Deb822FileMetadata[Any, Any],
) -> SemanticTokens | None:
    """Compute the semantic tokens for a whole deb822 document.

    Comments between stanzas are emitted as comment tokens; each stanza is
    handed to the stanza-level tokenizer together with its index.

    :param ls: The language server handling the request.
    :param request: The semantic tokens request (document URI).
    :param file_metadata: Metadata describing the stanzas and fields of the file.
    :returns: The semantic tokens, or ``None`` when the metadata does not apply,
      no parsed file is available, or no token was emitted.
    """
    doc = ls.workspace.get_text_document(request.text_document.uri)
    deb822_file = ls.lint_state(doc).parsed_deb822_file_content
    if not file_metadata.file_metadata_applies_to_file(deb822_file):
        return None
    position_codec = doc.position_codec
    lines = doc.lines
    if deb822_file is None:
        _warn("The deb822 result missing failed!?")
        ls.show_message_log(
            "Internal error; could not get deb822 content!?", MessageType.Warning
        )
        return None

    encoded_tokens: list[int] = []
    state = Deb822SemanticTokensState(
        ls,
        doc,
        lines,
        encoded_tokens,
        file_metadata,
    )
    comment_code = state.comment_token_code
    next_stanza_idx = 0
    parts_with_ranges = with_range_in_continuous_parts(deb822_file.iter_parts())

    async for part_range, part in ls.slow_iter(parts_with_ranges, yield_every=20):
        if part.is_comment:
            # Avoid covering the trailing newline.
            visible_text = part.convert_to_text().rstrip()
            state.emit_token(
                te_position_to_lsp(part_range.start_pos),
                position_codec.client_num_units(visible_text),
                comment_code,
            )
            continue
        if not isinstance(part, Deb822ParagraphElement):
            continue
        await _deb822_paragraph_semantic_tokens_full(
            ls,
            state,
            part,
            part_range,
            next_stanza_idx,
        )
        next_stanza_idx += 1

    return SemanticTokens(encoded_tokens) if encoded_tokens else None


def _complete_field_name(
    lint_state: LintState,
    stanza_metadata: StanzaMetadata[Any],
    matched_stanzas: Iterable[Deb822ParagraphElement],
    markdown_kind: MarkupKind,
) -> Sequence[CompletionItem]:
    items = []
    matched_stanzas = list(matched_stanzas)
    seen_fields = {
        stanza_metadata.normalize_field_name(f.lower())
        for f in chain.from_iterable(
            # The typing from python3-debian is not entirely optimal here. The iter always return a
            # `str`, but the provided type is `ParagraphKey` (because `__getitem__` supports those)
            # and that is not exclusively a `str`.
            #
            # So, this cast for now
            cast("Iterable[str]", s)
            for s in matched_stanzas
        )
    }
    for cand_key, cand in stanza_metadata.items():
        if stanza_metadata.normalize_field_name(cand_key.lower()) in seen_fields:
            continue
        item = cand.complete_field(lint_state, matched_stanzas, markdown_kind)
        if item is not None:
            items.append(item)
    return items