Introduce tokenizing options for full and partial mode (#1669)
Add a tokenizing mode to the tokenize method:
- Full: we get the full text to tokenize
- Partial: we get only a portion of the text to tokenize

In indentation lexing, we do not auto-complete dedents in partial mode.
martin-fleck-at authored Sep 6, 2024
1 parent 51d99a6 commit 9a1c021
Showing 7 changed files with 84 additions and 36 deletions.
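
The new API surface is small. Below is a minimal usage sketch; the `TokenizeMode`, `TokenizeOptions`, and `DEFAULT_TOKENIZE_OPTIONS` names come from this diff, while the `langium` import path and the lexer setup are assumptions for illustration:

```ts
import type { Lexer, TokenizeOptions } from 'langium'; // export path assumed

declare const lexer: Lexer; // obtained from your language's services in a real project

// Full mode is the default and behaves exactly as before this commit.
const full = lexer.tokenize('if true:\n    return true');

// Partial mode signals that the text is only a fragment of a document,
// e.g. everything up to the cursor during code completion.
const options: TokenizeOptions = { mode: 'partial' };
const partial = lexer.tokenize('if true:\n    ret', options);
```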
39 changes: 20 additions & 19 deletions packages/langium/src/parser/indentation-aware.ts
@@ -7,11 +7,11 @@
 import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain';
 import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
 import type { LexingReport, TokenBuilderOptions } from './token-builder.js';
-import type { LexerResult } from './lexer.js';
+import type { LexerResult, TokenizeOptions } from './lexer.js';
 import type { LangiumCoreServices } from '../services.js';
 import { createToken, createTokenInstance, Lexer } from 'chevrotain';
 import { DefaultTokenBuilder } from './token-builder.js';
-import { DefaultLexer, isTokenTypeArray } from './lexer.js';
+import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';
 
 type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];
 
@@ -179,11 +179,11 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         }
     }
 
-    override popLexingReport(text: string): IndentationLexingReport {
-        const result = super.popLexingReport(text);
+    override flushLexingReport(text: string): IndentationLexingReport {
+        const result = super.flushLexingReport(text);
         return {
             ...result,
-            remainingDedents: this.popRemainingDedents(text),
+            remainingDedents: this.flushRemainingDedents(text),
         };
     }
 
@@ -203,9 +203,12 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The current position at which to attempt a match
+     * @param tokens Previously scanned tokens
+     * @param groups Token Groups
      * @returns The current and previous indentation levels and the matched whitespace
      */
-    protected matchWhitespace(text: string, offset: number, _tokens: IToken[], _groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    protected matchWhitespace(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
         this.whitespaceRegExp.lastIndex = offset;
         const match = this.whitespaceRegExp.exec(text);
         return {
@@ -251,12 +254,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The offset at which to attempt a match
-     * @param tokens Previously scanned Tokens
+     * @param tokens Previously scanned tokens
      * @param groups Token Groups
      */
     protected indentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
-        const { indentTokenName } = this.options;
-
         if (!this.isStartOfLine(text, offset)) {
             return null;
         }
@@ -274,7 +275,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         const indentToken = this.createIndentationTokenInstance(
             this.indentTokenType,
             text,
-            match?.[0] ?? indentTokenName,
+            match?.[0] ?? '',
             offset,
         );
         tokens.push(indentToken);
@@ -288,12 +289,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The offset at which to attempt a match
-     * @param tokens Previously scanned Tokens
+     * @param tokens Previously scanned tokens
      * @param groups Token Groups
      */
     protected dedentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
-        const { dedentTokenName } = this.options;
-
         if (!this.isStartOfLine(text, offset)) {
             return null;
         }
@@ -316,7 +315,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
                 offset,
                 length: match?.[0]?.length ?? 0,
                 line: this.getLineNumber(text, offset),
-                column: 0
+                column: 1
             });
             return null;
         }
@@ -327,7 +326,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         const token = this.createIndentationTokenInstance(
             this.dedentTokenType,
             text,
-            match?.[0] ?? dedentTokenName,
+            match?.[0] ?? '',
             offset,
         );
         tokens.push(token);
@@ -362,7 +361,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      * @param text Full text that was tokenized
      * @returns Remaining dedent tokens to match all previous indents at the end of the file
      */
-    popRemainingDedents(text: string): IToken[] {
+    flushRemainingDedents(text: string): IToken[] {
         const remainingDedents: IToken[] = [];
         while (this.indentationStack.length > 1) {
             remainingDedents.push(
@@ -402,13 +401,15 @@ export class IndentationAwareLexer extends DefaultLexer {
         }
     }
 
-    override tokenize(text: string): LexerResult {
+    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const result = super.tokenize(text);
 
         // consuming all remaining dedents and remove them as they might not be serializable
         const report = result.report as IndentationLexingReport;
-        const remainingDedents = report.remainingDedents;
-        result.tokens.push(...remainingDedents);
+        if (options?.mode === 'full') {
+            // auto-complete document with remaining dedents
+            result.tokens.push(...report.remainingDedents);
+        }
         report.remainingDedents = [];
 
         // remove any "indent-dedent" pair with an empty body as these are typically
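
To see what the new `mode === 'full'` guard changes in practice, here is a hypothetical sketch contrasting the two modes for an indentation-aware grammar. The service wiring mirrors the tests further down; `sampleGrammar` and the token listings in the comments are illustrative assumptions:

```ts
import { IndentationAwareLexer, IndentationAwareTokenBuilder } from 'langium';
import { createServicesForGrammar } from 'langium/grammar';

// Assumes an async context; `sampleGrammar` is a hypothetical grammar string
// declaring INDENT/DEDENT terminals, as in the tests below.
const services = await createServicesForGrammar({
    grammar: sampleGrammar,
    module: {
        parser: {
            TokenBuilder: () => new IndentationAwareTokenBuilder(),
            Lexer: (srv) => new IndentationAwareLexer(srv),
        },
    },
});

const lexer = services.parser.Lexer;
// Full mode: the open block is closed by a DEDENT synthesized via flushRemainingDedents.
lexer.tokenize('{\n    name');                      // [L_BRAC, INDENT, name, DEDENT]
// Partial mode: the block stays open; nothing is synthesized at the end of the fragment.
lexer.tokenize('{\n    name', { mode: 'partial' }); // [L_BRAC, INDENT, name]
```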
2 changes: 1 addition & 1 deletion packages/langium/src/parser/langium-parser.ts
@@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {
 
     parse(input: string): CompletionParserResult {
         this.resetState();
-        const tokens = this.lexer.tokenize(input);
+        const tokens = this.lexer.tokenize(input, { mode: 'partial' });
         this.tokens = tokens.tokens;
         this.wrapper.input = [...this.tokens];
         this.mainRule.call(this.wrapper, {});
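
The completion parser always lexes the document text truncated at the cursor, which makes it the natural client for partial mode: with the indentation-aware lexer, blocks that are still open at the cut-off point no longer get closed by artificial DEDENT tokens. A sketch of the call path, assuming services have already been created:

```ts
// Hypothetical usage; `services` comes from your language's service creation.
const completionParser = services.parser.CompletionParser;

// The input ends at the cursor; thanks to { mode: 'partial' } above,
// no trailing DEDENTs are appended for the still-open `if` block.
const result = completionParser.parse('if true:\n    ret');
console.log(result.tokens.map(token => token.tokenType.name));
```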
16 changes: 12 additions & 4 deletions packages/langium/src/parser/lexer.ts
@@ -25,9 +25,17 @@ export interface LexerResult {
     report?: LexingReport;
 }
 
+export type TokenizeMode = 'full' | 'partial';
+
+export interface TokenizeOptions {
+    mode?: TokenizeMode;
+}
+
+export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };
+
 export interface Lexer {
     readonly definition: TokenTypeDictionary;
-    tokenize(text: string): LexerResult;
+    tokenize(text: string, options?: TokenizeOptions): LexerResult;
 }
 
 export class DefaultLexer implements Lexer {
@@ -36,7 +44,7 @@ export class DefaultLexer implements Lexer {
     protected tokenBuilder: TokenBuilder;
     protected tokenTypes: TokenTypeDictionary;
 
-    constructor( services: LangiumCoreServices) {
+    constructor(services: LangiumCoreServices) {
         this.tokenBuilder = services.parser.TokenBuilder;
         const tokens = this.tokenBuilder.buildTokens(services.Grammar, {
             caseInsensitive: services.LanguageMetaData.caseInsensitive
@@ -52,13 +60,13 @@ export class DefaultLexer implements Lexer {
         return this.tokenTypes;
     }
 
-    tokenize(text: string): LexerResult {
+    tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const chevrotainResult = this.chevrotainLexer.tokenize(text);
         return {
             tokens: chevrotainResult.tokens,
             errors: chevrotainResult.errors,
             hidden: chevrotainResult.groups.hidden ?? [],
-            report: this.tokenBuilder.popLexingReport?.(text)
+            report: this.tokenBuilder.flushLexingReport?.(text)
         };
     }
 
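
Note that `DefaultLexer` accepts the new options argument but deliberately ignores it (hence `_options`); subclasses such as `IndentationAwareLexer` give the flag meaning. A sketch of a custom lexer honoring it — the error-downgrading policy is invented for illustration, not part of this commit:

```ts
import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer } from 'langium';
import type { LexerResult, TokenizeOptions } from 'langium';

export class FragmentTolerantLexer extends DefaultLexer {
    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
        const result = super.tokenize(text, options);
        if (options.mode === 'partial') {
            // A fragment may legitimately end mid-token, so drop hard lexing
            // errors here (invented policy for the sake of the example).
            return { ...result, errors: [] };
        }
        return result;
    }
}
```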
9 changes: 6 additions & 3 deletions packages/langium/src/parser/token-builder.ts
@@ -25,7 +25,7 @@ export interface TokenBuilder {
      *
      * @param text The text that was tokenized.
      */
-    popLexingReport?(text: string): LexingReport;
+    flushLexingReport?(text: string): LexingReport;
 }
 
 /**
@@ -36,8 +36,10 @@ export interface LexingReport {
     diagnostics: LexingDiagnostic[];
 }
 
+export type LexingDiagnosticSeverity = 'error' | 'warning' | 'info' | 'hint';
+
 export interface LexingDiagnostic extends ILexingError {
-    severity?: 'error' | 'warning' | 'info' | 'hint';
+    severity?: LexingDiagnosticSeverity;
 }
 
 export class DefaultTokenBuilder implements TokenBuilder {
@@ -64,7 +66,8 @@ export class DefaultTokenBuilder implements TokenBuilder {
         return tokens;
     }
 
-    popLexingReport(_text: string): LexingReport {
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    flushLexingReport(text: string): LexingReport {
         return { diagnostics: this.popDiagnostics() };
     }
 
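
The rename from `popLexingReport` to `flushLexingReport` (and `popRemainingDedents` to `flushRemainingDedents` above) signals that the builder drains its accumulated state once per tokenize call. A sketch of a custom token builder hooking into it; the tab-detection rule is a made-up example:

```ts
import { DefaultTokenBuilder } from 'langium';
import type { LexingReport } from 'langium';

export class NoTabsTokenBuilder extends DefaultTokenBuilder {
    override flushLexingReport(text: string): LexingReport {
        const report = super.flushLexingReport(text);
        const offset = text.indexOf('\t');
        if (offset >= 0) {
            // LexingDiagnostic extends chevrotain's ILexingError with a severity.
            report.diagnostics.push({
                message: 'Tabs are discouraged; use spaces for indentation.',
                severity: 'warning', // a LexingDiagnosticSeverity value from this diff
                offset,
                length: 1,
                line: 1,   // simplified; real code would compute line/column from the offset
                column: 1,
            });
        }
        return report;
    }
}
```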
14 changes: 7 additions & 7 deletions packages/langium/src/validation/document-validator.ts
@@ -11,14 +11,14 @@ import type { ParseResult } from '../parser/langium-parser.js';
 import type { LangiumCoreServices } from '../services.js';
 import type { AstNode, CstNode } from '../syntax-tree.js';
 import type { LangiumDocument } from '../workspace/documents.js';
-import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry } from './validation-registry.js';
+import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry, ValidationSeverity } from './validation-registry.js';
 import { CancellationToken } from '../utils/cancellation.js';
 import { findNodeForKeyword, findNodeForProperty } from '../utils/grammar-utils.js';
 import { streamAst } from '../utils/ast-utils.js';
 import { tokenToRange } from '../utils/cst-utils.js';
 import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js';
 import { diagnosticData } from './validation-registry.js';
-import type { LexingDiagnostic } from '../parser/token-builder.js';
+import type { LexingDiagnostic, LexingDiagnosticSeverity } from '../parser/token-builder.js';
 
 export interface ValidationOptions {
     /**
@@ -100,7 +100,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
     protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void {
         const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[];
         for (const lexerDiagnostic of lexerDiagnostics) {
-            const severity = lexerDiagnostic?.severity ?? 'error';
+            const severity = lexerDiagnostic.severity ?? 'error';
             const diagnostic: Diagnostic = {
                 severity: toDiagnosticSeverity(severity),
                 range: {
@@ -180,7 +180,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
 
     protected async validateAst(rootNode: AstNode, options: ValidationOptions, cancelToken = CancellationToken.None): Promise<Diagnostic[]> {
         const validationItems: Diagnostic[] = [];
-        const acceptor: ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => {
+        const acceptor: ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => {
             validationItems.push(this.toDiagnostic(severity, message, info));
         };
 
@@ -194,7 +194,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
         return validationItems;
     }
 
-    protected toDiagnostic<N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N, string>): Diagnostic {
+    protected toDiagnostic<N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N, string>): Diagnostic {
         return {
             message,
             range: getDiagnosticRange(info),
@@ -233,7 +233,7 @@ export function getDiagnosticRange<N extends AstNode>(info: DiagnosticInfo<N, st
     return cstNode.range;
 }
 
-export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticSeverity {
+export function toDiagnosticSeverity(severity: LexingDiagnosticSeverity): DiagnosticSeverity {
     switch (severity) {
         case 'error':
             return 1; // according to vscode-languageserver-types/lib/esm/main.js#DiagnosticSeverity.Error
@@ -248,7 +248,7 @@ export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'h
     }
 }
 
-export function toDiagnosticData(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticData {
+export function toDiagnosticData(severity: LexingDiagnosticSeverity): DiagnosticData {
     switch (severity) {
         case 'error':
             return diagnosticData(DocumentValidator.LexingError);
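
The severity strings map onto the numeric LSP values exactly as before; only the parameter type is now the named `LexingDiagnosticSeverity` alias. For reference (values per vscode-languageserver-types, as the comment in the diff notes; the import path is an assumption):

```ts
import { toDiagnosticSeverity } from 'langium';

toDiagnosticSeverity('error');   // 1 -- DiagnosticSeverity.Error
toDiagnosticSeverity('warning'); // 2 -- DiagnosticSeverity.Warning
toDiagnosticSeverity('info');    // 3 -- DiagnosticSeverity.Information
toDiagnosticSeverity('hint');    // 4 -- DiagnosticSeverity.Hint
```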
4 changes: 3 additions & 1 deletion packages/langium/src/validation/validation-registry.ts
@@ -57,7 +57,9 @@ export function diagnosticData(code: string): DiagnosticData {
     return { code };
 }
 
-export type ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => void
+export type ValidationSeverity = 'error' | 'warning' | 'info' | 'hint';
+
+export type ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => void
 
 export type ValidationCheck<T extends AstNode = AstNode> = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise<void>;
 
36 changes: 35 additions & 1 deletion packages/langium/test/parser/indentation-aware.test.ts
@@ -11,7 +11,7 @@ import { EmptyFileSystem, IndentationAwareLexer, IndentationAwareTokenBuilder }
 import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
 import type { LangiumServices, PartialLangiumServices } from 'langium/lsp';
 import { expandToString } from 'langium/generate';
-import { parseHelper } from 'langium/test';
+import { expectCompletion, parseHelper } from 'langium/test';
 import type { IMultiModeLexerDefinition } from 'chevrotain';
 
 const grammarServices = createLangiumGrammarServices(EmptyFileSystem).grammar;
@@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => {
         expect(dedent.tokenType.name).toBe('DEDENT');
     });
 
+    test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
+        const lexer = await getLexer(sampleGrammar);
+        const { tokens } = lexer.tokenize(expandToString`
+            // single-line comment
+            {
+                name`, { mode: 'partial' });
+        expect(tokens).toHaveLength(3);
+
+        const [/* L_BRAC */, indent, /* id */] = tokens;
+        expect(indent.tokenType.name).toBe('INDENT');
+    });
+
     test('should not return any tokens for empty input', async () => {
         const lexer = await getLexer(sampleGrammar);
         const { tokens } = lexer.tokenize('');
@@ -389,6 +401,28 @@ describe('IndentationAware parsing', () => {
         expect(return2.value).toBe(true);
     });
 
+    test.fails('should offer correct auto-completion parsing', async () => {
+        const text = expandToString`
+            <|>if true:
+                <|>return true
+            <|>else:
+                <|>if false:
+                    <|>return true
+                <|>return false
+            <|>return true
+        `;
+
+        const services = await createIndentationAwareServices(sampleGrammar);
+        const completion = expectCompletion(services);
+        await completion({ text, index: 0, expectedItems: ['if', 'return'] });
+        // PR 1669: the lines below currently fail as the completion provider may wrongly assume that all whitespace tokens are hidden
+        await completion({ text, index: 1, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 2, expectedItems: ['else'] });
+        await completion({ text, index: 3, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 4, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 5, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 6, expectedItems: ['if', 'return'] });
+    });
 });
 
 type Statement = If | Return;
