Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/ruff_db/src/diagnostic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use ruff_annotate_snippets::Level as AnnotateLevel;
use ruff_text_size::{Ranged, TextRange, TextSize};

pub use self::render::{
DisplayDiagnostic, DisplayDiagnostics, FileResolver, Input, ceil_char_boundary,
DisplayDiagnostic, DisplayDiagnostics, DummyFileResolver, FileResolver, Input,
ceil_char_boundary,
github::{DisplayGithubDiagnostics, GithubRenderer},
};
use crate::{Db, files::File};
Expand Down
25 changes: 25 additions & 0 deletions crates/ruff_db/src/diagnostic/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,31 @@ pub fn ceil_char_boundary(text: &str, offset: TextSize) -> TextSize {
.unwrap_or_else(|| TextSize::from(upper_bound))
}

/// A stub implementation of [`FileResolver`] intended for testing.
pub struct DummyFileResolver;

impl FileResolver for DummyFileResolver {
fn path(&self, _file: File) -> &str {
unimplemented!()
}

fn input(&self, _file: File) -> Input {
unimplemented!()
}

fn notebook_index(&self, _file: &UnifiedFile) -> Option<NotebookIndex> {
None
}

fn is_notebook(&self, _file: &UnifiedFile) -> bool {
false
}

fn current_directory(&self) -> &Path {
Path::new(".")
}
}

#[cfg(test)]
mod tests {

Expand Down
3 changes: 1 addition & 2 deletions crates/ruff_linter/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ use crate::settings::types::CompiledPerFileIgnoreList;
pub fn get_cwd() -> &'static Path {
#[cfg(target_arch = "wasm32")]
{
static CWD: std::sync::LazyLock<PathBuf> = std::sync::LazyLock::new(|| PathBuf::from("."));
&CWD
Path::new(".")
}
#[cfg(not(target_arch = "wasm32"))]
path_absolutize::path_dedot::CWD.as_path()
Expand Down
152 changes: 116 additions & 36 deletions crates/ruff_python_formatter/tests/fixtures.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
use crate::normalizer::Normalizer;
use itertools::Itertools;
use ruff_db::diagnostic::{
Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig,
DisplayDiagnostics, DummyFileResolver, Severity, Span,
};
use ruff_formatter::FormatOptions;
use ruff_python_ast::Mod;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range};
use ruff_python_parser::{ParseOptions, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_python_parser::{ParseOptions, Parsed, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed, SourceFileBuilder};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
Expand Down Expand Up @@ -193,7 +198,8 @@ fn format() {
writeln!(snapshot, "## Outputs").unwrap();

for (i, options) in options.into_iter().enumerate() {
let formatted_code = format_file(&content, &options, input_path);
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);

writeln!(
snapshot,
Expand All @@ -210,7 +216,7 @@ fn format() {

// We want to capture the differences in the preview style in our fixtures
let options_preview = options.with_preview(PreviewMode::Enabled);
let formatted_preview = format_file(&content, &options_preview, input_path);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);

if formatted_code != formatted_preview {
// Having both snapshots makes it hard to see the difference, so we're keeping only
Expand All @@ -227,14 +233,28 @@ fn format() {
)
.unwrap();
}

if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"### Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}
} else {
// We want to capture the differences in the preview style in our fixtures
let options = PyFormatOptions::from_extension(input_path);
let formatted_code = format_file(&content, &options, input_path);
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);

let options_preview = options.with_preview(PreviewMode::Enabled);
let formatted_preview = format_file(&content, &options_preview, input_path);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);

if formatted_code == formatted_preview {
writeln!(
Expand All @@ -259,6 +279,19 @@ fn format() {
)
.unwrap();
}

if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"## Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}

insta::with_settings!({
Expand All @@ -277,7 +310,11 @@ fn format() {
);
}

fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> String {
fn format_file(
source: &str,
options: &PyFormatOptions,
input_path: &Path,
) -> (String, Vec<Diagnostic>) {
let (unformatted, formatted_code) = if source.contains("<RANGE_START>") {
let mut content = source.to_string();
let without_markers = content
Expand Down Expand Up @@ -337,9 +374,10 @@ fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> St
(Cow::Borrowed(source), formatted_code)
};

ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);
let unsupported_syntax_errors =
ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);

formatted_code
(formatted_code, unsupported_syntax_errors)
}

/// Format another time and make sure that there are no changes anymore
Expand Down Expand Up @@ -391,12 +429,15 @@ Formatted twice:
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
/// and can also flatten tuples within `del` statements.
///
/// Returns any new [`UnsupportedSyntaxError`]s in the formatted code as [`Diagnostic`]s for
/// snapshotting.
fn ensure_unchanged_ast(
unformatted_code: &str,
formatted_code: &str,
options: &PyFormatOptions,
input_path: &Path,
) {
) -> Vec<Diagnostic> {
let source_type = options.source_type();

// Parse the unformatted code.
Expand All @@ -407,7 +448,7 @@ fn ensure_unchanged_ast(
.expect("Unformatted code to be valid syntax");

let unformatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(unformatted_parsed.unsupported_syntax_errors());
collect_unsupported_syntax_errors(&unformatted_parsed);
let mut unformatted_ast = unformatted_parsed.into_syntax();

Normalizer.visit_module(&mut unformatted_ast);
Expand All @@ -422,29 +463,25 @@ fn ensure_unchanged_ast(

// Assert that there are no new unsupported syntax errors
let mut formatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(formatted_parsed.unsupported_syntax_errors());
collect_unsupported_syntax_errors(&formatted_parsed);

formatted_unsupported_syntax_errors
.retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));

if !formatted_unsupported_syntax_errors.is_empty() {
let index = LineIndex::from_source_text(formatted_code);
panic!(
"Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---",
input_path.display(),
formatted_unsupported_syntax_errors
.into_values()
.map(|error| {
let location = index.line_column(error.start(), formatted_code);
format!(
"{row}:{col} {error}",
row = location.line,
col = location.column
)
})
.join("\n")
);
}
let file = SourceFileBuilder::new(
input_path.file_name().unwrap().to_string_lossy(),
formatted_code,
)
.finish();
let diagnostics = formatted_unsupported_syntax_errors
.values()
.map(|error| {
let mut diag = Diagnostic::new(DiagnosticId::InvalidSyntax, Severity::Error, error);
let span = Span::from(file.clone()).with_range(error.range());
diag.annotate(Annotation::primary(span));
diag
})
.collect::<Vec<_>>();

let mut formatted_ast = formatted_parsed.into_syntax();
Normalizer.visit_module(&mut formatted_ast);
Expand All @@ -466,6 +503,8 @@ fn ensure_unchanged_ast(
input_path.display(),
);
}

diagnostics
}

struct Header<'a> {
Expand Down Expand Up @@ -537,22 +576,58 @@ source_type = {source_type:?}"#,
}
}

/// A visitor to collect a sequence of node IDs for fingerprinting [`UnsupportedSyntaxError`]s.
///
/// It visits each statement in the AST in source order and saves its range. The index of the node
/// enclosing a syntax error's range can then be retrieved with the `node_id` method. This `node_id`
/// should be stable across formatting runs since the formatter won't add or remove statements.
#[derive(Default)]
struct StmtVisitor {
nodes: Vec<TextRange>,
}

impl StmtVisitor {
fn new(module: &Mod) -> Self {
let mut visitor = Self::default();
visitor.visit_mod(module);
visitor
}

/// Return the index of the statement node that contains `range`.
fn node_id(&self, range: TextRange) -> usize {
self.nodes
.iter()
.position(|node| node.contains_range(range))
.unwrap()
}
}

impl<'a> SourceOrderVisitor<'a> for StmtVisitor {
fn visit_stmt(&mut self, stmt: &'a ruff_python_ast::Stmt) {
self.nodes.push(stmt.range());
ruff_python_ast::visitor::source_order::walk_stmt(self, stmt);
}
}

/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
errors: &[UnsupportedSyntaxError],
parsed: &Parsed<Mod>,
) -> FxHashMap<u64, UnsupportedSyntaxError> {
let visitor = StmtVisitor::new(parsed.syntax());

let mut collected = FxHashMap::default();

for error in errors {
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0);
for error in parsed.unsupported_syntax_errors() {
let node_id = visitor.node_id(error.range);
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, node_id, 0);

// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
loop {
match collected.entry(error_fingerprint) {
Entry::Occupied(_) => {
error_fingerprint =
fingerprint_unsupported_syntax_error(error, error_fingerprint);
fingerprint_unsupported_syntax_error(error, node_id, error_fingerprint);
}
Entry::Vacant(entry) => {
entry.insert(error.clone());
Expand All @@ -565,7 +640,11 @@ fn collect_unsupported_syntax_errors(
collected
}

fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 {
fn fingerprint_unsupported_syntax_error(
error: &UnsupportedSyntaxError,
node_id: usize,
salt: u64,
) -> u64 {
let mut hasher = DefaultHasher::new();

let UnsupportedSyntaxError {
Expand All @@ -579,6 +658,7 @@ fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u6
salt.hash(&mut hasher);
kind.hash(&mut hasher);
target_version.hash(&mut hasher);
node_id.hash(&mut hasher);

hasher.finish()
}
Original file line number Diff line number Diff line change
Expand Up @@ -2409,3 +2409,59 @@ print(
# Regression tests for https://github.com/astral-sh/ruff/issues/15536
print(f"{ {}, 1 }")
```


### Unsupported Syntax Errors
error[invalid-syntax]: Cannot use an escape sequence (backslash) in f-strings on Python 3.10 (syntax was added in Python 3.12)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these only the syntax errors that weren't suppressed with invalid-syntax allowed? I'm asking because it's not entirely clear to me when I'm supposed to use invalid-syntax vs when it's okay to just tolerate the invalid syntax errors.

I think I'd either keep panicking in the presence of an invalid syntax error that isn't explicitly suppressed with an invalid-syntax pragma comment or render them:

The risk I see with the pragma comments is that it might unintentionally suppress more syntax errors than I intended (e.g. in the with test case, I might allow invalid-syntax errors to suppress the diagnostics for 3.8, but then introduce an error for 3.9). On the other hand, it makes it very clear that unsupported syntax errors need to be fixed before a PR is merged (or they need to be explicitly suppressed).

If we go with rendering the errors, then I think we should add a short paragraph right below Unsupported syntax errors, saying that these need to be fixed unless they're pre-existing syntax errors already present in the source (and not introduced by the formatter).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we only render the leftover syntax errors that weren't present initially and weren't suppressed with the pragma comment. With the pragma comments, I was thinking we ideally wouldn't tolerate any snapshots, but I didn't want to fix these bugs in the same PR 😅 So I see what you mean, we should go back to panicking instead of snapshotting once these are resolved.

I think we may also be able to print the fingerprint for new errors so that you could include that in the pragma comment, to make them a bit more specific. Or maybe part of the error message would be easier to use, like in mdtests.

But I think I would still lean toward just rendering all of the new errors too and not having the pragma comments. That's a bit simpler and we didn't have many errors to render once we updated the fingerprint filtering anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I think I would still lean toward just rendering all of the new errors too and not having the pragma comments. That's a bit simpler and we didn't have many errors to render once we updated the fingerprint filtering anyway.

Sounds good to me. Sorry for misdirecting you on the pragma comments

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, it was interesting to try out! I'll keep my cleanup commits from this morning and then revert them all, just in case we ever want to resurrect them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if all those errors are now okay or if some of those are new errors introduced by the formatter. If any of them falls into the latter category, could you add a FIXME to the fixture file above the relevant line?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the errors for line 762 were related to #20774, but it turns out that they're false positives with our syntax error detection instead. I'll add the FIXME and open a separate issue.

--> fstring.py:762:19
|
760 | f'{1: abcd "{"aa"}" }'
761 | f'{1=: "abcd {'aa'}}'
762 | f"{x:a{z:hy \"user\"}} '''"
| ^
763 |
764 | # Changing the outer quotes is fine because the format-spec is in a nested expression.
|

error[invalid-syntax]: Cannot use an escape sequence (backslash) in f-strings on Python 3.10 (syntax was added in Python 3.12)
--> fstring.py:762:13
|
760 | f'{1: abcd "{"aa"}" }'
761 | f'{1=: "abcd {'aa'}}'
762 | f"{x:a{z:hy \"user\"}} '''"
| ^
763 |
764 | # Changing the outer quotes is fine because the format-spec is in a nested expression.
|

error[invalid-syntax]: Cannot reuse outer quote character in f-strings on Python 3.10 (syntax was added in Python 3.12)
--> fstring.py:178:8
|
176 | # Here, the formatter will remove the escapes which is correct because they aren't allowed
177 | # pre 3.12. This means we can assume that the f-string is used in the context of 3.12.
178 | f"foo {"'bar'"}"
| ^
179 | f"foo {'"bar"'}"
|

error[invalid-syntax]: Cannot reuse outer quote character in f-strings on Python 3.10 (syntax was added in Python 3.12)
--> fstring.py:771:14
|
769 | f'{1=: "abcd \'\'}' # Don't change the outer quotes, or it results in a syntax error
770 | f"{1=: abcd \'\'}" # Changing the quotes here is fine because the inner quotes aren't the opposite quotes
771 | f"{1=: abcd \"\"}" # Changing the quotes here is fine because the inner quotes are escaped
| ^
772 | # Don't change the quotes in the following cases:
773 | f'{x=:hy "user"} \'\'\''
|

error[invalid-syntax]: Cannot reuse outer quote character in f-strings on Python 3.10 (syntax was added in Python 3.12)
--> fstring.py:762:14
|
760 | f'{1: abcd "{"aa"}" }'
761 | f'{1=: "abcd {'aa'}}'
762 | f"{x:a{z:hy \"user\"}} '''"
| ^
763 |
764 | # Changing the outer quotes is fine because the format-spec is in a nested expression.
|
Loading