blob: a702e60f1c276050f204ae5f348ac59b64f8a490 [file] [log] [blame]
use std::ops::ControlFlow;
use clippy_utils::diagnostics::span_lint_hir_and_then;
use clippy_utils::ty::is_type_lang_item;
use clippy_utils::visitors::for_each_expr;
use clippy_utils::{eq_expr_value, higher, path_to_local_id, sym};
use rustc_errors::{Applicability, MultiSpan};
use rustc_hir::{Expr, ExprKind, LangItem, Node, Pat, PatKind};
use rustc_lint::LateContext;
use rustc_middle::ty::Ty;
use rustc_span::{Span, Symbol};
use super::CHAR_INDICES_AS_BYTE_INDICES;
// The list of `str` methods we want to lint that have a `usize` argument representing a byte index.
// Note: `String` also has methods that work with byte indices,
// but they all take `&mut self` and aren't worth considering since the user couldn't have called
// them while the chars iterator is live anyway.
const BYTE_INDEX_METHODS: &[Symbol] = &[
sym::ceil_char_boundary,
sym::floor_char_boundary,
sym::get,
sym::get_mut,
sym::get_unchecked,
sym::get_unchecked_mut,
sym::index,
sym::index_mut,
sym::is_char_boundary,
sym::slice_mut_unchecked,
sym::slice_unchecked,
sym::split_at,
sym::split_at_checked,
sym::split_at_mut,
sym::split_at_mut_checked,
];
const CONTINUE: ControlFlow<!, ()> = ControlFlow::Continue(());
pub(super) fn check<'tcx>(cx: &LateContext<'tcx>, pat: &Pat<'_>, iterable: &Expr<'_>, body: &'tcx Expr<'tcx>) {
if let ExprKind::MethodCall(_, enumerate_recv, _, enumerate_span) = iterable.kind
&& let Some(method_id) = cx.typeck_results().type_dependent_def_id(iterable.hir_id)
&& cx.tcx.is_diagnostic_item(sym::enumerate_method, method_id)
&& let ExprKind::MethodCall(_, chars_recv, _, chars_span) = enumerate_recv.kind
&& let Some(method_id) = cx.typeck_results().type_dependent_def_id(enumerate_recv.hir_id)
&& cx.tcx.is_diagnostic_item(sym::str_chars, method_id)
{
if let PatKind::Tuple([pat, _], _) = pat.kind
&& let PatKind::Binding(_, binding_id, ..) = pat.kind
{
// Destructured iterator element `(idx, _)`, look for uses of the binding
for_each_expr(cx, body, |expr| {
if path_to_local_id(expr, binding_id) {
check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv);
}
CONTINUE
});
} else if let PatKind::Binding(_, binding_id, ..) = pat.kind {
// Bound as a tuple, look for `tup.0`
for_each_expr(cx, body, |expr| {
if let ExprKind::Field(e, field) = expr.kind
&& path_to_local_id(e, binding_id)
&& field.name == sym::integer(0)
{
check_index_usage(cx, expr, pat, enumerate_span, chars_span, chars_recv);
}
CONTINUE
});
}
}
}
fn check_index_usage<'tcx>(
cx: &LateContext<'tcx>,
expr: &'tcx Expr<'tcx>,
pat: &Pat<'_>,
enumerate_span: Span,
chars_span: Span,
chars_recv: &Expr<'_>,
) {
let Some(parent_expr) = index_consumed_at(cx, expr) else {
return;
};
let is_string_like = |ty: Ty<'_>| ty.is_str() || is_type_lang_item(cx, ty, LangItem::String);
let message = match parent_expr.kind {
ExprKind::MethodCall(segment, recv, ..)
// We currently only lint `str` methods (which `String` can deref to), so a `.is_str()` check is sufficient here
// (contrary to the `ExprKind::Index` case which needs to handle both with `is_string_like` because `String` implements
// `Index` directly and no deref to `str` would happen in that case).
if cx.typeck_results().expr_ty_adjusted(recv).peel_refs().is_str()
&& BYTE_INDEX_METHODS.contains(&segment.ident.name)
&& eq_expr_value(cx, chars_recv, recv) =>
{
"passing a character position to a method that expects a byte index"
},
ExprKind::Index(target, ..)
if is_string_like(cx.typeck_results().expr_ty_adjusted(target).peel_refs())
&& eq_expr_value(cx, chars_recv, target) =>
{
"indexing into a string with a character position where a byte index is expected"
},
_ => return,
};
span_lint_hir_and_then(
cx,
CHAR_INDICES_AS_BYTE_INDICES,
expr.hir_id,
expr.span,
message,
|diag| {
diag.note("a character can take up more than one byte, so they are not interchangeable")
.span_note(
MultiSpan::from_spans(vec![pat.span, enumerate_span]),
"position comes from the enumerate iterator",
)
.span_suggestion_verbose(
chars_span.to(enumerate_span),
"consider using `.char_indices()` instead",
"char_indices()",
Applicability::MaybeIncorrect,
);
},
);
}
/// Returns the expression which ultimately consumes the index.
/// This is usually the parent expression, i.e. `.split_at(idx)` for `idx`,
/// but for `.get(..idx)` we want to consider the method call the consuming expression,
/// which requires skipping past the range expression.
fn index_consumed_at<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> {
for (_, node) in cx.tcx.hir_parent_iter(expr.hir_id) {
match node {
Node::Expr(expr) if higher::Range::hir(expr).is_some() => {},
Node::ExprField(_) => {},
Node::Expr(expr) => return Some(expr),
_ => break,
}
}
None
}