meta/aspellcheck.pl (82 lines of code) (raw):

#!/usr/bin/perl
#
# Copyright (c) 2023 Microsoft Open Technologies, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
#    THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR
#    CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT
#    LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS
#    FOR A PARTICULAR PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT.
#
#    See the Apache Version 2.0 License for specific language governing
#    permissions and limitations under the License.
#
#    Microsoft would like to thank the following companies for their review and
#    assistance with these files: Intel Corporation, Mellanox Technologies Ltd,
#    Dell Products, L.P., Facebook, Inc., Marvell International Ltd.
#
# @file    aspellcheck.pl
#
# @brief   This module runs aspell on meta source and headers
#

BEGIN { push @INC,'.'; }

use strict;
use warnings;
use diagnostics;

use Term::ANSIColor;

use utils;
use style;

# Counters inspected at the end of the script to decide the exit status.
# NOTE(review): presumably incremented by LogError/LogWarning from utils -
# they are never modified in this file; confirm against utils.pm.
our $errors = 0;
our $warnings = 0;

#
# Return the list of C/C++ sources and headers found below the current
# directory, exactly as printed by find(1) (each entry still carries its
# trailing newline; callers chomp it).
#
sub GetSourceFilesAndHeaders
{
    my @files = `find . -name "*.cpp" -o -name "*.[ch]"`;

    return @files;
}

#
# Slurp a whole file in binary mode and return its content as one string.
# Dies when the file cannot be opened.
#
sub ReadFile
{
    my $filename = shift;

    # undefined record separator => single read returns the entire file
    local $/ = undef;

    # first search file in meta directory

    # three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode and no global handle is left behind
    open(my $fh, '<', $filename) or die "Couldn't open file $filename: $!";

    binmode $fh;

    my $string = <$fh>;

    close $fh;

    return $string;
}

#
# Return the concatenation of all comments (// ... and /* ... */) found in
# the given C/C++ source text. Double quoted strings are matched as well so
# that comment markers inside them are consumed, but they are not returned.
#
sub ExtractComments
{
    my $input = shift;

    my $comments = "";

    # good enough comments extractor C/C++ source
    #
    # A single /g scan resumes right after the previous match, which visits
    # the same matches as repeatedly restarting on the post-match text but
    # without copying the remaining input each time.

    while ($input =~ m!(".*?")|(//.*?[\r\n]|/\*.*?\*/)!sg)
    {
        # group 2 is set only when a comment (not a string literal) matched
        $comments .= $2 if defined $2;
    }

    return $comments;
}

#
# Feed all collected words to aspell (pipe mode, English, with the personal
# word list ./aspell.en.pws) and log a warning for every word aspell reports
# with suggestions.
#
# @param $hash reference to a hash: word => file in which the word was seen
#
sub RunAspell
{
    my $hash = shift;

    my %wordsToCheck = %{ $hash };

    if (not -e "/usr/bin/aspell")
    {
        LogError "ASPELL IS NOT PRESENT, please install aspell";
        return;
    }

    LogInfo "Running Aspell";

    my @keys = sort keys %wordsToCheck;

    my $count = @keys;

    my $all = "@keys";

    LogInfo "Words to check: $count";

    # words were reduced to [a-zA-Z0-9] by the caller in this file, so
    # interpolating them into the shell command line is safe here

    my @result = `echo "$all" | /usr/bin/aspell -l en -a -p ./aspell.en.pws 2>&1`;

    for my $res (@result)
    {
        # Lines starting with '&' or '#' are per-word results; the checked
        # word or its suggestions may legitimately contain "error", so only
        # other lines are treated as aspell failures.

        if ($res =~ /error/i and $res !~ /^\s*[&#]/)
        {
            LogError "aspell error: $res";
            last;
        }

        chomp $res;

        # skip empty lines and lines for correctly spelled words ("*")
        next if $res =~ /^\*?$/;

        print "$res\n";

        # NOTE(review): '#' lines (misspelled, no suggestions) are printed
        # above but never reported as a warning - confirm this is intended.

        next if not $res =~ /^\s*&\s*(\S+)/;

        my $word = $1;

        next if $word =~ /^wred$/i;

        my $where = "??";

        if (not defined $wordsToCheck{$word})
        {
            # aspell may report only a part of a submitted word: prefer a
            # key that starts or ends with it, otherwise remember the last
            # key that merely contains it

            for my $k (@keys)
            {
                # \Q..\E: the word comes from external output and must be
                # matched literally, not as a regular expression

                if ($k =~ /(^\Q$word\E|\Q$word\E$)/)
                {
                    $where = $wordsToCheck{$k};
                    last;
                }

                $where = $wordsToCheck{$k} if ($k =~ /\Q$word\E/);
            }
        }
        else
        {
            $where = $wordsToCheck{$word};
        }

        LogWarning "Word '$word' is misspelled $where";
    }
}

# acronyms (provided by GetAcronyms) are never spell checked

my @acronyms = GetAcronyms();

my %spellAcronyms = ();

$spellAcronyms{$_} = 1 for @acronyms;

# additional words that are accepted as they are

my @exceptions = qw/ IPv4 IPv6 0xFF IPv SAIMETADATALOGGER auth objecttype saimetadatalogger sak /;

my %spellExceptions = map { $_ => $_ } @exceptions;

my @files = GetSourceFilesAndHeaders();

# word => file in which the word was (last) seen

my %wordsToCheck = ();

for my $file (@files)
{
    chomp $file;

    # skip files that are excluded from spell checking

    next if $file =~ m!saiswig.cpp!;
    next if $file =~ m!temp!;
    next if $file =~ m!xml!;

    next if $file =~ m!saimetadata.[ch]!;
    next if $file =~ m!generated!;
    next if $file =~ m!sai_rpc_server.cpp!;

    my $data = ReadFile $file;

    my $comments = ExtractComments $data;

    # strip links, known tokens, ordinals and comment markers, then replace
    # everything that is not alphanumeric or underscore with a space

    $comments =~ s!github.com\S+! !g;
    $comments =~ s!l2mc! !g;
    $comments =~ s!\s+\d+(th|nd) ! !g;
    $comments =~ s!(/\*|\*/)! !g;
    $comments =~ s!//! !g;
    $comments =~ s!\s+\*\s+! !g;
    $comments =~ s![^a-zA-Z0-9_]! !g;

    my @words = split/\s+/,$comments;

    for my $word (@words)
    {
        # split yields an empty leading field when the text starts with
        # whitespace; it is not a word and must not be counted
        next if $word eq "";

        next if defined $spellAcronyms{$word};
        next if defined $spellExceptions{$word};

        # identifiers and other tokens that are not natural language

        next if $word =~ /_/;
        next if $word =~ /xYYY+/;
        next if $word =~ /fe\d+/;
        next if $word =~ /ebe\d+/;
        next if $word =~ /Werror/;

        next if $word =~ /^[A-Za-z][a-z]+([A-Z][a-z]+)+$/; # fooBar FooBar

        $wordsToCheck{$word} = $file;
    }
}

RunAspell(\%wordsToCheck);

exit 1 if ($warnings > 0 or $errors > 0);