src/metrics/halstead.rs

use std::collections::HashMap; use serde::Serialize; use serde::ser::{SerializeStruct, Serializer}; use std::fmt; use crate::checker::Checker; use crate::getter::Getter; use crate::macros::implement_metric_trait; use crate::*; /// The `Halstead` metric suite. #[derive(Default, Clone, Debug)] pub struct Stats { u_operators: u64, operators: u64, u_operands: u64, operands: u64, } /// Specifies the type of nodes accepted by the `Halstead` metric. pub enum HalsteadType { /// The node is an `Halstead` operator Operator, /// The node is an `Halstead` operand Operand, /// The node is unknown to the `Halstead` metric Unknown, } #[derive(Debug, Default, Clone)] pub struct HalsteadMaps<'a> { pub(crate) operators: HashMap<u16, u64>, pub(crate) operands: HashMap<&'a [u8], u64>, } impl<'a> HalsteadMaps<'a> { pub(crate) fn new() -> Self { HalsteadMaps { operators: HashMap::default(), operands: HashMap::default(), } } pub(crate) fn merge(&mut self, other: &HalsteadMaps<'a>) { for (k, v) in other.operators.iter() { *self.operators.entry(*k).or_insert(0) += v; } for (k, v) in other.operands.iter() { *self.operands.entry(*k).or_insert(0) += v; } } pub(crate) fn finalize(&self, stats: &mut Stats) { stats.u_operators = self.operators.len() as u64; stats.operators = self.operators.values().sum::<u64>(); stats.u_operands = self.operands.len() as u64; stats.operands = self.operands.values().sum::<u64>(); } } impl Serialize for Stats { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { let mut st = serializer.serialize_struct("halstead", 14)?; st.serialize_field("n1", &self.u_operators())?; st.serialize_field("N1", &self.operators())?; st.serialize_field("n2", &self.u_operands())?; st.serialize_field("N2", &self.operands())?; st.serialize_field("length", &self.length())?; st.serialize_field("estimated_program_length", &self.estimated_program_length())?; st.serialize_field("purity_ratio", &self.purity_ratio())?; st.serialize_field("vocabulary", &self.vocabulary())?; st.serialize_field("volume", &self.volume())?; st.serialize_field("difficulty", &self.difficulty())?; st.serialize_field("level", &self.level())?; st.serialize_field("effort", &self.effort())?; st.serialize_field("time", &self.time())?; st.serialize_field("bugs", &self.bugs())?; st.end() } } impl fmt::Display for Stats { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "n1: {}, \ N1: {}, \ n2: {}, \ N2: {}, \ length: {}, \ estimated program length: {}, \ purity ratio: {}, \ size: {}, \ volume: {}, \ difficulty: {}, \ level: {}, \ effort: {}, \ time: {}, \ bugs: {}", self.u_operators(), self.operators(), self.u_operands(), self.operands(), self.length(), self.estimated_program_length(), self.purity_ratio(), self.vocabulary(), self.volume(), self.difficulty(), self.level(), self.effort(), self.time(), self.bugs(), ) } } impl Stats { pub(crate) fn merge(&mut self, _other: &Stats) {} /// Returns `η1`, the number of distinct operators #[inline(always)] pub fn u_operators(&self) -> f64 { self.u_operators as f64 } /// Returns `N1`, the number of total operators #[inline(always)] pub fn operators(&self) -> f64 { self.operators as f64 } /// Returns `η2`, the number of distinct operands #[inline(always)] pub fn u_operands(&self) -> f64 { self.u_operands as f64 } /// Returns `N2`, the number of total operands #[inline(always)] pub fn operands(&self) -> f64 { self.operands as f64 } /// Returns the program length #[inline(always)] pub fn length(&self) -> f64 { self.operands() + self.operators() } /// Returns the calculated estimated program length #[inline(always)] pub fn estimated_program_length(&self) -> f64 { self.u_operators() * self.u_operators().log2() + self.u_operands() * self.u_operands().log2() } /// Returns the purity ratio #[inline(always)] pub fn purity_ratio(&self) -> f64 { self.estimated_program_length() / self.length() } /// Returns the program vocabulary #[inline(always)] pub fn vocabulary(&self) -> f64 { self.u_operands() + self.u_operators() } /// Returns the program volume. /// /// Unit of measurement: bits #[inline(always)] pub fn volume(&self) -> f64 { // Assumes a uniform binary encoding for the vocabulary is used. self.length() * self.vocabulary().log2() } /// Returns the estimated difficulty required to program #[inline(always)] pub fn difficulty(&self) -> f64 { self.u_operators() / 2. * self.operands() / self.u_operands() } /// Returns the estimated level of difficulty required to program #[inline(always)] pub fn level(&self) -> f64 { 1. / self.difficulty() } /// Returns the estimated effort required to program #[inline(always)] pub fn effort(&self) -> f64 { self.difficulty() * self.volume() } /// Returns the estimated time required to program. /// /// Unit of measurement: seconds #[inline(always)] pub fn time(&self) -> f64 { // The floating point `18.` aims to describe the processing rate of the // human brain. It is called Stoud number, S, and its // unit of measurement is moments/seconds. // A moment is the time required by the human brain to carry out the // most elementary decision. // 5 <= S <= 20. Halstead uses 18. // The value of S has been empirically developed from psychological // reasoning, and its recommended value for // programming applications is 18. // // Source: https://www.geeksforgeeks.org/software-engineering-halsteads-software-metrics/ self.effort() / 18. } /// Returns the estimated number of delivered bugs. /// /// This metric represents the average amount of work a programmer can do /// without introducing an error. #[inline(always)] pub fn bugs(&self) -> f64 { // The floating point `3000.` represents the number of elementary // mental discriminations. // A mental discrimination, in psychology, is the ability to perceive // and respond to differences among stimuli. // // The value above is obtained starting from a constant that // is different for every language and assumes that natural language is // the language of the brain. // For programming languages, the English language constant // has been considered. // // After every 3000 mental discriminations a result is produced. // This result, whether correct or incorrect, is more than likely // either used as an input for the next operation or is output to the // environment. // If incorrect the error should become apparent. // Thus, an opportunity for error occurs every 3000 // mental discriminations. // // Source: https://docs.lib.purdue.edu/cgi/viewcontent.cgi?article=1145&context=cstech self.effort().powf(2. / 3.) / 3000. } } pub trait Halstead where Self: Checker, { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>); } #[inline(always)] fn get_id<'a>(node: &Node<'a>, code: &'a [u8]) -> &'a [u8] { &code[node.start_byte()..node.end_byte()] } #[inline(always)] fn compute_halstead<'a, T: Getter>( node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>, ) { match T::get_op_type(node) { HalsteadType::Operator => { *halstead_maps.operators.entry(node.kind_id()).or_insert(0) += 1; } HalsteadType::Operand => { *halstead_maps .operands .entry(get_id(node, code)) .or_insert(0) += 1; } _ => {} } } impl Halstead for PythonCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for MozjsCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for JavascriptCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for TypescriptCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for TsxCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for RustCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for CppCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } impl Halstead for JavaCode { fn compute<'a>(node: &Node<'a>, code: &'a [u8], halstead_maps: &mut HalsteadMaps<'a>) { compute_halstead::<Self>(node, code, halstead_maps); } } implement_metric_trait!(Halstead, KotlinCode, PreprocCode, CcommentCode); #[cfg(test)] mod tests { use crate::tools::check_metrics; use super::*; #[test] fn python_operators_and_operands() { check_metrics::<PythonParser>( "def foo(): def bar(): def toto(): a = 1 + 1 b = 2 + a c = 3 + 3", "foo.py", |metric| { // unique operators: def, =, + // operators: def, def, def, =, =, =, +, +, + // unique operands: foo, bar, toto, a, b, c, 1, 2, 3 // operands: foo, bar, toto, a, b, c, 1, 1, 2, a, 3, 3 insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 3.0, "N1": 9.0, "n2": 9.0, "N2": 12.0, "length": 21.0, "estimated_program_length": 33.284212515144276, "purity_ratio": 1.584962500721156, "vocabulary": 12.0, "volume": 75.28421251514428, "difficulty": 2.0, "level": 0.5, "effort": 150.56842503028855, "time": 8.364912501682698, "bugs": 0.0094341190071077 }"### ); }, ); } #[test] fn cpp_operators_and_operands() { // Define operators and operands for C/C++ grammar according to this specification: // https://www.verifysoft.com/en_halstead_metrics.html // The only difference with the specification above is that // primitive types are treated as operators, since the definition of a // primitive type can be seen as the creation of a slot of a certain size. // i.e. The `int a;` definition creates a n-bytes slot. check_metrics::<CppParser>( "main() { int a, b, c, avg; scanf(\"%d %d %d\", &a, &b, &c); avg = (a + b + c) / 3; printf(\"avg = %d\", avg); }", "foo.c", |metric| { // unique operators: (), {}, int, &, =, +, /, ,, ; // unique operands: main, a, b, c, avg, scanf, "%d %d %d", 3, printf, "avg = %d" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 9.0, "N1": 24.0, "n2": 10.0, "N2": 18.0, "length": 42.0, "estimated_program_length": 61.74860596185444, "purity_ratio": 1.470204903853677, "vocabulary": 19.0, "volume": 178.41295556463058, "difficulty": 8.1, "level": 0.1234567901234568, "effort": 1445.1449400735075, "time": 80.28583000408375, "bugs": 0.04260752914034329 }"### ); }, ); } #[test] fn rust_operators_and_operands() { check_metrics::<RustParser>( "fn main() { let a = 5; let b = 5; let c = 5; let avg = (a + b + c) / 3; println!(\"{}\", avg); }", "foo.rs", |metric| { // unique operators: fn, (), {}, let, =, +, /, ;, !, , // unique operands: main, a, b, c, avg, 5, 3, println, "{}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 23.0, "n2": 9.0, "N2": 15.0, "length": 38.0, "estimated_program_length": 61.74860596185444, "purity_ratio": 1.624963314785643, "vocabulary": 19.0, "volume": 161.42124551085624, "difficulty": 8.333333333333334, "level": 0.12, "effort": 1345.177045923802, "time": 74.7320581068779, "bugs": 0.040619232256751396 }"### ); }, ); } #[test] fn javascript_operators_and_operands() { check_metrics::<JavascriptParser>( "function main() { var a, b, c, avg; a = 5; b = 5; c = 5; avg = (a + b + c) / 3; console.log(\"{}\", avg); }", "foo.js", |metric| { // unique operators: function, (), {}, var, =, +, /, ,, ., ; // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 24.0, "n2": 11.0, "N2": 21.0, "length": 45.0, "estimated_program_length": 71.27302875388389, "purity_ratio": 1.583845083419642, "vocabulary": 21.0, "volume": 197.65428402504423, "difficulty": 9.545454545454545, "level": 0.10476190476190476, "effort": 1886.699983875422, "time": 104.81666577085679, "bugs": 0.05089564733125986 }"### ); }, ); } #[test] fn mozjs_operators_and_operands() { check_metrics::<MozjsParser>( "function main() { var a, b, c, avg; a = 5; b = 5; c = 5; avg = (a + b + c) / 3; console.log(\"{}\", avg); }", "foo.js", |metric| { // unique operators: function, (), {}, var, =, +, /, ,, ., ; // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 24.0, "n2": 11.0, "N2": 21.0, "length": 45.0, "estimated_program_length": 71.27302875388389, "purity_ratio": 1.583845083419642, "vocabulary": 21.0, "volume": 197.65428402504423, "difficulty": 9.545454545454545, "level": 0.10476190476190476, "effort": 1886.699983875422, "time": 104.81666577085679, "bugs": 0.05089564733125986 }"### ); }, ); } #[test] fn typescript_operators_and_operands() { check_metrics::<TypescriptParser>( "function main() { var a, b, c, avg; a = 5; b = 5; c = 5; avg = (a + b + c) / 3; console.log(\"{}\", avg); }", "foo.ts", |metric| { // unique operators: function, (), {}, var, =, +, /, ,, ., ; // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 24.0, "n2": 11.0, "N2": 21.0, "length": 45.0, "estimated_program_length": 71.27302875388389, "purity_ratio": 1.583845083419642, "vocabulary": 21.0, "volume": 197.65428402504423, "difficulty": 9.545454545454545, "level": 0.10476190476190476, "effort": 1886.699983875422, "time": 104.81666577085679, "bugs": 0.05089564733125986 }"### ); }, ); } #[test] fn tsx_operators_and_operands() { check_metrics::<TsxParser>( "function main() { var a, b, c, avg; a = 5; b = 5; c = 5; avg = (a + b + c) / 3; console.log(\"{}\", avg); }", "foo.ts", |metric| { // unique operators: function, (), {}, var, =, +, /, ,, ., ; // unique operands: main, a, b, c, avg, 3, 5, console.log, console, log, "{}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 24.0, "n2": 11.0, "N2": 21.0, "length": 45.0, "estimated_program_length": 71.27302875388389, "purity_ratio": 1.583845083419642, "vocabulary": 21.0, "volume": 197.65428402504423, "difficulty": 9.545454545454545, "level": 0.10476190476190476, "effort": 1886.699983875422, "time": 104.81666577085679, "bugs": 0.05089564733125986 }"### ); }, ); } #[test] fn python_wrong_operators() { check_metrics::<PythonParser>("()[]{}", "foo.py", |metric| { insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 0.0, "N1": 0.0, "n2": 0.0, "N2": 0.0, "length": 0.0, "estimated_program_length": null, "purity_ratio": null, "vocabulary": 0.0, "volume": null, "difficulty": null, "level": null, "effort": null, "time": null, "bugs": null }"### ); }); } #[test] fn python_check_metrics() { check_metrics::<PythonParser>( "def f(): pass", "foo.py", |metric| { insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 2.0, "N1": 2.0, "n2": 1.0, "N2": 1.0, "length": 3.0, "estimated_program_length": 2.0, "purity_ratio": 0.6666666666666666, "vocabulary": 3.0, "volume": 4.754887502163468, "difficulty": 1.0, "level": 1.0, "effort": 4.754887502163468, "time": 0.26416041678685936, "bugs": 0.0009425525573729414 }"### ); }, ); } #[test] fn java_operators_and_operands() { check_metrics::<JavaParser>( "public class Main { public static void main(string args[]) { int a, b, c, avg; a = 5; b = 5; c = 5; avg = (a + b + c) / 3; MessageFormat.format(\"{0}\", avg); } }", "foo.java", |metric| { // { void ; ( String [ ] ) , int = + / format . } // Main main args a b c avg 5 3 MessageFormat format "{0}" insta::assert_json_snapshot!( metric.halstead, @r###" { "n1": 10.0, "N1": 25.0, "n2": 12.0, "N2": 22.0, "length": 47.0, "estimated_program_length": 76.2388309575275, "purity_ratio": 1.6221027863303723, "vocabulary": 22.0, "volume": 209.59328607595296, "difficulty": 9.166666666666666, "level": 0.1090909090909091, "effort": 1921.2717890295687, "time": 106.73732161275382, "bugs": 0.05151550353617788 }"### ); }, ); } }

src/metrics/halstead.rs (284 lines of code) (raw):