public static IEnumerable Deserialize()

in Chemistry/src/DataModel/Serialization/LegacyFormats/FciDump.cs [32:109]


        /// <summary>
        /// Reads an FCIDUMP document from <paramref name="reader"/> and converts it
        /// into an electronic structure problem.
        /// </summary>
        /// <remarks>
        /// The text before the first line consisting of <c>&amp;END</c> is treated as
        /// the namelist header and must start with <c>&amp;FCI</c>. Each non-blank line
        /// after it is read as a coefficient followed by integer indices, where
        /// zero-valued indices are dropped. Exactly one row is expected to have no
        /// remaining indices; its coefficient is used as the Coulomb repulsion.
        /// Numbers are parsed with the invariant culture so that the result does
        /// not depend on the culture of the current thread.
        /// </remarks>
        /// <param name="reader">
        /// Source of the FCIDUMP text. It is read to the end but not disposed.
        /// </param>
        /// <returns>
        /// A list holding a single <see cref="ElectronicStructureProblem"/> built
        /// from the namelist and the integral rows.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="IOException">
        /// The input is empty or does not start with <c>&amp;FCI</c>.
        /// </exception>
        /// <exception cref="KeyNotFoundException">
        /// The namelist does not define <c>NELEC</c> or <c>NORB</c>.
        /// </exception>
        /// <exception cref="FormatException">
        /// A coefficient, index, or namelist count is not a valid number.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The body does not contain exactly one row without non-zero indices.
        /// </exception>
        public static IEnumerable<ElectronicStructureProblem> Deserialize(TextReader reader)
        {
            if (reader is null)
            {
                throw new ArgumentNullException(nameof(reader));
            }

            // FCIDUMP files begin with a FORTRAN-formatted namelist, delimited
            // by &FCI and &END. We start by extracting that namelist.
            var allText = reader.ReadToEnd();
            if (string.IsNullOrWhiteSpace(allText))
            {
                throw new IOException("Expected a non-empty FCIDUMP file.");
            }

            var lines = Regex.Split(allText, "\r\n|\r|\n");
            var header = System.String.Join("\n", lines.TakeWhile(line => line.Trim() != "&END")).Trim();
            var body = lines.SkipWhile(line => line.Trim() != "&END").Skip(1).ToList();

            // Make sure that the header starts with &FCI, as expected.
            if (!header.StartsWith("&FCI", StringComparison.Ordinal))
            {
                throw new IOException("FCIDUMP file did not start with \"&FCI\" as expected.");
            }

            // Split out the &FCI and &END lines, turn the rest into a dictionary of namelist items.
            var namelist = Regex.Matches(
                header
                .Replace("&FCI", "")
                .Replace("&END", ""),
                pattern: "\\s*(?<identifier>\\w+)\\s*=\\s*(?<value>[^=]+),\\s*"
            )
            .ToDictionary(
                match => match.Groups["identifier"].Value,
                match => match.Groups["value"].Value
            );

            // FCIDUMP is a machine-readable format, so numbers must not be
            // interpreted according to the current thread's culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var hamiltonian = new OrbitalIntegralHamiltonian();

            // Materialize the parsed rows once; they are consumed twice below
            // and would otherwise be re-parsed on each enumeration.
            var arrayData = body
                .Select(line => line.Trim())
                .Where(line => line.Length > 0)
                .Select(
                    line => line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries)
                )
                .Select(
                    row => (
                        Coefficient: Double.Parse(row[0], invariant),
                        Indices: row[1..]
                            .Select(idx => Int32.Parse(idx, invariant))
                            .Where(idx => idx != 0)
                            .ToZeroBasedIndices()
                    )
                )
                .ToList();

            // The single row whose indices are all zero carries the constant term.
            var (coulomb, _) = arrayData.Single(item => item.Indices.Length == 0);

            // Rows with an odd number of indices map to null here.
            // NOTE(review): presumably SelectMaybe drops those null entries - confirm.
            hamiltonian.Add(arrayData
                .Where(row => row.Indices.Length > 0)
                .SelectMaybe(
                    row => row.Indices.Length % 2 == 0
                           ? new OrbitalIntegral(
                                 row.Indices, row.Coefficient, OrbitalIntegral.Convention.Mulliken
                             ).ToCanonicalForm()
                           : null
                )
                .Distinct()
            );

            // The identity term in deserialized Hamiltonians is the sum of the
            // Coulomb repulsion and the energy offset. Since only the former
            // exists in FCIDUMP, we set the identity term accordingly.
            hamiltonian.Add(new OrbitalIntegral(), coulomb);

            return new List<ElectronicStructureProblem>
            {
                new ElectronicStructureProblem
                {
                    EnergyOffset = 0.0.WithUnits("hartree"),
                    CoulombRepulsion = coulomb.WithUnits("hartree"),
                    Metadata = new Dictionary<string, object>
                    {
                        ["Comment"] = "Imported from FCIDUMP"
                    },
                    NElectrons = Int32.Parse(namelist["NELEC"], invariant),
                    NOrbitals = Int32.Parse(namelist["NORB"], invariant),
                    OrbitalIntegralHamiltonian = hamiltonian
                }
            };
        }