assessment/collectors/project_analyzer/java_file_dependency_analyzer.go (279 lines of code) (raw):

package assessment import ( "context" "fmt" "os" "path/filepath" "strings" "github.com/GoogleCloudPlatform/spanner-migration-tool/logger" sitter "github.com/smacker/go-tree-sitter" "github.com/smacker/go-tree-sitter/java" "go.uber.org/zap" ) // JavaDependencyAnalyzer implements DependencyAnalyzer for Go projects type JavaDependencyAnalyzer struct { BaseAnalyzer ctx context.Context } type JavaFileParsedInfo struct { FileName string FilePath string Package string DeclaredClasses []string FileContent []byte } func (g *JavaDependencyAnalyzer) IsDAO(filePath string, fileContent string) bool { filePath = strings.ToLower(filePath) if strings.Contains(filePath, "dao") { return true } if strings.Contains(fileContent, "jdbc") || strings.Contains(fileContent, "mysql") { return true } return false } func (g *JavaDependencyAnalyzer) GetFrameworkFromFileContent(fileContent string) string { if strings.Contains(fileContent, "org.hibernate") { return "Hibernate" } if strings.Contains(fileContent, "org.apache.ibatis") { return "MyBatis" } if strings.Contains(fileContent, "java.sql.DriverManager") || strings.Contains(fileContent, "javax.sql.DataSource") { return "JDBC" } if strings.Contains(fileContent, "org.springframework.data.jpa") { return "Spring Data JPA" } return "" } func (j *JavaDependencyAnalyzer) GetExecutionOrder(projectDir string) (map[string]map[string]struct{}, [][]string) { G := j.getDependencyGraph(projectDir) sortedTasks, err := j.TopologicalSort(G) if err != nil { logger.Log.Debug("Graph still has cycles after relaxation. Sorting not possible: ", zap.Error(err)) return nil, nil } logger.Log.Debug("Execution order determined successfully.") return G, sortedTasks } // getDependencyGraph: get dependency graph for java files. There will be not cycle in the graph func (j *JavaDependencyAnalyzer) getDependencyGraph(directory string) map[string]map[string]struct{} { parser := sitter.NewParser() defer parser.Close() parser.SetLanguage(java.GetLanguage()) fileDependenciesMapWithCycles := make(map[string]map[string]struct{}) classToFileInfosMap, fileInfoPathMap, err := fetchedFileClassPackageMap(j.ctx, parser, directory) if err != nil { logger.Log.Error("Error walking the directory while parsing java file for declared classes:", zap.Error(err)) return fileDependenciesMapWithCycles } err = filepath.Walk(directory, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() || !strings.HasSuffix(path, ".java") { return nil } fileInfo, ok := fileInfoPathMap[path] if !ok { logger.Log.Error("Error fetching file parsed info:", zap.String("path", path)) } fileDependenciesMapWithCycles[path] = make(map[string]struct{}) referencedClassAndPackages, err := fetchClassReferences(j.ctx, parser, fileInfo.FileContent) if err != nil { logger.Log.Error("Error fetching class references:", zap.String("path", path), zap.Error(err)) return nil } for referencedClassIndex := range referencedClassAndPackages { referencedClass := resolveClassDependencies(classToFileInfosMap, fileInfo.Package, referencedClassAndPackages, referencedClassIndex) if referencedClass != "" { fileDependenciesMapWithCycles[path][referencedClass] = struct{}{} } } return nil }) if err != nil { logger.Log.Error("Error walking the directory while parsing java file for declared classes:", zap.Error(err)) return fileDependenciesMapWithCycles } return j.RemoveCycle(fileDependenciesMapWithCycles) } // resolveClassDependencies: within referencedClasses, tries to map the class at referencedClassIndex to the file path. // If the class name is not fully qualified, then the function tries to map the class name to the file path. func resolveClassDependencies(classFileInfoMap map[string][]*JavaFileParsedInfo, sourcePackage string, referencedClasses []string, referencedClassIndex int) string { referencedClass := referencedClasses[referencedClassIndex] parsedFileInfos, ok := classFileInfoMap[referencedClass] if !ok { return "" } isReferencedClassPackage := isPackage(&referencedClass) for _, parsedFileInfo := range parsedFileInfos { if isReferencedClassPackage && strings.HasPrefix(referencedClass, parsedFileInfo.Package) { return parsedFileInfo.FilePath } if isResolvedClassNameEqual(referencedClasses, referencedClassIndex, parsedFileInfo.Package, isReferencedClassPackage) { return parsedFileInfo.FilePath } if parsedFileInfo.Package == sourcePackage { return parsedFileInfo.FilePath } } return "" } func isResolvedClassNameEqual(referencedClasses []string, referencedClassIndex int, targetPackage string, isPackage bool) bool { if isPackage { return false } targetPackageLength := strings.Count(targetPackage, ".") + 1 packageStartIndex := referencedClassIndex - targetPackageLength if packageStartIndex < 0 { return false } targetPackageParts := strings.Split(targetPackage, ".") for i := packageStartIndex; i < referencedClassIndex; i++ { if targetPackageParts[i-packageStartIndex] != referencedClasses[i] { return false } } return true } func isPackage(reference *string) bool { return strings.Contains(*reference, ".") } // fetchedFileClassPackageMap: parses java files within projectDir to fetch declared classes and package name for each // file. The output is structured in 2 format. // map[string]*JavaFileParsedInfo: map of parsed info with File path as key. // map[string][]*JavaFileParsedInfo: map of parsed info with class name as key. Value is a list of files that declared the class. func fetchedFileClassPackageMap(ctx context.Context, parser *sitter.Parser, projectDir string) (map[string][]*JavaFileParsedInfo, map[string]*JavaFileParsedInfo, error) { fileParsedInfo := make([]*JavaFileParsedInfo, 0, 10) fileClassPackageMap := make(map[string][]*JavaFileParsedInfo) fileInfoPathMap := make(map[string]*JavaFileParsedInfo) err := filepath.Walk(projectDir, func(path string, info os.FileInfo, err error) error { if err != nil { return err } if !info.IsDir() && strings.HasSuffix(path, ".java") { parsedInfo, err := fetchFileParsedInfo(ctx, parser, path, projectDir) if err != nil { logger.Log.Error("Error fetching file parsed info:", zap.String("path", path), zap.Error(err)) } else { fileParsedInfo = append(fileParsedInfo, parsedInfo) fileInfoPathMap[path] = parsedInfo for _, className := range parsedInfo.DeclaredClasses { if fileInfos, ok := fileClassPackageMap[className]; ok { fileClassPackageMap[className] = append(fileInfos, parsedInfo) } else { fileClassPackageMap[className] = []*JavaFileParsedInfo{parsedInfo} } classPath := fmt.Sprintf("%s.%s", parsedInfo.Package, className) if fileInfos, ok := fileClassPackageMap[classPath]; ok { fileClassPackageMap[classPath] = append(fileInfos, parsedInfo) } else { fileClassPackageMap[classPath] = []*JavaFileParsedInfo{parsedInfo} } } } } return nil }) if err != nil { return nil, nil, err } return fileClassPackageMap, fileInfoPathMap, nil } // fetchFileParsedInfo: parses java file and returns parsed info containing file name, file path, package name, declared classes, and file content. func fetchFileParsedInfo(ctx context.Context, parser *sitter.Parser, filePath string, projectDir string) (*JavaFileParsedInfo, error) { content, err := os.ReadFile(filePath) if err != nil { return nil, err } tree, err := parser.ParseCtx(ctx, nil, content) if err != nil { return nil, err } defer tree.Close() rootNode := tree.RootNode() packageName, err := fetchPackageName(rootNode, content) if err != nil { return nil, err } declaredClasses, err := fetchClassDeclaration(rootNode, content) if err != nil { return nil, err } return &JavaFileParsedInfo{ FileName: filepath.Base(filePath), FilePath: filePath, Package: packageName, DeclaredClasses: declaredClasses, FileContent: content, }, nil } // fetchClassReferences: parses java file and returns a list of class references. This excludes the classes which are // referred using fully qualified package name. func fetchClassReferences(ctx context.Context, parser *sitter.Parser, content []byte) ([]string, error) { tree, err := parser.ParseCtx(ctx, nil, content) if err != nil { return nil, err } defer tree.Close() rootNode := tree.RootNode() query, err := sitter.NewQuery([]byte(` ( (type_identifier) @class_ref (#not-match? @class_ref "^(void|int|double|float|boolean|char|byte|short|long)$") ) ( (scoped_identifier) @class_ref ) `), java.GetLanguage()) if err != nil { return nil, err } defer query.Close() qc := sitter.NewQueryCursor() defer qc.Close() qc.Exec(query, rootNode) var references []string for { match, found := qc.NextMatch() if !found { break } for _, capture := range match.Captures { references = append(references, capture.Node.Content(content)) } } return references, nil } // fetchPackageName: parses java file and returns the package name of the file. func fetchPackageName(rootNode *sitter.Node, content []byte) (string, error) { packageQuery, err := sitter.NewQuery([]byte(`(package_declaration (scoped_identifier) @package)`), java.GetLanguage()) if err != nil { return "", err } defer packageQuery.Close() packageCursor := sitter.NewQueryCursor() defer packageCursor.Close() packageCursor.Exec(packageQuery, rootNode) packageName := "" if match, found := packageCursor.NextMatch(); found { for _, capture := range match.Captures { return capture.Node.Content(content), nil } } return packageName, nil } // fetchClassDeclaration: parses java file and returns a list of declared classes and interface in the file. func fetchClassDeclaration(rootNode *sitter.Node, content []byte) ([]string, error) { classQuery, err := sitter.NewQuery([]byte(` (class_declaration (identifier) @class) (interface_declaration (identifier) @interface_name) `), java.GetLanguage()) if err != nil { return nil, err } defer classQuery.Close() classCursor := sitter.NewQueryCursor() defer classCursor.Close() classCursor.Exec(classQuery, rootNode) var classNames []string for { match, found := classCursor.NextMatch() if !found { break } for _, capture := range match.Captures { classNames = append(classNames, capture.Node.Content(content)) } } return classNames, nil }