backend/plugins/github_graphql/tasks/job_collector.go
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"encoding/json"
"reflect"
"time"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/plugin"
helper "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
"github.com/apache/incubator-devlake/plugins/github/models"
githubTasks "github.com/apache/incubator-devlake/plugins/github/tasks"
"github.com/merico-dev/graphql"
)
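
// RAW_GRAPHQL_JOBS_TABLE is the raw-layer table that stores the collected check run payloads before extraction.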
const RAW_GRAPHQL_JOBS_TABLE = "github_graphql_jobs"
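
// GraphqlQueryCheckRunWrapper is the top-level GraphQL query. The graphql-extend tag
// lets the client expand Node into one node(id:) lookup per check suite id, so a single
// request covers a whole batch of workflow runs.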
type GraphqlQueryCheckRunWrapper struct {
RateLimit struct {
Cost int
}
Node []GraphqlQueryCheckSuite `graphql:"node(id: $id)" graphql-extend:"true"`
}
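
// GraphqlQueryCheckSuite is a single CheckSuite node together with one page of its
// check runs; the check suite maps to a workflow run and its check runs map to jobs.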
type GraphqlQueryCheckSuite struct {
Id string
Typename string `graphql:"__typename"`
	// CheckSuite corresponds to a workflow run in the REST API
CheckSuite struct {
WorkflowRun struct {
DatabaseId int
}
		// CheckRuns corresponds to jobs in the REST API
CheckRuns struct {
TotalCount int
PageInfo struct {
EndCursor string `graphql:"endCursor"`
HasNextPage bool `graphql:"hasNextPage"`
}
Nodes []GraphqlQueryCheckRun
} `graphql:"checkRuns(first: $pageSize, after: $skipCursor)"`
} `graphql:"... on CheckSuite"`
}
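
// GraphqlQueryCheckRun holds the fields collected for a single check run (job),
// including up to the first 50 of its steps.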
type GraphqlQueryCheckRun struct {
Id string
Name string
DetailsUrl string
DatabaseId int
Status string
StartedAt *time.Time
Conclusion string
CompletedAt *time.Time
//ExternalId string
//Url string
//Title interface{}
//Text interface{}
//Summary interface{}
Steps struct {
TotalCount int
Nodes []struct {
CompletedAt *time.Time `json:"completed_at"`
Conclusion string `json:"conclusion"`
Name string `json:"name"`
Number int `json:"number"`
SecondsToCompletion int `json:"seconds_to_completion"`
StartedAt *time.Time `json:"started_at"`
Status string `json:"status"`
}
} `graphql:"steps(first: 50)"`
}
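
// SimpleWorkflowRun is the minimal projection read from the stored workflow runs;
// only the check suite node id is needed to drive the GraphQL lookups.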
type SimpleWorkflowRun struct {
CheckSuiteNodeID string
}
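
// CollectJobsMeta registers the CollectJobs subtask with the plugin framework.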
var CollectJobsMeta = plugin.SubTaskMeta{
Name: "Collect Job Runs",
EntryPoint: CollectJobs,
EnabledByDefault: true,
	Description:      "Collect jobs (check runs) data from the GitHub GraphQL API; supports both timeFilter and diffSync.",
DomainTypes: []string{plugin.DOMAIN_TYPE_CICD},
}
var _ plugin.SubTaskEntryPoint = CollectJobs
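
// getPageInfo reports whether any check suite in the batched response still has more
// check runs to page through, and returns the cursor to resume from.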
func getPageInfo(query interface{}, args *helper.GraphqlCollectorArgs) (*helper.GraphqlQueryPageInfo, error) {
queryWrapper := query.(*GraphqlQueryCheckRunWrapper)
hasNextPage := false
endCursor := ""
for _, node := range queryWrapper.Node {
if node.CheckSuite.CheckRuns.PageInfo.HasNextPage {
hasNextPage = true
endCursor = node.CheckSuite.CheckRuns.PageInfo.EndCursor
break
}
}
return &helper.GraphqlQueryPageInfo{
EndCursor: endCursor,
HasNextPage: hasNextPage,
}, nil
}
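
// buildQuery assembles the GraphQL query and its variables for one batch of workflow
// runs, turning each check suite node id into a node(id:) lookup.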
func buildQuery(reqData *helper.GraphqlRequestData) (interface{}, map[string]interface{}, error) {
query := &GraphqlQueryCheckRunWrapper{}
if reqData == nil {
return query, map[string]interface{}{}, nil
}
workflowRuns := reqData.Input.([]interface{})
checkSuiteIds := []map[string]interface{}{}
for _, iWorkflowRuns := range workflowRuns {
workflowRun := iWorkflowRuns.(*SimpleWorkflowRun)
checkSuiteIds = append(checkSuiteIds, map[string]interface{}{
`id`: graphql.ID(workflowRun.CheckSuiteNodeID),
})
}
variables := map[string]interface{}{
"node": checkSuiteIds,
"pageSize": graphql.Int(reqData.Pager.Size),
"skipCursor": (*graphql.String)(reqData.Pager.SkipCursor),
}
return query, variables, nil
}
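
// CollectJobs collects check run (job) data for previously collected workflow runs via
// the GitHub GraphQL API and stores the raw responses in RAW_GRAPHQL_JOBS_TABLE.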
func CollectJobs(taskCtx plugin.SubTaskContext) errors.Error {
db := taskCtx.GetDal()
data := taskCtx.GetData().(*githubTasks.GithubTaskData)
apiCollector, err := helper.NewStatefulApiCollector(helper.RawDataSubTaskArgs{
Ctx: taskCtx,
Params: githubTasks.GithubApiParams{
ConnectionId: data.Options.ConnectionId,
Name: data.Options.Name,
},
Table: RAW_GRAPHQL_JOBS_TABLE,
})
if err != nil {
return err
}
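	// Feed the collector with the check suite node ids of workflow runs already stored
	// for this repo and connection, newest first.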
clauses := []dal.Clause{
dal.Select("check_suite_node_id"),
dal.From(models.GithubRun{}.TableName()),
dal.Where("repo_id = ? and connection_id=?", data.Options.GithubId, data.Options.ConnectionId),
dal.Orderby("github_updated_at DESC"),
}
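	// In incremental (diff sync) mode, only revisit workflow runs updated since the last collection.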
if apiCollector.IsIncremental() && apiCollector.GetSince() != nil {
clauses = append(clauses, dal.Where("github_updated_at > ?", *apiCollector.GetSince()))
}
cursor, err := db.Cursor(
clauses...,
)
if err != nil {
return err
}
defer cursor.Close()
iterator, err := helper.NewDalCursorIterator(db, cursor, reflect.TypeOf(SimpleWorkflowRun{}))
if err != nil {
return err
}
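	// Query 10 workflow runs per GraphQL request and page through their check runs 20 at a time.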
err = apiCollector.InitGraphQLCollector(helper.GraphqlCollectorArgs{
Input: iterator,
InputStep: 10,
GraphqlClient: data.GraphqlClient,
BuildQuery: buildQuery,
GetPageInfo: getPageInfo,
ResponseParser: func(queryWrapper any) (messages []json.RawMessage, err errors.Error) {
query := queryWrapper.(*GraphqlQueryCheckRunWrapper)
for _, node := range query.Node {
for _, checkRun := range node.CheckSuite.CheckRuns.Nodes {
					updatedAt := checkRun.StartedAt
					if checkRun.CompletedAt != nil {
						updatedAt = checkRun.CompletedAt
					}
					// StartedAt may be nil for check runs that have not begun, so only apply
					// the time filter when a timestamp is available to compare against.
					if updatedAt != nil && apiCollector.GetSince() != nil && !apiCollector.GetSince().Before(*updatedAt) {
						return messages, helper.ErrFinishCollect
					}
messages = append(messages, errors.Must1(json.Marshal(checkRun)))
}
}
return
},
IgnoreQueryErrors: true,
PageSize: 20,
})
if err != nil {
return err
}
return apiCollector.Execute()
}