lib/translator/dbgap.go (299 lines of code) (raw):
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package translator
import (
"bytes"
"context"
"crypto/rsa"
"crypto/x509"
"encoding/pem"
"fmt"
"net/http"
"regexp"
"strings"
"time"
"github.com/go-jose/go-jose/v3/jwt" /* copybara-comment */
"github.com/coreos/go-oidc" /* copybara-comment */
"github.com/GoogleCloudPlatform/healthcare-federated-access-services/lib/ga4gh" /* copybara-comment: ga4gh */
"github.com/GoogleCloudPlatform/healthcare-federated-access-services/lib/kms" /* copybara-comment: kms */
"github.com/GoogleCloudPlatform/healthcare-federated-access-services/lib/strutil" /* copybara-comment: strutil */
)
const (
// TODO: Update the issuer address once NCBI stands up their own OIDC endpoint.
dbGapIssuer = "https://dbgap.nlm.nih.gov/aa"
dbGapOrgURL = "https://orgs.nih.gov/orgs/"
dbGapUserInfoURL = "https://dbgap.ncbi.nlm.nih.gov/aa/jwt/user_info.cgi?${TOKEN}"
dbGapPassportURL = "https://dbgap.ncbi.nlm.nih.gov/aa/jwt/user_passport.cgi?${TOKEN}"
eraCommonsAuthority = "eRA"
visaScope = "openid"
fixedKeyID = "kid"
)
// DbGapTranslator is a ga4gh.Translator that converts dbGap identities into GA4GH identities.
type DbGapTranslator struct {
publicKey *rsa.PublicKey
visaIssuer string
visaJKU string
signer kms.Signer
}
type dbGapStudy struct {
Accession *string `json:"accession"`
}
type dbGapAccess struct {
Study dbGapStudy `json:"study"`
Expires int64 `json:"expires"`
Issued int64 `json:"issued"`
}
type dbGapPassport struct {
Access []dbGapAccess `json:"access"`
Org *string `json:"org"`
OrgID *string `json:"org_DUNS"`
Role *string `json:"role"`
SO *string `json:"so"`
}
type dbGapIdentity struct {
Authority string `json:"authority"`
ID any `json:"id"`
}
type vCard struct {
Email string `json:"email"`
GivenName string `json:"fname"`
FamilyName string `json:"lname"`
Orgs []string `json:"orgs"`
Roles []string `json:"roles"`
}
type dbGapClaims struct {
DbGapPassport []dbGapPassport `json:"dbgap_passport"`
Identity []dbGapIdentity `json:"identity"`
Vcard vCard `json:"vcard"`
}
// dbGapIdToken mocks OIDC library's idToken implementation, except minor differences in the types of
// Audience, Expiry, and IssuedAt fields to facilitate JSON unmarshalling.
type dbGapIdToken struct {
Issuer string `json:"iss"`
Subject string `json:"sub"`
Audience string `json:"aud"`
Expiry int64 `json:"exp"`
IssuedAt int64 `json:"iat"`
Nonce string `json:"nonce"`
AtHash string `json:"at_hash"`
}
const validSec = 3600 * 24 * 60 // 60 days
var removePunctuation = regexp.MustCompile("[^a-zA-Z0-9 ]+")
func convertToOIDCIDToken(token dbGapIdToken) *oidc.IDToken {
return &oidc.IDToken{
Issuer: token.Issuer,
Subject: token.Subject,
Audience: []string{token.Audience},
Expiry: time.Unix(token.Expiry, 0),
IssuedAt: time.Unix(token.IssuedAt, 0),
Nonce: token.Nonce,
AccessTokenHash: token.AtHash,
}
}
// NewDbGapTranslator creates a new DbGapTranslator with the provided public key. If the tokens
// passed to this translator do not have an audience claim with a value equal to the
// clientID value then they will be rejected.
func NewDbGapTranslator(publicKey, selfIssuer string, signer kms.Signer) (*DbGapTranslator, error) {
if len(selfIssuer) == 0 {
return nil, fmt.Errorf("NewDbGapTranslator failed, selfIssuer or signingPrivateKey is empty")
}
jku := strings.TrimSuffix(selfIssuer, "/") + "/.well-known/jwks.json"
t := &DbGapTranslator{
visaIssuer: selfIssuer,
visaJKU: jku,
signer: signer,
}
block, _ := pem.Decode([]byte(publicKey))
if block == nil {
return t, nil
}
pub, err := x509.ParsePKCS1PublicKey(block.Bytes)
if err != nil {
return nil, fmt.Errorf("parsing public key: %v", err)
}
t.publicKey = pub
return t, nil
}
// TranslateToken implements the ga4gh.Translator interface.
func (s *DbGapTranslator) TranslateToken(ctx context.Context, auth string) (*ga4gh.Identity, error) {
if err := ga4gh.VerifyTokenWithKey(s.publicKey, auth); err != nil {
return nil, fmt.Errorf("verifying user token signature: %v", err)
}
userInfo, err := s.getURL(dbGapUserInfoURL, auth)
if err != nil {
return nil, fmt.Errorf("getting dbGaP user info: %v", err)
}
if err := ga4gh.VerifyTokenWithKey(s.publicKey, userInfo); err != nil {
return nil, fmt.Errorf("verifying user info token signature: %v", err)
}
passport, err := s.getURL(dbGapPassportURL, auth)
if err != nil {
return nil, fmt.Errorf("getting dbGaP passport: %v", err)
}
if err := ga4gh.VerifyTokenWithKey(s.publicKey, passport); err != nil {
return nil, fmt.Errorf("verifying passport token signature: %v", err)
}
var claims dbGapClaims
var id dbGapIdToken
if err := s.extractClaims(auth, &id, &claims); err != nil {
return nil, fmt.Errorf("extracting user claims: %v", err)
}
if err := s.extractClaims(userInfo, &id, &claims); err != nil {
return nil, fmt.Errorf("extracting user info claims: %v", err)
}
if err := s.extractClaims(passport, &id, &claims); err != nil {
return nil, fmt.Errorf("extracting passport claims: %v", err)
}
return s.translateToken(ctx, convertToOIDCIDToken(id), claims, time.Now())
}
func (s *DbGapTranslator) getURL(url, userTok string) (string, error) {
url = strings.Replace(url, "${TOKEN}", userTok, -1)
get, err := http.Get(url)
if err != nil {
return "", err
}
buf := new(bytes.Buffer)
buf.ReadFrom(get.Body)
body := buf.String()
if get.StatusCode < 200 || get.StatusCode > 299 {
return "", fmt.Errorf("http status %d: %v", get.StatusCode, body)
}
return body, nil
}
func (s *DbGapTranslator) extractClaims(tok string, id *dbGapIdToken, claims *dbGapClaims) error {
parsed, err := jwt.ParseSigned(tok)
if err != nil {
return fmt.Errorf("parsing signed token: %v", err)
}
err = parsed.UnsafeClaimsWithoutVerification(id, claims)
if err != nil {
return fmt.Errorf("extracting claims from token: %v", err)
}
return nil
}
func (s *DbGapTranslator) translateToken(ctx context.Context, token *oidc.IDToken, claims dbGapClaims, now time.Time) (*ga4gh.Identity, error) {
id := ga4gh.Identity{
Issuer: token.Issuer,
Subject: token.Subject,
Expiry: token.Expiry.Unix(),
GivenName: claims.Vcard.GivenName,
FamilyName: claims.Vcard.FamilyName,
Name: strutil.JoinNonEmpty([]string{claims.Vcard.GivenName, claims.Vcard.FamilyName}, " "),
Email: claims.Vcard.Email,
VisaJWTs: []string{},
}
for _, ident := range claims.Identity {
if ident.Authority == eraCommonsAuthority {
if username, ok := ident.ID.(string); ok {
id.Username = username
}
}
}
accessions := make(map[string]dbGapAccess)
type source struct {
orgID string
by string
}
affiliations := make(map[string]source)
for _, p := range claims.DbGapPassport {
for _, a := range p.Access {
if a.Study.Accession == nil {
continue
}
// TODO: Verify that the heuristics for de-duplicating access entries is correct.
ac := *a.Study.Accession
exp := a.Expires
if access, ok := accessions[ac]; ok {
// For duplicate accessions, only keep the one with the later expiry timestamp.
if access.Expires > exp {
continue
}
}
accessions[ac] = dbGapAccess{
Expires: exp,
Issued: a.Issued,
}
}
if p.Org == nil || len(*p.Org) == 0 || p.Role == nil || len(*p.Role) == 0 {
continue
}
var r string
if *p.Role == "pi" || *p.Role == "downloader" {
r = "nih.researcher"
} else {
r = "member"
}
o := removePunctuation.ReplaceAllString(*p.Org, "")
o = strings.ReplaceAll(o, " ", "-")
v := r + "@" + o + ".orgs.nih.gov"
// Does not deal with complex cases where multiple org_DUNS attest to the same
// "value" (v) for AffiliationAndRole.
if src, ok := affiliations[v]; !ok || src.by == "self" {
by := "so"
if p.SO == nil || *p.SO == "" {
by = "self"
}
affiliations[v] = source{
orgID: *p.OrgID,
by: by,
}
}
}
currUnixTime := now.Unix()
affiliationAsserted := now.Unix()
for a, val := range accessions {
visa := ga4gh.VisaData{
StdClaims: ga4gh.StdClaims{
Subject: token.Subject,
Issuer: s.visaIssuer,
ExpiresAt: val.Expires,
IssuedAt: val.Issued,
},
Assertion: ga4gh.Assertion{
Type: ga4gh.ControlledAccessGrants,
Value: ga4gh.Value("https://dac.nih.gov/datasets/" + a),
Source: dbGapIssuer,
By: ga4gh.DAC,
Asserted: affiliationAsserted,
},
Scope: visaScope,
}
v, err := ga4gh.NewVisaFromData(ctx, &visa, s.visaJKU, s.signer)
if err != nil {
return nil, fmt.Errorf("sign ControlledAccessGrants claim failed: %s", err)
}
id.VisaJWTs = append(id.VisaJWTs, string(v.JWT()))
// Keep the oldest Issued accession for use as affiliationAsserted.
if val.Issued > 0 && val.Issued < affiliationAsserted {
affiliationAsserted = val.Issued
}
}
for a, src := range affiliations {
// Claim for dbGap
visa := ga4gh.VisaData{
StdClaims: ga4gh.StdClaims{
Issuer: s.visaIssuer,
ExpiresAt: currUnixTime + validSec,
IssuedAt: affiliationAsserted,
},
Assertion: ga4gh.Assertion{
Type: ga4gh.AffiliationAndRole,
Value: ga4gh.Value(a),
Source: dbGapIssuer,
By: ga4gh.System,
Asserted: affiliationAsserted,
},
Scope: visaScope,
}
v, err := ga4gh.NewVisaFromData(ctx, &visa, s.visaJKU, s.signer)
if err != nil {
return nil, fmt.Errorf("sign dbGap ClaimAffiliationAndRole claim failed: %s", err)
}
id.VisaJWTs = append(id.VisaJWTs, string(v.JWT()))
// Claim for org
visa = ga4gh.VisaData{
StdClaims: ga4gh.StdClaims{
Issuer: s.visaIssuer,
ExpiresAt: currUnixTime + validSec,
IssuedAt: affiliationAsserted,
},
Assertion: ga4gh.Assertion{
Type: ga4gh.AffiliationAndRole,
Value: ga4gh.Value(a),
Source: ga4gh.Source(dbGapOrgURL + src.orgID),
By: ga4gh.By(src.by),
Asserted: affiliationAsserted,
},
Scope: visaScope,
}
v, err = ga4gh.NewVisaFromData(ctx, &visa, s.visaJKU, s.signer)
if err != nil {
return nil, fmt.Errorf("sign org ClaimAffiliationAndRole claim failed: %s", err)
}
id.VisaJWTs = append(id.VisaJWTs, string(v.JWT()))
}
return &id, nil
}