lib/mime.go (196 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package lib
import (
"archive/zip"
"bufio"
"bytes"
"encoding/csv"
"encoding/json"
"io"
"os"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
)
// MIME returns a cel.EnvOption that configures the mime extension function
// for interpreting bytes according to a MIME type. The mimetypes argument maps
// MIME type names to the transform used to interpret data of that type. Each
// value in the map must be one of: func([]byte), func(io.Reader) io.Reader,
// func(io.Reader) (io.Reader, error) or func(io.Reader) ref.Val. A
// func([]byte) transform is expected to mutate the bytes in place.
//
// # MIME
//
// mime returns <dyn> interpreted through the registered MIME type:
//
//	<bytes>.mime(<string>) -> <dyn>
//
// Examples:
//
//	string(b"hello world!".mime("text/rot13"))  // return "uryyb jbeyq!"
//	string(b"hello world!".mime("text/upper"))  // return "HELLO WORLD!"
//	string(b"\x1f\x8b\b\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9W(\xcf/\xcaIQ\x04\x04\x00\x00\xff\xffm´\x03\f\x00\x00\x00"
//	    .mime("application/gzip"))  // return "hello world!"
//
// See also File and NDJSON.
func MIME(mimetypes map[string]interface{}) cel.EnvOption {
	lib := mimeLib{transforms: mimetypes}
	return cel.Lib(lib)
}
// mimeLib is the library value handed to cel.Lib by MIME. It carries the
// caller-supplied MIME type transforms used by the mime function.
type mimeLib struct {
// transforms maps a MIME type name to the transform applied for that type.
transforms map[string]interface{}
}
// CompileOptions declares the mime member function,
//
//	<bytes>.mime(<string>) -> <dyn>
//
// and binds it to transformMIME wrapped in catch.
func (l mimeLib) CompileOptions() []cel.EnvOption {
	overload := cel.MemberOverload(
		"bytes_mime_string",
		[]*cel.Type{cel.BytesType, cel.StringType},
		cel.DynType,
		cel.BinaryBinding(catch(l.transformMIME)),
	)
	return []cel.EnvOption{cel.Function("mime", overload)}
}
// ProgramOptions returns nil; the library contributes no program options.
func (mimeLib) ProgramOptions() []cel.ProgramOption {
	return nil
}
// transformMIME is the binding for <bytes>.mime(<string>). It looks up the
// transform registered under the MIME type named by arg1 and applies it to
// the bytes in arg0.
//
// The result depends on the kind of transform registered:
//
//   - func([]byte): the transform is run on a copy of the input, which is
//     returned as bytes; the input value itself is never mutated.
//   - func(io.Reader) io.Reader and func(io.Reader) (io.Reader, error): the
//     reader returned by the transform is drained and its contents are
//     returned as bytes.
//   - func(io.Reader) ref.Val: the value returned by the transform is
//     returned unchanged.
//
// A CEL error is returned if either argument has the wrong type, if no
// transform is registered for the MIME type, if the registered value is not
// one of the supported function types, or if the transform or the read of
// its output fails.
func (l mimeLib) transformMIME(arg0, arg1 ref.Val) ref.Val {
	input, ok := arg0.(types.Bytes)
	if !ok {
		// Pass the offending argument so that an incoming error or
		// unknown is propagated rather than replaced.
		return types.ValOrErr(arg0, "no such overload for bytes: %s", arg0.Type())
	}
	mimetype, ok := arg1.(types.String)
	if !ok {
		return types.ValOrErr(arg1, "no such overload for mime type: %s", arg1.Type())
	}
	transform, ok := l.transforms[string(mimetype)]
	if !ok {
		return types.NewErr("unknown transform: %q", mimetype)
	}
	switch transform := transform.(type) {
	case func([]byte):
		// Work on a copy; the transform mutates its argument in place.
		c := make([]byte, len(input))
		copy(c, input)
		transform(c)
		return types.Bytes(c)
	case func(io.Reader) io.Reader:
		var buf bytes.Buffer
		_, err := io.Copy(&buf, transform(bytes.NewReader(input)))
		if err != nil {
			return types.NewErr("mime: %v", err)
		}
		return types.Bytes(buf.Bytes())
	case func(io.Reader) (io.Reader, error):
		r, err := transform(bytes.NewReader(input))
		if err != nil {
			return types.NewErr("mime: %v", err)
		}
		var buf bytes.Buffer
		_, err = io.Copy(&buf, r)
		if err != nil {
			return types.NewErr("mime: %v", err)
		}
		return types.Bytes(buf.Bytes())
	case func(io.Reader) ref.Val:
		return transform(bytes.NewReader(input))
	default:
		return types.NewErr("invalid transform: %T", transform)
	}
}
// transformReader is an io.Reader that passes every chunk read from r
// through transform before handing it to the caller.
type transformReader struct {
	// r is the underlying source of data.
	r io.Reader
	// transform is applied in place to the bytes obtained by each Read.
	transform func([]byte)
}

// Read reads from the underlying reader into p, applies the transform in
// place to the portion of p that was filled, and returns the underlying
// reader's byte count and error unchanged. The transform is invoked on every
// call, including calls that read no bytes.
func (t transformReader) Read(p []byte) (n int, err error) {
	n, err = t.r.Read(p)
	filled := p[:n]
	t.transform(filled)
	return n, err
}
// CSVHeader is a file transform that converts text/csv data held by an
// io.Reader into a <list<map<string,string>>>. The first record is taken as
// the header and supplies the keys of the map built for each following
// record. If a header name is repeated, the value from the last column with
// that name is kept. Register it with the File or MIME lib using
//
//	File(map[string]interface{}{
//	    "text/csv; header=present": lib.CSVHeader,
//	})
//
// or
//
//	MIME(map[string]interface{}{
//	    "text/csv; header=present": lib.CSVHeader,
//	})
//
// after which it is available to file and mime calls.
//
// Example:
//
//	Given a file hello.csv:
//	   "first","second","third"
//	   1,2,3
//
//	file('hello.csv', 'text/csv; header=present')
//
//	will return:
//
//	[{"first": "1", "second": "2", "third": "3"}]
func CSVHeader(r io.Reader) ref.Val {
	cr := csv.NewReader(r)
	var rows []map[string]string

	// The first record is the header; every later record becomes a row.
	header, err := cr.Read()
	for err == nil {
		var rec []string
		rec, err = cr.Read()
		if err != nil {
			break
		}
		row := make(map[string]string, len(header))
		for col, name := range header {
			row[name] = rec[col]
		}
		rows = append(rows, row)
	}
	// The loop above only finishes on an error; io.EOF is the normal end.
	if err != io.EOF {
		return types.NewErr("csv: %v", err)
	}
	return types.NewDynamicList(types.DefaultTypeAdapter, rows)
}
// CSVNoHeader is a file transform that converts text/csv data held by an
// io.Reader into a <list<list<string>>>, one inner list per record. No record
// is treated as a header. Register it with the File or MIME lib using
//
//	File(map[string]interface{}{
//	    "text/csv; header=absent": lib.CSVNoHeader,
//	})
//
// or
//
//	MIME(map[string]interface{}{
//	    "text/csv; header=absent": lib.CSVNoHeader,
//	})
//
// after which it is available to file and mime calls.
//
// Example:
//
//	Given a file hello.csv:
//	   "first","second","third"
//	   1,2,3
//
//	file('hello.csv', 'text/csv; header=absent')
//
//	will return:
//
//	[["first", "second", "third"], ["1", "2", "3"]]
func CSVNoHeader(r io.Reader) ref.Val {
	cr := csv.NewReader(r)
	records, err := cr.ReadAll()
	if err == nil {
		return types.NewDynamicList(types.DefaultTypeAdapter, records)
	}
	return types.NewErr("csv: %v", err)
}
// NDJSON provides a file transform that returns a <list<dyn>> from an
// io.Reader holding ND-JSON data. It should be handed to the File or MIME
// lib with
//
//	File(map[string]interface{}{
//	    "application/x-ndjson": lib.NDJSON,
//	})
//
// or
//
//	MIME(map[string]interface{}{
//	    "application/x-ndjson": lib.NDJSON,
//	})
//
// It will then be able to be used in a file or mime call.
//
// Example:
//
//	Given a file hello.ndjson:
//	   {"message":"hello"}
//	   {"message":"world"}
//
//	file('hello.ndjson', 'application/x-ndjson')
//
//	will return:
//
//	[
//	    {
//	        "message": "hello"
//	    },
//	    {
//	        "message": "world"
//	    }
//	]
//
// Messages in the ND-JSON stream that are invalid will be added to the list
// as CEL errors and will need to be processed using the try function.
//
// Example:
//
//	Given a file hello.ndjson:
//	   {"message":"hello"}
//	   {"message":"oops"
//	   {"message":"world"}
//
//	file('hello.ndjson', 'application/x-ndjson').map(e, try(e, "error.message"))
//
//	will return:
//
//	[
//	    {
//	        "message": "hello"
//	    },
//	    {
//	        "error.message": "unexpected end of JSON input: {\"message\":\"oops\""
//	    },
//	    {
//	        "message": "world"
//	    }
//	]
//
// Lines that are empty or contain only white space are skipped. There is no
// limit on the length of a line, and the final line does not need to be
// newline terminated. If reading from r fails, a single CEL error is returned
// in place of the list.
func NDJSON(r io.Reader) ref.Val {
	// This is more lenient than strict ND-JSON: the last message is
	// accepted without a terminating newline.
	//
	// Lines are read with a bufio.Reader rather than a bufio.Scanner since
	// a Scanner fails with "token too long" for any line over its default
	// 64KiB limit.
	var vals []interface{}
	br := bufio.NewReader(r)
	for {
		line, err := br.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return types.NewErr("ndjson: %v", err)
		}
		// Drop the line terminator, "\n" or "\r\n", so that error
		// messages quote only the message text.
		line = bytes.TrimSuffix(line, []byte("\n"))
		line = bytes.TrimSuffix(line, []byte("\r"))
		if len(bytes.TrimSpace(line)) != 0 {
			var v interface{}
			if jerr := json.Unmarshal(line, &v); jerr != nil {
				vals = append(vals, types.NewErr("%v: %s", jerr, line))
			} else {
				vals = append(vals, v)
			}
		}
		if err == io.EOF {
			// Any unterminated final line has been handled above.
			break
		}
	}
	return types.NewDynamicList(types.DefaultTypeAdapter, vals)
}
// Zip is a file transform that converts zip archive data held by an io.Reader
// into a <map<dyn>>. Register it with the File or MIME lib using
//
//	File(map[string]interface{}{
//	    "application/zip": lib.Zip,
//	})
//
// or
//
//	MIME(map[string]interface{}{
//	    "application/zip": lib.Zip,
//	})
//
// after which it is available to file and mime calls.
//
// The returned map reflects the structure of the Go zip.Reader struct.
//
// Example:
//
//	file('hello.zip', 'application/zip')
//
//	might return:
//
//	{
//	    "Comment": "hello zip file",
//	    "File": [
//	        {
//	            "CRC32": 0,
//	            "Comment": "",
//	            "Data": "",
//	            "Extra": "VVQFAAMCCFhidXgLAAEE6AMAAAToAwAA",
//	            "IsDir": true,
//	            "Modified": "2022-04-14T21:09:46+09:30",
//	            "Name": "subdir/",
//	            "NonUTF8": false,
//	            "Size": 0
//	        },
//	        {
//	            "CRC32": 30912436,
//	            "Comment": "",
//	            "Data": "aGVsbG8gd29ybGQhCg==",
//	            "Extra": "VVQFAAP0B1hidXgLAAEE6AMAAAToAwAA",
//	            "IsDir": false,
//	            "Modified": "2022-04-14T21:09:32+09:30",
//	            "Name": "subdir/a.txt",
//	            "NonUTF8": false,
//	            "Size": 13
//	        }
//	    ]
//	}
//
// Note that the entire contents of the zip file is expanded into memory.
func Zip(r io.Reader) ref.Val {
	// zip.NewReader needs random access and a known size. An *os.File
	// provides both directly; any other reader is buffered in memory first.
	var (
		src  io.ReaderAt
		size int64
	)
	if f, ok := r.(*os.File); ok {
		fi, err := f.Stat()
		if err != nil {
			return types.NewErr("zip: %s", err)
		}
		src, size = f, fi.Size()
	} else {
		var buf bytes.Buffer
		if _, err := io.Copy(&buf, r); err != nil {
			return types.NewErr("zip: %s", err)
		}
		br := bytes.NewReader(buf.Bytes())
		src, size = br, br.Size()
	}

	z, err := zip.NewReader(src, size)
	if err != nil {
		return types.NewErr("zip: %s", err)
	}
	return expandZip(z)
}
// expandZip reads every entry of z into memory and returns a map value with
// two keys: "Comment", holding the archive comment, and "File", holding a
// list with one map per archive entry. Each entry map has the keys "Name",
// "Comment", "IsDir", "Size", "NonUTF8", "Modified", "CRC32" and "Extra"
// taken from the entry's file header, and "Data" holding the entry's
// decompressed contents.
//
// A CEL error is returned in place of the map if any entry cannot be opened,
// read or closed.
func expandZip(z *zip.Reader) ref.Val {
	var files []map[string]interface{}
	for _, f := range z.File {
		rc, err := f.Open()
		if err != nil {
			return types.NewErr("zip: %s", err)
		}
		var buf bytes.Buffer
		_, err = io.Copy(&buf, rc)
		if err != nil {
			// Release the entry reader before bailing out. The copy
			// failure is the error worth reporting, so any error from
			// Close is deliberately dropped here.
			rc.Close()
			return types.NewErr("zip: %s", err)
		}
		err = rc.Close()
		if err != nil {
			return types.NewErr("zip: %s", err)
		}
		fh := f.FileHeader
		fi := fh.FileInfo()
		files = append(files, map[string]interface{}{
			"Name":     fh.Name,
			"Comment":  fh.Comment,
			"IsDir":    fi.IsDir(),
			"Size":     fi.Size(),
			"NonUTF8":  fh.NonUTF8,
			"Modified": fh.Modified,
			"CRC32":    fh.CRC32,
			"Extra":    fh.Extra,
			"Data":     buf.Bytes(),
		})
	}
	return types.DefaultTypeAdapter.NativeToValue(map[string]interface{}{
		"File":    files,
		"Comment": z.Comment,
	})
}