#!/usr/bin/perl

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
#  Heuristically converts line endings to the current OS's preferred format
#  
#  All existing line endings must be identical (e.g. lf's only, or even
#  the accidental cr.cr.lf sequence.)  If some lines end lf, and others as
#  cr.lf, the file is presumed binary.  If the cr character appears anywhere
#  except prefixed to an lf, the file is presumed binary.  If there is no 
#  change in the resulting file size, or the file is binary, the conversion 
#  is discarded.
#  
#  Todo: Handle NULL stdin characters gracefully.
#

use strict;

use IO::File;
use File::Find;

# The ignore list is '-' separated, with a leading hyphen and a trailing
# hyphen, so that any single extension can be matched as "-ext-".
my $ignore = '-' . join('-',
    # Image formats
    qw(gif jpg jpeg png ico bmp),
    # Archive formats
    qw(tar gz z zip jar war bz2 tgz),
    # Many document formats
    qw(eps psd pdf ai),
    # Some encodings
    qw(ucs2 ucs4),
    # Some binary objects
    qw(class so dll exe obj a o lo slo sl dylib),
    # Some build env files
    qw(mcp xdc ncb opt pdb ilk sbr),
) . '-';

# Regexp given with --ignore; empty means no pattern filtering.
my $ignorepat    = '';

# Cleared by --touch; when set, the original timestamp is restored.
my $preservedate = 1;

# 0 = strict, 1 = --force (tolerate mismatched endings),
# 2 = --FORCE (also disregard the ignore list and pattern).
my $forceending  = 0;

# Becomes true once at least one path argument has been processed.
my $givenpaths   = 0;

# 0 = native endings, -1 = --nocr (lf only), 1 = --cr (cr.lf).
my $notnative    = 0;

# Usage text appended to every command-line error.
my $usage = "Syntax:\t$0 [option(s)] [path(s)]\n\n" . <<'OUTCH';
Where:	paths specifies the top level directory to convert (default of '.')
	options are;

	  --cr          keep/add one ^M
	  --nocr        remove ^M's
	  --touch       the datestamp (default: keeps date/attribs)
	  --force       mismatched corrections (unbalanced ^M's)
	  --FORCE       all files regardless of file name!
	  --ignore PAT  Do not convert files with full name matching regexp PAT

OUTCH

# Process the command line left to right.  Each path is converted as soon
# as it is encountered, so options given after a path do not affect it.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq '--touch') {
        $preservedate = 0;
    }
    elsif ($arg eq '--nocr') {
        $notnative = -1;
    }
    elsif ($arg eq '--cr') {
        $notnative = 1;
    }
    elsif ($arg eq '--force') {
        $forceending = 1;
    }
    elsif ($arg eq '--FORCE') {
        $forceending = 2;
    }
    elsif ($arg eq '--ignore' && @ARGV) {
        # The pattern is the next argument; consume it here.
        $ignorepat = shift @ARGV;
    }
    elsif ($arg eq '--ignore') {
        # Previously reported as an unknown option, which was misleading.
        die "Option --ignore requires a regexp argument\n\n" . $usage;
    }
    elsif ($arg =~ m/^-/) {
        die "What is " . $arg . " supposed to mean?\n\n" . $usage;
    }
    else {
        find(\&totxt, $arg);
        print "scanned " . $arg . "\n";
        $givenpaths = 1;
    }
}

# With no path arguments at all, convert the current directory tree.
if (!$givenpaths) {
    find(\&totxt, '.');
    print "did .\n";
}

# totxt - File::Find "wanted" callback: convert one file's line endings.
#
# Takes no arguments.  The file to examine is named by $_ (as set by
# File::Find), with its full path in $File::Find::name.  Behaviour is
# driven by the file-level settings $forceending, $ignorepat, $ignore,
# $notnative and $preservedate.
#
# The file is copied line by line into a temporary sibling ".#<name>",
# normalising each "\r*\n" ending.  The copy replaces the original only
# when every line used the same ending (unless forced), no stray cr was
# found (unless forced), and the byte count actually changed.  Otherwise
# the temporary file is discarded and the original is left untouched.
#
# Dies, with the system error, if the file cannot be opened, written,
# or replaced.
sub totxt {
    my $oname = $_;
    my $tname = '.#' . $oname;
    return if !-f $oname;

    if ($forceending < 2) {
        # No /o here: the pattern may legitimately change between paths.
        return if $ignorepat ne '' && $File::Find::name =~ /$ignorepat/;

        # Check every dot-separated suffix (but not the base name) against
        # the ignore list.  \Q...\E keeps regex metacharacters in a file
        # name from breaking or widening the match, and iterating the list
        # (rather than testing pop's truthiness) means a suffix of "0" or
        # "" no longer hides an ignored suffix to its left.
        my @exts = split /\./, $oname;
        shift @exts;
        for my $ext (@exts) {
            return if $ignore =~ m|-\Q$ext\E-|i;
        }
    }

    my @ostat = stat($oname);
    my $srcfl = IO::File->new($oname, "r")
        or die "Cannot open $File::Find::name for reading: $!";
    my $dstfl = IO::File->new($tname, "w")
        or die "Cannot create temporary file $tname in $File::Find::dir: $!";
    binmode $srcfl;
    if ($notnative) {
        # Explicit --cr/--nocr output must not be translated by the OS layer.
        binmode $dstfl;
    }

    # $t is the number of cr's preceding lf on the first terminated line;
    # it is undefined while unknown and reset to undef to cancel conversion.
    my $t;
    while (defined(my $line = <$srcfl>)) {
        if ($line =~ s/(\r*)\n$/\n/) {
            my $n = length $1;
            $t = $n if !defined $t;
            if (!$forceending && ($n != $t || $line =~ m/\r/)) {
                print "mismatch in " . $oname . ":" . $n . " expected " . $t . "\n";
                undef $t;
                last;
            }
            elsif ($notnative > 0) {
                $line =~ s/\n$/\r\n/;
            }
        }
        print {$dstfl} $line;
    }

    # Identical sizes mean the endings were already in the target format.
    if (defined $t && $srcfl->tell == $dstfl->tell) {
        undef $t;
    }
    $srcfl->close;
    my $closed_ok = $dstfl->close;
    my $close_err = $!;

    if (!defined $t) {
        unlink $tname
            or die "Cannot remove temporary file $tname in $File::Find::dir: $!";
        return;
    }

    # Never destroy the original if the converted copy is incomplete.
    if (!$closed_ok) {
        unlink $tname;
        die "Cannot finish writing $tname in $File::Find::dir: $close_err";
    }

    # Unlink first: rename over an existing file is not portable everywhere.
    unlink $oname
        or die "Cannot remove $File::Find::name: $!";
    rename $tname, $oname
        or die "Cannot rename $tname to $oname in $File::Find::dir: $!";
    if ($preservedate) {
        utime $ostat[9], $ostat[9], $oname;
    }
    # stat fields 4 and 5 are uid and gid.  chown goes first because it may
    # clear setuid/setgid bits that chmod then restores.  Both are best
    # effort, as an unprivileged user usually cannot chown.
    chown $ostat[4], $ostat[5], $oname;
    chmod $ostat[2] & 07777, $oname;
    print "Converted file " . $oname . " to text in " . $File::Find::dir . "\n";
    return;
}
