# George Mpouras
# I am writing something, so here is a subroutine from it that you may like.
# It simply merges files, a little more carefully than usual.
#!/usr/bin/perl
use strict;
use warnings;
# Example invocation: merge every file under r:/tmp/dir into /tmp/big.txt and
# abort with the extended OS error text when MergeFiles reports failure.
unless ( MergeFiles( DIR => 'r:/tmp/dir', OUTPUTFILE => '/tmp/big.txt' ) ) {
    die "oups $^E\n";
}
# Merge all files that exist in a directory into one big file.
# It tries to be clever by merging the files into the biggest of them.
# It also prefers the newer files first, to help any potential sort later.
# MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
#
# MergeFiles( DIR => $dir, OUTPUTFILE => $path )
#
# Merge every plain file found directly inside DIR into a single file and
# move the result to OUTPUTFILE.
#
# The largest file is used as the merge target, so the least amount of data
# has to be copied; among files of equal size the most recently modified one
# comes first.  The lines of every other file are appended to the target with
# trailing whitespace removed and blank lines dropped, and each source file is
# deleted as soon as its lines have been flushed to the target.
#
# Returns 1 on success (including the case where DIR holds no plain files, in
# which case nothing is created) and 0 when DIR or one of the files cannot be
# opened, read or written.  Dies when a required argument is missing, when a
# merged source file cannot be deleted, or when the final rename fails.
sub MergeFiles
{
    my %option = @_;

    for my $required (qw/DIR OUTPUTFILE/) {
        exists $option{$required}
            or die "The \"$required\" argument is missing from ".(caller 0)[3]."\n";
    }

    # flush() on a lexical handle is an IO::Handle method; load it explicitly
    # so this also works on perls that do not autoload it.
    require IO::Handle;

    # Collect [ path, size, age ] for every plain file in the directory.
    # Lexical handles are closed automatically on every early return below.
    opendir my $dir_handle, $option{'DIR'} or return 0;
    my @File;
    # A named lexical keeps the caller's $_ intact.
    while (defined(my $entry = readdir $dir_handle)) {
        my $node = "$option{'DIR'}/$entry";
        next unless -f $node;
        push @File, [ $node, -s _, -M _ ];    # "_" reuses the stat made by -f
    }
    closedir $dir_handle;
    return 1 unless @File;

    # Biggest first; ties are broken by the smallest -M (most recently modified).
    my @ranked = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;
    my $target_size = $ranked[0][1];
    my ($target, @sources) = map { $_->[0] } @ranked;

    if (@sources) {
        # Put a final newline character at the end of the bigger file we are
        # going to merge into, in case it does not have one.  An empty target
        # needs none: adding it would only create a blank first line.
        my $needs_newline = 0;
        if ($target_size > 0) {
            open my $probe, '<', $target or return 0;
            binmode $probe, ':raw';
            seek $probe, -1, 2 or return 0;    # whence 2: relative to end of file
            my $last_byte = '';
            defined( read $probe, $last_byte, 1 ) or return 0;
            close $probe;
            $needs_newline = $last_byte eq chr(10) ? 0 : 1;
        }

        open my $out, '>>', $target or return 0;
        if ($needs_newline) {
            print {$out} "\n" or return 0;
        }

        for my $source (@sources) {
            open my $in, '<', $source or return 0;
            while (defined(my $line = <$in>)) {
                next if $line =~ /^\s*$/;
                chomp $line;
                $line =~ s/\s*$//;
                print {$out} "$line\n" or return 0;
            }
            close $in;

            # Only delete the source once its lines have really been written
            # out; a buffered write error would otherwise lose the data.
            $out->flush or return 0;

            # "or", not "||": "||" would bind to the file name and the die
            # could never fire.
            unlink $source
                or die "Could not delete file \"$source\" because \"$^E\"\n";
        }

        close $out or return 0;
    }

    unless ($target eq $option{'OUTPUTFILE'}) {
        rename($target, $option{'OUTPUTFILE'})
            or die "Could not rename file \"$target\" to \"$option{'OUTPUTFILE'}\" because \"$^E\"\n";
    }

    return 1;
}
# It simply merges files, a little more carefully than usual.
#!/usr/bin/perl
use strict;
use warnings;
# Example invocation: merge every file under r:/tmp/dir into /tmp/big.txt and
# abort with the extended OS error text when MergeFiles reports failure.
unless ( MergeFiles( DIR => 'r:/tmp/dir', OUTPUTFILE => '/tmp/big.txt' ) ) {
    die "oups $^E\n";
}
# Merge all files that exist in a directory into one big file.
# It tries to be clever by merging the files into the biggest of them.
# It also prefers the newer files first, to help any potential sort later.
# MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
#
# MergeFiles( DIR => $dir, OUTPUTFILE => $path )
#
# Merge every plain file found directly inside DIR into a single file and
# move the result to OUTPUTFILE.
#
# The largest file is used as the merge target, so the least amount of data
# has to be copied; among files of equal size the most recently modified one
# comes first.  The lines of every other file are appended to the target with
# trailing whitespace removed and blank lines dropped, and each source file is
# deleted as soon as its lines have been flushed to the target.
#
# Returns 1 on success (including the case where DIR holds no plain files, in
# which case nothing is created) and 0 when DIR or one of the files cannot be
# opened, read or written.  Dies when a required argument is missing, when a
# merged source file cannot be deleted, or when the final rename fails.
sub MergeFiles
{
    my %option = @_;

    for my $required (qw/DIR OUTPUTFILE/) {
        exists $option{$required}
            or die "The \"$required\" argument is missing from ".(caller 0)[3]."\n";
    }

    # flush() on a lexical handle is an IO::Handle method; load it explicitly
    # so this also works on perls that do not autoload it.
    require IO::Handle;

    # Collect [ path, size, age ] for every plain file in the directory.
    # Lexical handles are closed automatically on every early return below.
    opendir my $dir_handle, $option{'DIR'} or return 0;
    my @File;
    # A named lexical keeps the caller's $_ intact.
    while (defined(my $entry = readdir $dir_handle)) {
        my $node = "$option{'DIR'}/$entry";
        next unless -f $node;
        push @File, [ $node, -s _, -M _ ];    # "_" reuses the stat made by -f
    }
    closedir $dir_handle;
    return 1 unless @File;

    # Biggest first; ties are broken by the smallest -M (most recently modified).
    my @ranked = sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;
    my $target_size = $ranked[0][1];
    my ($target, @sources) = map { $_->[0] } @ranked;

    if (@sources) {
        # Put a final newline character at the end of the bigger file we are
        # going to merge into, in case it does not have one.  An empty target
        # needs none: adding it would only create a blank first line.
        my $needs_newline = 0;
        if ($target_size > 0) {
            open my $probe, '<', $target or return 0;
            binmode $probe, ':raw';
            seek $probe, -1, 2 or return 0;    # whence 2: relative to end of file
            my $last_byte = '';
            defined( read $probe, $last_byte, 1 ) or return 0;
            close $probe;
            $needs_newline = $last_byte eq chr(10) ? 0 : 1;
        }

        open my $out, '>>', $target or return 0;
        if ($needs_newline) {
            print {$out} "\n" or return 0;
        }

        for my $source (@sources) {
            open my $in, '<', $source or return 0;
            while (defined(my $line = <$in>)) {
                next if $line =~ /^\s*$/;
                chomp $line;
                $line =~ s/\s*$//;
                print {$out} "$line\n" or return 0;
            }
            close $in;

            # Only delete the source once its lines have really been written
            # out; a buffered write error would otherwise lose the data.
            $out->flush or return 0;

            # "or", not "||": "||" would bind to the file name and the die
            # could never fire.
            unlink $source
                or die "Could not delete file \"$source\" because \"$^E\"\n";
        }

        close $out or return 0;
    }

    unless ($target eq $option{'OUTPUTFILE'}) {
        rename($target, $option{'OUTPUTFILE'})
            or die "Could not rename file \"$target\" to \"$option{'OUTPUTFILE'}\" because \"$^E\"\n";
    }

    return 1;
}