Merge files

G

George Mpouras

# I am writing something, and here is a subroutine from it that you may like.
# It simply merges files, a little more carefully than usual.



#!/usr/bin/perl
use strict;
use warnings;


# Example invocation: merge every plain file found in r:/tmp/dir and leave the
# result at /tmp/big.txt. MergeFiles returns a false value when it could not
# open something, in which case the extended OS error ($^E) is reported.
unless ( MergeFiles( DIR => 'r:/tmp/dir', OUTPUTFILE => '/tmp/big.txt' ) ) {
    die "oups $^E\n";
}


# Merge all files that exist in a directory into a single big one.
# It tries to be clever by appending the smaller files to the biggest of them,
# so the largest amount of data never has to be copied.
# Among files of equal size the newer ones come first, to help any potential
# sort later.
#
# Arguments (named, both mandatory; dies when one is missing):
#   DIR        => directory whose plain files are merged (not recursive)
#   OUTPUTFILE => path the merged file is finally renamed to
#
# Returns 1 on success (also when DIR contains no plain files at all) and 0
# when the directory or one of the files could not be opened or written.
# Dies when an already merged source file cannot be deleted or when the final
# rename fails.
#
# Blank lines of the appended files are dropped and their trailing white space
# is stripped. The biggest file is kept as it is, except that a final newline
# is added when it is not empty and lacks one.
#
# MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
#
sub MergeFiles
{
    my %option = @_;

    for my $required (qw/DIR OUTPUTFILE/) {
        exists $option{$required}
            or die "The \"$required\" argument is missing from " . (caller 0)[3] . "\n";
    }

    # Needed for the flush method on the output handle (core module).
    require IO::Handle;

    # Collect [ path, size, age in days ] for every plain file of the directory.
    opendir my $dir_handle, $option{'DIR'} or return 0;
    my @File;
    while (defined(my $name = readdir $dir_handle)) {
        my $path = "$option{'DIR'}/$name";
        next unless -f $path;
        push @File, [ $path , -s _ , -M _ ];
    }
    closedir $dir_handle;
    return 1 unless @File;

    # Biggest file first; among equal sizes the most recently modified first.
    my @FileSorted = map { $_->[0] }
        sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;
    @File = ();

    my $target = $FileSorted[0];

    if (@FileSorted > 1) {
        # Look at the last byte of the biggest file: a newline has to be added
        # before appending when the file is not empty and does not end with one.
        my $last_char = '';
        open my $probe, '<', $target or return 0;
        binmode $probe, ':raw';
        if (seek($probe, -1, 2)) {    # fails on an empty file; nothing to read then
            read($probe, $last_char, 1);
        }
        close $probe;
        $last_char = '' unless defined $last_char;
        my $needs_newline = (length($last_char) && $last_char ne chr 10) ? 1 : 0;

        open my $out, '>>', $target or return 0;
        if ($needs_newline) {
            print {$out} "\n" or return 0;
        }

        for my $source (@FileSorted[1 .. $#FileSorted]) {
            open my $in, '<', $source or return 0;
            while (my $line = <$in>) {
                next if $line =~ /^\s*$/;
                chomp $line;
                $line =~ s/\s*$//;
                print {$out} "$line\n" or return 0;
            }
            close $in;

            # Push the buffered data to disk before the source disappears, so a
            # write error can never cost us the only copy of the data.
            $out->flush or return 0;

            # "or", not "||": the die must depend on unlink, not on the file name.
            unlink($source)
                or die "Could not delete file \"$source\" because \"$^E\"\n";
        }

        close $out or return 0;
    }

    unless ($target eq $option{'OUTPUTFILE'}) {
        rename($target, $option{OUTPUTFILE})
            or die "Could not rename file \"$target\" to \"$option{OUTPUTFILE}\" because \"$^E\"\n";
    }

    return 1;
}
 
G

George Mpouras

a little bug correction









# Merge all files that exist in a directory into a single big one.
# It tries to be clever by appending the smaller files to the biggest of them,
# so the largest amount of data never has to be copied.
# Among files of equal size the newer ones come first, to help any potential
# sort later.
#
# Arguments (named, both mandatory; dies when one is missing):
#   DIR        => directory whose plain files are merged (not recursive)
#   OUTPUTFILE => path the merged file is finally renamed to
#
# Returns 1 on success (also when DIR contains no plain files at all) and 0
# when the directory or one of the files could not be opened or written.
# Dies when an already merged source file cannot be deleted or when the final
# rename fails.
#
# Blank lines of the appended files are dropped and their trailing white space
# is stripped. The biggest file is kept as it is, except that a final newline
# is added when it is not empty and lacks one.
#
# MergeFiles( DIR => '/tmp' , OUTPUTFILE => '/tmp/big' ) || die;
#
sub MergeFiles
{
    my %option = @_;

    for my $required (qw/DIR OUTPUTFILE/) {
        exists $option{$required}
            or die "The \"$required\" argument is missing from " . (caller 0)[3] . "\n";
    }

    # Needed for the flush method on the output handle (core module).
    require IO::Handle;

    # Collect [ path, size, age in days ] for every plain file of the directory.
    opendir my $dir_handle, $option{'DIR'} or return 0;
    my @File;
    while (defined(my $name = readdir $dir_handle)) {
        my $path = "$option{'DIR'}/$name";
        next unless -f $path;
        push @File, [ $path , -s _ , -M _ ];
    }
    closedir $dir_handle;
    return 1 unless @File;

    # Biggest file first; among equal sizes the most recently modified first.
    my @FileSorted = map { $_->[0] }
        sort { $b->[1] <=> $a->[1] || $a->[2] <=> $b->[2] } @File;
    @File = ();

    my $target = $FileSorted[0];

    if (@FileSorted > 1) {
        # Look at the last byte of the biggest file: a newline has to be added
        # before appending when the file is not empty and does not end with one.
        my $last_char = '';
        open my $probe, '<', $target or return 0;
        binmode $probe, ':raw';
        if (seek($probe, -1, 2)) {    # fails on an empty file; nothing to read then
            read($probe, $last_char, 1);
        }
        close $probe;
        $last_char = '' unless defined $last_char;
        my $needs_newline = (length($last_char) && $last_char ne chr 10) ? 1 : 0;

        open my $out, '>>', $target or return 0;
        if ($needs_newline) {
            print {$out} "\n" or return 0;
        }

        for my $source (@FileSorted[1 .. $#FileSorted]) {
            open my $in, '<', $source or return 0;
            while (my $line = <$in>) {
                next if $line =~ /^\s*$/;
                chomp $line;
                $line =~ s/\s*$//;
                print {$out} "$line\n" or return 0;
            }
            close $in;

            # Push the buffered data to disk before the source disappears, so a
            # write error can never cost us the only copy of the data.
            $out->flush or return 0;

            # "or", not "||": the die must depend on unlink, not on the file name.
            unlink($source)
                or die "Could not delete file \"$source\" because \"$^E\"\n";
        }

        close $out or return 0;
    }

    unless ($target eq $option{'OUTPUTFILE'}) {
        rename($target, $option{OUTPUTFILE})
            or die "Could not rename file \"$target\" to \"$option{OUTPUTFILE}\" because \"$^E\"\n";
    }

    return 1;
}
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,968
Messages
2,570,150
Members
46,696
Latest member
BarbraOLog

Latest Threads

Top