Perl Conditions - string

Trying to iterate through two files. Everything works although once I get to the negation of my if statement it messes everything up. The only thing that will print is the else statement
Please disregard any unused variables, when defined. Will clean it up after.
#!/usr/bin/perl
#
# Packages and modules
#
use strict;
use warnings;
use version; our $VERSION = qv('5.16.0'); # This is the version of Perl to be used
use Text::CSV 1.32; # We will be using the CSV module (version 1.32 or higher)
# to parse each line
#
# readFile.pl
# Authors: schow04#mail.uoguelph + anilam#mail.uoguelph.ca
# Project: Lab Assignment 1 Script (Iteration 0)
# Date of Last Update: Monday, November 16, 2015.
#
# Functional Summary
# readFile.pl takes in a CSV (comma separated version) file
# and prints out the fields.
# There are three fields:
# 1. name
# 2. gender (F or M)
# 3. number of people with this name
#
# This code will also count the number of female and male
# names in this file and print this out at the end.
#
# The file represents the names of people in the population
# for a particular year of birth in the United States of America.
# Officially it is the "National Data on the relative frequency
# of given names in the population of U.S. births where the individual
# has a Social Security Number".
#
# Commandline Parameters: 1
# $ARGV[0] = name of the input file containing the names
#
# References
# Name files from http://www.ssa.gov/OACT/babynames/limits.html
#
#
# Variables to be used
#
my $EMPTY = q{};
my $SPACE = q{ };
my $COMMA = q{,};
my $femalecount = 0;
my $malecount = 0;
my $lines = 0;
my $filename = $EMPTY;
my $filename2 = $EMPTY;
my #records;
my #records2;
my $record_count = -1;
my $top_number = 0;
my $male_total = 0;
my $male_count = 0;
my #first_name;
my #gender;
my #first_name2;
my #number;
my $count = 0;
my $count2 = 0;
my $csv = Text::CSV->new({ sep_char => $COMMA });
#
# Check that you have the right number of parameters
#
if ($#ARGV != 1) {
print "Usage: readTopNames.pl <names file> <course names file>\n" or
die "Print failure\n";
exit;
}
$filename = $ARGV[0];
$filename2 = $ARGV[1];
#
# Open the input file and load the contents into records array
#
open my $names_fh, '<', $filename
or die "Unable to open names file: $filename\n";
#records = <$names_fh>;
close $names_fh or
die "Unable to close: $ARGV[0]\n"; # Close the input file
open my $names_fh2, '<', $filename2
or die "Unable to open names file: $filename2\n";
#records2 = <$names_fh2>;
close $names_fh2 or
die "Unable to close: $ARGV[1]\n"; # Close the input file
#
# Parse each line and store the information in arrays
# representing each field
#
# Extract each field from each name record as delimited by a comma
#
foreach my $class_record (#records)
{
chomp $class_record;
$record_count = 0;
$count = 0;
foreach my $name_record ( #records2 )
{
if ($csv->parse($name_record))
{
my #master_fields = $csv->fields();
$record_count++;
$first_name[$record_count] = $master_fields[0];
$gender[$record_count] = $master_fields[1];
$number[$record_count] = $master_fields[2];
if($class_record eq $first_name[$record_count])
{
if($gender[$record_count] eq 'F')
{
print("$first_name[$record_count] ($record_count)\n");
}
if($gender[$record_count] eq 'M')
{
my $offset = $count - 2224;
print("$first_name[$record_count] ($offset)\n");
}
}
} else {
warn "Line/record could not be parsed: $records[$record_count]\n";
}
$count++;
}
}
#
# End of Script
#
Adam (187)
Alan (431)
Alejandro (1166)
Alex (120)
Alicia (887)
Ambrose (305)
Caleb (794)
Sample output from running the following code.
This is correct: Although if a name is not found in the second file it is supposed to say:
Adam (187)
Alan (431)
Name (0)
Alejandro (1166)
Alex (120)
Alicia (887)
Ambrose (305)
Caleb (794)
That is what the else is supposed to find. Whether the if statement returned nothing.
else {
print("$first_name[$record_count] (0)\n");
}
The output that i get when i add that else, to account for the negation is literally:
Elzie (0)
Emer (0)
Enna (0)
Enriqueta (0)
Eola (0)
Eppie (0)
Ercell (0)
Estellar (0)

It's really tough to help you properly without better information, so I've written this, which looks for each name from the names file in the master data file and displays the associated values
There's never a reason to write a long list of declarations like that at the top of a program, and you've written way too much code before you started debugging. You should write no more than three or four lines of code before you test that it works and carry on adding to it. You've ended up with 140 lines — mostly of them comments — that don't do what you want, and you're now lost as to what you should fix first
I haven't been able to fathom what all your different counters are for, or why you're subtracting a magic 2224 for male records, so I've just printed the data directly from the master file
I hope you'll agree that it's far clearer with the variables declared when they're required instead of making a huge list at the top of your program. I've dropped the arrays #first_name, #gender and #number because you were only ever using the latest value so they had no purpose
#!/usr/bin/perl
use strict;
use warnings;
use v5.16.0;
use autodie;
use Text::CSV;
STDOUT->autoflush;
if ( #ARGV != 2 ) {
die "Usage: readTopNames.pl <names file> <master names file>\n";
}
my ( $names_file, $master_file ) = #ARGV;
my #names = do {
open my $fh, '<', $names_file;
<$fh>;
};
chomp #names;
my #master_data = do {
open my $fh, '<', $master_file;
<$fh>;
};
chomp #master_data;
my $csv = Text::CSV->new;
for my $i ( 0 .. $#names ) {
my $target_name = $names[$i];
my $found;
for my $j ( 0 .. $#master_data ) {
my $master_rec = $master_data[$j];
my $status = $csv->parse($master_rec);
unless ( $status ) {
warn qq{Line/record "$master_rec" could not be parsed\n};
next;
}
my ( $name, $gender, $count ) = $csv->fields;
if ( $name eq $target_name ) {
$found = 1;
printf "%s %s (%d)\n", $name, $gender, $count;
}
}
unless ( $found ) {
printf "%s (%d)\n", $target_name, 0;
}
}
output
Adam F (7)
Adam M (5293)
Alan F (9)
Alan M (2490)
Name (0)
Alejandro F (6)
Alejandro M (2593)
Alex F (157)
Alex M (3159)
Alicia F (967)
Ambrose M (87)
Caleb F (14)
Caleb M (9143)

4 changes proposed:
foreach my $class_record (#records)
{
chomp $class_record;
$record_count = 0;
$count = 0;
# add found - modification A
my $found = 0;
foreach my $name_record ( #records2 )
{
# should not be here
#$record_count++;
if ($csv->parse($name_record))
{
my #master_fields = $csv->fields();
$record_count++;
$first_name[$record_count] = $master_fields[0];
$gender[$record_count] = $master_fields[1];
$number[$record_count] = $master_fields[2];
if($class_record eq $first_name[$record_count])
{
if($gender[$record_count] eq 'F')
{
print("$first_name[$record_count] ($record_count)\n");
}
if($gender[$record_count] eq 'M')
{
my $offset = $count - 2224;
print("$first_name[$record_count] ($offset)\n");
}
# modification B - set found =1
$found = 1;
#last; # no need to keep looping
next; # find next one if try to find more than 1
}
} else {
warn "Line/record could not be parsed: $records[$record_count]\n";
}
$count++;
}
# modification C -
if($found){
}else{
print "${class_record}(0)\n";
}
}

Related

Perl daemon doesn't go through entire loop

I'm trying to make a program that works like a very simple server in Perl.
The program itself is meant to work as a library catalogue, giving the user options of searching for books by title or author, and borrowing or returning books. The list of books is provided in a separate file.
Basically, it's supposed to take requests (files) from "Requests" folder, process them, and then give answers (also files) in "Answers" folder. After the process is over, it deletes the old requests and repeats the process (answers are deleted by the client after they are accepted).
It's meant to run as a daemon, but for some reason only the loop responsible for deleting the request files works in the background - the requests are not processed into answers, but are just deleted. Whenever a new request appears, it's almost immediately deleted.
I'm learning to use daemons and tried to emulate what is in this thread.
#!/usr/bin/perl
use warnings;
use strict;
use Proc::Daemon;
#FUNCTIONS DEFINTIONS
sub FindAuthor
{
#try to find book by this author in the catalogue
}
sub FindTitle
{
#try to find book with this title in the catalogue
}
sub CheckIfCanBeReturned
{
#check if the book is borrowed and by whom
}
#attempt at daemonization
Proc::Daemon::Init;
my $continueWork = 1;
$SIG{TERM} = sub { $continueWork = 0 };
while ( $continueWork )
{
sleep(2);
my #RequestFilesArray = `ls /home/Ex5/Requests`;
#list all requests currently in the Request folder
for ( my $b = 0; $b < #RequestFilesArray; $b++)
{
my $cut = `printf "$RequestFilesArray[$b]" | wc -m`;
$cut = $cut - 1;
$RequestFilesArray[$b] = substr $RequestFilesArray[$b], 0, $cut;
}
#the requests are formatted in such way,
#that the first 2 letters indicate what the client wants to be done
#and the rest is taken parameters used in the processing
for (my $i = 0; $i < #RequestFilesArray; $i++)
{
my $UserRequest = `tail -1 Requests/$RequestFilesArray[$i]`;
my $fix = `printf "$UserRequest" | wc -m`;
$fix = $fix - 1;
$UserRequest = substr $UserRequest, 0, $fix;
my $RequestType = substr $UserRequest, 0, 2;
my $RequestedValue = substr $UserRequest, 3;
my $RequestNumber = $i;
if ($RequestType eq "fa")
{
#FIND BY AUTHOR
my #results = FindAuthor ($RequestedValue);
my $filename = "/home/Ex5/Answers/" . $RequestFilesArray[$RequestNumber];
open (my $answerFile, '>', $filename) or die "$!";
for (my $a = 0; $a < #results; $a++)
{
print $answerFile $results[$a],"\n";
}
close $answerFile;
}
elsif ($RequestType eq "ft")
{
#FIND BY TITLE
my #results = FindTitle ($RequestedValue);
my $filename = "/home/Ex5/Answers/" . $RequestFilesArray[$RequestNumber];
open ( my $answerFile, '>', $filename) or die "$!";
for (my $a = 0; $a < #results; $a++)
{
print $answerFile $results[$a],"\n";
}
close $answerFile;
}
elsif ($RequestType eq "br")
{
#BOOK RETURN
my $result = CheckIfCanBeReturned ($RequestedValue, $RequestFilesArray[$RequestNumber]);
my $filename = "/home/Ex5/Answers/" . $RequestFilesArray[$RequestNumber];
open ( my $answerFile, '>', $filename) or die "$!";
print $answerFile $result;
close $answerFile;
}
elsif ($RequestType eq "bb")
{
#BOOK BORROW
my $result = CheckIfCanBeBorrowed ($RequestedValue, $RequestFilesArray[$RequestNumber]);
my $filename = "/home/Ex5/Answers/" . $RequestFilesArray[$RequestNumber];
open ( my $answerFile, '>', $filename) or die "$!";
print $answerFile $result;
close $answerFile;
}
else
{
print "something went wrong with this request";
}
}
#deleting processed requests
for ( my $e = 0; $e < #RequestFilesArray; $e++)
{
my $removeReq = "/home/Ex5/Requests/" . $RequestFilesArray[$e];
unlink $removeReq;
}
#$continueWork =0;
}
You have written way too much code before attempting to test it. You have also started shell processes at every opportunity rather than learning the correct way to achieve things in Perl
The first mistake is to use ls to discover what jobs are waiting. ls prints multiple files per line, and you treat the whole of each line as a file name, using the bizarre printf "$RequestFilesArray[$b]" | wc -m instead of length $RequestFilesArray[$b]
Things only get worse after that
I suggest the following
Start again from scratch
Write your program in Perl. Perl isn't a shell language
Advance in very small increments, making sure that your code compiles and does what it is supposed to every three or four lines. It does wonders for the confidence to know that you're enhancing working code rather than creating a magical sequence of random characters
Learn how to debug. You appear to be staring at your code hoping for inspiration to strike in the manner of someone staring at their car engine in the hope of seeing why it won't start
Delete request files as part of processing the request, and only once the request has been processed and the answer file successfully written. It shouldn't be done in a separate loop
Taking what you provided, here's some pseudocode I've devised for you that you can use as somewhat of a template. This is by NO MEANS exhaustive. I think the advice #Borodin gave is sound and prudent.
This is all untested and much of the new stuff is pseudocode. However, hopefully, there are some breadcrumbs from which to learn. Also, as I stated above, your use of Proc::Daemon::Init is suspect. At the very least, it is so minimally used that it is gobbling up whatever error(s) is/are occurring and you've no idea what's wrong with the script.
#!/usr/bin/perl -wl
use strict;
use File::Basename;
use File::Spec;
use Proc::Daemon;
use Data::Dumper;
# turn off buffering
$|++;
#FUNCTIONS DEFINTIONS
sub FindAuthor
{
#try to find book by this author in the catalogue
}
sub FindTitle
{
#try to find book with this title in the catalogue
}
sub CheckIfCanBeReturned
{
#check if the book is borrowed and by whom
}
sub tail
{
my $file = shift;
# do work
}
sub find_by
{
my $file = shift;
my $val = shift;
my $by = shift;
my #results;
my $xt = 0;
# sanity check args
# do work
if ( $by eq 'author' )
{
my #results = FindByAuthor(blah);
}
elsif ( $by eq 'blah' )
{
#results = blah();
}
#...etc
# really should use File::Spec IE
my $filename = File::Spec->catfile('home', 'Ex5', 'Answers', $file);
# might be a good idea to either append or validate you're not clobbering
# an existent file here because this is currently clobbering.
open (my $answerFile, '>', $filename) or die "$!";
for ( #results )
{
print $answerFile $_,"\n";
}
close $answerFile;
# have some error checking in place and set $xt to 1 if an error occurs
return $xt;
}
#attempt at daemonization
# whatever this is is completely broken methinks.
#Proc::Daemon::Init;
my $continueWork++;
my $r_dir = '/home/user/Requests';
$SIG{TERM} = sub { $continueWork = 0 };
# going with pseudocode
while ( $continueWork )
{
#list all requests currently in the Request folder
my #RequestFilesArray = grep(/[^\.]/, <$r_dir/*>);
#the requests are formatted in such way,
#that the first 2 letters indicate what the client wants to be done
#and the rest is taken parameters used in the processing
for my $request_file ( #RequestFilesArray )
{
my $result = 0;
$request_file = basename($request_file);
my $cut = length($request_file) - 1;
my $work_on = substr $request_file, 0, $cut;
my $UserRequest = tail($request_file);
my $fix = length($UserRequest) - 1;
$UserRequest = substr $UserRequest, 0, $fix;
my $RequestType = substr $UserRequest, 0, 2;
my $RequestedValue = substr $UserRequest, 3;
if ($RequestType eq "fa")
{
#FIND BY AUTHOR
$result = find_by($request_file, $RequestedValue, 'author');
}
elsif ($RequestType eq "ft")
{
#FIND BY TITLE
$result = find_by($request_file, $RequestedValue, 'title');
}
elsif ($RequestType eq "br")
{
#BOOK RETURN
$result = CheckIfCanBeReturned ($RequestedValue, $request_file) or handle();
}
elsif ($RequestType eq "bb")
{
#BOOK BORROW
$result = CheckIfCanBeBorrowed ($RequestedValue, $request_file) or handle();
}
else
{
print STDERR "something went wrong with this request";
}
}
#deleting processed requests
if ( $result == 1 )
{
unlink $work_on;
}
sleep(2);
}
Take special note to my "mild" attempt and DRYing up your code by using the find_by subroutine. You had a LOT of duplicate code in your original script, which I moved into a single sub routine. DRY eq 'Don't Repeat Yourself'.

perl : String extraction based on pattern match [closed]

This question is unlikely to help any future visitors; it is only relevant to a small geographic area, a specific moment in time, or an extraordinarily narrow situation that is not generally applicable to the worldwide audience of the internet. For help making this question more broadly applicable, visit the help center.
Closed 10 years ago.
A file FILE1 has several thousands of lines with a terminating pattern _Pattern1.
A second file too has several thousands of lines with the same terminating pattern _Pattern1.
I must now:
Read FILE1 line by line
Find out if the line has any string terminating with _Pattern1
Extract the string and store it into a variable
Open FILE2 and read it line by line
Find out if the line just read from FILE2 contains the string stored in the variable above
How is this to be done in perl?
EDIT2:
Allright, with a bit of googling and referring to the links enlisted below, I solved my problem.
Here is the code snippet.
#!/usr/bin/perl
use strict;
use warnings;
my $OriginalHeader=$ARGV[0]; ## Source file
my $GeneratedHeader=$ARGV[1];## File to compare against
my $DeltaHeader=$ARGV[2]; ## File to store misses
my $MatchingPattern="_Pos";
my $FoundPattern;
open FILE1, $OriginalHeader or die $!;
open FILE2, $GeneratedHeader or die $!;
open (FILE3, ">$DeltaHeader") or die $!;
my $lineFromOriginalHeader;
my $lineFromGeneratedHeader;
my $TotalMacrosExamined = 0;
my $TotalMacrosMissed = 0;
while($lineFromOriginalHeader=<FILE1>)
{
if($lineFromOriginalHeader =~ /$MatchingPattern/)
{
my $index = index($lineFromOriginalHeader,$MatchingPattern);
my $BackIndex = $index;
my $BackIndexStart = $index;
$BackIndex = $BackIndex - 1;
## Use this while loop to extract the substring.
while (1)
{
my $ExtractedChar = substr($lineFromOriginalHeader,$BackIndex,1);
if ($ExtractedChar =~ / /)
{
$FoundPattern = substr($lineFromOriginalHeader,$BackIndex + 1,$BackIndexStart + 3 -
$BackIndex);
print "Identified $FoundPattern \n";
$TotalMacrosExamined = $TotalMacrosExamined + 1;
##Skip the next line
$lineFromOriginalHeader = <FILE1>;
last;
}
else
{
$BackIndex = $BackIndex - 1;
}
} ##while(1)
## We now look for $FoundPattern in FILE2
while ($lineFromGeneratedHeader = <FILE2>)
{
if (index($lineFromGeneratedHeader,$FoundPattern)!= -1)
{
##Pattern found. Reset file pointer and break out of while loop
seek FILE2,0,0;
last;
}
else
{
if (eof(FILE2) == 1)
{
print FILE3 "Generated header misses $FoundPattern\n";
$TotalMacrosMissed = $TotalMacrosMissed + 1;
seek FILE2,0,0;
last;
}
}
} ##while(1)
}
else
{
##NOP
}
} ##while (linefromoriginalheader)
close FILE1;
close FILE2;
close FILE3;
print "Total number of bitfields examined = $TotalMacrosExamined\n";
print "Number of macros obsolete = $TotalMacrosMissed\n";
Just a first cut at making your code more Perly. Actually plenty more could be done, including $some_var is usually used vs $SomeVar in Perl, but I didn't get that far.
#!/usr/bin/perl
use strict;
use warnings;
my ($OriginalHeader, $GeneratedHeader, $DeltaHeader) = #ARGV;
my $MatchingPattern=qr/(\S*_Pos)/; # all non-whitespace terminated by _Pos
open my $file1, '<', $OriginalHeader or die $!;
open my $file2, '<', $GeneratedHeader or die $!;
open my $file3, '>', $DeltaHeader or die $!;
my $TotalMacrosExamined = 0;
my $TotalMacrosMissed = 0;
while(my $lineFromOriginalHeader=<$file1>) {
next unless $lineFromOriginalHeader =~ $MatchingPattern;
my $FoundPattern = $1; # matched string
print "Identified $FoundPattern \n";
$TotalMacrosExamined++;
##Skip the next line
<$file1>;
## We now look for $FoundPattern in FILE2
my $match_found = 0;
while (my $lineFromGeneratedHeader = <$file2>) {
if (index($lineFromGeneratedHeader,$FoundPattern)!= -1) {
##Pattern found. Close the file and break out of while loop
$match_found++;
last;
}
}
unless ($match_found) {
print $file3 "Generated header misses $FoundPattern\n";
$TotalMacrosMissed++;
}
seek $file2,0,0;
}
print "Total number of bitfields examined = $TotalMacrosExamined\n";
print "Number of macros obsolete = $TotalMacrosMissed\n";
Having programmed in C all my life, I googled usage of the perl constructs below and wrote a C like program. This works flawlessly for me. :-)
Edit : This is to clarify why I must skip a line in the algo below. The pattern which is retrieved and later searched for in the second file occurs on two consecutive lines. It therefore suffices that its first occurence is reliably detected. Also a nitpick, It is always guaranteed that the substring containing the pattern is always the second substring on the line.
e.g #define Something_Pos (Some Value)
#!/usr/bin/perl
use strict;
use warnings;
my $OriginalHeader=$ARGV[0];
my $GeneratedHeader=$ARGV[1];
my $DeltaHeader=$ARGV[2];
my $MatchingPattern="_Pos";
my $FoundPattern;
open FILE1, $OriginalHeader or die $!;
open FILE2, $GeneratedHeader or die $!;
open (FILE3, ">$DeltaHeader") or die $!;
my $lineFromOriginalHeader;
my $lineFromGeneratedHeader;
my $TotalMacrosExamined = 0;
my $TotalMacrosMissed = 0;
while($lineFromOriginalHeader=<FILE1>)
{
if($lineFromOriginalHeader =~ /$MatchingPattern/)
{
my $index = index($lineFromOriginalHeader,$MatchingPattern);
my $BackIndex = $index;
my $BackIndexStart = $index;
$BackIndex = $BackIndex - 1;
## Use this while loop to extract the substring.
while (1)
{
my $ExtractedChar = substr($lineFromOriginalHeader,$BackIndex,1);
if ($ExtractedChar =~ / /)
{
$FoundPattern = substr($lineFromOriginalHeader,$BackIndex + 1,$BackIndexStart + 3 -
$BackIndex);
print "Identified $FoundPattern \n";
$TotalMacrosExamined = $TotalMacrosExamined + 1;
##Skip the next line
$lineFromOriginalHeader = <FILE1>;
last;
}
else
{
$BackIndex = $BackIndex - 1;
}
} ##while(1)
## We now look for $FoundPattern in FILE2
while ($lineFromGeneratedHeader = <FILE2>)
{
##print "Read the following line from FILE2: $lineFromGeneratedHeader\n";
if (index($lineFromGeneratedHeader,$FoundPattern)!= -1)
{
##Pattern found. Close the file and break out of while loop
seek FILE2,0,0;
last;
}
else
{
if (eof(FILE2) == 1)
{
print FILE3 "Generated header misses $FoundPattern\n";
$TotalMacrosMissed = $TotalMacrosMissed + 1;
seek FILE2,0,0;
last;
}
}
} ##while(1)
}
else
{
}
} ##while (linefromoriginalheader)
close FILE1;
close FILE2;
close FILE3;
print "Total number of bitfields examined = $TotalMacrosExamined\n";
print "Number of macros obsolete = $TotalMacrosMissed\n";

Calculating the Mean from aPerl Script

I m still in here. ;)
I've got this code from a very expert guy, and I'm shy to ask him this basic questions...anyway this is my question now; this Perl Script prints the median of a column of numbers delimited space, and, I added some stuff to get the size of it, now I'm trying to get the sum of the same column. I did and got not results, did I not take the right column? ./stats.pl 1 columns.txt
#!/usr/bin/perl
use strict;
use warnings;
my $index = shift;
my $filename = shift;
my $columns = [];
open (my $fh, "<", $filename) or die "Unable to open $filename for reading\n";
for my $row (<$fh>) {
my #vals = split/\s+/, $row;
push #{$columns->[$_]}, $vals[$_] for 0 .. $#vals;
}
close $fh;
my #column = sort {$a <=> $b} #{$columns->[$index]};
my $offset = int($#column / 2);
my $length = 2 - #column % 2;
my #medians = splice(#column, $offset, $length);
my $median;
$median += $_ for #medians;
$median /= #medians;
print "MEDIAN = $median\n";
################################################
my #elements = #{$columns->[$index]};
my $size = #elements;
print "SIZE = $size\n";
exit 0;
#################################################
my $sum = #{$columns->[$index]};
for (my $size=0; $size < $sum; $size++) {
my $mean = $sum/$size;
};
print "$mean\n";
thanks in advance.
OK some pointers to get you going :
You can put all the numbers into an array :
my #result = split(m/\d+/, $line);
#average
use List::Util qw(sum);
my $sum = sum(#result);
You can then access individual columns with $result[$index] where index is the number of column you want to access.
Also note that :
$total = $line + $total;
$count = $count + 1;
Can be rewritten as :
$total += $line;
$count += 1;
Finally make sure that you are reading the file :
put a "debugging" print into the while loop :
print $line, "\n";
This should get you going :)

How to turn an array of words into an array containing the characters of the words in order?

I have an array of unknown number of words, with an unknown max length.
I need to convert it to another array, basically turning it into a column word array.
so with this original array:
#original_array = ("first", "base", "Thelongest12", "justAWORD4");
The resluting array would be:
#result_array = ("fbTj","iahu","rses","selt","t oA"," nW"," gO"," eR"," sD"," t4"," 1 "," 2 ");
Actually I will have this:
fbTj
iahu
rses
selt
t oA
nW
gO
eR
sD
t4
1
2
I need to do it in order to make a table, and these words are the table's headers.
I hope I have made myself clear, and appreciate the help you are willing to give.
I tried it with the split function, but I keep messing it up...
EDIT:
Hi all, thanks for all the tips and suggestions! I learned quite much from all of you hence the upvote. However I found tchrist's answer more convenient, maybe because I come from a c,c# background... :)
use strict;
use warnings;
use 5.010;
use Algorithm::Loops 'MapCarU';
my #original_array = ("first", "base", "Thelongest12", "justAWORD4");
my #result_array = MapCarU { join '', map $_//' ', #_ } map [split //], #original_array;
I have an old program that does this. Maybe you can adapt it:
$ cat /tmp/a
first
base
Thelongest12
justAWORD4
$ rot90 /tmp/a
fbTj
iahu
rses
selt
t oA
nW
gO
eR
sD
t4
1
2
Here’s the source:
$ cat ~/scripts/rot90
#!/usr/bin/perl
# rot90 - tchrist#perl.com
$/ = '';
# uncomment for easier to read, but not reversible:
### #ARGV = map { "fmt -20 $_ |" } #ARGV;
while ( <> ) {
chomp;
#lines = split /\n/;
$MAXCOLS = -1;
for (#lines) { $MAXCOLS = length if $MAXCOLS < length; }
#vlines = ( " " x #lines ) x $MAXCOLS;
for ( $row = 0; $row < #lines; $row++ ) {
for ( $col = 0; $col < $MAXCOLS; $col++ ) {
$char = ( length($lines[$row]) > $col )
? substr($lines[$row], $col, 1)
: ' ';
substr($vlines[$col], $row, 1) = $char;
}
}
for (#vlines) {
# uncomment for easier to read, but again not reversible
### s/(.)/$1 /g;
print $_, "\n";
}
print "\n";
}
use strict;
use warnings;
use List::Util qw(max);
my #original_array = ("first", "base", "Thelongest12", "justAWORD4");
my #new_array = transpose(#original_array);
sub transpose {
my #a = map { [ split // ] } #_;
my $max = max(map $#$_, #a);
my #out;
for my $n (0 .. $max) {
$out[$n] .= defined $a[$_][$n] ? $a[$_][$n] : ' ' for 0 .. $#a;
}
return #out;
}
It can be done easily by this simple one-liner:
perl -le'#l=map{chomp;$m=length if$m<length;$_}<>;for$i(0..$m-1){print map substr($_,$i,1)||" ",#l}'

Sorting files with unordered multi-part key

Using any combination of Linux tools (without going into any full featured programming language) how can I sort this list
A,C 1
C,B 2
B,A 3
into
A,B 3
A,C 1
B,C 2
Not applying for any beauty contest, this seems to come close:
#!/bin/bash
while read one two; do
one=`echo $one | sed -e 's/,/\n/g' | sort | sed -e '
1 {h; d}
$! {H; d}
H; g; s/\n/,/g;
'`
echo $one $two
done | sort
Change the internal field separator, then compare the the first two letters with ">":
(
IFS=" ,";
while read a b n; do
if [ "$a" \> "$b" ]; then
echo "$b,$a $n";
else
echo "$a,$b $n";
fi;
done;
) <<EOF | sort
A,C 1
C,B 2
B,A 3
EOF
In case somebody is interested. I was not realy satisfied with any suggestions. Probably because I hoped for view lines solution and such doesn't exist as far as I know.
Anyway I did wrote an utility, called ljoin (for left join like in databases) which does exactly what I was asking for (of course :D)
#!/usr/bin/perl
=head1 NAME
ljoin.pl - Utility to left join files by specified key column(s)
=head1 SYNOPSIS
ljoin.pl [OPTIONS] <INFILE1>..<INFILEN> <OUTFILE>
To successfully join rows one must suply at least one input file and exactly one output file. Input files can be real file names or a patern, like [ABC].txt or *.in etc.
=head1 DESCRIPTION
This utility merges multiple file into one using specified column as a key
=head2 OPTIONS
=item --field-separator=<separator>, -fs <separator>
Specifies what string should be used to separate columns in plain file. Default value for this option is tab symbol.
=item --no-sort-fields, -no-sf
Do not sort columns when creating a key for merging files
=item --complex-key-separator=<separator>, -ks <separator>
Specifies what string should be used to separate multiple values in multikey column. For example "A B" in one file can be presented as "B A" meaning that this application should somehow understand that this is the same key. Default value for this option is space symbol.
=item --no-sort-complex-keys, -no-sk
Do not sort complex column values when creating a key for merging files
=item --include-primary-field, -i
Specifies whether key which is used to find matching lines in multiple files should be included in the output file. First column in output file will be the key in any case, but in case of complex column the value of first column will be sorted. Default value for this option is false.
=item --primary-field-index=<index>, -f <index>
Specifies index of the column which should be used for matching lines. You can use multiple instances of this option to specify a multi-column key made of more than one column like this "-f 0 -f 1"
=item --help, -?
Get help and documentation
=cut
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
my $fieldSeparator = "\t";
my $complexKeySeparator = " ";
my $includePrimaryField = 0;
my $containsTitles = 0;
my $sortFields = 1;
my $sortComplexKeys = 1;
my #primaryFieldIndexes;
GetOptions(
"field-separator|fs=s" => \$fieldSeparator,
"sort-fields|sf!" => \$sortFields,
"complex-key-separator|ks=s" => \$complexKeySeparator,
"sort-complex-keys|sk!" => \$sortComplexKeys,
"contains-titles|t!" => \$containsTitles,
"include-primary-field|i!" => \$includePrimaryField,
"primary-field-index|f=i#" => \#primaryFieldIndexes,
"help|?!" => sub { pod2usage(0) }
) or pod2usage(2);
pod2usage(0) if $#ARGV < 1;
push #primaryFieldIndexes, 0 if $#primaryFieldIndexes < 0;
my %primaryFieldIndexesHash;
for(my $i = 0; $i <= $#primaryFieldIndexes; $i++)
{
$primaryFieldIndexesHash{$i} = 1;
}
print "fieldSeparator = $fieldSeparator\n";
print "complexKeySeparator = $complexKeySeparator \n";
print "includePrimaryField = $includePrimaryField\n";
print "containsTitles = $containsTitles\n";
print "primaryFieldIndexes = #primaryFieldIndexes\n";
print "sortFields = $sortFields\n";
print "sortComplexKeys = $sortComplexKeys\n";
my $fieldsCount = 0;
my %keys_hash = ();
my %files = ();
my %titles = ();
# Read columns into a memory
foreach my $argnum (0 .. ($#ARGV - 1))
{
# Find files with specified pattern
my $filePattern = $ARGV[$argnum];
my #matchedFiles = < $filePattern >;
foreach my $inputPath (#matchedFiles)
{
open INPUT_FILE, $inputPath or die $!;
my %lines;
my $lineNumber = -1;
while (my $line = <INPUT_FILE>)
{
next if $containsTitles && $lineNumber == 0;
# Don't use chomp line. It doesn't handle unix input files on windows and vice versa
$line =~ s/[\r\n]+$//g;
# Skip lines that don't have columns
next if $line !~ m/($fieldSeparator)/;
# Split fields and count them (store maximum number of columns in files for later use)
my #fields = split($fieldSeparator, $line);
$fieldsCount = $#fields+1 if $#fields+1 > $fieldsCount;
# Sort complex key
my #multipleKey;
for(my $i = 0; $i <= $#primaryFieldIndexes; $i++)
{
my #complexKey = split ($complexKeySeparator, $fields[$primaryFieldIndexes[$i]]);
#complexKey = sort(#complexKey) if $sortFields;
push #multipleKey, join($complexKeySeparator, #complexKey)
}
# sort multiple keys and create key string
#multipleKey = sort(#multipleKey) if $sortFields;
my $fullKey = join $fieldSeparator, #multipleKey;
$lines{$fullKey} = \#fields;
$keys_hash{$fullKey} = 1;
}
close INPUT_FILE;
$files{$inputPath} = \%lines;
}
}
# Open output file
my $outputPath = $ARGV[$#ARGV];
open OUTPUT_FILE, ">" . $outputPath or die $!;
my #keys = sort keys(%keys_hash);
# Leave blank places for key columns
for(my $pf = 0; $pf <= $#primaryFieldIndexes; $pf++)
{
print OUTPUT_FILE $fieldSeparator;
}
# Print column headers
foreach my $argnum (0 .. ($#ARGV - 1))
{
my $filePattern = $ARGV[$argnum];
my #matchedFiles = < $filePattern >;
foreach my $inputPath (#matchedFiles)
{
print OUTPUT_FILE $inputPath;
for(my $f = 0; $f < $fieldsCount - $#primaryFieldIndexes - 1; $f++)
{
print OUTPUT_FILE $fieldSeparator;
}
}
}
# Print merged columns
print OUTPUT_FILE "\n";
foreach my $key ( #keys )
{
print OUTPUT_FILE $key;
foreach my $argnum (0 .. ($#ARGV - 1))
{
my $filePattern = $ARGV[$argnum];
my #matchedFiles = < $filePattern >;
foreach my $inputPath (#matchedFiles)
{
my $lines = $files{$inputPath};
for(my $i = 0; $i < $fieldsCount; $i++)
{
next if exists $primaryFieldIndexesHash{$i} && !$includePrimaryField;
print OUTPUT_FILE $fieldSeparator;
print OUTPUT_FILE $lines->{$key}->[$i] if exists $lines->{$key}->[$i];
}
}
}
print OUTPUT_FILE "\n";
}
close OUTPUT_FILE;

Resources