#!/usr/bin/perl -w
# pp2ss.pl - "stashsplit" pp-fields: copy each field of a pp file into an
# output file whose name is built from the field's header codes.
#
# Record layout read by this script (all items 4 bytes wide):
#   header length (must be 256) | 45 integers | 19 reals | header trailer |
#   data length | data | data trailer
#
# Byte order:
#   Reorder  - swap bytes 0123 4567 ... -> 3210 7654 ... when *interpreting*
#              the input.  Defaults to 0 and is auto-sensed: if the first
#              header length does not decode to 256 the setting is inverted
#              and the script dies if that does not help either.
#   Outorder - swap bytes of everything written (independent of Reorder).
#              Defaults to 1.  The raw input bytes are what gets written,
#              swapped once if Outorder is set.
#
# Use: pp2ss.pl [options] ppfile [ppfile ...]
# or:  cat ppfile | pp2ss.pl [options]          (or give "-" as the file)
#
# Options (NAME=value, before the file names):
#   OUTBASE=dir/  - (NB trailing slash!) - output directory.
#        The default is:
#        1) if the path to the files to split looks like
#             .../runid/[64|32]/pp_fields/
#           and the period looks like yearly/seasonal/monthly/daily/timestep,
#           the output directory is
#             .../runid/[64|32]/[1|0.03|0.01|day|ts]/
#        2) if not, the default is "."
#        3) of course, OUTBASE=. can still be specified
#        If OUTBASE names an existing directory but lacks the trailing slash,
#        one is added; otherwise OUTBASE is used as a plain prefix.
#   SC=SC1,SC2...,SCn         - stashcodes to output (else all)
#   LBLEV=lev1,lev2...,levn   - levels to output (this is the lblev header
#                               value, e.g. 8888 for MSLP)
#   BLEV=blev1,blev2,...blevn - ditto, for blev
#   or
#   SC=SC1+BLEV=blev1%blev2+LBLEV=lblev,SC2...
#        With the first form *all* of SC, LBLEV and BLEV (when given) must
#        match.  With the second form a field matches case-by-case.
#        Example: SC=16202+BLEV=500%300,16203+LBLEV=1%2%3
#   OutMode=">" or ">>" (or "Append"); "Outmode" is accepted as well
#   MAXWRITE=n    - only the first n fields read are eligible for writing
#   Sub=0         - leave the submodel identifier out of the file name
#   T=1           - test mode.  Don't write
#   D=n           - debug level
#   V=1           - print version and stop
#
# Notes: the handling of the "period" bit isn't as good as pp_period.
# I just do lbyrd-lbyr, etc (30-day months).  This should suffice for
# standard model run output.
#
# Author: wmc 2001/10/11
#         wmc 2002/09/30 - correct behaviour if pc=0 (set period to 0000...)
#                        - set correct number of 000's!
#         wmc various
#         wmc 2004/03/xx - add -w; add some "my"'s to remove warnings;
#                          on debug print out first 2 data vals
#         wmc 2004/03/15 - add binmode() around in and out
#
# RCS history (recovered from the ,v archive this script was stored in):
#   1.3  2004.03.15.12.07.00  wmc  A few misc minor bits and:
#                                  Add binmode() for linux (well, perl 5.8 upwards)
#   1.2  2004.03.05.12.05.17  wmc  Upgraded version. Should handle different
#                                  endian-ness more cleanly
#   1.1  2004.03.05.12.04.58  wmc  Initial revision
#   desc: First version available on the web
# The raw reverse deltas are kept as comments at the end of this file.

use strict;
use warnings;
use Cwd ();

# ---------------------------------------------------------
# Options (package globals so that NAME=value can override them)
our $V          = 0;      # V=1 - print version and stop
our $D          = 0;      # Debug
our $T          = 0;      # Test mode
our $SC         = "";     # Comma separated list of desired stash codes
our $LBLEV      = undef;  # Ditto for lblev
our $BLEV       = undef;  # Ditto for blev
our $OutMode    = ">";    # New or Append
our $oldoutfile = "";     # So we know to append to multiple fields-in-file
our $Sub        = 1;      # Include submodel identifier (ih[44]) in SS name
our $Reorder    = 0;      # Swap endians on input for interpretation; auto-sensed
our $Outorder   = 1;      # Swap endians on output (independent of Reorder)
our $FieldNo    = 0;      # Field number, for debug output
our $OUTBASE    = "";     # Base for output filenames
our $MAXWRITE   = 99999;  # Highest field number that may be written

# Selection tables built from SC / LBLEV / BLEV
our (%SC, %LBLEV, %BLEV);
our %LevelSel;            # $LevelSel{stashcode}{BLEV|LBLEV}{value} = 1

# Header of the field currently being processed, and the codes that
# ss_filename() derives from it
our (@ih, @rh);
our ($lbyr, $lbmon, $lbdat, $lbhr, $lbmin, $lbtim, $lblev, $blev, $tblev);
our ($lbproc, $pc, $per, $date, $sc, $sc1, $submodel, $sub);

# Names accepted on the command line.  "Outmode" is the spelling used in
# the usage text; it maps onto the same variable as "OutMode".
my %OPTION_REF = (
    V          => \$V,
    D          => \$D,
    T          => \$T,
    SC         => \$SC,
    LBLEV      => \$LBLEV,
    BLEV       => \$BLEV,
    OutMode    => \$OutMode,
    Outmode    => \$OutMode,
    oldoutfile => \$oldoutfile,
    Sub        => \$Sub,
    Reorder    => \$Reorder,
    Outorder   => \$Outorder,
    FieldNo    => \$FieldNo,
    OUTBASE    => \$OUTBASE,
    MAXWRITE   => \$MAXWRITE,
);

# Output state shared by all input files
my $Out_fh;               # currently open output handle, if any
my %Opened_this_run;      # output files already created by this process

# ---------------------------------------------------------
# Consume leading NAME=value arguments from @ARGV and store each value in
# the matching option variable.  Unknown names are reported and skipped.
sub parse_options {
    while (@ARGV && $ARGV[0] =~ /^(\w+)=(.*)/) {
        my ($name, $value) = ($1, $2);
        shift @ARGV;
        if (my $ref = $OPTION_REF{$name}) {
            $$ref = $value;
        }
        else {
            warn "pp2ss.pl: ignoring unknown option '$name'\n";
        }
    }
    return;
}

# ---------------------------------------------------------
# Fill %SC, %LBLEV, %BLEV and %LevelSel from the option strings.
sub build_selection_tables {
    if ($SC) {
        for my $item (split /,/, $SC) {
            if ($item =~ /\+/) {
                # Per-stashcode form: SC+BLEV=b1%b2+LBLEV=l1
                my ($code, @conds) = split /\+/, $item;
                for my $cond (@conds) {
                    my ($key, $vals) = split /=/, $cond;
                    next unless defined $key && defined $vals;
                    for my $v (split /\%/, $vals) {
                        # blev is compared as a "%.3f" string throughout
                        $v = sprintf("%.3f", $v) if $key eq "BLEV";
                        $LevelSel{$code}{$key}{$v} = 1;
                    }
                }
            }
            else {
                # Plain stashcode.  A code may appear in both tables.
                $SC{$item} = 1;
            }
        }
    }
    if ($LBLEV) {
        $LBLEV{$_} = 1 for split /,/, $LBLEV;
    }
    if ($BLEV) {
        $BLEV{ sprintf("%.3f", $_) } = 1 for split /,/, $BLEV;
    }
    return;
}

# ---------------------------------------------------------
# True if the field described by $sc/$lblev/$tblev (set by ss_filename)
# is selected: either by a per-stashcode level entry, or by passing every
# one of the SC / LBLEV / BLEV filters that was given.
sub field_wanted {
    if (my $by_code = $LevelSel{$sc}) {
        return 1 if $by_code->{BLEV}  && $by_code->{BLEV}{$tblev};
        return 1 if $by_code->{LBLEV} && $by_code->{LBLEV}{$lblev};
    }
    return 0 if $SC    && !$SC{$sc};
    return 0 if $LBLEV && !$LBLEV{$lblev};
    return 0 if $BLEV  && !$BLEV{$tblev};
    return 1;
}

# ---------------------------------------------------------
# Return $bytes as it must be handed to unpack(): swapped if Reorder is set.
sub native_order {
    my ($bytes) = @_;
    return $Reorder ? byteorder($bytes) : $bytes;
}

# ---------------------------------------------------------
# Read exactly $want bytes from $fh and return them; die on error or on a
# short read.  $what names the item for the error message.
sub read_bytes {
    my ($fh, $want, $what) = @_;
    my $buf = '';
    my $got = read($fh, $buf, $want);
    die "Read error while reading $what: $!" unless defined $got;
    die "Unexpected end of input reading $what (wanted $want bytes, got $got)"
        if $got != $want;
    return $buf;
}

# ---------------------------------------------------------
# Work out the default output base for a field read from $file, using the
# period string $per set by ss_filename().  Returns "." unless the path
# looks like .../runid/[32|64]/pp_fields/ and the period is a known one.
sub default_outbase {
    my ($file) = @_;

    (my $dir = $file) =~ s/[^\/]+$//;
    if ($dir !~ /^\//) {
        my $cwd = defined $ENV{PWD} ? $ENV{PWD} : Cwd::getcwd();
        $dir = "$cwd/$dir";
    }

    # And does it look like a standard setup?
    if ($dir =~ /\w{5}\/(32|64)\/pp_fields/) {
        my %subdir_for = (
            "000100000000" => "1/",
            "000003000000" => "0.03/",
            "000001000000" => "0.01/",
            "000000010000" => "day/",
            "000000000000" => "ts/",
        );
        my $subdir = $subdir_for{$per};
        if (defined $subdir) {
            $dir =~ s/pp_fields(\/)?$/$subdir/;
            return $dir;
        }
    }
    return ".";
}

# ---------------------------------------------------------
# Join an output base and a file name.  A base that is an existing
# directory gets the missing "/" (so that "." does not produce hidden
# ".name" files); any other base is used as a plain prefix.
sub output_path {
    my ($base, $name) = @_;
    $base .= "/" if $base ne "" && $base !~ m{/$} && -d $base;
    return $base . $name;
}

# ---------------------------------------------------------
# Write one complete raw record to $outfile (or just report it in test
# mode).  The output handle is kept open while consecutive fields go to the
# same file.
sub write_field {
    my ($outfile, $record) = @_;

    if ($T) {
        print "Would write to: $outfile\n";
        return;
    }

    print "Writing to: $outfile\n" if $D > 1;

    if ($outfile ne $oldoutfile) {
        if ($Out_fh) {
            close $Out_fh or warn "Error closing $oldoutfile: $!";
            undef $Out_fh;
        }
        (my $mode = $OutMode) =~ s/^\s+|\s+$//g;
        # Never truncate a file this run has already written fields to.
        $mode = ">>" if $mode eq ">" && $Opened_this_run{$outfile};
        if (open my $fh, $mode, $outfile) {
            binmode($fh);
            $Out_fh = $fh;
            $Opened_this_run{$outfile} = 1;
        }
        else {
            warn "Cannot open $outfile (mode $mode): $!";
        }
    }
    $oldoutfile = $outfile;
    return unless $Out_fh;    # open failed; already reported

    if ($Outorder) {
        print "Swapping endians on output\n" if $D > 3;
        $record = byteorder($record);
    }
    print {$Out_fh} $record or warn "Write to $outfile failed: $!";
    return;
}

# ---------------------------------------------------------
# Split every field of one input file ("-" means standard input).
# $outbase_mode is "set-by-user" or "set-by-default"; in the latter case
# OUTBASE is recomputed for every field.
sub process_file {
    my ($file, $outbase_mode) = @_;

    print "$file\n" if $D;

    my $in;
    if ($file eq '-') {
        $in = \*STDIN;
    }
    else {
        open $in, '<', $file or die "Failed to open $file: $!";
    }
    binmode($in);

    FIELD:
    while (1) {
        # Length of the header record.  Expect 256.  A clean end of file
        # here is the normal way out of the loop.
        my $lenbytes = '';
        my $got = read($in, $lenbytes, 4);
        die "Read error on $file: $!" unless defined $got;
        last FIELD if $got == 0;
        die "Truncated record length in $file (got $got of 4 bytes)"
            if $got != 4;

        # $record accumulates the unswapped, actual input for this field.
        my $record = $lenbytes;

        my $rl = unpack("l", native_order($lenbytes));
        if ($rl != 256) {
            print "Record length is $rl not 256 as I expected. I'll try (un)setting Reorder\n";
            $Reorder = 1 - $Reorder;
            $rl = unpack("l", native_order($lenbytes));
            if ($rl != 256) { die "Record length is *still* not 256, its $rl" }
        }
        elsif ($D > 2) {
            print "First rl read is 256; good. Not setting reorder\n";
        }
        print "Read in header record length $rl\n" if $D > 2;

        # Integer part of the header (45 integers), real part (19 reals)
        my $ibytes = read_bytes($in, 4 * 45, "integer header of $file");
        $record .= $ibytes;
        @ih = unpack("l45", native_order($ibytes));
        print "ih: ", join(" .. ", @ih), "\n" if $D > 3;

        my $rbytes = read_bytes($in, 4 * 19, "real header of $file");
        $record .= $rbytes;
        @rh = unpack("f19", native_order($rbytes));
        print "rh: ", join(" .. ", @rh), "\n" if $D > 3;

        # Trailer for the header
        my $trailer = read_bytes($in, 4, "header trailer of $file");
        $record .= $trailer;
        $rl = unpack("l", native_order($trailer));
        print "Read in header trailing length: $rl\n" if $D > 2;

        # Make the SS filename.  This also makes all the codes in the name
        # available (eg $sc, $per), which the OUTBASE default needs.
        my $outfile1 = ss_filename();
        $OUTBASE = default_outbase($file) if $outbase_mode eq "set-by-default";
        my $outfile = output_path($OUTBASE, $outfile1);

        # Debug info
        print "$outfile\n" if $D > 3;
        $FieldNo++;
        if ($D > 2) {
            print "F", $FieldNo, ": $lbyr/$lbmon/$lbdat $lbhr:$lbmin $sc ($lblev, $tblev) ($outfile)\n";
        }

        # Length of the data record
        my $dlenbytes = read_bytes($in, 4, "data length of $file");
        $record .= $dlenbytes;
        $rl = unpack("l", native_order($dlenbytes));
        print "Read in data record length $rl\n" if $D > 2;
        if ($rl < 0) {
            my $rl1 = unpack("l", byteorder(native_order($dlenbytes)));
            print "Oh dear. I read a -ve record length ($rl). Reordering, I get: $rl1\n";
            die "Oh dear oh dear oh dear...";
        }

        # The data.  It is accumulated raw, like the headers; it is only
        # swapped (on a copy) to show the first values when debugging.
        my $data = read_bytes($in, $rl, "data of $file");
        $record .= $data;
        if ($D > 2 && length($data) >= 8) {
            my @data = unpack("ff", native_order($data));
            print "First two data values: $data[0], $data[1]\n";
        }

        # Trailing length of the data record
        $record .= read_bytes($in, 4, "data trailer of $file");

        # Write it out, if desired
        write_field($outfile, $record)
            if field_wanted() && $FieldNo <= $MAXWRITE;
    }

    if ($file ne '-') {
        close $in or warn "Error closing $file: $!";
    }
    return;
}

# ---------------------------------------------------------
# Build the stash-split file name for the header currently in @ih / @rh:
#   <period>.<submodel>.<section>.<item>.<lbproc>.<yyyy.mm.dd.hh.mm>.pp
# Reads the globals @ih, @rh and $Sub; takes no arguments.  As a side
# effect sets the globals $lbyr $lbmon $lbdat $lbhr $lbmin $lbtim $lblev
# $blev $tblev $lbproc $pc $per $date $sc $sc1 (and $submodel, $sub).
# Returns the file name (no directory).
sub ss_filename {
    ($lbyr, $lbmon, $lbdat, $lbhr, $lbmin) = @ih[0 .. 4];
    $lbtim  = $ih[12];
    $lblev  = $ih[32];
    $blev   = $rh[6];
    $tblev  = sprintf("%.3f", $blev);
    $lbproc = $ih[24];
    $pc     = sprintf("%6.6d", $lbproc);

    # Tens digit of lbtim
    my $lbtim1 = int($lbtim / 10) % 10;

    if ($lbtim1 == 0 or $lbproc == 0) {
        #     yyyymmddhhmm
        $per = "000000000000";
    }
    else {
        # Difference between the two header dates in minutes, assuming
        # 12 months of 30 days, then split back into y/m/d/h/m.
        my $p1 = $lbmin  + ($lbhr  + ($lbdat + ($lbmon + $lbyr  * 12) * 30) * 24) * 60;
        my $p2 = $ih[10] + ($ih[9] + ($ih[8] + ($ih[7] + $ih[6] * 12) * 30) * 24) * 60;
        my $p  = $p2 - $p1;
        my $dmin = $p % 60; $p -= $dmin; $p /= 60;
        my $dhr  = $p % 24; $p -= $dhr;  $p /= 24;
        my $ddat = $p % 30; $p -= $ddat; $p /= 30;
        my $dmon = $p % 12; $p -= $dmon; $p /= 12;
        my $dyr  = $p;
        $per = sprintf("%4.4d%2.2d%2.2d%2.2d%2.2d", $dyr, $dmon, $ddat, $dhr, $dmin);
    }

    $date = sprintf("%4.4d.%2.2d.%2.2d.%2.2d.%2.2d",
                    $lbyr, $lbmon, $lbdat, $lbhr, $lbmin);

    $sc  = $ih[41];
    $sc1 = sprintf("%2.2d.%3.3d", $sc / 1000, $sc % 1000);

    # Without the submodel the name simply has no such component
    $sub = "";
    if ($Sub != 0) {
        $submodel = $ih[44];
        $sub = sprintf("%2.2d.", $submodel);
    }

    return "$per.$sub$sc1.$pc.$date.pp";
}

# ---------------------------------------------------------
# Return a copy of the byte string with every group of 4 bytes reversed
# (0123 4567 -> 3210 7654).  A trailing group shorter than 4 bytes is
# reversed as it stands.  An empty or undefined argument is returned
# unchanged.
sub byteorder {
    my ($bytes) = @_;
    return $bytes unless defined $bytes && length $bytes;
    return join '', map { scalar reverse $_ } unpack("(a4)*", $bytes);
}

# ---------------------------------------------------------
# Entry point: parse options, build the selection tables and split every
# file named on the command line (or standard input if it is a pipe or a
# file and no name was given).
sub main {
    parse_options();

    if ($V) { print "Version: 2004/03/01\n"; exit }

    my $outbase_mode = $OUTBASE ne "" ? "set-by-user" : "set-by-default";
    print "OUTBASE mode: $outbase_mode\n" if $D > 1;

    $OutMode = ">>" if $OutMode =~ /Append/i;

    build_selection_tables();

    print "pp2ss.pl: beginning (SC: $SC)\n" if $D > 2;

    my @files = @ARGV;
    @files = ('-') if !@files && (-p STDIN || -f STDIN);

    process_file($_, $outbase_mode) for @files;

    if ($Out_fh) {
        close $Out_fh or warn "Error closing $oldoutfile: $!";
        undef $Out_fh;
    }
    return;
}

main() unless caller;

# ---------------------------------------------------------
# Raw RCS reverse deltas from the original archive, kept for reference only.
# They address line numbers of the archived 1.3 text, not of this file, the
# original line breaks and blank lines inside added text are not recoverable,
# and RCS "@@" escapes are shown as "@".
#
# 1.2 (from 1.3):
#   d1 1
#   a1 1
#     #!/usr/bin/perl
#   d48 3
#   d56 1
#   a56 1
#     $SC=undef; # Comma separated list of desired stash codes
#   d108 1
#   d211 1
#   a211 1
#     # Read the data but don't bother unpack it
#   d216 4
#   d237 1
#   a237 1
#     if ($outfile ne $oldoutfile) { open OUT, "$OutMode $outfile" or warn $! };
#   d263 2
#   a264 2
#     $lbrow = $ih[17];
#     $lbnpt = $ih[18];
#
# 1.1 (from 1.2):
#   d17 10
#   a26 1
#     # Options: OUTBASE=dir/ - (NB trailing slash!) - output directory, else use .
#   d46 2
#   d50 1
#   d64 1
#   d69 5
#   d97 3
#   a99 1
#     (blank)
#   d109 3
#   d122 2
#   d129 1
#   a129 1
#     $INs.=$IN;
#   d132 1
#   a133 1
#     @rh=unpack("f45",$IN);
#   d136 3
#   d143 11
#   d155 39
#   d196 1
#   a196 1
#     $INs.=$IN;
#   d200 6
#   d209 2
#   d214 3
#   a216 2
#     read(STDIN,$IN,4);
#     $INs.=$IN;
#   a217 9
#     # Make the SS filename. This also makes all the codes in the
#     # name available, eg lbuser[3] as sc
#     $outfile = ${OUTBASE} . ss_filename();
#     # Debug info
#     if ($D > 1) {
#     print "F",$FieldNo++,": $lbyr/$lbmon/$lbdat $lbhr:$lbmin $sc ($lblev, $tblev)\n"
#     };
#   d224 3
#   a226 1
#     ((!$SC or $SC{$sc}) and (!$LBLEV or $LBLEV{$lblev}) and (!$BLEV or $BLEV{$tblev}))) {
#   d232 1
#   a232 1
#     if ($D >2) { print "Swapping endians on output\n" };
#   d235 1
#   a235 1
#     print OUT $INs
#   d255 3
#   d261 2
#   d265 3
#   a267 2
#     if ($lbtim1 == 0) {
#     $per="0000000000"
#   a285 3
#     $lbproc= $ih[24];
#     $pc=sprintf("%6.6d",$lbproc);

1;