head	1.3;
access;
symbols;
locks
	wmc:1.3; strict;
comment	@# @;


1.3
date	2004.03.15.12.07.00;	author wmc;	state Exp;
branches;
next	1.2;

1.2
date	2004.03.05.12.05.17;	author wmc;	state Exp;
branches;
next	1.1;

1.1
date	2004.03.05.12.04.58;	author wmc;	state Exp;
branches;
next	;


desc
@First version available on the web
@


1.3
log
@A few misc minor bits and:
Add binmode() for linux (well, perl 5.8 upwards)
@
text
@#!/usr/bin/perl -w

# Script to "stashsplit" pp-fields
#
# Note that we attempt to do byte-reordering, in the
# sense that if Reorder is set (it is unset by default) bytes are
# swapped from 01234567... to 32107654...
# If the first number read (which should be the length of
# the first header, 256 bytes) is not 256, it will try
# inverting Reorder.
# If this script is running on an alpha or on linux, you
# probably need Reorder set (it is normally auto-sensed as above).
#
# Use: pp2ss.pl [options] ppfile 
#  or: cat ppfile | pp2txt.pl [options] 
#
# Options: OUTBASE=dir/ - (NB trailing slash!) - output directory.
#            The default for this used to be ".". That wasn't very useful in most cases.
#            The default now is:
#              1) if the path to the files to split looks like:
#                 .../runid/[64|32]/pp_fields/
#                 then the output directory will be
#                 .../runid/[64|32]/[ts|day|0.01|0.03|1]/
#                 if the field looks like it is monthly, seasonal or yearly means
#              2) if not, the default is "."
#              3) of course, OUTBASE=. can still be specified
#          SC=SC1,SC2...,SCn - Stashcodes to output (else all)
#          LBLEV=lev1,lev2...,levn - levels to output (nb this is the lblev parameter, thus applies
#                                    in all cases (eg is 8888 for MSLP)
#          BLEV=blev1,blev2,...blevn - ditto, for blev
#          or
#          SC=SC1+BLEV=blev1%blev2+LBLEV=lblev,SC2...
#          (If this first form is used, *all* fields have to match SC *and* levels (if given);
#           If the second form is used, fields match case-by-case)
#          (example: SC=16202+BLEV=500%300,16203+LBLEV=1%2%3)
#          Outmode=">" or ">>" (or indeed any of perls other magics...)
#          T=1 - test mode. Don't write
#
# ...with output to stash-split files
#
# Notes: the handling of the "period" bit isn't as good
# as pp_period. I just do lbyrd-lbyr, etc. This should
# suffice for standard model run output.
#
# Author: wmc 2001/10/11
#         wmc 2002/09/30 - correct behaviour if pc=0 (set period to 0000...)
#                        - set correct number of 000's!
#         wmc various
#         wmc 2004/03/xx - add -w; add some "my"'s to remove warnings; on debug print out first 2 data vals
#         wmc 2004/03/15 - add binmode() around in and out

# Options
$V=0;		# V=1 - print version and stop
$D=0;		# Debug
$T=0;		# Test mode
$SC="";		# Comma separated list of desired stash codes
$LBLEV=undef;	# Ditto for lblev
$BLEV=undef;	# Ditto for blev
$OutMode=">"; 	# New or Append
$oldoutfile=""; # So we know to append to multiple fields-in-file
$Sub=1;		# Include submodel identifier in SS name from lbuser[6]
$Reorder=0;	# Swap endians on input for purposes of interpretation. Set to =1 to swap. Will auto-sense.
$Outorder=1;    # Swap endians on output (independent of Reorder). Note: correct use is
                # probably Reorder=0 and Outorder=1
$FieldNo=0;	# Field number, for debug output
$OUTBASE="";	# Base for output filenames. Default current directory. Don't forget the trailing "/"
$MAXWRITE=99999;# Max # of fields to write out
# Override options, eg "pp2ss.pl SC='1,16222'". Each leading NAME=value argument is
# eval'ed into the global of that name; stop (without a -w warning) when the arguments run out.
eval "\$$1=\$2" while defined($ARGV[0]) && $ARGV[0] =~ /^(\w+)=(.*)/ && shift;

if ($V) { print "Version: 2004/03/01\n"; exit };

if ($OUTBASE ne "") { $OUTBASEr="set-by-user" } else { $OUTBASEr="set-by-default" };
if ($D > 1) { print "OUTBASE mode: $OUTBASEr\n" };

if ($OutMode =~ /Append/i) { $OutMode = ">>" };

# Make an assoc array of stashcodes, if desired
if ($SC) { for $SC (split(/,/,$SC)) {
# If we have selected it as SC+BLEV=blev or somesuch, then...
  if ($SC=~/\+/) {
    ($sc,@@a)=split(/\+/,$SC);
    for (@@a) {
      ($a1,$a2)=split(/=/);
      for $v (split(/\%/,$a2)) {
        if ($a1 eq "BLEV") { $v=sprintf("%.3f",$v) };
        ${"$sc"."_"."$a1"}{$v}=1;	# symbolic ref: a hash named eg 16202_BLEV, keyed by level
      }
    };
# Otherwise include it in the normal SC list. Note that if we include SC twice
# in the two ways we can get it into both lists (is this useful!?!)
  } else {
    $SC{$SC}=1
  }
} };
# Make an assoc array of (l)blev's, if desired
if ($LBLEV) { for $lblev (split(/,/,$LBLEV)) { $LBLEV{$lblev}=1 } };
if ($BLEV) { for $blev (split(/,/,$BLEV)) { $tblev=sprintf("%.3f",$blev); $BLEV{$tblev}=1 } };

if ($D > 2) { print "pp2ss.pl: beginning (SC: $SC)\n" };
 
while ($File = shift) {
# One pass of this loop per input file; one pass of the inner loop per field in it
if ($D) { print "$File\n" };

open STDIN,$File or die "Failed to open $File: $!";
binmode(STDIN);

# Read in the length of the first header. Expect 256.
while (read(STDIN,$IN,4)) {

# Accumulate what we're going to write out in $INs. Note that
# even if we need to reorder $IN to read it, we accumulate the
# unswapped actual input
  $INs=$IN;

  if ($Reorder) { $IN=byteorder($IN) };
  $rl=unpack("i",$IN);
  if ($rl != 256) {
    print "Record length is $rl not 256 as I expected. I'll try (un)setting Reorder\n";
    $Reorder=1-$Reorder;
    $IN=byteorder($IN);
    $rl=unpack("i",$IN);
    if ($rl != 256) { die "Record length is *still* not 256, its $rl" }
  } else {
    if ($D > 2) { print "First rl read is 256; good. Not setting reorder\n" }
  };
  if ($D > 2) { print "Read in header record length $rl\n" };

# Read in the integer part of the header (45 integers) and the real part (19 reals)
  read(STDIN,$IN,4*45);
  $INs.=$IN;
  if ($Reorder) { $IN=byteorder($IN) };
  @@ih=unpack("i45",$IN);
  if ($D > 3) { print "ih: ",join(" .. ",@@ih),"\n" };
  read(STDIN,$IN,4*19);
  $INs.=$IN;
  if ($Reorder) { $IN=byteorder($IN) };
  @@rh=unpack("f19",$IN);
  if ($D > 3) { print "rh: ",join(" .. ",@@rh),"\n" };
# (No need to swap $IN back here: the next read overwrites it)

# Read in the trailer for the header
  read(STDIN,$IN,4);
  $INs.=$IN;
  if ($Reorder) { $IN=byteorder($IN) };
  $rl=unpack("i",$IN);
  if ($D > 2) { print "Read in header trailing length: $rl\n" };

# Set OUTBASE, if it hasn't been specified. This needs to be done file-by-file,
# and possibly even field-by-field, which we shall do, as its easy...
# which means we need to remember if it was set by default or by the user.
# If we can't find owt better, use "./" (the current directory);
# Nb: we can't do this until we've called "ss_filename" to set per.
  $outfile1 = ss_filename();
  if ($OUTBASEr eq "set-by-default") {
# NB "./" not ".": OUTBASE is prepended as-is, so "." would give hidden ".per..." files
    $OUTBASE="./";
    ($File1=$File) =~ s/[^\/]+$//g;
    if ($File1 !~ /^\//) { $File1="$ENV{PWD}/$File1" };

# And does it look like a standard setup?
    if ($File1 =~ /\w{5}\/(32|64)\/pp_fields/) {

# And does "per" make sense?
      undef $per1;
      if ($per eq "000100000000") { $per1="1/" };
      if ($per eq "000003000000") { $per1="0.03/" };
      if ($per eq "000001000000") { $per1="0.01/" };
      if ($per eq "000000010000") { $per1="day/" };
      if ($per eq "000000000000") { $per1="ts/" };

      if (defined $per1) {

        ($OUTBASE=$File1) =~ s/pp_fields(\/)?$/$per1/;

      }

    }

  };

# Prefix the SS filename (made by ss_filename above, which also made all the codes
# in the name available, eg lbuser[3] as sc) with the output directory
  $outfile = ${OUTBASE} . $outfile1;

# Debug info
  if ($D > 3) {
    print "$outfile\n";
  };

  $FieldNo++;
  if ($D > 2) {
    print "F",$FieldNo,": $lbyr/$lbmon/$lbdat $lbhr:$lbmin $sc ($lblev, $tblev) ($outfile)\n"
  };

# Read the length of the data record
  read(STDIN,$IN,4);
  $INs.=$IN;
  if ($Reorder) { $IN=byteorder($IN) };
  $rl=unpack("i",$IN);
  if ($D >2) { print "Read in data record length $rl\n" };
  if ($rl < 0) {
    $IN=byteorder($IN);
    $rl1=unpack("i",$IN);
    print "Oh dear. I read a -ve record length ($rl). Reordering, I get: $rl1\n";
    die "Oh dear oh dear oh dear..."
  };

# Read the data but don't bother unpack it unless we're debuggin
  read(STDIN,$IN,$rl);
# Accumulate the raw data first (as for every other read), then reorder only the working copy
  $INs.=$IN;
  if ($Reorder) { $IN=byteorder($IN) };
  if ($D > 2) {
    @@data=unpack("ff",$IN);
    print "First two data values: $data[0], $data[1]\n"
  };

# Read in the length of the record again, in case its a multi-field file
  read(STDIN,$IN2,4);
  $INs.=$IN2;
  if ($Reorder) { $IN2=byteorder($IN2) };

# Write it out, if desired
# print "$sc"."_BLEV ($blev / $tblev) [".${"$sc"."_BLEV"}{$tblev}."]\n";
# print "$sc"."_LBLEV ($lblev / $lblev) [".${"$sc"."_LBLEV"}{$lblev}."]\n";
# NB: "and" binds tighter than "or", so MAXWRITE only limits the third (SC/LBLEV/BLEV list) alternative
  if (${"$sc"."_BLEV"}{$tblev} or ${"$sc"."_LBLEV"}{$lblev}
      or
      ((!$SC or $SC{$sc}) and (!$LBLEV or $LBLEV{$lblev}) and (!$BLEV or $BLEV{$tblev}))
      and
      ($FieldNo <= $MAXWRITE)) {
    if (!$T) {
      if ($D >1) { print "Writing to: $outfile\n" };
      if ($outfile ne $oldoutfile) { open OUT, "$OutMode $outfile" or warn $!; binmode(OUT) };
      $oldoutfile=$outfile;
      if ($Outorder) {
        if ($D >3) { print "Swapping endians on output\n" };
        $INs=byteorder($INs)
      };
      print OUT $INs;
    } else {
      print "Would write to: $outfile\n";
    };
  };
};

};

# ---------------------------------------------------------

sub ss_filename {
# Build the stash-split file name (no directory) for the field whose header is in
# the globals ih (integer part) and rh (real part). Takes no arguments. Returns
# "per.[sub.]ss.sss.pc.yyyy.mm.dd.hh.mm.pp". The decoded values (lbyr, lbmon, lbdat,
# lbhr, lbmin, lblev, tblev, per, sc, ...) are left in globals for the caller to use.
$lbyr  = $ih[0];
$lbmon = $ih[1];
$lbdat = $ih[2];
$lbhr  = $ih[3];
$lbmin = $ih[4];
$lbtim = $ih[12];
$lblev = $ih[32];
$blev = $rh[6];
$tblev=sprintf("%.3f",$blev);
$lbproc= $ih[24];
$pc=sprintf("%6.6d",$lbproc);

# Period: all zeros if the tens digit of lbtim, or lbproc, is 0. Otherwise the gap from
# the date in ih[0..4] to the date in ih[6..10], counting every month as 30 days.
$lbtim1=$lbtim/10 % 10;
if ($lbtim1 == 0 or $lbproc == 0) {
#       yyyymmddhhmm
  $per="000000000000"
} else {
  $p1=$lbmin +($lbhr +($lbdat+($lbmon+$lbyr *12)*30)*24)*60;
  $p2=$ih[10]+($ih[9]+($ih[8]+($ih[7]+$ih[6]*12)*30)*24)*60;
  $p=$p2-$p1;
# Peel the minute count apart into minutes, hours, days, months and years
  $dmin=$p % 60; $p-=$dmin; $p/=60;
  $dhr =$p % 24; $p-=$dhr ; $p/=24;
  $ddat=$p % 30; $p-=$ddat; $p/=30;
  $dmon=$p % 12; $p-=$dmon; $p/=12;
  $dyr =$p;
  $per=sprintf("%4.4d%2.2d%2.2d%2.2d%2.2d",$dyr,$dmon,$ddat,$dhr,$dmin);
};

$date=sprintf("%4.4d.%2.2d.%2.2d.%2.2d.%2.2d",
              $lbyr,$lbmon,$lbdat,$lbhr,$lbmin
             );

$sc   = $ih[41];
$sc1=sprintf("%2.2d.%3.3d",$sc/1000,$sc % 1000);
# Optional submodel prefix. sub is always (re)set, so it is never undefined (which
# gave a -w warning in the return below) or stale when Sub=0.
if ($Sub != 0) {
  $submodel=$ih[44];
  $sub=sprintf("%2.2d.",$submodel)
} else { $sub="" };

return "$per.$sub$sc1.$pc.$date.pp";
};

# Return a copy of the argument with the bytes of each 4-byte group reversed
# (01234567... becomes 32107654...). A trailing group of 2 or 3 bytes is also
# reversed; a single trailing byte is left as it is. Uses lexicals, not globals.
sub byteorder {
  my $In = shift;
  for (my $i=0; $i<(length($In)-1)/4; $i++) {
# Scalar assignment, so reverse() reverses the characters of the 4-byte slice
    substr($In,$i*4,4)=reverse(substr($In,$i*4,4));
  };
  return $In;
};
@


1.2
log
@Upgraded version. Should handle different endian-ness more cleanly
@
text
@d1 1
a1 1
#!/usr/bin/perl 
d48 3
d56 1
a56 1
$SC=undef;	# Comma separated list of desired stash codes
d108 1
d211 1
a211 1
# Read the data but don't bother unpack it
d216 4
d237 1
a237 1
      if ($outfile ne $oldoutfile) { open OUT, "$OutMode $outfile" or warn $! };
d263 2
a264 2
$lbrow = $ih[17];
$lbnpt = $ih[18];
@


1.1
log
@Initial revision
@
text
@d17 10
a26 1
# Options: OUTBASE=dir/ - (NB trailing slash!) - output directory, else use .
d46 2
d50 1
d64 1
d69 5
d97 3
a99 1
  
d109 3
d122 2
d129 1
a129 1
  $INs.=$IN;
d132 1
a133 1
  @@rh=unpack("f45",$IN);
d136 3
d143 11
d155 39
d196 1
a196 1
  $INs.=$IN;
d200 6
d209 2
d214 3
a216 2
  read(STDIN,$IN,4); 
  $INs.=$IN;
a217 9
# Make the SS filename. This also makes all the codes in the
# name available, eg lbuser[3] as sc
  $outfile = ${OUTBASE} . ss_filename();

# Debug info
  if ($D > 1) { 
    print "F",$FieldNo++,": $lbyr/$lbmon/$lbdat $lbhr:$lbmin $sc ($lblev, $tblev)\n" 
  };

d224 3
a226 1
      ((!$SC or $SC{$sc}) and (!$LBLEV or $LBLEV{$lblev}) and (!$BLEV or $BLEV{$tblev}))) {
d232 1
a232 1
        if ($D >2) { print "Swapping endians on output\n" };
d235 1
a235 1
      print OUT $INs
d255 3
d261 2
d265 3
a267 2
if ($lbtim1 == 0) {
  $per="0000000000"
a285 3
$lbproc= $ih[24];
$pc=sprintf("%6.6d",$lbproc);

@