#! /usr/bin/perl

# Simulate the output of the 'nm' utility in Unix
# by using DUMPBIN and munging the output.  This is
# required for Win64 object files, because no Cygwin
# version of nm can read them.

# Requires Perl 5 since it uses references.

# This is a fragile program in that it depends on the columnar
# layout of DUMPBIN's output.  If the format of the SECTION HEADER
# or COFF SYMBOL TABLE output changes, fiddle with the regular
# expressions in the SECTION HEADER portion, or the "unpack" 
# specification in the COFF SYMBOL TABLE portion.

our (@filenames, $arg, $fn, $line, $secnum, $secname, $secflags,
     $full_secnum, %sec_table,
     @lines, $ordinal, $address, $section, $type, $linkage, $symbol,
     $nmtype, $outline);

# Constants from WINNT.H which define the meanings of bits in the
# flags (characteristics) of COFF sections.
 our $IMAGE_SCN_CNT_CODE                 = 0x00000020;
#our $IMAGE_SCN_CNT_INITIALIZED_DATA     = 0x00000040;
 our $IMAGE_SCN_CNT_UNINITIALIZED_DATA   = 0x00000080;
#our $IMAGE_SCN_LNK_OTHER                = 0x00000100;
 our $IMAGE_SCN_LNK_INFO                 = 0x00000200;
 our $IMAGE_SCN_LNK_REMOVE               = 0x00000800;
#our $IMAGE_SCN_LNK_COMDAT               = 0x00001000;
#our $IMAGE_SCN_NO_DEFER_SPEC_EXC        = 0x00004000;
#our $IMAGE_SCN_GPREL                    = 0x00008000;
 our $IMAGE_SCN_MEM_FARDATA              = 0x00008000;
#our $IMAGE_SCN_MEM_PURGEABLE            = 0x00020000;
#our $IMAGE_SCN_MEM_16BIT                = 0x00020000;
#our $IMAGE_SCN_MEM_LOCKED               = 0x00040000;
#our $IMAGE_SCN_MEM_PRELOAD              = 0x00080000;
#our $IMAGE_SCN_ALIGN_1BYTES             = 0x00100000;
#our $IMAGE_SCN_ALIGN_2BYTES             = 0x00200000;
#our $IMAGE_SCN_ALIGN_4BYTES             = 0x00300000;
#our $IMAGE_SCN_ALIGN_8BYTES             = 0x00400000;
#our $IMAGE_SCN_ALIGN_16BYTES            = 0x00500000;
#our $IMAGE_SCN_ALIGN_32BYTES            = 0x00600000;
#our $IMAGE_SCN_ALIGN_64BYTES            = 0x00700000;
#our $IMAGE_SCN_ALIGN_128BYTES           = 0x00800000;
#our $IMAGE_SCN_ALIGN_256BYTES           = 0x00900000;
#our $IMAGE_SCN_ALIGN_512BYTES           = 0x00A00000;
#our $IMAGE_SCN_ALIGN_1024BYTES          = 0x00B00000;
#our $IMAGE_SCN_ALIGN_2048BYTES          = 0x00C00000;
#our $IMAGE_SCN_ALIGN_4096BYTES          = 0x00D00000;
#our $IMAGE_SCN_ALIGN_8192BYTES          = 0x00E00000;
#our $IMAGE_SCN_ALIGN_MASK               = 0x00F00000;
#our $IMAGE_SCN_LNK_NRELOC_OVFL          = 0x01000000;
 our $IMAGE_SCN_MEM_DISCARDABLE          = 0x02000000;
#our $IMAGE_SCN_MEM_NOT_CACHED           = 0x04000000;
#our $IMAGE_SCN_MEM_NOT_PAGED            = 0x08000000;
#our $IMAGE_SCN_MEM_SHARED               = 0x10000000;
#our $IMAGE_SCN_MEM_EXECUTE              = 0x20000000;
 our $IMAGE_SCN_MEM_READ                 = 0x40000000;
 our $IMAGE_SCN_MEM_WRITE                = 0x80000000;

@filenames = ();

# Default values for options:
our $debug_syms = 0;
our $print_file_name = 0;
our $format = 'bsd';
our $defined_only = 0;
our $extern_only = 0;
our $numeric_sort = 0;
our $no_sort = 0;
our $reverse_sort = 0;
our $print_size = 0;
our $print_armap = 0;
our $radix = 'x';
our $undefined_only = 0;

# Spin through the arguments, noting options and filenames.
foreach $arg (@ARGV) {
   if ($arg =~ /^-/) {
      # ** OPTIONS **
      if    ($arg eq '-a' || $arg eq '--debug-syms')    {$debug_syms = 1;}
      elsif ($arg eq '-A')                              {$print_file_name = 1;}
      elsif ($arg eq '-B')                              {$format = 'bsd';}
      elsif ($arg =~ /^-C(.*)/ || $arg =~ /^--demangle(.*)/)
            { die "$0: -C demangling not yet supported.\n";}
      elsif ($arg eq '--no-demangle')
            { die "$0: --no-demangle not yet supported.\n";}
      elsif ($arg eq '--defined-only')                  {$defined_only = 1;}
      elsif ($arg eq '-e')                              { }   # Ignored
      elsif ($arg =~ /-f(.*)/ || $arg =~ /--format(.*)/) {
        $equals_format = $1;
        if ($equals_format !~ /=(.+)/)
              { die "$0: -f or --format option requires an argument."; }
        else {
              $format = $1;
              if ($format ne 'bsd' && $format ne 'sysv' && $format ne 'posix')
                 { die "$0: $equals_format: invalid output format"; };
        }
      }
      elsif ($arg eq '-g' || $arg eq '--extern-only')   {$extern_only = 1;}
      elsif ($arg eq '-n' || $arg eq '--numeric-sort')  {$numeric_sort = 1;}
      elsif ($arg eq '-o')                              {$print_file_name = 1;}
      elsif ($arg eq '-p' || $arg eq '--no-sort')       {$no_sort = 1;}
      elsif ($arg eq '-P' || $arg eq '--portability')   {$format = 'posix';}
      elsif ($arg eq '-r' || $arg eq '--reverse-sort')  {$reverse_sort = 1;}
      elsif ($arg eq '-S' || $arg eq '--print-size')    {$print_size = 1;}
      elsif ($arg eq '-s' || $arg eq '--print-armap')   {$print_armap = 1;}
      elsif ($arg =~ /-t(.*)/ || $arg =~ /--radix(.*)/) {
        $equals_radix = $1;
        if ($equals_radix !~ /=(.+)/) {
              die "$0: -t or --radix option requires an argument.";
        } else {
               $radix = $1;
               if ($radix ne 'x' && $radix ne 'd' && $radix ne 'o')
                  { die "$0: $radix: invalid radix (choose 'x' 'd' or 'o')."; }
        }
      }
      elsif ($arg =~ /--target(.*)/)                     { }    # Ignored
      elsif ($arg eq '-u' || $arg eq '--undefined-only') {$undefined_only = 1;}
      elsif ($arg eq '-X')                               {shift @ARGV;}
      elsif ($arg eq '-h' || $arg eq '--help')           {help(); exit;}
      elsif ($arg eq '-V' || $arg eq '--version')        {version(); exit;}
      else          { die "Unrecognized option: $arg\n"; };
   } else {
      # ** FILENAMES **
      push @filenames, ($arg);
   }
}

# nm uses a.out as the default filename if none other is available.
if ((scalar @filenames) == 0) { @filenames = ( "a.out" ); };

# The main loop: for each file, 'nm' it.
foreach $fn (@filenames) {

   if ( ! -e $fn ) { die "$0: $fn: No such file or directory."; };

   # Have DUMPBIN print out the section headers.
   open(HEADERS, "dumpbin /headers $fn |")
       or die "Couldn't perform DUMPBIN /HEADERS $fn: $!\n";

   # Read the SECTION HEADRES
   while(defined($line = <HEADERS>)) {
      chomp($line);
      # Each section header begins with SECTION HEADER and ends with
      # an all-whitespace line.
      if ($line =~ /^SECTION HEADER/ .. $line =~ /^\s*$/) {
         if ($line =~ /^SECTION HEADER #([0-9A-F]+)\s*/) {
            $secnum = $1;        # Get the section number
         } elsif ($line =~ /^\s*([^\s]+)\s+name/) {
            $secname = $1;       # Get the secton name
         } elsif ($line =~ /^\s*([^\s]+)\s+flags/) {
            $secflags = $1;      # Get the section flags
         } elsif ($line =~ /^\s*$/) {    # Finished a section block.
            $full_secnum = "SECT$secnum";
            # Construct a hash of hash references (records),
            # storing section information.
            $sec_table{ $full_secnum } =
               { SECTION => "$full_secnum",
                 NAME => "$secname",
                 FLAGS => hex($secflags),
               };
         }
      }
   }

   close(HEADERS);

   # Now have DUMPBIN print out the COFF symbol table.
   open(SYMBOLS, "dumpbin /symbols $fn |")
       or die "Couldn't perform DUMPBIN /SYMBOLS $fn: $!\n";

   @lines = ();

   # Read the COFF symbol table
   while (defined($line = <SYMBOLS>)) {
      # print "** $line";
      chomp($line);
      next if (1 .. $line =~ /^COFF SYMBOL TABLE/);  # Begin at symbol table
      last if $line =~ /^\s*$/;   # Terminate at first all-white line
      next if $line =~ /^\s/;  # Skip lines that don't start in column 1
   
      # Get the ordinal number of the symbol, the symbol's relative virtual
      # address, the section name (SECT#, or non-section info such as UNDEF),
      # the type (for C, either "notype" or "notype ()"), the linkage (External,
      # Static, or other), and the symbol name itself.
      ($ordinal, $address, $section, $type, $linkage, $symbol) =
         unpack("A4 A9 A7 A13 A13 x2 A*", $line);
 
      # Normally we exclude everything but true symbols: Statics,
      # Externs, and Labels.  But if --debug-syms is on, we include
      # even the pseudo-symbols, remembering that they are not "true." 
      $true_symbol = 1; 
      if ($linkage ne "Static" && $linkage ne "External" &&
                $linkage ne "Label") {
          next if (!$debug_syms);
          $true_symbol = 0;
      }
  
      # nm prints three things: a symbol's name, its address, and
      # one-letter code indicating something about the type of
      # symbol.  Here we determine the latter.

      if    ( !$true_symbol )     { $nmtype = '?' }
 
      elsif ( $section eq "ABS" ) { $nmtype = 'a' }
      elsif ( $section eq "UNDEF" ) {
         # Some UNDEF symbols come out as C, others as U.
         # Guess: the C symbols have nonzero addresses.
         if ( $address eq "00000000" ) {
            $nmtype = 'u'; $address = ' ' x 8;  # U's have no address printed.
         }
         else { $nmtype = 'c' };
      } else {
          # At this point we need the section name and section characteristics
          # from analyzing the section headers.
          $secname = $sec_table{$section}->{NAME};
          $secflags = $sec_table{$section}->{FLAGS};

          # Many of the symbol-type letters are determined (in GNU nm)
          # directly from the symbol's section header name.  So we take
          # the same shortcuts here.
          $_ = $secname;                   # Set up for short pattern matches
          $nmtype = (/^\*DEBUG\*/) ? 'N' :
                    (/^\.bss/)     ? 'b' :
                    (/^zerovars/)  ? 'b' :
                    (/^\.data/)    ? 'd' :
                    (/^vars/)      ? 'd' :
                    (/^\.rdata/)   ? 'r' :
                    (/^\.rodata/)  ? 'r' :
                    (/^\.sbss/)    ? 's' :
                    (/^\.scommon/) ? 'c' :
                    (/^\.sdata/)   ? 'g' :
                    (/^\.text/)    ? 't' :
                    (/^code/)      ? 't' :
                    (/^\.drectve/) ? 'i' :
                    (/^\.edata/)   ? 'e' :
                    (/^\.pdata/)   ? 'p' :
                    (/^\.debug/)   ? 'N' :
                                     '?';

          # If none of these matched, use the $secflags information
          # looking for appropriate bits.  Most of these are *guesses*
          # based on examining the nm output.
          if ($nmtype eq '?') {
              $nmtype =
                        # INFO and REMOVE sections are marked i.
                        $secflags & $IMAGE_SCN_LNK_INFO        ? 'i' :
                        $secflags & $IMAGE_SCN_LNK_REMOVE      ? 'i' :
                        # DISCARDABLE sections are marked N.
                        $secflags & $IMAGE_SCN_MEM_DISCARDABLE ? 'N' :
                        # CODE sections are marked t.
                        $secflags & $IMAGE_SCN_CNT_CODE        ? 't' :
                        # UNINITIALIZED READ/WRITE sections are marked
                        # s if FARDATA and b if not.
                        $secflags & $IMAGE_SCN_CNT_UNINITIALIZED_DATA &&
                         $secflags & $IMAGE_SCN_MEM_READ &&
                         $secflags & $IMAGE_SCN_MEM_WRITE      ?
                            ($secflags & $IMAGE_SCN_MEM_FARDATA  ? 's' : 'b') :
                        # FARDATA READ sections are marked g,
                        # ordinary READ/WRITE sections are marked d,
                        # and ordinary READ-only sections are marked r.
                        $secflags & $IMAGE_SCN_MEM_READ        ?
                            ($secflags & $IMAGE_SCN_MEM_FARDATA  ? 'g' :
                             $secflags & $IMAGE_SCN_MEM_WRITE    ? 'd' : 'r') :
                                                                 '?'; # Default
          }
                   
      }
  
      # Uppercase letters are used for External symbols,
      # lowercase letters for Static symbols. 
      if ( $linkage eq "External" ) { $nmtype = uc $nmtype; };

      # Convert the address from a hex string into the appropriate radix
      # (which may, of course, be hex again).
      if ($address =~ /^[0-9A-Fa-f]+$/) {    # A valid hexadecimal address?
         $wordwidth = length($address);  # number of digits in this address
         $address = sprintf("%0${wordwidth}l${radix}", hex($address));
      }

      # Throw symbols away based on various exclusion options.
      next if ($undefined_only && $address !~ /^\s*$/);  # non-white address
      next if ($defined_only   && $address =~ /^\s*$/);  # all-white address
      next if ($extern_only    && $linkage ne "External");  # non-Externs
   
      push @lines, ({ADDRESS => $address,
                     NMTYPE  => $nmtype,
                     SYMBOL  => $symbol});
   }

   close(SYMBOLS);

   # Sort by symbol name.
   sub bysymbol { $a->{SYMBOL}  cmp $b->{SYMBOL};  }
   sub byaddress{ $a->{ADDRESS} cmp $b->{ADDRESS}; }
   if (! $no_sort) {
      @lines = $numeric_sort ? sort byaddress @lines :
                               sort bysymbol  @lines ;
      if ($reverse_sort) { @lines = reverse @lines; };
   }

   # Print a header if there's more than one filename,
   # or if this is SysV emulation.
   if ($format eq 'sysv') {
      if ($undefined_only) {
         print "Undefined symbols ";
      } else {
         print "Symbols ";
      }
      print "from $fn:\n\n";
      print "Name                  Value   Class        Type         Size     Line  Section\n\n";
   } elsif ((scalar @filenames) > 1)
      { print "$fn:\n"; };
   
   # Print the lines.
   $prefix = $print_file_name ? "$fn:" : "";
   foreach $outline (@lines) {
      if ($undefined_only) {
         # Omit everything but the symbol in --undefined-only mode.
         print "$prefix$outline->{SYMBOL}\n";
      } else {
         print "$prefix";
         if ($format eq 'bsd') {
           print "$outline->{ADDRESS} $outline->{NMTYPE} $outline->{SYMBOL}\n";
         } elsif ($format eq 'posix') {
           print "$outline->{SYMBOL} $outline->{NMTYPE} $outline->{ADDRESS}\n";
         } elsif ($format eq 'sysv') {
           printf "%-20s|%8s|   %1.1s  |                  |        |     |\n",
                $outline->{SYMBOL}, $outline->{ADDRESS}, $outline->{NMTYPE};
         }
      }
   }

   # Leave two blank lines between files in SysV format.
   if ($format eq 'sysv') { print "\n\n"; };
}


sub help {
print <<"HELP";
Usage: $0 [option(s)] [file(s)]
 List symbols in [file(s)] (a.out by default).
 The options are:
  -a, --debug-syms       Display debugger-only symbols
  -A, --print-file-name  Print name of the input file before every symbol
  -B                     Same as --format=bsd
      --defined-only     Display only defined symbols
  -e                     (ignored)
  -f, --format=FORMAT    Use the output format FORMAT.  FORMAT can be `bsd',
                           `sysv' or `posix'.  The default is `bsd'
  -g, --extern-only      Display only external symbols
  -n, --numeric-sort     Sort symbols numerically by address
  -o                     Same as -A
  -p, --no-sort          Do not sort the symbols
  -P, --portability      Same as --format=posix
  -r, --reverse-sort     Reverse the sense of the sort
  -S, --print-size       Print size of defined symbols (*)
  -s, --print-armap      Include index for symbols from archive members (*)
      --size-sort        Sort symbols by size (*)
  -t, --radix=RADIX      Use RADIX for printing symbol values
      --target=BFDNAME   (ignored: only PE/COFF and PE+/COFF are supported)
  -u, --undefined-only   Display only undefined symbols
  -X 32_64               (ignored)
  -h, --help             Display this information
  -V, --version          Display this program's version number
(*) = doesn't work yet.
HELP
}

sub version {
print "$0, a wrapper around DUMPBIN.\n";
}









# Copyright 2010 Wolfram Research, Inc.
#
# This file is part of the build scripts for building the GNU MP Library
# for Mathematica on Windows (32-bit).
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
# License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
