Difference between revisions of "Iso-anonymizer.pl"
From Cactus Howto
Jump to navigation / Jump to search (3 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
# iso-anonymizer.pl |
# iso-anonymizer.pl |
||
# run like this: |
# run like this: |
||
# ./iso-anonymizer.pl - |
# ./iso-anonymizer.pl [-txt-subst-file=/var/tmp/strings.txt] [-net="192.168.0.0/16"] <config-file1 config-file2 ...> |
||
# ------------------------------------------------------------------------------------------- |
# ------------------------------------------------------------------------------------------- |
||
require 5.006_000; # Needed for NetAddr::IP and file handler |
require 5.006_000; # Needed for NetAddr::IP and file handler |
||
Line 19: | Line 19: | ||
my $infile; |
my $infile; |
||
my $txt_subst_file; |
my $txt_subst_file; |
||
my $net; |
my $net="10.0.0.0/8"; |
||
my $outfile; |
my $outfile; |
||
my %anonymized_ip; |
my %anonymized_ip; |
||
my %anonymized_text; |
my %anonymized_text; |
||
my $ano_txt = "IsoAAAA"; # starting pattern - needs to be alpha chars only for incrementing to work |
my $ano_txt = "IsoAAAA"; # starting pattern - needs to be alpha chars only for incrementing to work |
||
my $ano_suffix = '.anonymized'; |
my $ano_suffix = '.iso-anonymized'; |
||
sub create_string_subst_hash { |
|||
my $txt_subst_file_local = shift; |
|||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
return; |
|||
⚫ | |||
sub _in_range { return 0 <= $_[0] && $_[0] <= 255; } |
sub _in_range { return 0 <= $_[0] && $_[0] <= 255; } |
||
Line 45: | Line 59: | ||
}eg; |
}eg; |
||
return $addrs_found; |
return $addrs_found; |
||
} |
|||
sub show_help { |
|||
print ("---------------------------------------------------------------\n"); |
|||
print ("iso-anonyimzer (c) 2016 by Cactus eSecurity (https://cactus.de)\n"); |
|||
print ("---------------------------------------------------------------\n"); |
|||
print ("iso-anonyimzer can be used to substitute any occurence of ip addresses in a set of text files consistently.\n"); |
|||
print ("Might be helpful for anonymizing configuration files of routers, firewalls, etc. before handing them to third parties\n"); |
|||
print ("Consistently means that one ip is always substituted by the same destination ip address.\n"); |
|||
print ("All subnets, where identified as such, are replaced by /32 subnets. Does currently only handle IPv4 addresses.\n"); |
|||
print ("Additionally strings (e.g. customer names, etc.) can be (also consistently) replaced with generated anonymous strings starting with $ano_txt.\n"); |
|||
print ("Make sure that the string patterns do not contain any text that needs to stay unchanged in the output file.\n"); |
|||
print ("Note that anonymizing is performed consistently across all files. So if you need this multiple file consistency, \n"); |
|||
print ("make sure to anonymize all relevant files in a single run.\n"); |
|||
print ("\nSyntax:\n"); |
|||
print ("iso-anonymizer -help -txt-subst-file=<subst-filename> -net=<ip-subnet> <infile1> <infile2> ... <infilen>\n"); |
|||
print ("-help : displays this text (also when called without parameters)\n"); |
|||
print ("-txt-subst-file=<subst-filename> : optional, if parameter is set, substitutes all strings listed in <subst-filename> (one string per line)\n"); |
|||
print ("-net=<ip-subnet> : optional, defaults to '10.0.0.0/8' - ip subnet that is used for ip address substitution\n"); |
|||
print ("<infile1> <infile2> ... <infilen> : list of files to anonymize\n\n"); |
|||
print ("Example:\n"); |
|||
print ("iso-anonymizer -txt-subst-file=subst-strings.txt -net=192.168.88.0/24 file1.cfg file2.cfg file3.cfg\n\n"); |
|||
} |
} |
||
Line 78: | Line 114: | ||
} |
} |
||
}); |
}); |
||
# obfuscating text |
if (defined($txt_subst_file) && $txt_subst_file ne '') { # obfuscating text |
||
my $regex_all_texts = join("|", map {quotemeta} keys %anonymized_text); |
my $regex_all_texts = join("|", map {quotemeta} keys %anonymized_text); |
||
$line =~ s/($regex_all_texts)/$anonymized_text{$1}/go; |
$line =~ s/($regex_all_texts)/$anonymized_text{$1}/go; |
||
} |
|||
print $ofh $line; |
print $ofh $line; |
||
} |
} |
||
close ($ifh); close ($ofh); return; |
close ($ifh); close ($ofh); return; |
||
} |
} |
||
########################### |
|||
# main start |
|||
########################### |
|||
my $start_time = time(); |
my $start_time = time(); |
||
my $query = CGI->new; |
my $query = CGI->new; |
||
⚫ | |||
⚫ | |||
⚫ | |||
if ((defined($ARGV[0]) && $ARGV[0] eq "-help") || scalar($query->param)==0) { &show_help(); exit 0; } |
|||
⚫ | |||
⚫ | |||
⚫ | |||
else { $txt_subst_file = ''; print ("no -txt-subst-file specified, not doing any string anonymizing\n"); } |
|||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
# treating all params not starting with - as files to anonymize |
# treating all params not starting with - as files to anonymize |
||
# do not re-anonymize files with .anonymized extension and do not anonymize binary files |
|||
foreach my $file ( |
foreach my $file (@ARGV) { |
||
if ($file !~ /^-/ && $file !~ /.*?$ano_suffix$/) { |
if ($file !~ /^-/ && $file !~ /.*?$ano_suffix$/ && -T $file) { |
||
$total_filesize += -s $file; |
$total_filesize += -s $file; |
||
print ("anonymizing: $file ... "); |
print ("anonymizing: $file ... "); |
||
&anonymize($file, $net, $file . $ano_suffix); |
&anonymize($file, $net, $file . $ano_suffix); |
||
print ("result file = $file |
print ("result file = $file$ano_suffix\n"); |
||
} else { print ("ignoring $file\n")} |
|||
⚫ | |||
} |
} |
||
Line 117: | Line 151: | ||
my @kt=keys(%anonymized_text); |
my @kt=keys(%anonymized_text); |
||
my $duration = time() - $start_time; |
my $duration = time() - $start_time; |
||
print("Anonymized " . $#ki . " ip addresses and " . $#kt . " strings in " . sprintf("%.1f",$duration) . " seconds"); |
print("Anonymized " . ($#ki+1) . " ip addresses and " . ($#kt+1) . " strings in " . sprintf("%.1f",$duration) . " seconds"); |
||
printf(" (total %.2f MB, %.2f Mbytes/second).\n", $total_filesize/1000000, $total_filesize/$duration/1000000); |
printf(" (total %.2f MB, %.2f Mbytes/second).\n", $total_filesize/1000000, $total_filesize/$duration/1000000); |
||
my $anonet = NetAddr::IP->new($net); |
|||
if ($anonet->num()<($#ki+1)) { |
|||
print("WARNING: generated " . ($#ki+1) . " anonymized ip addresses (more than available in " . $anonet . |
|||
" which can only hold " . $anonet->num() . " IP addresses).\n"); |
|||
print (" Suggest to use bigger subnet if you need uniqueness of IP addresses.\n"); |
|||
} |
|||
</pre> |
|||
== Documentation == |
|||
<pre> |
|||
=head1 NAME |
=head1 NAME |
||
Line 128: | Line 165: | ||
=head1 SYNOPSIS |
=head1 SYNOPSIS |
||
./iso-anonymizer.pl - |
./iso-anonymizer.pl [-txt-subst-file=/var/tmp/strings.txt] [-net="192.168.0.0/16"] <config-file1 config-file2 ...> |
||
=head1 DESCRIPTION |
=head1 DESCRIPTION |
||
This is a |
This is a script for |
||
a) replacing IP addresses in plain text with anonymized equivalents from |
a) replacing IP addresses in plain text with anonymized equivalents from |
||
the network range supplied. |
the network range supplied. |
||
Line 171: | Line 208: | ||
=head1 EXAMPLES |
=head1 EXAMPLES |
||
./iso-anonymizer.pl -net= |
./iso-anonymizer.pl -net=172.20.0.0/21 -txt-subst-file=/var/tmp/strings.txt /var/tmp/firewall17.cfg /var/tmp/router9.cfg |
||
tim@lacantha:$ sudo perl iso-anonymizer.pl - |
tim@lacantha:$ sudo perl iso-anonymizer.pl -txt-subst-file=strings.txt /var/tmp/netscreen1.cfg |
||
no net specified, using default net 10.0.0.0/8 |
no net specified, using default net 10.0.0.0/8 |
||
anonymizing: /var/tmp/netscreen1.cfg ... result file = /var/tmp/netscreen1.cfg.anonymized |
anonymizing: /var/tmp/netscreen1.cfg ... result file = /var/tmp/netscreen1.cfg.anonymized |
||
Line 180: | Line 217: | ||
Anonymizing a whole (ASCII) Postgresql database: |
Anonymizing a whole (ASCII) Postgresql database: |
||
# creating |
# creating an ASCII dump of the database: |
||
pg_dump -U dbadmin -d isodb -W >/var/tmp/iso_db.dump.sql |
|||
pg_dump |
|||
# or as postgres user: pg_dump -d isodb >/var/tmp/iso_db.dump.sql |
|||
# turn binary .Fc dump into ascii (only necessary if you do not already have an ascii dump): |
# turn binary .Fc dump into ascii (only necessary if you do not already have an ascii dump): pg_restore /var/tmp/iso_db.dump.Fc >/var/tmp/iso_db.dump.sql |
||
pg_restore >dbdump.sql |
|||
# anonymizing: |
|||
iso-anonymizer.pl -txt-subst-file=/var/tmp/strings.txt /var/tmp/iso_db.dump.sql |
|||
⚫ | |||
# restoring anonymized database: |
|||
⚫ | |||
=head1 TODO |
=head1 TODO |
||
⚫ | |||
- define test cases |
|||
⚫ | |||
⚫ | |||
⚫ | |||
- optimize speed |
|||
=head1 AUTHOR |
=head1 AUTHOR |
Latest revision as of 19:43, 11 June 2016
#! /usr/bin/perl -w
# -------------------------------------------------------------------------------------------
# iso-anonymizer.pl
# run like this:
# ./iso-anonymizer.pl [-txt-subst-file=/var/tmp/strings.txt] [-net="192.168.0.0/16"] <config-file1 config-file2 ...>
# -------------------------------------------------------------------------------------------
require 5.006_000; # Needed for NetAddr::IP and file handler
require Exporter;
use strict;
use warnings;
use CGI qw(:standard);
use NetAddr::IP;
use Carp;
use Time::HiRes qw(time tv_interval); # for exact recording of script execution time

my ($cfg_file, $line);
our @ISA = qw(Exporter);
my $infile;
my $txt_subst_file;
my $net="10.0.0.0/8";
my $outfile;
my %anonymized_ip;      # original ip (or netmask) => anonymized replacement
my %anonymized_text;    # original string => anonymized replacement
my $ano_txt = "IsoAAAA"; # starting pattern - needs to be alpha chars only for incrementing to work
my $ano_suffix = '.iso-anonymized';

# Running replacement-address counter. It is shared by all calls to anonymize()
# so that two different original addresses found in two different files never
# receive the same anonymized address (the counter is only reset when a
# different replacement net is requested).
my ($ip_counter, $ip_counter_net);

# Reads the substitution file (one string per line) and fills %anonymized_text
# with a generated replacement for each string.
# Parameter: name of the substitution file. Croaks if the file cannot be opened.
sub create_string_subst_hash {
    my $txt_subst_file_local = shift;

    open( my $txt_file, '<', $txt_subst_file_local )
        or croak "Unable to open $txt_subst_file_local: $!\n";
    while (my $line = <$txt_file>) {
        chomp ($line);
        $line =~ s/\r\z//;      # tolerate files with DOS line endings
        # An empty pattern would match at every position of every input line.
        next if $line eq '';
        # A repeated pattern keeps the replacement it already has.
        next if exists $anonymized_text{$line};

        $anonymized_text{$line} = $ano_txt;
        # adding separator chars (_-) contained in pattern again:
        if ($line =~ /([_-])$/) { $anonymized_text{$line} .= $1; }
        if ($line =~ /^([_-])/) { $anonymized_text{$line} = $1 . $anonymized_text{$line}; }
        ++$ano_txt;             # magic string increment: IsoAAAA, IsoAAAB, ...
    }
    close ($txt_file);
    return;
}

# True if the argument is a valid IPv4 octet value.
sub _in_range { return 0 <= $_[0] && $_[0] <= 255; }

# Computes the replacement for one regex match of find_ipaddrs().
# Parameters: the callback, a reference to the match counter, the four octets
# and the optional "/nn" mask suffix (may be undef).
# Returns the text that replaces the match.
sub _replace_ip_match {
    my ($callback, $r_count, $o1, $o2, $o3, $o4, $mask) = @_;

    my $dotted = join '.', $o1, $o2, $o3, $o4;
    $mask = '' unless defined $mask;

    # Not an ip address (e.g. a version number): hand back exactly what was matched.
    my $valid_octets = grep { _in_range($_) } $o1, $o2, $o3, $o4;
    return $dotted . $mask if $valid_octets != 4;

    ${$r_count}++;
    # every cidr mask is normalised to /32 before the callback sees it
    my $orig_match = $mask ne '' ? $dotted . '/32' : $dotted;
    my $ipaddr = NetAddr::IP->new($orig_match);
    return scalar $callback->($ipaddr, $orig_match);
}

# Finds all IPv4 addresses (optionally followed by /nn) in a string and replaces
# each of them in place with the return value of the callback.
# Parameters: the text (passed by reference through the prototype) and a
# callback receiving (NetAddr::IP object, matched address as string).
# Returns the number of addresses found.
sub find_ipaddrs (\$&) {
    my($r_text, $callback) = @_;
    my $addrs_found = 0;
    my $regex = qr<(\d+)\.(\d+)\.(\d+)\.(\d+)(\/\d\d?)?>;

    $$r_text =~ s{$regex}{
        _replace_ip_match($callback, \$addrs_found, $1, $2, $3, $4, $5)
    }eg;
    return $addrs_found;
}

# Prints the usage text to STDOUT.
sub show_help {
    print (
        "---------------------------------------------------------------\n",
        "iso-anonymizer (c) 2016 by Cactus eSecurity (https://cactus.de)\n",
        "---------------------------------------------------------------\n",
        "iso-anonymizer can be used to substitute any occurence of ip addresses in a set of text files consistently.\n",
        "Might be helpful for anonymizing configuration files of routers, firewalls, etc. before handing them to third parties\n",
        "Consistently means that one ip is always substituted by the same destination ip address.\n",
        "All subnets, where identified as such, are replaced by /32 subnets. Does currently only handle IPv4 addresses.\n",
        "Additionally strings (e.g. customer names, etc.) can be (also consistently) replaced with generated anonymous strings starting with $ano_txt.\n",
        "Make sure that the string patterns do not contain any text that needs to stay unchanged in the output file.\n",
        "Note that anonymizing is performed consistently across all files. So if you need this multiple file consistency, \n",
        "make sure to anonymize all relevant files in a single run.\n",
        "\nSyntax:\n",
        "iso-anonymizer -help -txt-subst-file=<subst-filename> -net=<ip-subnet> <infile1> <infile2> ... <infilen>\n",
        "-help : displays this text (also when called without parameters)\n",
        "-txt-subst-file=<subst-filename> : optional, if parameter is set, substitutes all strings listed in <subst-filename> (one string per line)\n",
        "-net=<ip-subnet> : optional, defaults to '10.0.0.0/8' - ip subnet that is used for ip address substitution\n",
        "<infile1> <infile2> ... <infilen> : list of files to anonymize\n\n",
        "Example:\n",
        "iso-anonymizer -txt-subst-file=subst-strings.txt -net=192.168.88.0/24 file1.cfg file2.cfg file3.cfg\n\n",
    );
    return;
}

# Anonymizes one file.
# Parameters: input file name, replacement net (CIDR notation), output file name.
# Reads and updates %anonymized_ip, reads %anonymized_text and $txt_subst_file.
# Croaks if the net is invalid or a file cannot be opened/written.
sub anonymize {
    my $infile  = shift;
    my $net     = shift;
    my $outfile = shift;

    # (Re)start the address counter only when no counter exists for this net.
    if (!defined($ip_counter) || $ip_counter_net ne $net) {
        $ip_counter = NetAddr::IP->new("$net");
        defined($ip_counter) or croak "Invalid network '$net'\n";
        $ip_counter_net = $net;
    }

    # Build the text pattern once per file instead of once per line. Longer
    # strings come first so that a pattern is not shadowed by one of its
    # prefixes (and so that the result does not depend on hash ordering).
    my $text_regex;
    if (defined($txt_subst_file) && $txt_subst_file ne '' && %anonymized_text) {
        my $alternatives = join("|",
            map  { quotemeta }
            sort { length($b) <=> length($a) || $a cmp $b }
            keys %anonymized_text);
        $text_regex = qr/($alternatives)/;
    }

    open( my $ifh, '<', $infile )  or croak "Unable to open $infile: $!\n";
    open( my $ofh, '>', $outfile ) or croak "Unable to open $outfile: $!\n";

    while (my $line = <$ifh>) {
        find_ipaddrs($line, sub {
            my($ipaddr, $orig) = @_;
            if ($orig =~ /^2[45][0258]\./) {
                # found netmask (assuming IPs starting with 24x.* and 25x.* are netmasks)
                return $anonymized_ip{$orig} if exists $anonymized_ip{$orig};
                $anonymized_ip{$orig} = "255.255.255.255"; # changing all netmask to /32 to avoid invalid cidrs
                return $anonymized_ip{$orig};
            }
            elsif ($orig eq '0.0.0.0') {
                # leave /0 netmask alone
                return $orig;
            }
            else {
                my $netmask = '';
                if ($orig =~ /(.+?)\/32$/) {
                    $orig = $1;
                    $netmask = '/32';
                }
                return $anonymized_ip{$orig} . $netmask if exists $anonymized_ip{$orig};
                # if found ip has not yet an anonymous equivalent in hash - create new ip
                ++$ip_counter;
                $anonymized_ip{$orig} = $ip_counter->addr;
                return $anonymized_ip{$orig} . $netmask;
            }
        });
        if (defined $text_regex) { # obfuscating text
            $line =~ s/$text_regex/$anonymized_text{$1}/g;
        }
        print {$ofh} $line;
    }
    close ($ifh);
    # buffered write errors only surface at close
    close ($ofh) or croak "Unable to write $outfile: $!\n";
    return;
}

###########################
# main start
###########################

my $start_time = time();
my $query = CGI->new;   # CGI is (ab)used to parse the -name=value command line parameters
my $total_filesize = 0;

if ((defined($ARGV[0]) && $ARGV[0] eq "-help") || scalar($query->param)==0) {
    show_help();
    exit 0;
}

if (defined(param("-txt-subst-file"))) {
    $txt_subst_file = param("-txt-subst-file");
    create_string_subst_hash($txt_subst_file);
}
else {
    $txt_subst_file = '';
    print ("no -txt-subst-file specified, not doing any string anonymizing\n");
}

if (defined(param("-net"))) {
    $net = param("-net");
}
else {
    print ("no -net parameter specified, using default net $net\n");
}

# Fail early with a clear message instead of failing inside the first file.
my $anonet = NetAddr::IP->new($net);
defined($anonet) or die "Invalid -net parameter '$net'\n";

# treating all params not starting with - as files to anonymize
# do not re-anonymize files with .iso-anonymized extension and do not anonymize binary files
foreach my $file (@ARGV) {
    if ($file !~ /^-/ && $file !~ /\Q$ano_suffix\E$/ && -T $file) {
        $total_filesize += -s $file;
        print ("anonymizing: $file ... ");
        anonymize($file, $net, $file . $ano_suffix);
        print ("result file = $file$ano_suffix\n");
    }
    else {
        print ("ignoring $file\n");
    }
}

# Generating statistics
my $ip_count   = scalar(keys(%anonymized_ip));
my $text_count = scalar(keys(%anonymized_text));
my $duration   = time() - $start_time;
# guard against a zero duration (e.g. nothing was processed on a coarse clock)
my $throughput = $duration > 0 ? $total_filesize/$duration/1000000 : 0;

print("Anonymized " . $ip_count . " ip addresses and " . $text_count . " strings in " . sprintf("%.1f",$duration) . " seconds");
printf(" (total %.2f MB, %.2f Mbytes/second).\n", $total_filesize/1000000, $throughput);

if ($anonet->num()<$ip_count) {
    print("WARNING: generated " . $ip_count . " anonymized ip addresses (more than available in " . $anonet .
        " which can only hold " . $anonet->num() . " IP addresses).\n");
    print (" Suggest to use bigger subnet if you need uniqueness of IP addresses.\n");
}

=head1 NAME

iso-anonymizer.pl - replace IP addresses with anonymized IPs as well as text with anonymized text in plain text files

=head1 SYNOPSIS

  ./iso-anonymizer.pl [-txt-subst-file=/var/tmp/strings.txt] [-net="192.168.0.0/16"] <config-file1 config-file2 ...>

=head1 DESCRIPTION

This is a script for

a) replacing IP addresses in plain text with anonymized equivalents from
the network range supplied.

b) replacing strings in a file with anonymized strings

Input is a number of ASCII files (all parameters not starting with -)

IP addresses as well as strings are replaced one-for-one throughout all text files,
so once an IP address has an anonymized equivalent, it stays that way.

This is useful if you need to use production configuration data for testing.
E.g. from firewalls but do not want to expose the production data on a test system.
This way you can protect an organization's identity at the same time.

Caveats:

- currently only implemented for IPv4

- beware of anonymizing common strings; e.g. "INT" when handling database dumps
is part of keyword CONSTRAINT; use slightly longer strings like "INT_" instead

Params:

- The network range used for replacement, is set to "10.0.0.0/8" if omitted.

- For each file <infile> supplied an anonymized file called <infile>.iso-anonymized is created.

The -net parameter is a network address, which should be given in CIDR
notation, and really represents a range of IP addresses from which we
can draw from while doing the IP address substitutions (Note that the
use of NetAddr::IP means that we will never overflow this range - but
it will wrap around if we increment it enough). Using an RFC1918
private address range is a good idea.

Note that the script tries to handle network addresses so that network address
and netmask (both given in 255.255.255.x notation as well as a.b.c.d/xy notation)
will match by simply setting all netmasks to /32.

=head1 EXAMPLES

  ./iso-anonymizer.pl -net=172.20.0.0/21 -txt-subst-file=/var/tmp/strings.txt /var/tmp/firewall17.cfg /var/tmp/router9.cfg

  tim@lacantha:$ sudo perl iso-anonymizer.pl -txt-subst-file=strings.txt /var/tmp/netscreen1.cfg
  no -net parameter specified, using default net 10.0.0.0/8
  anonymizing: /var/tmp/netscreen1.cfg ... result file = /var/tmp/netscreen1.cfg.iso-anonymized
  Anonymized 20197 ip addresses and 150 strings in 31.1 seconds (0.46 Mbytes/second).
  tim@lacantha:~$

Anonymizing a whole (ASCII) Postgresql database:

  # creating an ASCII dump of the database:
  pg_dump -U dbadmin -d isodb -W >/var/tmp/iso_db.dump.sql
  # or as postgres user: pg_dump -d isodb >/var/tmp/iso_db.dump.sql
  # turn binary .Fc dump into ascii (only necessary if you do not already have an ascii dump):
  pg_restore /var/tmp/iso_db.dump.Fc >/var/tmp/iso_db.dump.sql
  # anonymizing:
  iso-anonymizer.pl -txt-subst-file=/var/tmp/strings.txt /var/tmp/iso_db.dump.sql
  # restoring anonymized database:
  psql --set ON_ERROR_STOP=on targetdb </var/tmp/iso_db.dump.sql.iso-anonymized

=head1 TODO

- reliably replace network address by networks with consistent netmasks
(currently all networks are reduced to a /32 netmask)

=head1 AUTHOR

Tim Purschke E<lt>tmp@cactus.deE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2016 by Cactus eSecurity GmbH

=head1 SEE ALSO

Behind the door

=cut