« Chinese Breakfast Cereals | HomePage | Redefining the Gap 14, Appendix: National Codes »
Saturday, May 20, 2006
Redefining the Gap 13, Appendix: Computer Code
Note: This is a selection from Redefining the Gap, part of tdaxp's SummerBlog '06

Below is the Perl code I used for data smoothing.
use strict;
# Master data store, keyed by measure/group name and then by nation:
#   $countries{$measure}{$nation} = value
# Initialised with an empty list: assigning {} to a hash stores a stray
# "HASH(0x...)" key, because {} is a hash reference, not an empty hash.
my %countries = ();
# The same data keyed by nation first (filled in by getCountriesXML()):
#   $countries_xml{$nation}{$measure} = value
my %countries_xml = ();
# Run the whole pipeline.
run();
# the grandparent function
# Entry point for the whole script: collect the dependent variables, collect
# the independent variables, then build and write the reports. The report
# step reads the data the first two steps stored, so the order is fixed.
sub run {
    getDVs();
    getIVs();
    return getReports();
}
# the three parent functions
# Loads every data set used for the dependent variables: international
# organization membership, international group membership, and the
# Barnett / worlds classification file.
sub getDVs {
    my @organizations = (
        "Nonaligned Movement",
        "Group of 15",
        "Organization of the Islamic Conference",
        "African Union",
        "Group of 77",
        "Group of 24",
    );
    my @groups = (
        "least developed countries",
        "less developed countries",
    );

    getIOs(@organizations);
    getIGs(@groups);
    return getBarnettWorlds(); # barnett and worlds data in one file
}
# Loads the five independent-variable data sets, one loader per measure.
sub getIVs {
    getPoors();                 # "life in the Gap is poor"
    getNasties();               # "life in the Gap is nasty"
    getShorts();                # "life in the Gap is short"
    getBrutals();               # "life in the Gap is brutal"
    return getSolitaries();     # "life in the Gap is solitary"
}
# Builds the per-nation view of the data, derives the IV_* and DV_* columns,
# and writes both report files.
sub getReports {
    getCountriesXML();      # pivot %countries into %countries_xml
    setIV();                # scaled independent variables
    setDVs();               # coded dependent variables
    getCSVView();           # report.csv
    return getXMLView();    # report.xml
}
# the child functions
# first, the children of getDVs()
# specifically, getIOs(), getIGs(), getBarnettWorlds()
# Scans the factbook appendix-b.html page and records which nations belong
# to each requested international organization.
#
# Arguments: list of organization names as they are spelled in the page.
# Effect:    sets $countries{$organization}{$nation} = 1 per member found.
# Dies if the page cannot be opened.
#
# NOTE(review): the two empty `m//` tests and the empty `s///g` below are
# empty in the only available copy of this code; they presumably held
# HTML-tag patterns that were lost when the listing was published. They are
# left exactly as found -- restore the real patterns before trusting this
# parser (an empty pattern makes Perl reuse the last successful one).
sub getIOs { # get international organizations
    my @ios  = @_;
    my $file = "C:/Downloads/factbook/appendix/appendix-b.html";

    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    while (defined(my $line = readline($in))) {
        if ($line =~ m//) {
            foreach my $io (@ios) {
                # \Q...\E: the name is literal text, not a pattern
                if ($line =~ m/\Q$io\E/ && $line =~ m//) {
                    # advance to the line that carries the member list;
                    # give up at end of file instead of looping forever
                    until ($line =~ m/>members/i) {
                        $line = readline($in);
                        unless (defined $line) {
                            close($in);
                            return;
                        }
                    }
                    # parentheses become separators so parenthesised notes
                    # split away from the nation names
                    $line =~ s/\(/,/g;
                    $line =~ s/\)/,/g;
                    $line =~ s///g;
                    my @fields  = split(/- ,/, $line);
                    my @nations = split(/,/, $fields[$#fields]);
                    shift(@nations); # first one is junk
                    foreach my $nation (@nations) {
                        $countries{$io}{trim($nation)} = 1;
                    }
                }
            }
        }
    }
    close($in);
}
# Scans the factbook appendix-b.html page and records which nations belong
# to each requested international group (the entries whose member list
# follows the text "are: ").
#
# Arguments: list of group names as they are spelled in the page.
# Effect:    sets $countries{$group}{$nation} = 1 per member found.
# Dies if the page cannot be opened.
#
# NOTE(review): the two empty `m//` tests and the empty `s///g` below are
# empty in the only available copy of this code; they presumably held
# HTML-tag patterns that were lost when the listing was published. They are
# left exactly as found -- restore the real patterns before trusting this
# parser (an empty pattern makes Perl reuse the last successful one).
sub getIGs { # get international groups
    my @igs  = @_;
    my $file = "C:/Downloads/factbook/appendix/appendix-b.html";

    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    while (defined(my $line = readline($in))) {
        if ($line =~ m//) {
            foreach my $ig (@igs) {
                # \Q...\E: the name is literal text, not a pattern
                if ($line =~ m/\Q$ig\E/ && $line =~ m//) {
                    # advance to the line that carries the member list;
                    # give up at end of file instead of looping forever
                    until ($line =~ m/are: /i) {
                        $line = readline($in);
                        unless (defined $line) {
                            close($in);
                            return;
                        }
                    }
                    $line =~ s/\(/,/g;
                    $line =~ s/\)/,/g;
                    $line =~ s///g;
                    my @fields = split(/are: /, $line);
                    # keep only the text before the first semicolon
                    $fields[$#fields] =~ s/;.*//g;
                    my @nations = split(/,/, $fields[$#fields]);
                    foreach my $nation (@nations) {
                        $countries{$ig}{trim($nation)} = 1;
                    }
                }
            }
        }
    }
    close($in);
}
# Loads the tab-separated core/gap/worlds classification file.
#
# Columns:
#   0  nation name
#   1  old core / new core / gap
#   2  first world / second world / third world
#   3  neither / g22 / g77
# Effect: fills $countries{"CG"}, $countries{"Worlds123"} and
#         $countries{"Group of 22"}, each keyed by nation name.
# Dies if the file cannot be opened.
sub getBarnettWorlds {
    my $file = "c:/downloads/coregapworlds.csv";

    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    my @lines = readline($in);
    close($in);

    foreach my $line (@lines) {
        chomp($line); # otherwise the last column keeps its newline
        my @fields = split(/\t/, $line);
        $countries{"CG"}{$fields[0]}          = $fields[1];
        $countries{"Worlds123"}{$fields[0]}   = $fields[2];
        $countries{"Group of 22"}{$fields[0]} = $fields[3];
    }
}
# second, the children of getIVs()
# specifically, getPoors, getNasties, getShorts, getBrutals, getSolitaries
# Loads the "poor" measure from factbook rank-order file 2004, stripping
# dollar signs and thousands separators from each value.
sub getPoors {
    # '$' is single-quoted on purpose: a double-quoted string that ends in a
    # bare dollar sign does not compile.
    getCIAInfo("poor", "C:/Downloads/factbook/rankorder/2004rank.txt", ['$', ',']);
}
# Loads the "nasty" measure from the tab-separated FIWrank7305.csv file.
#
# For each line the state name is column 0 and the two scores are the
# third-from-last and second-from-last columns ($pr and $cl; presumably
# political-rights and civil-liberties ratings -- confirm against the data
# file). When both contain a digit, their mean is stored in
# $countries{"nasty"}{$state}.
# Dies if the file cannot be opened.
sub getNasties {
    my $file = "c:/downloads/FIWrank7305.csv";

    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    while (defined(my $line = readline($in))) {
        my @fields = split(/\t/, $line);
        my $state  = $fields[0];
        my $pr     = $fields[$#fields - 2];
        my $cl     = $fields[$#fields - 1];
        # rows without numeric scores are skipped
        if ($pr =~ m/[0-9]/ && $cl =~ m/[0-9]/) {
            $countries{"nasty"}{$state} = ($pr + $cl) / 2;
        }
    }
    close($in);
}
# Builds the "brutal" measure: a per-country tally of crisis/war years after
# 1992, read from two tab-separated files.
#
#   fields.csv : column 0 = country name, column 1 = country code
#   icb2.csv   : column 4 = actor code,   column 5 = start year
#
# Effect: sets $countries{"brutal"}{$name} and $countries{"wars"}{$name} for
#         every code seen, and removes the temporary $countries{"codes"}.
# Dies if either file cannot be opened.
sub getBrutals {
    my $file_war  = "c:/downloads/icb2.csv";
    my $file_code = "c:/downloads/fields.csv";

    # get the country codes
    open(my $codes, '<', $file_code) || die "Couldn't open $file_code: $!";
    while (defined(my $line = readline($codes))) {
        chomp($line);
        $line =~ s/"//g;
        my @fields = split(/\t/, $line);
        my $name   = trim($fields[0]);
        my $code   = trim($fields[1]);
        $countries{"codes"}{$code} = $name;
        $countries{"wars"}{$code}  = 0; # baseline 0 if country is in db
    }
    close($codes);

    # get the wars
    open(my $wars, '<', $file_war) || die "Couldn't open $file_war : $!";
    while (defined(my $line = readline($wars))) {
        my @fields = split(/\t/, $line);
        # $fields[4] = Actor
        # $fields[5] = Start Year
        # $fields[8] .. $fields[13] = war name
        # $fields[57] (?) = year term
        my $state      = $fields[4];
        my $year_start = $fields[5];
        # NOTE(review): the end year is read from the start-year column, so
        # every counted crisis contributes exactly one year and the clamp
        # below never fires. The field list above suggests column 57 may
        # have been intended -- confirm against the data file.
        my $year_end   = $fields[5];
        if ($year_end > 1992) {
            if ($year_start < 1992) {
                $year_start = 1992;
            }
            $countries{"wars"}{$state} += $year_end - $year_start + 1;
        }
    }
    close($wars);

    # now do the math: re-key the tallies from country code to country name
    foreach my $war (sort keys %{$countries{"wars"}}) {
        my $name = $countries{"codes"}{$war};
        $countries{"brutal"}{$name} = $countries{"wars"}{$war};
        $countries{"wars"}{$name}   = $countries{"wars"}{$war};
    }
    delete $countries{"codes"};
    #delete $countries{"wars"};
}
# Loads the "short" measure (life expectancy) from factbook rank-order file
# 2102, stripping dollar signs from each value.
sub getShorts {
    # '$' is single-quoted on purpose: a double-quoted string that ends in a
    # bare dollar sign does not compile.
    getCIAInfo("short", "C:/Downloads/factbook/rankorder/2102rank.txt", ['$']); # life expectancy
}
# Builds the "solitary" measure: internet hosts per head of population.
#
# Loads $countries{"hosts"} and $countries{"population"} from the factbook
# rank-order files, then stores hosts / population in
# $countries{"solitary"}{$nation} for every nation present in both.
sub getSolitaries {
    getCIAInfo("hosts", "C:/Downloads/factbook/rankorder/2184rank.txt", [","]);      # internet hosts
    getCIAInfo("population", "C:/Downloads/factbook/rankorder/2119rank.txt", [","]); # population

    foreach my $key (keys %{$countries{"hosts"}}) {
        next unless exists($countries{"population"}{$key});
        my $population = $countries{"population"}{$key};
        # a missing, zero or non-numeric population would divide by zero
        next if !defined($population) || $population == 0;
        $countries{"solitary"}{$key} = $countries{"hosts"}{$key} / $population;
    }
}
# third, the children of getReports()
# specifically, getCountriesXML, setIV, setDVs, getCSVView, getXMLView
# Pivots %countries (measure -> nation -> value) into %countries_xml
# (nation -> measure -> value), normalises nation names, drops nations that
# have no "CG" classification, and then rebuilds %countries from the result.
#
# Reads the tab-separated rename.csv: column 0 = old name, column 1 =
# correct name.
# Dies if rename.csv cannot be opened.
sub getCountriesXML {
    my $file = "c:/downloads/rename.csv";

    # first, simply transform the data structure
    foreach my $key (sort keys %countries) {
        #print "Working on key $key\n";
        foreach my $nation (sort keys %{$countries{$key}}) {
            if ($nation) { # skips empty names
                $countries_xml{$nation}{$key} = $countries{$key}{$nation};
            }
        }
    }

    # then, fix any naming errors
    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    my @lines = readline($in);
    close($in);
    foreach my $line (@lines) {
        chomp($line);
        my @fields = split(/\t/, $line);
        my ($old, $new) = @fields[0, 1];
        # a line without both names would move data under an empty key
        next unless defined($old) && defined($new) && length($new);
        # renaming a nation to itself would copy and then delete it
        next if $old eq $new;
        if ($countries_xml{$old}) {
            foreach my $key (keys %{$countries_xml{$old}}) {
                $countries_xml{$new}{$key} = $countries_xml{$old}{$key};
            }
            delete $countries_xml{$old};
        }
    }

    # remove countries that shouldn't exist
    foreach my $nation (sort keys %countries_xml) {
        unless (exists($countries_xml{$nation}{"CG"})) {
            delete $countries_xml{$nation};
        }
    }

    # then, back-propagate the changes
    %countries = (); # an empty list; assigning undef leaves a stray '' key
    foreach my $nation (sort keys %countries_xml) {
        foreach my $key (sort keys %{$countries_xml{$nation}}) {
            $countries{$key}{$nation} = $countries_xml{$nation}{$key};
        }
    }
}
# Derives the coded dependent variables (the DV_* columns) for every nation
# in %countries_xml from the membership and classification data already
# stored for that nation.
sub setDVs {
    # $rec aliases each nation's measure hash inside %countries_xml, so
    # writing through it updates %countries_xml directly.
    foreach my $rec (@countries_xml{sort keys %countries_xml}) {

        # African Union / Organization of the Islamic Conference
        my $afro_islam = $rec->{"African Union"} == 1
                      || $rec->{"Organization of the Islamic Conference"} == 1;
        $rec->{"DV_AfricanIslam"} = $afro_islam ? 0 : 1;

        # BarnettCalculation
        my $cg = $rec->{"CG"};
        if ($cg == 1) {
            @{$rec}{"DV_OCNCG", "DV_CG"} = (0, 0);
        }
        elsif ($cg == 2) {
            @{$rec}{"DV_OCNCG", "DV_CG"} = (1, 1);
        }
        elsif ($cg == 3) {
            @{$rec}{"DV_OCNCG", "DV_CG"} = (2, 1);
        }

        # Group of 22 / Group of 77
        my $in_g77 = $rec->{"Group of 77"} == 1;
        my $in_g22 = $rec->{"Group of 22"} == 2;
        if ($in_g77 && $in_g22) {       # both means G77:0 but G22:1
            @{$rec}{"DV_G77", "DV_G2277"} = (0, 1);
        }
        elsif ($in_g77) {               # just G77 is 0 for both
            @{$rec}{"DV_G77", "DV_G2277"} = (0, 0);
        }
        else {
            @{$rec}{"DV_G77", "DV_G2277"} = (1, 2);
        }

        ## developed countries
        my $is_less  = $rec->{"less developed countries"} == 1;
        my $is_least = $rec->{"least developed countries"} == 1;
        if ($is_least && $is_less) {
            @{$rec}{"DV_LDCs", "DV_LDCsLLDCs"} = (0, 0);
        }
        elsif ($is_less) {
            @{$rec}{"DV_LDCs", "DV_LDCsLLDCs"} = (0, 1);
        }
        else {
            @{$rec}{"DV_LDCs", "DV_LDCsLLDCs"} = (1, 2);
        }

        ## worlds 1 2 3
        my $world = $rec->{"Worlds123"};
        if ($world == 1) {
            $rec->{"DV_WorldsFreeComNon"} = 2;
        }
        elsif ($world == 2) {
            $rec->{"DV_WorldsFreeComNon"} = 1;
        }
        elsif ($world == 3) {
            $rec->{"DV_WorldsFreeComNon"} = 0;
        }

        # Group of 15 / NAM
        if ($rec->{"Nonaligned Movement"}) {
            $rec->{"DV_Nalign"}    = 0;
            $rec->{"DV_G15Nalign"} = $rec->{"Group of 15"} ? 1 : 0;
        }
        else {
            $rec->{"DV_Nalign"}    = 1;
            $rec->{"DV_G15Nalign"} = 2;
        }
    }
}
# Rescales the five raw measures with scaleDataXML() and stores the
# independent variables (the IV_* columns) for every nation. "brutal" and
# "nasty" are inverted (1 - value); the other three are copied as scaled.
sub setIV {
    # the nation list is captured before any scaling takes place
    my @nations = sort keys %countries_xml;

    foreach my $measure ("brutal", "nasty", "poor", "solitary", "short") {
        scaleDataXML($measure);
    }

    # $rec aliases each nation's measure hash inside %countries_xml
    foreach my $rec (@countries_xml{@nations}) {
        foreach my $inverted ("brutal", "nasty") {
            $rec->{"IV_$inverted"} = 1 - $rec->{$inverted};
        }
        foreach my $direct ("poor", "solitary", "short") {
            $rec->{"IV_$direct"} = $rec->{$direct};
        }
    }
}
# Writes report.csv: a semicolon-separated header line, a block listing the
# column names one per line, and then one row per nation that has a DV_CG
# value.
# Dies if report.csv cannot be opened or closed.
sub getCSVView {
    my @iv_columns = qw(IV_brutal IV_nasty IV_poor IV_solitary IV_short);
    my @dv_columns = qw(
        DV_OCNCG DV_CG DV_G77 DV_G2277 DV_AfricanIslam DV_Nalign
        DV_G15Nalign DV_LDCs DV_LDCsLLDCs DV_WorldsFreeComNon
    );

    open(my $out, '>', "report.csv") || die "Couldn't open report.csv: $!";
    print {$out} "Nation;Brutal;Nasty;Poor;Solitary;Short;IV;OCNCG;CG;G77;G2277;AfroIslam;Nalign;G15Nalign;LDCs;LDCsLLDCs;FreeComNon\n";
    # NOTE(review): this second block repeats the column names one per line
    # inside the CSV itself; kept as found -- confirm it is still wanted.
    print {$out} "Brutal\nNasty\nPoor\nSolitary\nShort\nIV\nOCNCG\nCG\nG77\nG2277\nAfroIslam\nNalign\nG15Nalign\nLDCs\nLLDCsLDCs\nFreeComNon\n\n\n";

    foreach my $nation (sort keys %countries_xml) {
        next unless exists($countries_xml{$nation}{"DV_CG"});
        my $rec = $countries_xml{$nation};
        print {$out} join(";",
            $nation,
            @{$rec}{@iv_columns},
            "calculate IV",     # placeholder text for the combined IV column
            @{$rec}{@dv_columns},
        ) . "\n";
    }
    # buffered write errors only surface when the handle is closed
    close($out) || die "Couldn't close report.csv: $!";
}
# Writes report.xml: for every nation in %countries_xml (sorted), one
# tab-indented line per stored measure (sorted by measure name).
# Dies if report.xml cannot be opened or closed.
#
# NOTE(review): the string literals below contain only whitespace and the
# value, yet the function and file names say XML. The opening/closing tags
# were presumably lost when the listing was published; the tag names cannot
# be recovered from this copy, so only the tab/newline escapes have been
# restored. Re-add the markup before using the output as XML.
sub getXMLView {
    open(my $out, '>', "report.xml") || die "Couldn't open report.xml: $!";
    print {$out} "\n";
    foreach my $nation (sort keys %countries_xml) {
        print {$out} "\t\n";
        foreach my $value (sort keys %{$countries_xml{$nation}}) {
            print {$out} "\t\t$countries_xml{$nation}{$value} \n";
        }
        print {$out} "\t \n";
    }
    print {$out} " \n";
    # buffered write errors only surface when the handle is closed
    close($out) || die "Couldn't close report.xml: $!";
}
# fourth, the grandchildren go here
# specifically, getCIAInfo, scaleDataXML, scaleData, and trim
# Loads one measure from a tab-separated factbook rank-order file.
#
# Arguments:
#   $record    - key under which the values are stored in %countries
#   $file      - path of the rank-order file
#   $to_remove - array ref of characters to delete from each value
#                (for example '$' and ',')
# Effect: sets $countries{$record}{$country} = cleaned value for every data
#         line that names a country. The first two lines of the file are
#         skipped.
# Dies if the file cannot be opened.
sub getCIAInfo {
    my $record    = shift(@_);
    my $file      = shift(@_);
    my @to_remove = @{shift(@_)};

    open(my $in, '<', $file) || die "Couldn't open $file: $!";
    my @lines = readline($in);
    close($in);

    foreach my $line (@lines[2 .. $#lines]) {
        my @fields = split(/\t/, $line);
        next unless $fields[1]; # if the country is named
        # rank order is $fields[0]
        # country is $fields[1]
        # GDP per capita is $fields[2]
        # year est is $fields[3]
        my $country = trim($fields[1]);
        my $value   = $fields[2];
        foreach my $remove (@to_remove) {
            next unless length($remove); # an empty class is not a valid regex
            # \Q...\E keeps characters such as '$', ']' or '^' literal
            $value =~ s/[\Q$remove\E]//g;
        }
        $countries{$record}{$country} = trim($value);
    }
}
# Rescales one measure in %countries_xml to the range 0..1 in place
# (min-max scaling): value = (value - min) / (max - min).
#
# Argument: $record - name of the measure to rescale.
# Only nations that actually have the measure take part; nations without it
# are left untouched. If every value is identical, the scaled value is 0
# because there is no spread to scale by.
sub scaleDataXML {
    print "Entering scaleDataXML\n";
    my $record = shift(@_);

    my @nations = grep { exists($countries_xml{$_}{$record}) }
                  sort keys %countries_xml;
    unless (@nations) {
        print "$record has no values to scale\n";
        return;
    }

    # first, find min and max, seeded from a nation that has the measure
    my $min = $countries_xml{$nations[0]}{$record};
    my $max = $min;
    foreach my $nation (@nations) {
        my $value = $countries_xml{$nation}{$record};
        $max = $value if $max < $value;
        $min = $value if $min > $value;
    }
    print "$record goes from $min to $max\n";

    # second, scale
    my $range = $max - $min;
    foreach my $nation (@nations) {
        $countries_xml{$nation}{$record} = $range
            ? ($countries_xml{$nation}{$record} - $min) / $range
            : 0;
    }
}
# function from http://www.somacon.com/p114.php
# Returns a copy of the given string with leading and trailing whitespace
# removed. Whitespace inside the string is kept.
sub trim($) {
    my $string = shift;
    $string =~ s/^\s+//;   # \s is whitespace; a bare "s" would eat letters
    $string =~ s/\s+$//;
    return $string;
}
Redefining the Gap, a tdaxp series:
Redefining the Gap 1. Prologue
Redefining the Gap 2. Summary
Redefining the Gap 3. Introduction to Geopolitics
Redefining the Gap 4. First Geopolitical Theories
Redefining the Gap 5. The North and the South
Redefining the Gap 6. Critical Geopolitics
Redefining the Gap 7. The Pentagon's New Map
Redefining the Gap 8. The Research Design
Redefining the Gap 9. Methods and Operationalizations
Redefining the Gap 10. Limitations and Conclusion
Redefining the Gap 11. Results
Redefining the Gap 12. Bibliography
Redefining the Gap 13. Appendix: Computer Code
Redefining the Gap 14. Appendix: National Codes
08:45 Posted in Software, Thomas Barnett, UNL / Scope & Methods | Permalink | Comments (0) | Email this | Tags: pnm, code
Post a comment