File indexing completed on 2023-05-30 09:04:26
#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
#
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# This python script parses the CIA Factbook 2007 for country data and
# saves it in KML format
# Place this script in the root directory of the CIA Factbook
# (usually "factbook") and run it.
#
# SPDX-FileCopyrightText: 2004-2007 Torsten Rahn <tackat@kde.org>"
#

import re
import urllib

try:
    import urllib2
except ImportError:
    # There is no urllib2 module on Python 3; keep the name bound so the
    # script can still be imported there.
    urllib2 = None

# On Python 2, urllib sends the class attribute "version" as the User-Agent
# header and urllib.urlopen() goes through urllib._urlopener.  The Python 3
# urllib package has no FancyURLopener attribute, so the override is only
# installed where that class is available.
if hasattr(urllib, "FancyURLopener"):
    class AppURLopener(urllib.FancyURLopener):
        version = "Mozilla/5.0"

    urllib._urlopener = AppURLopener()


# This method converts the representation of the coordinate values
# on the CIA webpages to the representation we need in the KML file.

def convertCoordinates( ciaString ):
    """Return the KML "longitude,latitude" string for a Factbook value.

    ciaString is expected to look like "DD MM H, DD MM H" (degrees,
    minutes and a hemisphere letter, latitude first).  Anything from the
    first ";" onwards is dropped, and so is everything up to and including
    the first "</i> ".

    Southern and western values become negative.  Both numbers are
    formatted with three decimals.  If only one coordinate is present the
    longitude is reported as 0.

    Raises ValueError if a coordinate does not consist of two numbers
    followed by a hemisphere token.
    """
    # Drop a trailing remark that is separated by a semicolon.
    coordinateString = ciaString.split(";", 1)[0].strip()

    # Drop a leading label that ends with a closing italics tag.
    labelEnd = coordinateString.find("</i> ")
    if labelEnd != -1:
        coordinateString = coordinateString[labelEnd + 5:].strip()

    latCoordinate = 0.0
    lonCoordinate = 0.0

    for position, coordinate in enumerate(coordinateString.split(", ")):
        components = coordinate.split()
        if len(components) < 3:
            raise ValueError("malformed coordinate: %r" % (coordinate,))

        coordinateValue = float(components[0]) + float(components[1]) / 60.0
        if components[2] in ("W", "S"):
            coordinateValue = -coordinateValue

        # The first entry is the latitude; decide by position rather than
        # by comparing the text with the first list element.
        if position == 0:
            latCoordinate = coordinateValue
        else:
            lonCoordinate = coordinateValue

    latString = "%04.3f" % latCoordinate
    lonString = "%04.3f" % lonCoordinate
    # KML wants longitude first.
    return lonString + "," + latString


# This method converts the
# representation of the population value
# on the CIA webpages to the representation we need in the KML file.
def convertPopulation( ciaString ):
    """Return the population found at the start of a Factbook value.

    Everything up to and including the first "</i> " is dropped, then the
    first word is inspected.  If that word is "uninhabited" or "no" the
    integer 0 is returned.  Otherwise the word is returned as a string
    with its thousands separators (",") removed.

    Raises ValueError if the value is empty or the first word is not an
    integer.
    """
    labelEnd = ciaString.find("</i> ")
    if labelEnd != -1:
        ciaString = ciaString[labelEnd + 5:]

    words = ciaString.split()
    if not words:
        raise ValueError("no population value in %r" % (ciaString,))

    # Checked after the label has been removed so that a labelled
    # "uninhabited" is recognised as well.
    if words[0] in ("uninhabited", "no"):
        return 0

    populationString = words[0].replace(",", "")
    # Only validates; the digits are handed back as text.
    int(populationString)
    return populationString


def convertArea( ciaString ):
    """Return the area found in a Factbook value, formatted with one decimal.

    The value is narrowed down step by step: text up to and including the
    first "</i> " is dropped, text from the first "(" onwards is dropped,
    only the text after the first ";" is kept, and only the text after the
    first "land -" is kept.  The first remaining word is parsed as a number
    after removing "," separators.  If the second word is "million" the
    number is scaled accordingly.

    Raises ValueError if nothing is left to parse or the first word is not
    a number.
    """
    labelEnd = ciaString.find("</i> ")
    if labelEnd != -1:
        ciaString = ciaString[labelEnd + 5:].strip()

    remarkStart = ciaString.find("(")
    if remarkStart != -1:
        ciaString = ciaString[:remarkStart].strip()

    separator = ciaString.find(";")
    if separator != -1:
        ciaString = ciaString[separator + 1:].strip()

    landStart = ciaString.find("land -")
    if landStart != -1:
        ciaString = ciaString[landStart + 6:].strip()

    words = ciaString.split()
    if not words:
        raise ValueError("no area value in %r" % (ciaString,))

    area = float(words[0].replace(",", ""))
    # NOTE(review): handles figures written like "14 million sq km";
    # confirm which Factbook entries, if any, use this form.
    if len(words) > 1 and words[1] == "million":
        area *= 1000000.0

    return "%02.1f" % area


# First build two hashes that map FIPS10 to ISO country codes
# and FIPS10 to country names. As a source for data we use
# the appendix-d.html of the CIA Factbook.

# Read the code table from the local Factbook copy.  ISO-8859-1 maps every
# byte to exactly one code point, so decoding cannot fail.
# NOTE(review): the real encoding of the Factbook pages is not visible
# here - confirm.
import io

with io.open("./appendix/appendix-d.html", encoding="iso-8859-1") as codeFile:
    countryCodeSource = codeFile.readlines()

startTag="<td width=\"15%\" align=\"left\" valign=top class=\"Normal\"><b>"
stopTag="</b></td>"
finishTag="</tr>"
subStartTag="<td width=\"10%\" valign=top class=\"Normal\">"
subStopTag="</td>"
fipsString=""
country=""

inside=0
subcount=0

countryDict={}
countryCodeDict={}

# Compiled once instead of once per line; the tags are escaped so they are
# always matched literally.
countryCellPattern = re.compile('(.*)' + re.escape(startTag) + '(.*)' + re.escape(stopTag))
codeCellPattern = re.compile('(.*)' + re.escape(subStartTag) + '(.*)' + re.escape(subStopTag))

# NOTE(review): the nesting of this loop was reconstructed from a listing
# that had lost its indentation - compare with the upstream file.
for line in countryCodeSource:
    if countryCellPattern.match(line) is not None:
        # A bold cell starts a new table row and carries the country name;
        # only the part before the first comma is kept.
        country = line[ line.find(startTag) + len(startTag) : line.find(stopTag) ].strip()
        country = country.split(',')[0]
        inside = 1
    elif inside == 1 and codeCellPattern.match(line) is not None:
        sub = line[ line.find(subStartTag) + len(subStartTag) : line.find(subStopTag) ].strip()
        subcount += 1
        if subcount == 1:
            # First code cell of the row: used as the dictionary key.
            fipsString = sub
        elif subcount == 2:
            # Second code cell of the row: stored as the country code.
            # Rows where either cell is "-" are skipped.
            if fipsString.strip() != "-" and sub.strip() != "-":
                countryCodeDict[fipsString] = sub
                countryDict[fipsString] = country

    # End of the table row: wait for the next country cell.
    if finishTag in line:
        inside = 0
        subcount = 0

# Now that the hashes are in place we go through each
# website named after the fips10 code to gather the data
# we need for the KML file.

startTopicTag="<div align=\"right\">"
stopTopicTag="</div>"
topicTagList=["Background:", "Geographic coordinates:", "Area:", "Population:"]
topicBackgroundString=""
topicCoordinatesString=""
topicAreaString=""
topicPopulationString=""
placemarkFolder=""
valueTag="<br> "
finishTag="</table>"
inside=0
subcount=-1

print("Parsing ...")

import io

# ISO-8859-1 maps every byte to exactly one code point and back, so text
# read and written with it keeps the bytes of the Factbook pages unchanged.
# NOTE(review): the KML header below declares UTF-8; whether that matches
# the bytes of the Factbook pages cannot be told from this script - confirm.
pageEncoding = "iso-8859-1"

placemarkList = []

# NOTE(review): the nesting of this loop was reconstructed from a listing
# that had lost its indentation - compare with the upstream file.
for item in countryCodeDict:
    print(item)
    fileUrlString = "./geos/%s.html" % (item.lower())
    try:
        with io.open(fileUrlString, encoding=pageEncoding) as handle:
            countrySource = handle.readlines()
    except IOError:
        print("Error: No file for " + countryDict.get( item, "-" ))
        continue

    # Collected per country, so a topic that is missing on this page can
    # never pick up the value scraped from a previous page.
    topicValues = {}

    for topicTag in topicTagList:
        topicHeader = startTopicTag + topicTag + stopTopicTag
        # Start every scan from a clean state, even if the previous scan
        # found a header but never reached its value.
        inside = 0
        subcount = -1
        for line in countrySource:
            if topicHeader in line:
                inside = 1
                subcount = -1
            elif inside == 1:
                if valueTag in line:
                    # Start counting lines from the value marker.
                    subcount = 0
                # The value is the first non-blank line at least two lines
                # after the marker line.
                if subcount > 1 and line.strip() != "":
                    topicValues[topicTag] = line.strip()
                    inside = 0
                    subcount = -1
                    break
                if subcount > -1:
                    subcount += 1

    topicBackgroundString = topicValues.get("Background:", "")

    try:
        topicCoordinatesString = convertCoordinates( topicValues.get("Geographic coordinates:", "") )
        topicAreaString = convertArea( topicValues.get("Area:", "") )
        topicPopulationString = convertPopulation( topicValues.get("Population:", "") )
    except (ValueError, IndexError):
        # Missing or unparsable figures: leave the country out rather than
        # writing a placemark with wrong numbers.
        print("Error: Incomplete data for " + countryDict.get( item, "-" ))
        continue

    countryNameString = countryDict.get( item, "-" )
    countryCodeString = countryCodeDict.get( item, "-" )

    topicBackgroundString = topicBackgroundString + " <i>Source: CIA - The World Factbook 2007</i> "

    # Now that we have all the data needed we create a KML snippet that
    # represents the country data and add it to the previous ones that
    # we've generated already.
    # NOTE(review): the values are inserted without XML escaping, and the
    # whitespace inside the templates was reconstructed from a listing
    # that had collapsed it.

    placemarkString = '''
    <Placemark>
        <name>%s</name>
        <description>%s</description>
        <countrycode>%s</countrycode>
        <role>S</role>
        <area>%s</area>
        <pop>%s</pop>
        <Point>
            <coordinates>%s</coordinates>
        </Point>
    </Placemark>''' %( countryNameString, topicBackgroundString, countryCodeString, topicAreaString, topicPopulationString, topicCoordinatesString )
    placemarkList.append(placemarkString)

placemarkFolder += "".join(placemarkList)

# Now we insert all our KML code for the countries into a KML
# document

kmlDocument = '''<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://earth.google.com/kml/2.0">
<Document>
    <SimpleField>
        <name>pop</name>
        <type>int</type>
    </SimpleField>
    <SimpleField>
        <name>area</name>
        <type>int</type>
    </SimpleField>
    <SimpleField>
        <name>state</name>
        <type>string</type>
    </SimpleField>
    <SimpleField>
        <name>countrycode</name>
        <type>string</type>
    </SimpleField>
    <SimpleField>
        <name>role</name>
        <type>string</type>
    </SimpleField>
    %s
</Document>
</kml>''' %( placemarkFolder)

# Finally we save the KML document we have created

with io.open('ciacountries.kml', 'wb') as out_file:
    out_file.write(kmlDocument.encode(pageEncoding))