Warning, /education/labplot/data/datasets/JSEDataArchive.json is written in an unsupported language. File is not indexed.

0001 {
0002     "name": "JSEDataArchive",
0003     "categories": [
0004         {
0005             "name": "Medicine",
0006             "subcategories": [
0007                 {
0008                     "name": "Common",
0009                     "datasets": [
0010                         {
0011                             "description": "Time of Birth, Sex, and Birth Weight of 44 Babies",
0012                             "description_url": "http://jse.amstat.org/datasets/babyboom.txt",
0013                             "url": "http://jse.amstat.org/datasets/babyboom.dat.txt",
0014                             "filename": "babyboom",
0015                             "name": "Time of Birth, Sex, and Birth Weight of 44 Babies",
0016                             "separator": "TAB",
0017                             "columns": ["Time of birth recorded on the 24-hour clock", "Sex of the child (1 = girl, 2 = boy)", "Birth weight in grams", "Number of minutes after midnight of each birth"]
0018                         },
0019                         {
0020                             "description": "This dataset contains 21 body dimension measurements as well as age, \nweight, height, and gender on 507 individuals. The 247 men and 260 \nwomen were primarily individuals in their twenties and thirties, with a \nscattering of older men and women, all exercising several hours a week.",
0021                             "url": "http://jse.amstat.org/datasets/body.dat.txt",
0022                             "filename": "Body",
0023                             "name": "Exploring Relationships in Body Dimensions",
0024                             "separator": "SPACE"
0025                         },
0026                         {
0027                             "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorus, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorus \nin millimoles per liter. Alkphos is measuring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratories would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ",
0028                             "description_url": "http://jse.amstat.org/datasets/calcium.txt",
0029                             "url": "http://jse.amstat.org/datasets/calcium.dat.txt",
0030                             "filename": "Calcium",
0031                             "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ",
0032                             "separator": "SPACE",
0033                             "columns": ["OBSNO - Patient Observation Number", "AGE - Years", "SEX - 1=Male, 2=Female", "ALKPHOS - Alkaline Phosphatase International Units/Liter", "Lab - 1=Metpath; 2=Deyor; 3=St. Elizabeth's; 4=CB Rouche; 5=YOH; 6=Horizon",
0034                             "CAMMOL - Calcium mmol/L", "PHOSMMOL - Inorganic Phosphorus mmol/L", "AGEGROUP - Age group 1=65-69; 2=70-74; 3=75-79; 4=80-84; 5=85-89 Years"]
0035                         },
0036                         {
0037                             "description": "Each record contains the results of a laboratory analysis of calcium, \ninorganic phosphorus, and alkaline phosphatase. The variable cammol \nis measured as millimoles per liter. Phosmol is inorganic phosphorus \nin millimoles per liter. Alkphos is measuring alkaline phosphatase in \ninternational units per liter. The purpose of the study was to \ndetermine if significant gender differences exist in the mean values \nof calcium, inorganic phosphorus, and alkaline phosphatase in \nsubjects over age 65. A second purpose was to determine if analytical \nvariation between laboratories would affect the mean values of the study variables. \nCalcium.dat contains incorrect records that have transcription errors. Calciumgood.dat \ncontains the corrected values. ",
0038                             "description_url": "http://jse.amstat.org/datasets/calcium.txt",
0039                             "url": "http://jse.amstat.org/datasets/calciumgood.dat.txt",
0040                             "filename": "Calciumgood",
0041                             "name": " Calcium, inorganic phosphorus and alkaline phosphatase levels in elderly patients ",
0042                             "separator": "SPACE",
0043                             "columns": ["OBSNO - Patient Observation Number", "AGE - Years", "SEX - 1=Male, 2=Female", "ALKPHOS - Alkaline Phosphatase International Units/Liter", "Lab - 1=Metpath; 2=Deyor; 3=St. Elizabeth's; 4=CB Rouche; 5=YOH; 6=Horizon",
0044                             "CAMMOL - Calcium mmol/L", "PHOSMMOL - Inorganic Phosphorus mmol/L", "AGEGROUP - Age group 1=65-69; 2=70-74; 3=75-79; 4=80-84; 5=85-89 Years"]
0045                         },
0046                         {
0047                             "description": "Percentage of body fat, age, weight, height, and ten body circumference\nmeasurements (e.g., abdomen) are recorded for 252 men. Body fat, a\nmeasure of health, is estimated through an underwater weighing\ntechnique. Fitting body fat to the other measurements using multiple\nregression provides a convenient way of estimating body fat for men\nusing only a scale and a measuring tape.",
0048                             "description_url": "http://jse.amstat.org/datasets/fat.txt",
0049                             "url": "http://jse.amstat.org/datasets/fat.dat.txt",
0050                             "filename": "fat",
0051                             "name": "Fitting Percentage of Body Fat to Simple Body Measurements",
0052                             "separator": "SPACE",
0053                             "columns": ["Case Number", "Percent body fat using Brozek's equation, 457/Density - 414.2", "Percent body fat using Siri's equation, 495/Density - 450", "Density (gm/cm^3)",
0054                             "Age (yrs)", "Weight (lbs)", "Height (inches)", "Adiposity index = Weight/Height^2 (kg/m^2)", "Fat Free Weight = (1 - fraction of body fat) * Weight, using Brozek's formula (lbs)",
0055                             "Neck circumference (cm)", "Chest circumference (cm)", "Abdomen circumference (cm) \"at the umbilicus and level with the iliac crest\"", "Hip circumference (cm)", "Thigh circumference (cm)",
0056                             "Knee circumference (cm)", "Ankle circumference (cm)", "Extended biceps circumference (cm)", "Forearm circumference (cm)", "Wrist circumference (cm) \"distal to the styloid processes\""]
0057                         },
0058                         {
0059                             "description": "Sample of 654 youths, aged 3 to 19, in the area of East Boston\nduring middle to late 1970's. Interest concerns the relationship\nbetween smoking and FEV. Since the study is necessarily\nobservational, statistical adjustment via regression models\nclarifies the relationship.",
0060                             "description_url": "http://jse.amstat.org/datasets/fev.txt",
0061                             "url": "http://jse.amstat.org/datasets/fev.dat.txt",
0062                             "filename": "fev",
0063                             "name": "Forced Expiratory Volume (FEV) Data",
0064                             "separator": "SPACE",
0065                             "columns": ["age - discrete measure, positive integer (years)", "fev - continuous measure (liters)", "ht - continuous measure (inches)", "sex - discrete/nominal (Female coded 0, Male coded 1)", "smoke - discrete/nominal (Nonsmoker coded 0, Smoker coded 1)"]
0066 
0067                         },
0068                         {
0069                             "description": "The tab-delimited data set gives characteristics of young female patients between\nthe ages of 11 to 26 who came to clinics of Johns Hopkins Medical Institutions between\n2006 and 2008 to begin the three-shot regimen of vaccinations with the anti-human\npapillomavirus (HPV) medication Gardasil.   ",
0070                             "url": "http://jse.amstat.org/v19n1/gardasil.dat.txt",
0071                             "filename": "gardasil",
0072                             "name": "Retrospective Study (Potential Predictors for Completion or Non-Completion of the Three-Shot Anti-HPV Gardasil Vaccine Regimen)",
0073                             "separator": "TAB",
0074                             "use_first_row_for_vectorname": true
0075                         },
0076                         {
0077                             "description": "From a very young age, shoes for boys tend to be wider than shoes for \ngirls. Is this because boys have wider feet, or because it is assumed that \ngirls, even in elementary school, are willing to sacrifice comfort for fashion? \nTo assess the former, a statistician measures kids' feet.  Methods for analysis include \nt-tests, ANCOVA, and least-squares model building.  This data set is useful for \ndiscussion of covariates, confounding, and conclusions in the context of the problem.",
0078                             "description_url": "http://jse.amstat.org/datasets/kidsfeet.txt",
0079                             "url": "http://jse.amstat.org/datasets/kidsfeet.dat.txt",
0080                             "filename": "kidsfeet",
0081                             "name": "Foot measurements for fourth grade children",
0082                             "separator": "SPACE",
0083                             "columns": ["Birthdate: month and year  (data were collected in October 1997)", "Length of longer foot (cm)", "Width of longer foot (cm), measured at widest part of foot", "Sex: boy or girl", "Foot measured (right or left)", "Right- or left-handedness"]
0084                         },
0085                         {
0086                             "description": "This article takes data from a paper in the _Journal of the American\nMedical Association_ that examined whether the true mean body\ntemperature is 98.6 degrees Fahrenheit. Because the dataset suggests\nthat the true mean is approximately 98.2, it helps students to grasp\nconcepts about true means, confidence intervals, and t-statistics.\nStudents can use a t-test to test for sex differences in body\ntemperature and regression to investigate the relationship between\ntemperature and heart rate.",
0087                             "description_url": "http://jse.amstat.org/datasets/normtemp.txt",
0088                             "url": "http://jse.amstat.org/datasets/normtemp.dat.txt",
0089                             "filename": "normtemp",
0090                             "name": "Normal Body Temperature, Gender, and Heart Rate ",
0091                             "separator": "SPACE",
0092                             "columns": ["Body temperature (degrees Fahrenheit)", "Gender (1 = male, 2 = female)", "Heart rate (beats per minute)"]
0093                         }
0094                     ]
0095                 },
0096                 {
0097                     "name": "Other",
0098                     "datasets": [
0099                         {
0100                             "description": "Drug interaction study of a new and a standard oral contraceptive \ntherapy.",
0101                             "description_url": "http://jse.amstat.org/datasets/ocdrug.txt",
0102                             "url": "http://jse.amstat.org/datasets/ocdrug.dat.txt",
0103                             "filename": "ocdrug",
0104                             "name": "Drug Interaction",
0105                             "separator": "SPACE",
0106                             "columns": ["Female Subject Number (1 to 22)", "Treatment Sequence (1 = Drug D, placebo; 2 = placebo, Drug D)", "Study Period (1, 2)", "Treatment (0 = placebo, 1 = Drug D)", "EE - AUC (pg*hr/ml)", "EE - Cmax (pg/ml)", "NET - AUC (pg*hr/ml)", "NET - Cmax (pg/ml)"]
0107                         },
0108                         {
0109                             "description": "Bacteria are cultured in medical laboratories to identify them so patients can be treated \ncorrectly. The tryptone dataset contains measurements of bacteria counts following the \nculturing of five strains of Staphylococcus aureus. There are many strains of \nStaphylococcus aureus; five were used by the experimenter.  They are identified by numbers \nin the data because their names are too complicated to be useful as identifiers.  The \ndataset also contains the time of incubation, temperature of incubation and concentration \nof tryptone, a nutrient.  The protocols for culturing these bacteria set the time at 24 \nhours, the temperature at 35 degrees and the tryptone concentration at 1.0%.  The question \nis whether the conditions recommended in the protocols for the culturing of these strains \nare optimal.  The task is to find the incubation time, temperature and tryptone concentration \nthat optimises the growth of this bacterium.",
0110                             "description_url": "http://jse.amstat.org/datasets/Tryptone.txt",
0111                             "url": "http://jse.amstat.org/datasets/Tryptone.dat.txt",
0112                             "filename": "Tryptone",
0113                             "name": "The Tryptone Task",
0114                             "separator": "SPACE",
0115                             "use_first_row_for_vectorname": true
0116                         },
0117                         {
0118                             "description": "Examining Potential Predictors for Completion of the Gardasil Vaccine Sequence Based on Data Gathered at Clinics of Johns Hopkins Medical Institutions",
0119                             "description_url": "http://jse.amstat.org/v19n1/gardasil.txt",
0120                             "url": "http://jse.amstat.org/v19n1/gardasil.dat.txt",
0121                             "filename": "gardasil",
0122                             "name": "Gardasil Vaccine Data",
0123                             "separator": "TAB",
0124                             "use_first_row_for_vectorname": true
0125                         },
0126                         {
0127                             "description": "The readabilities of 30 pamphlets about cancer are compared to the reading comprehension levels of 63 patients with cancer. Both variables are measured in grade levels. The data are presented as frequencies of occurrence over grade levels for both the pamphlet readabilities and the reading levels of the patients.",
0128                             "description_url": "http://jse.amstat.org/datasets/readability.txt",
0129                             "url": "http://jse.amstat.org/datasets/readability.dat.txt",
0130                             "filename": "readability",
0131                             "name": "Readability of Educational Materials for Patients with Cancer",
0132                             "separator": "SPACE",
0133                             "columns": ["Grade level", "Frequency of occurrence for brochure readabilities", "Frequency of occurrence for patient reading levels"]
0134                         }
0135                     ]
0136                 },
0137                 {
0138                     "name": "Smoking",
0139                     "datasets": [
0140                         {
0141                             "description": "Measurements of weight and tar, nicotine, and carbon monoxide content\nare given for 25 brands of domestic cigarettes.",
0142                             "description_url": "http://jse.amstat.org/datasets/cigarettes.txt",
0143                             "url": "http://jse.amstat.org/datasets/cigarettes.dat.txt",
0144                             "filename": "Cigarettes",
0145                             "name": "Cigarette data for an introduction to multiple regression",
0146                             "separator": "SPACE",
0147                             "columns": ["Brand name", "Tar content (mg)", "Nicotine content (mg)", "Weight (g)", "Carbon monoxide content (mg)"]
0148                         }
0149                     ]
0150                 }
0151             ]
0152         },
0153         {
0154             "name": "Nature",
0155             "subcategories": [
0156                 {
0157                     "name": "Animals",
0158                     "datasets": [
0159                         {
0160                             "description": "The dataset consists of a few variables that may influence the demand for Beef in the United States. It provides an example of the influence of inflation in monetary time series data as well as providing some interesting statistical features in building demand models in regression.",
0161                             "description_url": "http://jse.amstat.org/v22n1/kopcso/BeefDemandDoc.txt",
0162                             "url": "http://jse.amstat.org/v22n1/kopcso/BeefDemand.txt",
0163                             "filename": "BeefDemand",
0164                             "name": "Beef Demand",
0165                             "separator": "TAB",
0166                             "use_first_row_for_vectorname": true
0167                         },
0168                         {
0169                             "description": "159 fishes of 7 species are caught and measured. Altogether there are\n8 variables. All the fishes are caught from the same lake\n(Laengelmavesi) near Tampere in Finland.",
0170                             "description_url": "http://jse.amstat.org/datasets/fishcatch.txt",
0171                             "url": "http://jse.amstat.org/datasets/fishcatch.dat.txt",
0172                             "filename": "fishcatch",
0173                             "name": "fishcatch",
0174                             "separator": "SPACE",
0175                             "columns": ["Obs - Observation number ranges from 1 to 159", "Species - (Numeric)", "Weight - Weight of the fish (in grams)", "Length1 - Length from the nose to the beginning of the tail (in cm)",
0176                             "Length2 - Length from the nose to the notch of the tail (in cm)", "Length3 - Length from the nose to the end of the tail (in cm)", "Height% - Maximal height as % of Length3",
0177                             "Width% - Maximal width as % of Length3", "Sex - 1 = male 0 = female"]
0178                         },
0179                         {
0180                             "description": "A cost of increased reproduction in terms of reduced longevity has been\nshown for female fruitflies, but not for males. The flies used were an\noutbred stock. Sexual activity was manipulated by supplying individual\nmales with one or eight receptive virgin females per day.  The\nlongevity of these males was compared with that of two control types.\nThe first control consisted of two sets of individual males kept with\none or eight newly inseminated females.  Newly inseminated females will\nnot usually remate for at least two days, and thus served as a control\nfor any effect of competition with the male for food or space.  The\nsecond control was a set of individual males kept with no females.\nThere were 25 males in each of the five groups, which were treated\nidentically in number of anaesthetizations (using CO2) and provision of\nfresh food medium.",
0181                             "description_url": "http://jse.amstat.org/datasets/fruitfly.txt",
0182                             "url": "http://jse.amstat.org/datasets/fruitfly.dat.txt",
0183                             "filename": "fruitfly",
0184                             "name": "Sexual activity and the lifespan of male fruitflies",
0185                             "separator": "SPACE",
0186                             "columns": ["ID - Serial No. (1-25) within each group of 25 (the order in which data points were abstracted)", "PARTNERS - Number of companions (0, 1 or 8)", "TYPE - Type of companion, 0: newly pregnant female, 1: virgin female, 9: not applicable (when PARTNERS=0)",
0187                             "LONGEVITY - Lifespan, in days", "THORAX - Length of thorax, in mm (x.xx)", "SLEEP - Percentage of each day spent sleeping"]
0188                         }
0189                     ]
0190                 }
0191             ]
0192         },
0193         {
0194             "name": "Statistics",
0195             "subcategories": [
0196                 {
0197                     "name": "Economics",
0198                     "datasets": [
0199                         {
0200                             "description": "Data are from the American Association of University Professors (AAUP) annual faculty salary survey of American colleges and universities. They include average salary and overall compensation, broken down by full, associate, and assistant professor ranks. The dataset is used for the 1995 Data Analysis Exposition, sponsored by the Statistical Graphics Section of the American Statistical Association. See the file colleges.txt for more information on the Exposition.",
0201                             "description_url": "http://jse.amstat.org/datasets/aaup.txt",
0202                             "url": "http://jse.amstat.org/datasets/aaup.dat.txt",
0203                             "filename": "AAUP",
0204                             "name": "AAUP Faculty Salary data",
0205                             "separator": ",",
0206                             "columns": ["FICE (Federal ID number)", "College name", "State (postal code)", "Type  (I, IIA, or IIB)", "Average salary - full professors",
0207                             "Average salary - associate professors", "Average salary - assistant professors", "Average salary - all ranks", "Average compensation - full professors",
0208                             "Average compensation - associate professors", "Average compensation - assistant professors", "Average compensation - all ranks", "Number of full professors",
0209                             "Number of associate professors", "Number of assistant professors", "Number of instructors", "Number of faculty - all ranks"]
0210                         },
0211                         {
0212                             "description": "The dataset bestbuy.dat contains monthly data on computer usage \n(MIPS) and total number of stores from August 1996 to July 2000. \nAdditionally, information on the planned number of stores through \nDecember 2001 is available. These data can be used to compare \ntime-series forecasting with trend and seasonality components and \ncausal forecasting based on simple linear regression. The simple \nlinear regression model exhibits unequal error variances, suggesting \na transformation of Y.",
0213                             "description_url": "http://jse.amstat.org/datasets/bestbuy.txt",
0214                             "url": "http://jse.amstat.org/datasets/bestbuy.dat.txt",
0215                             "filename": "BestBuy",
0216                             "name": " BestBuy",
0217                             "separator": "SPACE",
0218                             "columns": ["Date dd-mm-yyyy, August 1996 - July 2000", "MIPS usage (MIPS are a measure of computing resources)", "Number of stores", "Date dd-mm-yyyy, July 2000 - December 2001", "Planned Number of Stores through December 2001"]
0219                         },
0220                         {
0221                             "description": "The dollar amount for a monthly (January 1991 through December 2000) \nhousehold electric bill is presented as a time series. In addition, \npotential explanatory variables are included. Twelve representative \nmonthly values are provided for the average temperature, for \nheating degree days, and for cooling degree days (not for each \nmonth for each year). Additional variables give the family size \neach month and indicate when a new electric meter and new heating \nand cooling equipment was installed. To convert the billing amount \nto estimated power consumption, a tiered rate function (supplied \nin the accompanying Instructor's Manual) and the costs of \nassociated riders (provided here) must be used. Consumption \nestimates resulting from this information are supplied.",
0222                             "description_url": "http://jse.amstat.org/datasets/electricbill.txt",
0223                             "url": "http://jse.amstat.org/datasets/electricbill.dat.txt",
0224                             "filename": "electricbill",
0225                             "name": " Electric Bill Data",
0226                             "separator": "SPACE",
0227                             "columns": ["Observation number", "Year", "Month", "Amount of bill (in dollars), includes 5% sales tax", "Average temperature (in degrees Fahrenheit)", "Heating Degree Days", "Cooling Degree Days", "Number of family members at home",
0228                             "New meter? (indicator variable, 1 = yes)", "New heat pump 1? (indicator variable, 1= new)", "New heat pump 2? (indicator variable, 1= new)", "Total charge (per kwh) for all riders", "Calculated consumption (in kwh)"]
0229                         },
0230                         {
0231                             "description": "The data file contains information on 76 single-family homes in Eugene, Oregon during 2005.  This dataset is suitable for a complete multiple linear regression analysis of home price data that covers many of the usual regression topics, including interaction and predictor transformations.  Whereas realtors use experience and local knowledge to subjectively value a house based on its characteristics (size, amenities, location, etc.) and the prices of similar houses nearby, regression analysis can provide an alternative that more objectively models local house prices using these same data. SOURCES: The data were provided by Victoria Whitman, a realtor in Eugene, in 2005.  The data were used in a case study in Pardoe (2006).",
0232                             "description_url": "http://jse.amstat.org/datasets/homes76.txt",
0233                             "url": "http://jse.amstat.org/datasets/homes76.dat.txt",
0234                             "filename": "homes76",
0235                             "name": " Modeling home prices using realtor data",
0236                             "separator": "SPACE",
0237                             "use_first_row_for_vectorname": true
0238                         },
0239                         {
0240                             "description": "For 97 countries in the world, data are given for birth rates, death\nrates, infant death rates, life expectancies for males and females, and\nGross National Product.",
0241                             "description_url": "http://jse.amstat.org/datasets/poverty.txt",
0242                             "url": "http://jse.amstat.org/datasets/poverty.dat.txt",
0243                             "filename": "poverty",
0244                             "name": "The Statistics of Poverty and Inequality ",
0245                             "separator": "SPACE",
0246                             "columns": ["Live birth rate per 1,000 of population", "Death rate per 1,000 of population", "Infant deaths per 1,000 of population under 1 year old", "Life expectancy at birth for males", "Life expectancy at birth for females",
0247                             "Gross National Product per capita in U.S. dollars", "Country Group: 1 = Eastern Europe, 2 = South America and Mexico, 3 = Western Europe, North America, Japan, Australia, New Zealand, 4 = Middle East, 5 = Asia, 6 = Africa", "Country"]
0248                         }
0249                     ]
0250                 },
0251                 {
0252                     "name": "Sport",
0253                     "datasets": [
0254                         {
0255                             "description": "Each record contains the results of a test of a set of ball bearings. The quantities L10 and L50 are estimated percentiles of the fatigue failure distribution (obtained by fitting a Weibull distribution to the fatigue failure times, separately in each set). The objective is to analyse ln(L10) (and separately ln(L50)) by linear regression on the logarithms of P, Z and D, which are characteristics of the ball bearings. Differences between companies and between types of bearing can be tested.",
0256                             "description_url": "http://jse.amstat.org/datasets/ballbearings.txt",
0257                             "url": "http://jse.amstat.org/datasets/ballbearings.dat.txt",
0258                             "filename": "ballbearings",
0259                             "name": "Ball Bearing Reliability Data",
0260                             "separator": "SPACE",
0261                             "columns": ["Company", "Test number", "Year of test", "No. of bearings", "Load (P)", "No. of balls (Z)", "Diameter (D)", "L10", "L50", "Weibull slope", "Bearing type"]
0262                         },
0263                         {
0264                             "description": "We consider as our population of interest the set of Major League Baseball players who played at least one game in both the 1991 and 1992 seasons, excluding pitchers. This dataset contains the 1992 salaries for that population, along with performance measures for each player from 1991. Four categorical variables indicate how free each player was to move to other teams.",
0265                             "description_url": "http://jse.amstat.org/datasets/baseball.txt",
0266                             "url": "http://jse.amstat.org/datasets/baseball.dat.txt",
0267                             "filename": "baseball",
0268                             "name": "baseball",
0269                             "separator": "SPACE",
0270                             "columns": ["Salary (in thousands of dollars)", "Batting average", "On-base percentage (OBP)", "Number of runs", "Number of hits",
0271                             "Number of doubles", "Number of triples", "Number of home runs", "Number of runs batted in (RBI)", "Number of walks",
0272                             "Number of strike-outs", "Number of stolen bases", "Number of errors",  "Indicator of \"free agency eligibility\"", "Indicator of \"free agent in 1991/2\"",
0273                             "Indicator of \"arbitration eligibility\"", "Indicator of \"arbitration in 1991/2\"", "Player's name (in quotation marks)"]
0274                         },
0275                         {
0276                             "description": "This data set contains every NCAA Basketball Tournament game ever played. The tournament has been held every year since 1939.",
0277                             "description_url": "http://jse.amstat.org/datasets/basketball.txt",
0278                             "url": "http://jse.amstat.org/datasets/basketball.dat.txt",
0279                             "filename": "Basketball",
0280                             "name": "NCAA Basketball Tournament Data",
0281                             "separator": "SPACE",
0282                             "columns": ["Year", "Winning School", "Winning Score", "Losing School", "Losing Score"]
0283                         },
0284                         {
0285                             "description": "Data are provided for Barry Bonds' plate appearances in the 2001\nbaseball season. Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).",
0286                             "description_url": "http://jse.amstat.org/datasets/bonds2001.txt",
0287                             "url": "http://jse.amstat.org/datasets/bonds2001.dat.txt",
0288                             "filename": "Bonds",
0289                             "name": "Barry Bonds' 2001 Plate Appearances",
0290                             "separator": "SPACE",
0291                             "columns": ["Plate appearance number", "Number of the game in the season", "Number of the plate appearance within the game", "Equals one for games in San Francisco and equals zero otherwise",
0292                             "1 - there is a runner on first base when Bonds appears, 0 - otherwise", "1 - there is a runner on second base when Bonds appears, 0 - otherwise",
0293                             "1 - there is a runner on third base when Bonds appears, 0 - otherwise", "Number of outs in inning when Bonds appears", "Inning of plate appearance",
0294                             "Number of runs scored by Giants in the inning after first pitch to Bonds", "1 -  Bonds walks, 0 - otherwise", "1 - Bonds walks intentionally, 0 - otherwise",
0295                             "0 - Bonds does not reach base, 1 - Bonds reaches first base on a single or error, 2 - Bonds reaches second base on a double or error, 3 -  Bonds reaches third base on a triple or error, 4 - Bonds hits a home run, 5 - Bonds walks or is hit by a pitch",
0296                             "Opposing pitchers' career earned run average as of the end of the 2000 season", "Giants score just before first pitch to Bonds", "Opposing team's score just before first pitch to Bonds"]
0297                         },
0298                         {
0299                             "description": "Data are provided for Barry Bonds' plate appearances in the 2002\nbaseball season.  Variables include characteristics of the innings\nbefore the first pitch to Bonds (e.g., the number of outs, the number\nof runners on each base, the score, the opposing pitcher's earned run\naverage) and after the first pitch to Bonds (e.g., the outcome of the\nappearance, how many runs scored in the inning after Bonds hits).",
0300                             "description_url": "http://jse.amstat.org/datasets/bonds2002.txt",
0301                             "url": "http://jse.amstat.org/datasets/bonds2002.dat.txt",
0302                             "filename": "Bonds2002",
0303                             "name": "Barry Bonds' 2002 Plate Appearances",
0304                             "separator": "SPACE",
0305                             "columns": ["1 - there is a runner on first base when Bonds appears, 0 - otherwise", "1 - there is a runner on second base when Bonds appears, 0 - otherwise",
0306                             "1 -  there is a runner on third base when Bonds appears, 0 - otherwise", "Number of outs in inning when Bonds appears",
0307                             "0 - Bonds does not reach base, 1 - Bonds reaches first base on a single or error, 2 - Bonds reaches second base on a double or error, 3 - Bonds reaches third base on a triple or error, 4 - Bonds hits a home run, 5 - Bonds walks or is hit by a pitch",
0308                             "Number of runs scored by Giants in the inning after first pitch to Bonds", "Opposing pitchers' career earned run average as of the end of the 2001 season",
0309                             "Initials of player batting immediately after Bonds: JK = Jeff Kent, BS = Benito Santiago, RS = Reggie Sanders, RA = Rich Aurilia, YT = Yorvit Torrealba, DB = David Bell, SD = Shawn Dunston, RM = Ramon Martinez, NA = missing",
0310                             "Player batting immediately after Bonds (previous column numerically coded): 0 = missing, 1 = Jeff Kent, 2 = Benito Santiago, 3 = Reggie Sanders, 4 = Rich Aurilia, 5 = Yorvit Torrealba, 6 = David Bell, 7 = Shawn Dunston, 8 = Ramon Martinez"]
0311                         },
0312                         {
0313                             "description": "The dataset contains the scores, opponents, and sites of the 18 Big Ten\nmen's basketball games that involved the University of Iowa in 1997.",
0314                             "description_url": "http://jse.amstat.org/datasets/hawks.txt",
0315                             "url": "http://jse.amstat.org/datasets/hawks.dat.txt",
0316                             "filename": "hawks",
0317                             "name": "1997 University of Iowa Big Ten Basketball Data",
0318                             "separator": "SPACE"
0319                         },
0320                         {
0321                             "description": "The dataset consists of game-by-game information for the 1998 season\nfor Mark McGwire and the St. Louis Cardinals, and Sammy Sosa and the\nChicago Cubs. The dataset includes information on the home run hitting\nof these two players, as well as game results for the teams.",
0322                             "description_url": "http://jse.amstat.org/datasets/homerun.txt",
0323                             "url": "http://jse.amstat.org/datasets/homerun.dat.txt",
0324                             "filename": "homerun",
0325                             "name": "The 1998 Home Run Race Between Mark McGwire and Sammy Sosa",
0326                             "separator": "SPACE",
0327                             "columns": ["Game number", "Month of game (St. Louis)", "Date of game (St. Louis)", "Calendar date of game [days since beginning of season] (St. Louis)", "Game location (St. Louis) (0 = Away, 1 = Home)",
0328                             "Runs scored (St. Louis)", "Runs scored by opposition (St. Louis)", "Game result (St. Louis) (-1 = Tie, 0 = Loss, 1 = Win)", "Number of home runs hit by McGwire", "Runs driven in by McGwire's home runs",
0329                             "McGwire game status (0 = Played, 1 = Did not play)", "Month of game (Chicago)", "Date of game (Chicago)", "Calendar date of game [days since beginning of season] (Chicago)",
0330                             "Game location (Chicago) (0 = Away, 1 = Home)", "Runs scored (Chicago)", "Runs scored by opposition (Chicago)", "Game result (Chicago) (0 = Loss, 1 = Win)", "Number of home runs hit by Sosa",
0331                             "Runs driven in by Sosa's home runs", "Sosa game status (0 = Played, 1 = Did not play)"]
0332                         },
0333                         {
0334                             "description": "Data are from The Baseball Encyclopedia (1993) and Total Baseball (2001). \nThey include the location, league affiliation (National or American), \ndivision affiliation (East, Central, or West), season of play, home game \nattendance, runs scored, runs allowed, wins, losses, and number of games \nbehind the division leader for each major league franchise for the 1969 \nthrough 2000 seasons. Other data (including opening dates for new stadia, \nand dates of work stoppages) were collected from Ballparks by Munsey and \nSuppes (2001) and InfoPlease (2001).",
0335                             "description_url": "http://jse.amstat.org/datasets/MLBattend.txt",
0336                             "url": "http://jse.amstat.org/datasets/MLBattend.dat.txt",
0337                             "filename": "MLBattend",
0338                             "name": "1969-2000 Major League Baseball Attendance data",
0339                             "separator": "SPACE",
0340                             "columns": ["Major League Baseball franchise", "League affiliation (National or American)", "Division affiliation (East, Central, or West)", "Season",
0341                             "Home game attendance", "Runs scored", "Runs allowed", "Wins", "Losses", "Number of games behind the division winner"]
0342                         },
0343                         {
0344                             "description": "Driver results for all NASCAR races between 1975 and 2003, inclusive. The dataset constitutes all \nparticipants in each of 898 races, and includes their start/finish positions, prize winnings, car \nmake and laps completed.",
0345                             "description_url": "http://jse.amstat.org/datasets/nascard.txt",
0346                             "url": "http://jse.amstat.org/datasets/nascard.dat.txt",
0347                             "filename": "nascard",
0348                             "name": "NASCAR Driver Results",
0349                             "separator": "SPACE",
0350                             "columns": ["Series Race", "Year", "Race/Year", "Finishing Position", "Starting Position", "Laps Completed", "Winnings", "Number of cars in race", "Car Make", "Driver"]
0351                         },
0352                         {
0353                             "description": "Race results for all NASCAR Winston Cup races between 1975 and 2003, inclusive. The dataset \nincludes the numbers of cars, total prize winnings, monthly consumer price index for the month \nof the race, track length, laps completed by the winner, spatial co-ordinates and name of track.",
0354                             "description_url": "http://jse.amstat.org/datasets/nascarr.txt",
0355                             "url": "http://jse.amstat.org/datasets/nascarr.dat.txt",
0356                             "filename": "nascarr",
0357                             "name": "NASCAR Race Results",
0358                             "separator": "SPACE",
0359                             "columns": ["Series Race", "Year", "Race/Year", "Number of cars in race", "Total race payout", "Monthly CPI-U", "Spearman’s", "Kendall’s", "Track Length", "Laps Completed by winner", "Road Indicator 1=Road Course, 0=Loop",
0360                             "Caution Flags", "Lead Changes", "Winning Time", "Track Latitude", "Track Longitude", "Track Code", "Track Name"]
0361                         },
0362                         {
0363                             "description": "This data set contains performance statistics for National \nFootball League (NFL) teams for their 2000 regular season.",
0364                             "description_url": "http://jse.amstat.org/datasets/nfl2000.txt",
0365                             "url": "http://jse.amstat.org/datasets/nfl2000.dat.txt",
0366                             "filename": "nfl2000",
0367                             "name": "NFL Y2K PCA",
0368                             "separator": "SPACE",
0369                             "columns": ["team initials", "name and location of the team", "wins", "losses", "drives begun in opponents' territory", "drives begun within 20 yards of the goal",
0370                             "opponents' drives begun in team's territory", "opponents' drives begun within 20 yards of goal", "punts blocked by team", "punts team had blocked", "touchdowns scored by team",
0371                             "touchdowns scored against team", "total yardage gained by offense", "total yardage allowed by defense", "time of possession by offense (in minutes)", "time of possession by opponents' offense",
0372                             "field goals made", "field goals allowed to opponents", "field goals attempted", "field goals attempted by opponents", "punts made by opponents", "average length of punts made by opponents",
0373                             "average change in field position during opponents' punts", "opponents' punts taken for touchbacks", "opponents' punts that resulted in the team's offense beginning within 20 yards of their own (defensive) goal line",
0374                             "longest opponents' punt", "punts made by team", "average length of punts made by team", "average change in field position during team's punts", "team's punts taken for touchbacks",
0375                             "team's punts that resulted in the opponents' offense beginning within 20 yards of their own (defensive) goal line", "longest team punt", "first downs obtained by offense",
0376                             "first downs allowed by defense", "sacks achieved by team's defense", "sacks allowed by team's offense", "kickoffs made by team", "kickoffs received by team", "yards gained during kickoff returns",
0377                             "yards allowed to opposition during kickoff returns", "average yards gained during kickoff returns", "average yards allowed during kickoff returns", "longest kickoff return made by team",
0378                             "longest kickoff return allowed by team", "kickoffs returned for a touchdown by team", "kickoffs returned for touchdown by opposition", "punts returned by team", "punts returned by opposition",
0379                             "punts \"fair caught\" by team", "punts \"fair caught\" by opposition", "return yardage on punts by team", "return yardage on punts by opposition", "average length of punt returns by team",
0380                             "average length of punt returns by opposition", "punts returned by team for a touchdown", "punts returned by opponents for a touchdown", "interceptions made by team's defense",
0381                             "interceptions made against team's offense", "fumbles recovered by team's defense", "fumbles recovered by opposing defenses", "games played by team", "average number of yards gained per minute of possession by opponents",
0382                             "average number of yards gained per minute of possession by team", "average number of punts per minute of possession by opponents", "average number of punts per minute of possession by team",
0383                             "average number of touchdowns per minute of possession by opponents", "average number of touchdowns per minute of possession by team", "winning percentage", "turnovers obtained by team, per minute of possession by opponents",
0384                             "turnovers allowed by team, per minute of possession", "first downs obtained by team, per minute of possession", "first downs allowed by team's defense, per minute of possession by opposition",
0385                             "points scored by team", "points scored against team", "conference to which the team belongs (AFC or NFC)"]
0386                         },
0387                         {
0388                             "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.",
0389                             "url": "http://jse.amstat.org/datasets/nfl93.dat.txt",
0390                             "filename": "nfl93",
0391                             "name": "NFL Scores and Pointspreads",
0392                             "separator": "SPACE"
0393                         },
0394                         {
0395                             "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.",
0396                             "description_url": "http://jse.amstat.org/datasets/nfl.txt",
0397                             "url": "http://jse.amstat.org/datasets/nfl94.dat.txt",
0398                             "filename": "nfl94",
0399                             "name": "NFL Scores and Pointspreads",
0400                             "separator": "SPACE",
0401                             "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"]
0402                         },
0403                         {
0404                             "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.",
0405                             "description_url": "http://jse.amstat.org/datasets/nfl.txt",
0406                             "url": "http://jse.amstat.org/datasets/nfl95.dat.txt",
0407                             "filename": "nfl95",
0408                             "name": "NFL Scores and Pointspreads",
0409                             "separator": "SPACE",
0410                             "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"]
0411                         },
0412                         {
0413                             "description": "Four datasets (nfl93.dat, nfl94.dat, nfl95.dat, nfl96.dat) contain\nNational Football League game results for recent seasons. In addition\nto game scores, the datasets give oddsmakers' pointspreads and\nover/under values for each game.",
0414                             "description_url": "http://jse.amstat.org/datasets/nfl.txt",
0415                             "url": "http://jse.amstat.org/datasets/nfl96.dat.txt",
0416                             "filename": "nfl96",
0417                             "name": "NFL Scores and Pointspreads",
0418                             "separator": "SPACE",
0419                             "columns": ["Date of game", "Visiting team name", "Visiting team score", "Home team name", "Home team score", "Indicator for overtime games (o or -)", "Pointspread", "Over/Under"]
0420                         },
0421                         {
0422                             "description": "The dataset contains scores for all regular season National Football\nLeague games from the 1998, 1999 and 2000 seasons. In addition to \nthe points scored by the home and visiting teams in each game, the\ndataset contains a pointspread that handicaps each game.",
0423                             "description_url": "http://jse.amstat.org/datasets/nfl98-00.txt",
0424                             "url": "http://jse.amstat.org/datasets/nfl98-00.dat.txt",
0425                             "filename": "nfl98-00",
0426                             "name": "NFL Scores for 1998-2000",
0427                             "separator": "SPACE",
0428                             "columns": ["Year (1998, 1999, or 2000)", "Week of the season (1 to 17)", "Home team name", "Home team score", "Visiting team name", "Visiting team score", "Pointspread"]
0429                         },
0430                         {
0431                             "description": "The data set provides the weights (in lbs)\nof the 26 men on the 1996 US Olympic Rowing Team in Atlanta. The\ndata includes the names of the participants and which event they\nrowed in. The US team participated in 7 of the 8 possible events.\nThis data set is useful for discussing outliers,\nexplanations for outliers, and comparing the robustness of the\nmean and the median.",
0432                             "description_url": "http://jse.amstat.org/datasets/rowing.txt",
0433                             "url": "http://jse.amstat.org/datasets/rowing.dat.txt",
0434                             "filename": "rowing",
0435                             "name": "Weights of 1996 US Olympic Rowing Team",
0436                             "separator": "SPACE",
0437                             "columns": ["Name: The rower's last name", "Event: The actual event the team member participated in", "Weight: Weight of individual team member in lbs"]
0438                         },
0439                         {
0440                             "description": "Data consist of 500-yard freestyle swim times for male and female swimmers age 50-94 in a biennial national competition. Variables include year, gender, age, age group, swim time, seed time (qualifying time from state competition), and split times (in each 50-yard segment).",
0441                             "url": "http://jse.amstat.org/v22n1/doane/SeniorSwimTimes-DataSet.txt",
0442                             "filename": "SeniorSwimTimes",
0443                             "name": "SeniorSwimTimes",
0444                             "separator": "TAB",
0445                             "use_first_row_for_vectorname": true
0446                         },
0447                         {
0448                             "description": "Data are provided for the 56 Tour De France bicycle races since World\nWar II. The year and dates of the event, the total number of stages,\nthe total distance, the winning total time and average speed, the name\nand country of the winner, the birth date of the winner, and the\nwinner's age at the time of victory are the variables in the dataset.",
0449                             "description_url": "http://jse.amstat.org/datasets/tdf.txt",
0450                             "url": "http://jse.amstat.org/datasets/tdf.dat.txt",
0451                             "filename": "tdf",
0452                             "name": "Tour De France Winners (Can Lance Win Six?)",
0453                             "separator": "SPACE",
0454                             "columns": ["Year", "Start-Town", "Start-Date", "End-Date", "Stages", "Distance in kilometers", "Speed: The average speed of the winner in kph", "Time: The total riding time of the winner in hours",
0455                             "Winner", "Country", "Birth-Date", "Age-Year", "Age-Tenth"]
0456                         }
0457                     ]
0458                 },
0459                 {
0460                     "name": "Other",
0461                     "datasets": [
0462                         {
0463                             "description": "Full population of data (all software projects completed by the AT&T data center from 1986 through 1991).",
0464                             "description_url": "http://jse.amstat.org/datasets/aptness.txt",
0465                             "url": "http://jse.amstat.org/datasets/aptness.dat.txt",
0466                             "filename": "aptness",
0467                             "name": "Evaluating Aptness of a Regression Model",
0468                             "columns": ["Function Point Count", "Work Hours", "Operating System: (0) Unix, (1) MVS", "Database Management System: (1) IDMS, (2) IMS, (3) INFORMIX, (4) INGRESS, (5) Other", "Language: (1) COBOL, (2) PLI, (3) C, (4) Other"]
0469                         },
0470                         {
0471                             "description": "The dataset contains information for the construction of a three-way table that illustrates Simpson's Paradox. Categorical variables observed for babies born in a metropolitan area of South Africa include race, whether or not the mother had medical aid (which is similar to health insurance), and whether or not the child was included in a follow-up study. The data are provided in two formats. The file birthtotena.dat contains the category labels and cell frequencies for the three-way table.  The file birthtotenb.dat lists each case on a separate line with three variables that indicate whether or not the mother had medical aid, whether or not the mother was traced for the five-year interview, and race.",
0472                             "description_url": "http://jse.amstat.org/datasets/birthtoten.txt",
0473                             "url": "http://jse.amstat.org/datasets/birthtotena.dat.txt",
0474                             "filename": "birthtotena",
0475                             "name": "Simpson's Paradox: An Example From a Longitudinal Study in South Africa",
0476                             "columns": ["Aid/NoAid", "Traced/NotTraced", "White/Black"]
0477                         },
0478                         {
0479                             "description": "The dataset contains information for the construction of a three-way table that illustrates Simpson's Paradox. Categorical variables observed for babies born in a metropolitan area of South Africa include race, whether or not the mother had medical aid (which is similar to health insurance), and whether or not the child was included in a follow-up study. The data are provided in two formats. The file birthtotena.dat contains the category labels and cell frequencies for the three-way table.  The file birthtotenb.dat lists each case on a separate line with three variables that indicate whether or not the mother had medical aid, whether or not the mother was traced for the five-year interview, and race.",
0480                             "description_url": "http://jse.amstat.org/datasets/birthtoten.txt",
0481                             "url": "http://jse.amstat.org/datasets/birthtotenb.dat.txt",
0482                             "filename": "birthtotenb",
0483                             "name": "Simpson's Paradox: An Example From a Longitudinal Study in South Africa",
0484                             "columns": ["Medical Aid? (0 = No, 1 = Yes)", "Traced? (0 = No, 1 = Five-Year Group)", "Race (1 = White, 2 = Black)"]
0485                         },
0486                         {
0487                             "description": "This dataset contains the prices of ladies' diamond rings and the carat size of their diamond stones. The rings are made with gold of 20 carats purity and are each mounted with a single diamond stone.",
0488                             "description_url": "http://jse.amstat.org/datasets/diamond.txt",
0489                             "url": "http://jse.amstat.org/datasets/diamond.dat.txt",
0490                             "filename": "diamond",
0491                             "name": "Diamond Ring Pricing Using Linear Regression",
0492                             "columns": ["Size of diamond in carats (1 carat = .2 gram)", "Price of ring in Singapore dollars"]
0493                         },
0494                         {
0495                             "description": "In many statistical models the normal distribution of the response is an essential assumption.\nThis paper uses a dataset of 2000 euro coins with information (up to the milligram) about\nthe weight of each coin. As the physical coin production process is subject to a multitude\nof (very small) variability sources, it seems reasonable to expect that the empirical\ndistribution of the weight of euro coins does agree with the normal distribution. Goodness\nof fit tests however show that this is not the case. Moreover, some outliers complicate\nthe analysis. Mixtures of normal distributions and skew normal distributions are fitted\nto the data, revealing that the normality assumption might not hold for those weights.",
0496                             "description_url": "http://jse.amstat.org/datasets/euroweight.txt",
0497                             "url": "http://jse.amstat.org/datasets/euroweight.dat.txt",
0498                             "filename": "euroweight",
0499                             "name": "The Weight of Euro Coins",
0500                             "separator": "TAB",
0501                             "columns": ["ID", "weight in grams", "batch"]
0502                         },
0503                         {
0504                             "description": "Title, year of release, length in minutes, number of cast members listed, rating, and number of lines \nof description are recorded for a simple random sample of 100 movies.  One can use the sample to obtain base-line information \non the movie guide from which the data were collected.  The dataset also illustrates two paradoxes for associations between \nthree variables:  non-transitivity of positive correlation and Simpson's paradox.  SOURCE: The data were taken as a simple \nrandom sample of the approximately 19,000 movies (not including made-for-TV movies) in Leonard Maltin's Movie and Video \nGuide, 1996.  ",
0505                             "description_url": "http://jse.amstat.org/datasets/films.txt",
0506                             "url": "http://jse.amstat.org/datasets/films.dat.txt",
0507                             "filename": "films",
0508                             "name": "films dataset",
0509                             "separator": "SPACE",
0510                             "use_first_row_for_vectorname": true
0511                         },
0512                         {
0513                             "description": "This dataset contains descriptive data of contestants on the game show \"Friend or Foe?\". Information on the contestant's \nrace, sex, age, prize money, and playing strategy are included.",
0514                             "description_url": "http://jse.amstat.org/datasets/friend_or_foe.txt",
0515                             "url": "http://jse.amstat.org/datasets/friend_or_foe.dat.txt",
0516                             "filename": "friend_or_foe",
0517                             "name": "Data from the Television Game Show \"Friend or Foe?\"",
0518                             "separator": "TAB",
0519                             "use_first_row_for_vectorname": true
0520                         },
0521                         {
0522                             "description": "The dataset contains hat size as well as circumference, length of major axis \nand length of minor axis of the inner hat band for 26 hats. The manufacturer \nand the country of manufacture are also included.",
0523                             "description_url": "http://jse.amstat.org/datasets/hats.txt",
0524                             "url": "http://jse.amstat.org/datasets/hats.dat.txt",
0525                             "filename": "hats",
0526                             "name": "Hat measurements, including hat size",
0527                             "separator": "SPACE",
0528                             "columns": ["Hat size", "Circumference (inches)", "Length of major axis (inches)", "Length of minor axis (inches)", "Where made? Italy = 1, U.S.A. = 2", "Manufacturer: Beaver = 1, Borsalino = 2, Dobbs = 3, Stetson = 4"]
0529                         },
0530                         {
0531                             "description": "The dataset consists of samples of size six taken without replacement\nfrom the integers {1, 2, 3, ..., 42}. There are actually three\ndatasets from three different sources, and in each case the six-tuples\nare (in theory) random selections or samples.  The observations in each\nsample are given in the order in which they were obtained or selected.",
0532                             "description_url": "http://jse.amstat.org/datasets/lotto.txt",
0533                             "url": "http://jse.amstat.org/datasets/lotto.dat.txt",
0534                             "filename": "lotto",
0535                             "name": "Lotto 6/42 Selections from Individuals, Irish National Lottery, and S-Plus Simulation",
0536                             "separator": "TAB",
0537                             "columns": ["Code for source of sample (1, 2, or 3)", "First selection in sample", "Second selection in sample", "Third selection in sample", "Fourth selection in sample", "Fifth selection in sample", "Sixth selection in sample"]
0538                         },
0539                         {
0540                             "description": "This file contains daily per theater box office receipts for 49 \nmovies. This data is to accompany the article entitled Movie Data.",
0541                             "description_url": "http://jse.amstat.org/datasets/moviedaily.txt",
0542                             "url": "http://jse.amstat.org/datasets/moviedaily.dat.txt",
0543                             "filename": "moviedaily",
0544                             "name": "Data Documentation Template for Daily Movie Box Office Receipts",
0545                             "separator": "TAB",
0546                             "use_first_row_for_vectorname": true
0547                         },
0548                         {
0549                             "description": "This file contains total US gross box office receipts for 49 movies. This data is to accompany the article entitled Movie Data.",
0550                             "description_url": "http://jse.amstat.org/datasets/movietotal.txt",
0551                             "url": "http://jse.amstat.org/datasets/movietotal.dat.txt",
0552                             "filename": "movietotal",
0553                             "name": "Data Documentation Template for Total Movie Box Office Receipts",
0554                             "separator": "TAB",
0555                             "use_first_row_for_vectorname": true
0556                         },
0557                         {
0558                             "description": "This file contains weekend per theater box office receipts for 49 movies. This data is to accompany the article entitled Movie Data.",
0559                             "description_url": "http://jse.amstat.org/datasets/movieweekend.txt",
0560                             "url": "http://jse.amstat.org/datasets/movieweekend.dat.txt",
0561                             "filename": "movieweekend",
0562                             "name": "Data Documentation Template for Weekend Movie Box Office Receipts",
0563                             "separator": "TAB",
0564                             "use_first_row_for_vectorname": true
0565                         },
0566                         {
0567                             "description": "Every year actors and actresses are chosen to receive the Oscars awards for best actor and for best actress. This dataset \ncontains information about each of the winners for each of the 77 annual Oscar awards.\n\nAlthough there have been only 77 Oscars, there are 78 male winners and 78 female winners because ties happened on two \noccasions (1933 for the best actor and 1969 for the best actress).",
0568                             "description_url": "http://jse.amstat.org/datasets/oscars.txt",
0569                             "url": "http://jse.amstat.org/datasets/oscars.dat.txt",
0570                             "filename": "oscars",
0571                             "name": "Oscars: Best Actors and Actresses",
0572                             "separator": "SPACE",
0573                             "columns": ["Gender (m=male  f=female)", "Oscar Year Number (1-77)", "Year the Oscar Took Place", "Winner’s first and last name", "Name of the Movie in which the winner acted", "Age of winner (at the beginning of the winning year)",
0574                             "Birth place (State if born in USA, else Country)", "Month in which the winner was born", "Day of month on which winner was born", "Year the winner was born"]
0575                         },
0576                         {
0577                             "description": "This dataset contains information collected from rolling the pair of\npigs (found in the game \"Pass the Pigs\") 6000 times. A description of\nthe rules, scoring configurations, and data collection method are\nincluded in the accompanying paper.",
0578                             "description_url": "http://jse.amstat.org/datasets/pig.txt",
0579                             "url": "http://jse.amstat.org/datasets/pig.dat.txt",
0580                             "filename": "pig",
0581                             "name": "Data from the game \"Pass the Pigs\"",
0582                             "separator": "SPACE",
0583                             "use_first_row_for_vectorname": true
0584                         },
0585                         {
0586                             "description": "In a residential home, energy consumption is closely related to the\noutdoor temperature and size of the house. In a home of a given size,\ntemperature fluctuations and energy consumption vary fairly predictably\nover time. When homeowners add a room, other things being equal,\nutility usage should increase. This dataset permits students to\nestimate the energy demand and make forecasts for future months, as\nwell as explore other relationships.\n\nThe dataset contains natural gas and electricity usage data for a\ngas-heated single-family residence in the Boston area from September\n1990 through May 1997, accompanied by monthly climatological data.  \nThe dataset is useful for illustrating the concepts and techniques of\ncentral tendency, dispersion, elementary time series analysis,\ncorrelation, simple and multiple regression, and variable\ntransformations.",
0587                             "description_url": "http://jse.amstat.org/datasets/utility.txt",
0588                             "url": "http://jse.amstat.org/datasets/utility.dat.txt",
0589                             "filename": "utility",
0590                             "name": "What Does It Take to Heat a New Room?",
0591                             "separator": "SPACE",
0592                             "columns": ["Observation month", "Number of days in the month", "Mean monthly temperature in Boston, in degrees Fahrenheit", "Mean natural gas usage per day for the month, in therms", "Total therms used for the month", "Days in the gas company billing cycle for the month",
0593                             "Total kilowatt hours consumed in the month", "Mean kilowatt hours per day for the month", "Days in the electric company billing cycle for the month", "Dummy variable for method of determining kwh for the month (0 = actual month-end meter reading, 1 = estimated reading)",
0594                             "Total heating degree days for the month", "Total cooling degree days for the month", "Dummy variable for the new room (0 = pre-addition, 1 = post-addition)"]
0595                         },
0596                         {
0597                             "description": "For each person on board the fatal maiden voyage of the ocean liner Titanic, this dataset records sex, age [adult/child], economic status [first/second/third class, or crew] and whether or not that person survived.",
0598                             "description_url": "http://jse.amstat.org/datasets/titanic.txt",
0599                             "url": "http://jse.amstat.org/datasets/titanic.dat.txt",
0600                             "filename": "titanic",
0601                             "name": "Population at Risk and Death Rates for an Unusual Episode",
0602                             "separator": "SPACE",
0603                             "columns": ["Class (0 = crew, 1 = first, 2 = second, 3 = third)", "Age (1 = adult, 0 = child)", "Sex (1 = male, 0 = female)", "Survived (1 = yes, 0 = no)"]
0604                         }
0605                     ]
0606                 },
0607                 {
0608                     "name": "Travel",
0609                     "datasets": [
0610                         {
0611                             "description": "This dataset consists of all 135 large and medium sized air hubs in the United States as defined by the Federal Aviation Administration.",
0612                             "description_url": "http://jse.amstat.org/datasets/airport.txt",
0613                             "url": "http://jse.amstat.org/datasets/airport.dat.txt",
0614                             "filename": "airport",
0615                             "name": "US Airport Statistics",
0616                             "separator": "SPACE",
0617                             "columns": ["Airport", "City", "Scheduled departures", "Performed departures", "Enplaned passengers", "Enplaned revenue tons of freight", "Enplaned revenue tons of mail"]
0618                         },
0619                         {
0620                             "description": "The data is a set of 50000 (1.3 MB) observations containing roughly 2 minutes of traffic from the one hour, larger\ndec-pkt-1.tcp file used in the paper. The larger file can be accessed from the author's web page or from its source. With \nonly 50000 observations, the data set is a small excerpt of that larger file.",
0621                             "description_url": "http://jse.amstat.org/datasets/packetdata.txt",
0622                             "url": "http://jse.amstat.org/datasets/packetdata.dat.txt",
0623                             "filename": "packetdata",
0624                             "name": "packetdata",
0625                             "separator": "SPACE",
0626                             "use_first_row_for_vectorname": true
0627                         }
0628                     ]
0629                 },
0630                 {
0631                     "name": "Population",
0632                     "datasets": [
0633                         {
0634                             "description": "For each of the forty largest countries in the world (according to 1990\npopulation figures), data are given for the country's life expectancy\nat birth, number of people per television set, and number of people per\nphysician.",
0635                             "description_url": "http://jse.amstat.org/datasets/televisions.txt",
0636                             "url": "http://jse.amstat.org/datasets/televisions.dat.txt",
0637                             "filename": "televisions",
0638                             "name": "Televisions, Physicians, and Life Expectancy",
0639                             "separator": "SPACE",
0640                             "columns": ["Country", "Life expectancy", "People per television", "People per physician", "Female life expectancy", "Male life expectancy"]
0641                         },
0642                         {
0643                             "description": "This dataset contains 21 body dimension measurements as well as age, weight, height, and gender on 507 individuals. The 247 men and 260 women were primarily individuals in their twenties and thirties, with a scattering of older men and women, all exercising several hours a week.",
0644                             "description_url": "http://jse.amstat.org/datasets/body.txt",
0645                             "url": "http://jse.amstat.org/datasets/body.dat.txt",
0646                             "filename": "body",
0647                             "name": "Exploring Relationships in Body Dimensions",
0648                             "separator": "SPACE",
0649                             "columns": ["Biacromial diameter", "Biiliac diameter, or \"pelvic breadth\"", "Bitrochanteric diameter", "Chest depth between spine and sternum at nipple level, mid-expiration",
0650                             "Chest diameter at nipple level, mid-expiration", "Elbow diameter, sum of two elbows", "Wrist diameter, sum of two wrists",
0651                             "Knee diameter, sum of two knees", "Ankle diameter, sum of two ankles", "Shoulder girth over deltoid muscles", "Chest girth, nipple line in males and just above breast tissue in females, mid-expiration",
0652                             "Waist girth, narrowest part of torso below the rib cage, average of contracted and relaxed position", "Navel (or \"Abdominal\") girth at umbilicus and iliac crest, iliac crest as a landmark",
0653                             "Hip girth at level of bitrochanteric diameter", "Thigh girth below gluteal fold, average of right and left girths", "Bicep girth, flexed, average of right and left girths",
0654                             "Forearm girth, extended, palm up, average of right and left girths", "Knee girth over patella, slightly flexed position, average of right and left girths",
0655                             "Calf maximum girth, average of right and left girths", "Ankle minimum girth, average of right and left girths", "Wrist minimum girth, average of right and left girths",
0656                             "Age (years)", "Weight (kg)", "Height (cm)", "Gender (1 - male, 0 - female)"]
0657                         }
0658                     ]
0659                 },
0660                 {
0661                     "name": "Technology",
0662                     "datasets": [
0663                         {
0664                             "description": "Specifications are given for 428 new vehicles for the 2004 year. The variables recorded include price, measurements relating to the size of the vehicle, and fuel efficiency.",
0665                             "description_url": "http://jse.amstat.org/datasets/04cars.txt",
0666                             "url": "http://jse.amstat.org/datasets/04cars.dat.txt",
0667                             "filename": "04cars",
0668                             "name": "2004 New Car and Truck Data",
0669                             "separator": "SPACE",
0670                             "columns": ["Vehicle Name", "Sports Car? (1=yes, 0=no)", "Sport Utility Vehicle? (1=yes, 0=no)", "Wagon? (1=yes, 0=no)", "Minivan? (1=yes, 0=no)", "Pickup? (1=yes, 0=no)", "All-Wheel Drive? (1=yes, 0=no)", "Rear-Wheel Drive? (1=yes, 0=no)", "Suggested Retail Price (U.S. Dollars)", "Dealer Cost (or 'invoice price') (U.S. Dollars)", "Engine Size (liters)", "Number of Cylinders (=-1 if rotary engine)", "Horsepower", "City Miles Per Gallon", "Highway Miles Per Gallon", "Weight (Pounds)", "Wheel Base (inches)", "Length (inches)", "Width (inches)"]
0671                         },
0672                         {
0673                             "description": "The data set contains the results of a calibration experiment designed to estimate volume of oysters and to compare two computer vision systems (2-D vs. 3-D) for classification of oysters based on their image size in number of pixels. ",
0674                             "description_url": "http://jse.amstat.org/datasets/30oysters.txt",
0675                             "url": "http://jse.amstat.org/datasets/30oysters.dat.txt",
0676                             "filename": "30oysters",
0677                             "name": "Oyster Volume Estimation Data",
0678                             "separator": "SPACE",
0679                             "columns": ["Oyster ID", "Oyster weight (g)", "Oyster volume (cc)", "Oyster size information from the 3-D imaging system (in volume pixels)", "Oyster size information from the 2-D imaging system (in pixels)"]
0680                         },
0681                         {
0682                             "description": "The data set gives a random sample of the length of visits of users entering the msnbc.com web site during September 28, 1999.\nThe length of the visit is an estimate of the total number of clicks or pages seen by each user and is based on web server \nlogs, thus it counts only pages recorded by the server. Pages cached in the user's browser or in a cache proxy server are \nunknown. The data set used in the paper is much larger than the one made available here but that larger data set is also \navailable in a page cited in the references.",
0683                             "description_url": "http://jse.amstat.org/datasets/msnbclength.txt",
0684                             "url": "http://jse.amstat.org/datasets/msnbclength.dat.txt",
0685                             "filename": "msnbclength",
0686                             "name": "Internet Data Analysis for Undergrad Curriculum",
0687                             "separator": ","
0688                         },
0689                         {
0690                             "description": "The video lottery terminal dataset contains observations on the three\nwindows of an electronic slot machine for 345 plays together with the\nprize paid out for each play. The prize payout distribution is so\nbadly skewed that confidence intervals for expected payout based on the\ncentral limit theorem are not accurate.  The dataset can be used at the\ngraduate or upper undergraduate level to illustrate parametric\nbootstrapping.  The dataset can also be used in a graduate course to\nillustrate tests of independence for two and three-way contingency\ntables involving random zeroes, or these tables may be collapsed and\nused as examples in an introductory course.",
0691                             "description_url": "http://jse.amstat.org/datasets/vlt.txt",
0692                             "url": "http://jse.amstat.org/datasets/vlt.dat.txt",
0693                             "filename": "vlt",
0694                             "name": "Video Lottery Terminal Data",
0695                             "separator": "SPACE"
0696                         }
0697                     ]
0698                 },
0699                 {
0700                     "name": "Politics",
0701                     "datasets": [
0702                         {
0703                             "description": "For each U.S. Senator, his or her votes on whether to remove President\nClinton on each of the two articles of impeachment (plus a summary\nvariable representing each Senator's number of \"guilty\" votes) are\nprovided, as well as each Senator's values on several variables that\ncould be predictive of vote (e.g., Senator's degree of conservatism,\nhow well Clinton did in the Senator's state in the 1996 Presidential\nelection).",
0704                             "description_url": "http://jse.amstat.org/datasets/impeach.txt",
0705                             "url": "http://jse.amstat.org/datasets/impeach.dat.txt",
0706                             "filename": "impeach",
0707                             "name": "U.S. Senate Votes on Clinton Removal",
0708                             "separator": "SPACE",
0709                             "columns": ["Name of senator", "State (postal code)", "Vote on Article I, Perjury:  0 = Not Guilty, 1 = Guilty", "Vote on Article II, Obstruction of Justice:  0 = NG, 1 = G",
0710                             "Number of votes for guilt", "Party:  0 = Democrat, 1 = Republican", "Senator's degree of ideological conservatism (0-100)", "Percent of the vote Clinton received in the 1996 Presidential election in each state",
0711                             "The year each Senator's seat is up and he/she must run for re-election (or retire)", "First-term senator? 0 = no, 1 = yes"]
0712                         },
0713                         {
0714                             "description": "The data consist of the numbers of days served in office for the 43 \nPresidents of the United States as of 4 February 2004.",
0715                             "description_url": "http://jse.amstat.org/datasets/outlier.txt",
0716                             "url": "http://jse.amstat.org/datasets/outlier.dat.txt",
0717                             "filename": "outlier",
0718                             "name": "A Dataset That Is 44% Outliers",
0719                             "separator": "SPACE",
0720                             "columns": ["Last name of President (text, spelled with no embedded spaces)", "days in office (counts)"]
0721                         }
0722                     ]
0723                 },
0724                 {
0725                     "name": "Education",
0726                     "datasets": [
0727                         {
0728                             "description": "This dataset contains variables that address the relationship between \npublic school expenditures and academic performance, as measured by the SAT.",
0729                             "description_url": "http://jse.amstat.org/datasets/sat.txt",
0730                             "url": "http://jse.amstat.org/datasets/sat.dat.txt",
0731                             "filename": "sat",
0732                             "name": "Getting What You Pay For: The Debate Over Equity in Public School Expenditures",
0733                             "separator": "SPACE",
0734                             "remove_quotes": true,
0735                             "columns": ["Name of state (in quotation marks)", "Current expenditure per pupil in average daily attendance in public elementary and secondary schools, 1994-95 (in thousands of dollars)",
0736                             "Average pupil/teacher ratio in public elementary and secondary schools, Fall 1994", "Estimated average annual salary of teachers in public elementary and secondary schools, 1994-95 (in thousands of dollars)",
0737                             "Percentage of all eligible students taking the SAT, 1994-95", "Average verbal SAT score, 1994-95", "Average math SAT score, 1994-95", "Average total score on the SAT, 1994-95"]
0738                         },
0739                         {
0740                             "description": "Data are from the 1995 U.S. News report on American colleges and\nuniversities.  They include demographic information on tuition,\nroom & board costs, SAT or ACT scores, application/acceptance\nrates, student/faculty ratio, graduation rate, and more.",
0741                             "description_url": "http://jse.amstat.org/datasets/usnews.txt",
0742                             "url": "http://jse.amstat.org/datasets/usnews.dat.txt",
0743                             "filename": "usnews",
0744                             "name": "U.S. News College data",
0745                             "separator": ",",
0746                             "columns": ["FICE (Federal ID number)", "College name", "State (postal code)", "Public/private indicator (public=1, private=2)", "Average Math SAT score", "Average Verbal SAT score", "Average Combined SAT score", "Average ACT score", "First quartile - Math SAT",
0747                             "Third quartile - Math SAT", "First quartile - Verbal SAT", "Third quartile - Verbal SAT", "First quartile - ACT", "Third quartile - ACT", "Number of applications received", "Number of applicants accepted", "Number of new students enrolled",
0748                             "Pct. new students from top 10% of H.S. class", "Pct. new students from top 25% of H.S. class", "Number of full-time undergraduates", "Number of part-time undergraduates", "In-state tuition", "Out-of-state tuition",
0749                             "Room and board costs", "Room costs", "Board costs", "Additional fees", "Estimated book costs", "Estimated personal spending", "Pct. of faculty with Ph.D.'s", "Pct. of faculty with terminal degree", "Student/faculty ratio",
0750                             "Pct. alumni who donate", "Instructional expenditure per student", "Graduation rate"]
0751                         }
0752                     ]
0753                 }
0754             ]
0755         }
0756     ]
0757 }