Coverage report

  %line %branch
ca.spaz.cron.datasource.sql.USDAsr17.USDAImporter
0% 
0% 

 1  
 /*
 2  
  * Created on 19-Mar-2005
 3  
  */
 4  
 package ca.spaz.cron.datasource.sql.USDAsr17;
 5  
 
 6  
 import java.io.*;
 7  
 import java.net.*;
 8  
 import java.sql.*;
 9  
 import java.util.*;
 10  
 import java.util.zip.*;
 11  
 
 12  
 import org.apache.log4j.Logger;
 13  
 
 14  
 import ca.spaz.task.Task;
 15  
 import ca.spaz.util.*;
 16  
 
 17  
 public class USDAImporter implements Task {
 18  
    /**
 19  
     * Number of nutrient lines read between flushes of the pending JDBC batches.
 20  
     */
 21  
    private static final int BATCH_SIZE = 100;
 22  
 
 23  
    private static final boolean TEST_MODE = false; // when true, foods and weights are parsed but not written to the database
 24  
    
 25  
    /**
 26  
     * Share of the 0-100 progress value that is spread over parsing the extracted data files.
 27  
     */
 28  
    private static final int CONVERSION_PROGRESS_PORTION = 60;
 29  
 
 30  
    /**
 31  
     * Share of the 0-100 progress value that is consumed by downloading the archive.
 32  
     */
 33  
    private static final int DOWNLOAD_PROGRESS_PORTION = 35;
 34  
 
 35  
    /**
 36  
     * Slot of fd_group.txt in the relevant-entry array; also the parse type for food group lines.
 37  
     */
 38  
    private static final int FD_GROUP_INDEX = 3;
 39  
 
 40  
    /**
 41  
     * Slot of weight.txt in the relevant-entry array; also the parse type for measure lines.
 42  
     */
 43  
    private static final int WEIGHT_INDEX = 2;
 44  
 
 45  
    /**
 46  
     * Slot of nut_data.txt in the relevant-entry array; also the parse type for nutrient lines.
 47  
     */
 48  
    private static final int NUT_DATA_INDEX = 1;
 49  
 
 50  
    /**
 51  
     * Slot of food_des.txt in the relevant-entry array; also the parse type for food description lines.
 52  
     */
 53  
    private static final int FOOD_DES_INDEX = 0;
 54  
 
 55  
    /**
 56  
     * Logger for this class
 57  
     */
 58  0
    private static final Logger logger = Logger.getLogger(USDAImporter.class);
 59  
 
 60  0
    private HashMap groups = new HashMap();
 61  0
    private HashMap foods = new HashMap();
 62  
 
 63  
    private Connection con;
 64  
 
 65  
    private static final int HASH_MAX = 50;
 66  
 
 67  
    private URL sourceURL;
 68  
 
 69  
    private InputStream sourceStream;
 70  
 
 71  
    private PrintStream out;
 72  
 
 73  
    private HashMap insertStmts;
 74  
 
 75  
    private HashMap updateStmts;
 76  
 
 77  
    private Set added_foods;
 78  
    
 79  0
    public USDAImporter(Connection conn, OutputStream out) {
 80  0
       this.con = conn;
 81  0
       this.added_foods = new HashSet();
 82  0
       if (null == out) {
 83  0
          this.out = System.out;
 84  0
       } else {
 85  0
          this.out = new PrintStream(out);
 86  
       }
 87  0
    }
 88  
 
 89  
 public InputStream getSourceStream() {
 90  0
       return sourceStream;
 91  
    }
 92  
 
 93  
    public void setSourceStream(InputStream sourceStream) {
 94  0
       this.sourceStream = sourceStream;
 95  0
    }
 96  
 
 97  
    public URL getSourceURL() {
 98  0
       return sourceURL;
 99  
    }
 100  
 
 101  
    public void setSourceURL(URL sourceURL) {
 102  0
       this.sourceURL = sourceURL;
 103  0
    }
 104  
 
 105  
    /* (non-Javadoc)
 106  
     * @see com.sun.swingutils.SwingWorker#construct()
 107  
     */
 108  
    public void run() {
 109  0
       abort = false;
 110  0
       curTask = "Importing USDA sr17";
 111  0
       if (null == sourceURL && class="keyword">null == sourceStream) {         
 112  0
          return;
 113  
       }
 114  0
       if (null == sourceStream) {
 115  
 //       From a source URL -- this will probably also be a zip.
 116  
          try {
 117  0
             File tempDir = new File(System.getProperty("java.io.tmpdir"));
 118  0
             tempDir = new File(tempDir, "usda_dl");
 119  0
             if (tempDir.exists()) {
 120  0
                out.print("Clearing old temp dir... ");
 121  0
                out.flush();
 122  0
                ToolBox.deleteDir(tempDir);
 123  0
                out.println("Done!");
 124  0
                out.flush();
 125  
             }
 126  0
             out.println("Creating temp directory.");
 127  0
             out.flush();
 128  0
             if (!tempDir.mkdir()) {
 129  
                // Indicate that it aborted, somehow.
 130  0
                return;
 131  
             }
 132  0
             File tempFile = new File(tempDir, "usda_sr17_temp.zip");
 133  0
             if (!downloadFile(sourceURL, tempFile, DOWNLOAD_PROGRESS_PORTION)) {
 134  0
                return; // Indicate failure.
 135  
             }
 136  0
             if (abort) return;
 137  0
             ZipFile zif = new ZipFile(tempFile);
 138  0
             ZipEntry[] relevantEntries = getZipEntries(zif);
 139  
             // Prepare the progress counts, starting with 50%, for the number of bytes
 140  
             // in each zip entry.
 141  0
             long nbytes = getExtractedBytes(relevantEntries);
 142  
 
 143  0
             progress = DOWNLOAD_PROGRESS_PORTION;
 144  
             InputStream ins;
 145  0
             int idx = FD_GROUP_INDEX;
 146  0
             int baseProgress = progress;
 147  0
             ins = zif.getInputStream(relevantEntries[idx]);
 148  0
             parseInputStream(ins, "food groups", relevantEntries[idx].getSize(), nbytes, baseProgress, idx);
 149  
             try {
 150  0
                PreparedStatement ps = con
 151  
                      .prepareStatement("INSERT INTO FoodGroup VALUES (?)");
 152  0
                for (Iterator iter = groups.values().iterator(); iter.hasNext();) {
 153  0
                   String grp = (String) iter.next();
 154  0
                   ps.clearParameters();
 155  0
                   ps.setString(1, grp);
 156  0
                   ps.execute();
 157  0
                }
 158  0
             } catch (SQLException e) {
 159  0
                out.println("[ERROR] Could not import food groups: " + e.getMessage());
 160  0
             }
 161  0
             if (!abort) {
 162  0
                idx = FOOD_DES_INDEX;
 163  0
                baseProgress = progress;
 164  0
                ins = zif.getInputStream(relevantEntries[idx]);
 165  0
                parseInputStream(ins, "food descriptions", relevantEntries[idx].getSize(), nbytes, baseProgress, idx);
 166  
             }
 167  0
             if (!abort) {
 168  0
                idx = WEIGHT_INDEX;
 169  0
                baseProgress = progress;
 170  0
                ins = zif.getInputStream(relevantEntries[idx]);
 171  0
                parseInputStream(ins, "measures", relevantEntries[idx].getSize(), nbytes, baseProgress, idx);
 172  
             }
 173  0
             if (!abort) {
 174  0
                idx = NUT_DATA_INDEX;
 175  0
                baseProgress = progress;
 176  0
                ins = zif.getInputStream(relevantEntries[idx]);
 177  0
                parseInputStream(ins, "nutrients", relevantEntries[idx].getSize(), nbytes, baseProgress, idx);
 178  
             }
 179  0
             zif.close();
 180  0
             if (ToolBox.deleteDir(tempDir)) {
 181  0
                out.println("Succeeded in clearing temp");
 182  0
                out.flush();
 183  0
             } else {
 184  0
                out.println("Failed to clear temp - delete " + tempFile + " manually.");
 185  0
                out.flush();
 186  
             }
 187  0
          } catch (FileNotFoundException e) {
 188  0
             e.printStackTrace();
 189  0
             return;
 190  0
          } catch (IOException e) {
 191  0
             e.printStackTrace();
 192  0
             return;
 193  0
          }
 194  
       } else {
 195  
 //       From an input stream.  Assume that it is the zip.
 196  0
          out.println("Reading data from local input stream");
 197  
       }
 198  0
       progress = 100;
 199  0
       return;
 200  
    }
 201  
    
 202  
    private void parseInputStream(InputStream ins, String streamName, long streamSize, class="keyword">long totalSize, int baseProgress, int parseType) throws IOException {
 203  0
       int progressSize = (class="keyword">int) (CONVERSION_PROGRESS_PORTION * (double) streamSize / totalSize);
 204  0
       out.print("Reading in " + streamName + "...");
 205  0
       out.flush();
 206  0
       CountableInputStream cbis = cbis = new CountableInputStream(ins);
 207  0
       BufferedReader brin = new BufferedReader(class="keyword">new InputStreamReader(cbis));
 208  0
       String line = brin.readLine();
 209  0
       int linenumber = 0;
 210  0
       while (line != null) {
 211  0
          if (abort) return;
 212  0
          switch (parseType) {
 213  
          case FD_GROUP_INDEX:
 214  0
             parseFoodGroupLine(line);
 215  0
             break;
 216  
          case FOOD_DES_INDEX:
 217  0
             USDAFood food = new USDAFood(line, groups);
 218  0
             foods.put(food.ndb_id, food);
 219  
             if (!TEST_MODE) {
 220  0
                food.addToDB(con);
 221  
             }
 222  0
             break;
 223  
          case WEIGHT_INDEX:
 224  0
             USDAWeight w = new USDAWeight(line);            
 225  
             if (!TEST_MODE) {
 226  0
                w.addToDB(con, foods);
 227  
             }  
 228  0
             break;
 229  
          case NUT_DATA_INDEX:
 230  0
             String[] parts = line.split("\\^");
 231  0
             for (int i = 0; i < parts.length; i++) {
 232  0
                parts[i] = parts[i].replaceAll("^~", "");
 233  0
                parts[i] = parts[i].replaceAll("~$", "");
 234  
             }
 235  0
             double amount = Double.parseDouble(parts[2]);
 236  
             //if (amount <= 0) continue;
 237  
             try {
 238  0
                USDANutrientInfo inf = (USDANutrientInfo) USDANutrientInfo.getNutrientMap().get(parts[1]);
 239  0
                if (null == inf) {
 240  0
                   break;
 241  
                }
 242  0
                USDAFood f = (USDAFood) foods.get(parts[0]);
 243  0
                if (!added_foods.contains(inf.table + "," + Integer.toHexString(f.hashCode()))) {
 244  
                   // Need to add the food
 245  0
                   PreparedStatement stmt = getCreateNutrientRow(inf.table);
 246  0
                   stmt.clearParameters();
 247  0
                   stmt.setInt(1, f.ID);
 248  0
                   stmt.addBatch();
 249  0
                   added_foods.add(inf.table + "," + Integer.toHexString(f.hashCode()));
 250  
                }
 251  0
                PreparedStatement stmt = getAddNutrient(inf.table, inf.tag);
 252  0
                stmt.clearParameters();
 253  0
                stmt.setDouble(1, amount);
 254  0
                stmt.setInt(2, f.ID);
 255  0
                stmt.addBatch();
 256  0
             } catch (SQLException e) {
 257  0
                throw new IllegalStateException(e);
 258  0
             }
 259  0
             if (linenumber % BATCH_SIZE == 0) {
 260  0
                executeBatches();
 261  0
             }
 262  
             break;
 263  
          default:
 264  0
             throw new IllegalArgumentException("Invalid parse type: " + parseType);
 265  
          }
 266  0
          progress = (int) (baseProgress + (progressSize * ((double) cbis.getBytesRead() / streamSize)));
 267  0
          line = brin.readLine();
 268  0
          linenumber++;
 269  0
       }
 270  0
       executeBatches();
 271  0
       out.println("Done.");
 272  0
    }
 273  
    
 274  
    private PreparedStatement getCreateNutrientRow(String table) {
 275  0
       if (null == insertStmts) {
 276  0
          insertStmts = new HashMap();
 277  
       }
 278  0
       PreparedStatement ret = (PreparedStatement) insertStmts.get(table);
 279  0
       if (null == ret) {
 280  
          try {
 281  0
             ret = con.prepareStatement("INSERT INTO " + table + " (FID) VALUES (?)");
 282  0
             if (logger.isInfoEnabled()) {
 283  0
                logger.info("prepared: 'INSERT INTO " + table + " (FID) VALUES (?)'");
 284  
             }
 285  0
          } catch (SQLException e) {
 286  0
             throw new IllegalStateException(e);
 287  0
          }
 288  0
          insertStmts.put(table, ret);
 289  
       }
 290  0
       return ret;
 291  
    }
 292  
    
 293  
    private PreparedStatement getAddNutrient(String table, String tag) {
 294  0
       if (null == updateStmts) {
 295  0
          updateStmts = new HashMap();
 296  
       }
 297  0
       PreparedStatement ret = (PreparedStatement) updateStmts.get(table + "," + tag);
 298  0
       if (null == ret) {
 299  
          try {
 300  0
             ret = con.prepareStatement("UPDATE " + table + " SET " + tag + " = ? WHERE FID = ?");
 301  0
             if (logger.isInfoEnabled()) {
 302  0
                logger.info("prepared: 'UPDATE " + table + " SET " + tag + " = ? WHERE FID = ?'");
 303  
             }
 304  0
          } catch (SQLException e) {
 305  0
             throw new IllegalStateException(e);
 306  0
          }
 307  0
          updateStmts.put(table + "," + tag, ret);
 308  
       }
 309  0
       return ret;
 310  
    }
 311  
    
 312  
    private void executeBatches() {
 313  0
       if (abort) return;
 314  0
       if (null != insertStmts) {
 315  0
          for (Iterator iter = insertStmts.values().iterator(); iter.hasNext();) {
 316  0
             PreparedStatement stmt = (PreparedStatement) iter.next();
 317  
             try {
 318  0
                stmt.executeBatch();
 319  0
             } catch (SQLException e) {
 320  0
                throw new IllegalStateException(e);
 321  0
             }
 322  0
          }
 323  
       }
 324  0
       if (null != updateStmts) {
 325  0
          for (Iterator iter = updateStmts.values().iterator(); iter.hasNext();) {
 326  0
             PreparedStatement stmt = (PreparedStatement) iter.next();
 327  
             try {
 328  0
                stmt.executeBatch();
 329  0
             } catch (SQLException e) {
 330  0
                throw new IllegalStateException(e);
 331  0
             }
 332  0
          }
 333  
       }
 334  0
    }
 335  
 
 336  
    /**
 337  
     * @param line
 338  
     */
 339  
    private void parseFoodGroupLine(String line) {
 340  0
       String[] parts = line.split("\\^");
 341  0
       for (int i = 0; i < parts.length; i++) {
 342  0
          parts[i] = parts[i].replaceAll("^~", "");
 343  0
          parts[i] = parts[i].replaceAll("~$", "");
 344  
       }
 345  0
       groups.put(parts[0], parts[1]);
 346  0
    }
 347  
 
 348  
    /**
 349  
     * @param relevantEntries
 350  
     * @return
 351  
     */
 352  
    private long getExtractedBytes(ZipEntry[] relevantEntries) {
 353  0
       long nbytes = 0;
 354  0
       for (int i = 0; i < relevantEntries.length; i++) {
 355  0
          nbytes += relevantEntries[i].getSize();
 356  
       }
 357  0
       if (nbytes < 0) {
 358  0
          nbytes = Long.MAX_VALUE;
 359  
       }
 360  0
       return nbytes;
 361  
    }
 362  
 
 363  
    /**
 364  
     * @param zif
 365  
     * @return
 366  
     */
 367  
    private ZipEntry[] getZipEntries(ZipFile zif) {
 368  0
       ZipEntry[] relevantEntries = new ZipEntry[4];
 369  0
       for(Enumeration e = zif.entries(); e.hasMoreElements();) {
 370  0
          ZipEntry went = (ZipEntry) e.nextElement();
 371  0
          if (went.getName().equalsIgnoreCase("food_des.txt")) {
 372  0
             relevantEntries[FOOD_DES_INDEX] = went;
 373  0
          } else if (went.getName().equalsIgnoreCase("nut_data.txt")) {
 374  0
             relevantEntries[NUT_DATA_INDEX] = went;
 375  0
          } else if (went.getName().equalsIgnoreCase("weight.txt")) {
 376  0
             relevantEntries[WEIGHT_INDEX] = went;
 377  0
          } else if (went.getName().equalsIgnoreCase("fd_group.txt")) {
 378  0
             relevantEntries[FD_GROUP_INDEX] = went;
 379  
          } else {
 380  
             // Irrelevant file.
 381  
          }
 382  0
       }
 383  0
       return relevantEntries;
 384  
    }
 385  
 
 386  
    /**
 387  
     * Download a URL into a file.  Takes the provided portion of the overall progress.
 388  
     * @param url an URL to download.
 389  
     * @param tempFile a file to save its contents in.
 390  
     * @param progressPortion The portion of the overall progress that this method takes.
 391  
     * @throws FileNotFoundException if the destination file cannot be found.
 392  
     * @throws IOException for all other I/O errors.
 393  
     */
 394  
    private boolean downloadFile(URL url, File tempFile, int progressPortion) throws FileNotFoundException, IOException {
 395  0
       FileOutputStream fos = new FileOutputStream(tempFile);
 396  0
       out.println("Created temp file: " + tempFile);
 397  0
       out.flush();
 398  0
       HttpURLConnection uconn = (HttpURLConnection) url.openConnection();
 399  0
       out.println("Getting data from " + url);
 400  0
       out.flush();
 401  0
       InputStream raw = uconn.getInputStream();
 402  0
       InputStream in = new BufferedInputStream(raw);
 403  0
       int contentLength = uconn.getContentLength();
 404  0
       byte[] data = new byte[contentLength];
 405  0
       int read = 0;
 406  0
       int offs = 0;
 407  0
       while (offs < contentLength && !abort) {
 408  0
          read = in.read(data, offs, data.length - offs);
 409  0
          if (read < 0) {
 410  0
             break;
 411  
          }
 412  0
          offs += read;
 413  0
          progress = (int)(progressPortion * (offs/(double)contentLength));
 414  0
       }
 415  0
       in.close();
 416  0
       if (offs != contentLength) {
 417  0
          out.println("Error downloading " + url);
 418  0
          out.flush();
 419  0
          return false;
 420  
       }
 421  0
       out.print("Completed downloading " + url + ", writing...");
 422  0
       out.flush();
 423  0
       fos.write(data);
 424  0
       fos.close();
 425  0
       out.println("Done!");
 426  0
       out.flush();
 427  0
       return true;
 428  
    }
 429  
 
 430  0
    private volatile int progress = 0;
 431  0
    private volatile boolean abort = false;
 432  
    private volatile String curTask;
 433  
    
 434  
    public int getTaskProgress() {
 435  0
       return progress;
 436  
    }
 437  
 
 438  
    public void abortTask() {
 439  0
       abort = true;
 440  0
    }
 441  
 
 442  
    public boolean canAbortTask() {
 443  0
       return true;
 444  
    }
 445  
 
 446  
    public String getTaskDescription() {
 447  0
       return curTask;
 448  
    }
 449  
 
 450  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.