View Javadoc

1   /********************************************************************************
2    * ******************************************************************************
3    * Copyright (c) 2005 Chris Rose and AIMedia All rights reserved.
4    * NutritionDataDatasource and the accompanying materials are made available
5    * under the terms of the Common Public License v1.0 which accompanies this
6    * distribution, and is available at http://www.eclipse.org/legal/cpl-v10.html
7    * Contributors: Chris Rose
8    ******************************************************************************/
9   package ca.spaz.cron.datasource.www;
10  
11  import java.io.IOException;
12  import java.net.*;
13  import java.util.*;
14  import java.util.regex.*;
15  
16  import org.apache.log4j.Logger;
17  import org.xml.sax.SAXException;
18  
19  import ca.spaz.cron.CRONOMETER;
20  import ca.spaz.cron.database.*;
21  import ca.spaz.cron.datasource.FoodDatasourceException;
22  
23  import com.meterware.httpunit.*;
24  
25  /***
26   * A datasource that is tied to the website http://www.nutritiondata.com/
27   * 
28   * @author Chris Rose
29   */
30  public class NutritionDataDatasource extends AbstractWWWDatasource {
31     /***
32      * Logger for this class
33      */
34     private static final Logger logger = Logger
35           .getLogger(NutritionDataDatasource.class);
36  
37     private static final Pattern ND_COM_FN_END = Pattern.compile("}");
38  
39     private static final Pattern ND_COM_FN_START = Pattern
40           .compile("function +foodcat");
41  
42     private static final Pattern ND_COM_GROUP = Pattern
43           .compile("<option value=\"(//d+)\">([^<]+)</option>");
44  
45     private static final String ND_COM_GROUPS_URL = "http://www.nutritiondata.com/color/nutrition.js";
46  
47     private static final Pattern ND_COM_FOODSEARCH_FOOD = Pattern.compile(
48           "<a class=\"list\" href=\"(facts[^\"]+)\">", Pattern.CASE_INSENSITIVE);
49  
50     private static final String ND_COM_BASE_URL = "http://www.nutritiondata.com/";
51     private static final String ND_COM_SEARCH_URL = "search.php?q=$query$&sourceid=Mozilla-search";
52  
53     private URL baseURL;
54  
55     public static final NutritionDataDatasource createReadonlyFoodSource() {
56        return new NutritionDataDatasource("NutritionData.com");
57     }
58  
59     /***
60      * @param name
61      */
62     private NutritionDataDatasource(String name) {
63        super(name);
64        try {
65           this.baseURL = new URL(ND_COM_BASE_URL);
66        } catch (MalformedURLException e) {
67           throw new FoodDatasourceException("Unable to create URL -- cannot proceed", e);
68        }
69     }
70  
71     /*
72      * (non-Javadoc)
73      * 
74      * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindAllFoods()
75      */
76     protected List doFindAllFoods() {
77        return null;
78     }
79  
80     /*
81      * (non-Javadoc)
82      * 
83      * @see ca.spaz.cron.datasource.www.AbstractWWWDatasource#doGetSources()
84      */
85     protected List doGetSources() {
86        List ret = new ArrayList();
87        ret.add("NutritionData.com");
88        return ret;
89     }
90  
91     /*
92      * (non-Javadoc)
93      * 
94      * @see ca.spaz.cron.datasource.www.AbstractWWWDatasource#doGetFoodGroups()
95      */
96     protected List doGetFoodGroups() {
97        List ret = null;
98        try {
99           ret = getFoodGroups(new URL(ND_COM_GROUPS_URL), ND_COM_FN_START,
100                ND_COM_FN_END, ND_COM_GROUP, 1, 2);
101       } catch (MalformedURLException e) {
102          logger.error("doGetFoodGroups()", e);
103          ret = Collections.EMPTY_LIST;
104       } catch (IOException e) {
105          logger.error("doGetFoodGroups()", e);
106          ret = Collections.EMPTY_LIST;
107       }
108       return ret;
109    }
110 
111    /*
112     * (non-Javadoc)
113     * 
114     * @see ca.spaz.cron.datasource.www.AbstractWWWDatasource#doClose()
115     */
116    protected void doClose() {
117       // Nothing here, unless we have a connection to close.
118    }
119 
120    /*
121     * (non-Javadoc)
122     * 
123     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindFoods(java.lang.String[])
124     */
125    protected List doFindFoods(String[] keys) {
126       return doFindFoods(keys, null, null);
127    }
128 
129    /*
130     * (non-Javadoc)
131     * 
132     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindFoods(java.lang.String[],
133     *      java.lang.String, java.lang.String)
134     */
135    protected List doFindFoods(String[] keys, String foodGroup, String source) {
136       if (foodGroup != null) {
137          // Something here. Not sure what, really.
138       }
139       if (source != null) {
140          // Something here. Not sure what, really.
141       }
142       List ret = new ArrayList();
143 
144       StringBuffer sb = new StringBuffer();
145       for (int i = 0; i < keys.length; i++) {
146          sb.append(keys[i]);
147          if (i < (keys.length - 1)) {
148             sb.append("+");
149          }
150       }
151       String query = sb.toString();
152       sb = null;
153 
154       String urlQuery = ND_COM_SEARCH_URL.replaceAll("//$query//$", query);
155 
156       HttpUnitOptions.setExceptionsThrownOnScriptError(false);
157       
158       WebConversation conv = new WebConversation();
159       WebRequest req = new GetMethodWebRequest(baseURL, urlQuery);
160 
161       try {
162          WebResponse resp = conv.getResponse(req);
163          String doc = resp.getText();
164          
165          Matcher foodFinder = ND_COM_FOODSEARCH_FOOD.matcher(doc);
166          if (logger.isDebugEnabled()) {
167             logger.debug("doFindFoods(String[], String, String)"
168                   + ND_COM_FOODSEARCH_FOOD.toString());
169          }
170          List foods = new ArrayList();
171          int idx = 0;
172          while (foodFinder.find(idx)) {
173             foods.add(foodFinder.group(1));
174             idx = foodFinder.end(1);
175          }
176 
177          for (Iterator iter = foods.iterator(); iter.hasNext();) {
178             String urlpart = (String) iter.next();
179             URL loader;
180             try {
181                loader = new URL(baseURL, urlpart);
182                Food newFood = loadFoodFromURL(conv, loader);
183                if (null != newFood) {
184                   ret.add(newFood);
185                }
186             } catch (MalformedURLException e) {
187                logger.error("doFindFoods(String[], String, String)", e);
188             }
189          }         
190       } catch (MalformedURLException e) {
191          logger.error("doFindFoods(String[], String, String)", e);
192       } catch (IOException e) {
193          logger.error("doFindFoods(String[], String, String)", e);
194       } catch (SAXException e) {
195          logger.error("doFindFoods(String[], String, String)", e);
196       }
197 
198       return ret;
199    }
200 
201    private NutritionDataFood loadFoodFromURL(WebConversation conv, URL loader) {
202       NutritionDataFood ret = null;
203       WebRequest req = new GetMethodWebRequest(loader, "");
204       WebResponse resp;
205       try {
206          resp = conv.getResponse(req);
207          ret = new NutritionDataFood(this, resp);
208       } catch (MalformedURLException e) {
209          logger.error("loadFoodFromURL(WebConversation, URL)", e);
210       } catch (IOException e) {
211          logger.error("loadFoodFromURL(WebConversation, URL)", e);
212       } catch (SAXException e) {
213          logger.error("loadFoodFromURL(WebConversation, URL)", e);
214       }
215       return ret;
216    }
217 
218    /*
219     * (non-Javadoc)
220     * 
221     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doGetMeasuresFor(ca.spaz.cron.database.Food)
222     */
223    protected List doGetMeasuresFor(Food food) {
224       return null;
225    }
226 
227    /*
228     * (non-Javadoc)
229     * 
230     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doGetNutrientsFor(ca.spaz.cron.database.Food,
231     *      ca.spaz.cron.database.NutrientTable)
232     */
233    protected void doGetNutrientsFor(Food food, NutrientTable nutrients) {
234 
235    }
236 
237    /*
238     * (non-Javadoc)
239     * 
240     * @see ca.spaz.cron.datasource.IFoodDatasource#isAvailable()
241     */
242    public boolean isAvailable() {
243       return false;
244    }
245 
246    public static void main(String[] args) {
247       CRONOMETER.configureLogger();
248       NutritionDataDatasource nds = new NutritionDataDatasource("foo");
249 
250       List foods = nds.findFoods(new String[] { "tutti" });
251    }
252 
253    public boolean containsFood(Food food) {
254       // TODO Auto-generated method stub
255       return false;
256    }
257 
258 }