| 1 |
|
|
| 2 |
|
|
| 3 |
|
|
| 4 |
|
|
| 5 |
|
|
| 6 |
|
|
| 7 |
|
|
| 8 |
|
|
| 9 |
|
package ca.spaz.cron.datasource.www; |
| 10 |
|
|
| 11 |
|
import java.io.IOException; |
| 12 |
|
import java.net.*; |
| 13 |
|
import java.util.*; |
| 14 |
|
import java.util.regex.*; |
| 15 |
|
|
| 16 |
|
import org.apache.log4j.Logger; |
| 17 |
|
import org.xml.sax.SAXException; |
| 18 |
|
|
| 19 |
|
import ca.spaz.cron.CRONOMETER; |
| 20 |
|
import ca.spaz.cron.database.*; |
| 21 |
|
import ca.spaz.cron.datasource.FoodDatasourceException; |
| 22 |
|
|
| 23 |
|
import com.meterware.httpunit.*; |
| 24 |
|
|
| 25 |
|
|
| 26 |
|
|
| 27 |
|
|
| 28 |
|
|
| 29 |
|
|
| 30 |
|
public class NutritionDataDatasource extends AbstractWWWDatasource { |
| 31 |
|
|
| 32 |
|
|
| 33 |
|
|
| 34 |
0 |
private static final Logger logger = Logger |
| 35 |
0 |
.getLogger(NutritionDataDatasource.class); |
| 36 |
|
|
| 37 |
0 |
private static final Pattern ND_COM_FN_END = Pattern.compile("}"); |
| 38 |
|
|
| 39 |
0 |
private static final Pattern ND_COM_FN_START = Pattern |
| 40 |
|
.compile("function +foodcat"); |
| 41 |
|
|
| 42 |
0 |
private static final Pattern ND_COM_GROUP = Pattern |
| 43 |
|
.compile("<option value=\"(\\d+)\">([^<]+)</option>"); |
| 44 |
|
|
| 45 |
|
private static final String ND_COM_GROUPS_URL = "http://www.nutritiondata.com/color/nutrition.js"; |
| 46 |
|
|
| 47 |
0 |
private static final Pattern ND_COM_FOODSEARCH_FOOD = Pattern.compile( |
| 48 |
|
"<a class=\"list\" href=\"(facts[^\"]+)\">", Pattern.CASE_INSENSITIVE); |
| 49 |
|
|
| 50 |
|
private static final String ND_COM_BASE_URL = "http://www.nutritiondata.com/"; |
| 51 |
|
private static final String ND_COM_SEARCH_URL = "search.php?q=$query$&sourceid=Mozilla-search"; |
| 52 |
|
|
| 53 |
|
private URL baseURL; |
| 54 |
|
|
| 55 |
|
public static final NutritionDataDatasource createReadonlyFoodSource() { |
| 56 |
0 |
return new NutritionDataDatasource("NutritionData.com"); |
| 57 |
|
} |
| 58 |
|
|
| 59 |
|
|
| 60 |
|
|
| 61 |
|
|
| 62 |
|
private NutritionDataDatasource(String name) { |
| 63 |
0 |
super(name); |
| 64 |
|
try { |
| 65 |
0 |
this.baseURL = new URL(ND_COM_BASE_URL); |
| 66 |
0 |
} catch (MalformedURLException e) { |
| 67 |
0 |
throw new FoodDatasourceException("Unable to create URL -- cannot proceed", e); |
| 68 |
0 |
} |
| 69 |
0 |
} |
| 70 |
|
|
| 71 |
|
|
| 72 |
|
|
| 73 |
|
|
| 74 |
|
|
| 75 |
|
|
| 76 |
|
protected List doFindAllFoods() { |
| 77 |
0 |
return null; |
| 78 |
|
} |
| 79 |
|
|
| 80 |
|
|
| 81 |
|
|
| 82 |
|
|
| 83 |
|
|
| 84 |
|
|
| 85 |
|
protected List doGetSources() { |
| 86 |
0 |
List ret = new ArrayList(); |
| 87 |
0 |
ret.add("NutritionData.com"); |
| 88 |
0 |
return ret; |
| 89 |
|
} |
| 90 |
|
|
| 91 |
|
|
| 92 |
|
|
| 93 |
|
|
| 94 |
|
|
| 95 |
|
|
| 96 |
|
protected List doGetFoodGroups() { |
| 97 |
0 |
List ret = null; |
| 98 |
|
try { |
| 99 |
0 |
ret = getFoodGroups(new URL(ND_COM_GROUPS_URL), ND_COM_FN_START, |
| 100 |
|
ND_COM_FN_END, ND_COM_GROUP, 1, 2); |
| 101 |
0 |
} catch (MalformedURLException e) { |
| 102 |
0 |
logger.error("doGetFoodGroups()", e); |
| 103 |
0 |
ret = Collections.EMPTY_LIST; |
| 104 |
0 |
} catch (IOException e) { |
| 105 |
0 |
logger.error("doGetFoodGroups()", e); |
| 106 |
0 |
ret = Collections.EMPTY_LIST; |
| 107 |
0 |
} |
| 108 |
0 |
return ret; |
| 109 |
|
} |
| 110 |
|
|
| 111 |
|
|
| 112 |
|
|
| 113 |
|
|
| 114 |
|
|
| 115 |
|
|
| 116 |
|
protected void doClose() { |
| 117 |
|
|
| 118 |
0 |
} |
| 119 |
|
|
| 120 |
|
|
| 121 |
|
|
| 122 |
|
|
| 123 |
|
|
| 124 |
|
|
| 125 |
|
protected List doFindFoods(String[] keys) { |
| 126 |
0 |
return doFindFoods(keys, null, class="keyword">null); |
| 127 |
|
} |
| 128 |
|
|
| 129 |
|
|
| 130 |
|
|
| 131 |
|
|
| 132 |
|
|
| 133 |
|
|
| 134 |
|
|
| 135 |
|
protected List doFindFoods(String[] keys, String foodGroup, String source) { |
| 136 |
0 |
if (foodGroup != null) { |
| 137 |
|
|
| 138 |
|
} |
| 139 |
0 |
if (source != null) { |
| 140 |
|
|
| 141 |
|
} |
| 142 |
0 |
List ret = new ArrayList(); |
| 143 |
|
|
| 144 |
0 |
StringBuffer sb = new StringBuffer(); |
| 145 |
0 |
for (int i = 0; i < keys.length; i++) { |
| 146 |
0 |
sb.append(keys[i]); |
| 147 |
0 |
if (i < (keys.length - 1)) { |
| 148 |
0 |
sb.append("+"); |
| 149 |
|
} |
| 150 |
|
} |
| 151 |
0 |
String query = sb.toString(); |
| 152 |
0 |
sb = null; |
| 153 |
|
|
| 154 |
0 |
String urlQuery = ND_COM_SEARCH_URL.replaceAll("\\$query\\$", query); |
| 155 |
|
|
| 156 |
0 |
HttpUnitOptions.setExceptionsThrownOnScriptError(false); |
| 157 |
|
|
| 158 |
0 |
WebConversation conv = new WebConversation(); |
| 159 |
0 |
WebRequest req = new GetMethodWebRequest(baseURL, urlQuery); |
| 160 |
|
|
| 161 |
|
try { |
| 162 |
0 |
WebResponse resp = conv.getResponse(req); |
| 163 |
0 |
String doc = resp.getText(); |
| 164 |
|
|
| 165 |
0 |
Matcher foodFinder = ND_COM_FOODSEARCH_FOOD.matcher(doc); |
| 166 |
0 |
if (logger.isDebugEnabled()) { |
| 167 |
0 |
logger.debug("doFindFoods(String[], String, String)" |
| 168 |
|
+ ND_COM_FOODSEARCH_FOOD.toString()); |
| 169 |
|
} |
| 170 |
0 |
List foods = new ArrayList(); |
| 171 |
0 |
int idx = 0; |
| 172 |
0 |
while (foodFinder.find(idx)) { |
| 173 |
0 |
foods.add(foodFinder.group(1)); |
| 174 |
0 |
idx = foodFinder.end(1); |
| 175 |
0 |
} |
| 176 |
|
|
| 177 |
0 |
for (Iterator iter = foods.iterator(); iter.hasNext();) { |
| 178 |
0 |
String urlpart = (String) iter.next(); |
| 179 |
|
URL loader; |
| 180 |
|
try { |
| 181 |
0 |
loader = new URL(baseURL, urlpart); |
| 182 |
0 |
Food newFood = loadFoodFromURL(conv, loader); |
| 183 |
0 |
if (null != newFood) { |
| 184 |
0 |
ret.add(newFood); |
| 185 |
|
} |
| 186 |
0 |
} catch (MalformedURLException e) { |
| 187 |
0 |
logger.error("doFindFoods(String[], String, String)", e); |
| 188 |
0 |
} |
| 189 |
0 |
} |
| 190 |
0 |
} catch (MalformedURLException e) { |
| 191 |
0 |
logger.error("doFindFoods(String[], String, String)", e); |
| 192 |
0 |
} catch (IOException e) { |
| 193 |
0 |
logger.error("doFindFoods(String[], String, String)", e); |
| 194 |
0 |
} catch (SAXException e) { |
| 195 |
0 |
logger.error("doFindFoods(String[], String, String)", e); |
| 196 |
0 |
} |
| 197 |
|
|
| 198 |
0 |
return ret; |
| 199 |
|
} |
| 200 |
|
|
| 201 |
|
private NutritionDataFood loadFoodFromURL(WebConversation conv, URL loader) { |
| 202 |
0 |
NutritionDataFood ret = null; |
| 203 |
0 |
WebRequest req = new GetMethodWebRequest(loader, ""); |
| 204 |
|
WebResponse resp; |
| 205 |
|
try { |
| 206 |
0 |
resp = conv.getResponse(req); |
| 207 |
0 |
ret = new NutritionDataFood(this, resp); |
| 208 |
0 |
} catch (MalformedURLException e) { |
| 209 |
0 |
logger.error("loadFoodFromURL(WebConversation, URL)", e); |
| 210 |
0 |
} catch (IOException e) { |
| 211 |
0 |
logger.error("loadFoodFromURL(WebConversation, URL)", e); |
| 212 |
0 |
} catch (SAXException e) { |
| 213 |
0 |
logger.error("loadFoodFromURL(WebConversation, URL)", e); |
| 214 |
0 |
} |
| 215 |
0 |
return ret; |
| 216 |
|
} |
| 217 |
|
|
| 218 |
|
|
| 219 |
|
|
| 220 |
|
|
| 221 |
|
|
| 222 |
|
|
| 223 |
|
protected List doGetMeasuresFor(Food food) { |
| 224 |
0 |
return null; |
| 225 |
|
} |
| 226 |
|
|
| 227 |
|
|
| 228 |
|
|
| 229 |
|
|
| 230 |
|
|
| 231 |
|
|
| 232 |
|
|
| 233 |
|
protected void doGetNutrientsFor(Food food, NutrientTable nutrients) { |
| 234 |
|
|
| 235 |
0 |
} |
| 236 |
|
|
| 237 |
|
|
| 238 |
|
|
| 239 |
|
|
| 240 |
|
|
| 241 |
|
|
| 242 |
|
public boolean isAvailable() { |
| 243 |
0 |
return false; |
| 244 |
|
} |
| 245 |
|
|
| 246 |
|
public static void main(String[] args) { |
| 247 |
0 |
CRONOMETER.configureLogger(); |
| 248 |
0 |
NutritionDataDatasource nds = new NutritionDataDatasource("foo"); |
| 249 |
|
|
| 250 |
0 |
List foods = nds.findFoods(new String[] { "tutti" }); |
| 251 |
0 |
} |
| 252 |
|
|
| 253 |
|
public boolean containsFood(Food food) { |
| 254 |
|
|
| 255 |
0 |
return false; |
| 256 |
|
} |
| 257 |
|
|
| 258 |
|
} |