View Javadoc

1   /*
2    *******************************************************************************
3    * Copyright (c) 2005 Chris Rose and AIMedia
4    * All rights reserved. AbstractWWWDatasource and the accompanying materials
5    * are made available under the terms of the Common Public License v1.0
6    * which accompanies this distribution, and is available at
7    * http://www.eclipse.org/legal/cpl-v10.html
8    * 
9    * Contributors:
10   *     Chris Rose
11   *******************************************************************************/
12  package ca.spaz.cron.datasource.www;
13  
14  import java.io.*;
15  import java.net.URL;
16  import java.util.*;
17  import java.util.regex.*;
18  
19  import org.apache.log4j.Logger;
20  import org.htmlparser.util.Translate;
21  
22  import ca.spaz.cron.database.*;
23  import ca.spaz.cron.datasource.AbstractFoodDataSource;
24  
25  /***
26   * The base class of all WWW food datasources.  Provides convenience methods for
27   * some general functions, as well as some assertions about capabilities.
28   *  
29   * @author Chris Rose
30   */
31  public abstract class AbstractWWWDatasource extends AbstractFoodDataSource {
32     /***
33      * Logger for this class
34      */
35     private static final Logger logger = Logger
36           .getLogger(AbstractWWWDatasource.class);
37  
38     /***
39      * @param name The unique ID of this data source.
40      */
41     protected AbstractWWWDatasource(String name) {
42        super(name);
43     }
44  
45     /* (non-Javadoc)
46      * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindAllFoods()
47      */
48     protected abstract List doFindAllFoods();
49  
50     /* (non-Javadoc)
51      * @see ca.spaz.cron.datasource.IFoodDatasource#getSources()
52      */
53     public final List getSources() {
54        return Collections.unmodifiableList(doGetSources());
55     }
56  
57     /***
58      * Retrieve a list of Sources available from this data source.
59      * 
60      * @return a <code>List</code> of sources for this datasource. 
61      */
62     protected abstract List doGetSources();
63  
64     /* (non-Javadoc)
65      * @see ca.spaz.cron.datasource.IFoodDatasource#getFoodGroups()
66      */
67     public final List getFoodGroups() {
68        return Collections.unmodifiableList(doGetFoodGroups());
69     }
70  
71     /***
72      * Retrieve a list of food groups for this data source.
73      * 
74      * @return a <code>List</code> of <code>FoodGroup</code> implementations
75      * containing all of the food groups in this data source.
76      */
77     protected abstract List doGetFoodGroups();
78  
79     /* (non-Javadoc)
80      * @see ca.spaz.cron.datasource.IFoodDatasource#isSearchable()
81      */
82     public boolean isSearchable() {
83        return true;
84     }
85  
86     /* (non-Javadoc)
87      * @see ca.spaz.cron.datasource.IFoodDatasource#isListable()
88      */
89     public boolean isListable() {
90        return false;
91     }
92  
93     /* (non-Javadoc)
94      * @see ca.spaz.cron.datasource.IFoodDatasource#close()
95      */
96     public final void close() {
97        doClose();
98     }
99  
100    /***
101     * 
102     */
103    protected abstract void doClose();
104 
105    /***
106     * Utility method to retrieve food groups from a URL's data, using regular expressions
107     * to mark the beginning and end of the list, as well as a regex to get the food group itself.
108     * 
109     * @param sourceUrl This is the URL whose data will be read.
110     * @param startGroups This pattern should mark the beginning of the list of food groups.
111     * @param endGroups This pattern should mark the end of the list of food groups.
112     * @param groupID This pattern should mark an individual food group.  More specifically, 
113     * it <em>must</em> provide at least two groupings, one of which will be the group key for
114     * searching, and the other of which will be the group name for display.
115     * @param keyGroup This is the group number in the groupID pattern that marks the search
116     * key for the food group.
117     * @param nameGroup This is the group number in the groupID pattern that marks the name
118     * of the food group. 
119     * @return A <code>List</code> of FoodGroup implementations that contains all food groups
120     * available at the URL.
121     * @throws IOException if there is an error reading from the URL.
122     */
123    protected List getFoodGroups(URL sourceUrl, Pattern startGroups,
124          Pattern endGroups, Pattern groupID, int keyGroup, int nameGroup)
125          throws IOException {
126       
127       String grps = readURLAsString(sourceUrl);
128       
129       List ret = new ArrayList();
130    
131       Matcher matchFunction = startGroups.matcher(grps);
132       Matcher matchGroup = groupID.matcher(grps);
133       Matcher matchEnd = endGroups.matcher(grps);
134       if (matchFunction.find()) {
135          if (logger.isDebugEnabled()) {
136             logger.debug("getFoodGroups() - Found function start");
137          }
138          int sidx = matchFunction.end();
139          int eidx = -1;
140          if (matchEnd.find(sidx)) {
141             eidx = matchEnd.start();
142          }
143          if (eidx < 0) {
144             logger.error("getFoodGroups() - No end to function", null);
145             ret = Collections.EMPTY_LIST;
146             return ret;
147          }
148          while (sidx < eidx && sidx > 0) {
149             if (matchGroup.find(sidx)) {
150                sidx = matchGroup.end();
151                WWWFoodGroup fg = new WWWFoodGroupImpl(matchGroup.group(keyGroup),
152                      Translate.decode(matchGroup.group(nameGroup)));
153                ret.add(fg);
154             } else {
155                sidx = -1;
156             }
157          }
158       }
159       return ret;
160    }
161    
162    protected static String readURLAsString(URL source) {
163       StringBuffer out = new StringBuffer();
164       
165       try {
166          BufferedReader br = new BufferedReader(new InputStreamReader(source.openStream()));
167          String line = br.readLine();
168          while (line != null) {
169             out.append(line);
170             line = br.readLine();
171          }
172          br.close();
173       } catch (IOException e) {
174          logger.error("readURLAsString(URL)", e);
175       }
176       
177       return out.toString();
178    }
179 
180    /* (non-Javadoc)
181     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindFoods(java.lang.String[], java.lang.String, java.lang.String)
182     */
183    protected List doFindFoods(String[] keys, String foodGroup, String source) {
184       // TODO Auto-generated method stub
185       return null;
186    }
187 
188    /* (non-Javadoc)
189     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doFindFoods(java.lang.String[])
190     */
191    protected List doFindFoods(String[] keys) {
192       // TODO Auto-generated method stub
193       return null;
194    }
195 
196    /* (non-Javadoc)
197     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doGetMeasuresFor(ca.spaz.cron.database.Food)
198     */
199    protected List doGetMeasuresFor(Food food) {
200       // TODO Auto-generated method stub
201       return null;
202    }
203 
204    /* (non-Javadoc)
205     * @see ca.spaz.cron.datasource.AbstractFoodDataSource#doGetNutrientsFor(ca.spaz.cron.database.Food, ca.spaz.cron.database.NutrientTable)
206     */
207    protected void doGetNutrientsFor(Food food, NutrientTable nutrients) {
208       // TODO Auto-generated method stub
209       
210    }
211 
212    public void initialize() {
213       //No-op
214    }
215    
216 }