1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
package ca.spaz.cron.datasource.www; |
13 |
|
|
14 |
|
import java.io.*; |
15 |
|
import java.net.URL; |
16 |
|
import java.util.*; |
17 |
|
import java.util.regex.*; |
18 |
|
|
19 |
|
import org.apache.log4j.Logger; |
20 |
|
import org.htmlparser.util.Translate; |
21 |
|
|
22 |
|
import ca.spaz.cron.database.FoodGroup; |
23 |
|
|
24 |
|
|
25 |
|
|
26 |
|
|
27 |
|
|
28 |
|
|
29 |
|
public class Foodcatter { |
30 |
0 |
private static final Pattern CK_COM_GROUP = Pattern |
31 |
|
.compile("<option value=\"(\\d+)\">([^<]+)</option>"); |
32 |
|
|
33 |
0 |
private static final Pattern CK_COM_FN_END = Pattern.compile("</select>"); |
34 |
|
|
35 |
0 |
private static final Pattern CK_COM_FN_START = Pattern |
36 |
|
.compile("<select name=\"filter\""); |
37 |
|
|
38 |
|
private static final String CK_COM_URL = "http://www.calorieking.com/foods/"; |
39 |
|
|
40 |
|
private static final String ND_COM_URL = "http://www.nutritiondata.com/color/nutrition.js"; |
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
|
45 |
0 |
private static final Logger logger = Logger.getLogger(Foodcatter.class); |
46 |
|
|
47 |
0 |
private static Foodcatter instance = null; |
48 |
|
|
49 |
|
private URL url; |
50 |
|
|
51 |
0 |
private static final Pattern ND_COM_FN_START = Pattern |
52 |
|
.compile("function +foodcat"); |
53 |
|
|
54 |
0 |
private static final Pattern ND_COM_FN_END = Pattern.compile("}"); |
55 |
|
|
56 |
0 |
private static final Pattern ND_COM_GROUP = CK_COM_GROUP; |
57 |
|
|
58 |
|
public static final Foodcatter getInstance() { |
59 |
0 |
if (null == instance) { |
60 |
0 |
instance = new Foodcatter(); |
61 |
|
} |
62 |
0 |
return instance; |
63 |
|
} |
64 |
|
|
65 |
0 |
private Foodcatter() { |
66 |
0 |
} |
67 |
|
|
68 |
|
|
69 |
|
|
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
|
75 |
|
|
76 |
|
public List getFoodGroups(URL sourceUrl, Pattern startGroups, |
77 |
|
Pattern endGroups, Pattern groupID, int keyGroup, class="keyword">int nameGroup) |
78 |
|
throws IOException { |
79 |
0 |
InputStream ins = sourceUrl.openStream(); |
80 |
0 |
BufferedReader br = new BufferedReader(class="keyword">new InputStreamReader(ins)); |
81 |
0 |
String s = br.readLine(); |
82 |
0 |
StringBuffer sb = new StringBuffer(); |
83 |
0 |
while (s != null) { |
84 |
0 |
sb.append(s); |
85 |
0 |
s = br.readLine(); |
86 |
0 |
} |
87 |
|
|
88 |
0 |
List ret = new ArrayList(); |
89 |
|
|
90 |
0 |
Matcher matchFunction = startGroups.matcher(sb.toString()); |
91 |
0 |
Matcher matchGroup = groupID.matcher(sb.toString()); |
92 |
0 |
Matcher matchEnd = endGroups.matcher(sb.toString()); |
93 |
0 |
if (matchFunction.find()) { |
94 |
0 |
if (logger.isDebugEnabled()) { |
95 |
0 |
logger.debug("getFoodGroups() - Found function start"); |
96 |
|
} |
97 |
0 |
int sidx = matchFunction.end(); |
98 |
0 |
int eidx = -1; |
99 |
0 |
if (matchEnd.find(sidx)) { |
100 |
0 |
eidx = matchEnd.start(); |
101 |
|
} |
102 |
0 |
if (eidx < 0) { |
103 |
0 |
logger.error("getFoodGroups() - No end to function", null); |
104 |
0 |
ret = Collections.EMPTY_LIST; |
105 |
0 |
return ret; |
106 |
|
} |
107 |
0 |
while (sidx < eidx && sidx > 0) { |
108 |
0 |
if (matchGroup.find(sidx)) { |
109 |
0 |
sidx = matchGroup.end(); |
110 |
0 |
WWWFoodGroup fg = new WWWFoodGroup(matchGroup.group(keyGroup), |
111 |
|
Translate.decode(matchGroup.group(nameGroup))); |
112 |
0 |
ret.add(fg); |
113 |
0 |
} else { |
114 |
0 |
sidx = -1; |
115 |
|
} |
116 |
0 |
} |
117 |
|
} |
118 |
0 |
return ret; |
119 |
|
} |
120 |
|
|
121 |
|
public static void main(String[] args) throws IOException { |
122 |
0 |
List lis = getInstance().getFoodGroups(new URL(ND_COM_URL), |
123 |
|
ND_COM_FN_START, ND_COM_FN_END, ND_COM_GROUP, 1, 2); |
124 |
0 |
List lis2 = getInstance().getFoodGroups(new URL(CK_COM_URL), |
125 |
|
CK_COM_FN_START, CK_COM_FN_END, CK_COM_GROUP, 1, 2); |
126 |
0 |
for (Iterator iter = lis.iterator(); iter.hasNext();) { |
127 |
0 |
FoodGroup g = (FoodGroup) iter.next(); |
128 |
0 |
System.out.println(g); |
129 |
0 |
} |
130 |
0 |
for (Iterator iter = lis2.iterator(); iter.hasNext();) { |
131 |
0 |
FoodGroup g = (FoodGroup) iter.next(); |
132 |
0 |
System.out.println(g); |
133 |
0 |
} |
134 |
0 |
} |
135 |
|
|
136 |
|
} |