欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

[原创] Google Custom Search & Yahoo Boss Search | Web Search API 使用

程序员文章站 2022-05-21 09:29:17
...
引用本文时,请标明本文地址

    话外音:前几天上网找资料时,发现自己写的文章被人转载了,心中暗喜。
可是我却发现,该转载者并未标明我的文章原文出处,不标明这个也罢了,
可是转载者竟然连标题都不改(我的文章标题已经写了【原创】二字)。
哎,哪怕你把"【原创】"两字给删了再转载也行啊!
    嘿嘿!话外音就说到这吧!开始正文。

    Google和Yahoo的新Web Search API已经升级了一段时间了,最近正好
有项目要做相关的API升级,因此就对新的API进行了调查,并写了perl和java
的例子。现将java例子的主要代码与大家分享下。

API简介:
Google升级后的Web Search叫"Google Custom Search"(简称CSE),
Yahoo升级后的Web Search叫"Yahoo! Search BOSS"。

二者都采用REST的方式进行调用,并且都支持JSON格式的返回结果。
以下例子,就是对Google CSE的基本使用加以说明,并且处理JSON形式的返回结果。

由于两者都是RESTful的,因此Yahoo的例子这里就不贴出来了
(因为可以很方便地根据下面的例子,改成Yahoo的),大家可以参考下
Google的例子,自己改写成支持"Yahoo! Search BOSS"。

本例子使用了httpclient4 + google cse api + json
httpclient4的使用入门,大家可以参考我写的如下文章:
http://yhz61010.iteye.com/blog/654678

本类功能说明:
1. 按指定关键字进行结果检索。
2. 查找指定的关键字或URL在Google上的排名。

以下是主类的详细source:
/**
 * Google Custom Search Engine (CSE) client.
 * <p>
 * Builds a GET request against
 * {@code https://www.googleapis.com/customsearch/v1}, maps the JSON reply onto
 * a {@link ResponseBean}, and offers helpers for ranking lookups and paging.
 * 
 * @author Michael Leo
 * @version 1.0 2011/01/24
 */
public class GoogleCSE {
	protected static final Class<GoogleCSE> clazz = GoogleCSE.class;

	/** Sent as the {@code cx} query parameter. */
	private String cseKey = "Your custom search id";

	/** Sent as the {@code key} query parameter. */
	private String apiConsoleKey = "Your console api id";

	/** Sent as the {@code q} query parameter. */
	private String keyword = "Google";

	/** Sent as the {@code lr} query parameter; omitted when blank. */
	private String language;

	/** Sent as the {@code num} query parameter. */
	private int num = 10;

	/** Sent as the {@code start} query parameter. */
	private int start = 1;

	/** The URI of the most recent request issued by {@link #execute()}. */
	private String uri;

	/** @param cseKey value for the {@code cx} query parameter */
	public void setCseKey(String cseKey) {
		this.cseKey = cseKey;
	}

	/** @param apiConsoleKey value for the {@code key} query parameter */
	public void setApiConsoleKey(String apiConsoleKey) {
		this.apiConsoleKey = apiConsoleKey;
	}

	/** @param keyword value for the {@code q} query parameter */
	public void setKeyword(String keyword) {
		this.keyword = keyword;
	}

	/** @param language value for the {@code lr} query parameter; blank to omit it */
	public void setLanguage(String language) {
		this.language = language;
	}

	/** @param num value for the {@code num} query parameter */
	public void setNum(int num) {
		this.num = num;
	}

	/** @param start value for the {@code start} query parameter */
	public void setStart(int start) {
		this.start = start;
	}

	/** @return the URI of the last request sent by {@link #execute()}, or null if none was sent */
	public String getUri() {
		return uri;
	}

	/**
	 * Finds the result items that mention any target word or target URL.
	 * <p>
	 * An item matches when its title or snippet contains one of
	 * {@code targetWords}, or when its link contains one of {@code targetUrls}.
	 * The rank of a match is the request's start index plus the item's position
	 * in the item list. Matches are de-duplicated on the {@code link} property
	 * before being returned.
	 * 
	 * @param targetWords
	 *            words to look for in title/snippet; may be null
	 * @param targetUrls
	 *            URL fragments to look for in the link; may be null
	 * @param result
	 *            the map produced by {@link #execute()}
	 * @return the de-duplicated matches, or null when both {@code targetWords}
	 *         and {@code targetUrls} are null
	 */
	public List<RankBean> getRank(String[] targetWords, String[] targetUrls,
			Map<String, Object> result) {
		if (targetWords == null && targetUrls == null) {
			Log.log(LogLevel.DEBUG, clazz,
					"Both of target words and urls are null.");
			return null;
		}

		List<RankBean> rank = new ArrayList<RankBean>();

		ResponseBean res = result == null ? null : (ResponseBean) result
				.get("response");
		@SuppressWarnings("unchecked")
		List<ItemsBean> list = result == null ? null
				: (List<ItemsBean>) result.get("items");

		// An error reply carries no "response" entry, and a reply may carry no
		// items at all; in both cases there is simply nothing to rank.
		if (res != null && list != null) {
			int startIndex = res.getQueries().getRequest().get(0)
					.getStartIndex();
			for (int i = 0; i < list.size(); i++) {
				ItemsBean item = list.get(i);
				if (item == null) {
					continue;
				}
				int position = startIndex + i;
				if (containsAny(item.getTitle(), targetWords)
						|| containsAny(item.getSnippet(), targetWords)) {
					rank.add(toRankBean(item, position, "targetWords"));
				}
				if (containsAny(item.getLink(), targetUrls)) {
					rank.add(toRankBean(item, position, "targetUrls"));
				}
			}
		}

		return RemoveDuplication.removeDuplication(rank, "link");
	}

	/**
	 * Tells whether {@code text} contains at least one of {@code targets}.
	 * Null text, a null array and null array elements never match.
	 */
	private static boolean containsAny(String text, String[] targets) {
		if (text == null || targets == null) {
			return false;
		}
		for (int i = 0; i < targets.length; i++) {
			if (targets[i] != null && text.indexOf(targets[i]) > -1) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Creates a {@link RankBean} with the given rank and the properties copied
	 * from {@code item}. A copy failure is logged (with its cause) and the
	 * partially filled bean is still returned.
	 */
	private static RankBean toRankBean(ItemsBean item, int position,
			String source) {
		RankBean ranking = new RankBean();
		ranking.setRank(position);
		try {
			BeanUtils.copyProperties(ranking, item);
		} catch (Exception e) {
			Log.log(LogLevel.DEBUG, clazz, "Can't copy properties: " + source
					+ ": " + e);
		}
		return ranking;
	}

	/**
	 * Extracts the paging information for the page after the current one.
	 * 
	 * @param result
	 *            the map produced by {@link #execute()}
	 * @return the first {@code nextPage} entry of the response, or null when
	 *         the map, the response, its queries or its next-page list is
	 *         missing or empty
	 */
	public NextPageBean nextPageInfo(Map<String, Object> result) {
		if (result == null) {
			return null;
		}
		ResponseBean res = (ResponseBean) result.get("response");
		if (res == null || res.getQueries() == null
				|| res.getQueries().getNextPage() == null
				|| res.getQueries().getNextPage().isEmpty()) {
			return null;
		}
		return res.getQueries().getNextPage().get(0);
	}

	/**
	 * Sends the search request built from this object's fields and converts
	 * the JSON reply.
	 * <p>
	 * On an error reply the returned map holds only {@code "error"}. Otherwise
	 * it holds {@code "totalResults"}, {@code "count"}, {@code "startIndex"},
	 * {@code "items"} and {@code "response"} (the full {@link ResponseBean}).
	 * 
	 * @return the result map, or null when the HTTP response has no entity
	 * @throws Exception
	 *             if building the URI, executing the request or reading the
	 *             reply fails
	 */
	public Map<String, Object> execute() throws Exception {
		Map<String, Object> result = new HashMap<String, Object>();

		DefaultHttpClient httpclient = new DefaultHttpClient();
		try {
			List<NameValuePair> params = new ArrayList<NameValuePair>();
			params.add(new BasicNameValuePair("alt", "json"));
			params.add(new BasicNameValuePair("cx", cseKey));
			params.add(new BasicNameValuePair("key", apiConsoleKey));
			params.add(new BasicNameValuePair("q", keyword));
			if (StringUtils.isNotBlank(language)) {
				params.add(new BasicNameValuePair("lr", language));
			}
			params.add(new BasicNameValuePair("num", String.valueOf(num)));
			params.add(new BasicNameValuePair("start", String.valueOf(start)));
			URI uri = URIUtils.createURI("https", "www.googleapis.com", -1,
					"/customsearch/v1",
					URLEncodedUtils.format(params, "UTF-8"), null);

			HttpGet httpget = new HttpGet(uri);
			this.uri = httpget.getURI().toString();
			Log.log(LogLevel.DEBUG, clazz, this.uri);

			HttpResponse response = httpclient.execute(httpget);
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				Log.log(LogLevel.DEBUG, clazz, "Entity is null.");
				return null;
			}
			entity = new BufferedHttpEntity(entity);

			String strResponse = EntityUtils.toString(entity, HTTP.UTF_8);
			JSONObject json = JsonUtils.object2Json(strResponse);

			// JSON property name -> bean class used for that nested object.
			Map<String, Class<?>> classMap = new HashMap<String, Class<?>>();
			classMap.put("bodyLines", BodyLinesBean.class);
			classMap.put("context", ContextBean.class);
			classMap.put("items", ItemsBean.class);
			classMap.put("nextPage", NextPageBean.class);
			classMap.put("previousPage", PreviousPageBean.class);
			classMap.put("promotions", PromotionsBean.class);
			classMap.put("queries", QueriesBean.class);
			classMap.put("request", RequestBean.class);
			classMap.put("url", UrlBean.class);
			classMap.put("pagemap", PageMapBean.class);
			classMap.put("metatags", MetatagsBean.class);
			classMap.put("person", PersonBean.class);
			classMap.put("hcard", HcardBean.class);
			classMap.put("Movie", MovieBean.class);
			classMap.put("moviereview", MovieReviewBean.class);
			classMap.put("error", ErrorBean.class);
			classMap.put("errors", ErrorsBean.class);
			ResponseBean res = JsonUtils.json2Object(json, ResponseBean.class,
					classMap);

			if (res.getError() != null) {
				result.put("error", res.getError());
			} else {
				RequestBean request = res.getQueries().getRequest().get(0);
				result.put("totalResults", request.getTotalResults());
				result.put("count", request.getCount());
				result.put("startIndex", request.getStartIndex());
				result.put("items", res.getItems());
				result.put("response", res);
			}

			return result;
		} finally {
			// Release the client's connections on every exit path, including
			// the early null return and any exception.
			httpclient.getConnectionManager().shutdown();
		}
	}
}


以下是Junit的测试类:

/**
 * Google CSE Test
 * 
 * @author Michael Leo
 * @version 2011/01/25
 */
public class GoogleCSETest {
	@Test
	public void case01() throws Exception {
		P.p("Google CSE - Start.");
		P.p();
		long ast = System.currentTimeMillis();
		long aed = 0;

		long st = 0;
		long ed = 0;

		GoogleCSE cse = new GoogleCSE();		
		cse.setApiConsoleKey("Your console api key");

		cse.setCseKey("Your cse key");
		cse.setKeyword("Google");
		cse.setLanguage("lang_zh-CN");
		cse.setNum(10);

		NextPageBean np = null;
		@SuppressWarnings("unused")
		int index = 1;
		ErrorBean err = null;
		do {
			st = System.currentTimeMillis();
			Map<String, Object> result = cse.execute();
			if ((err = (ErrorBean) result.get("error")) != null) {
				P.p("Error code: " + err.getCode());
				P.p("Message: " + err.getMessage());
				return;
			}
			np = cse.nextPageInfo(result);
			if (np != null) {
				cse.setStart(np.getStartIndex());
			}

			int startIndex = ((ResponseBean) result.get("response"))
					.getQueries().getRequest().get(0).getStartIndex();
			P.p("Start index: " + startIndex);
			P.p("Query url:\n" + cse.getUri());
			P.p("totalResults: "
					+ MiscellaneousUtils.formatNumber(result
							.get("totalResults")));

			// @SuppressWarnings("unchecked")
			// List<ItemsBean> list = (List<ItemsBean>) result.get("items");
			//
			// for (int i = 0; i < list.size(); i++) {
			// P.p(index++ + ": " + list.get(i).getTitle());
			// P.p(list.get(i).getSnippet());
			// P.p(list.get(i).getLink());
			// P.p();
			// }

			String[] targetWords = { "Google Chrome" };
			String[] targetUrls = { "google.com" };
			List<RankBean> ranking = cse.getRank(targetWords, targetUrls,
					result);

			for (int i = 0; i < ranking.size(); i++) {
				P.p();
				P.p("Rank: " + ranking.get(i).getRank());
				P.p(ranking.get(i).getTitle());
				P.p(ranking.get(i).getSnippet());
				P.p(ranking.get(i).getLink());
			}

			ed = System.currentTimeMillis();
			P.p("Cost: " + (ed - st) / 1000.0 + "s");
			P.p();
		} while (np != null);
		aed = System.currentTimeMillis();
		P.p("Google CSE - Finished.");
		P.p("Cost: " + (aed - ast) / 1000.0 + "s");
	}
}