Jsoup抓取数据实现为一个网站做第三方Android客户端
一前言
学Android有很大一部分的乐趣其实就是做出一个网站客户端,比官方广告少,速度快,不臃肿,受到众多网友追捧。
由于学校图书馆网站没有APP,网站也没有移动版,体验相当差,用得实在太烦,就做了这个小应用,先来看下效果。
二Jsoup实现抓取书名
Jsoup是一个Java的HTML解析工具包,百度一搜一大堆,这里不对Jsoup做过多介绍,先来看看对http://222.188.3.137:8080/opac/search.php的抓取。
抓取到数据之后,只要往适配器里填充数据就好了。
package com.wyf.newlibrary;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * Scrapes a single result page: collects the text and absolute link of every
 * {@code <a>} nested in an {@code <h3>}, plus the link of the "next page" anchor.
 *
 * <p>Call {@link #init()} (it performs blocking network I/O, so not on the UI
 * thread) and then read the results through the getters.
 */
public class BookNameJsoup {
    /** Capacity of the result arrays; callers iterate up to this many slots. */
    private static final int MAX_RESULTS = 20;

    String url;

    String[] bName;
    String[] bLink;
    String nextPage;

    /**
     * @param link address of the result page to fetch
     */
    public BookNameJsoup(String link) {
        url = link;
        bName = new String[MAX_RESULTS];
        bLink = new String[MAX_RESULTS];
    }

    /**
     * Downloads and parses the page. On any failure the result arrays stay
     * empty (all {@code null}) and {@link #getNextPage()} returns {@code null}.
     */
    public void init() {
        nextPage = null;
        if (url == null || url.length() == 0) {
            // Nothing to fetch; Jsoup.connect would reject the argument anyway.
            return;
        }
        try {
            Document doc = Jsoup.connect(url).get();

            int count = 0;
            Elements titleLinks = doc.getElementsByTag("h3").select("a");
            for (Element anchor : titleLinks) {
                if (count >= MAX_RESULTS) {
                    // The arrays are fixed-size; a page with more matches must
                    // not overflow them.
                    break;
                }
                bName[count] = anchor.text().trim();
                bLink[count] = anchor.attr("abs:href");
                count++;
            }

            Elements pagerLinks = doc.getElementsByAttributeValue("class", "blue");
            for (Element anchor : pagerLinks) {
                if (anchor.text().contains("下一页")) {
                    nextPage = anchor.attr("abs:href");
                    break;
                }
            }
        } catch (IOException e) {
            // Network or HTTP failure: leave the results empty.
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            // Jsoup rejects malformed URLs with an unchecked exception; treat
            // it like a failed load instead of crashing the worker thread.
            e.printStackTrace();
        }
    }

    /**
     * Returns the scraped titles. The array always has a fixed length; unused
     * trailing slots are {@code null}.
     */
    public String[] getBookName() {
        return bName;
    }

    /**
     * Returns the absolute links, index-aligned with {@link #getBookName()}.
     */
    public String[] getLink() {
        return bLink;
    }

    /**
     * Returns the absolute URL of the next result page, or {@code null} when
     * no "next page" anchor was found (or the load failed).
     */
    public String getNextPage() {
        return nextPage;
    }

    /**
     * Tells whether the last {@link #init()} produced no titles at all.
     */
    public boolean isEmpty() {
        return bName[0] == null;
    }
}
三 MainActivity(书名列表界面)
在MainActivity中有一个ListView,把用Jsoup抓取到的数据往里面填充,ListView只用了极其简单的布局,一看就能明白。
package com.wyf.newlibrary;

import android.content.Intent;
import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.KeyEvent;
import android.view.LayoutInflater;
import android.view.View;
import android.view.inputmethod.EditorInfo;
import android.widget.AbsListView;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.EditText;
import android.widget.ImageButton;
import android.widget.ListView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;

import com.umeng.analytics.MobclickAgent;
import com.umeng.update.UmengUpdateAgent;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;

/**
 * Search screen: the user types a title keyword, the matching book titles are
 * scraped page by page with {@link BookNameJsoup} and shown in a ListView.
 * Reaching the bottom of the list loads the next page; tapping a row opens
 * {@link BookInfoActivity} for that book.
 */
public class MainActivity extends AppCompatActivity {
    private static final String SEARCH_URL_PREFIX =
            "http://222.188.3.137:8080/opac/openlink.php?strSearchType=title&match_flag=forward&historyCount=1&strText=";
    private static final String SEARCH_URL_SUFFIX =
            "&doctype=ALL&with_ebook=on&displaypg=20&showmode=list&sort=CATA_DATE&orderby=desc&dept=ALL&page=1";

    EditText edit_search;
    /** URL of the next page to load; {@code null} when there is nothing more to fetch. */
    String url;
    String key;
    ListView list_bookname;
    ArrayList<String> bookName;   // scraped titles shown in the list
    ArrayList<String> bookLink;   // detail-page links, index-aligned with bookName
    ArrayAdapter<String> adapter;
    /** The page load currently in flight, or {@code null} when idle. */
    GetBookName get;
    View foot, complete, fail;
    ProgressBar progressBar;
    boolean firstLoad = false;
    ImageButton ibtn_clear;

    /** Becomes true once the user has submitted at least one search. */
    private boolean hasSearched = false;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        if (getSupportActionBar() != null) {
            getSupportActionBar().hide();
        }
        setContentView(R.layout.activity_main);
        init();
        initEvent();
        list_bookname.setAdapter(adapter);
    }

    @Override
    protected void onDestroy() {
        // Do not let a finished activity receive results from a pending load.
        if (get != null) {
            get.cancel(true);
            get = null;
        }
        super.onDestroy();
    }

    /** Wires up the search box, the load-more-on-scroll logic and row clicks. */
    private void initEvent() {
        edit_search.setOnEditorActionListener(new TextView.OnEditorActionListener() {
            @Override
            public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
                if (actionId == EditorInfo.IME_ACTION_SEARCH) {
                    startSearch(edit_search.getText().toString().trim());
                }
                return false;
            }
        });

        list_bookname.setOnScrollListener(new AbsListView.OnScrollListener() {
            @Override
            public void onScrollStateChanged(AbsListView view, int scrollState) {
                if (scrollState != AbsListView.OnScrollListener.SCROLL_STATE_IDLE) {
                    return;
                }
                if (view.getLastVisiblePosition() != view.getCount() - 1) {
                    return;
                }
                // An empty list also satisfies the "at bottom" test, so ignore
                // scroll events before the first search and while a page is
                // already loading (otherwise the same page is fetched twice).
                if (!hasSearched || get != null) {
                    return;
                }
                if (url != null) {
                    list_bookname.addFooterView(foot);
                    startLoad(new GetBookName());
                } else if (bookName.isEmpty()) {
                    showFooter(fail);
                } else {
                    showFooter(complete);
                }
            }

            @Override
            public void onScroll(AbsListView view, int firstVisibleItem, int visibleItemCount,
                                 int totalItemCount) {
                // Nothing to track; the idle callback queries the list directly.
            }
        });

        // Tapping a row opens the detail screen for that book.
        list_bookname.setOnItemClickListener(new AdapterView.OnItemClickListener() {
            @Override
            public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
                // Footer rows are clickable too and lie past the end of bookLink.
                if (position < 0 || position >= bookLink.size()) {
                    return;
                }
                String href = bookLink.get(position);
                if (href == null) {
                    return;
                }
                Intent intent = new Intent(MainActivity.this, BookInfoActivity.class);
                intent.putExtra("href", href);
                startActivity(intent);
            }
        });
    }

    /**
     * Starts a fresh search for {@code keyword}, discarding any previous
     * results and any page load still in flight.
     */
    private void startSearch(String keyword) {
        if (keyword.length() == 0) {
            Toast.makeText(MainActivity.this, "不能为空", Toast.LENGTH_SHORT).show();
            return;
        }
        String encoded;
        try {
            encoded = URLEncoder.encode(keyword, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return;
        }

        if (get != null) {
            get.cancel(true);
            get = null;
        }
        bookName.clear();
        bookLink.clear();
        // The adapter must hear about every change to its backing list.
        adapter.notifyDataSetChanged();
        list_bookname.removeFooterView(foot);
        list_bookname.removeFooterView(fail);
        list_bookname.removeFooterView(complete);

        hasSearched = true;
        firstLoad = true;
        startLoad(new GetBookName(encoded));
    }

    /** Records {@code task} as the load in flight and runs it. */
    private void startLoad(GetBookName task) {
        get = task;
        task.execute();
    }

    /** Shows {@code footer} exactly once, even if it is already attached. */
    private void showFooter(View footer) {
        list_bookname.removeFooterView(footer);
        list_bookname.addFooterView(footer);
    }

    /** Looks up the views and creates the adapter and the footer views. */
    private void init() {
        edit_search = (EditText) findViewById(R.id.edit_search);
        list_bookname = (ListView) findViewById(R.id.list_bookname);
        bookName = new ArrayList<String>();
        bookLink = new ArrayList<String>();
        adapter = new ArrayAdapter<String>(MainActivity.this,
                android.R.layout.simple_list_item_1, bookName);
        foot = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_foot, null);
        fail = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_fail, null);
        complete = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_complete, null);
        progressBar = (ProgressBar) findViewById(R.id.progressBar);
        UmengUpdateAgent.update(this);
    }

    /**
     * Loads one page of search results in the background and appends the
     * titles and links to the list.
     */
    class GetBookName extends AsyncTask<Void, Void, Void> {
        BookNameJsoup jsoup;
        URL href;
        /** The page this task fetches, fixed at construction on the UI thread. */
        private final String pageUrl;

        /**
         * Creates a task that loads the first result page for a keyword.
         *
         * @param keyword the search term, already URL-encoded
         */
        public GetBookName(String keyword) {
            super();
            url = SEARCH_URL_PREFIX + keyword + SEARCH_URL_SUFFIX;
            try {
                href = new URL(url);
                url = href.toString();
            } catch (MalformedURLException e) {
                e.printStackTrace();
            }
            pageUrl = url;
        }

        /** Creates a task that loads the page the activity's {@code url} points at. */
        public GetBookName() {
            super();
            pageUrl = url;
        }

        @Override
        protected void onPreExecute() {
            super.onPreExecute();
            Log.d("TAG", "onPreExecute: " + pageUrl);
            if (firstLoad) {
                progressBar.setVisibility(View.VISIBLE);
                firstLoad = false;
            }
        }

        @Override
        protected Void doInBackground(Void... params) {
            jsoup = new BookNameJsoup(pageUrl);
            jsoup.init();
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            super.onPostExecute(result);
            if (get != this) {
                // A newer search replaced this load; drop the stale results.
                return;
            }
            get = null;
            progressBar.setVisibility(View.GONE);

            String[] book = jsoup.getBookName();
            String[] link = jsoup.getLink();
            for (int i = 0; book != null && i < book.length; i++) {
                if (book[i] == null) {
                    break;
                }
                bookName.add(book[i]);
                Log.d("TAG", "onPostExecute: " + book[i]);
                // Always add one link per title so both lists stay index-aligned.
                bookLink.add(link != null && i < link.length ? link[i] : null);
            }

            if (bookName.isEmpty()) {
                showFooter(fail);
            } else {
                list_bookname.removeFooterView(fail);
            }
            url = jsoup.getNextPage();
            list_bookname.removeFooterView(foot);
            adapter.notifyDataSetChanged();
        }
    }
}
四 书的详细信息
这里展现的是书的信息。如图
package com.wyf.newlibrary;

import android.os.AsyncTask;
import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.util.Log;
import android.view.MenuItem;
import android.view.View;
import android.widget.ImageView;
import android.widget.ScrollView;
import android.widget.TextView;

import com.android.volley.RequestQueue;
import com.android.volley.Response;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.StringRequest;
import com.android.volley.toolbox.Volley;
import com.umeng.analytics.MobclickAgent;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.UnsupportedEncodingException;

/**
 * Detail screen for one book. Downloads the page passed in the "href" intent
 * extra with Volley, parses it with Jsoup in the background, and shows the
 * title, the summary (when present) and the shelf locations.
 */
public class BookInfoActivity extends AppCompatActivity {
    TextView text_bookName, text_douban, text_position;
    ImageView image_logo;
    String url, logoUrl;
    ScrollView scrollView;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_book_info);
        if (getSupportActionBar() != null) {
            getSupportActionBar().setDisplayHomeAsUpEnabled(true);
        }
        setTitle("图书信息");
        url = getIntent().getStringExtra("href");
        init();

        if (url == null || url.length() == 0) {
            // Without a link there is nothing to load or show.
            Log.d("TAG", "onCreate: missing href extra");
            finish();
            return;
        }

        RequestQueue queue = Volley.newRequestQueue(BookInfoActivity.this);
        StringRequest stringRequest = new StringRequest(url, new Response.Listener<String>() {
            @Override
            public void onResponse(String s) {
                try {
                    // NOTE(review): this reinterprets the body as UTF-8 after
                    // Volley decoded it as ISO-8859-1, which is presumably what
                    // happens when the server sends no charset header. It would
                    // garble the text if the server ever declares a charset -
                    // confirm against the real response headers.
                    String html = new String(s.getBytes("ISO-8859-1"), "utf-8");
                    new GetDetail(html).execute();
                } catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
            }
        }, new Response.ErrorListener() {
            @Override
            public void onErrorResponse(VolleyError volleyError) {
                Log.d("TAG", "onErrorResponse: " + "i fail");
            }
        });
        queue.add(stringRequest);
    }

    /** Looks up the views this screen writes to. */
    private void init() {
        text_bookName = (TextView) findViewById(R.id.text_bookname);
        text_douban = (TextView) findViewById(R.id.text_douban);
        text_position = (TextView) findViewById(R.id.text_position);
        scrollView = (ScrollView) findViewById(R.id.scroll_position);
    }

    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        if (item.getItemId() == android.R.id.home) {
            finish();
            return true;
        }
        return super.onOptionsItemSelected(item);
    }

    /**
     * Parses the downloaded detail page off the UI thread and then fills in
     * the views.
     */
    class GetDetail extends AsyncTask<Void, Void, Void> {
        String response, name, position = "图书位置:\n";
        /** Summary text, or {@code null} when the page has none. */
        String douBan;

        /**
         * @param response HTML of the book detail page
         */
        public GetDetail(String response) {
            super();
            this.response = response;
        }

        @Override
        protected Void doInBackground(Void... params) {
            Document doc = Jsoup.parse(response);

            // The title is the text after the first ':' of the first
            // "booklist" element; the page may not contain one at all.
            Element firstEntry = doc.getElementsByAttributeValue("class", "booklist").first();
            if (firstEntry != null) {
                String text = firstEntry.text();
                name = text.substring(text.indexOf(":") + 1);
            }

            // The summary is the entry labelled "提要文摘附注" that carries more
            // than just the label; the last such entry wins.
            Elements entries = doc.select("dl.booklist");
            for (Element entry : entries) {
                String text = entry.text();
                if (text.contains("提要文摘附注") && text.length() > 8) {
                    douBan = text;
                }
            }

            logoUrl = doc.select("img#book_img").attr("src");

            Elements rows = doc.getElementsByAttributeValue("align", "left").select("tr.whitetext");
            StringBuilder locations = new StringBuilder(position);
            for (Element row : rows) {
                locations.append(row.text()).append("\n");
            }
            position = locations.toString();
            return null;
        }

        @Override
        protected void onPostExecute(Void result) {
            super.onPostExecute(result);
            if (isFinishing()) {
                // The user already left this screen.
                return;
            }
            text_bookName.setText(name);
            text_position.setText(position);
            if (douBan == null) {
                text_douban.setVisibility(View.GONE);
            } else {
                text_douban.setText(douBan);
            }
            scrollView.setVisibility(View.VISIBLE);
        }
    }
}
五 总结
这个Demo其实并不难,很容易理解,但要对Jsoup和异步请求有所了解。你也可以做出自己的第三方客户端。
附上源码http://pan.baidu.com/s/1c1B1VIo