第一阶段冲刺(四)
日期:2021.05.06
作者:杨传伟
完成任务:学习爬取后台json数据,re正则匹配字符串,xpath解析,requests请求json数据。爬取爱奇艺电影片库5000+条电影数据(电影名字、播放链接、评分、播放权限)并存到数据库。
爬虫源码:
import time
import traceback
import requests
from lxml import etree
import re
from bs4 import BeautifulSoup
from lxml.html.diff import end_tag
import json
import pymysql

# Number of movies requested per API page (the ret_num query parameter).
PAGE_SIZE = 48

# First three characters of the badge image file name -> label stored in the DB.
# Examples of badge URLs seen from the API:
#   exclusive: https://www.iqiyipic.com/common/fix/site-v4/video-mark/only.png
#   VIP:       https://pic0.iqiyipic.com/common/20171106/ac/1b/vip_100000_v_601_0_21.png
#   star:      https://www.iqiyipic.com/common/fix/site-v4/video-mark/star-movie.png
STATE_LABELS = {"onl": "独播", "sta": "星钻", "vip": "VIP"}


def get_conn():
    """Open a connection to the MovieRankings database and create a cursor.

    :return: (connection, cursor)
    """
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration before sharing this script.
    conn = pymysql.connect(host="82.157.112.34",
                           user="root",
                           password="root",
                           db="MovieRankings",
                           charset="utf8")
    # Result rows are returned as tuples by default.
    cursor = conn.cursor()
    if conn is not None and cursor is not None:
        print("数据库连接成功!游标创建成功!")
    else:
        print("数据库连接失败!")
    return conn, cursor


def close_conn(conn, cursor):
    """Close the cursor and the connection if they exist; always return 1."""
    if cursor:
        cursor.close()
    if conn:
        conn.close()
    return 1


def _parse_state(pay_text):
    """Turn a payMarkUrl badge image URL into a playback-permission label.

    An empty or missing URL means the movie is free. Otherwise the first three
    characters of the image file name select the label; unknown prefixes are
    returned unchanged.
    """
    if not pay_text:
        return "免费"
    prefix = pay_text.rsplit("/", 1)[-1][:3]
    return STATE_LABELS.get(prefix, prefix)


def get_iqy(max_pages=136):
    """Crawl the iQiyi movie list API and return the collected movies.

    :param max_pages: number of pages to request; the default of 136 matches
        the last non-empty page observed when the script was written.
    :return: list of [name, score, play_url, state] lists
    """
    # Report how many rows the table already holds, then release the
    # connection straight away (it is not needed while crawling).
    conn, cursor = get_conn()
    try:
        cursor.execute("select count(*) from movieiqy")
        # fetchall() returns nested tuples such as ((n,),)
        all_num = cursor.fetchall()[0][0]
    finally:
        close_conn(conn, cursor)
    print("movieiqy数据库有", all_num, "条数据!")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    dataRes = []
    for i in range(1, max_pages + 1):
        url_0 = "https://pcw-api.iqiyi.com/search/recommend/list?channel_id=1&data_type=1&mode=11&page_id="
        url_0 = url_0 + str(i) + "&ret_num=48&session=ad1d98bb953b7e5852ff097c088d66f2"
        print(url_0)
        # A timeout keeps one stalled request from hanging the whole crawl.
        response = requests.get(url=url_0, headers=headers, timeout=10)
        response.encoding = "utf-8"
        try:
            json_list = json.loads(response.text)['data']['list']
        except (ValueError, KeyError, TypeError):
            # Past the last page the payload no longer has data.list:
            # treat that as the end of the catalogue.
            print("捕获异常!")
            return dataRes
        if not json_list:
            break
        for j in json_list:
            movie = [
                j['name'],
                # Some movies have no score field.
                j.get('score', "iqy暂无评分"),
                j['playUrl'],
                _parse_state(j.get('payMarkUrl', '')),
            ]
            print(movie)
            dataRes.append(movie)
        print('___________________________')
    return dataRes


def insert_iqy():
    """Crawl the movies with get_iqy() and insert them into table movieiqy."""
    cursor = None
    conn = None
    try:
        movies = get_iqy()
        print(f"{time.asctime()}开始插入爱奇艺电影数据")
        conn, cursor = get_conn()
        sql = "insert into movieiqy (id,name,score,path,state) values(%s,%s,%s,%s,%s)"
        for count, item in enumerate(movies, start=1):
            print(item)
            if count % PAGE_SIZE == 0:
                print('___________________________')
            # Skip rows that violate a key constraint instead of aborting.
            try:
                cursor.execute(sql, [0, item[0], item[1], item[2], item[3]])
            except pymysql.err.IntegrityError:
                print("重复!跳过!")
        conn.commit()
        print(f"{time.asctime()}插入爱奇艺电影数据完毕")
    except Exception:
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


if __name__ == '__main__':
    insert_iqy()
截图示例:
5.6 李楠
今日实现了“我的”页面中“想看”“在看”“看过”三个栏目内容的显示。原先想使用 Fragment 嵌套来实现,
但是没有成功,于是改为给 RadioGroup 设置 OnCheckedChangeListener,监听选中的 RadioButton 是否发生改变,然后显示对应的数据。
注意:这部分逻辑不能写在 Activity 中,要在 Fragment 的 onCreateView 中编写,然后返回视图:
Fragment部分
package com.example.cloudlibrary.Fragment;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import androidx.fragment.app.Fragment;
import androidx.fragment.app.FragmentActivity;
import androidx.viewpager.widget.ViewPager;

import android.os.Bundle;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.ListView;
import android.widget.RadioButton;
import android.widget.RadioGroup;

import com.example.cloudlibrary.Adapter.MyPagerListAdapter;
import com.example.cloudlibrary.Data.ListData;
import com.example.cloudlibrary.R;

import java.util.ArrayList;
import java.util.List;

/**
 * "My page" fragment: three radio buttons (want to watch / watching / watched)
 * switch the content shown in a single ListView.
 */
public class MyPageFragment extends Fragment implements RadioGroup.OnCheckedChangeListener {
    private RadioGroup my_pager_group;
    private RadioButton want_look;
    private RadioButton now_look;
    private RadioButton have_look;
    private List<ListData> list_data = new ArrayList<>();
    private ListView mypager_like_list;
    private ListData listData;
    private View view;
    private MyPagerListAdapter myPagerListAdapter;

    public static final int PAGE_ONE = 0;
    public static final int PAGE_TWO = 1;
    public static final int PAGE_THREE = 2;

    public MyPageFragment() {
    }

    /**
     * Inflates the page layout, looks up the views once and registers this
     * fragment as the RadioGroup's checked-change listener.
     */
    @Override
    public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
        view = inflater.inflate(R.layout.activity_my_page, container, false);
        my_pager_group = (RadioGroup) view.findViewById(R.id.my_pager_group);
        want_look = (RadioButton) view.findViewById(R.id.want_look);
        now_look = (RadioButton) view.findViewById(R.id.now_look);
        have_look = (RadioButton) view.findViewById(R.id.have_look);
        // Looked up once here instead of on every selection change.
        mypager_like_list = (ListView) view.findViewById(R.id.mypager_like_list);
        my_pager_group.setOnCheckedChangeListener(this);
        return view;
    }

    /**
     * Shows the list that belongs to the newly checked radio button.
     * if/else is used instead of switch because resource IDs are not
     * guaranteed to be compile-time constants.
     */
    @Override
    public void onCheckedChanged(RadioGroup group, int checkedId) {
        // TODO: the entries below are placeholder data; replace with a real query.
        if (checkedId == R.id.want_look) {
            showSingleItem("head1", "吹响吧,上低音号!");
        } else if (checkedId == R.id.now_look) {
            showSingleItem("head2", "AIR");
        } else if (checkedId == R.id.have_look) {
            showSingleItem("head3", "百变小樱");
        }
    }

    /** Replaces the list content with one entry built from the given image key and name. */
    private void showSingleItem(String img, String name) {
        list_data = new ArrayList<>();
        listData = new ListData(img, name);
        list_data.add(listData);
        myPagerListAdapter = new MyPagerListAdapter(getContext(), list_data);
        mypager_like_list.setAdapter(myPagerListAdapter);
    }
}
listview adapter部分:
package com.example.cloudlibrary.Adapter;

import android.content.Context;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.BaseAdapter;
import android.widget.ImageView;
import android.widget.ListAdapter;
import android.widget.TextView;

import com.example.cloudlibrary.Data.ListData;
import com.example.cloudlibrary.R;

import org.w3c.dom.Text;

import java.util.ArrayList;
import java.util.List;

/**
 * ListView adapter that renders each ListData entry as a row with a picture
 * and a name.
 */
public class MyPagerListAdapter extends BaseAdapter {
    private List<ListData> list_data = new ArrayList<>();
    private Context context;

    /**
     * @param context   context used to obtain the LayoutInflater
     * @param list_data entries to display; null is treated as an empty list
     */
    public MyPagerListAdapter(Context context, List<ListData> list_data) {
        this.context = context;
        if (list_data != null) {
            this.list_data = list_data;
        }
    }

    @Override
    public int getCount() {
        return list_data.size();
    }

    /** Returns the entry at the given position (previously always null). */
    @Override
    public Object getItem(int position) {
        return list_data.get(position);
    }

    /** Uses the row position as the item id (previously always 0). */
    @Override
    public long getItemId(int position) {
        return position;
    }

    /**
     * Builds or recycles the row view for the given position and binds the
     * entry's name and picture to it.
     */
    @Override
    public View getView(int position, View convertView, ViewGroup parent) {
        if (convertView == null) {
            // Pass the parent (without attaching) so the row's layout params are kept.
            convertView = LayoutInflater.from(context).inflate(R.layout.mypager_like_list, parent, false);
        }
        ImageView picture_list = (ImageView) convertView.findViewById(R.id.picture_list);
        TextView name_list = (TextView) convertView.findViewById(R.id.name_list);
        ListData listData = list_data.get(position);
        name_list.setText(listData.getName());
        // The image key is mapped to a bundled mipmap resource.
        switch (listData.getImg()) {
            case "head1":
                picture_list.setImageResource(R.mipmap.head1);
                break;
            case "head2":
                picture_list.setImageResource(R.mipmap.head2);
                break;
            case "head3":
                picture_list.setImageResource(R.mipmap.head3);
                break;
        }
        return convertView;
    }
}
xml部分:
<?xml version="1.0" encoding="utf-8"?>
<!-- "My page" layout: a profile header on top, then the favourites tab bar
     and the list that shows the selected tab's content. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!-- Profile header: avatar, vertical divider, nickname and phone number. -->
    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="80dp"
        android:orientation="horizontal">

        <ImageView
            android:id="@+id/first_head_picture"
            android:layout_width="60dp"
            android:layout_height="60dp"
            android:layout_marginTop="10dp"
            android:layout_marginLeft="10dp"
            android:src="@mipmap/headpictrue" />

        <View
            android:layout_width="2px"
            android:layout_height="45dp"
            android:background="@color/login_line_color"
            android:layout_marginTop="20dp"
            android:layout_marginLeft="20dp" />

        <LinearLayout
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:orientation="vertical">

            <!-- Text sizes use sp so they follow the user's font-size setting. -->
            <TextView
                android:id="@+id/my_name"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_marginTop="10dp"
                android:textSize="20sp"
                android:layout_marginLeft="30dp"
                android:textColor="#EEAAFF"
                android:text="风吹过半夏" />

            <TextView
                android:id="@+id/my_phone"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_marginTop="10dp"
                android:textSize="20sp"
                android:layout_marginLeft="30dp"
                android:textColor="#EEAAFF"
                android:text="157****5171" />

        </LinearLayout>

    </LinearLayout>

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:orientation="vertical">

        <!-- Tab bar: section title followed by the three radio buttons. -->
        <LinearLayout
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:orientation="horizontal">

            <TextView
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:text="我的收藏"
                android:textSize="20sp" />

            <!-- RelativeLayout-only attributes were removed here: the parent
                 is a LinearLayout, which ignores them. -->
            <RadioGroup
                android:id="@+id/my_pager_group"
                android:layout_width="match_parent"
                android:layout_height="30dp"
                android:background="#ffffff"
                android:orientation="horizontal">

                <RadioButton
                    android:id="@+id/want_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="想看" />

                <RadioButton
                    android:id="@+id/now_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="在看" />

                <RadioButton
                    android:id="@+id/have_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="看过" />

            </RadioGroup>

        </LinearLayout>

        <!-- Divider between the tab bar and the list. -->
        <View
            android:id="@+id/div_tab_bar"
            android:layout_width="match_parent"
            android:layout_height="2px"
            android:background="#DFDBDB" />

        <!-- The list takes the remaining height instead of wrap_content. -->
        <ListView
            android:id="@+id/mypager_like_list"
            android:layout_width="match_parent"
            android:layout_height="0dp"
            android:layout_weight="1" />

    </LinearLayout>

</LinearLayout>
5.6 章英杰
任务进度:完成了根据电影分类进行多条件筛选功能。可根据类型、年份和地区进行多条件筛选。
产品页面:
电影分类部分主要代码:
<!--电影分类-->
<div id="classfiy">
    <aside>
        <i>类型:</i>
        <div>
            <span>全部</span>
            <span>喜剧</span>
            <span>动作</span>
            <span>爱情</span>
            <span>惊悚</span>
            <span>犯罪</span>
            <span>悬疑</span>
            <span>战争</span>
            <span>科幻</span>
            <span>动画</span>
            <span>恐怖</span>
            <span>家庭</span>
            <span>传记</span>
            <span>冒险</span>
            <span>奇幻</span>
            <span>武侠</span>
            <span>历史</span>
        </div>
    </aside>
    <aside>
        <i>年份:</i>
        <div>
            <span>全部</span>
            <span>2021</span>
            <span>2020</span>
            <span>2019</span>
            <span>2018</span>
            <span>2017</span>
            <span>2016</span>
            <span>2015</span>
            <span>2011-2014</span>
            <span>2006-2010</span>
            <span>2000-2005</span>
            <span>90年代</span>
            <span>80年代</span>
            <span>其他</span>
        </div>
    </aside>
    <aside>
        <i>地区:</i>
        <div>
            <span>全部</span>
            <span>内地</span>
            <span>香港</span>
            <span>美国</span>
            <span>欧洲</span>
            <span>台湾</span>
            <span>日本</span>
            <span>韩国</span>
            <span>印度</span>
            <span>泰国</span>
            <span>英国</span>
            <span>法国</span>
            <span>德国</span>
            <span>加拿大</span>
            <span>西班牙</span>
            <span>意大利</span>
            <span>其他</span>
        </div>
    </aside>
    <div class="last">已选择:
        <div id="yi"></div>
    </div>
</div>
<script>
    // Selected text per option row, keyed by the index given to the row's <div>.
    var oDivLength = [];
    var div = document.getElementsByTagName('div');
    // Only the option spans inside the filter block get a click handler,
    // not every <span> on the page.
    var divSpan = document.getElementById('classfiy').getElementsByTagName('span');
    // Give every <div> an index so a span can tell which row it belongs to.
    for (var i = 0; i < div.length; i++) {
        div[i].index = i;
    }
    for (var i = 0; i < divSpan.length; i++) {
        divSpan[i].onclick = function() {
            // Remember the choice for this row.
            oDivLength[this.parentElement.index] = this.innerText;
            // Clear the highlight on the row, then highlight the clicked option.
            var oChild = this.parentElement.children;
            for (var j = 0; j < oChild.length; j++) {
                oChild[j].className = '';
            }
            this.className = 'mystyle';
            // Rebuild the "selected" summary from every row that has a choice.
            var chosen = document.getElementById('yi');
            chosen.innerHTML = '';
            for (var m = 0; m < oDivLength.length; m++) {
                if (oDivLength[m] !== undefined) {
                    var para = document.createElement("span");
                    var node = document.createTextNode(oDivLength[m]);
                    para.appendChild(node);
                    chosen.appendChild(para);
                }
            }
        }
    }
</script>
<!--电影分类模块结束-->