生活: 爬蟲語言學習

11/16

爬蟲語言架構

安裝python3.6 pyzoIDE pip 安裝10個套件
numpy 矩陣運算
matplotlib視覺化圖片輸出
SciPy 資料科學以上三個補助運算
beautifulsoup4 爬蟲語言html/css 靜態語言<a><span><div.<td>等
Selenium動態語言html/表單Javascript:Json,xml
Scrapy網頁框架Vue.js 以上三個爬蟲套件 DOM,jQuery Http:post get
Pandas圖表資料處理1.瀏覽2.查詢系統3.新增刪除修改4.模糊查詢5.統計和分析6.決策
Seaborn/Bokeh視覺化輸出圖表和文字
mysql-connector-python 資料庫連線mysqlserver
Firebase Google雲端資料庫-大數據分析平台GA

---------------------------------------
在C設一資料夾python36安裝python3.6
資料夾Bigdataproject1放相關學習檔案\
*************************************************
python基礎
間隔輸出
print(value, ..., sep=' ', end='\n', file=sys.stdout)

objects -- 复数，表示可以一次输出多个对象。输出多个对象时，需要用 , 分隔。
sep -- 用来间隔多个对象，默认值是一个空格。
end -- 用来设定以什么结尾。默认值是换行符 \n，我们可以换成其他字符串。
file -- 要写入的文件对象。
flush -- 输出是否被缓存通常决定于 file，但如果 flush 关键字参数为 True，流会被强制刷新。

input()可以指定提示文字，使用者輸入的文字則以字串傳回（Python 2.7的輸入是使用raw_input()）
+++++++++++++++++++++++++++
12/06
while
----------------
def factorial(n):
    """Return n! computed with a while loop.

    For n <= 0 the loop body never runs, so the result is 1.
    """
    total = 1
    i = 1
    while i <= n:
        total *= i
        i += 1
    return total


def main():
    """Keep asking for a number and printing its factorial until the user picks (2)."""
    while True:
        n = int(input("請輸入一個數值："))
        print(factorial(n))
        ch = int(input("請選擇：(1)繼續(2)停止"))
        if ch == 2:
            break


if __name__ == "__main__":
    main()
---------------------------------------
list
基本概念
常用函數(一)
1.元素相加 sum(串列名稱)
1.自計算
# Add up the list by hand; the result equals the built-in sum(a).
a = [12, 56, 99, 21, 52]

total = 0
# Visit the elements from last to first, as the original index loop did.
for value in reversed(a):
    total += value

print("total:", total)
----------------------------------------
2.a=[12,56,99,21,52]
total=sum(a)
print("total:",total)
2.最大值max()

3.最小值 min()
4.正排序 sorted()
a = [12, 56, 99, 21, 52]

# sorted() returns a new ascending list and leaves `a` unchanged.
c = sorted(a)
for value in c:
    print(value)
5.逆排序 sorted(,reverse=True)
a=[12,56,99,21,52]

len1=len(a)
c=sorted(a,reverse=True)
for i in range(0,len1):
print(c[i])

-----------------------
6.判斷指定元素是在串列中
if 元素 in 串列名稱：
[程式區塊]
a = [10, 20, 30, 40, 60]
qp = 30

# `in` already evaluates to a bool, so it can be stored directly.
x = qp in a
if x:
    print("存在")
else:
    print("不存在")
------------------------------
常用函數(二)
1.新增
物件.append(值)
mylist1 = []

# append() adds each value to the end of the list, preserving this order.
for language in ("java", "javascript", "delphi", "c++", "jquery"):
    mylist1.append(language)

for item in mylist1:
    print(item)
2.新增(插入)
物件.insert(索引位置，值)
mylist1=[]

mylist1.append("java")
mylist1.append("javascript")
mylist1.append("delphi")
mylist1.append("c++")
mylist1.append("jquery")
mylist1.insert(2,"php")

for i in mylist1:

print(i)
3.數量
物件.count(值)
data=["java","javascript","delphi","c++","jquery"]
qp="java"

c=data.count(qp)

print("[{0}]在串列中出現[{1}]次".format(qp,c))
4.索引位置
物件.index(值)
5.刪除：
物件.remove(值)
------------------------------
11/13
data = ["java", "javascript", "delphi", "c++", "jquery"]
qp = "javascript"

# list.remove() raises ValueError when the value is absent, so test first.
if qp in data:
    data.remove(qp)

for item in data:
    print(item)
______
lista = [[10, 20], [30, 40], [50, 60]]

yl = len(lista)     # number of rows
xl = len(lista[0])  # number of columns, taken from the first row
print("y軸長度：{0}".format(yl))
print("x軸長度：{0}".format(xl))

# Direct access uses [row][column].
print(lista[0][0])
print(lista[0][1])
print(lista[1][0])
print(lista[1][1])
print(lista[2][0])
print(lista[2][1])

# The same traversal with nested loops.
for i in range(yl):
    for j in range(xl):
        print("lista[{0}][{1}]={2}".format(i, j, lista[i][j]))
-------------------------------------------------------
tuple
tup1=(12,67,88,32,11)

len1=len(tup1)
print(len1)

for i in range(len1):

print("tup1({0})={1}".format(i,tup1[i]))
______________________________________
tup1 = ("java", "javascript", "delphi", "c++", "jquery")
# A tuple cannot be appended to: convert it to a list first, then append.
list1 = list(tup1)
list1.append("jsp")

for item in list1:
    print(item)
#java
javascript
delphi
c++
jquery
jsp
_____________________

# Each record: (student id, name, Chinese score, English score, math score).
data = (
    ("p1001", "小明", 60, 70, 80),
    ("p1002", "小鈴", 70, 50, 90),
    ("p1003", "小童", 23, 60, 80),
    ("p1004", "小張", 93, 60, 90),
    ("p1005", "小金", 70, 56, 86),
)

ROW_FORMAT = "%s\t%s\t\t%d\t%d\t%d"


def find_by_id(records, student_id):
    """Return the first record whose id (column 0) equals student_id, else None."""
    for record in records:
        if record[0] == student_id:
            return record
    return None


def main():
    """Run the menu loop until the user chooses (5)."""
    while True:
        ax = int(input("選單(1)總覽(2)查詢國文大於等於60(3)查詢英文小於等於60(4)學號查詢(5)完全結束："))

        if ax == 1:
            print("%s\t\t%s\t\t%s\t%s\t%s\t%s\t%s" % ("學號", "姓名", "國文", "英文", "數學", "總分", "平均"))
            for record in data:
                total = record[2] + record[3] + record[4]
                avg = total / 3.0
                print("%s\t%s\t\t%d\t%d\t%d\t%d\t%.2f" % (record + (total, avg)))
        elif ax == 2:
            for record in data:
                if record[2] >= 60:
                    print(ROW_FORMAT % record)
        elif ax == 3:
            # The menu promises "less than or equal to 60", so 60 itself must match.
            for record in data:
                if record[3] <= 60:
                    print(ROW_FORMAT % record)
        elif ax == 4:
            qpn = input("請輸入學號")
            # Looking up afresh on every query avoids a stale "found" flag
            # left over from an earlier successful search.
            record = find_by_id(data, qpn)
            if record is not None:
                print(ROW_FORMAT % record)
            else:
                print("查無此資料")
        elif ax == 5:
            break


if __name__ == "__main__":
    main()

------------------------------

12/20
dict
data = {"Java": 350, "c++": 260, "delphi": 300, "c": 100, "Javascript": 70}

len1 = len(data)
print(len1)
print(data["Java"])
print(data["c++"])

# keys() is a view; list() turns it into an indexable list.
k = list(data.keys())
print(k)
for key in k:
    print(key)
#5
350
260
['Java', 'c++', 'delphi', 'c', 'Javascript']
Java
c++
delphi
c
Javascript
+++++++++++++++++++++++
price = [35, 78, 90, 12, 88]
fruits = ("李子", "水蜜桃", "西瓜", "哈密瓜", "文旦")

# zip pairs each fruit with its price and stops at the shorter sequence,
# so a length mismatch cannot raise IndexError.
fdata = dict(zip(fruits, price))

for name, cost in fdata.items():
    print("%s===>%d" % (name, cost))

________________________________

dict1={"程式":["Java","c++","delphi"]}
list1=dict1["程式"]
for i in list1:

print(i)
---------------------------------------
12/27
dictex2.py
dict1 = {
    "程式": ["java", "c", "javascript"],
    "美工": ["photoshop", "illustrator", "cordraw"],
    "系統": ["dos", "window", "mac"],
}

# Print each category on its own line, then its items tab-separated below it.
for category, tools in dict1.items():
    print(category)
    for tool in tools:
        print(tool, end="\t")
    print()
---------
#程式
java c javascript
美工
photoshop illustrator cordraw
系統
dos window mac
========================

python函數
函數一: 全域變數
def 函數名稱():
[程式區塊]

函數名稱()
def chin():
print("函數架構一")

chin()

函數二: 區域變數
def 函數名稱(參數一，參數二):
[程式區塊]

函數名稱(引數1，引數2)
def chinfun2(str):
print(str)

chinfun2("函數架構二")

函數三:
def 函數名稱():
[程式區塊]
return 回傳結果
接收回傳變數=函數名稱()
def chinfun3():
return 300

x=chinfun3()

print(x)

函數四:
def 函數名稱(參數一，參數二):
[程式區塊]
return 回傳結果
接收回傳變數=函數名稱(引數1，引數2)
def chinfun4(x, y):
    """Return x + y."""
    return x + y


if __name__ == "__main__":
    # Stored as `result`, not `sum`, so the built-in sum() stays usable.
    result = chinfun4(12, 67)
    print(result)

函數功能
1.功能整理
2.易於維修
3.模組程式發展
4.類別元件開發
5.系統架構

https://codertw.com/%E7%A8%8B%E5%BC%8F%E8%AA%9E%E8%A8%80/374459/

python 主程式
if __name__=="__main__":
[程式區塊]
----------------------------
批次傳值:
串列/元組/字典當引數傳值必須用相對應元件參數接收
def chinarray(sd):
    """Print every element of the sequence sd, one per line."""
    for item in sd:
        print(item)


if __name__ == "__main__":
    list1 = [12, 56, 88, 43, 94]
    chinarray(list1)

批次回傳值:

串列/元組/字典當批次回傳結果必須用相對應元件接收回傳變數
def chinarray3():
    """Return a fixed list of five integers."""
    return [45, 78, 12, 34, 99]


if __name__ == "__main__":
    # The caller receives the whole list back and can iterate it directly.
    sd = chinarray3()
    for item in sd:
        print(item)
----------------------
-
----------------------

def input1():
    """Read two integers, show their sum, and repeat until the user chooses to leave.

    A plain loop is used here; having the functions call each other in a
    cycle would add a stack frame per round and eventually hit the
    recursion limit.
    """
    while True:
        a = int(input("數值一："))
        b = int(input("數值二："))
        show1(a, b, logic1(a, b))
        if not exitfun1():
            break


def logic1(x, y):
    """Return x + y."""
    return x + y


def show1(a, b, c):
    """Print the equation a+b=c."""
    print("{0}+{1}={2}".format(a, b, c))


def exitfun1():
    """Ask whether to continue; return True for (1) and False for (2).

    Any other number repeats the question.
    """
    while True:
        x = int(input("(1)繼續執行(2)完全離開"))
        if x == 1:
            return True
        if x == 2:
            return False


if __name__ == "__main__":
    input1()
_______________________________

import requests

r=requests.get("http://192.168.0.53/webproject1/chinjung1.txt")
r.encoding="utf-8"

if r.status_code==requests.codes.ok:

print(r.text)
--------------
import requests

r=requests.get("http://192.168.0.53/webproject1/chinjung2.txt")
r.encoding="utf-8"

if r.status_code==requests.codes.ok:
dat=r.text
print(dat)
list1=dat.split(",")
len1=len(list1)
j=1
for i in range(0,len1,1):
print(j,"==>",list1[i])
j=j+1
_____________
#
程式設計,美工設計,文書設計,系統設計,網頁設計

1 ==> 程式設計
2 ==> 美工設計
3 ==> 文書設計
4 ==> 系統設計

5 ==> 網頁設計
--------------------------------
import requests

# A timeout keeps the script from hanging forever if the server does not answer.
r = requests.get("http://192.168.0.53/webproject1/chinjung3.txt", timeout=10)
r.encoding = "utf-8"

if r.status_code == requests.codes.ok:
    dat = r.text
    print(dat)
    print()

    # splitlines() handles "\r\n", "\n" and "\r", whichever the server sends.
    for kdata in dat.splitlines():
        for field in kdata.split(","):
            print(field)
        print()

----------------------------
1/10
import csv

list1 = [[12, 34, 56], [32, 54, 21], [56, 87, 43]]
filename = "csvfile1.csv"
# newline="" lets the csv module control line endings. The with-block closes
# the file on exit, so no explicit close() is needed.
with open(filename, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["data1", "data2", "data3"])
    writer.writerows(list1)
___________________
import csv

filename = "csvfile1.csv"
# Open with newline="" so the csv reader handles line endings itself.
# The with-block closes the file on exit.
with open(filename, "r", newline="") as f:
    for row in csv.reader(f):
        print(",".join(row))
___________________
Json:https://www.runoob.com/python/python-json.html
格式：{key1:value1,key2:value2,key3:value3,.....}
函數庫：import json
轉換json：方法一：json.dumps()（Python物件轉為JSON字串）
方法二：json.loads()（JSON字串轉為Python物件）
一般取值：元件名稱["key"]
批次取值：元件名稱.keys()
元件名稱.values()

import json

data = [{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}]
# Keep the result in its own name: assigning it to `json` would replace the
# module, and any later json.loads()/json.dumps() call would fail.
json_text = json.dumps(data)

print(json_text)
#[{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}]
_________
import json

jsonData='{"a":1,"b":2,"c":3,"d":4,"e":5}'
text=json.loads(jsonData)

print(text)
#{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
---------------------
import json

jsonData='{"a":1,"b":2,"c":3,"d":4,"e":5}'
text=json.loads(jsonData)
print(text['a'])
print(text['b'])
print(text['c'])
print(text['d'])

print(text['e'])
#1
2
3
4
5
-----------
import json

jsonData = '{"a":1,"b":2,"c":3,"d":4,"e":5}'
text = json.loads(jsonData)
# items() yields each key together with its value, so no parallel
# key/value lists or index bookkeeping are needed.
for key, value in text.items():
    print(key, "==>", value)
#a ==> 1
b ==> 2
c ==> 3
d ==> 4
e ==> 5
-----------------
chinjung1.json
{"a":100,
"b":200,
"c":300,
"d":400,
"e":500
}
json5.py
import requests
import json

r=requests.get("http://192.168.0.53/webproject1/chinjung1.json")
r.encoding="utf-8-sig"

if r.status_code==requests.codes.ok:
dat=r.text
text=json.loads(dat)
list1=list(text.keys())
len1=len(list1)
list2=list(text.values())
len2=len(list2)
for i in range(0,len1,1):

print(list1[i],"==>",list2[i])
#a ==> 100
b ==> 200
c ==> 300
d ==> 400
e ==> 500
----------------------------------------------

0117
import sys

# The with-block closes the file even if write() raises.
with open("jungdb2.txt", "a") as f:
    f.write("學習python邁向人工智慧之路")
print("%s" % "資料寫入成功")
_______
f=open("jungdb2.txt","a")
f.write("\r\n學習c邁向嵌入應用系統之路")
print("%s" % "資料寫入成功")

f.close()
_________________
# readline() returns only the first line; the with-block closes the file.
with open("jungdb2.txt", "r") as f:
    data = f.readline()
    print("%s" % data)
___________________
# readlines() returns every line as a list; the with-block closes the file.
with open("jungdb2.txt", "r") as f:
    data = f.readlines()
print("%s" % data)

len1 = len(data)
print(len1)
# Print the first and third entries, skipping any index the file does not have.
for index in (0, 2):
    if index < len1:
        print(data[index])
__________________
from urllib.request import urlopen
#from bs4 import BeautifulSoup

print("success")

_______________
from urllib.request import urlopen
#from bs4 import BeautifulSoup

print("success")

__________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/chin2.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.title.text)
________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/chin2.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.font.text)
___________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.title.text)
______________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.ul.text)
#作品發表
文章
________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/chin2.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.font.text)
_____
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/wchin1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")

print(soup.p.text)
________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/wchin1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")
kdata=soup.find_all("p")
print(kdata)
len1=len(kdata)
for i in range(0,len1,1):

print(kdata[i])
#[<p>程式設計</p>, <p>美工設計</p>, <p>繪圖設計</p>, <p>系統設計</p>, <p>資料庫設計</p>]
<p>程式設計</p>
<p>美工設計</p>
<p>繪圖設計</p>
<p>系統設計</p>
<p>資料庫設計</p>
_________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup

html=urlopen("http://192.168.0.53/webproject1/wchin1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"html.parser")
kdata=soup.find_all("p")
print(kdata)
len1=len(kdata)
for i in range(0,len1,1):

print(kdata[i].text)
#[<p>程式設計</p>, <p>美工設計</p>, <p>繪圖設計</p>, <p>系統設計</p>, <p>資料庫設計</p>]
程式設計
美工設計
繪圖設計
系統設計
資料庫設計
------------------------
<body>
<a id="a1" href="javascript:void;">按扭驅動</a>
<p></p>
<a id="a2" href="javascript:void;">按扭驅動</a>
<p></p>
<div class="c1">程式設計</div>
<div class="c1">美工設計</div>
<div class="c1">系統設計</div>
<div class="c1">網頁設計</div>
<div class="c1">軟體設計</div>

</body>
-------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html=urlopen("http://192.168.0.53/webproject1/wchin4.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.find(id="a2")
print(kdata.string)
#按扭驅動
--------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html=urlopen("http://192.168.0.53/webproject1/wchin4.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.findAll(attrs={"class":"c1"})
len1=len(kdata)
print(len1)
for i in range(0,len1,1):
print(kdata[i].string)
for i in range(0,len1,1):
print(kdata[i].get_text())
#5
程式設計
美工設計
系統設計
網頁設計
軟體設計
程式設計
美工設計
系統設計
網頁設計
軟體設計
--------------------------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select(".slide_nav li")
len1=len(kdata)
print(len1)
for i in range(0,len1,1):
print(kdata[i].string)

#5
本日精選總覽
最新作品
評議推薦
資深推薦
發表作品
----------------------------------------------
1/31
wchin3.html
<table border="2" width="500"><br />
<tr>
<td>編號</td>
<td>姓名</td>
<td>生日</td>
<td>血型</td>
<td>學歷</td>
</tr>
<tr>
<td>p1001</td>
<td>迪力熱巴</td>
<td>67/12/09</td>
<td>A</td>
<td>研究所</td>
</tr>
<tr>
<td>p1002</td>
<td>古力納札</td>
<td>71/12/09</td>
<td>O</td>
<td>大學</td>
</tr>
<tr>
<td>p1003</td>
<td>趙麗穎</td>
<td>69/10/09</td>
<td>B</td>
<td>研究所</td>
</tr>

</table>
-------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

# NOTE(review): the output recorded below this snippet is the cell text of
# wchin3.html, so the page and selector are set to match it - confirm.
html = urlopen("http://192.168.0.53/webproject1/wchin3.html").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")

kdata = soup.find_all("td")
for cell in kdata:
    print(cell.string)
#編號
姓名
生日
血型
學歷
p1001
迪力熱巴
67/12/09
A
研究所
p1002
古力納札
71/12/09
O
大學
p1003
趙麗穎
69/10/09
B
研究所
----------------------------------
Beautiful --> pandas --> matplotlib
爬取資料處理資料>正規化繪圖模組(視覺化輸出)
(瀏覽查詢過濾新增刪除修改匯出匯入)

基礎pandas(一)
1.引入函數庫：
import pandas as pd
2.
字典-->DataFrame --> 輸出
pd.DataFrame(字典來源資料)
pd.DataFrame(字典來源資料，columns=["欄名1","欄名2","欄名3","欄名4","欄名5"])
__________________________
11-1
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html = urlopen("http://192.168.0.53/webproject1/wchin3.html").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.find_all("td")
len1 = len(kdata)

id1 = []
name1 = []
birth1 = []
blood1 = []
school1 = []
# Cells arrive five per table row. Start at 5 to skip the header row and
# stop early enough that an incomplete trailing row is never indexed.
for i in range(5, len1 - 4, 5):
    id1.append(kdata[i].text)
    name1.append(kdata[i + 1].text)
    birth1.append(kdata[i + 2].text)
    blood1.append(kdata[i + 3].text)
    school1.append(kdata[i + 4].text)

print("編號", end="\t\t")
print("姓名", end="\t\t")
print("生日", end="\t\t")
print("血型", end="\t\t")
print("學歷", end="\t\t")
print()
# One output line per record; print() ends each row with a newline.
for row in zip(id1, name1, birth1, blood1, school1):
    print("\t".join(value.strip() for value in row))
#編號姓名生日血型學歷
p1001 迪力熱巴 67/12/09 A 研究所
p1002 古力納札 71/12/09 O 大學
p1003 趙麗穎 69/10/09 B 研究所
>>>
_________________________
11-3
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml
import pandas as pd

html=urlopen("http://192.168.0.53/webproject1/wchin3.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.find_all("td")
#print(kdata)
len1=len(kdata)
id1=[]
name1=[]
birth1=[]
blood1=[]
school1=[]
for i in range(0,len1,5):
if i>=5:
id1.append(kdata[i].text.strip())
name1.append(kdata[i+1].text.strip())
birth1.append(kdata[i+2].text.strip())
blood1.append(kdata[i+3].text.strip())
school1.append(kdata[i+4].text.strip())
len2=len(blood1)
sourse={"id":id1,"name":name1,"birth":birth1,"blood":blood1,"school":school1}
ds=pd.DataFrame(sourse,columns=["id","name","birth","blood","school"])
print(ds)
rowlen=ds.shape[0]
collen=ds.shape[1]
print("列數：",rowlen)
print("欄數：",collen)

ds.to_csv("lochinjung.csv")
#id name birth blood school
0 p1001 迪力熱巴 67/12/09 A 研究所
1 p1002 古力納札 71/12/09 O 大學
2 p1003 趙麗穎 69/10/09 B 研究所
列數： 3
欄數： 5

_________________________________

______________________
<table border="2" width="500"><br />
<tr>
<td>編號</td>
<td>姓名</td>
<td>生日</td>
<td>血型</td>
<td>學歷</td>
<td>畢業總分數</td>
</tr>
<tr>
<td>p1001</td>
<td>迪力熱巴</td>
<td>67/12/09</td>
<td>A</td>
<td>研究所</td>
<td>78</td>
</tr>
<tr>
<td>p1002</td>
<td>古力納札</td>
<td>71/12/09</td>
<td>O</td>
<td>大學</td>
<td>56</td>
</tr>
<tr>
<td>p1003</td>
<td>趙麗穎</td>
<td>69/10/09</td>
<td>B</td>
<td>研究所</td>
<td>98</td>
</tr>

_________________________

11-4
from urllib.request import urlopen

from bs4 import BeautifulSoup
import lxml
import pandas as pd
import matplotlib.pyplot as plt

html = urlopen("http://192.168.0.53/webproject1/wchin3_3.html").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.find_all("td")
len1 = len(kdata)

id1 = []
name1 = []
birth1 = []
blood1 = []
school1 = []
total1 = []
# Cells arrive six per table row. Start at 6 to skip the header row and
# stop early enough that an incomplete trailing row is never indexed.
for i in range(6, len1 - 5, 6):
    id1.append(kdata[i].text.strip())
    name1.append(kdata[i + 1].text.strip())
    birth1.append(kdata[i + 2].text.strip())
    blood1.append(kdata[i + 3].text.strip())
    school1.append(kdata[i + 4].text.strip())
    total1.append(kdata[i + 5].text.strip())

sourse = {"id": id1, "name": name1, "birth": birth1, "blood": blood1, "school": school1, "total": total1}
ds = pd.DataFrame(sourse, columns=["id", "name", "birth", "blood", "school", "total"])
print(ds)
rowlen, collen = ds.shape
print("列數：", rowlen)
print("欄數：", collen)
ds.to_csv("lochinjung.csv")

# The scraped values are strings; convert the whole column so the chart
# works for any number of rows.
data = ds["total"].astype(int).tolist()
plt.bar(range(1, len(data) + 1), data)
plt.yticks(range(0, 101, 10))

plt.show()
#id name birth blood school total
0 p1001 迪力熱巴 67/12/09 A 研究所 78
1 p1002 古力納札 71/12/09 O 大學 56
2 p1003 趙麗穎 69/10/09 B 研究所 98
列數： 3
欄數： 6
__________________________
207
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html=urlopen("http://192.168.0.53/webproject1/wchin4.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.find(text="系統設計")
print(kdata.parent.name)

#div
-------------------------------------------
向下走訪
選擇器：
1.元件.select("#id"):選擇ID定位
2.children：子元件
NavigableString:
from bs4.element import NavigableString

3.for i in tdata.children:
    print(type(i))
a. type(i)印出元件型別
b. i.name元件名稱
c. i.text元件內容
4.排除NavigableString
if not isinstance(i,NavigableString):
print(i.name)
5.i.元件名稱.text 子元件(內層元件)
i.div.text
i.span.text
________________________
<div id="q2">
<ul class="answer">
<li><div>程式設計</div><span>3000</span></li>
<li><div>美工設計</div><span>4000</span></li>
<li><div>網頁設計</div><span>5000</span></li>
</ul>
</div>
_________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
import lxml

html=urlopen("http://192.168.0.53/webproject1/wormdata1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select("#q2")
tdata=kdata[0].ul
len1=len(tdata)
print(len1)

for i in tdata.children:

print(type(i))
#7
<class 'bs4.element.NavigableString'>
<class 'bs4.element.Tag'>
<class 'bs4.element.NavigableString'>
<class 'bs4.element.Tag'>
<class 'bs4.element.NavigableString'>
<class 'bs4.element.Tag'>
<class 'bs4.element.NavigableString'>
-----------------------------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://192.168.0.53/webproject1/wormdata1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select("#q2")
tdata=kdata[0].ul
len1=len(tdata)
print(len1)

for i in tdata.children:
if not isinstance(i,NavigableString):

print(i.name)
#7
li
li
li

____________________________________
for i in tdata.children:
if not isinstance(i,NavigableString):

print(i.text)
#7
程式設計3000
美工設計4000
網頁設計5000
________________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://192.168.0.53/webproject1/wormdata1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select("#q2")
tdata=kdata[0].ul
len1=len(tdata)
print(len1)

for i in tdata.children:
if not isinstance(i,NavigableString):
print(i.div.text)
print(i.span.text)

#7
程式設計
3000
美工設計
4000
網頁設計
5000
___________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://192.168.0.53/webproject1/wormdata1.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select("#q2")
tdata=kdata[0].ul

for i in tdata.parents:
print(i.name)

print()
for j in tdata.find_parents():
print(j.name)

#div
body
html
[document]

div
body
html
[document]

___________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html = urlopen("http://192.168.0.53/webproject1/wormdata1.html").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.select("#q2")
tdata = kdata[0].ul.li

# The <li> tags are separated by text nodes, so it takes two sibling hops
# to move from one <li> to the next.
pdata = tdata.next_sibling.next_sibling
print(pdata.text)
cdata = pdata.next_sibling.next_sibling
print(cdata.text)

# Walk back again: qdata is the second <li>, udata the first.
qdata = cdata.previous_sibling.previous_sibling
print(qdata.text)
udata = qdata.previous_sibling.previous_sibling
print(udata.text)

# Expected output:
# 美工設計4000
# 網頁設計5000
# 美工設計4000
# 程式設計3000

_____________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select("#tab1")
tdata=kdata[0].ul
len1=len(tdata)
print(len1)

for i in tdata.children:
if not isinstance(i,NavigableString):
print(i.text)

#7
攝影基礎線上輕鬆學 (18主題)
全景&縮時攝影簡單學 (2主題)
鯊魚- 婚禮攝影技巧分享 (2主題)
人像外拍場景考量 (4主題)
運動攝影-快速上手 (3主題)
看更多>>
________________________
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select(".ul_wrapper")
tdata=kdata[0].ul

for i in tdata.children:
if not isinstance(i,NavigableString):
print(i.text)

----------------------
人像
秀展
風景
夜景
生態
飛羽
植物
美食
人文
建築
動物

教學
-------------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://www.dcview.com/").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.select(".copyright .clearfix")
tdata=kdata[0]

for i in tdata.children:
if not isinstance(i,NavigableString):
print(i.text)

kdata2=soup.select(".share")
tdata2=kdata2[0]

for j in tdata2.children:
if not isinstance(j,NavigableString):
print(j.text)

#行銷合作
廣告刊登
客服中心
迪希facebook粉絲團
迪希facebook粉絲團
-------------------------------
0214
<div id="total">
<div id="dv1">java</div>
<div id="dv2">c++</div>
<div id="dv3">jquery</div>
<div id="dv4">javascript</div>
<div id="dv5">html</div>
</div>
--------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html=urlopen("http://192.168.0.53/webproject1/wormdata3.html").read().decode("utf-8")
soup=BeautifulSoup(html,"lxml")
kdata=soup.find(id="total")
print(kdata.text)

cdata=kdata.next_element.next_element
print(cdata.text)
pdata=cdata.next_element.next_element
print(pdata)
tdata=pdata.next_element.next_element
print(tdata)

for i in kdata.next_elements:
if not isinstance(i,NavigableString):
print(i.name,"==>",i.text)

#java
c++
jquery
javascript
html

java

c++
div ==> java
div ==> c++
div ==> jquery
div ==> javascript
div ==> html
=======================
讀取線上圖片
kimg=urlopen(url)
info=kimg.read(1000)
size=size+len(info)
kimg.close()
儲存線上圖片
fp=open("filename","wb")
fp.write(info)
fp.close()

----------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html = urlopen("http://www.dcview.com/").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.select("#banner-fade")
tdata = kdata[0].ul
len1 = len(tdata)
print(len1)

strimg = ""
for i in tdata.children:
    if not isinstance(i, NavigableString):
        strimg = i.img.get("src")
        print(strimg)

# Only the last image URL printed above is downloaded.
if strimg:
    import os

    os.makedirs("image", exist_ok=True)  # open() does not create folders
    # The with-block closes both the connection and the file, even on error.
    with urlopen(strimg) as kimg, open("image/chin2.jpg", "wb") as fp:
        while True:
            info = kimg.read(1000)  # copy in 1000-byte chunks
            if not info:
                break
            fp.write(info)

-------------------------
from urllib.request import urlopen
from bs4 import BeautifulSoup
from bs4.element import NavigableString
import lxml

html = urlopen("https://okgo.tw/butyview.html/?id=3283").read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.select(".pic")
len1 = len(kdata)
print(len1)

# Collect the src of every .pic element that actually contains an <img>.
strimg = [item.img.get('src') for item in kdata if item.img is not None]

# Keep only values longer than 25 characters, as the original filter did;
# a missing src (None) is dropped as well.
strimg2 = [src for src in strimg if src and len(src) > 25]
len2 = len(strimg2)
print(len2)

import os

os.makedirs("image2", exist_ok=True)  # open() does not create folders
for j, src in enumerate(strimg2):
    # Files are named chin0.jpg, chin1.jpg, ... with a single extension.
    with urlopen(src) as kimg, open("image2/chin" + str(j) + ".jpg", "wb") as fp:
        while True:
            info = kimg.read(1000)  # copy in 1000-byte chunks
            if not info:
                break
            fp.write(info)
________________________
from bs4 import BeautifulSoup
from bs4.element import NavigableString
from urllib.request import urlopen
import lxml

import os
from urllib.parse import urljoin

base_url = "http://www.bsnet.com.tw/"
html = urlopen(base_url).read().decode("utf-8")
soup = BeautifulSoup(html, "lxml")
kdata = soup.select("table tr td img")
len1 = len(kdata)
print(len1)

# urljoin resolves relative, root-relative and already-absolute src values,
# where plain string concatenation would produce broken URLs.
strimg = [urljoin(base_url, img.get('src')) for img in kdata if img.get('src')]

# Keep only the URLs whose last three characters are "jpg".
strimg2 = [src for src in strimg if src[-3:] == "jpg"]

os.makedirs("image3", exist_ok=True)  # open() does not create folders
for j, src in enumerate(strimg2):
    # The with-block closes both the connection and the file, even on error.
    with urlopen(src) as kimg, open("image3/jung" + str(j) + ".jpg", "wb") as fp:
        while True:
            info = kimg.read(1000)  # copy in 1000-byte chunks
            if not info:
                break
            fp.write(info)
#57
==================
0221
xpath:
1.函數庫：
from lxml import html
html.fromstring(urlopen)
=>kdata
kdata.xpath()[0]=>第一個元素

2.取得元素：
元件
3.元件‧tag
4.xpath為網頁結構路徑
++++++
1
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/wormdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)

print(kdata)
#<Element html at 0x3e0c1d8>
+++++++++++++++++++
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/wormdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

for i in kdata.getchildren():

print(i)
#<Element head at 0x4024e08>
<Element body at 0x4024ea8>
+++++++++++++++++++++
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/wormdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

tdata=kdata.xpath("/html/body/div/div")[0]
print(tdata.tag)
print(tdata.attrib["id"])

print(tdata.text_content())
#div
dv1
java
+++++++++++++++++++++
tdata=kdata.xpath("/html/body/div/div[2]")[0] #內部從1算起，外部從0算起
print(tdata.tag)
print(tdata.attrib["id"])

print(tdata.text_content())
#div
dv2
c++
=========================
<body>
<a href="http://www.google.com.tw/" id="a">google</a>
</body>
+++++++++++++++++++++++
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata1.html").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

tdata=kdata.xpath("/html/body/a[1]")[0]#kdata.xpath("/html/body/a")[0]
print(tdata.tag)
print(tdata.attrib["id"])

print(tdata.text_content())
#a
a
google
=====================
from urllib.request import urlopen
from lxml import html

h=urlopen("http://travel.nantou.gov.tw/detail.aspx?type=scenic&id=451").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

tdata=kdata.xpath("/html/body/div/span/div/div/a")[0]
print(tdata.tag)
print(tdata.attrib["href"].strip())
print(tdata.attrib["id"].strip())

print(tdata.text_content().strip())
#a
#content
A1
跳到主要內容區塊
+++++++++++++++++++++
from urllib.request import urlopen
from lxml import html

h=urlopen("http://travel.nantou.gov.tw/detail.aspx?type=scenic&id=451").read().decode("utf-8")

kdata=html.fromstring(h)
tdata=kdata.xpath("/html/body/div/span/div/nav/div/ul/li")
len1=len(tdata)-1

for i in range(1,len1+1,1):
tdata=kdata.xpath("/html/body/div/span/div/nav/div/ul/li["+str(i)+"]")[0]

print(tdata.text_content().strip())
#網站導覽
常見問題
雙語詞彙
樂旅南投
English
日本語
Thai ver.
相關連結
民意信箱

=======================
<body>
<ul id="ul">
<li>程式設計</li>
<li>美工設計</li>
<li>系統設計</li>
<li>網頁設計</li>
<li>軟體設計</li>
</ul>
-----------------------
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

tdata=kdata.xpath("/html/body/ul/li[2]")[0]

print(tdata.text_content())
#美工設計
-------------------------
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)
#print(kdata)

for i in range(1,5+1,1):
tdata=kdata.xpath("/html/body/ul/li["+str(i)+"]")[0]
print(tdata.text_content())

#程式設計
美工設計
系統設計
網頁設計
軟體設計
================
from urllib.request import urlopen
from lxml import html
from bs4 import BeautifulSoup

h=urlopen("http://192.168.0.53/webproject1/jungdata3.html").read().decode("utf-8")
soup1=BeautifulSoup(h,"html.parser")
list1=soup1.find_all("li")
len1=len(list1)

kdata=html.fromstring(h)
#print(kdata)

for i in range(1,len1+1,1):
tdata=kdata.xpath("/html/body/ul/li["+str(i)+"]")[0]
print(tdata.text_content().strip())

#程式設計
美工設計
系統設計
網頁設計
軟體設計
===============

from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata3.html").read().decode("utf-8")
kdata=html.fromstring(h)
tdata=kdata.xpath("/html/body/ul/li")
len1=len(tdata)

for i in range(1,len1+1,1):
tdata=kdata.xpath("/html/body/ul/li["+str(i)+"]")[0]
print(tdata.text_content().strip())

#程式設計
美工設計
系統設計
網頁設計
軟體設計
==================================
from urllib.request import urlopen
from lxml import html

h=urlopen("http://travel.nantou.gov.tw/detail.aspx?type=scenic&id=451").read().decode("utf-8")

kdata=html.fromstring(h)
cdata=kdata.xpath("/html/body/div/span/div/div/div/ul/span[2]/li")[0]
print(cdata.text_content().strip())

tdata=kdata.xpath("/html/body/div/span/div/div/div/ul/span[2]/li/img")[0]

print(tdata.attrib["src"].strip())
+++++++
from urllib.request import urlopen
from lxml import html

h=urlopen("http://travel.nantou.gov.tw/detail.aspx?type=scenic&id=451").read().decode("utf-8")

kdata=html.fromstring(h)

for i in range(1,4+1,1):
cdata=kdata.xpath("/html/body/div/span/div/div/div/ul/span["+str(i)+"]/li")[0]
print(cdata.text_content().strip())
tdata=kdata.xpath("/html/body/div/span/div/div/div/ul/span["+str(i)+"]/li/img")[0]

print(tdata.attrib["src"].strip())
#

影片下載
1.
2.
3.
4.

-----------------------

from pytube import YouTube

yt = YouTube('https://www.youtube.com/watch?v=uUwWmwEhYoQ&list=PLfr5zCilDSqDrsEiV_BabWUExA6zCvF0H')

video = yt.streams.filter(file_extension='mp4', res='1080p').first()

# first() returns None when no stream matches the filter.
if video is None:
    print("找不到1080p的mp4串流")
else:
    # In a raw string a single backslash is already literal.
    video.download(r'C:\tempvideo')

------------------------------
<div id="dv1">
<p>程式設計</p>
<p>美工設計</p>
<p>系統設計</p>
<p>網頁設計</p>
<p>資料庫設計</p>
</div>
<div id="dv2">
<p>java</p>
<p>c++</p>
<p>python</p>
<p>javascript</p>
<p>jquery</p>
</div>
---------------------------

from bs4 import BeautifulSoup
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata4.html").read().decode("utf-8")
soup1=BeautifulSoup(h,"html.parser")
list1=soup1.find_all("div")
len1=len(list1)

kdata=html.fromstring(h)
for i in range(1,len1+1,1):
for j in range(1,5+1,1):
tdata=kdata.xpath("/html/body/div["+str(i)+"]/p["+str(j)+"]")[0]
print(tdata.text_content().strip(),end=" ")

print()
===============
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata5.html").read().decode("utf-8")
kdata=html.fromstring(h)
tdata=kdata.xpath("/html/script")[0]
cdata=tdata.text_content().strip()
lenstr=len(cdata)
pdata=cdata.split(";")
len3=len(pdata)
print(len3)
for i in range(0,len3-1,1):
rdata=pdata[i].split("=")
print(rdata[1])

=============
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata4.html").read().decode("utf-8")
kdata=html.fromstring(h)
tdata=kdata.xpath("/html/body/div/p[2]")[0]

print(tdata.getparent().tag)
print(tdata.getnext().tag)

print(tdata.getprevious().tag)
====================
from bs4 import BeautifulSoup
from urllib.request import urlopen
from lxml import html

h=urlopen("http://192.168.0.53/webproject1/jungdata4.html").read().decode("utf-8")
soup1=BeautifulSoup(h,"html.parser")
list1=soup1.find_all("div")
len1=len(list1)

kdata=html.fromstring(h)
for i in range(1,len1+1,1):
for j in range(1,5+1,1):
tdata=kdata.xpath("/html/body/div["+str(i)+"]/p["+str(j)+"]")[0]
print(tdata.text_content().strip(),end=" ")

print()

生活

2019年11月15日星期五

爬蟲語言學習

沒有留言:

張貼留言

2019年11月15日 星期五

爬蟲語言學習

沒有留言:

張貼留言

2019年11月15日星期五