Skip to content

Instantly share code, notes, and snippets.

Created February 18, 2017 21:41
Show Gist options
  • Save anonymous/6222c8eb825d76630c4148674f588df4 to your computer and use it in GitHub Desktop.
Save anonymous/6222c8eb825d76630c4148674f588df4 to your computer and use it in GitHub Desktop.
<type 'float'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'float'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'float'>
<type 'unicode'>
<type 'numpy.float64'>
goodinfo.py:38: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
if df[i].loc[0,0] == u"獲 利 狀 況      (/)":
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
41
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
<type 'unicode'>
#! /usr/bin/python
# -*- coding: utf-8 -*-
import re
import urllib2
import csv
import pandas as pd
def GetHtmlcode(ID):
    """Download the goodinfo.tw stock-detail page and return its HTML text.

    Args:
        ID: Stock identifier string appended to the ``STOCK_ID=`` query
            parameter of the request URL (e.g. ``'2103'``).

    Returns:
        The response body decoded from UTF-8 as a unicode string.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    source = 'http://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID='
    url = source + ID

    # Browser-like request headers sent along with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'Big5,utf-8;q=0.7,*;q=0.3',
        # NOTE(review): compressed encodings are not requested; presumably
        # because the body below is read without any decompression step.
        #'Accept-Encoding' : 'gzip,deflate,sdch',
        'Accept-Language': 'zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4,ja;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # NOTE(review): this looks like placeholder text rather than a real
        # cookie value -- confirm before relying on it.
        'Cookie': '427 bytes were stripped',
        # NOTE(review): this host differs from the one in the request URL
        # ('goodinfo.tw') -- confirm which one the server expects.
        'Host': 'www.goodinfo.tw',
        'Referer': url,
    }

    # Connect to the page and fetch the data.
    # NOTE(review): urllib2 issues a POST whenever ``data`` is not None, so
    # the empty string here makes this a POST with an empty body. Kept as-is
    # to preserve behavior; pass None instead if a plain GET is intended.
    req = urllib2.Request(url, "", headers)
    response = urllib2.urlopen(req)
    try:
        return response.read().decode('utf-8')
    finally:
        # Release the underlying connection even if read/decode fails.
        response.close()
def main():
    """Report which parsed tables start with the target heading.

    Fetches the page for stock ``'2103'``, parses every HTML table with
    ``pandas.read_html``, prints the type of each table's ``[0, 0]`` cell,
    and prints the index of every table whose ``[0, 0]`` cell equals the
    target heading text.
    """
    target = u"獲 利 狀 況      (/)"
    # ``unicode`` on Python 2; spelled this way so the name is not hard-coded.
    text_type = type(target)

    page = GetHtmlcode('2103')
    tables = pd.read_html(page)
    for index, table in enumerate(tables):
        cell = table.loc[0, 0]
        print(type(cell))
        # Some tables have a float / numpy.float64 (or other non-text) value
        # in this cell; comparing those against a unicode literal triggered
        # a UnicodeWarning, so only text cells are compared.
        if isinstance(cell, text_type) and cell == target:
            print(index)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment