Python爬取制服买家秀小姐姐

这个网站大概是1个月之前在吾爱破解水漫金山某位大神发出来的,当天我就写了爬虫,今天没事又去爬一下看更新了没,发现是空的,网站内容更改了。
所以刚刚又重新用scrapy写了一个整站爬虫,但还是不发出来,省得把网站给爬死了。
复制出来改成单分类爬虫,剩下的想爬取,自己更改!!!

# from ip_proxy import ips
import requests, os, re, random
from lxml import etree


# ip_add = random.choice(ips())
# Ensure the output directory exists before any download starts.
if not os.path.exists('./zhifu'):
    os.mkdir('./zhifu')

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# Crawl list pages 1-3 of one category (id=3) and save every post image.
for i in range(1, 4):
    url = 'https://www.ikmjx.com/index.php?g=portal&m=list&a=index&id=3&p=' + str(i)
    # timeout keeps the script from hanging forever on a dead connection
    r = requests.get(url=url, headers=headers, timeout=10).text
    tree = etree.HTML(r)
    # first and last <div> are not post entries, hence the [1:-1] slice
    div_list = tree.xpath('/html/body/main/div/div[2]/div')[1:-1]
    for li in div_list:
        a = 0  # per-post image counter, used to build unique filenames
        src = 'https://www.ikmjx.com' + li.xpath('./div[2]/a/@href')[0]
        titles = li.xpath('./div[2]/a/@title')[0]
        # Strip every character that is illegal in Windows filenames, not just
        # '?' — titles containing \ / : * " < > | used to crash open() below.
        title = re.sub(r'[\\/:*?"<>|]', '', titles)
        req = requests.get(url=src, headers=headers, timeout=10).text
        tree1 = etree.HTML(req)
        # the second <p> of the post body holds the image gallery
        div1_list = tree1.xpath('/html/body/main/div/div/div/div[3]/p[2]')
        for p in div1_list:
            src_path = p.xpath('./img/@src')
            for img in src_path:
                a = a + 1
                img_data = requests.get(url=img, headers=headers, timeout=10).content
                img_path = './zhifu/' + title + '_' + str(a) + '.jpg'
                with open(img_path, 'wb') as fp:
                    fp.write(img_data)
                    

python企业微信即时通讯api每日定时推送内容

前几天看了@https://www.52pojie.cn/thread-1338005-1-1.html大神的帖子,想到自己也写一个想看的东西推送一下,所以也用他的接口,真的很方便。

企业微信注册以及接口地址可以参照上面的帖子
自己写了天气+当地的疫情+度娘头条热点,挂到自己的服务器,每天定时发送一些内容。
代码写的很烂,大神略过,可以自己添加一些其他的内容发送

import requests, urllib, sys
from lxml import etree

class TopNews():
    """Scrape the Baidu hot-search page and return numbered headline strings."""

    def __init__(self):
        self.url = 'http://top.baidu.com/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'
        }

    def get_url(self):
        """Return a list like ['1:title!', '2:title!', ...], one entry per hot item."""
        r = requests.get(url=self.url, headers=self.headers, timeout=10)
        # the page is GBK-encoded; force the decoding before parsing
        r.encoding = 'gb2312'
        tree = etree.HTML(r.text)
        li_list = tree.xpath('//*[@id="hot-list"]/li')
        titles = []
        # enumerate() replaces li_list.index(li), which was O(n) per item and
        # returns the wrong position if two <li> elements compare equal.
        for num, li in enumerate(li_list, start=1):
            title = li.xpath('./a/@title')[0]
            titles.append(str(num) + ':' + title + '!')
        return titles

class Weather():
    """Fetch a 3-day forecast for a fixed city from the HeWeather S6 API."""

    def __init__(self):
        # API query parameters: city name and the (free-tier) API key
        self.data = {
            "location": "焦作",
            "key": "3f47abf414184e59b5b791b765e38a74"
        }

    def get_url(self):
        """Return today/tomorrow/day-after forecasts as one newline-joined string."""
        # S6 endpoint is used here; a V7 endpoint with a different schema also exists
        data_list = requests.get('https://free-api.heweather.com/s6/weather/forecast',
                                 params=self.data, timeout=10).json()
        daily_forecast = data_list["HeWeather6"][0]["daily_forecast"]
        day_labels = ('今天   ', '明天   ', '后天   ')
        forecast_list = []
        # enumerate() replaces the original daily_forecast.index(i) calls, which
        # were O(n) per element and ambiguous when two days have identical data.
        for idx, day in enumerate(daily_forecast):
            label = day_labels[idx] if idx < len(day_labels) else day_labels[-1]
            all_forecast = (label + '  白天:' + day['cond_txt_d'] + '  晚上:' + day['cond_txt_n']
                            + '  最高温度:' + day['tmp_max'] + '°C' + '  最低温度:' + day['tmp_min'] + '°C'
                            + '  风力:' + day['wind_dir'] + day['wind_sc'] + '级' + '!')
            forecast_list.append(all_forecast)
        # the full-width '!' separators become newlines in the final message
        select_forecast = "".join(forecast_list)
        return select_forecast.replace('!', '\n')


class YiQing:
    """Fetch COVID-19 statistics from the Aliyun market API (ported from urllib2 to requests)."""

    def __init__(self):
        self.host = 'http://ncovdata.market.alicloudapi.com'
        self.path = '/ncov/cityDiseaseInfoWithTrend'
        self.method = 'GET'
        self.appcode = '9f7f40025d254f2593066def83ebdc98'
        self.querys = ''
        self.bodys = {}
        self.url = self.host + self.path

        # the API authenticates with an "APPCODE <code>" Authorization header
        self.new_appcode = 'APPCODE ' + self.appcode
        self.headers = {
            'Authorization': self.new_appcode
        }

    def get_data(self):
        """Return a formatted summary string for the province at index 7."""
        response = requests.get(url=self.url, headers=self.headers, timeout=10)
        # SECURITY FIX: the original ran eval() on the raw response body, which
        # executes arbitrary code if the server (or a MITM on this plain-HTTP
        # endpoint) returns malicious content. Parse it as JSON instead.
        dict_content = response.json()
        # index 7 is assumed to be 河南 in provinceArray — TODO confirm ordering
        new_content = dict_content['provinceArray'][7]
        yiqing_data = ('河南最新疫情:' + '\n'
                       + '新增:' + str(new_content['totalDoubtful']) + '\n'
                       + '累计:' + str(new_content['totalConfirmed']) + '\n'
                       + '累计治愈:' + str(new_content['totalCured']) + '\n'
                       + '累计死亡:' + str(new_content['totalDeath']))
        return yiqing_data

class SendText:
    """Bundle news, weather and epidemic text and push it via a WeChat Work relay."""

    def __init__(self, newsdata, hewdata, yqdata):
        self.news_data = newsdata
        self.hew_data = hewdata
        self.yq_data = yqdata
        # WeChat Work credentials — replace with your own
        self.corpid = "自己的"
        self.corpsecret = "自己的"
        self.agentid = "自己的"

    def server_j(self):
        """POST the combined message and return the relay's response."""
        # turn the headline list into one newline-separated block
        headlines = "".join(self.news_data).replace('!', '\n')
        # weather, epidemic summary, a blank line, then the headlines
        message = '\n'.join([str(self.hew_data), self.yq_data, '', headlines])

        payload = {
            "corpid": self.corpid,
            "corpsecret": self.corpsecret,
            "agentid": self.agentid,
            "text": message,
        }
        # third-party relay that wraps the WeChat Work message API
        return requests.post('https://api.htm.fun/api/Wechat/text/', data=payload)
def main():
    """Gather headlines, weather and epidemic data, then build the sender."""
    top_news = TopNews()
    weather = Weather()
    epidemic = YiQing()
    sender = SendText(top_news.get_url(), weather.get_url(), epidemic.get_data())
    # sender.server_j()  # uncomment to actually push the message


if __name__ == '__main__':
    main()