Daily scraping of the latest movies from the Movie Heaven (电影天堂) homepage

A Python script that scrapes the latest movies from the Movie Heaven homepage every day and pushes them to WeChat.

import requests
from lxml import etree

class Movie(object):
  def __init__(self):
    self.url = 'https://www.dytt8.net/html/gndy/dyzz/index.html'
    self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"}
    self.movies = []
    self.html = ''

  def get_all(self):
    r = requests.get(url=self.url, headers=self.headers)
    # The site is GBK-encoded; set this before reading r.text or titles garble
    r.encoding = 'gb2312'
    tree = etree.HTML(r.text)
    # The site's HTML is malformed (td directly under ul); lxml tolerates it
    tables = tree.xpath('//div[@class="co_content8"]/ul/td/table')
    for table in tables:
      # The second row of each table holds the linked title of one movie
      title = table.xpath('./tr[2]/td[2]/b/a/text()')[0]
      href = table.xpath('./tr[2]/td[2]/b/a/@href')[0]
      link = 'https://www.dytt8.net' + href
      self.movies.append(title + '\n' + link + '\n')
    self.html = "".join(self.movies)
    return self.html

dy = Movie()
dy.get_all()
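
The title promises a push to WeChat, which the snippet above doesn't do yet. One lightweight way to wire that up is Server酱 (ServerChan), which also comes up in a later entry here; a minimal sketch, assuming the ServerChan Turbo endpoint and a valid SENDKEY (the key below is a placeholder):

import requests

def push_to_wechat(text, sendkey='YOUR_SENDKEY'):
    # ServerChan relays the message to WeChat; 'title' and 'desp' are its fields
    requests.post(f'https://sctapi.ftqq.com/{sendkey}.send',
                  data={'title': '电影天堂更新', 'desp': text})

push_to_wechat(dy.get_all())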

Batch renaming of downloaded Bilibili videos

I wrote this tool for a friend; when I released it my blog was still down, so it went straight to the 52pojie forum. Posting a copy on my own blog now.

It's written in Python with a PySimpleGUI UI; the packaged exe is quite large.

The files have to be downloaded with JiJiDown (唧唧DOWN) first; other downloaders are untested. The matching is done with regular expressions, so file names produced by other tools may not match.
JiJiDown is easy to find with a search; it's everywhere.
When downloading, do not tick "automatically create a new folder".
(screenshot: download settings)

Pick the directory and hit convert, and that's it.
(screenshot: finished result)

Download link:
https://modys.lanzout.com/iTJ6l09a8ikb  password: 3hpv
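
For reference, the core of such a tool is just a regex rename pass over the download folder. A minimal sketch (the pattern below is hypothetical, since JiJiDown's exact naming scheme isn't documented here):

import os, re

def batch_rename(folder):
    # Hypothetical pattern: strip a leading numeric id such as '12345678-'
    pattern = re.compile(r'^\d+-')
    for name in os.listdir(folder):
        new_name = pattern.sub('', name)
        if new_name != name:
            os.rename(os.path.join(folder, name), os.path.join(folder, new_name))

batch_rename('./downloads')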

Installing Python 3: "checking how to run the C preprocessor... /lib/cpp" error

If configure stops with the error: checking how to run the C preprocessor... /lib/cpp configure: error: C preprocessor "/lib/cpp" fails sanity check
the cause is that kernel-headers is not installed.
If yum install kernel-headers then fails with: Plugin "product-id" can't be imported Loaded plugins: fastestmirror, langpacks
disable the mirror-speed plugin by editing /etc/yum/pluginconf.d/fastestmirror.conf
and changing enabled = 1 to enabled = 0.
Then edit yum.conf and comment out the exclude= line.
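
After the edit, the plugin config should look roughly like this (only the enabled flag changes):

[main]
enabled = 0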

Scheduled pushes from WeChat Work (企业微信) to personal WeChat: a daily work reminder

A few months ago I wrote a post about an auto-push setup, but the author's API broke (around last Thursday, I think) and still hasn't recovered. Having gotten used to the reminders, I now sometimes forget to write my daily work report. I remembered Server酱 (ServerChan); its free quota of 5 messages a day is enough, but the messages get collapsed and are a pain to read. After some searching I found an article that calls the WeChat Work API directly, with no server-side deployment needed.
Original author of the approach: https://ley.best/push-msgs-to-wechat-and-dingding/
The original article uses image-text (mpnews) messages, but the pushed font is tiny and the payload can't exceed 512 bytes, so longer messages get cut off; text messages allow 2048 bytes, so I switched to text.
Change whatever you like; see the WeChat Work API docs: https://work.weixin.qq.com/api/doc/90000/90135/90236

The result looks like this:
(screenshot: WeChat push)

from datetime import datetime
import requests, json


class Worker:
    def __init__(self):
        # The holiday API expects a plain date, not a full datetime string
        self.now_time = datetime.today().strftime('%Y-%m-%d')
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'}
        self.url = 'http://api.tianapi.com/txapi/jiejiari/index?key=YOUR_KEY&date='

    def my_job(self):
        url = self.url + self.now_time
        r = requests.get(url=url, headers=self.headers)
        # Parse JSON safely instead of calling eval() on the raw text
        content = r.json()
        infos = ''
        if int(content['code']) == 200:
            # Only index into newslist once the API reports success
            newlist = content['newslist'][0]
            if newlist['info'] == '工作日':
                infos = '\n' + '今天是工作日,要写日报!!!!' + '\n'
                return infos
            elif newlist['info'] == '双休日':
                infos = '今天是双休日,要写下周的工作计划'
                return infos
            elif newlist['tip'] != '':
                infos = '国家法定节假日,可以好好休息,别忘了周工作计划和月工作计划'
                return infos
            else:
                pass
        else:
            infos = '接口错误'
            return infos
        # print(infos)
        return infos
class YouJia:
    def __init__(self):
        self.url = 'http://api.tianapi.com/txapi/oilprice/index?key=YOUR_KEY&prov=河南'
        self.data = ''
    def money(self):
        r = requests.get(url=self.url)
        content = r.json()
        #print(content)
        if content['code'] == 200:
            self.data = '92号油价:' + content['newslist'][0]['p92'] + '  95号油价:' + content['newslist'][0]['p95']
        else:
            self.data = '油价接口错误!!!'
        return self.data

class Weather():
    def __init__(self):
        self.data = {
            "location": "YOUR_CITY",
            "key": "YOUR_KEY"
        }

    def get_url(self):
        # The S6 API endpoint is used here; V7 also works
        data_list = requests.get('https://free-api.heweather.com/s6/weather/forecast',params=self.data).json()
        daily_forecast = data_list["HeWeather6"][0]["daily_forecast"]
        forecast_list = []
        for idx, i in enumerate(daily_forecast):
            forecast = {}
            # Label the forecast by its position: today / tomorrow / the day after
            if idx == 0:
                forecast['时间'] = '今天   '
            elif idx == 1:
                forecast['时间'] = '明天   '
            else:
                forecast['时间'] = '后天   '
            all_forecast = forecast['时间'] + '  白天:' + i['cond_txt_d'] + '  晚上:' + i['cond_txt_n'] + '  最高温度:' + i['tmp_max'] + '°C' + '  最低温度:' + i['tmp_min'] + '°C' + '  风力:' + i['wind_dir'] + i['wind_sc'] + '级' + '!'

            forecast_list.append(all_forecast)
        select_forecast = "".join(forecast_list)
        new_data = select_forecast.replace('!', '\n')
        return new_data

class YiQing:
    def __init__(self):
        self.url = 'http://api.tianapi.com/txapi/ncov/index?key=YOUR_KEY'
        self.all_data = ''
        self.henan_news = ''
    def request_data(self):
        r = requests.get(self.url)
        content = r.json()
        # print(type(content))
        news_list = content["newslist"][0]
        all_news = news_list['news']
        news_num = len(all_news)
        nationwide = all_news[0]['summary']
        # print(nationwide)
        for i in range(news_num):
            # 150879 is this API's id for the Henan (河南) entry
            if all_news[i]['id'] == 150879:
                self.henan_news = all_news[i]['summary']
                break
            else:
                self.henan_news = '河南—————————暂无疫情'
        self.all_data = nationwide + '\n' + '-'*34 + '\n' + '\n' + self.henan_news
        return self.all_data

class Weixin():
    def __init__(self,myjob_data, youjia_data, tianqi_data, yiqing_data):
        self.corpid = 'YOUR_CORPID'
        self.corpsecret = 'YOUR_CORPSECRET'
        self.HEADERS = {"Content-Type": "application/json ;charset=utf-8"}
        self.myjob = myjob_data
        self.youjia = youjia_data
        self.tianqi = tianqi_data
        self.yiqing = yiqing_data
    def news(self):
        send_data = self.myjob + '\n' + '-'*34 + '\n' + self.youjia + '\n' + '-'*34 + '\n' + self.tianqi + '-'*34 + '\n' + self.yiqing
        r = requests.get('https://qyapi.weixin.qq.com/cgi-bin/gettoken?corpid=' + self.corpid + '&corpsecret=' + self.corpsecret).text
        # print(r)
        js = json.loads(r)
        token = js['access_token']
        data = {
            'touser': '@all',
            'msgtype': 'text',
            'agentid': 1000002,  # your app's agentid in WeChat Work
            'text':{
                'content': send_data
                },
            'safe': 0,
            'enable_id_trans': 0,
            'enable_duplicate_check': 0,
            'duplicate_check_interval': 1800

                }
        String_testMsg = json.dumps(data)
        wechaturl = f'https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token={token}'
        # print(wechaturl)
        req = requests.post(wechaturl, data=String_testMsg, headers=self.HEADERS)
        # print(req.text)


def main():
    workers = Worker()
    youjias = YouJia()
    tianqis = Weather()
    yiqings = YiQing()
    wechat = Weixin(workers.my_job(), youjias.money(), tianqis.get_url(), yiqings.request_data())
    wechat.news()
if __name__ == '__main__':
    main()
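
On a server the daily trigger is usually a cron entry; a pure-Python alternative using the third-party schedule package (an assumption, it isn't used elsewhere in this post) would look like this:

import schedule, time

# Fire the whole pipeline once a day; 09:00 is an arbitrary example time
schedule.every().day.at('09:00').do(main)
while True:
    schedule.run_pending()
    time.sleep(60)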

Scraping "uniform buyer show" (制服买家秀) photos with Python

The site was posted about a month ago by someone on the 水漫金山 board of 52pojie, and I wrote a crawler for it that same day. Today I ran it again to see whether anything new was up and got nothing back: the site's layout had changed.
So I just rewrote it as a full-site crawler in scrapy, but I'm not publishing that version, to keep the site from being crawled to death.
This copy has been trimmed down to a single category; if you want to crawl more, adapt it yourself!

# from ip_proxy import ips        # optional proxy pool module (not included)
import requests, os, random
from lxml import etree


# ip_add = random.choice(ips())   # pick a random proxy if the pool is enabled
if not os.path.exists('./zhifu'):
    os.mkdir('./zhifu')

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
for i in range(1,4):
    url = 'https://www.ikmjx.com/index.php?g=portal&m=list&a=index&id=3&p=' + str(i)
    r = requests.get(url=url, headers=headers).text
    tree = etree.HTML(r)
    div_list = tree.xpath('/html/body/main/div/div[2]/div')[1:-1]
    for li in div_list:
        a = 0
        src = 'https://www.ikmjx.com' + li.xpath('./div[2]/a/@href')[0]
        titles = li.xpath('./div[2]/a/@title')[0]
        title = titles.replace('?', '')  # '?' is illegal in Windows file names
        req = requests.get(url=src, headers=headers).text
        tree1 = etree.HTML(req)
        div1_list = tree1.xpath('/html/body/main/div/div/div/div[3]/p[2]')
        for p in div1_list:
            src_path = p.xpath('./img/@src')
            # print(src_path)
            for img in src_path:
                a = a+1
                img_data = requests.get(url=img, headers=headers).content
                img_path = './zhifu/' + title + '_' + str(a) + '.jpg'
                with open(img_path, 'wb') as fp:
                    fp.write(img_data)
                    # print(img_path, 'downloaded')
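
One caveat: only '?' is stripped from titles, but any of the characters \ / : * ? " < > | will also break file creation on Windows. A more defensive helper (hypothetical, not part of the original script):

import re

def safe_name(title):
    # Drop every character that Windows forbids in file names
    return re.sub(r'[\\/:*?"<>|]', '', title)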
                    

Daily scheduled content pushes through the WeChat Work instant-messaging API in Python

A few days ago I read the thread by the expert at https://www.52pojie.cn/thread-1338005-1-1.html and figured I'd push a few things I want to see every day too, so I used his API as well; it really is convenient.

See the thread above for WeChat Work registration and the API endpoint.
I put together the weather + local epidemic numbers + Baidu trending topics, hung it on my own server, and have it send every day on a schedule.
The code is rough, experts please look away; you can add whatever other content you like to the message.

import requests
from lxml import etree

class TopNews():
    def __init__(self):
        self.url = 'http://top.baidu.com/'
        self.headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'
            }
    def get_url(self):
        r = requests.get(url=self.url, headers=self.headers)
        r.encoding = 'gb2312'
        tree = etree.HTML(r.text)
        li_list = tree.xpath('//*[@id="hot-list"]/li')
        titles =[]
        for num, li in enumerate(li_list, start=1):
            title = li.xpath('./a/@title')[0]
            titles.append(str(num) + ':' + title + '!')
        return titles

class Weather():
    def __init__(self):
        self.data={
            "location":"焦作",
            "key":"3f47abf414184e59b5b791b765e38a74"
        }

    def get_url(self):
        # The S6 API endpoint is used here; V7 also works
        data_list = requests.get('https://free-api.heweather.com/s6/weather/forecast',params=self.data).json()
        daily_forecast = data_list["HeWeather6"][0]["daily_forecast"]
        forecast_list = []
        for idx, i in enumerate(daily_forecast):
            forecast = {}
            # Label the forecast by its position: today / tomorrow / the day after
            if idx == 0:
                forecast['时间'] = '今天   '
            elif idx == 1:
                forecast['时间'] = '明天   '
            else:
                forecast['时间'] = '后天   '
            all_forecast = forecast['时间'] + '  白天:' + i['cond_txt_d'] + '  晚上:' + i['cond_txt_n'] + '  最高温度:' + i['tmp_max'] + '°C' + '  最低温度:' + i['tmp_min'] + '°C' + '  风力:' + i['wind_dir'] + i['wind_sc'] + '级' + '!'

            forecast_list.append(all_forecast)
        select_forecast = "".join(forecast_list)
        new_data = select_forecast.replace('!', '\n')
        return new_data


class YiQing:
    # switched from urllib2 to requests
    def __init__(self):
        self.host = 'http://ncovdata.market.alicloudapi.com'
        self.path = '/ncov/cityDiseaseInfoWithTrend'
        self.method = 'GET'
        self.appcode = '9f7f40025d254f2593066def83ebdc98'
        self.querys = ''
        self.bodys = {}
        self.url = self.host + self.path

        self.new_appcode = 'APPCODE ' + self.appcode
        self.headers = {
            'Authorization': self.new_appcode
        }
    def get_data(self):
        response = requests.get(url=self.url, headers=self.headers)
        # Parse the JSON body into a dict (response.json() instead of eval)
        dict_content = response.json()
        # Pull the local province out of the full list; index 7 (Henan) is
        # position-dependent and breaks if the API reorders provinces
        new_content = dict_content['provinceArray'][7]
        yiqing_data = '河南最新疫情:' + '\n' + '新增:' + str(new_content['totalDoubtful']) + '\n' + '累计:' + str(new_content['totalConfirmed']) + '\n' + '累计治愈:' + str(new_content['totalCured']) + '\n' + '累计死亡:' + str(new_content['totalDeath'])
        return yiqing_data

class SendText:
    def __init__(self, newsdata, hewdata, yqdata):
        self.news_data = newsdata
        self.hew_data = hewdata
        self.yq_data = yqdata
        self.corpid = "YOUR_CORPID"
        self.corpsecret = "YOUR_CORPSECRET"
        self.agentid = "YOUR_AGENTID"
    def server_j(self):
        text_content = "".join(self.news_data)
        new_content = text_content.replace('!', '\n')
        # print(new_content)
        send_data = str(self.hew_data) + '\n' + self.yq_data + '\n' + '\n' + new_content

        data = {
        "corpid":self.corpid,
        "corpsecret":self.corpsecret,
        "agentid":self.agentid,
        "text":send_data
                }
        req = requests.post('https://api.htm.fun/api/Wechat/text/',data=data)
        # print(req)
        return req
def main():
    news = TopNews()
    heweather = Weather()
    yiqing = YiQing()
    text = SendText(news.get_url(), heweather.get_url(), yiqing.get_data())
    text.server_j()
if __name__ == '__main__':
    main()

Setting up the scrapy crawler framework, with an example

I. Installation. Both my local machine and the server run anaconda, so installation is straightforward:

conda install -c conda-forge scrapy

II. Usage
1. Create a project:

scrapy startproject jobs

2. Go into the jobs/spiders directory and generate a spider file:

scrapy genspider hr tencent.com

When it's done it looks like the screenshot below (my computer is an antique and PyCharm is too laggy, so I've always used Sublime).
(screenshot: scrapy project created)
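
The generated hr spider is only a stub; a minimal sketch of what scrapy's default template produces (the parse method is yours to fill in):

import scrapy

class HrSpider(scrapy.Spider):
    name = 'hr'
    allowed_domains = ['tencent.com']
    start_urls = ['https://tencent.com/']

    def parse(self, response):
        # extraction logic goes here
        pass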

3. Run the spider:

scrapy crawl hr

Installing Firefox on Linux/CentOS and configuring headless mode

1. yum search firefox
Pick the 64-bit Firefox package and install it, then run
whereis firefox
to find the install directory.

2. Download geckodriver:
https://npm.taobao.org/mirrors/geckodriver/v0.29.0/geckodriver-v0.29.0-linux64.tar.gz
Unpack it into the Firefox install directory,
and copy it to /usr/bin as well, which saves having to pass an explicit path.

3. Add the Firefox install directory to the PATH environment variable:
export PATH="$PATH:/usr/lib64/firefox"

4. Test it:

from selenium import webdriver

options = webdriver.FirefoxOptions()
options.add_argument('-headless')            # run Firefox without a display
driver = webdriver.Firefox(options=options)  # options= replaces the deprecated firefox_options=
driver.get('http://www.baidu.com')
print('title:' + driver.title)
print('URL:' + driver.current_url)
driver.close()

Installing Python 3 on Linux

Remove Python 3.6:

whereis python3 |xargs rm -frv

Install Python 3.9
1. Install the build dependencies:
yum install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gcc make libffi-devel -y

2. Download, unpack, and enter the directory:
cd /usr/local
wget https://www.python.org/ftp/python/3.9.2/Python-3.9.2.tgz
tar -zxvf Python-3.9.2.tgz
mv Python-3.9.2 python3
cd python3

3. Set the install prefix, then build and install:
./configure --prefix=/usr/local/python3
make && make install

4. Add symlinks (python/pip used to point at version 2; switch them to 3)
Back up the originals first:
mv /usr/bin/python /usr/bin/python_back
mv /usr/bin/pip /usr/bin/pip_back
ln -s /usr/local/python3/bin/python3 /usr/bin/python
ln -s /usr/local/python3/bin/pip3 /usr/bin/pip

5. Add to the PATH environment variable:
export PATH=$PATH:/usr/local/python3/bin

6. Repair yum downloads (yum depends on Python 2):
vi /usr/libexec/urlgrabber-ext-down
vi /usr/bin/yum
In both files, change python on the first (shebang) line to python2.
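
Finally, check that the symlinks and PATH took effect:

python -V
pip -V

python -V should print Python 3.9.2, and pip -V should show it is bound to Python 3.9.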