import re
import time

import pymongo
import requests
from bs4 import BeautifulSoup
from lxml import etree
 
  uri = "mongodb://user:pass@127.0.0.1:27017/demo" arr = [] now = time.strftime("%Y-%m-%d", time.localtime()) print('time.localtime=>', now)
  client = pymongo.MongoClient(uri) db = client.demo collection = db.weather_cn_data headers = {     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',     'Cookie': '' }
 
def detail(detail_url, option):
    # Fetch one regional forecast page and hand the response to the parser
    res = requests.get(detail_url, headers=headers)
    paqu_shengfen_table(res)

 
def paqu_shengfen_table(res):
    soup = BeautifulSoup(res.content, 'html.parser')
    print('soup', soup)
    # One forecast table per province/region listed in the .lQCity menu
    table_nums = len(soup.select('.lQCity > ul > li'))
    for i in range(table_nums):
        tr_all = soup.select('.conMidtab2 > table')[i]
        tr = tr_all.find_all('tr')
        # The third row carries the province name in its first cell
        shengfen = tr[2].get_text().split()[0]
        beautiful(res, tr, shengfen)
 
 
 
def beautiful(res, tr, shengfen):
    # First data row: column 0 holds the province name, so the city fields
    # sit one column further right than in the remaining rows
    td = tr[2].find_all('td')
    city = td[1].get_text().strip()
    tianqixianxiang_1 = td[2].get_text().strip()
    fengxiangfengli_1 = td[3].get_text().strip()
    zuigaoqiwen = td[4].get_text().strip()
    tianqixianxiang_2 = td[5].get_text().strip()
    fengxiangfengli_2 = td[6].get_text().strip()
    zuidiqiwen = td[7].get_text().strip()
    db_data = {
        'time': now,
        'province': shengfen,
        'city': city,
        'daytime_weather_conditions': tianqixianxiang_1,
        'daytime_wind': fengxiangfengli_1,
        'maximum_temperature': zuigaoqiwen,
        'nighttime_weather_conditions': tianqixianxiang_2,
        'nighttime_wind': fengxiangfengli_2,
        'minimum_temperature': zuidiqiwen
    }
    print(res)
    arr.append(db_data)

    # Remaining rows: same fields, shifted one column to the left
    for j in tr[3:]:
        td = j.find_all('td')
        city = td[0].get_text().strip()
        tianqixianxiang_1 = td[1].get_text().strip()
        fengxiangfengli_1 = td[2].get_text().strip()
        zuigaoqiwen = td[3].get_text().strip()
        tianqixianxiang_2 = td[4].get_text().strip()
        fengxiangfengli_2 = td[5].get_text().strip()
        zuidiqiwen = td[6].get_text().strip()
        db_data = {
            'time': now,
            'province': shengfen,
            'city': city,
            'daytime_weather_conditions': tianqixianxiang_1,
            'daytime_wind': fengxiangfengli_1,
            'maximum_temperature': zuigaoqiwen,
            'nighttime_weather_conditions': tianqixianxiang_2,
            'nighttime_wind': fengxiangfengli_2,
            'minimum_temperature': zuidiqiwen
        }
        print(res)
        arr.append(db_data)

 
def get_datail_url(url, option):
    detail(url, option)

 
def start(option):
    # Regional forecast index pages on weather.com.cn
    urls = [
        'https://www.weather.com.cn/textFC/hb.shtml',
        'https://www.weather.com.cn/textFC/db.shtml',
        'https://www.weather.com.cn/textFC/hd.shtml',
        'https://www.weather.com.cn/textFC/hz.shtml',
        'https://www.weather.com.cn/textFC/hn.shtml',
        'https://www.weather.com.cn/textFC/xb.shtml',
        'https://www.weather.com.cn/textFC/xn.shtml',
        'https://www.weather.com.cn/textFC/gat.shtml'
    ]

    # Crawl every regional page in the list
    for url in urls:
        get_datail_url(url, option)

 
def job():
    # Scrape every region, then bulk-insert the collected documents
    start(1)
    print('arr', arr)
    print('db result', collection.insert_many(arr))

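# Optional verification sketch (not part of the original script): verify_today
# is a hypothetical helper that re-reads today's documents from the same
# demo.weather_cn_data collection and prints a small sample, assuming the uri
# defined above is reachable. Call it manually after job() to confirm what
# was inserted.
def verify_today(limit=5):
    today = time.strftime("%Y-%m-%d", time.localtime())
    print('documents for today:', collection.count_documents({'time': today}))
    for doc in collection.find({'time': today}).limit(limit):
        print(doc['province'], doc['city'],
              doc['maximum_temperature'], doc['minimum_temperature'])
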
if __name__ == '__main__':
    job()
 