(13.02.2019, 20:21) atze2000 schrieb:
Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib
import re
import time
import os
import os.path
import string
# Seconds scrap() waits at the start of every polling cycle inside the
# trading window (20 was noted as an alternative value).
sleeptime1 = 10
# Seconds scrap() waits before downloading each page.
sleeptime2 = 2

# Trading window; scrap() compares the local hour against both bounds,
# inclusive.
StartStunde = 8
EndStunde = 22

# Overview pages that scrap() downloads and parses.
symbollist = [
    "https://www.ls-tc.de/de/aktien/deutschland/dax",
]
def check_data(source, a, b):
    """Tell whether a new quote differs from the last row stored for *source*.

    Reads ``Kursdaten/<source>.csv`` and compares the most recent non-blank
    row with the new values. Rows are ``;``-separated.

    Args:
        source: File stem of the CSV file inside ``Kursdaten/``.
        a: Value compared with the second field of the last row
            (WriteData passes the quote time here).
        b: Value compared with the third field of the last row
            (WriteData passes the bid here).

    Returns:
        False when ``a`` equals the second field or ``b`` equals the third
        field of the last row. True otherwise, including when there is
        nothing to compare against (file missing, file empty, or the last
        row has fewer than three fields).
    """
    path = "Kursdaten/" + source + ".csv"
    # No file yet means no previous row, so the data counts as new; the
    # caller decides what to do about the missing file.
    if not os.path.exists(path):
        return True

    # Only the last non-blank row matters, so keep just that one instead of
    # buffering the whole file.
    last_row = None
    with open(path, "r") as fobj:
        for line in fobj:
            if line.strip():
                last_row = line
    if last_row is None:
        return True

    # Strip the line ending so a three-field row still compares correctly.
    daten_feld = last_row.rstrip("\r\n").split(";")
    if len(daten_feld) < 3:
        return True
    return not (a == daten_feld[1] or b == daten_feld[2])
def check_double_data(bid, ask, source=None):
    """Tell whether a bid/ask pair differs from the last row stored for *source*.

    NOTE(review): the previous body was an unfinished stub (it subscripted
    ``buf.append`` and read an undefined ``source``). The contract below is
    inferred from the commented-out call in WriteData, which expects True
    when the data may be written -- confirm before enabling that call.

    Args:
        bid: Bid value, compared with the third ``;``-separated field.
        ask: Ask value, compared with the fourth ``;``-separated field.
        source: File stem of the CSV file inside ``Kursdaten/``. Optional
            only to keep the two-argument signature callable; it must be
            supplied.

    Returns:
        False when the last non-blank row already holds the same bid and
        the same ask. True otherwise, including when the file is missing,
        empty, or its last row has fewer than four fields.

    Raises:
        ValueError: If ``source`` is not given.
    """
    if source is None:
        raise ValueError("check_double_data needs the CSV file stem in 'source'")

    path = "Kursdaten/" + source + ".csv"
    if not os.path.exists(path):
        return True

    with open(path, "r") as fobj:
        buf = [line for line in fobj if line.strip()]
    if not buf:
        return True

    # Row layout written by WriteData: date;time;bid;ask;volume
    fields = buf[-1].rstrip("\r\n").split(";")
    if len(fields) < 4:
        return True
    return not (bid == fields[2] and ask == fields[3])
def WriteData(wert, Datum, ti, bi, aa, vo):
    """Append one quote row to the CSV file of *wert* if it is new.

    The row is written as ``Datum;ti;bi;aa;vo``. When
    ``Kursdaten/<wert>.csv`` does not exist, the row is appended to
    ``Report/Error.csv`` instead, prefixed with ``wert``. Both directories
    are expected to exist already.

    Args:
        wert: File stem of the target CSV file inside ``Kursdaten/``.
        Datum: Date string for the first column.
        ti: Quote time.
        bi: Bid.
        aa: Ask.
        vo: Volume/turnover.
    """
    zeile = ";".join([Datum, ti, bi, aa, vo]) + "\n"
    path = "Kursdaten/" + wert + ".csv"

    # Test for the file first: check_data reads it, so asking check_data
    # before this test would fail on a missing file and the error report
    # below would never be written.
    if not os.path.exists(path):
        with open("Report/Error.csv", "a") as fobj:
            fobj.write(wert + ";" + zeile)
        return

    # TODO: also consult check_double_data once it is finished
    # (carried over from the original note on this condition).
    if check_data(wert, ti, bi):
        with open(path, "a") as fobj:
            fobj.write(zeile)
def format_filename(s):
    """Turn an arbitrary string into a lower-case, filesystem-friendly name.

    Works as a whitelist: only ASCII letters, digits and the characters
    ``-_.()`` plus the space survive; everything else is dropped. Spaces
    and dots are then turned into underscores and the result is
    lower-cased.

    Note: the result can be empty (or consist only of underscores) when the
    input has no whitelisted characters, so callers should add their own
    prefix and/or extension.
    """
    whitelist = "-_.() " + string.ascii_letters + string.digits
    kept = [ch for ch in s if ch in whitelist]
    cleaned = "".join(kept)
    # Neither spaces nor dots are wanted in the final name.
    for unwanted in (" ", "."):
        cleaned = cleaned.replace(unwanted, "_")
    return cleaned.lower()
def scrap():
    """Poll every page in ``symbollist`` forever and append quotes to CSV files.

    Each cycle reads the local hour. Outside ``StartStunde``..``EndStunde``
    (inclusive) only a notice is printed. Inside the window each page is
    downloaded, names/bid/ask/time/turnover are extracted with regular
    expressions, and one row per instrument goes to
    ``Kursdaten/<name>.csv``:

    * during the first complete pass rows are appended unconditionally;
    * afterwards rows go through ``WriteData``, which skips rows that
      ``check_data`` reports as unchanged.

    The function never returns; stop it by interrupting the process.

    NOTE(review): the pasted original had lost its indentation. Switching
    to ``WriteData`` after a *complete* pass (not after the first row) is an
    assumption, chosen so that every CSV file exists before ``WriteData``
    is used.
    """
    # Function-scope import: leaves the file's import block alone and lets
    # the script run on Python 2 and Python 3 alike.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # The patterns never change between polls, so compile them once.
    patternname = re.compile('<a href="/de/aktie/(.+?)</a>')
    patternbid = re.compile('bidWithCurrencySymbol" decimals="4">(.+?) €</span>')
    patternask = re.compile('askWithCurrencySymbol" decimals="4">(.+?) €</span>')
    patterntime = re.compile('midTime" decimals="4">(.+?)</span>')
    patternvol = re.compile(
        'tradeCumulativeTurnoverWithCurrencySymbol" decimals="4">(.+?) €</span>')

    # True until one complete pass has been written; after that WriteData
    # (and with it check_data) filters the rows.
    first_pass = True

    # A plain endless loop: a counter shared with the inner loops could end
    # the polling by accident.
    while True:
        Stunde = int(time.strftime("%H"))
        time.sleep(0.01)
        os.system("clear")
        print("Ausserhalb der Handelszeiten!!")
        if not (StartStunde <= Stunde <= EndStunde):
            continue

        os.system("clear")
        print("++++++++++++++++++++")
        time.sleep(sleeptime1)
        os.system("clear")
        print("####################")

        # === read the URLs ===
        for url in symbollist:
            os.system("clear")
            print("++++++++++++++++++++")
            os.system("clear")
            print("####################")
            time.sleep(sleeptime2)

            # A failed download must not end the long-running loop; skip
            # this page and try again in the next cycle.
            try:
                htmlfile = urlopen(url)
                try:
                    htmltext = htmlfile.read()
                finally:
                    htmlfile.close()
            except IOError as err:
                print("Download fehlgeschlagen: %s (%s)" % (url, err))
                continue

            # Python 3 returns bytes while the patterns are text. On
            # Python 2 both are byte strings and nothing is decoded.
            # Assumes the page is UTF-8 -- TODO confirm.
            if not isinstance(htmltext, str):
                htmltext = htmltext.decode("utf-8", "replace")

            # === extract the data ===
            Bezeichnung = patternname.findall(htmltext)
            BID = patternbid.findall(htmltext)
            ASK = patternask.findall(htmltext)
            TIME = patterntime.findall(htmltext)
            VOL = patternvol.findall(htmltext)
            Datum = time.strftime("%d.%m.%Y")

            # zip stops at the shortest list, so a page with fewer quotes
            # than links cannot raise an IndexError.
            for na, bi, aa, ti, vo in zip(Bezeichnung, BID, ASK, TIME, VOL):
                print(na)
                # The match is '<href rest>">Name'; the name follows the '>'.
                stri = na.split(">")
                if len(stri) < 2:
                    continue
                korrekterfilename = format_filename(stri[1])
                if first_pass:
                    with open("Kursdaten/" + korrekterfilename + ".csv", "a") as fobj:
                        fobj.write(Datum + ";" + ti + ";" + bi + ";" + aa + ";" + vo + "\n")
                else:
                    WriteData(korrekterfilename, Datum, ti, bi, aa, vo)

        # Enables the check_data filter after the first complete write pass.
        first_pass = False
if __name__ == "__main__":
    # Start the endless polling loop only when the file is run as a script,
    # so importing it (e.g. to reuse format_filename) does not block.
    scrap()
Ich habe den Code vor vielen Jahren geschrieben; er müsste mal überarbeitet werden, läuft aber noch.
Danke für diese Info, ich werde dieses Skript mal ausprobieren – das wird aber ein paar Tage dauern.
