#
# Configuration file for crawl.
# Write values bare: do not use quotes.
#

[General]
# These directives decide which URLs are followed.
# NOTE(review): the values look like regular expressions -- confirm the
# exact pattern syntax against the crawl documentation before editing.
Url-Include=http://.*\.citi\.umich\.edu
Url-Exclude=([^a-z]ads\.|\.(ico|eps|ps|gz|c|h|tar|exe|doc|pdf|ppt|txt|diff)$)

# These directives decide which images are downloaded and the directory
# they are saved in.
Img-Include=\.(jpg|jpeg)
Img-Exclude=thumbs\.
Img-Directory=.

# Maximum depth of the crawl.  -1 means unlimited; be very careful with
# that setting.  Disabled here: remove the leading '#' to set it.
#Max-Depth=-1

# External filter that is handed a URL on stdin and must answer with
# either 'n' or 'y' on stdout.  Disabled here: remove the leading '#'
# to set it.
#External-Filter=./external

# Default minimum and maximum length of media, used unless a section
# further down overrides them.
# NOTE(review): the unit is presumably bytes -- confirm.
Min-Length=20000
Max-Length=400000

[HTTP]
# NOTE(review): the two disabled settings below carry no explanation in
# this file; presumably the agent string sent with requests and a
# robots.txt switch -- confirm before enabling.
#Agent=Crawl/0.2
#Use-Robots=1
Connections=20
State-File=crawl.state

[image/jpeg]
# Length limits for this media type, overriding the [General] defaults.
# NOTE(review): section names appear to be MIME types -- confirm.
Min-Length=20000
Max-Length=200000

[audio/mpeg3]
# Length limits for this media type, overriding the [General] defaults.
Min-Length=1000000
Max-Length=30000000

[video/mpeg]
# Length limits for this media type, overriding the [General] defaults.
Min-Length=2000000
Max-Length=600000000