First off, JavaScript in the client is probably not the best language for this, nor the best approach. It might work, but it's worth knowing what fits best when choosing an approach to a problem. It will also save you from clicking through ~800 download-confirmation popups.
You can get the files programmatically by learning what your browser does to fetch one file and then reproducing that in bulk.
After inspecting the network calls you can see that the page calls an endpoint, and that endpoint returns a link to the file you can download.
That makes this easy: now you just need a script, in any language, to retrieve them all.
I've chosen JavaScript, but not client-side: Node.js, which means this runs from your own computer.
You could do the same with bash, Python or any other language.
To run this, do the following:
- Go to a new, empty directory
- Run npm install axios
- Create a file with the code pasted below; let's call it crawler.js
- Run node crawler.js
This has been tested using node v8.15.0.
// NOTE: Require this to make a request and save the link as file 20190813:Alevale
const axios = require('axios');
const fs = require('fs');

const now = new Date();
const daysOfYear = [];
const baseUrl = 'https://a4dzytphl9.execute-api.ap-southeast-1.amazonaws.com/prod/eod/';

// Build every date from 2016-01-01 up to today as a YYYY-MM-DD string
for (let d = new Date(2016, 0, 1); d <= now; d.setDate(d.getDate() + 1)) {
  daysOfYear.push(new Date(d).toISOString().substring(0, 10));
}

const waitFor = (time) => {
  return new Promise((resolve) => setTimeout(resolve, time));
};

const getUrls = async () => {
  for (const day of daysOfYear) {
    console.log('getting day', baseUrl + day);
    // NOTE: Throttle the calls to not overload the server 20190813:Alevale
    await waitFor(4000);
    await axios.get(baseUrl + day)
      .then((response) => {
        console.log(response.data);
        if (response.data && response.data.download_url) {
          return response.data.download_url;
        }
        return Promise.reject('Could not retrieve response.data.download_url');
      })
      .then((url) => {
        // Return (and so await) the download too, so `day` still refers
        // to the right date when the file is written
        return axios({
          method: 'get',
          url,
          responseType: 'stream'
        }).then((response) => {
          // NOTE: Save the file as 2019-08-13 20190813:Alevale
          response.data.pipe(fs.createWriteStream(`${day}.csv`));
        });
      })
      .catch((error) => {
        console.log(error);
      });
  }
};

getUrls();
(Answer from Alejandro Vales on Stack Overflow.)
Instead of simulating the user, you can get the download link from: https://a4dzytphl9.execute-api.ap-southeast-1.amazonaws.com/prod/eod/2019-08-07 and just change the date at the end to the date of the file you want, then use axios to GET that URL.
This will save you some time (in case you don't really need to simulate the user's click, etc.).
You will then get a response like this:
{
  "download_url": "https://d3u9ukmkxau9he.cloudfront.net/eod/2019-08-07.csv?Expires=1566226156&Signature=QRUk3tstuNX5KYVPKJSWrXsSXatkWS-eFBIGUufaTEMJ~rgpVi0iPCe1AXl5pbQVdBQxOctpixCbyNz6b9ycDgYNxEdZqPr2o2pDe8cRL655d3zXdICnEGt~dU6p35iMAJkMpPSH~jbewhRSCPUwWXQBfOiEzlHwxru9lPnDfsdSnk3iI3GyR8Oc0ZP50EdUMHF7MjWSBRbCIwnu6wW4Jh0bPmZkQDQ63ms5QxehsmtuGLOgcrC6Ky1OffVQj~ihhmBt4LGhZTajjK4WO18hCP3urKt03qpC4bOvYvJ3pxvRkae0PH1f-vbTWMDkaWHHVCrzqZhkAh3FlvMTWj8D4g__&Key-Pair-Id=APKAIAXOVAEOGN2AYWNQ"
}
and then you can use axios to GET that URL and download your file.
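If you only need one file, the whole flow is just two requests. A minimal sketch (same endpoint and download_url field as above; error handling omitted):
const axios = require('axios');
const fs = require('fs');

// Step 1: ask the endpoint for the signed link.
// Step 2: stream the CSV that link points at to disk.
const day = '2019-08-07'; // any date in YYYY-MM-DD form
axios.get(`https://a4dzytphl9.execute-api.ap-southeast-1.amazonaws.com/prod/eod/${day}`)
  .then(({ data }) => axios.get(data.download_url, { responseType: 'stream' }))
  .then(({ data }) => data.pipe(fs.createWriteStream(`${day}.csv`)));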
Download File from URL
There are a couple of ways to do this. As mentioned, using the developer tools could work (more likely it will give you the URL to the file), and right-clicking the link will work. Alternatively, there are these options.
In Chrome
- Go to the URL
- Right-click the webpage
- Select Save As...
For verification purposes, here are png, jpg, and mp3 links. Follow them and try these steps. However, in my experience, if you already have a URL to a file, opening up Chrome and following these steps is rather tedious, so here is an alternative.
In Command Line
- Open your favorite terminal emulator
- Type curl -O URL
- where the O is a capital letter O
- and URL is the URL to the file, e.g. http://example.com/file.mp3
For PowerShell, this example works great:
Invoke-WebRequest -Uri http://files.animatedsuperheroes.com/themes/spiderman94.mp3 -OutFile "c:\Spiderman94.mp3"
This was confirmed with Win10 x64 1607.
What you could do is set up a certain parameter, for example: https://example.com/?autodownload=1
then add a piece of JavaScript that, either immediately or after some timeout, automatically downloads the file. Something like:
document.addEventListener("DOMContentLoaded", function (event) {
  if (window.location.href.indexOf("autodownload") > -1) {
    setTimeout(function () {
      window.open("https://example.com/docs/some_file.xlsx");
    }, 3000); // your timeout in milliseconds
  }
});
Of course this solution will not work if the browser blocks opening new windows. A second possibility with this approach is to replace the window.open call with a click on the anchor itself, as sketched below.
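A minimal sketch of that second possibility, reusing the same autodownload parameter (the file path is just an example; the download attribute only works for same-origin files):
document.addEventListener("DOMContentLoaded", function () {
  if (window.location.href.indexOf("autodownload") > -1) {
    setTimeout(function () {
      // Click a temporary anchor instead of calling window.open,
      // so popup blocking does not get in the way.
      var a = document.createElement("a");
      a.href = "https://example.com/docs/some_file.xlsx";
      a.download = "some_file.xlsx";
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
    }, 3000); // your timeout in milliseconds
  }
});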
You can trigger a file download with JavaScript by firing click() on an <a> tag with the download attribute. Something like this:
const $link = document.createElement('a');
$link.href = 'docs/some_file.xlsx';
$link.download = 'some_file.xlsx';
$link.click();
Keep in mind, however, that browsers may limit automatic file downloads to prevent sites from doing dodgy things. I've seen this in particular when a site tries to download multiple files, so I think you'll be fine just downloading a single file.
Generally, the best way around this is to ensure you only perform the action in response to user input, such as a click, though that doesn't sound like it would work for the solution you're looking for here; see the sketch below anyway.
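If you can gate it on a click, a small sketch (the button id is a placeholder):
// Browsers treat downloads started inside a real click handler as
// user-initiated, so they are far less likely to be blocked.
document.getElementById('download-btn').addEventListener('click', () => {
  const $link = document.createElement('a');
  $link.href = 'docs/some_file.xlsx';
  $link.download = 'some_file.xlsx';
  $link.click();
});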
Hey guys, I hope this is the right place to ask.
I have a work project that involves using our software (Intelex) to navigate to a list of reports, download the reports, then rename and edit them for different data storage.
What I am hoping to do is find an automated tool to download all the files (1200+ .docx files, each a few pages long).
I have to open each report in Intelex, scroll to the bottom section, open that section, and download the report.
I am hoping there is a tool that can make this download process waayyy faster.
If you guys can help me, I would greatly appreciate it.
Dear User,
Thank you for reaching out to the Microsoft 365 community. To get the file to "download" instead of "open", please modify the download URL like below:
https://<tenant>.sharepoint.com/sites/<site>/_layouts/download.aspx?SourceUrl=<file path>
E.g.
https://contoso.sharepoint.com/sites/siteA/_layouts/download.aspx?SourceUrl=https://contoso.sharepoint.com/sites/siteA/Shared%20Documents/sample.xlsx
For the <file path>, please locate the file on SharePoint > right-click it > Details > scroll to the bottom and copy the path.
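If you need to build such links in code, a small sketch (hypothetical helper; only the SourceUrl value needs URL-encoding):
// Wrap a SharePoint file path in the download.aspx redirect so the
// browser downloads the file instead of opening it.
function toDownloadUrl(siteUrl, filePath) {
  return `${siteUrl}/_layouts/download.aspx?SourceUrl=${encodeURIComponent(filePath)}`;
}

console.log(toDownloadUrl(
  'https://contoso.sharepoint.com/sites/siteA',
  'https://contoso.sharepoint.com/sites/siteA/Shared Documents/sample.xlsx'
));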
We are committed to ensuring you have a smooth experience with Microsoft 365 products, and we appreciate your patience and understanding as we work through this challenge.
If there is anything further we can assist you with in the meantime, please do not hesitate to reach out.
Best regards,
Sophia
Hello Sophia,
And is there a way to create an automatic-download link from Teams, starting from the link that is shared with all the company users?
Three dots >> Copy Link >> Settings >> only people from the organization
I need to find a way to automatically allow downloading such a file without granting access to the Teams site.
&download=1 does not work for me (the file opens but doesn't download itself).
Thanks :)
Part of my job is checking a government website once per week to ensure I have the latest copy of a PDF document downloaded to a hard drive. Is there a way I can automatically download and replace the file each week?
Evidently I was wrong. PythonAnywhere is really easy. (But you must use a paid plan in order to access most outside servers, i.e. to get files from any website you want. As of right now, the cheapest plan they have is $5 a month.)
Stack Overflow isn't a code-writing service, but enjoy this little exception.
Here's a basic implementation of what you've asked for:
We're going to use an HTTP server written in Python, using a framework called Flask. That server will provide two functions:
- serve the basic form you've described
- let us request a file from the backend, which it will return in a way that forces the browser to download it (and not display it, for example)
Step 1
Create a "pythonanywhere" account.
Attention: your website will be hosted on username.pythonanywhere.com, choose wisely.
hit "open web tab"*
Step 2
Select Flask with the latest Python version in "Add a new web app" (Python 3.10 in this case) and hit Next.
Step 3
Verify the website works: click on the link and you should see "Hello from Flask!".
Step 4
Go to the "Files" tab, enter the mysite directory
and create a new directory called "templates"
Step 5
Create a new file called index.html and put this HTML in it (CSS and form taken from w3schools):
<!DOCTYPE html>
<html>
<style>
  input[type=text], select {
    width: 100%;
    padding: 12px 20px;
    margin: 8px 0;
    display: inline-block;
    border: 1px solid #ccc;
    border-radius: 4px;
    box-sizing: border-box;
  }
  input[type=submit] {
    width: 100%;
    background-color: #4CAF50;
    color: white;
    padding: 14px 20px;
    margin: 8px 0;
    border: none;
    border-radius: 4px;
    cursor: pointer;
  }
  input[type=submit]:hover {
    background-color: #45a049;
  }
  div {
    border-radius: 5px;
    background-color: #f2f2f2;
    padding: 20px;
  }
</style>
<body>
  <h3>File Proxy</h3>
  <div>
    <!-- notice the @app.route("/download", ...) in the Flask file -->
    <form method="POST" action="/download">
      <label for="fpath">File Path</label>
      <input type="text" id="fpath" name="fpath" placeholder="Path...">
      <input type="submit" value="Submit">
    </form>
  </div>
</body>
</html>
Hit save
Step 6
Go back to the "Files" tab, and enter mysite directory again.
edit the flask_app.py file
and replace the code in it with the following:
from flask import Flask, request, render_template, Response
import requests

app = Flask(__name__)

content_type_to_extension = {"text/html": ".html"}
FALLBACK_EXTENSION = ".bin"

# get the path from the form, GET it ourselves, and return it with a
# content-type that will force the client's browser to download it
# and not attempt to display it
@app.route("/download", methods=["POST"])
def loadData():
    data = request.form["fpath"]
    r = requests.get(data)
    response = Response(
        r.content,
        status=r.status_code,
        content_type="application/stream"
        # content_type=r.headers['content-type']
    )
    # basic naming attempt: if the URL has a path segment after the last /,
    # assume (maybe falsely, this is a POC) that it already carries a file
    # extension (you can add regex tests and whatnot)
    try:
        fname = data.replace('://', '')
        fname = fname[fname.rindex("/") + 1:]
        assert len(fname) > 1
    except (IndexError, ValueError, AssertionError):
        fname = data.replace("/", "-")
        # if we can't find the correct extension, fall back to .bin
        ext = content_type_to_extension.get(r.headers['content-type'].split(";")[0], FALLBACK_EXTENSION)
        fname += ext
    response.headers["Content-Disposition"] = f'attachment; filename={fname}'
    return response

@app.route("/")
def index():
    return render_template("index.html")  # that html file in /templates

if __name__ == "__main__":
    app.run(host='0.0.0.0')
Hit save
Step 7
Go back to the "web" tab, and hit that green "reload" button. (or the blue reload icon next to the save in the flask_app.py edit panel)
And go to your website.
The requests library requires a schema to be specified, so don't forget the http(s):// prefix, or add it via Python.
If you don't have a paid account, checking the error logs in the "Web" tab shows
OSError('Tunnel connection failed: 403 Forbidden')
or
Network is unreachable
so you've got to go with a paid plan, or find some other way to host your web server.
For the record, what we're doing here is creating a "dumbed-down" web GUI version of a CLI HTTP client like wget or curl.
You could just run wget <your-url> in bash / PowerShell and it would be basically the same.
This code can work if you want to download a page from the same domain.
For example, you can download https://example.com/main.html if your domain is https://example.com/.
However, if your domain is https://example2.org/, it does not work.
The reason you cannot download arbitrary pages from the internet is that the browser uses CORB/CORS for safety reasons.
<head>
  <script src="https://requirejs.org/docs/release/2.3.5/minified/require.js"></script>
</head>
<button type="button" id="down">Download</button>
<input id="InputURL" placeholder="The URL you want to download." style="width:100%">
<p id="HiddenLable" style="">
  The HTML
</p>
<script language="javascript">
  var xmlHttp; // shared between viewweb() and Callback()

  document.getElementById('down').onclick = function () {
    viewweb(document.getElementById('InputURL').value);
  };

  // Fetch the page at TheLink with a (synchronous) XMLHttpRequest
  function viewweb(TheLink) {
    if (window.ActiveXObject) // old IE
      xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
    else if (window.XMLHttpRequest) // other browsers
      xmlHttp = new XMLHttpRequest();
    xmlHttp.onreadystatechange = Callback;
    xmlHttp.open("GET", TheLink, false); // read
    xmlHttp.send(null);
  }

  function Callback() {
    if (xmlHttp.readyState == 4) {
      if (xmlHttp.status == 200) {
        var value = xmlHttp.responseText;
        document.getElementById("HiddenLable").innerHTML = value;
        download("DownLoad.html", value);
      }
    }
  }

  // Save `content` as a local file named `filename` via a temporary blob URL.
  // Do not call viewweb() from here; the value may not be downloaded yet.
  function download(filename, content) {
    var blob = new Blob([content], { type: 'text/plain' });
    var url = window.URL.createObjectURL(blob);
    var a = document.createElement('a');
    a.style = "display: none";
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    setTimeout(function () {
      document.body.removeChild(a);
      window.URL.revokeObjectURL(url);
    }, 5);
  }
</script>
If you try to download an HTML page from a different domain, you will see this in the console:
Access to XMLHttpRequest at 'https://******/' from origin 'null' has been blocked by CORS policy: No 'Access-Control-Allow-Origin' header is present on the requested resource.
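One way around this, as in the Node.js answer earlier, is to fetch the page outside the browser, where the same-origin policy does not apply. A minimal sketch:
// Node.js has no same-origin policy, so a cross-domain GET just works.
const axios = require('axios');
const fs = require('fs');

axios.get('https://example.org/')
  .then((response) => fs.writeFileSync('DownLoad.html', response.data))
  .catch(console.error);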
I'm a baby IT admin and we've got an unusual situation. Basically, we have an awful beast of a database website we can query for reports. I can build a report and create a static URL that, if opened in a browser, will automatically download the report as a file (CSV, PDF, etc.). But it's not a file path; on a cursory look I believe it's a PHP script or something.
Ideally, I'd want to use curl and set up a cron job or something, but I can't get past the first step: how do I grab that file in the terminal? I'm trying to automatically download reports to an Ubuntu server.
I was thinking maybe of using Selenium and running a browser headless or something, but I feel like that's overkill. Any help pointing in the right direction would be appreciated.
EDIT: Solution was putting the URL in quotes. Derp. Thank you all for your help!
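For anyone with the same setup, the working shape was roughly this (hypothetical path and URL; the quotes are the fix, keeping the shell from interpreting & and ? in the query string):
# weekly crontab entry: every Monday at 06:00, fetch and overwrite the stored copy
0 6 * * 1 curl -L -o /srv/reports/latest.csv "https://dbhost.example/reports.php?id=42&format=csv"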