Scraping with Excel VBA - excel

I want to scrape data from some pages. I have a problem with the loop: each place marked with a red-bordered rectangle should be filled in with the "i" parameter, which denotes the page number. Could someone tell me how to do it?
' czwarta
' Imports archive pages 6 through 100 from https://wcn.pl/archive.
' For each page number i the procedure:
'   1. registers a Power Query query named "Table 0 (i)" that reads
'      https://wcn.pl/archive?page=i and types its five columns, and
'   2. loads that query into a new table "Table_0__i" placed below whatever
'      is already present in column A of the active sheet.
Sub czwarta()
    Const FIRST_PAGE As Long = 6
    Const LAST_PAGE As Long = 100

    Dim i As Long
    Dim queryName As String
    Dim formulaText As String
    Dim ws As Worksheet
    Dim lastRow As Long
    Dim nextRow As Long

    Set ws = ActiveSheet

    ' The upper bound must be a plain value. "For i = 6 To i = 100" compares
    ' i with 100, which yields False (0), so the loop ran 6 To 0 and never executed.
    For i = FIRST_PAGE To LAST_PAGE
        ' Every page needs its own query name; re-adding an existing name fails.
        queryName = "Table 0 (" & i & ")"

        formulaText = _
            "let" & vbCrLf & _
            " Źródło = Web.Page(Web.Contents(""https://wcn.pl/archive?page=" & i & """))," & vbCrLf & _
            " Data0 = Źródło{0}[Data]," & vbCrLf & _
            " #""Zmieniono typ"" = Table.TransformColumnTypes(Data0,{{""Zdjęcie/Numer"", type text}, {""Opis"", type text}, {""Stan"", type text}, {""Cena"", Currency.Type}, {""Data"", type date}})" & vbCrLf & _
            "in" & vbCrLf & _
            " #""Zmieniono typ"""
        ActiveWorkbook.Queries.Add Name:=queryName, Formula:=formulaText

        ' Place each table below the existing data (one blank row in between)
        ' instead of the fixed $A$131, which would overlap on the second pass.
        lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
        If lastRow = 1 And IsEmpty(ws.Cells(1, "A").Value) Then
            nextRow = 1
        Else
            nextRow = lastRow + 2
        End If

        With ws.ListObjects.Add(SourceType:=0, Source:= _
            "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""" & queryName & """;Extended Properties=""""", _
            Destination:=ws.Cells(nextRow, "A")).QueryTable
            .CommandType = xlCmdSql
            .CommandText = Array("SELECT * FROM [" & queryName & "]")
            .RowNumbers = False
            .FillAdjacentFormulas = False
            .PreserveFormatting = True
            .RefreshOnFileOpen = False
            .BackgroundQuery = True
            .RefreshStyle = xlInsertDeleteCells
            .SavePassword = False
            .SaveData = True
            .AdjustColumnWidth = True
            .RefreshPeriod = 0
            .PreserveColumnInfo = True
            .ListObject.DisplayName = "Table_0__" & i
            ' Synchronous refresh so the next iteration sees the loaded rows.
            .Refresh BackgroundQuery:=False
        End With
    Next i
End Sub

Related

Automate Excel Power Query

Hello I am trying to see how I can change this macro and make it so that it will run Power Query below, but for all files in the same folder the excel is in. In this example #3 is the file name that would need to change each time it loops to a new file.
' Get_Data
' Passes a Power Query (M) formula for the workbook "#3.xlsx" to
' ExecuteExcel4Macro, then adds a worksheet and loads the query named "#3"
' into a table at A1 of that sheet.
' "#3" is hard-coded in the file path, the sheet/item name, the connection
' Location and the SQL text.
Sub Get_Data()
' NOTE(review): the macro string below starts with "(" and has no function
' name in front of it - it looks truncated; confirm against the recorded macro.
ExecuteExcel4Macro _
"(""#3"",""let" & Chr(10) & " Source = Excel.Workbook(File.Contents(""/Users/tmayfield/Library/CloudStorage/OneDrive-Personal/Glaeser Park Territories/New Locations/#3.xlsx""), null, true)," & Chr(10) & " Navigation = Source{[Item = ""#3"", Kind = ""Sheet""]}[Data]," & Chr(10) & " #""Promoted headers"" = Table.PromoteHeaders(Navigation, [PromoteAllScalars = true])," & Chr(10) & " #""Changed column type"" = Table.Trans" & _
"formColumnTypes(#""Promoted headers"", {{""#"", type text}, {""Street "", type text}, {""Number"", Int64.Type}, {""Status"", type any}, {""Date"", type any}})" & Chr(10) & "in" & Chr(10) & " #""Changed column type"""")" & _
""
' The new sheet becomes the active sheet and receives the table.
ActiveWorkbook.Worksheets.Add
' Connect to the workbook query "#3" through the Mashup OLE DB provider.
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=#3;Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [#3]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
' False is assigned to a numeric property here, i.e. 0.
.RefreshPeriod = False
.PreserveColumnInfo = False
.ListObject.DisplayName = "Table_ExternalData_2"
' Refresh in the foreground (BackgroundQuery:=False).
.Refresh BackgroundQuery:=False
End With
End Sub

Change query with the same name

I just started learning VBA and having some trouble making a macro to import from a folder with the same name.
I wanted to add "_current" or "_future" to the end of the folder name as its query name. Then have the data imported to specified columns in a specified workbook (let's say columns B-F in "worksheet 2").
I'm also not sure how to get the temporary ~$ files to not show in the query.
Any help would be appreciated!
' Macro3 (recorded macro)
' Registers the workbook query "Training 1", which lists the files in
' C:\Users\N14067\Documents\Training\VBA\Training 1, splits the Name column on
' spaces into Name.1/Name.2/Name.3 and removes every column except Name.3.
' The query is then loaded into a table at A1 of a newly added worksheet.
Sub Macro3()
'
' Macro3 Macro
'
'
' Recorder leftovers: clear cut/copy mode, then copy the current selection.
Application.CutCopyMode = False
Selection.Copy
ActiveWorkbook.Queries.Add Name:="Training 1", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Folder.Files(""C:\Users\N14067\Documents\Training\VBA\Training 1"")," & Chr(13) & "" & Chr(10) & " #""Split Column by Delimiter"" = Table.SplitColumn(Source, ""Name"", Splitter.SplitTextByDelimiter("" "", QuoteStyle.Csv), {""Name.1"", ""Name.2"", ""Name.3""})," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(#""Split Column by Delimiter"",{{""Name.1"", type text}, {""Na" & _
"me.2"", type text}, {""Name.3"", type text}})," & Chr(13) & "" & Chr(10) & " #""Reordered Columns"" = Table.ReorderColumns(#""Changed Type"",{""Content"", ""Name.2"", ""Name.1"", ""Name.3"", ""Extension"", ""Date accessed"", ""Date modified"", ""Date created"", ""Attributes"", ""Folder Path""})," & Chr(13) & "" & Chr(10) & " #""Removed Columns"" = Table.RemoveColumns(#""Reordered Columns"",{""Content"", ""Name.2""," & _
" ""Name.1"", ""Extension"", ""Date accessed"", ""Date modified"", ""Date created"", ""Attributes"", ""Folder Path""})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Removed Columns"""
' The new sheet becomes the active sheet and receives the table.
ActiveWorkbook.Worksheets.Add
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Training 1"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Training 1]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Training_1"
' Refresh in the foreground (BackgroundQuery:=False).
.Refresh BackgroundQuery:=False
End With
End Sub
The value passed to Formula is just a String, so you can concatenate in a suffix for the folder.
FYI Chr(13) & "" & Chr(10) can be replaced with vbCrLf
Try something like this:
' Macro3
' Registers the workbook query "Training 1" over the folder
' "C:\Users\N14067\Documents\Training\VBA\Training 1" with a suffix appended
' to the folder name, then loads the query into a table starting at B1 of the
' sheet named "worksheet2".
Sub Macro3()
    Dim folderSuffix As String
    Dim targetBook As Workbook
    Dim destSheet As Worksheet
    Dim mCode As String
    Dim connText As String
    Dim loaded As QueryTable

    ' Hold one explicit workbook reference for the whole procedure.
    Set targetBook = ActiveWorkbook
    folderSuffix = "_current"

    ' Assemble the M formula one step at a time.
    mCode = "let" & vbLf
    mCode = mCode & " Source = Folder.Files(""C:\Users\N14067\Documents\Training\VBA\Training 1" & folderSuffix & """)," & vbCrLf
    mCode = mCode & " #""Split Column by Delimiter"" = Table.SplitColumn(Source, ""Name"", Splitter.SplitTextByDelimiter("" "", QuoteStyle.Csv), {""Name.1"", ""Name.2"", ""Name.3""})," & vbCrLf
    mCode = mCode & " #""Changed Type"" = Table.TransformColumnTypes(#""Split Column by Delimiter"",{{""Name.1"", type text}, {""Name.2"", type text}, {""Name.3"", type text}})," & vbCrLf
    mCode = mCode & " #""Reordered Columns"" = Table.ReorderColumns(#""Changed Type"",{""Content"", ""Name.2"", ""Name.1"", ""Name.3"", ""Extension"", ""Date accessed"", ""Date modified"", ""Date created"", ""Attributes"", ""Folder Path""})," & vbCrLf
    mCode = mCode & " #""Removed Columns"" = Table.RemoveColumns(#""Reordered Columns"",{""Content"", ""Name.2"", ""Name.1"", ""Extension"", ""Date accessed"", ""Date modified"", ""Date created"", ""Attributes"", ""Folder Path""})" & vbCrLf
    mCode = mCode & "in" & vbCrLf
    mCode = mCode & " #""Removed Columns"""

    targetBook.Queries.Add Name:="Training 1", Formula:=mCode

    ' The destination sheet is looked up only after the query exists.
    Set destSheet = targetBook.Worksheets("worksheet2")

    connText = "OLEDB;Provider=Microsoft.Mashup.OleDb.1;" & _
               "Data Source=$Workbook$;Location=""Training 1"";Extended Properties="""""
    Set loaded = destSheet.ListObjects.Add( _
        SourceType:=0, _
        Source:=connText, _
        Destination:=destSheet.Range("$B$1")).QueryTable

    loaded.CommandType = xlCmdSql
    loaded.CommandText = Array("SELECT * FROM [Training 1]")
    loaded.RowNumbers = False
    loaded.FillAdjacentFormulas = False
    loaded.PreserveFormatting = True
    loaded.RefreshOnFileOpen = False
    loaded.BackgroundQuery = True
    loaded.RefreshStyle = xlInsertDeleteCells
    loaded.SavePassword = False
    loaded.SaveData = True
    loaded.AdjustColumnWidth = True
    loaded.RefreshPeriod = 0
    loaded.PreserveColumnInfo = True
    loaded.ListObject.DisplayName = "Training_1"
    ' Refresh in the foreground (BackgroundQuery:=False).
    loaded.Refresh BackgroundQuery:=False
End Sub

Importing data from CSV file error: A query with the name ... already exists

I am trying to import data from a .csv file and then get the sum of last column.
The CSV file contents are:
Name,Age,City,Salary
Rick,25,Dallas,1800
Nick,28,Austin,2500
Jack,30,NYC,3500
Rose,26,Dallas,2400
The macro throws the following error.
The code looks like this.
' EmpMacro1
' Registers the workbook query "Emp_Datta" over
' C:\Users\Irfan.Shaikh\Desktop\Emp_Datta.csv (comma-delimited, 4 columns,
' first row promoted to headers), loads it into a table at A1 of a newly
' added worksheet, then writes a "Total" label in C7 and a SUM over the
' Salary column in D7.
Sub EmpMacro1()
    Dim mCode As String
    Dim connText As String
    Dim loaded As QueryTable

    ' Assemble the M formula one step at a time.
    mCode = "let" & vbCrLf
    mCode = mCode & " Source = Csv.Document(File.Contents(""C:\Users\Irfan.Shaikh\Desktop\Emp_Datta.csv""),[Delimiter="","", Columns=4, Encoding=1252, QuoteStyle=QuoteStyle.None])," & vbCrLf
    mCode = mCode & " #""Promoted Headers"" = Table.PromoteHeaders(Source, [PromoteAllScalars=true])," & vbCrLf
    mCode = mCode & " #""Changed Type"" = Table.TransformColumnTypes(#""Promoted Headers"",{{""Name"", type text}, {""Age"", Int64.Type}, {""City"", type text}, {""Salary"", Int64.Type}})" & vbCrLf
    mCode = mCode & "in" & vbCrLf
    mCode = mCode & " #""Changed Type"""

    ActiveWorkbook.Queries.Add Name:="Emp_Datta", Formula:=mCode

    ' The new sheet becomes the active sheet and receives the table.
    ActiveWorkbook.Worksheets.Add

    connText = "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=Emp_Datta;Extended Properties="""""
    Set loaded = ActiveSheet.ListObjects.Add( _
        SourceType:=0, _
        Source:=connText, _
        Destination:=Range("$A$1")).QueryTable

    loaded.CommandType = xlCmdSql
    loaded.CommandText = Array("SELECT * FROM [Emp_Datta]")
    loaded.RowNumbers = False
    loaded.FillAdjacentFormulas = False
    loaded.PreserveFormatting = True
    loaded.RefreshOnFileOpen = False
    loaded.BackgroundQuery = True
    loaded.RefreshStyle = xlInsertDeleteCells
    loaded.SavePassword = False
    loaded.SaveData = True
    loaded.AdjustColumnWidth = True
    loaded.RefreshPeriod = 0
    loaded.PreserveColumnInfo = True
    loaded.ListObject.DisplayName = "Emp_Datta"
    ' Refresh in the foreground (BackgroundQuery:=False).
    loaded.Refresh BackgroundQuery:=False

    ' Label and total below the imported rows; the selection ends on D8.
    Range("C7").Select
    ActiveCell.FormulaR1C1 = "Total"
    Range("D7").Select
    ActiveCell.FormulaR1C1 = "=SUM(Emp_Datta[Salary])"
    Range("D8").Select
End Sub
I have two questions.
When I stop recording the macro and delete the imported data, there is an alert asking me to confirm deleting the query. What is the impact of answering Yes versus No?
I looked into the web for the error but did not find a solution. Is it related to me deleting the query when I delete the imported data?
If you are deleting the query by deleting the Range (and answering "Yes" to the question), it seems you are only changing it into a connection-only query and not really deleting it. You need to actually delete the query, either with VBA code or in the Queries and Connections window, to really delete it.
Another problem with your query is that you will be creating multiple ListObjects with the same DisplayName. This will also cause a runtime error.
However, if the tables are on separate worksheets, as is the case with your query, they can have the same Name and Excel will adjust the DisplayName by appending a _n where n is a number, so as to prevent duplicate naming.
(You still cannot have tables with the same Name on the same worksheet).
So I would try:
' EmpMacro1 (revised)
' Registers the workbook query "Emp_Datta" over
' C:\Users\Irfan.Shaikh\Desktop\Emp_Datta.csv and loads it into a table at A1
' of a newly added worksheet, then writes a "Total" label in C7 and a SUM over
' the Salary column in D7.
' If adding the query fails and a query with that name already exists, the
' existing query is deleted and the add is retried; any other failure is
' reported in a message box.
Sub EmpMacro1()
    Const sName As String = "Emp_Datta"

    ' The handler is active only around Queries.Add.
    On Error GoTo delQuery
    ActiveWorkbook.Queries.Add Name:=sName, Formula:= _
        "let" & Chr(13) & "" & Chr(10) & " Source = Csv.Document(File.Contents(""C:\Users\Irfan.Shaikh\Desktop\Emp_Datta.csv""),[Delimiter="","", Columns=4, Encoding=1252, QuoteStyle=QuoteStyle.None])," & Chr(13) & "" & Chr(10) & " #""Promoted Headers"" = Table.PromoteHeaders(Source, [PromoteAllScalars=true])," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(#""Promoted Headers"",{{""Name"", type text}, {""Age"", Int64.T" & _
        "ype}, {""City"", type text}, {""Salary"", Int64.Type}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
    On Error GoTo 0

    ' The new sheet becomes the active sheet and receives the table.
    ActiveWorkbook.Worksheets.Add
    With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
        "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=" & sName & ";Extended Properties=""""" _
        , Destination:=Range("$A$1")).QueryTable
        .CommandType = xlCmdSql
        .CommandText = Array("SELECT * FROM [" & sName & "]")
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .PreserveColumnInfo = True
        '---------------------
        .ListObject.Name = sName
        '---------------------
        ' Refresh in the foreground (BackgroundQuery:=False).
        .Refresh BackgroundQuery:=False
    End With

    Range("C7").Select
    ActiveCell.FormulaR1C1 = "Total"
    Range("D7").Select
    ActiveCell.FormulaR1C1 = "=SUM(Emp_Datta[Salary])"
    Range("D8").Select
    Exit Sub

delQuery:
    ' Resume must run only after the clashing query has actually been deleted.
    ' Previously it ran on the first loop iteration regardless of the name, so
    ' a workbook whose first query was a different one looped forever.
    Dim v
    For Each v In ActiveWorkbook.Queries
        If v.Name = sName Then
            v.Delete
            Resume ' retry the Queries.Add statement that failed
        End If
    Next v
    ' No query with that name exists, so the failure has another cause.
    MsgBox "Error No: " & Err.Number & vbLf & Err.Description
    Stop
End Sub
And, unless there is some reason to use ActiveWorkbook, I'd suggest changing those references to ThisWorkbook.
Also note that if you Refresh the query, you will overwrite the data table on the activesheet; whereas if you execute your macro, you will be creating a new table on a new worksheet.

Copying Excel data from one file to the other and reformatting

I am trying to copy data from one excel to the other and then reformat.
This is the code I am using:
' Fragment (no Sub header, End With or End Sub is shown).
' Registers the workbook query "Export", which reads
' C:\Users\Khawaja\Desktop\Export.csv as comma-delimited CSV with Columns=9,
' promotes the first row to headers and types the Name, Surname, Email and
' Action columns as text.
ActiveWorkbook.Queries.Add Name:="Export", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Csv.Document(File.Contents(""C:\Users\Khawaja\Desktop\Export.csv""),[Delimiter="","", Columns=9, Encoding=65001, QuoteStyle=QuoteStyle.None])," & Chr(13) & "" & Chr(10) & " #""Promoted Headers"" = Table.PromoteHeaders(Source, [PromoteAllScalars=true])," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(#""Promoted Headers"",{{""Name"", type text}, {""Surname"", type" & _
" text}, {""Email"", type text}, {""Action"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
' Add a sheet after the active one; it receives the table at A1.
Sheets.Add After:=ActiveSheet
' Connect to the workbook query "Export" through the Mashup OLE DB provider.
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=Export;Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Export]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = False
.ListObject.DisplayName = "Export"
' Refresh in the foreground (BackgroundQuery:=False).
.Refresh BackgroundQuery:=False
My source file has data in columns. Each column has a heading of Name, Surname, Email and Action. But when I run the macro, it is not able to detect the column heads.
This is the error I get:
The column 'Name' of the table was not found
Any idea how the error can be removed?
The argument you are looking for in ListObjects.Add is XlListObjectHasHeaders. Pass XlListObjectHasHeaders:=xlYes to state that the table has headers. You can also pass xlGuess, and Excel will guess whether the table has headers. In your case both the header row and the data contain strings, so the default xlGuess is likely failing for that reason.
' Fragment (no Sub header, End With or End Sub is shown).
' Registers the workbook query "Export", which reads
' C:\Users\Khawaja\Desktop\Export.csv as comma-delimited CSV with Columns=9,
' promotes the first row to headers and types the Name, Surname, Email and
' Action columns as text.
ActiveWorkbook.Queries.Add Name:="Export", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Csv.Document(File.Contents(""C:\Users\Khawaja\Desktop\Export.csv""),[Delimiter="","", Columns=9, Encoding=65001, QuoteStyle=QuoteStyle.None])," & Chr(13) & "" & Chr(10) & " #""Promoted Headers"" = Table.PromoteHeaders(Source, [PromoteAllScalars=true])," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(#""Promoted Headers"",{{""Name"", type text}, {""Surname"", type" & _
" text}, {""Email"", type text}, {""Action"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
' Add a sheet after the active one; it receives the table at A1.
Sheets.Add After:=ActiveSheet
' Same connection as the question's code, with the header argument
' XlListObjectHasHeaders:=xlYes passed explicitly.
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=Export;Extended Properties=""""" _
, XlListObjectHasHeaders:=xlYes, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Export]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = False
.ListObject.DisplayName = "Export"
' Refresh in the foreground (BackgroundQuery:=False).
.Refresh BackgroundQuery:=False

Adding Parameters to connect and scrape data from a Dynamic URL

If I try to add parameters by splicing my variables into the URL string, it does not connect to the URL. To simplify the problem, I am hard-coding the variable values in my code, but normally I would pull them from a named range.
I have tried Power Query's advanced "Get Data from Web" feature but can't seem to add the parameters.
' OpenWebStockDataTest
' Registers the workbook query "Table 2" over the Yahoo Finance history page
' for the symbol "<sticker>.<exchange>", takes the table at index 2 of the
' page, and loads it into a table at A1 of a newly added worksheet.
Sub OpenWebStockDataTest()
    Dim sticker As String
    Dim exchange As String
    Dim symbol As String
    Dim pageUrl As String
    Dim mCode As String
    Dim connText As String
    Dim loaded As QueryTable

    sticker = "TGIF"
    exchange = "CN"

    ' The same "sticker.exchange" pair appears in the path and in the p= parameter.
    symbol = sticker & "." & exchange
    pageUrl = "https://finance.yahoo.com/quote/" & symbol & "/history?p=" & symbol

    mCode = "let" & vbCrLf & _
            " Source = Web.Page(Web.Contents(""" & pageUrl & """))," & vbCrLf & _
            " Data2 = Source{2}[Data]," & vbCrLf & _
            " #""Changed Type"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type number}, {""High"", type number}, {""Low"", type number}, {""Close*"", type number}, {""Adj Close**"", type number}, {""Volume"", Int64.Type}})" & vbCrLf & _
            "in" & vbCrLf & _
            " #""Changed Type"""

    ActiveWorkbook.Queries.Add Name:="Table 2", Formula:=mCode

    ' The new sheet becomes the active sheet and receives the table.
    ActiveWorkbook.Worksheets.Add

    connText = "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2"";Extended Properties="""""
    Set loaded = ActiveSheet.ListObjects.Add( _
        SourceType:=0, _
        Source:=connText, _
        Destination:=Range("$A$1")).QueryTable

    loaded.CommandType = xlCmdSql
    loaded.CommandText = Array("SELECT * FROM [Table 2]")
    loaded.RowNumbers = False
    loaded.FillAdjacentFormulas = False
    loaded.PreserveFormatting = True
    loaded.RefreshOnFileOpen = False
    loaded.BackgroundQuery = True
    loaded.RefreshStyle = xlInsertDeleteCells
    loaded.SavePassword = False
    loaded.SaveData = True
    loaded.AdjustColumnWidth = True
    loaded.RefreshPeriod = 0
    loaded.PreserveColumnInfo = True
    loaded.ListObject.DisplayName = "Table_2"
    ' Refresh in the foreground (BackgroundQuery:=False).
    loaded.Refresh BackgroundQuery:=False
End Sub
The above code should connect to:
https://finance.yahoo.com/quote/TGIF.CN/history?p=TGIF.CN
Please someone Help!!!
You are getting lost in your quotes.
" Source = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/"" & sticker & ""."" & exchange &""/history?p="" &sticker &"".""&exchange)),"
should be
" Source = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & sticker & "." & exchange & "/history?p=" & sticker & "." & exchange & """)),"
Edit:
' OpenWebStockDataTest
' Builds the Yahoo Finance history URL from the sticker and exchange values,
' registers it as the workbook query "Table 2" (table at index 2 of the page),
' and loads that query into a table at A1 of a newly added worksheet.
Sub OpenWebStockDataTest()
    Dim sticker As String
    Dim exchange As String
    Dim quoteUrl As String
    Dim formulaText As String
    Dim tableQuery As QueryTable

    sticker = "TGIF"
    exchange = "CN"

    ' URL shape: .../quote/<sticker>.<exchange>/history?p=<sticker>.<exchange>
    quoteUrl = "https://finance.yahoo.com/quote/" & sticker & "." & exchange & _
               "/history?p=" & sticker & "." & exchange

    ' Assemble the M formula one step at a time.
    formulaText = "let" & vbCrLf
    formulaText = formulaText & " Source = Web.Page(Web.Contents(""" & quoteUrl & """))," & vbCrLf
    formulaText = formulaText & " Data2 = Source{2}[Data]," & vbCrLf
    formulaText = formulaText & " #""Changed Type"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type number}, {""High"", type number}, {""Low"", type number}, {""Close*"", type number}, {""Adj Close**"", type number}, {""Volume"", Int64.Type}})" & vbCrLf
    formulaText = formulaText & "in" & vbCrLf
    formulaText = formulaText & " #""Changed Type"""

    ActiveWorkbook.Queries.Add Name:="Table 2", Formula:=formulaText

    ' The new sheet becomes the active sheet and receives the table.
    ActiveWorkbook.Worksheets.Add

    Set tableQuery = ActiveSheet.ListObjects.Add( _
        SourceType:=0, _
        Source:="OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2"";Extended Properties=""""", _
        Destination:=Range("$A$1")).QueryTable

    With tableQuery
        .CommandType = xlCmdSql
        .CommandText = Array("SELECT * FROM [Table 2]")
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .PreserveColumnInfo = True
        .ListObject.DisplayName = "Table_2"
        ' Refresh in the foreground (BackgroundQuery:=False).
        .Refresh BackgroundQuery:=False
    End With
End Sub

Resources