# Load the data
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import sklearn.linear_model
oecd = pd.read_csv("oecd.csv", thousands=',')
# Bound as gdp_per_capita so that the prepare_country_stats call below can find it
gdp_per_capita = pd.read_csv("gdp_per_capita.csv", thousands=',', delimiter='\t', encoding='latin1', na_values="n/a")
# Prepare the data
# NOTE(review): prepare_country_stats is not defined or imported in this snippet;
# presumably it is a helper provided elsewhere -- confirm before running.
country_stats = prepare_country_stats(oecd, gdp_per_capita)
# np.c_ (with the trailing underscore) turns each column into a 2-D column array
x = np.c_[country_stats["GDP per capita"]]
y = np.c_[country_stats["Life satisfaction"]]
# Visualise the data
country_stats.plot(kind='scatter', x="GDP per capita", y='Life satisfaction')
plt.show()
# Select a linear model
model = sklearn.linear_model.LinearRegression()
# Train the model
model.fit(x, y)
# Make a prediction for Cyprus
x_new = [[22587]]  # Cyprus GDP per capita
print(model.predict(x_new))
Whenever I try to run this code in Python 3.4.4, it throws the following error:
Traceback (most recent call last):
File "C:\Users\Ranjan.Ranjan-PC\Desktop\Hands-On\gdp.py", line 6, in <module>
import sklearn
File "C:\Python34\lib\site-packages\sklearn\__init__.py", line 134, in <module>
from .base import clone
File "C:\Python34\lib\site-packages\sklearn\base.py", line 10, in <module>
from scipy import sparse
File "C:\Python34\lib\site-packages\scipy\sparse\__init__.py", line 213, in <module>
from .csr import *
File "C:\Python34\lib\site-packages\scipy\sparse\csr.py", line 13, in <module>
from ._sparsetools import csr_tocsc, csr_tobsr, csr_count_blocks, \
ImportError: DLL load failed: %1 is not a valid Win32 application.
sklearn has been installed though
What is wrong?
Related
Problem
I have written code that takes some historical data as input. Assuming the dataset has a time-series format, I am trying to run a regression and find a predictor.
Code
For my project, I have four files: my_project.py, utilities.py, plotter.py, and constants.py. Here are small portions (the relevant imports) of two of the scripts:
my_project.py:
from time import perf_counter
from constants import (output_dir, DATAPATH, output_file)
from utilities import (dataframe_in_nutshell, excel_reader, info_printer, sys, module_creator, process_discovery, data_explanatory_analysis, excel_reader, df_cleaner, feature_extractor, ml_modelling)
from plotter import Plotter
utilities.py
import os
import sys
import inspect
from pathlib import Path
from typing import (Iterable, List, Tuple, Optional)
from itertools import zip_longest
import matplotlib.pyplot as plt
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import scale
from pycaret.regression import (setup, compare_models, predict_model, plot_model, finalize_model, load_model)
import csv
from constants import (np, Path, nan_value, plots_dir, HOURS_PER_WEEK, LAGS_STEP_NUM, rc_params, NA_VALUES, COLUMNS_NAMES, string_columns, LAGS_LABELS, numeric_columns, output_dir, DATAPATH, dtype_dict, train_size)
from pprint import PrettyPrinter
# Module-level pretty-printer instance
pp = PrettyPrinter()
import seaborn as sns
# Apply seaborn's default plot styling when this module is imported
sns.set()
Error Message
Traceback (most recent call last):
File "c:\Users\username\OneDrive\Desktop\project\my_project.py", line 10, in <module>
from utilities import (dataframe_in_nutshell, excel_reader, info_printer, sys, module_creator,
File "c:\Users\username\OneDrive\Desktop\project\utilities.py", line 18, in <module>
from pycaret.regression import (setup, compare_models, predict_model, plot_model, finalize_model,
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pycaret\regression.py", line 10, in <module>
import pycaret.internal.tabular
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pycaret\internal\tabular.py", line 48, in <module>
import pycaret.internal.preprocess
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pycaret\internal\preprocess.py", line 27, in <module>
from pyod.models.knn import KNN
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pyod\__init__.py", line 4, in <module>
from . import utils
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pyod\utils\__init__.py", line 4, in <module>
from .stat_models import pairwise_distances_no_broadcast
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\pyod\utils\stat_models.py", line 11, in <module>
from numba import njit
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\numba\__init__.py", line 42, in <module>
from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\numba\np\ufunc\__init__.py", line 3, in <module>
from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize
File "C:\Users\username\anaconda3\envs\py310\lib\site-packages\numba\np\ufunc\decorators.py", line 3, in <module>
from numba.np.ufunc import _internal
SystemError: initialization of _internal failed without raising an exception
Logistics
I am running my_project.py in visual studio code on a Windows 10 machine.
All packages are based on Python 3.10 using conda-forge channel
Research
The following pages seem to explain a workaround, but I am not sure that I understand the issue here. I would appreciate it if you could help me figure this out.
Error on import with numpy HEAD
Update ufunc loop signature resolution to use NumPy
Remove reliance on npy_ ufunc loops.
I had this very same issue today.
I solved it by downgrading NumPy to 1.23.1.
So: pip install numpy==1.23.1
I'm trying to load and use some pre-trained fastText embeddings (which I trained myself and stored in a .kv file). In the same directory I have stored the "vectors_1920_fullsample.kv.vectors_vocab.npy" file. Does anyone know what is going on?
This doesn't give any error:
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported
matplotlib.use('Agg')
import numpy as np
from scipy.spatial.distance import cosine
from nltk.stem.snowball import SnowballStemmer
# English Snowball stemmer instance
stemmer = SnowballStemmer("english")
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import os
import joblib
from gensim.models import Word2Vec
import random
from gensim.models import KeyedVectors
import pandas as pd
# Load the saved vectors memory-mapped read-only.
# NOTE(review): wd_model is not defined in this snippet; presumably it is the
# directory that holds the .kv file -- confirm.
model = KeyedVectors.load(wd_model + '/vectors_1920_fullsample.kv', mmap='r')
# Words whose vectors are looked up in the loaded model
words = ['immigrant','immigrants','migrant','migrants','foreign','foreigner','foreigners','alien','aliens','expatriate','expatriates','emigrant','emigrants','nonnative','nonnatives','stranger','strangers']
But then when I do this I get the error below:
# Look up each word's vector in the model and collect the vectors as DataFrame rows;
# note that this rebinds `words` from the list of strings to the DataFrame.
words = pd.DataFrame([np.array(model[word]) for word in words])
Error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 1, in <listcomp>
File "/cluster/apps/nss/gcc-6.3.0/python/3.7.4/x86_64/lib64/python3.7/site-packages/gensim/models/keyedvectors.py", line 353, in __getitem__
return self.get_vector(entities)
File "/cluster/apps/nss/gcc-6.3.0/python/3.7.4/x86_64/lib64/python3.7/site-packages/gensim/models/keyedvectors.py", line 471, in get_vector
return self.word_vec(word)
File "/cluster/apps/nss/gcc-6.3.0/python/3.7.4/x86_64/lib64/python3.7/site-packages/gensim/models/keyedvectors.py", line 2124, in word_vec
if word in self.vocab:
AttributeError: 'FastTextKeyedVectors' object has no attribute 'vocab'
I am trying to download a company's stock data from Yahoo with the code below.
But I am getting an ImportError in pandas_datareader.data, which says:
ImportError: cannot import name 'StringIO'
Please help
I am new to this... and I have already spent 4 hours on it but could not resolve it.
I have even tried
import io
from io import StringIO
but I am still getting the same error!
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader.data as web
style.use('ggplot')
# Date range for the requested price history: calendar year 2018
start = dt.datetime(2018, 1, 1)
end = dt.datetime(2018, 12, 31)
# The reader function is spelled DataReader (capital D and R);
# pandas_datareader.data has no attribute named `datareader`.
df = web.DataReader('TSLA', 'yahoo', start, end)
# Show the first five rows of the downloaded frame
print(df.head(5))
Error:-
Traceback (most recent call last):
File "C:\Users\JAILANCHAL\Desktop\tut.py", line 5, in <module>
import pandas_datareader.data as web
File "C:\Users\JAILANCHAL\AppData\Local\Programs\Python\Python35\lib\site-packages\pandas_datareader\__init__.py", line 2, in <module>
from .data import (DataReader, Options, get_components_yahoo,
File "C:\Users\JAILANCHAL\AppData\Local\Programs\Python\Python35\lib\site-packages\pandas_datareader\data.py", line 7, in <module>
from pandas_datareader.av.forex import AVForexReader
File "C:\Users\JAILANCHAL\AppData\Local\Programs\Python\Python35\lib\site-packages\pandas_datareader\av\__init__.py", line 3, in <module>
from pandas_datareader.base import _BaseReader
File "C:\Users\JAILANCHAL\AppData\Local\Programs\Python\Python35\lib\site-packages\pandas_datareader\base.py", line 11, in <module>
from pandas.compat import StringIO, bytes_to_str
ImportError: cannot import name 'StringIO'
I'm facing the error below when I try to run:
from sklearn.neural_network import MLPClassifier
Error :
from sklearn.neural_network import MLPClassifier
Traceback (most recent call last):
File "<ipython-input-77-6113b65dfa44>", line 1, in <module>
from sklearn.neural_network import MLPClassifier
File "C:\Users\anagha\Anaconda3\lib\site-packages\sklearn\neural_network\__init__.py", line 10, in <module>
from .multilayer_perceptron import MLPClassifier
File "C:\Users\anagha\Anaconda3\lib\site-packages\sklearn\neural_network\multilayer_perceptron.py", line 18, in <module>
from ..model_selection import train_test_split
File "C:\Users\anagha\Anaconda3\lib\site-packages\sklearn\model_selection\__init__.py", line 23, in <module>
from ._search import GridSearchCV
File "C:\Users\anagha\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py", line 32, in <module>
from ..utils.fixes import rankdata
**ImportError: cannot import name 'rankdata'**
If you already have a working installation of numpy and scipy:
pip install -U scikit-learn
otherwise:
conda install scikit-learn
finally check for updates:
conda update pip
>>> import matplotlib.pyplot as plt
Traceback (most recent call last):
File "<pyshell#2>", line 1, in <module>
import matplotlib.pyplot as plt
File "C:\Python33\lib\site-packages\matplotlib\pyplot.py", line 29, in <module>
from matplotlib.figure import Figure, figaspect
File "C:\Python33\lib\site-packages\matplotlib\figure.py", line 36, in <module>
from matplotlib.axes import Axes, SubplotBase, subplot_class_factory
File "C:\Python33\lib\site-packages\matplotlib\axes.py", line 22, in <module>
import matplotlib.dates as _ # <-registers a date unit converter
File "C:\Python33\lib\site-packages\matplotlib\dates.py", line 119, in <module>
from dateutil.rrule import (rrule, MO, TU, WE, TH, FR, SA, SU, YEARLY,
File "C:\Python33\lib\site-packages\dateutil\rrule.py", line 18, in <module>
from six import advance_iterator, integer_types
ImportError: No module named 'six'
>>>
I tried to import matplotlib.pyplot in Python 3.3 on Windows 7 and got the above error.
You're missing a dependency, namely 'six'.
You can get it through pip:
pip install six
or from here: http://www.lfd.uci.edu/~gohlke/pythonlibs/#six