I am trying to open JSON files located in a directory other than the current working directory (cwd). My setup: Python 3.5 on Windows (using Anaconda).
from pathlib import *
import json

path = Path("C:/foo/bar")
filelist = []
for f in path.iterdir():
    filelist.append(f)

for file in filelist:
    with open(file.name) as data_file:
        data = json.load(data_file)
In this setting I have these values:
file >> C:\foo\bar\0001.json
file.name >> 0001.json
However, I get the following error message:
---> 13 with open(file.name) as data_file:
14 data = json.load(data_file)
FileNotFoundError: [Errno 2] No such file or directory: '0001.json'
Here is what I tried so far:
Used .joinpath() to add the directory to the file name in the open command:
with open(path.joinpath(file.name)) as data_file:
    data = json.load(data_file)
TypeError: invalid file: WindowsPath('C:/foo/bar/0001.json')
Used .resolve(), since that works for me when loading CSV files into pandas. It did not work here.
for file in filelist:
    j = Path(path, file.name).resolve()
    with open(j) as data_file:
        data = json.load(data_file)
Since I'm on Windows, wrote the path as follows (and yes, the file is in that directory):
path = Path("C:\\foo\\bar") #resulted in the same FileNotFoundError above.
Instantiated path like this:
path = WindowsPath("C:/foo/bar")
#Same TypeError as above for both '\\' and '/'
The accepted answer has some redundancy: it re-collects a generator into a list, and it mixes the with/open() pattern with pathlib.Path instead of using Path's own reading methods.
pathlib.Path is an awesome solution for handling paths, especially when we want scripts that work on both Linux and Windows.
# modules
from pathlib import Path
import json

# static values
JSON_SUFFIXES = [".json", ".js", ".other_suffix"]

folder_path = Path("C:/users/user/documents")
for file_path in folder_path.iterdir():
    if file_path.suffix in JSON_SUFFIXES:
        data = json.loads(file_path.read_bytes())
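Note that json.loads() only accepts bytes since Python 3.6; on the Python 3.5 mentioned in the question you would decode first. A minimal variant that works on both (assuming the files are UTF-8):
data = json.loads(file_path.read_text(encoding="utf-8"))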
Just adding a modification for new users: pathlib.Path works with Python 3.
Complete solution; thanks @eryksun:
from pathlib import Path
import json

path = Path("C:/foo/bar")
filelist = []
for f in path.iterdir():
    filelist.append(f)

for file in filelist:
    with open(str(file)) as data_file:
        data = json.load(data_file)
This line works as well:
with file.open() as data_file:
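On Python 3.6 and later the str() call is unnecessary, because open() accepts path objects directly (PEP 519), so the loop can simply be:
for file in filelist:
    with open(file) as data_file:
        data = json.load(data_file)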
My code reads a bunch of JSON files from a directory, extracts "frequency" and "attenuation" data from those files, and writes it to a CSV file. Now I want to save that CSV file in a different directory. The code executes without any error but saves in the current directory. Can anyone help to resolve this issue?
import csv
import glob
import json
import os

site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
atten = []
timestamp = []
save_path = 'V:/python/result/'
if not os.path.isdir(save_path):
    os.makedirs(save_path)

filename = f'{site}-{frequency_to_check}.csv'
with open(filename, 'w', newline='') as csv_file:
    for file in total_files:
        with open(file) as json_file:
            output_json = json.load(json_file)
        for key in output_json:
            if key['start-freq'] == frequency_to_check:
                csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
        save_file = os.path.join(save_path, filename)
csv_file.close()
print(f'Total files processed {len(total_files)}')
The issue, as far as I can deduce, is here:
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
csv_file is the file object you opened (in the current directory), and every time this line executes you are writing rows to that already-open file. After that you are just creating a new path:
save_file = os.path.join(save_path, filename)
which is never actually used, since you close the file right after.
To fix this, build save_file first and open that full path as the CSV file:
import csv
import glob
import json
import os

site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
save_path = 'V:/python/result/'
if not os.path.isdir(save_path):
    os.makedirs(save_path)

filename = f'{site}-{frequency_to_check}.csv'
# Build the full destination path before opening, then open that path
save_file = os.path.join(save_path, filename)
with open(save_file, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)  # create the writer once, not on every row
    for file in total_files:
        with open(file) as json_file:
            output_json = json.load(json_file)
        for key in output_json:
            if key['start-freq'] == frequency_to_check:
                writer.writerow([key['start-freq'], key['attenuation']])
# No explicit close() needed: the with block closes the file on exit
print(f'Total files processed {len(total_files)}')
I guess this should work.
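As a design alternative (a sketch, not from the original answer): if the CSV should carry a header row, csv.DictWriter makes the two fields from the question explicit:
with open(save_file, 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=['start-freq', 'attenuation'])
    writer.writeheader()  # write the header row once
    for file in total_files:
        with open(file) as json_file:
            output_json = json.load(json_file)
        for key in output_json:
            if key['start-freq'] == frequency_to_check:
                writer.writerow({'start-freq': key['start-freq'],
                                 'attenuation': key['attenuation']})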
I am trying to read multiple JSON files from DBFS in Databricks.
raw_df = spark.read.json('/mnt/testdatabricks/metrics-raw/', recursiveFileLookup=True)
This returns data for only 35 files, whereas there are around 1600 files.
I tried to read some of the files (except those 35) using pandas, and it returned data.
However, the driver fails when I try to read all 1600 files using pandas.
import pandas as pd
from glob import glob

jsonFiles = glob('/dbfs/mnt/testdatabricks/metrics-raw/***/*.json')
dfList = []
for jsonFile in jsonFiles:
    df = pd.read_json(jsonFile)
    dfList.append(df)
    print("written :", jsonFile)
dfTrainingDF = pd.concat(dfList, axis=0)
Not sure why Spark is not able to read all the files.
Try:
spark.read.option("recursiveFileLookup", "true").json("file:///dir1/subdirectory")
Ref: How to make Spark session read all the files recursively?
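Applied to the mount from the question, that would look something like this (a sketch; whether it surfaces the missing files depends on where they sit in the directory tree):
raw_df = (spark.read
          .option("recursiveFileLookup", "true")
          .json("/mnt/testdatabricks/metrics-raw/"))
print(raw_df.count())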
I've been trying to play around with the Carla self-driving car environment but I run into "AttributeError: module 'carla' has no attribute 'Client'" when I try running the code from this tutorial: https://pythonprogramming.net/control-camera-sensor-self-driving-autonomous-cars-carla-python/.
I have made a few changes to the code, including changing the .egg file to its exact file path on my computer.
This is my code:
import glob
import os
import sys

try:
    sys.path.append(glob.glob('C:\Downloads\CARLA_0.9.9.4\WindowsNoEditor\PythonAPI\carla\dist\carla-0.9.9-py3.7-win-amd64.egg'))
except IndexError:
    pass

import carla

actor_list = []

#try:
client = carla.Client("localhost", 2000)
client.set_timeout(2.0)
world = client.get_world()
blueprint_library = world.get_blueprint_library()

#finally:
for actor in actor_list:
    actor.destroy()
print("All cleaned up!")
Just for reference, I'm running Windows 10 with Anaconda3 and Python 3.7.7, and I'm using CARLA version 0.9.9.4. Thanks in advance!
Just correct your folder path. You would need to rename the path in your file structure like this, removing all "." from the names. Also note that glob.glob() returns a list, so take the first match with [0] before appending it to sys.path; the original code appended the whole list, which puts a non-string entry on the path and the .egg is never found.
path = glob.glob('C:\Downloads\CARLA_0994\WindowsNoEditor\PythonAPI\carla\dist\carla-099-py37-win-amd64.egg')[0]
sys.path.append(path)
Full Example:
import glob
import os
import sys

try:
    path = glob.glob('C:\Downloads\CARLA_0994\WindowsNoEditor\PythonAPI\carla\dist\carla-099-py37-win-amd64.egg')[0]
    sys.path.append(path)
except IndexError:
    pass

import carla

actor_list = []
try:
    client = carla.Client("localhost", 2000)
    client.set_timeout(5.0)
    world = client.get_world()
    blueprint_library = world.get_blueprint_library()
    print("Map = ", world.get_map())
finally:
    for actor in actor_list:
        actor.destroy()
    print("All cleaned up!")
I have the following folder structure:
Directory
- Subdirectory 1:
    file.json
- Subdirectory 2:
    file.json
- Subdirectory 3:
    file.json
- Subdirectory 4:
    file.json
How do I read these JSON files using Pandas?
Try this code:
import pandas as pd
from pathlib import Path

# "**/*.json" matches .json files in Directory and all of its subdirectories
files = Path("Directory").glob("**/*.json")
for file in files:
    df = pd.read_json(file)  # process each df here; the next iteration overwrites it
To learn more about converting JSON strings to pandas objects:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html
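If the goal is one combined DataFrame rather than a separate one per file (assuming all the files share the same structure), a minimal extension of the above:
dfs = [pd.read_json(file) for file in Path("Directory").glob("**/*.json")]
combined = pd.concat(dfs, ignore_index=True)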
You could do the following:
import glob, os
import pandas as pd  # needed for pd.read_json below

working_directory = os.getcwd()
sub_directories = [working_directory + "/" + x for x in os.listdir(working_directory)
                   if os.path.isdir(working_directory + "/" + x)]

all_json_files = []
for sub_dir in sub_directories:
    os.chdir(sub_dir)
    for file in glob.glob("*.json"):
        all_json_files.append(sub_dir + "/" + file)

# Get back to the original working directory
os.chdir(working_directory)

list_of_dfs = [pd.read_json(x) for x in all_json_files]
From there, if all the JSON files have the same structure, you can concatenate them into a single DataFrame:
final_df = pd.concat(list_of_dfs)
I would like to import a CSV file into Python with a FileChooser and display it as a DataFrame. Here is the code, and it doesn't work. Thanks for your kind help.
def get_open_filename(self):
    filename = None
    chooser = gtk.FileChooserDialog("Open File...", self.window,
                                    gtk.FILE_CHOOSER_ACTION_OPEN,
                                    (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                     gtk.STOCK_OPEN, gtk.RESPONSE_OK))
    response = chooser.run()
    if response == gtk.RESPONSE_OK:
        with open(chooser.get_filename(), 'rb') as csvfile:
            don = DataFrame.from_csvfile(csvfile)  ## I am confused here !!!
            print don
    chooser.destroy()
    return filename
I believe from_csvfile takes a filename, not a file object, per these docs.
Try replacing
with open(chooser.get_filename(), 'rb') as csvfile:
    don = DataFrame.from_csvfile(csvfile)  ## I am confused here !!!
    print don
with
don = DataFrame.from_csvfile(chooser.get_filename())
print don
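Side note, and an assumption about intent: from_csvfile looks like rpy2's API. If a pandas DataFrame is what you actually want, pandas can read the chosen file directly:
import pandas as pd

don = pd.read_csv(chooser.get_filename())
print don  # Python 2 print statement, matching the rest of the code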