Uncompressing a zip file using zlib

Uncompressing a zip file using zlib - zip

Studying the zip file format and zlib, I wrote a simple program that displays the contents of a zip file named test.zip and tries to use zlib to unzip the test.txt file in the archive:
#include <iostream>
#include <cstdio>
#include <string>
#include <vector>
#include <zlib.h>
using namespace std;
// On-disk layout of a zip "local file header" record. The struct is read
// straight from the archive with fread(), so it must contain no padding:
// packing is forced to 1 byte for this definition only and restored right
// after it, so later declarations keep their natural alignment.
#pragma pack(push, 1)
struct LocalFileHeader {
    uint32_t signature;         // record marker; readfile() expects 0x04034b50
    uint16_t version;
    uint16_t bit_flag;
    uint16_t compression;       // compression method of the entry data
    uint16_t last_mod_time;
    uint16_t last_mod_date;
    uint32_t crc32;
    uint32_t compressed_size;   // number of data bytes stored in the archive
    uint32_t uncompressed_size; // number of bytes after decompression
    uint16_t filename_length;   // bytes of file name following the header
    uint16_t extra_length;      // bytes of extra field following the name
};
#pragma pack(pop)
static_assert(sizeof(LocalFileHeader) == 30,
              "LocalFileHeader must match the 30-byte on-disk record");
int readfile(FILE* f)
{
LocalFileHeader lfh= {0};
while(true)
{
if(!fread(&lfh,sizeof(LocalFileHeader),1,f)) return 1;
if(lfh.signature!=0x04034b50) return 0;
string filename(lfh.filename_length,0);
if(!fread(filename.data(),1,lfh.filename_length,f)) return 1;
cout<<filename<<" "<<lfh.compression<<" "<<lfh.compressed_size
<<" "<<lfh.uncompressed_size<<endl;
if(lfh.extra_length>0) fseek(f,lfh.extra_length,SEEK_CUR);
if(filename=="test.txt"s)
{
cout<<endl<<"---------------test.txt------------------"<<endl;
vector<unsigned char> srcbuf(lfh.compressed_size,0);
vector<unsigned char> dstbuf(lfh.uncompressed_size+1,0);
unsigned long dstlen= lfh.uncompressed_size;
if(!fread(srcbuf.data(),1,lfh.compressed_size,f)) return 1;
int res= uncompress(dstbuf.data(),&dstlen,srcbuf.data(), lfh.compressed_size);
if(res !=Z_OK)
{
if(res==Z_DATA_ERROR) cout<<"Z_DATA_ERROR"<<endl;
if(res==Z_BUF_ERROR) cout<<"Z_BUF_ERROR"<<endl;
if(res==Z_MEM_ERROR) cout<<"Z_MEM_ERROR"<<endl;
return 1;
}
cout<<dstbuf.data();
cout<<endl<<"--------------------------------------"<<endl;
}
else if(fseek(f,lfh.compressed_size,SEEK_CUR)) return 1;
}
return 0;
}
// Entry point: opens test.zip in the working directory, lists it via
// readfile() and reports "Bad file" when readfile() signals a failure.
// The exit status is 1 only when the archive cannot be opened.
int main()
{
    FILE* archive = fopen("test.zip", "rb");
    if (archive == nullptr)
    {
        cout << "Error opening file" << endl;
        return 1;
    }

    const int status = readfile(archive);
    if (status != 0)
    {
        cout << "Bad file" << endl;
    }
    fclose(archive);
    return 0;
}
My program successfully outputs the contents of a zip file, but when it tries to unpack test.txt, I get Z_DATA_ERROR in the uncompress function. Obviously, I missed something, but I do not understand what exactly.

uncompress() is expecting a zlib stream, but you are giving it the raw deflate data of a zip entry. You need to use zlib's inflateInit2(), inflate(), and inflateEnd() functions to decompress raw deflate data.

Related

ICU4C austrdup function

I'm trying to run the ICU4C code demo below, and I am getting
warning: implicit declaration of function 'austrdup'
which subsequently generate an error. I understand that this is due to the missing imported library that contains 'austrdup' function, and have been looking at the source code to guess which one it is, but no luck. Does anyone have any idea which one should be imported?
#include <unicode/umsg.h>
#include <unicode/ustring.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Formats the same message pattern for three argument values and prints each
 * result. Every message is formatted twice: once with no buffer to learn the
 * required length, then again into a buffer of that size. The UChar result
 * is converted to a char string with u_austrcpy() before printing.
 * Returns 0 on success and 1 when an allocation or a format call fails.
 */
int main(int argc, char const *argv[])
{
    UChar *str;
    UErrorCode status = U_ZERO_ERROR;
    UChar *result = NULL;
    char *narrow = NULL;
    UChar pattern[100];
    int32_t resultlength, resultLengthOut, i;
    double testArgs[] = { 100.0, 1.0, 0.0};
    int exit_code = 0;

    str = (UChar*)malloc(sizeof(UChar) * 10);
    if (str == NULL) {
        return 1;
    }
    u_uastrcpy(str, "MyDisk");
    u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
    for (i = 0; i < 3; i++) {
        /* Start every iteration from a clean state so that a failure in one
           round cannot leak a stale status or buffer into the next one. */
        status = U_ZERO_ERROR;
        result = NULL;
        narrow = NULL;
        resultlength = 0;
        resultLengthOut = u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str);
        if (status == U_BUFFER_OVERFLOW_ERROR) { /* expected: output did not fit */
            status = U_ZERO_ERROR;
            resultlength = resultLengthOut + 1;
            result = (UChar*)malloc(sizeof(UChar) * resultlength);
            if (result != NULL) {
                u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str);
            }
        }
        if (result == NULL || U_FAILURE(status)) {
            fprintf(stderr, "message formatting failed\n");
            free(result);
            exit_code = 1;
            continue;
        }
        /* Sized generously: assumes the default codepage needs at most
           4 bytes per UChar - TODO confirm for the target platform. */
        narrow = (char*)malloc(sizeof(char) * 4 * resultlength);
        if (narrow == NULL) {
            free(result);
            exit_code = 1;
            continue;
        }
        u_austrcpy(narrow, result); /* UChar* -> char*, adds the terminating NUL */
        printf("%s\n", narrow);
        free(narrow);
        free(result);
    }
    free(str);
    return exit_code;
}

austrdup is not an official ICU function. It is only used by ICU's own tests; it is declared in icu4c/source/test/cintltst/cintltst.h and implemented in icu4c/source/test/cintltst/cintltst.c. It is basically just a wrapper around u_austrcpy.

Zero copy in using vmsplice/splice in Linux

I am trying to get zero copy semantics working in linux using
vmsplice()/splice() but I don't see any performance improvement. This
is on linux 3.10, tried on 3.0.0 and 2.6.32. The following code tries
to do file writes, I have tried network socket writes() also, couldn't
see any improvement.
Can somebody tell what am I doing wrong ?
Has anyone gotten improvement using vmsplice()/splice() in production ?
#include <assert.h>
#include <fcntl.h>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <vector>
const char *filename = "Test-File";
const int block_size = 4 * 1024;
const int file_size = 4 * 1024 * 1024;
using namespace std;
int pipes[2];
vector<char *> file_data;
// Returns the current gettimeofday() reading expressed in microseconds.
// The sum is computed as long long and then converted to the int return
// type, so the returned value is the narrowed form of that sum.
static int NowUsecs() {
  timeval now;
  const int rc = gettimeofday(&now, nullptr);
  assert(rc >= 0);
  const long long total_usecs = now.tv_sec * 1000000LL + now.tv_usec;
  return total_usecs;
}
// Allocates file_size / block_size buffers of block_size bytes each, aligned
// to 4096 bytes, fills each one with a known byte pattern and appends it to
// file_data.
void CreateData() {
  for (int xx = 0; xx < file_size / block_size; ++xx) {
    // The allocation must not live inside assert(): with NDEBUG defined the
    // whole expression would be compiled out and nothing would be allocated.
    void *raw = NULL;
    const int err = posix_memalign(&raw, 4096, block_size);
    assert(err == 0);
    if (err != 0) {
      abort();
    }
    char *data = static_cast<char *>(raw);
    // Give every block defined, block-specific contents so that the later
    // comparison in ValidateData() does not read uninitialized memory.
    memset(data, 'a' + xx % 26, block_size);
    file_data.emplace_back(data);
  }
}
// Sends buf_len bytes of buf to fd in two phases: first the buffer is handed
// to the write end of the global pipe with vmsplice(), then the same number
// of bytes is moved from the read end of the pipe to fd with splice().
// Either phase stops early when its call reports 0 bytes. Always returns 1.
int SpliceWrite(int fd, char *buf, int buf_len) {
  struct iovec chunk;
  chunk.iov_base = buf;
  chunk.iov_len = buf_len;

  // Phase 1: user memory -> pipe.
  for (int pending = buf_len; pending != 0;) {
    const int moved = vmsplice(pipes[1], &chunk, 1, SPLICE_F_GIFT);
    assert(moved >= 0);
    if (moved == 0) {
      break;
    }
    pending -= moved;
    if (pending != 0) {
      // Partial transfer: advance the iovec past the bytes already taken.
      chunk.iov_base = static_cast<char *>(chunk.iov_base) + moved;
      chunk.iov_len -= moved;
    }
  }

  // Phase 2: pipe -> destination descriptor.
  for (int pending = buf_len; pending != 0;) {
    const int moved = splice(pipes[0], NULL, fd, NULL, pending, SPLICE_F_MOVE);
    assert(moved >= 0);
    if (moved == 0) {
      break;
    }
    pending -= moved;
  }
  return 1;
}
// Creates/opens `filename`, writes every block of file_data to it (through
// SpliceWrite() when use_splice is true, otherwise with write()) and returns
// the difference between the NowUsecs() readings taken before and after the
// write loop.
int WriteToFile(const char *filename, bool use_splice) {
  // Open and write to the file.
  mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
  int fd = open(filename, O_CREAT | O_RDWR, mode);
  assert(fd >= 0);
  const int start = NowUsecs();
  for (int xx = 0; xx < file_size / block_size; ++xx) {
    if (use_splice) {
      SpliceWrite(fd, file_data[xx], block_size);
    } else {
      // Keep the system call outside assert() so the data is still written
      // when assertions are compiled out with NDEBUG.
      const ssize_t written = write(fd, file_data[xx], block_size);
      assert(written == block_size);
      (void)written;
    }
  }
  const int time = NowUsecs() - start;
  // Close file.
  const int close_err = close(fd);
  assert(close_err == 0);
  (void)close_err;
  return time;
}
// Reads the file named by the global `filename` back block by block, checks
// each block against the matching entry of file_data, then closes and
// removes the file.
void ValidateData() {
  // Open and read from file.
  const int fd = open(filename, O_RDWR);
  assert(fd >= 0);
  char *read_buf = (char *)malloc(block_size);
  assert(read_buf != NULL);
  for (int xx = 0; xx < file_size / block_size; ++xx) {
    // System calls stay outside assert() so they still run under NDEBUG.
    const ssize_t got = read(fd, read_buf, block_size);
    assert(got == block_size);
    (void)got;
    const int diff = memcmp(read_buf, file_data[xx], block_size);
    assert(diff == 0);
    (void)diff;
  }
  free(read_buf);  // the scratch buffer is not needed past this point
  // Close file.
  const int close_err = close(fd);
  assert(close_err == 0);
  (void)close_err;
  const int unlink_err = unlink(filename);
  assert(unlink_err == 0);
  (void)unlink_err;
}
// Entry point: creates the pipe and the data blocks, times one plain write()
// pass and one splice pass (validating and deleting the file after each),
// then prints both timings.
int main(int argc, char **argv) {
  const int pipe_rc = pipe(pipes);
  assert(pipe_rc == 0);

  CreateData();

  const int plain_usecs = WriteToFile(filename, false);
  ValidateData();

  const int splice_usecs = WriteToFile(filename, true);
  ValidateData();

  cout << "TIME WITH SPLICE: " << splice_usecs << endl;
  cout << "TIME WITHOUT SPLICE: " << plain_usecs << endl;
  return 0;
}

I did a proof-of-concept some years ago where I got a 4x speedup using optimized, specially tailored vmsplice() code. This was measured against a generic socket/write() based solution. This blog post from natsys-lab echoes my findings. But I believe you need to have exactly the right use case to get near this number.
So what are you doing wrong? Primarily I think you are measuring the wrong thing. When writing directly to a file you have 1 system call, which is write(). And you are not actually copying data (except to the kernel). When you have a buffer with data that you want to write to disk, it's not gonna get faster than that.
In your vmsplice/splice setup you are still copying your data into the kernel, but you have a total of 2 system calls, vmsplice()+splice(), to get it to disk. The speed being identical to write() is probably just a testament to Linux system call speed :-)
A more "fair" setup would be to write one program that read() from stdin and write() the same data to stdout. Write an identical program that simply splice() stdin into a file (or point stdout to a file when you run it). Although this setup might be too simple to really show anything.
Aside: an (undocumented?) feature of vmsplice() is that you can also use it to read data from a pipe. I used this in my old POC. It was basically just an IPC layer based on the idea of passing memory pages around using vmsplice().
Note: NowUsecs() probably overflows the int

Create a rapidjson::Value from a JSON string

I want to create a rapidjson::Value from a JSON string, e.g., [1,2,3]. Note: this is not a complete JSON object, it's just a JSON array. In Java I can use objectMapper.readTree("[1,2,3]")to create a JsonNode from a String.
My complete C++ code is as the following:
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <iostream>
// Debug helper: serializes `value` with a rapidjson Writer into a
// StringBuffer and prints the resulting text on its own line.
static void print_json_value(const rapidjson::Value &value) {
    typedef rapidjson::Writer<rapidjson::StringBuffer> JsonWriter;

    rapidjson::StringBuffer out;
    JsonWriter serializer(out);
    value.Accept(serializer);

    const char *text = out.GetString();
    std::cout << text << std::endl;
}
// Parses `json` into a function-local Document and returns its root as a
// plain Value.
// WARNING: the Document owns the allocator that holds everything produced by
// Parse(). It is destroyed when this function returns, so the returned Value
// refers to released storage - comparable to returning s.c_str() of a local
// std::string. Callers must not rely on the result; returning the Document
// itself avoids the problem.
static rapidjson::Value str_to_json(const char* json) {
rapidjson::Document document;
document.Parse(json);
// Moves only the root Value out; the allocator stays behind in `document`.
return std::move(document.Move());
}
int main(int argc, char* argv[]) {
const char* json_text = "[1,2,3]";
// copy the code of str_to_json() here
rapidjson::Document document;
document.Parse(json_text);
print_json_value(document); // works
const rapidjson::Value json_value = str_to_json(json_text);
assert(json_value.IsArray());
print_json_value(json_value); // Assertion failed here
return 0;
}
Could anyone find out the problem in my function str_to_json() ?
PS: The code above works in GCC 5.1.0 but not in Visual Studio Community 2015.
UPDATE:
Following the suggestion of @Milo Yip, the correct code is as follows:
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <iostream>
// Writes the JSON text of `value` to standard output, followed by a newline.
static void print_json_value(const rapidjson::Value &value) {
    using Buffer = rapidjson::StringBuffer;

    Buffer serialized;
    rapidjson::Writer<Buffer> json_writer(serialized);
    value.Accept(json_writer);
    std::cout << serialized.GetString() << std::endl;
}
// Parses `json` and returns the whole Document, so the allocator that backs
// the parsed values travels together with them to the caller.
static rapidjson::Document str_to_json(const char* json) {
    rapidjson::Document document;
    document.Parse(json);
    // Returning the local by name already moves it (or elides the move
    // entirely); an explicit std::move here would only block that elision.
    return document;
}
int main(int argc, char* argv[]) {
const char* json_text = "[1,2,3]";
// copy the code of str_to_json() here
rapidjson::Document document;
document.Parse(json_text);
print_json_value(document); // works
const rapidjson::Document json_value = str_to_json(json_text);
assert(json_value.IsArray());
print_json_value(json_value); // Now works
return 0;
}

Simple answer: the return type should be rapidjson::Document instead of rapidjson::Value.
Longer version: A Document contains an allocator to store all the values during parsing. When returning the Value (actually the root of the tree), the local Document object will be destructed and the buffers in the allocator will be released. It is like std::string s = ...; return s.c_str(); inside a function.

Trouble with garbage chars in visual c++ file reading

I am trying to read a text file using the following code:
// Reads the whole file named by `inputfile` and prints its contents to
// standard output followed by a newline. Does nothing when the file cannot
// be opened or its size cannot be determined.
void readfile(char *inputfile) {
    std::ifstream is(inputfile);
    if (!is.is_open()) {
        return;
    }

    // Measure the file by seeking to its end.
    is.seekg(0, std::ios::end);
    const std::streamoff filesize = is.tellg();
    if (filesize < 0) {
        return;
    }
    is.seekg(0, std::ios::beg);

    // std::string tracks its own length, so no terminating NUL is needed and
    // printing can never run past the data that was actually read.
    std::string buf(static_cast<std::size_t>(filesize), '\0');
    is.read(&buf[0], static_cast<std::streamsize>(filesize));
    // A text-mode read may deliver fewer characters than the size on disk
    // (line-ending translation); keep only what was really read.
    buf.resize(static_cast<std::size_t>(is.gcount()));
    is.close();

    std::cout << buf << std::endl;
}
While in g++ (mac / macports) it works correctly (getting all contents into a dynamic allocated char* array), in Visual Studio C++ 2010, I get constant errors of this type: Debug assertion failed: (unsigned)(c+1) <= 256, file isctype.c.
The problem is that it opens the file but can't find a terminating delimiter, so when it reaches the EOF it starts reading somewhere else (garbage characters). Using cout << buf; I can see that the file is read correctly on the Mac, but in Visual C++ it prints additional garbage characters. What is the problem here?

Make your buffer one larger and add the terminating nul yourself.

Let C++ standard library do the work for you:
// Streams the entire contents of `inputfile` into a string and prints it to
// standard output followed by a newline.
void readfile(const char *inputfile) {
    std::ifstream input(inputfile);
    const std::istreambuf_iterator<char> first(input);
    const std::istreambuf_iterator<char> last;
    const std::string contents(first, last);
    std::cout << contents << std::endl;
}
See, it's now also
exception safe
handles embedded NUL characters correctly
Note, of course you can use vector instead of string if you prefer (just change that one word)
Full demo: see it live on Coliru
#include <fstream>
#include <iostream>
#include <iterator>
// Prints the full contents of `inputfile` to standard output, followed by a
// newline. The text is gathered through stream-buffer iterators.
void readfile(const char *inputfile) {
    typedef std::istreambuf_iterator<char> CharIter;

    std::ifstream source(inputfile);
    std::string text((CharIter(source)), CharIter());
    std::cout << text << std::endl;
}
// Entry point: prints the file "main.cpp" from the working directory.
int main()
{
    const char *const path = "main.cpp";
    readfile(path);
    return 0;
}
Update For C++11 challenged compilers (and showing how to use a vector):
Also Live on Coliru
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
// Loads the entire contents of `inputfile` into a vector of chars and writes
// those bytes to standard output unchanged (no newline is appended).
void readfile(const char *inputfile) {
    std::ifstream source(inputfile);
    std::vector<char> bytes((std::istreambuf_iterator<char>(source)),
                            std::istreambuf_iterator<char>());
    std::cout.write(bytes.data(), bytes.size());
}
// Entry point: writes the file "main.cpp" from the working directory to
// standard output.
int main()
{
    const char *const path = "main.cpp";
    readfile(path);
    return 0;
}

using c executables / methods in an android ndk project

I'm trying to use canutils in an android ndk-project.
The canutils package usually compiles to executable files, but I haven't yet found a way to include these executables in an ndk-project.
so what im doing at the moment is just loading the shared libraries like this:
// Static initializer: loads the native library named "cansend" when the
// enclosing class is initialized.
static{
System.loadLibrary("cansend");
}
// Declared native: the implementation is expected to come from the loaded
// native library rather than from Java code.
public native void cansend();
For that, I've changed the Android.mk to build shared libraries instead.
still my c-code looks like this cansend.c as an example :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/raw.h>
#include "lib.h"
int main(int argc, char **argv)
{
int s; /* can raw socket */
int required_mtu;
int mtu;
int enable_canfd = 1;
struct sockaddr_can addr;
struct canfd_frame frame;
struct ifreq ifr;
/* check command line options */
if (argc != 3) {
fprintf(stderr, "Usage: %s <device> <can_frame>.\n", argv[0]);
return 1;
}
/* parse CAN frame */
required_mtu = parse_canframe(argv[2], &frame);
if (!required_mtu){
fprintf(stderr, "\nWrong CAN-frame format! Try:\n\n");
fprintf(stderr, " <can_id>#{R|data} for CAN 2.0 frames\n");
fprintf(stderr, " <can_id>##<flags>{data} for CAN FD frames\n\n");
fprintf(stderr, "<can_id> can have 3 (SFF) or 8 (EFF) hex chars\n");
fprintf(stderr, "{data} has 0..8 (0..64 CAN FD) ASCII hex-values (optionally");
fprintf(stderr, " seperated by '.')\n");
fprintf(stderr, "<flags> a single ASCII Hex value (0 .. F) which defines");
fprintf(stderr, " canfd_frame.flags\n\n");
fprintf(stderr, "e.g. 5A1#11.2233.44556677.88 / 123#DEADBEEF / 5AA# / ");
fprintf(stderr, "123##1 / 213##311\n 1F334455#1122334455667788 / 123#R ");
fprintf(stderr, "for remote transmission request.\n\n");
return 1;
}
/* open socket */
if ((s = socket(PF_CAN, SOCK_RAW, CAN_RAW)) < 0) {
perror("socket");
return 1;
}
addr.can_family = AF_CAN;
strcpy(ifr.ifr_name, argv[1]);
if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
perror("SIOCGIFINDEX");
return 1;
}
addr.can_ifindex = ifr.ifr_ifindex;
if (required_mtu > CAN_MTU) {
/* check if the frame fits into the CAN netdevice */
if (ioctl(s, SIOCGIFMTU, &ifr) < 0) {
perror("SIOCGIFMTU");
return 1;
}
mtu = ifr.ifr_mtu;
if (mtu != CANFD_MTU) {
printf("CAN interface ist not CAN FD capable - sorry.\n");
return 1;
}
/* interface is ok - try to switch the socket into CAN FD mode */
if (setsockopt(s, SOL_CAN_RAW, CAN_RAW_FD_FRAMES,
&enable_canfd, sizeof(enable_canfd))){
printf("error when enabling CAN FD support\n");
return 1;
}
/* ensure discrete CAN FD length values 0..8, 12, 16, 20, 24, 32, 64 */
frame.len = can_dlc2len(can_len2dlc(frame.len));
}
/* disable default receive filter on this RAW socket */
/* This is obsolete as we do not read from the socket at all, but for */
/* this reason we can remove the receive list in the Kernel to save a */
/* little (really a very little!) CPU usage. */
setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
perror("bind");
return 1;
}
/* send frame */
if (write(s, &frame, required_mtu) != required_mtu) {
perror("write");
return 1;
}
close(s);
return 0;
}
I want to be able to use this cansend method in my android-ndk-project.
Do I need to adjust the c-code and make a shared library out of the code or do i need to use the executable and call and include it in my project in a certain way to be able to use it?

Rename your main function or put another wrapper around it.
If you want your native function to be named as cansend(), your wrapper function should be something like this:
// Prototype of the native wrapper behind the Java method cansend().
// The "com_aaa_bbb_ccc" part of the name is a placeholder for the package
// name of the Java code that declares `public native void cansend();`.
// The extern "C" guard is applied only when compiling as C++.
#ifdef __cplusplus
extern "C" {
#endif
JNIEXPORT
void
Java_com_aaa_bbb_ccc_cansend( JNIEnv* env, jobject thiz);
#ifdef __cplusplus
}
#endif
Here, com_aaa_bbb_ccc comes from your package name of your java code which contains public native void cansend();.
For example, if your package name is com.example.test, your function name will be:
Java_com_example_test_cansend();

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Uncompressing a zip file using zlib - zip

uncompress() is expecting a zlib stream, but you are giving it the raw deflate data of a zip entry. You need to use zlib's inflateInit2(), inflate(), and inflateEnd() functions to decompress raw deflate data.

Related

ICU4C austrdup function

Zero copy in using vmsplice/splice in Linux

Create a rapidjson::Value from a JSON string

Trouble with garbage chars in visual c++ file reading

using c executables / methods in an android ndk project

Categories

Resources